/rgi_links

Link RGI7 to RGI6 outlines

Primary LanguagePython

rgi_links

Temporary repository for the Randolph Glacier Inventory (RGI) 7 "links team". In the end, the code in this repository will likely be merged into either GLIMS-RGI/rgitools or GLIMS-RGI/rgi7_scripts.

Installation

git clone https://github.com/ezwelty/rgi_links

Example usage

Create the Python environment described in environment.yaml and open a Python console.

conda env create -f environment.yaml
conda activate rgi
python

Prepare outlines

Load RGI6 and RGI7 outlines and write them to parquet for fast read access.

import helpers

rgi6 = helpers.load_rgi6_outlines(path='nsidc0770_00.rgi60.complete.zip')
rgi6.to_parquet('rgi6.parquet')

rgi7 = helpers.load_rgi7_outlines(path='rgi7')
rgi7.to_parquet('rgi7.parquet')

Compute overlaps

Read RGI6 and RGI7 outlines, compute overlaps, and write the results. Overlap polygons and the area fractions are computed with geographic coordinates (computing the latter with projected coordinates had maximum 0.4% error), but the overlap areas are computed in square meters with projected coordinates.

import geopandas as gpd
import helpers

rgi7 = gpd.read_parquet('rgi7.parquet', columns=['geometry', 'rgi_id'])
equal_area_crs = {'proj': 'cea'}

# --- Compute RGI7 self overlaps (~ 100 s)
overlaps = helpers.compute_self_overlaps(rgi7.geometry)
overlaps['area'] = overlaps['geometry'].to_crs(equal_area_crs).area
overlaps['i'] = rgi7['rgi_id'].iloc[overlaps['i']].values
overlaps['j'] = rgi7['rgi_id'].iloc[overlaps['j']].values
overlaps.to_parquet('rgi7_self_overlaps.parquet')

# --- Compute RGI7-RGI6 overlaps (~ 400 s)
rgi6 = gpd.read_parquet('rgi6.parquet', columns=['geometry', 'RGIId'])
overlaps = helpers.compute_cross_overlaps(rgi7.geometry, rgi6.geometry)
overlaps['area'] = overlaps['geometry'].to_crs(equal_area_crs).area
overlaps['i'] = rgi7['rgi_id'].iloc[overlaps['i']].values
overlaps['j'] = rgi6['RGIId'].iloc[overlaps['j']].values
overlaps.to_parquet('rgi7_rgi6_overlaps.parquet')

Fix RGI7 self overlaps

import geopandas as gpd
import helpers
import pyproj

overlaps = gpd.read_parquet('rgi7_self_overlaps.parquet')
rgi7 = (
  gpd.read_parquet('rgi7.parquet', columns=['geometry', 'rgi_id'])
  .set_index('rgi_id')
)

# --- Resolve RGI7 self overlaps
resolved = helpers.resolve_self_overlaps(
  overlaps=overlaps,
  geoms=rgi7.geometry,
  min_area=1e4,
  transformer=pyproj.Transformer.from_crs(
    'EPSG:4326', {'proj': 'cea'}, always_xy=True
  )
)
resolved.reset_index(name='geometry').to_parquet('rgi7_fixes.parquet')

# --- Confirm that remaining overlaps are rounding artifacts
rgi7.geometry[resolved.index] = resolved
remaining = helpers.compute_self_overlaps(rgi7.geometry)
assert remaining['geometry'].to_crs({'proj': 'cea'}).area.lt(1e-6).all()

Inspect RGI7-RGI6 overlaps

import numpy as np
import pandas as pd

# --- Read overlaps without geometries
overlaps = pd.read_parquet(
  'rgi7_rgi6_overlaps.parquet',
  columns=['i', 'j', 'i_area_fraction', 'j_area_fraction', 'area']
)

# --- Compute min and max area fraction
overlaps['min_area_fraction'] = (
  overlaps[['i_area_fraction', 'j_area_fraction']].min(axis=1)
)
overlaps['max_area_fraction'] = (
  overlaps[['i_area_fraction', 'j_area_fraction']].max(axis=1)
)

# --- Total (380998)
overlaps
# Max area fraction < 0.05 (127936)
overlaps[overlaps['max_area_fraction'].lt(0.05)]

# --- 1:1 RGI7:RGI6 (103840)
is_unique = (
  ~overlaps['i'].duplicated(keep=False) &
  ~overlaps['j'].duplicated(keep=False)
)
overlaps[is_unique].sort_values('i_area_fraction')
# Min area fraction < 0.5 (6221)
overlaps[is_unique & overlaps['min_area_fraction'].lt(0.5)]
# Max area fraction < 0.5 (591)
overlaps[is_unique & overlaps['max_area_fraction'].lt(0.5)]

# --- 1:N RGI7:RGI6 (81192 RGI7)
has_multiple = overlaps.groupby('i')['j'].count().gt(1)
overlaps[overlaps['i'].isin(has_multiple[has_multiple].index)]
# RGI7 area fraction > 0.1 (12586 RGI7)
mask = overlaps['i_area_fraction'].gt(0.1)
has_multiple = overlaps[mask].groupby('i')['j'].count().gt(1)
overlaps[overlaps['i'].isin(has_multiple[has_multiple].index)]

# --- N:1 RGI7:RGI6 (81744 RGI6)
has_multiple = overlaps.groupby('j')['i'].count().gt(1)
overlaps[overlaps['j'].isin(has_multiple[has_multiple].index)]
# RGI6 area fraction > 0.1 (15740 RGI6)
mask = overlaps['j_area_fraction'].gt(0.1)
has_multiple = overlaps[mask].groupby('j')['i'].count().gt(1)
overlaps[overlaps['j'].isin(has_multiple[has_multiple].index)]

# --- 1:0 RGI7:RGI6 (51893)
rgi7_ids = pd.read_parquet('rgi7.parquet', columns=['rgi_id']).iloc[:, 0]
pd.Index(rgi7_ids).difference(overlaps['i'])

# --- 0:1 RGI7:RGI6 (11466)
rgi6_ids = pd.read_parquet('rgi6.parquet', columns=['RGIId']).iloc[:, 0]
pd.Index(rgi6_ids).difference(overlaps['j'])