1
2
3
4
5
6
7
8
9
import xarray as xr 
import arraylake as al
import numpy as np
import rioxarray as rio
from shapely.geometry import Point,Polygon
import geopandas as gpd
import matplotlib.pyplot as plt
from pyproj import CRS
import pyproj
1
import cf_xarray
1
2
import re
import ast
1
2
import warnings
warnings.filterwarnings('ignore')

(can ignore) simple 1d ds w/ x dim, want to be able to query along a lon dim, w/ transform info in attrs¶

1
2
3
4
toy_ds = xr.Dataset(
    {'var': (('x'),np.random.rand(10))},
    
)
1
toy_ds.attrs['toy_transform'] = 'transform info'
1
toy_ds
<xarray.Dataset> Size: 80B
Dimensions:  (x: 10)
Dimensions without coordinates: x
Data variables:
    var      (x) float64 80B 0.06115 0.7821 0.3504 ... 0.6899 0.09133 0.2684
Attributes:
    toy_transform:  transform info
xarray.Dataset
    • x: 10
      • var
        (x)
        float64
        0.06115 0.7821 ... 0.09133 0.2684
        array([0.06115455, 0.78207079, 0.35039687, 0.95577   , 0.58659999,
               0.1228086 , 0.63446092, 0.68994913, 0.09132916, 0.2684359 ])
      • toy_transform :
        transform info

      need to implement custom index to do this

      Following along xarray example: https://docs.xarray.dev/en/stable/internals/how-to-create-custom-index.html

      (where i'm working) Xarray custom index¶

      1
      2
      3
      from xarray import Index
      from xarray.core.indexes import PandasIndex
      from xarray.core.indexing import merge_sel_results
      

      Defining a toy transform for now to replace w/ actual

       1
       2
       3
       4
       5
       6
       7
       8
       9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
      30
      31
      32
      33
      34
      35
      36
      37
      38
      39
      40
      41
      42
      43
      44
      45
      46
      47
      48
      49
      50
      51
      52
      53
      54
      55
      # create a custom index (RasterIndex)
      # this is a standin function, ultimately want a function that can read transform info from attrs and materialize array
      # .... need to figure out input, trickier than name,variable
      def toy_transform(name,variable):#, options=None):
          if name == 'x':
              key = variable[name].keys()
              #return variable * 2
              return variable[name][key]
          elif name == 'y':
              return variable * 5
          else:
              return variable
      
          
      class RasterIndex(Index): #rasterindex inherits xarray Index
          def __init__(self, xy_indexes): #need to pass an xy_indexes obj when creating rasterindex
              assert len(xy_indexes) == 2 #passed obj must be 2d
              
              dim = [idx.dims for idx in xy_indexes.values()] #make list of dims ? 
              assert dim[0] != dim[1] #dims can't be identical (need to be orthogonal)
              
              self._xy_indexes = xy_indexes
              
          @classmethod
          def from_variables(cls, variables,**kwargs):
              assert len(variables) ==2 
              xy_indexes = {
              #need to modify to take somethign that has access to the correct attrs?
                  f'transformed_{k}': toy_transform(k,v) for k,v in variables.items()
              }
              
              return cls(xy_indexes)
      
          def create_variables(self, variables):
              print(type(variables))
              print(type(variables['x']))
              idx_variables = {}
      
              for index in self._xy_indexes.items():
                  print(type(index))
                  idx_variables.update(index.create_variables(variables))
                  print(type(idx_variables))
                  return idx_variables
      
          def sel(self, labels):
              results = []
      
              for k, index in self._xy_indexes.items():
                  if k in labels: 
                      results.append(index.sel({k: labels[k]}))
      
                  return merge_sel_results(results)
      
      
                  
      
      1
      2
      3
      4
      5
      6
      7
      8
      9
      #create data array
      da = xr.DataArray(
          np.random.uniform(size=(100,50)),
          coords = {'x': ('x',np.arange(50)), 'y':('y', np.arange(100))},
          dims = ('y','x'),
          
      )
      da.x.attrs['xkey'] = '2'
      da.y.attrs['ykey'] = '5'
      
      1
      2
      #checkout inputs to from_variables
      variables = {name: da.coords[name] for name in ['x','y']}
      
      1
      variables['x'].attrs
      
      {'xkey': '2'}
      1
      2
      variable[name].keys()
              #return variable[name][key]
      
      ---------------------------------------------------------------------------
      NameError                                 Traceback (most recent call last)
      Cell In[73], line 1
      ----> 1 variable[name].keys()
            2         #return variable[name][key]
      
      NameError: name 'variable' is not defined
      1
      2
      #this is the output from from_variables(), its dict that holds mapping of of transformed vars in rasterindex to orig obj (i think?)
      xy_indexes = {k: toy_transform(k,v) for k,v in variables.items()}
      
      ---------------------------------------------------------------------------
      AttributeError                            Traceback (most recent call last)
      Cell In[72], line 2
            1 #this is the output from from_variables(), its dict that holds mapping of of transformed vars in rasterindex to orig obj (i think?)
      ----> 2 xy_indexes = {k: toy_transform(k,v) for k,v in variables.items()}
      
      Cell In[72], line 2, in <dictcomp>(.0)
            1 #this is the output from from_variables(), its dict that holds mapping of of transformed vars in rasterindex to orig obj (i think?)
      ----> 2 xy_indexes = {k: toy_transform(k,v) for k,v in variables.items()}
      
      Cell In[71], line 6, in toy_transform(name, variable)
            4 def toy_transform(name,variable):#, options=None):
            5     if name == 'x':
      ----> 6         key = variable[name].keys()
            7         #return variable * 2
            8         return variable[name][key]
      
      File ~/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/common.py:285, in AttrAccessMixin.__getattr__(self, name)
          283         with suppress(KeyError):
          284             return source[name]
      --> 285 raise AttributeError(
          286     f"{type(self).__name__!r} object has no attribute {name!r}"
          287 )
      
      AttributeError: 'DataArray' object has no attribute 'keys'
      1
      2
      #create a raster index obj
      index = RasterIndex.from_variables(variables)
      
      1
      2
      # this is the object that's failing when i try to pass RasterIndex to set_xindex
      type(index._xy_indexes.values)
      
      builtin_function_or_method
      1
      2
      3
      4
      5
      6
      #index.create_variables(variables)
      
      #this is index.create_variables()  https://github.com/pydata/xarray/blob/main/xarray/core/indexes.py#L163-L194
      
      #if variables is not None:
      #    print(dict(**variables))
      
      1
      index._xy_indexes['transformed_x']
      
      <xarray.DataArray 'x' (x: 50)> Size: 400B
      array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
             34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66,
             68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98])
      Coordinates:
        * x        (x) int64 400B 0 1 2 3 4 5 6 7 8 9 ... 41 42 43 44 45 46 47 48 49
      xarray.DataArray
      'x'
      • x: 50
      • 0 2 4 6 8 10 12 14 16 18 20 22 ... 76 78 80 82 84 86 88 90 92 94 96 98
        array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
               34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66,
               68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98])
        • x
          (x)
          int64
          0 1 2 3 4 5 6 ... 44 45 46 47 48 49
          array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
                 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
                 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
        • x
          PandasIndex
          PandasIndex(Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
                 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
                 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
                dtype='int64', name='x'))
      1
      r_index = RasterIndex.from_variables(variables)
      

      Stuck on:¶

      _xy_indexes.values() returns a xr.DataArray object for each dict item.
      RasterIndex's create_variables() method seems to want to run xarray's create_variables() on index which is a xr.DataArray obj not an index obj, so doesn't have that method.

      • xr method that seems to be being applied to the wrong obj in my code:
        File ~/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/dataset.py:5102, in Dataset.set_xindex(self, coord_names, index_cls, **options) 5098 coord_vars = {name: self._variables[name] for name in coord_names} 5100 index = index_cls.from_variables(coord_vars, options=options) -> 5102 new_coord_vars = index.create_variables(coord_vars) 5104 # special case for setting a pandas multi-index from level coordinates 5105 # TODO: remove it once we depreciate pandas multi-index dimension (tuple 5106 # elements) coordinate 5107 if isinstance(index, PandasMultiIndex):

      Cell In[197], line 37, in RasterIndex.create_variables(self, variables) 35 for index in self._xy_indexes.values(): 36 print(type(index)) ---> 37 idx_variables.update(index.create_variables(variables)) 38 print(type(idx_variables)) 39 return idx_variables

      AttributeError: 'Variable' object has no attribute 'create_variables'

      1
      print(xr.__file__)
      
      /home/emmamarshall/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/__init__.py
      
      1
      2
      type(r_index._xy_indexes.values())
      r_index._xy_indexes['transformed_x'].indexes
      
      Indexes:
          x        Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
             18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
             36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
            dtype='int64', name='x')
      1
      2
      3
      for index in r_index._xy_indexes.values():
          print(type(index))
          a = index.create_variables(variables)
      
      <class 'xarray.core.dataarray.DataArray'>
      
      ---------------------------------------------------------------------------
      AttributeError                            Traceback (most recent call last)
      Cell In[42], line 3
            1 for index in r_index._xy_indexes.values():
            2     print(type(index))
      ----> 3     a = index.create_variables(variables)
      
      File ~/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/common.py:285, in AttrAccessMixin.__getattr__(self, name)
          283         with suppress(KeyError):
          284             return source[name]
      --> 285 raise AttributeError(
          286     f"{type(self).__name__!r} object has no attribute {name!r}"
          287 )
      
      AttributeError: 'DataArray' object has no attribute 'create_variables'
      1
      2
      #drop indexes from da obj
      da = da.drop_indexes(['x','y'])
      
      1
      2
      #try to replace w RasterIndex
      da.set_xindex(['x','y'], RasterIndex)
      
      <class 'dict'>
      <class 'xarray.core.variable.Variable'>
      <class 'tuple'>
      
      ---------------------------------------------------------------------------
      AttributeError                            Traceback (most recent call last)
      Cell In[20], line 2
            1 #try to replace w RasterIndex
      ----> 2 da.set_xindex(['x','y'], RasterIndex)
      
      File ~/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/dataarray.py:2785, in DataArray.set_xindex(self, coord_names, index_cls, **options)
         2759 def set_xindex(
         2760     self,
         2761     coord_names: str | Sequence[Hashable],
         2762     index_cls: type[Index] | None = None,
         2763     **options,
         2764 ) -> Self:
         2765     """Set a new, Xarray-compatible index from one or more existing
         2766     coordinate(s).
         2767 
         (...)
         2783 
         2784     """
      -> 2785     ds = self._to_temp_dataset().set_xindex(coord_names, index_cls, **options)
         2786     return self._from_temp_dataset(ds)
      
      File ~/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/dataset.py:5102, in Dataset.set_xindex(self, coord_names, index_cls, **options)
         5098 coord_vars = {name: self._variables[name] for name in coord_names}
         5100 index = index_cls.from_variables(coord_vars, options=options)
      -> 5102 new_coord_vars = index.create_variables(coord_vars)
         5104 # special case for setting a pandas multi-index from level coordinates
         5105 # TODO: remove it once we depreciate pandas multi-index dimension (tuple
         5106 # elements) coordinate
         5107 if isinstance(index, PandasMultiIndex):
      
      Cell In[6], line 37, in RasterIndex.create_variables(self, variables)
           35 for index in self._xy_indexes.items():
           36     print(type(index))
      ---> 37     idx_variables.update(index.create_variables(variables))
           38     print(type(idx_variables))
           39     return idx_variables
      
      AttributeError: 'tuple' object has no attribute 'create_variables'
      1
       
      
      1
       
      
      1
      %debug
      
      > /tmp/ipykernel_339560/2675453310.py(37)create_variables()
           35         for index in self._xy_indexes.values():
           36             print(type(index))
      ---> 37             idx_variables.update(index.create_variables(variables))
           38             print(type(idx_variables))
           39             return idx_variables
      
      
      > /home/emmamarshall/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/dataset.py(5102)set_xindex()
         5100         index = index_cls.from_variables(coord_vars, options=options)
         5101 
      -> 5102         new_coord_vars = index.create_variables(coord_vars)
         5103 
         5104         # special case for setting a pandas multi-index from level coordinates
      
      
      > /home/emmamarshall/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/dataarray.py(2785)set_xindex()
         2783 
         2784         """
      -> 2785         ds = self._to_temp_dataset().set_xindex(coord_names, index_cls, **options)
         2786         return self._from_temp_dataset(ds)
         2787 
      
      
      > /tmp/ipykernel_339560/410474929.py(2)<module>()
            1 #try to replace w RasterIndex
      ----> 2 da.set_xindex(['x','y'], RasterIndex)
      
      
      <xarray.DataArray (y: 100, x: 50)> Size: 40kB
      array([[0.10837434, 0.23823874, 0.6306959 , ..., 0.54295881, 0.62047778,
              0.58753719],
             [0.09471543, 0.91438352, 0.69484488, ..., 0.28760125, 0.8415709 ,
              0.16374968],
             [0.40204161, 0.39558747, 0.64918775, ..., 0.74735962, 0.1494628 ,
              0.87625343],
             ...,
             [0.0132363 , 0.31026525, 0.65222139, ..., 0.67515085, 0.54884137,
              0.62870549],
             [0.48292731, 0.48887484, 0.68113517, ..., 0.03204292, 0.83639248,
              0.95624243],
             [0.78715716, 0.76659419, 0.64572525, ..., 0.05881856, 0.09321289,
              0.14708312]])
      Coordinates:
          x        (x) int64 400B 0 1 2 3 4 5 6 7 8 9 ... 41 42 43 44 45 46 47 48 49
          y        (y) int64 800B 0 1 2 3 4 5 6 7 8 9 ... 91 92 93 94 95 96 97 98 99
      
      *** all frames above hidden, use `skip_hidden False` to get get into those.
      
      > /home/emmamarshall/miniconda/envs/arraylake/lib/python3.11/site-packages/IPython/core/interactiveshell.py(3577)run_code()
         3575                     await eval(code_obj, self.user_global_ns, self.user_ns)
         3576                 else:
      -> 3577                     exec(code_obj, self.user_global_ns, self.user_ns)
         3578             finally:
         3579                 # Reset our crash handler in place
      
      
      *** Oldest frame
      
      1
      2
      for k,v in variables.items():
          print(v.name)
      
      x
      y
      
      1
      2
      3
      4
      5
      6
      #now that we've created the custom index, implement it
      da = xr.DataArray(
          np.random.uniform(size=(100,50)),
          coords = {'x': ('x',np.arange(50)), 'y':('y', np.arange(100))},
          dims = ('y','x'),
      )
      
      1
      2
      3
      #xr creates default indexes for x, y coordinates
      #explicitly drop those
      da = da.drop_indexes(['x','y'])
      
      1
      xr.__version__
      
      '2024.5.0'
      1
      2
      3
      #changed RasterIndex since this but it first produced a notimplemented error -- nt sure why it isn't happening anymore
      # Build a RasterIndex from the 'x' and 'y' coordinates
      da_raster = da.set_xindex(["x", "y"], RasterIndex)
      
      ---------------------------------------------------------------------------
      NotImplementedError                       Traceback (most recent call last)
      Cell In[219], line 2
            1 # Build a RasterIndex from the 'x' and 'y' coordinates
      ----> 2 da_raster = da.set_xindex(["x", "y"], RasterIndex)
      
      File ~/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/dataarray.py:2785, in DataArray.set_xindex(self, coord_names, index_cls, **options)
         2759 def set_xindex(
         2760     self,
         2761     coord_names: str | Sequence[Hashable],
         2762     index_cls: type[Index] | None = None,
         2763     **options,
         2764 ) -> Self:
         2765     """Set a new, Xarray-compatible index from one or more existing
         2766     coordinate(s).
         2767 
         (...)
         2783 
         2784     """
      -> 2785     ds = self._to_temp_dataset().set_xindex(coord_names, index_cls, **options)
         2786     return self._from_temp_dataset(ds)
      
      File ~/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/dataset.py:5100, in Dataset.set_xindex(self, coord_names, index_cls, **options)
         5094     raise ValueError(
         5095         f"those coordinates already have an index: {indexed_coords}"
         5096     )
         5098 coord_vars = {name: self._variables[name] for name in coord_names}
      -> 5100 index = index_cls.from_variables(coord_vars, options=options)
         5102 new_coord_vars = index.create_variables(coord_vars)
         5104 # special case for setting a pandas multi-index from level coordinates
         5105 # TODO: remove it once we depreciate pandas multi-index dimension (tuple
         5106 # elements) coordinate
      
      File ~/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/indexes.py:87, in Index.from_variables(cls, variables, options)
           61 @classmethod
           62 def from_variables(
           63     cls,
         (...)
           66     options: Mapping[str, Any],
           67 ) -> Self:
           68     """Create a new index object from one or more coordinate variables.
           69 
           70     This factory method must be implemented in all subclasses of Index.
         (...)
           85         A new Index object.
           86     """
      ---> 87     raise NotImplementedError()
      
      NotImplementedError: 
      1
      %debug
      
      > /home/emmamarshall/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/indexes.py(87)from_variables()
           85             A new Index object.
           86         """
      ---> 87         raise NotImplementedError()
           88 
           89     @classmethod
      
      
      > /home/emmamarshall/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/dataset.py(5100)set_xindex()
         5098         coord_vars = {name: self._variables[name] for name in coord_names}
         5099 
      -> 5100         index = index_cls.from_variables(coord_vars, options=options)
         5101 
         5102         new_coord_vars = index.create_variables(coord_vars)
      
      
      <class '__main__.RasterIndex'>
      
      > /home/emmamarshall/miniconda/envs/arraylake/lib/python3.11/site-packages/xarray/core/dataarray.py(2785)set_xindex()
         2783 
         2784         """
      -> 2785         ds = self._to_temp_dataset().set_xindex(coord_names, index_cls, **options)
         2786         return self._from_temp_dataset(ds)
         2787 
      
      
      1
       
      

      Ignore below this just random stuff¶

      1
      2
      3
      4
      5
      ds_no2 = xr.open_dataset(
          repo.store,
          group='atmos_no2_density/2020_05_31',
          engine='zarr',
      )
      
      1
      2
      ds_no2_0 = ds_no2['0']
      ds_no2_0.attrs = ds_no2.attrs
      

      Take original resolution view from overview object. Want to go from X to Lon

      IFD = image file directory

      1
      ds_no2_0.attrs.keys()
      
      dict_keys(['multiscales', 'Description', 'EndOrbit', 'EndUTC', 'GranuleDay', 'GranuleDayOfYear', 'GranuleMonth', 'GranuleYear', 'InputPointer', 'InstrumentName', 'Offset', 'OrbitCount', 'OrbitNumber', 'OVR_RESAMPLING_ALG', 'Period', 'PGE', 'PGEVersion', 'ProcessLevel', 'Resolution', 'ScaleFactor', 'StartOrbit', 'StartUTC', 'TAI93At0zOfGranule', 'Title', 'Units', 'DESCRIPTION', 'KeyDirectoryVersion', 'KeyRevision', 'KeyRevisionMinor', 'GTModelTypeGeoKey', 'GTRasterTypeGeoKey', 'GeographicTypeGeoKey', 'GeogCitationGeoKey', 'GeogAngularUnitsGeoKey', 'GeogSemiMajorAxisGeoKey', 'GeogInvFlatteningGeoKey', 'ModelPixelScale', 'ModelTiepoint'])
      1
      ds_no2_0
      
      <xarray.DataArray '0' (Y: 720, X: 1440)> Size: 4MB
      [1036800 values with dtype=float32]
      Dimensions without coordinates: Y, X
      Attributes: (12/38)
          multiscales:              [{'datasets': [{'path': '0'}, {'path': '1'}, {'...
          Description:              Field=ColumnAmountNO2Trop,StdField=ColumnAmount...
          EndOrbit:                 18922
          EndUTC:                   2020-06-01T00:00:00.000000Z
          GranuleDay:               31
          GranuleDayOfYear:         152
          ...                       ...
          GeogCitationGeoKey:       WGS 84
          GeogAngularUnitsGeoKey:   9102
          GeogSemiMajorAxisGeoKey:  6378137.0
          GeogInvFlatteningGeoKey:  298.257223563
          ModelPixelScale:          [0.25, 0.25, 0.0]
          ModelTiepoint:            [0.0, 0.0, 0.0, -180.0, 90.0, 0.0]
      xarray.DataArray
      '0'
      • Y: 720
      • X: 1440
      • ...
        [1036800 values with dtype=float32]
          • multiscales :
            [{'datasets': [{'path': '0'}, {'path': '1'}, {'path': '2'}], 'metadata': {}, 'name': '', 'version': '0.1'}]
            Description :
            Field=ColumnAmountNO2Trop,StdField=ColumnAmountNO2TropStd,SolarZenithAngle=[0:85],CloudFraction=[0:300],VcdQualityFlags=~19,XTrackQualityFlags=~[1:254],TerrainReflectivity=[0:1000]
            EndOrbit :
            18922
            EndUTC :
            2020-06-01T00:00:00.000000Z
            GranuleDay :
            31
            GranuleDayOfYear :
            152
            GranuleMonth :
            5
            GranuleYear :
            2020
            InputPointer :
            OMI-Aura_L2-OMNO2_2020m0530t2358-o84444_v003-2020m0601t121734.he5,OMI-Aura_L2-OMNO2_2020m0531t0137-o84445_v003-2020m0601t122316.he5,OMI-Aura_L2-OMNO2_2020m0531t0316-o84446_v003-2020m0601t122238.he5,OMI-Aura_L2-OMNO2_2020m0531t0455-o84447_v003-2020m0601t122331.he5,OMI-Aura_L2-OMNO2_2020m0531t0634-o84448_v003-2020m0601t130650.he5,OMI-Aura_L2-OMNO2_2020m0531t0813-o84449_v003-2020m0601t132308.he5,OMI-Aura_L2-OMNO2_2020m0531t0951-o84450_v003-2020m0601t132253.he5,OMI-Aura_L2-OMNO2_2020m0531t1130-o84451_v003-2020m0601t145810.he5,OMI-Aura_L2-OMNO2_2020m0531t1309-o84452_v003-2020m0601t150337.he5,OMI-Aura_L2-OMNO2_2020m0531t1448-o84453_v003-2020m0601t150345.he5,OMI-Aura_L2-OMNO2_2020m0531t1627-o84454_v003-2020m0601t150342.he5,OMI-Aura_L2-OMNO2_2020m0531t1806-o84455_v003-2020m0601t212010.he5,OMI-Aura_L2-OMNO2_2020m0531t1945-o84456_v003-2020m0601t212011.he5,OMI-Aura_L2-OMNO2_2020m0531t2124-o84457_v003-2020m0601t212527.he5,OMI-Aura_L2-OMNO2_2020m0531t2303-o84458_v003-2020m0601t212536.he5
            InstrumentName :
            OMI
            Offset :
            0.0
            OrbitCount :
            15
            OrbitNumber :
            [84444 84445 84446 84447 84448 84449 84450 84451 84452 84453 84454 84455 84456 84457 84458]
            OVR_RESAMPLING_ALG :
            NEAREST
            Period :
            Daily
            PGE :
            OMNO2d
            PGEVersion :
            &quot;1.0.5.03&quot;
            ProcessLevel :
            3d
            Resolution :
            0.250 degrees
            ScaleFactor :
            1.0
            StartOrbit :
            18908
            StartUTC :
            2020.0
            TAI93At0zOfGranule :
            864950410.0
            Title :
            NO2 tropospheric column density, screened for CloudFraction &lt; 30%
            Units :
            molec/cm2
            DESCRIPTION :
            ColumnAmountNO2TropCloudScreened
            KeyDirectoryVersion :
            1
            KeyRevision :
            1
            KeyRevisionMinor :
            0
            GTModelTypeGeoKey :
            2
            GTRasterTypeGeoKey :
            1
            GeographicTypeGeoKey :
            4326
            GeogCitationGeoKey :
            WGS 84
            GeogAngularUnitsGeoKey :
            9102
            GeogSemiMajorAxisGeoKey :
            6378137.0
            GeogInvFlatteningGeoKey :
            298.257223563
            ModelPixelScale :
            [0.25, 0.25, 0.0]
            ModelTiepoint :
            [0.0, 0.0, 0.0, -180.0, 90.0, 0.0]
          1
          ds_no2_0.X
          
          <xarray.DataArray 'X' (X: 1440)> Size: 12kB
          array([   0,    1,    2, ..., 1437, 1438, 1439])
          Dimensions without coordinates: X
          xarray.DataArray
          'X'
          • X: 1440
          • 0 1 2 3 4 5 6 7 8 9 ... 1431 1432 1433 1434 1435 1436 1437 1438 1439
            array([   0,    1,    2, ..., 1437, 1438, 1439])
              1
               
              
              1
               
              

              Playing around w itslive¶

               1
               2
               3
               4
               5
               6
               7
               8
               9
              10
              11
              12
              13
              14
              15
              16
              def get_bounds_polygon(input_xr):
                  
                  xmin = input_xr.coords['x'].data.min()
                  xmax = input_xr.coords['x'].data.max()
              
                  ymin = input_xr.coords['y'].data.min()
                  ymax = input_xr.coords['y'].data.max()
              
                  pts_ls = [(xmin, ymin), (xmax, ymin),(xmax, ymax), (xmin, ymax), (xmin, ymin)]
              
                  crs = f"epsg:{input_xr.mapping.spatial_epsg}"
              
                  polygon_geom = Polygon(pts_ls)
                  polygon = gpd.GeoDataFrame(index=[0], crs=crs, geometry=[polygon_geom]) 
                  
                  return polygon
              
              1
              2
              client = al.Client()
              repo = client.get_or_create_repo('earthmover/emma_playground')
              
              1
              2
              3
              4
              5
              ds_its = xr.open_dataset(
                  repo.store,
                  group='itslive/vvirtual',
                  engine='zarr')
              ds_its.mapping.spatial_epsg
              
              1
              its_vec = get_bounds_polygon(ds_its)
              
              1
              its_vec_ll = its_vec.to_crs('EPSG:4326')
              
              1
              2
              3
              4
              5
              6
              7
              coords = ast.literal_eval(ds_its.attrs['geo_polygon'])
              points = [Point(c) for c in coords]
              gdf = gpd.GeoDataFrame(geometry=points)
              
              fig, ax = plt.subplots()
              its_vec_ll.plot(facecolor='None', edgecolor='blue',ax=ax)
              gdf.plot(ax=ax)
              
              1
              2
              3
              4
              5
              6
              7
              8
              #Can also view projected to flat :
              coords_prj = ast.literal_eval(ds_its.attrs['proj_polygon'])
              points_prj = [Point(c) for c in coords_prj]
              gdf_prj = gpd.GeoDataFrame(geometry=points_prj)
              
              fig, ax = plt.subplots()
              its_vec.plot(ax=ax,alpha=0.5)
              gdf_prj.plot(ax=ax)
              

              GOAL : query this dataset w/ .sel() using lat/lon points instaed of x,y

              • in this case, x and y exist, but should be able to do this without explicit x,y coordinates and with only a transform and input params.

              crs-related ITS_LIVE attrs (at xr.Dataset level):¶

              -- geo_polygon : string containing latlon point coordinates of footprint
              -- latitude : cenlat ?
              -- longitude : cenlon ?
              -- proj_polygon: list of projected coords of footprint
              -- projection : epsg code

              Geographic metadata (stored in attrs of mapping data_var)¶

              -- GeoTransform
              -- crs_wkt
              -- false_easting
              -- false_northing
              -- grid_mapping_name
              -- inverse_flattening
              -- latitude_of_orig
              -- latitude_of_projection_origin
              -- proj4text
              -- scale_factor_at_projection_origin
              -- semi_major_axis
              -- spatial_epsg
              -- spatial_ref (== crs_wkt)
              -- straight_vertical_longitude_from_pole

              CF grid mapping info is in ds_its.mapping.attrs

              some differences in how its stored though...

              • except, 'latitude of origin' instead of 'longitude of prime meridian'
              1
              2
              3
              4
              5
              ds_its = ds_its.isel(mid_date = 350)
              ds_its = ds_its.drop('mid_date')
              print(ds_its.mapping.attrs['crs_wkt'])
              print(ds_its.mapping.spatial_ref)
              ds_its['mapping'].attrs['crs_wkt'] == ds_its.mapping.spatial_ref
              
              1
              2
              source_crs = CRS('EPSG:3413')
              target_crs = CRS('EPSG:4326')
              
              1
              2
              3
              4
              polar_to_latlon = pyproj.Transformer.from_crs(source_crs, target_crs)
              latlon_to_polar = pyproj.Transformer.from_crs(target_crs, source_crs)
              
              x,y = latlon_to_polar.transform(60.7,-147)
              
              1
              2
              3
              4
              X, Y = np.meshgrid(ds_its.x, ds_its.y)
              lat, lon = polar_to_latlon.transform(X, Y)
              
              plt.pcolormesh(lon);
              
              1
               
              

              Goal:¶

              Query something like ds_its.sel(lon=-147) and get data back without ds_its needing to have a fully populated x-dimension

              • need a transform that will take a lon value, return an x value
                • use attrs info to build transform

              Deepak CRSIndex example¶

                1
                2
                3
                4
                5
                6
                7
                8
                9
               10
               11
               12
               13
               14
               15
               16
               17
               18
               19
               20
               21
               22
               23
               24
               25
               26
               27
               28
               29
               30
               31
               32
               33
               34
               35
               36
               37
               38
               39
               40
               41
               42
               43
               44
               45
               46
               47
               48
               49
               50
               51
               52
               53
               54
               55
               56
               57
               58
               59
               60
               61
               62
               63
               64
               65
               66
               67
               68
               69
               70
               71
               72
               73
               74
               75
               76
               77
               78
               79
               80
               81
               82
               83
               84
               85
               86
               87
               88
               89
               90
               91
               92
               93
               94
               95
               96
               97
               98
               99
              100
              101
              102
              103
              104
              105
              106
              107
              108
              109
              110
              111
              112
              113
              114
              115
              116
              117
              118
              119
              120
              121
              122
              123
              124
              125
              126
              from xarray.core.indexes import Index, PandasIndex, get_indexer_nd
              from xarray.core.indexing import merge_sel_results
              
              
              def create_spatial_ref(crs_wkt):
                  """Because I don't know what I'm doing"""
                  return xr.Variable((), 0, attrs={"crs_wkt": crs_wkt})
              
              
              class CRSIndex(Index):
                  # based off Benoit's RasterIndex in
                  # https://hackmd.io/Zxw_zCa7Rbynx_iJu6Y3LA?view
              
                  def __init__(self, variables):
                      # TODO: hardcoded variable names
              
                      # assert len(xy_indexes) == 2
                      assert "x" in variables
                      assert "y" in variables
                      assert "spatial_ref" in variables
              
                      # TODO: Instead do whatever the rio accessor is doing.
                      # rioxarray.open_dataset is doing
                      spatial_ref = variables.pop("spatial_ref")
                      self._crs = rio.crs.CRS.from_wkt(spatial_ref.attrs["crs_wkt"])
              
                      # must have two distinct dimensions
                      # Assumes x, y for index are never scalar. Is that correct?
                      dim = [idx.dim for key, idx in variables.items()]
                      assert dim[0] != dim[1]
              
                      self._indexes = variables
              
                  # TODO: what goes in options?
                  @classmethod
                  def from_variables(cls, variables, options):
                      # assert len(variables) == 2
              
                      xy_indexes = {
                          k: PandasIndex.from_variables({k: v}, options=options)
                          for k, v in variables.items()
                          if k in ["x", "y"]
                      }
                      xy_indexes["spatial_ref"] = variables["spatial_ref"]
              
                      return cls(xy_indexes)
              
                  # TODO: variables=None?
                  # set_xindex tries to pass variables; this seems like a bug
                  def create_variables(self, variables=None):
                      idx_variables = {}
              
                      for index in self._indexes.values():
                          idx_variables.update(index.create_variables(variables))
              
                      idx_variables["spatial_ref"] = create_spatial_ref(self.as_wkt)
                      return idx_variables
              
                  # TODO: see notes about IndexSelResult
                  #    The latter is a small class that stores positional indexers (indices)
                  #    and that could also store new variables, new indexes,
                  #    names of variables or indexes to drop,
                  #    names of dimensions to rename, etc.
                  def sel(self, labels, **kwargs):
                      # sel needs to only handle keys in labels
                      # since it delegates to isel.
                      # we handle all entries in ._indexes there
                      results = []
                      for k, index in self._indexes.items():
                          if k in labels:
                              # defer to pandas type indexing.
                              # This is where we would implement KDTree and friends
                              results.append(index.sel({k: labels[k]}, **kwargs))
                      return merge_sel_results(results)
              
                  def isel(self, indexers):
                      # TODO: check dim names in indexes
                      results = {}
                      for k, index in self._indexes.items():
                          if k in indexers:
                              # again possible KDTree / friends here.
                              results[k] = index.isel({k: indexers[k]})
                          else:
                              results[k] = index
                      # AGAIN!
                      results["spatial_ref"] = create_spatial_ref(self.as_wkt)
                      return type(self)(results)
              
                  def __repr__(self):
                      string = f"CRSIndex: {self._crs.to_string()}"
                      return string
              
                  def equals(self, other):
                      result = self._crs is other._crs or (
                          self._crs == other._crs
                          and self._indexes["x"].equals(other._indexes["x"])
                          and self._indexes["y"].equals(other._indexes["y"])
                      )
                      return result
              
                  def join(self, other, how="inner"):
                      if self._crs != other._crs:
                          raise ValueError(
                              "Cannot align or join objects with different CRS. "
                              f"Received {self._crs.name!r} and {other._crs.name!r}"
                          )
              
                      new_indexes = {k: v.join(other._indexes[k], how=how) for k, v in self._indexes.items()}
                      # create new spatial_ref here.
                      new_indexes["spatial_ref"] = create_spatial_ref(self.as_wkt)
                      return type(self)(new_indexes)
              
                  def reindex_like(self, other, method=None, tolerance=None):
                      # TODO: different method, tolerance for x, y?
                      return {
                          k: get_indexer_nd(self._indexes[k].index, other._indexes[k].index, method, tolerance)
                          for k in self._indexes.keys()
                      }
              
                  @property
                  def as_crs(self):
                      return self._crs
              
                  @property
                  def as_wkt(self):
                      return self._crs.to_wkt()
              
               1
               2
               3
               4
               5
               6
               7
               8
               9
              10
              index = CRSIndex.from_variables(
                  {
                      'x': ds_its.cf['projection_x_coordinate'].variable,
                      'y': ds_its.cf['projection_y_coordinate'].variable,
                      'spatial_ref': ds_its['spatial_ref'].variable,
                  },
                  options= {},
              )
              index.__dict__.keys()
              index._indexes
              
               1
               2
               3
               4
               5
               6
               7
               8
               9
              10
              11
              new_ds = ds_its.drop_indexes(['x','y'])
              names = ds_its.cf.standard_names
              new_ds = new_ds.set_coords('spatial_ref')
              new_ds = new_ds.set_xindex(
                  (
                      *names['projection_x_coordinate'],
                      *names['projection_y_coordinate'],
                      'spatial_ref',
                  ),
                  CRSIndex, 
              )
              

              (spiraling a little) my own bad approach making a wrapper class for transform¶

              what am i doign

               1
               2
               3
               4
               5
               6
               7
               8
               9
              10
              11
              12
              class special_dataset():
                  
                  def __init__(self, ds, transform):
                      self.ds = ds
                      self.transform = transform
                  
                  def special_sel(self, lat=None, lon=None, method=None):
                      #transform coords
                      # call xr . sel but pass transformed coords
                      x,y = self.transform.transform(lat,lon)#, method)
                      #x,y = latlon_to_polar.transform(60.7,-147)
                      return self.ds.sel(x=x, y=y, method=method)
              
              1
              ab = special_dataset(ds_its, latlon_to_polar)
              
              1
              ab.special_sel(60.7, -147, 'nearest')
              
              1
              2
              print(ab.special_sel(60.7, -147, 'nearest').x.data)
              print(ab.special_sel(60.7, -147, 'nearest').y.data)
              
              1
              new_ds.sel(x=[-3171373.310018429, -3178373.310018429], method='nearest')
              
               1
               2
               3
               4
               5
               6
               7
               8
               9
              10
              11
              12
              13
              14
              15
              16
              #transformer
              def quadratic_sol(a,b,c):
                  #make sure real numbers
                  if not all(isinstance(i, (int, float)) for i in [a,b,c]):
                      raise ValueError('inputs must be real numbers')
                      
                  if a == 0:
                      raise ValueError('A cannot equal zero')
                      
                  d = (b**2) - (4*a*c)
                  
                  x1 = (-b + np.emath.sqrt(d))/ (2*a)
                  x2 = (-b - np.emath.sqrt(d)) / (2*a)
                  
                  return [x1,x2]
              # x = [-b +- np.sqrt(b^2-4ac)]/2a
              
              1
              quadratic_sol(1,10,-24)