vdc = {'counties':
[<POLYGON ((-95.343 48.547, -95.341  48.715, -95.095 48.912, ...>,
<POLYGON ((-118.851 47.95, -118.847 48.479, -118.87 48.647, ...>,
<POLYGON ((-117.438 48.044,-117.541 47.79,  -117.607 47.798,...>,

1

1

size: 6.257914529454865 TiB

1

1

Repo card metadata block was not found. Setting CardData to empty.

1

1

1

<xarray.Dataset> Size: 114kB
Dimensions:   (geometry: 2844)
Coordinates:
  * geometry  (geometry) object 23kB POINT (19.8188896 41.3275) ... POINT (30...
Data variables:
    city      (geometry) object 23kB 'Tirana' 'Durres' ... 'Shepetivka' 'Bucha'
    country   (geometry) object 23kB 'Albania' 'Albania' ... 'Ukraine' 'Ukraine'
    lat       (geometry) float64 23kB 41.33 41.32 41.11 ... 50.75 50.18 50.57
    lon       (geometry) float64 23kB 19.82 19.44 20.08 ... 33.47 27.07 30.22

array([<POINT (19.819 41.328)>, <POINT (19.441 41.323)>,
       <POINT (20.082 41.112)>, ..., <POINT (33.467 50.75)>,
       <POINT (27.067 50.183)>, <POINT (30.217 50.567)>], dtype=object)

array(['Tirana', 'Durres', 'Elbasan', ..., 'Romny', 'Shepetivka', 'Bucha'],
      dtype=object)

array(['Albania', 'Albania', 'Albania', ..., 'Ukraine', 'Ukraine',
       'Ukraine'], dtype=object)

array([41.3275   , 41.3230556, 41.1125   , ..., 50.75     , 50.1833333,
       50.5666667])

array([19.8188896, 19.4413891, 20.082222 , ..., 33.4666672, 27.0666676,
       30.2166672])

PandasIndex(Index([   POINT (19.8188896 41.3275), POINT (19.4413891 41.3230556),
           POINT (20.082222 41.1125), POINT (19.4897213 40.4666667),
       POINT (19.5125771 42.0682829), POINT (19.5666676 40.7166667),
       POINT (20.7808342 40.6186111), POINT (19.9522228 40.7058333),
       POINT (19.7049999 40.9419444), POINT (19.5569439 41.1855556),
       ...
       POINT (38.6616669 48.5069444), POINT (33.6454582 49.0097264),
       POINT (33.5077782 48.3436111),  POINT (29.916666 50.0833333),
       POINT (33.3802795 46.7508333),  POINT (34.894165 50.3086111),
           POINT (39.7400017 48.295),      POINT (33.4666672 50.75),
       POINT (27.0666676 50.1833333), POINT (30.2166672 50.5666667)],
      dtype='object', name='geometry', length=2844))

1

1

Indexes:
    geometry  GeometryIndex (crs=EPSG:4326)

1

<xarray.Dataset> Size: 61GB
Dimensions:              (time: 7304, latitude: 721, longitude: 1440)
Coordinates:
  * latitude             (latitude) float32 3kB 90.0 89.75 89.5 ... -89.75 -90.0
    level                int64 8B 94256640801872
  * longitude            (longitude) float32 6kB 0.0 0.25 0.5 ... 359.5 359.8
  * time                 (time) datetime64[ns] 58kB 2017-01-01 ... 2021-12-31...
Data variables:
    2m_temperature       (time, latitude, longitude) float32 30GB dask.array<chunksize=(28, 721, 1440), meta=np.ndarray>
    u_component_of_wind  (time, latitude, longitude) float32 30GB dask.array<chunksize=(28, 721, 1440), meta=np.ndarray>

array([ 90.  ,  89.75,  89.5 , ..., -89.5 , -89.75, -90.  ], dtype=float32)

array(94256640801872)

array([0.0000e+00, 2.5000e-01, 5.0000e-01, ..., 3.5925e+02, 3.5950e+02,
       3.5975e+02], dtype=float32)

array(['2017-01-01T00:00:00.000000000', '2017-01-01T06:00:00.000000000',
       '2017-01-01T12:00:00.000000000', ..., '2021-12-31T06:00:00.000000000',
       '2021-12-31T12:00:00.000000000', '2021-12-31T18:00:00.000000000'],
      dtype='datetime64[ns]')

PandasIndex(Index([  90.0,  89.75,   89.5,  89.25,   89.0,  88.75,   88.5,  88.25,   88.0,
        87.75,
       ...
       -87.75,  -88.0, -88.25,  -88.5, -88.75,  -89.0, -89.25,  -89.5, -89.75,
        -90.0],
      dtype='float32', name='latitude', length=721))

PandasIndex(Index([   0.0,   0.25,    0.5,   0.75,    1.0,   1.25,    1.5,   1.75,    2.0,
         2.25,
       ...
        357.5, 357.75,  358.0, 358.25,  358.5, 358.75,  359.0, 359.25,  359.5,
       359.75],
      dtype='float32', name='longitude', length=1440))

PandasIndex(DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 06:00:00',
               '2017-01-01 12:00:00', '2017-01-01 18:00:00',
               '2017-01-02 00:00:00', '2017-01-02 06:00:00',
               '2017-01-02 12:00:00', '2017-01-02 18:00:00',
               '2017-01-03 00:00:00', '2017-01-03 06:00:00',
               ...
               '2021-12-29 12:00:00', '2021-12-29 18:00:00',
               '2021-12-30 00:00:00', '2021-12-30 06:00:00',
               '2021-12-30 12:00:00', '2021-12-30 18:00:00',
               '2021-12-31 00:00:00', '2021-12-31 06:00:00',
               '2021-12-31 12:00:00', '2021-12-31 18:00:00'],
              dtype='datetime64[ns]', name='time', length=7304, freq=None))

1

<xarray.Dataset> Size: 114kB
Dimensions:   (geometry: 2844)
Coordinates:
  * geometry  (geometry) object 23kB POINT (19.8188896 41.3275) ... POINT (30...
Data variables:
    city      (geometry) object 23kB 'Tirana' 'Durres' ... 'Shepetivka' 'Bucha'
    country   (geometry) object 23kB 'Albania' 'Albania' ... 'Ukraine' 'Ukraine'
    lat       (geometry) float64 23kB 41.33 41.32 41.11 ... 50.75 50.18 50.57
    lon       (geometry) float64 23kB 19.82 19.44 20.08 ... 33.47 27.07 30.22
Indexes:
    geometry  GeometryIndex (crs=EPSG:4326)

array([<POINT (19.819 41.328)>, <POINT (19.441 41.323)>,
       <POINT (20.082 41.112)>, ..., <POINT (33.467 50.75)>,
       <POINT (27.067 50.183)>, <POINT (30.217 50.567)>], dtype=object)

array(['Tirana', 'Durres', 'Elbasan', ..., 'Romny', 'Shepetivka', 'Bucha'],
      dtype=object)

array(['Albania', 'Albania', 'Albania', ..., 'Ukraine', 'Ukraine',
       'Ukraine'], dtype=object)

array([41.3275   , 41.3230556, 41.1125   , ..., 50.75     , 50.1833333,
       50.5666667])

array([19.8188896, 19.4413891, 20.082222 , ..., 33.4666672, 27.0666676,
       30.2166672])

<xvec.index.GeometryIndex object at 0x7f85b8f66210>

1
2

1

size: 0.15484336763620377 GB

1

<xarray.Dataset> Size: 166MB
Dimensions:              (time: 7304, geometry: 2844)
Coordinates:
    level                int64 8B 94256640801872
  * time                 (time) datetime64[ns] 58kB 2017-01-01 ... 2021-12-31...
  * geometry             (geometry) object 23kB POINT (19.8188896 41.3275) .....
Data variables:
    2m_temperature       (time, geometry) float32 83MB dask.array<chunksize=(28, 2844), meta=np.ndarray>
    u_component_of_wind  (time, geometry) float32 83MB dask.array<chunksize=(28, 2844), meta=np.ndarray>
Indexes:
    geometry  GeometryIndex (crs=None)

array(94256640801872)

array(['2017-01-01T00:00:00.000000000', '2017-01-01T06:00:00.000000000',
       '2017-01-01T12:00:00.000000000', ..., '2021-12-31T06:00:00.000000000',
       '2021-12-31T12:00:00.000000000', '2021-12-31T18:00:00.000000000'],
      dtype='datetime64[ns]')

array([<POINT (19.819 41.328)>, <POINT (19.441 41.323)>,
       <POINT (20.082 41.112)>, ..., <POINT (33.467 50.75)>,
       <POINT (27.067 50.183)>, <POINT (30.217 50.567)>], dtype=object)

PandasIndex(DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 06:00:00',
               '2017-01-01 12:00:00', '2017-01-01 18:00:00',
               '2017-01-02 00:00:00', '2017-01-02 06:00:00',
               '2017-01-02 12:00:00', '2017-01-02 18:00:00',
               '2017-01-03 00:00:00', '2017-01-03 06:00:00',
               ...
               '2021-12-29 12:00:00', '2021-12-29 18:00:00',
               '2021-12-30 00:00:00', '2021-12-30 06:00:00',
               '2021-12-30 12:00:00', '2021-12-30 18:00:00',
               '2021-12-31 00:00:00', '2021-12-31 06:00:00',
               '2021-12-31 12:00:00', '2021-12-31 18:00:00'],
              dtype='datetime64[ns]', name='time', length=7304, freq=None))

<xvec.index.GeometryIndex object at 0x7f85b9d71340>

1
2
3

1

1
2

[########################################] | 100% Completed | 15m 58s

1

<xarray.Dataset> Size: 166MB
Dimensions:              (time: 7304, geometry: 2844)
Coordinates:
  * time                 (time) datetime64[ns] 58kB 2017-01-01 ... 2021-12-31...
  * geometry             (geometry) object 23kB POINT (19.8188896 41.3275) .....
Data variables:
    2m_temperature       (time, geometry) float32 83MB 271.9 278.4 ... nan nan
    u_component_of_wind  (time, geometry) float32 83MB 112.1 112.7 ... 41.82
    city                 (geometry) object 23kB 'Tirana' 'Durres' ... 'Bucha'
    country              (geometry) object 23kB 'Albania' ... 'Ukraine'
Indexes:
    geometry  GeometryIndex (crs=EPSG:4326)

array(['2017-01-01T00:00:00.000000000', '2017-01-01T06:00:00.000000000',
       '2017-01-01T12:00:00.000000000', ..., '2021-12-31T06:00:00.000000000',
       '2021-12-31T12:00:00.000000000', '2021-12-31T18:00:00.000000000'],
      dtype='datetime64[ns]')

array([<POINT (19.819 41.328)>, <POINT (19.441 41.323)>,
       <POINT (20.082 41.112)>, ..., <POINT (33.467 50.75)>,
       <POINT (27.067 50.183)>, <POINT (30.217 50.567)>], dtype=object)

array([[271.87958, 278.35577, 269.85394, ..., 269.92792, 271.24445,
        272.31277],
       [270.30252, 277.62662, 267.76663, ..., 271.04095, 271.48956,
        272.74133],
       [281.74112, 282.03555, 281.34186, ..., 272.17865, 274.12875,
        273.7341 ],
       ...,
       [278.5096 , 282.59393, 277.4078 , ..., 267.58942, 274.69095,
        271.959  ],
       [287.78763, 286.99976, 287.64877, ..., 270.57343, 276.1852 ,
        275.06375],
       [      nan,       nan,       nan, ...,       nan,       nan,
              nan]], dtype=float32)

array([[112.14053  , 112.654465 , 110.033875 , ..., 139.53139  ,
        138.74988  , 141.79944  ],
       [109.49022  , 109.24812  , 108.42415  , ..., 113.78     ,
        127.885284 , 129.67764  ],
       [116.612946 , 116.66391  , 114.61247  , ..., 117.72574  ,
        131.63141  , 124.44496  ],
       ...,
       [-24.250381 , -23.992752 , -25.307579 , ...,  12.396057 ,
         -9.374149 ,   0.5137253],
       [-18.93332  , -18.946648 , -18.782288 , ...,  12.840256 ,
          8.216141 ,   8.793602 ],
       [-16.161514 , -15.952744 , -17.574066 , ...,  50.081924 ,
         33.921955 ,  41.82426  ]], dtype=float32)

array(['Tirana', 'Durres', 'Elbasan', ..., 'Romny', 'Shepetivka', 'Bucha'],
      dtype=object)

array(['Albania', 'Albania', 'Albania', ..., 'Ukraine', 'Ukraine',
       'Ukraine'], dtype=object)

PandasIndex(DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 06:00:00',
               '2017-01-01 12:00:00', '2017-01-01 18:00:00',
               '2017-01-02 00:00:00', '2017-01-02 06:00:00',
               '2017-01-02 12:00:00', '2017-01-02 18:00:00',
               '2017-01-03 00:00:00', '2017-01-03 06:00:00',
               ...
               '2021-12-29 12:00:00', '2021-12-29 18:00:00',
               '2021-12-30 00:00:00', '2021-12-30 06:00:00',
               '2021-12-30 12:00:00', '2021-12-30 18:00:00',
               '2021-12-31 00:00:00', '2021-12-31 06:00:00',
               '2021-12-31 12:00:00', '2021-12-31 18:00:00'],
              dtype='datetime64[ns]', name='time', length=7304, freq=None))

<xvec.index.GeometryIndex object at 0x7fe863ce1160>

1
2
3

1

<xarray.Dataset> Size: 166MB
Dimensions:              (time: 7304, geometry: 2844)
Coordinates:
  * time                 (time) datetime64[ns] 58kB 2017-01-01 ... 2021-12-31...
  * geometry             (geometry) object 23kB POINT (19.8188896 41.3275) .....
    city                 (geometry) object 23kB 'Tirana' 'Durres' ... 'Bucha'
    country              (geometry) object 23kB 'Albania' ... 'Ukraine'
Data variables:
    2m_temperature       (time, geometry) float32 83MB 271.9 278.4 ... nan nan
    u_component_of_wind  (time, geometry) float32 83MB 112.1 112.7 ... 41.82
Indexes:
    geometry  GeometryIndex (crs=EPSG:4326)

array(['2017-01-01T00:00:00.000000000', '2017-01-01T06:00:00.000000000',
       '2017-01-01T12:00:00.000000000', ..., '2021-12-31T06:00:00.000000000',
       '2021-12-31T12:00:00.000000000', '2021-12-31T18:00:00.000000000'],
      dtype='datetime64[ns]')

array([<POINT (19.819 41.328)>, <POINT (19.441 41.323)>,
       <POINT (20.082 41.112)>, ..., <POINT (33.467 50.75)>,
       <POINT (27.067 50.183)>, <POINT (30.217 50.567)>], dtype=object)

array(['Tirana', 'Durres', 'Elbasan', ..., 'Romny', 'Shepetivka', 'Bucha'],
      dtype=object)

array(['Albania', 'Albania', 'Albania', ..., 'Ukraine', 'Ukraine',
       'Ukraine'], dtype=object)

array([[271.87958, 278.35577, 269.85394, ..., 269.92792, 271.24445,
        272.31277],
       [270.30252, 277.62662, 267.76663, ..., 271.04095, 271.48956,
        272.74133],
       [281.74112, 282.03555, 281.34186, ..., 272.17865, 274.12875,
        273.7341 ],
       ...,
       [278.5096 , 282.59393, 277.4078 , ..., 267.58942, 274.69095,
        271.959  ],
       [287.78763, 286.99976, 287.64877, ..., 270.57343, 276.1852 ,
        275.06375],
       [      nan,       nan,       nan, ...,       nan,       nan,
              nan]], dtype=float32)

array([[112.14053  , 112.654465 , 110.033875 , ..., 139.53139  ,
        138.74988  , 141.79944  ],
       [109.49022  , 109.24812  , 108.42415  , ..., 113.78     ,
        127.885284 , 129.67764  ],
       [116.612946 , 116.66391  , 114.61247  , ..., 117.72574  ,
        131.63141  , 124.44496  ],
       ...,
       [-24.250381 , -23.992752 , -25.307579 , ...,  12.396057 ,
         -9.374149 ,   0.5137253],
       [-18.93332  , -18.946648 , -18.782288 , ...,  12.840256 ,
          8.216141 ,   8.793602 ],
       [-16.161514 , -15.952744 , -17.574066 , ...,  50.081924 ,
         33.921955 ,  41.82426  ]], dtype=float32)

PandasIndex(DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 06:00:00',
               '2017-01-01 12:00:00', '2017-01-01 18:00:00',
               '2017-01-02 00:00:00', '2017-01-02 06:00:00',
               '2017-01-02 12:00:00', '2017-01-02 18:00:00',
               '2017-01-03 00:00:00', '2017-01-03 06:00:00',
               ...
               '2021-12-29 12:00:00', '2021-12-29 18:00:00',
               '2021-12-30 00:00:00', '2021-12-30 06:00:00',
               '2021-12-30 12:00:00', '2021-12-30 18:00:00',
               '2021-12-31 00:00:00', '2021-12-31 06:00:00',
               '2021-12-31 12:00:00', '2021-12-31 18:00:00'],
              dtype='datetime64[ns]', name='time', length=7304, freq=None))

<xvec.index.GeometryIndex object at 0x7f85bb9bb410>

1

1

<xarray.Dataset> Size: 159kB
Dimensions:              (season: 4, geometry: 2844)
Coordinates:
  * geometry             (geometry) object 23kB POINT (19.8188896 41.3275) .....
    city                 (geometry) object 23kB 'Tirana' 'Durres' ... 'Bucha'
    country              (geometry) object 23kB 'Albania' ... 'Ukraine'
  * season               (season) object 32B 'DJF' 'JJA' 'MAM' 'SON'
Data variables:
    2m_temperature       (season, geometry) float32 46kB 280.6 283.1 ... 282.7
    u_component_of_wind  (season, geometry) float32 46kB 55.58 55.63 ... 47.79
Indexes:
    geometry  GeometryIndex (crs=EPSG:4326)

array([<POINT (19.819 41.328)>, <POINT (19.441 41.323)>,
       <POINT (20.082 41.112)>, ..., <POINT (33.467 50.75)>,
       <POINT (27.067 50.183)>, <POINT (30.217 50.567)>], dtype=object)

array(['Tirana', 'Durres', 'Elbasan', ..., 'Romny', 'Shepetivka', 'Bucha'],
      dtype=object)

array(['Albania', 'Albania', 'Albania', ..., 'Ukraine', 'Ukraine',
       'Ukraine'], dtype=object)

array(['DJF', 'JJA', 'MAM', 'SON'], dtype=object)

array([[280.62354, 283.11908, 279.23575, ..., 270.3793 , 271.60138,
        271.44788],
       [297.49365, 297.15274, 297.14392, ..., 293.97604, 292.93857,
        294.0415 ],
       [287.5308 , 288.08292, 286.8431 , ..., 282.1084 , 282.28137,
        282.68982],
       [289.8003 , 291.75275, 288.78796, ..., 282.25952, 282.63968,
        282.73175]], dtype=float32)

array([[ 55.575176,  55.630436,  54.574   , ...,  74.21834 ,  76.10485 ,
         75.07553 ],
       [-34.584896, -34.58256 , -34.817337, ..., -26.087788, -26.422586,
        -26.269844],
       [ 15.728514,  15.73295 ,  15.545878, ...,  17.109795,  17.090252,
         17.18853 ],
       [ 37.93523 ,  37.957623,  37.490055, ...,  47.977142,  47.582752,
         47.79004 ]], dtype=float32)

<xvec.index.GeometryIndex object at 0x7f858594aea0>

PandasIndex(Index(['DJF', 'JJA', 'MAM', 'SON'], dtype='object', name='season'))

1

<xarray.Dataset> Size: 175kB
Dimensions:              (time: 7304, geometry: 2)
Coordinates:
  * time                 (time) datetime64[ns] 58kB 2017-01-01 ... 2021-12-31...
  * geometry             (geometry) object 16B POINT (-16.2546158 28.4682386)...
    city                 (geometry) object 16B 'Santa Cruz de Tenerife' 'La L...
    country              (geometry) object 16B 'Spain' 'Spain'
Data variables:
    2m_temperature       (time, geometry) float32 58kB 280.3 280.3 ... nan nan
    u_component_of_wind  (time, geometry) float32 58kB 20.11 20.11 ... -31.75
Indexes:
    geometry  GeometryIndex (crs=EPSG:4326)

array(['2017-01-01T00:00:00.000000000', '2017-01-01T06:00:00.000000000',
       '2017-01-01T12:00:00.000000000', ..., '2021-12-31T06:00:00.000000000',
       '2021-12-31T12:00:00.000000000', '2021-12-31T18:00:00.000000000'],
      dtype='datetime64[ns]')

array([<POINT (-16.255 28.468)>, <POINT (-16.317 28.483)>], dtype=object)

array(['Santa Cruz de Tenerife', 'La Laguna'], dtype=object)

array(['Spain', 'Spain'], dtype=object)

array([[280.3136 , 280.3136 ],
       [276.42108, 276.42108],
       [287.49585, 287.49585],
       ...,
       [281.391  , 281.391  ],
       [294.3005 , 294.3005 ],
       [      nan,       nan]], dtype=float32)

array([[ 20.114271,  20.114271],
       [ 18.899544,  18.899544],
       [ 10.494137,  10.494137],
       ...,
       [-44.22158 , -44.22158 ],
       [-34.52471 , -34.52471 ],
       [-31.752907, -31.752907]], dtype=float32)

PandasIndex(DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 06:00:00',
               '2017-01-01 12:00:00', '2017-01-01 18:00:00',
               '2017-01-02 00:00:00', '2017-01-02 06:00:00',
               '2017-01-02 12:00:00', '2017-01-02 18:00:00',
               '2017-01-03 00:00:00', '2017-01-03 06:00:00',
               ...
               '2021-12-29 12:00:00', '2021-12-29 18:00:00',
               '2021-12-30 00:00:00', '2021-12-30 06:00:00',
               '2021-12-30 12:00:00', '2021-12-30 18:00:00',
               '2021-12-31 00:00:00', '2021-12-31 06:00:00',
               '2021-12-31 12:00:00', '2021-12-31 18:00:00'],
              dtype='datetime64[ns]', name='time', length=7304, freq=None))

<xvec.index.GeometryIndex object at 0x7f859b6fd880>

1

1

1

	city	country	region	latitude	longitude	continent	x	y	z	lon_360	geometry
84	Tirana	Albania	Southern Europe	41.327500	19.818890	Europe	4500.907258	1622.102741	4207.167403	19.818890	POINT (19.81889 41.32750)
85	Durres	Albania	Southern Europe	41.323056	19.441389	Europe	4511.804644	1592.521568	4206.796276	19.441389	POINT (19.44139 41.32306)
86	Elbasan	Albania	Southern Europe	41.112500	20.082222	Europe	4508.200233	1648.181036	4189.184997	20.082222	POINT (20.08222 41.11250)
87	Vlore	Albania	Southern Europe	40.466667	19.489721	Europe	4569.228960	1617.126398	4134.814376	19.489721	POINT (19.48972 40.46667)
88	Shkoder	Albania	Southern Europe	42.068283	19.512577	Europe	4457.868549	1579.715398	4268.670549	19.512577	POINT (19.51258 42.06828)

Introducing vector data cubes !¶

Background¶

Raster data cubes, vector data frames¶

Pushing the limits of data frames¶

What are vector data cubes?¶

What can we do with vector data cubes?¶

Understanding climate variability in population centers across Europe¶

Assemble vector data cube¶

Read raster data¶

Read vector data¶

Create a vector data cube with 'cities' data¶

Sample raster data cube with geometries from vector data cube¶

Using a vector data cube¶

1. Computation and grouping along a time dimension¶

2. Spatial indexing¶

3. Convert Xarray objects to geopandas GeoDataFrame¶

Wrap up¶

Summary¶

Up next¶

Acknowledgments¶

References¶