1
2

<xarray.Dataset>
Dimensions:  (y: 100, x: 100000000)
Coordinates:
  * x        (x) int64 dask.array<chunksize=(16777216,), meta=np.ndarray>
  * y        (y) int64 0 1 2 3 4 5 6 7 8 9 10 ... 90 91 92 93 94 95 96 97 98 99
Data variables:
    foo      (y, x) float64 dask.array<chunksize=(100, 167772), meta=np.ndarray>
Indexes:
    x        DaskIndex

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
       36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
       54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
       72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
       90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

PandasIndex(Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
            34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
            68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
            85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],
           dtype='int64', name='y'))

<__main__.DaskIndex object at 0x109492a50>

1
2

<xarray.Dataset>
Dimensions:  (y: 2, x: 10000)
Coordinates:
  * x        (x) int64 dask.array<chunksize=(10000,), meta=np.ndarray>
  * y        (y) int64 10 12
Data variables:
    foo      (y, x) float64 dask.array<chunksize=(2, 10000), meta=np.ndarray>
Indexes:
    x        DaskIndex

array([10, 12])

PandasIndex(Int64Index([10, 12], dtype='int64', name='y'))

<__main__.DaskIndex object at 0x114f517d0>

1

655 ms ± 13.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

1

<xarray.DataArray 'x' (x: 10000)>
dask.array<getitem, shape=(10000,), dtype=int64, chunksize=(10000,), chunktype=numpy.ndarray>
Coordinates:
  * x        (x) int64 dask.array<chunksize=(10000,), meta=np.ndarray>
Indexes:
    x        DaskIndex

<__main__.DaskIndex object at 0x114f517d0>

1

<xarray.DataArray 'foo' (y: 2, x: 10000)>
dask.array<getitem, shape=(2, 10000), dtype=float64, chunksize=(2, 10000), chunktype=numpy.ndarray>
Coordinates:
  * x        (x) int64 dask.array<chunksize=(10000,), meta=np.ndarray>
  * y        (y) int64 10 12
Indexes:
    x        DaskIndex

array([10, 12])

PandasIndex(Int64Index([10, 12], dtype='int64', name='y'))

<__main__.DaskIndex object at 0x114f517d0>

1

<xarray.Dataset>
Dimensions:  (y: 2, x: 10000)
Coordinates:
  * x        (x) int64 10000 10001 10002 10003 10004 ... 19996 19997 19998 19999
  * y        (y) int64 10 12
Data variables:
    foo      (y, x) float64 0.8685 0.06375 0.2268 ... 0.7615 0.8553 0.8133
Indexes:
    x        DaskIndex

array([10000, 10001, 10002, ..., 19997, 19998, 19999])

array([10, 12])

array([[0.86852983, 0.0637496 , 0.22682869, ..., 0.02478069, 0.59563265,
        0.88391897],
       [0.74251054, 0.78986414, 0.91074379, ..., 0.76147836, 0.85527309,
        0.81327248]])

PandasIndex(Int64Index([10, 12], dtype='int64', name='y'))

<__main__.DaskIndex object at 0x115002890>

1

<xarray.Dataset>
Dimensions:  (y: 100, x: 100000000)
Coordinates:
  * x        (x) int64 dask.array<chunksize=(16777216,), meta=np.ndarray>
  * y        (y) int64 0 1 2 3 4 5 6 7 8 9 10 ... 90 91 92 93 94 95 96 97 98 99
Data variables:
    foo      (y, x) float64 dask.array<chunksize=(100, 167772), meta=np.ndarray>
Indexes:
    x        DaskIndex

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
       36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
       54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
       72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
       90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

PandasIndex(Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
            34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
            68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
            85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],
           dtype='int64', name='y'))

<__main__.DaskIndex object at 0x109492a50>

1

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
Cell In[13], line 4
      1 ds2 = xr.Dataset(coords={"x": [-2, -1]})
      2 ds2 = ds2.drop_indexes("x").set_xindex("x", DaskIndex)
----> 4 xr.align(ds, ds2)

File ~/Git/github/benbovy/xarray/xarray/core/alignment.py:783, in align(join, copy, indexes, exclude, fill_value, *objects)
    587 """
    588 Given any number of Dataset and/or DataArray objects, returns new
    589 objects with aligned indexes and dimension sizes.
   (...)
    773 
    774 """
    775 aligner = Aligner(
    776     objects,
    777     join=join,
   (...)
    781     fill_value=fill_value,
    782 )
--> 783 aligner.align()
    784 return aligner.results

File ~/Git/github/benbovy/xarray/xarray/core/alignment.py:568, in Aligner.align(self)
    566 self.find_matching_unindexed_dims()
    567 self.assert_no_index_conflict()
--> 568 self.align_indexes()
    569 self.assert_unindexed_dim_sizes_equal()
    571 if self.join == "override":

File ~/Git/github/benbovy/xarray/xarray/core/alignment.py:422, in Aligner.align_indexes(self)
    415     raise ValueError(
    416         "cannot align objects with join='exact' where "
    417         "index/labels/sizes are not equal along "
    418         "these coordinates (dimensions): "
    419         + ", ".join(f"{name!r} {dims!r}" for name, dims in key[0])
    420     )
    421 joiner = self._get_index_joiner(index_cls)
--> 422 joined_index = joiner(matching_indexes)
    423 if self.join == "left":
    424     joined_index_vars = matching_index_vars[0]

File ~/Git/github/benbovy/xarray/xarray/core/indexes.py:285, in Index.join(self, other, how)
    267 def join(self: T_Index, other: T_Index, how: JoinOptions = "inner") -> T_Index:
    268     """Return a new index from the combination of this index with another
    269     index of the same type.
    270 
   (...)
    283         A new Index object.
    284     """
--> 285     raise NotImplementedError(
    286         f"{self!r} doesn't support alignment with inner/outer join method"
    287     )

NotImplementedError: <__main__.DaskIndex object at 0x109492a50> doesn't support alignment with inner/outer join method

1

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
Cell In[14], line 1
----> 1 xr.align(ds, ds2, join="inner")

File ~/Git/github/benbovy/xarray/xarray/core/alignment.py:783, in align(join, copy, indexes, exclude, fill_value, *objects)
    587 """
    588 Given any number of Dataset and/or DataArray objects, returns new
    589 objects with aligned indexes and dimension sizes.
   (...)
    773 
    774 """
    775 aligner = Aligner(
    776     objects,
    777     join=join,
   (...)
    781     fill_value=fill_value,
    782 )
--> 783 aligner.align()
    784 return aligner.results

File ~/Git/github/benbovy/xarray/xarray/core/alignment.py:568, in Aligner.align(self)
    566 self.find_matching_unindexed_dims()
    567 self.assert_no_index_conflict()
--> 568 self.align_indexes()
    569 self.assert_unindexed_dim_sizes_equal()
    571 if self.join == "override":

File ~/Git/github/benbovy/xarray/xarray/core/alignment.py:422, in Aligner.align_indexes(self)
    415     raise ValueError(
    416         "cannot align objects with join='exact' where "
    417         "index/labels/sizes are not equal along "
    418         "these coordinates (dimensions): "
    419         + ", ".join(f"{name!r} {dims!r}" for name, dims in key[0])
    420     )
    421 joiner = self._get_index_joiner(index_cls)
--> 422 joined_index = joiner(matching_indexes)
    423 if self.join == "left":
    424     joined_index_vars = matching_index_vars[0]

File ~/Git/github/benbovy/xarray/xarray/core/indexes.py:285, in Index.join(self, other, how)
    267 def join(self: T_Index, other: T_Index, how: JoinOptions = "inner") -> T_Index:
    268     """Return a new index from the combination of this index with another
    269     index of the same type.
    270 
   (...)
    283         A new Index object.
    284     """
--> 285     raise NotImplementedError(
    286         f"{self!r} doesn't support alignment with inner/outer join method"
    287     )

NotImplementedError: <__main__.DaskIndex object at 0x109492a50> doesn't support alignment with inner/outer join method

1

DaskIndex (out-of-core array index)¶

Implementation¶

Example¶

Construction¶

Label-based selection¶

Roll¶

HighLevelGraph

Layer1: random_sample

Layer2: getitem

Layer3: getitem

Layer4: concatenate

Layer5: rechunk-merge

Alignment¶