{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# DaskIndex (out-of-core array index)\n", "\n", "Implementation is taken from this [NumpyIndex prototype](https://notebooksharing.space/view/48ad86aed90f7588c9a475be6747528d87f975cb3317e5bd94265ffaa5a2478f#displayOptions=), with only minor adaptation.\n", "\n", "We could merge both `NumpyIndex` and `DaskIndex` into a generic (and basic) `ArrayIndex`, which would work with any duck array (lazy or not)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from typing import Any, Hashable, Iterable, Mapping, Self, Sequence\n", "\n", "import dask.array as da\n", "import numpy as np\n", "import xarray as xr" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Implementation" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from xarray import Variable\n", "from xarray.indexes import Index\n", "from xarray.core.indexing import IndexSelResult\n", "from xarray.core.utils import is_scalar" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "class DaskIndex(Index):\n", " \"\"\"Out-of-core (dask) array index.\n", " \n", " Lightweight, inefficient index as a basic wrapper around\n", " its coordinate array data.\n", " \n", " This index is suited for cases where index build overhead\n", " is an issue and where only basic indexing operations are\n", " needed (i.e., strict alignment, data selection in rare occasions).\n", " \n", " \"\"\"\n", " array: da.Array\n", " dim: Hashable\n", " name: Hashable\n", " \n", " def __init__(self, array: da.Array, dim: Hashable, name: Hashable):\n", " if array.ndim > 1:\n", " raise ValueError(\"ArrayIndex only accepts 1-dimensional arrays\")\n", "\n", " self.array = array\n", " self.dim = dim\n", " self.name = name\n", " \n", " @classmethod\n", " def from_variables(\n", " cls: type[Self], variables: Mapping[Any, Variable], options\n", " ) -> Self:\n", " if len(variables) != 1:\n", " raise ValueError(\n", " f\"DaskIndex only accepts one variable, found {len(variables)} variables\"\n", " )\n", "\n", " name, var = next(iter(variables.items()))\n", " \n", " return cls(var.data, var.dims[0], name)\n", " \n", " @classmethod\n", " def concat(\n", " cls: type[Self],\n", " indexes: Sequence[Self],\n", " dim: Hashable,\n", " positions: Iterable[Iterable[int]] = None,\n", " ) -> Self:\n", " raise NotImplementedError\n", " if not indexes:\n", " return cls(da.Array([]), dim, dim)\n", " \n", " if not all(idx.dim == dim for idx in indexes):\n", " dims = \",\".join({f\"{idx.dim!r}\" for idx in indexes})\n", " raise ValueError(\n", " f\"Cannot concatenate along dimension {dim!r} indexes with \"\n", " f\"dimensions: {dims}\"\n", " )\n", " \n", " arrays = [idx.array for idx in indexes]\n", " new_array = da.concatenate(arrays)\n", " \n", " if positions is not None:\n", " indices = nputils.inverse_permutation(da.concatenate(positions))\n", " new_array = new_array.take(indices)\n", "\n", " return cls(new_array, dim, indexes[0].name)\n", " \n", " def create_variables(\n", " self, variables: Mapping[Any, Variable] | None = None\n", " ) -> dict[Hashable, Variable]:\n", " \n", " #\n", " # TODO: implementating this method is needed so that\n", " # the corresponding coordinate is indexed properly with Dataset.isel.\n", " # Ideally this shouldn't be needed, though (we only extract and\n", " # shallow copy the coordinate variable here, but really not even\n", " # a copy is needed).\n", " #\n", " \n", " if variables is not None and self.name in variables:\n", " var = variables[self.name]\n", " attrs = var.attrs\n", " encoding = var.encoding\n", " else:\n", " attrs = None\n", " encoding = None\n", "\n", " var = Variable(self.dim, self.array, attrs=attrs, encoding=encoding)\n", " return {self.name: var}\n", " \n", " def isel(\n", " self: Self, indexers: Mapping[Any, int | slice | np.ndarray | Variable]\n", " ) -> Self | None:\n", " indxr = indexers[self.dim]\n", "\n", " if isinstance(indxr, Variable):\n", " if indxr.dims != (self.dim,):\n", " # can't preserve a index if result has new dimensions\n", " return None\n", " else:\n", " indxr = indxr.data\n", " if not isinstance(indxr, slice) and is_scalar(indxr):\n", " # scalar indexer: drop index\n", " return None\n", " \n", " return type(self)(self.array[indxr], self.dim, self.name)\n", "\n", " def sel(self, labels: dict[Any, Any], **kwargs) -> IndexSelResult:\n", " assert len(labels) == 1\n", " _, label = next(iter(labels.items()))\n", " \n", " if isinstance(label, slice):\n", " # TODO: what exactly do we want to do here?\n", " start = da.argmax(self.array == label.start)\n", " stop = da.argmax(self.array == label.stop)\n", " indexer = slice(start, stop)\n", " elif is_scalar(label):\n", " indexer = da.argmax(self.array == label)\n", " else:\n", " # TODO: other label types we want to support (n-d array-like, etc.)\n", " raise ValueError(f\"label {label} not (yet) supported by DaskIndex\")\n", " \n", " return IndexSelResult({self.dim: indexer})\n", " \n", " def equals(self: Self, other: Self) -> bool:\n", " if self.array.size != other.array.size:\n", " return False\n", " else:\n", " return da.all(self.array == other.array)\n", " \n", " def roll(self: Self, shifts: Mapping[Any, int]) -> Self:\n", " shift = shifts[self.dim]\n", " \n", " return type(self)(da.roll(self.array, shift), self.dim, self.name)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Construction\n", "\n", "Create coordinates \"x\" and \"y\", with no index for \"x\" and a default (pandas) index for \"y\".\n", "\n", "Only works with https://github.com/pydata/xarray/pull/8094! " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "coords = xr.Coordinates({\"x\": (\"x\", da.arange(100_000_000))}, indexes={})\n", "coords[\"y\"] = np.arange(100)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create a dataset using the coordinates above, and set a `DaskIndex` for the \"x\" coordinate. Coordinate data remains lazy." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:  (y: 100, x: 100000000)\n",
       "Coordinates:\n",
       "  * x        (x) int64 dask.array<chunksize=(16777216,), meta=np.ndarray>\n",
       "  * y        (y) int64 0 1 2 3 4 5 6 7 8 9 10 ... 90 91 92 93 94 95 96 97 98 99\n",
       "Data variables:\n",
       "    foo      (y, x) float64 dask.array<chunksize=(100, 167772), meta=np.ndarray>\n",
       "Indexes:\n",
       "    x        DaskIndex
" ], "text/plain": [ "\n", "Dimensions: (y: 100, x: 100000000)\n", "Coordinates:\n", " * x (x) int64 dask.array\n", " * y (y) int64 0 1 2 3 4 5 6 7 8 9 10 ... 90 91 92 93 94 95 96 97 98 99\n", "Data variables:\n", " foo (y, x) float64 dask.array\n", "Indexes:\n", " x DaskIndex" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds = xr.Dataset(\n", " data_vars={\"foo\": ((\"y\", \"x\"), da.random.random((100, 100_000_000)))},\n", " coords=coords,\n", ")\n", "\n", "ds = ds.set_xindex(\"x\", DaskIndex)\n", "\n", "ds" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Label-based selection\n", "\n", "Select data by coordinate labels." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:  (y: 2, x: 10000)\n",
       "Coordinates:\n",
       "  * x        (x) int64 dask.array<chunksize=(10000,), meta=np.ndarray>\n",
       "  * y        (y) int64 10 12\n",
       "Data variables:\n",
       "    foo      (y, x) float64 dask.array<chunksize=(2, 10000), meta=np.ndarray>\n",
       "Indexes:\n",
       "    x        DaskIndex
" ], "text/plain": [ "\n", "Dimensions: (y: 2, x: 10000)\n", "Coordinates:\n", " * x (x) int64 dask.array\n", " * y (y) int64 10 12\n", "Data variables:\n", " foo (y, x) float64 dask.array\n", "Indexes:\n", " x DaskIndex" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds_subset = ds.sel(y=[10, 12], x=slice(10_000, 20_000))\n", "ds_subset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The operation above is fully lazy: " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "655 ms ± 13.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] } ], "source": [ "%timeit ds.sel(y=[10, 12], x=slice(10_000, 20_000))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The coordinates and data variables of the selection remain lazy" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray 'x' (x: 10000)>\n",
       "dask.array<getitem, shape=(10000,), dtype=int64, chunksize=(10000,), chunktype=numpy.ndarray>\n",
       "Coordinates:\n",
       "  * x        (x) int64 dask.array<chunksize=(10000,), meta=np.ndarray>\n",
       "Indexes:\n",
       "    x        DaskIndex
" ], "text/plain": [ "\n", "dask.array\n", "Coordinates:\n", " * x (x) int64 dask.array\n", "Indexes:\n", " x DaskIndex" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds_subset.x" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray 'foo' (y: 2, x: 10000)>\n",
       "dask.array<getitem, shape=(2, 10000), dtype=float64, chunksize=(2, 10000), chunktype=numpy.ndarray>\n",
       "Coordinates:\n",
       "  * x        (x) int64 dask.array<chunksize=(10000,), meta=np.ndarray>\n",
       "  * y        (y) int64 10 12\n",
       "Indexes:\n",
       "    x        DaskIndex
" ], "text/plain": [ "\n", "dask.array\n", "Coordinates:\n", " * x (x) int64 dask.array\n", " * y (y) int64 10 12\n", "Indexes:\n", " x DaskIndex" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds_subset.foo" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The selection is small so computing the Dataset is fast" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:  (y: 2, x: 10000)\n",
       "Coordinates:\n",
       "  * x        (x) int64 10000 10001 10002 10003 10004 ... 19996 19997 19998 19999\n",
       "  * y        (y) int64 10 12\n",
       "Data variables:\n",
       "    foo      (y, x) float64 0.8685 0.06375 0.2268 ... 0.7615 0.8553 0.8133\n",
       "Indexes:\n",
       "    x        DaskIndex
" ], "text/plain": [ "\n", "Dimensions: (y: 2, x: 10000)\n", "Coordinates:\n", " * x (x) int64 10000 10001 10002 10003 10004 ... 19996 19997 19998 19999\n", " * y (y) int64 10 12\n", "Data variables:\n", " foo (y, x) float64 0.8685 0.06375 0.2268 ... 0.7615 0.8553 0.8133\n", "Indexes:\n", " x DaskIndex" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds_subset.compute()" ] }, { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "## Roll\n", "\n", "This is lazy too" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:  (y: 100, x: 100000000)\n",
       "Coordinates:\n",
       "  * x        (x) int64 dask.array<chunksize=(16777216,), meta=np.ndarray>\n",
       "  * y        (y) int64 0 1 2 3 4 5 6 7 8 9 10 ... 90 91 92 93 94 95 96 97 98 99\n",
       "Data variables:\n",
       "    foo      (y, x) float64 dask.array<chunksize=(100, 167772), meta=np.ndarray>\n",
       "Indexes:\n",
       "    x        DaskIndex
" ], "text/plain": [ "\n", "Dimensions: (y: 100, x: 100000000)\n", "Coordinates:\n", " * x (x) int64 dask.array\n", " * y (y) int64 0 1 2 3 4 5 6 7 8 9 10 ... 90 91 92 93 94 95 96 97 98 99\n", "Data variables:\n", " foo (y, x) float64 dask.array\n", "Indexes:\n", " x DaskIndex" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds.roll(x=5)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "

HighLevelGraph

\n", "

\n", " HighLevelGraph with 5 layers and 3582 keys from all layers.\n", "

\n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", "
\n", " \n", "

Layer1: random_sample

\n", "
\n", "

\n", " random_sample-acc21eb09941670cf01836dec0ac388a\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
layer_typeMaterializedLayer
is_materializedTrue
number of outputs597
shape(100, 100000000)
dtypefloat64
chunksize(100, 167772)
typedask.array.core.Array
chunk_typenumpy.ndarray
\n", "
\n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " 100000000\n", " 100\n", "\n", "
\n", "\n", "
\n", "
\n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", "
\n", " \n", "

Layer2: getitem

\n", "
\n", "

\n", " getitem-4ac0bb0d0de92654303bf4a075878043\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
layer_typeMaterializedLayer
is_materializedTrue
number of outputs597
shape(100, 99999995)
dtypefloat64
chunksize(100, 167772)
typedask.array.core.Array
chunk_typenumpy.ndarray
depends on random_sample-acc21eb09941670cf01836dec0ac388a
\n", "
\n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " 99999995\n", " 100\n", "\n", "
\n", "\n", "
\n", "
\n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", "
\n", " \n", "

Layer3: getitem

\n", "
\n", "

\n", " getitem-98d9e40a49433bb8174db9d6f8751789\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
layer_typeMaterializedLayer
is_materializedTrue
number of outputs1
shape(100, 5)
dtypefloat64
chunksize(100, 5)
typedask.array.core.Array
chunk_typenumpy.ndarray
depends on random_sample-acc21eb09941670cf01836dec0ac388a
\n", "
\n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " 5\n", " 100\n", "\n", "
\n", "\n", "
\n", "
\n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", "
\n", " \n", "

Layer4: concatenate

\n", "
\n", "

\n", " concatenate-c0d3fe82da806cf22304a8920339aa2c\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
layer_typeMaterializedLayer
is_materializedTrue
number of outputs598
shape(100, 100000000)
dtypefloat64
chunksize(100, 167772)
typedask.array.core.Array
chunk_typenumpy.ndarray
depends on getitem-98d9e40a49433bb8174db9d6f8751789
getitem-4ac0bb0d0de92654303bf4a075878043
\n", "
\n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " 100000000\n", " 100\n", "\n", "
\n", "\n", "
\n", "
\n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", "\n", "
\n", " \n", "

Layer5: rechunk-merge

\n", "
\n", "

\n", " rechunk-merge-8bdcaead68d616d69dbf2aefcac6a961\n", "

\n", "\n", " \n", " \n", " \n", " \n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
layer_typeMaterializedLayer
is_materializedTrue
number of outputs1789
shape(100, 100000000)
dtypefloat64
chunksize(100, 167772)
typedask.array.core.Array
chunk_typenumpy.ndarray
depends on concatenate-c0d3fe82da806cf22304a8920339aa2c
\n", "
\n", " \n", "\n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", "\n", " \n", " 100000000\n", " 100\n", "\n", "
\n", "\n", "
\n", "
\n", " \n", "
\n", "
\n", "
" ], "text/plain": [ "HighLevelGraph with 5 layers.\n", "\n", " 0. random_sample-acc21eb09941670cf01836dec0ac388a\n", " 1. getitem-4ac0bb0d0de92654303bf4a075878043\n", " 2. getitem-98d9e40a49433bb8174db9d6f8751789\n", " 3. concatenate-c0d3fe82da806cf22304a8920339aa2c\n", " 4. rechunk-merge-8bdcaead68d616d69dbf2aefcac6a961" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds.roll(x=5).foo.data.dask" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Alignment\n", "\n", "Alignment, re-indexing, concatenate, etc. are not (well) supported. It may either fail (good) or try to compute and/or load all data (bad)." ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "ename": "NotImplementedError", "evalue": "<__main__.DaskIndex object at 0x109492a50> doesn't support alignment with inner/outer join method", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[13], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m ds2 \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mDataset(coords\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mx\u001b[39m\u001b[38;5;124m\"\u001b[39m: [\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]})\n\u001b[1;32m 2\u001b[0m ds2 \u001b[38;5;241m=\u001b[39m ds2\u001b[38;5;241m.\u001b[39mdrop_indexes(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mx\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mset_xindex(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mx\u001b[39m\u001b[38;5;124m\"\u001b[39m, DaskIndex)\n\u001b[0;32m----> 4\u001b[0m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malign\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mds2\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/Git/github/benbovy/xarray/xarray/core/alignment.py:783\u001b[0m, in \u001b[0;36malign\u001b[0;34m(join, copy, indexes, exclude, fill_value, *objects)\u001b[0m\n\u001b[1;32m 587\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 588\u001b[0m \u001b[38;5;124;03mGiven any number of Dataset and/or DataArray objects, returns new\u001b[39;00m\n\u001b[1;32m 589\u001b[0m \u001b[38;5;124;03mobjects with aligned indexes and dimension sizes.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 773\u001b[0m \n\u001b[1;32m 774\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 775\u001b[0m aligner \u001b[38;5;241m=\u001b[39m Aligner(\n\u001b[1;32m 776\u001b[0m objects,\n\u001b[1;32m 777\u001b[0m join\u001b[38;5;241m=\u001b[39mjoin,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 781\u001b[0m fill_value\u001b[38;5;241m=\u001b[39mfill_value,\n\u001b[1;32m 782\u001b[0m )\n\u001b[0;32m--> 783\u001b[0m \u001b[43maligner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malign\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 784\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m aligner\u001b[38;5;241m.\u001b[39mresults\n", "File \u001b[0;32m~/Git/github/benbovy/xarray/xarray/core/alignment.py:568\u001b[0m, in \u001b[0;36mAligner.align\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 566\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfind_matching_unindexed_dims()\n\u001b[1;32m 567\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39massert_no_index_conflict()\n\u001b[0;32m--> 568\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malign_indexes\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 569\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39massert_unindexed_dim_sizes_equal()\n\u001b[1;32m 571\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mjoin \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moverride\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", "File \u001b[0;32m~/Git/github/benbovy/xarray/xarray/core/alignment.py:422\u001b[0m, in \u001b[0;36mAligner.align_indexes\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 416\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot align objects with join=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mexact\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m where \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 417\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mindex/labels/sizes are not equal along \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 418\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthese coordinates (dimensions): \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 419\u001b[0m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdims\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m name, dims \u001b[38;5;129;01min\u001b[39;00m key[\u001b[38;5;241m0\u001b[39m])\n\u001b[1;32m 420\u001b[0m )\n\u001b[1;32m 421\u001b[0m joiner \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_index_joiner(index_cls)\n\u001b[0;32m--> 422\u001b[0m joined_index \u001b[38;5;241m=\u001b[39m \u001b[43mjoiner\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmatching_indexes\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 423\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mjoin \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mleft\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 424\u001b[0m joined_index_vars \u001b[38;5;241m=\u001b[39m matching_index_vars[\u001b[38;5;241m0\u001b[39m]\n", "File \u001b[0;32m~/Git/github/benbovy/xarray/xarray/core/indexes.py:285\u001b[0m, in \u001b[0;36mIndex.join\u001b[0;34m(self, other, how)\u001b[0m\n\u001b[1;32m 267\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mjoin\u001b[39m(\u001b[38;5;28mself\u001b[39m: T_Index, other: T_Index, how: JoinOptions \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minner\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T_Index:\n\u001b[1;32m 268\u001b[0m \u001b[38;5;124;03m\"\"\"Return a new index from the combination of this index with another\u001b[39;00m\n\u001b[1;32m 269\u001b[0m \u001b[38;5;124;03m index of the same type.\u001b[39;00m\n\u001b[1;32m 270\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[38;5;124;03m A new Index object.\u001b[39;00m\n\u001b[1;32m 284\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 285\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 286\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m doesn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt support alignment with inner/outer join method\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 287\u001b[0m )\n", "\u001b[0;31mNotImplementedError\u001b[0m: <__main__.DaskIndex object at 0x109492a50> doesn't support alignment with inner/outer join method" ] } ], "source": [ "ds2 = xr.Dataset(coords={\"x\": [-2, -1]})\n", "ds2 = ds2.drop_indexes(\"x\").set_xindex(\"x\", DaskIndex)\n", "\n", "xr.align(ds, ds2)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "ename": "NotImplementedError", "evalue": "<__main__.DaskIndex object at 0x109492a50> doesn't support alignment with inner/outer join method", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[14], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malign\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mds2\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minner\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/Git/github/benbovy/xarray/xarray/core/alignment.py:783\u001b[0m, in \u001b[0;36malign\u001b[0;34m(join, copy, indexes, exclude, fill_value, *objects)\u001b[0m\n\u001b[1;32m 587\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 588\u001b[0m \u001b[38;5;124;03mGiven any number of Dataset and/or DataArray objects, returns new\u001b[39;00m\n\u001b[1;32m 589\u001b[0m \u001b[38;5;124;03mobjects with aligned indexes and dimension sizes.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 773\u001b[0m \n\u001b[1;32m 774\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 775\u001b[0m aligner \u001b[38;5;241m=\u001b[39m Aligner(\n\u001b[1;32m 776\u001b[0m objects,\n\u001b[1;32m 777\u001b[0m join\u001b[38;5;241m=\u001b[39mjoin,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 781\u001b[0m fill_value\u001b[38;5;241m=\u001b[39mfill_value,\n\u001b[1;32m 782\u001b[0m )\n\u001b[0;32m--> 783\u001b[0m \u001b[43maligner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malign\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 784\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m aligner\u001b[38;5;241m.\u001b[39mresults\n", "File \u001b[0;32m~/Git/github/benbovy/xarray/xarray/core/alignment.py:568\u001b[0m, in \u001b[0;36mAligner.align\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 566\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfind_matching_unindexed_dims()\n\u001b[1;32m 567\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39massert_no_index_conflict()\n\u001b[0;32m--> 568\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malign_indexes\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 569\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39massert_unindexed_dim_sizes_equal()\n\u001b[1;32m 571\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mjoin \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moverride\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", "File \u001b[0;32m~/Git/github/benbovy/xarray/xarray/core/alignment.py:422\u001b[0m, in \u001b[0;36mAligner.align_indexes\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 416\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot align objects with join=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mexact\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m where \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 417\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mindex/labels/sizes are not equal along \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 418\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthese coordinates (dimensions): \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 419\u001b[0m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdims\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m name, dims \u001b[38;5;129;01min\u001b[39;00m key[\u001b[38;5;241m0\u001b[39m])\n\u001b[1;32m 420\u001b[0m )\n\u001b[1;32m 421\u001b[0m joiner \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_index_joiner(index_cls)\n\u001b[0;32m--> 422\u001b[0m joined_index \u001b[38;5;241m=\u001b[39m \u001b[43mjoiner\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmatching_indexes\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 423\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mjoin \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mleft\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 424\u001b[0m joined_index_vars \u001b[38;5;241m=\u001b[39m matching_index_vars[\u001b[38;5;241m0\u001b[39m]\n", "File \u001b[0;32m~/Git/github/benbovy/xarray/xarray/core/indexes.py:285\u001b[0m, in \u001b[0;36mIndex.join\u001b[0;34m(self, other, how)\u001b[0m\n\u001b[1;32m 267\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mjoin\u001b[39m(\u001b[38;5;28mself\u001b[39m: T_Index, other: T_Index, how: JoinOptions \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minner\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T_Index:\n\u001b[1;32m 268\u001b[0m \u001b[38;5;124;03m\"\"\"Return a new index from the combination of this index with another\u001b[39;00m\n\u001b[1;32m 269\u001b[0m \u001b[38;5;124;03m index of the same type.\u001b[39;00m\n\u001b[1;32m 270\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[38;5;124;03m A new Index object.\u001b[39;00m\n\u001b[1;32m 284\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 285\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 286\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m doesn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt support alignment with inner/outer join method\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 287\u001b[0m )\n", "\u001b[0;31mNotImplementedError\u001b[0m: <__main__.DaskIndex object at 0x109492a50> doesn't support alignment with inner/outer join method" ] } ], "source": [ "xr.align(ds, ds2, join=\"inner\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:xarray_dev]", "language": "python", "name": "conda-env-xarray_dev-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 4 }