{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6fad80b2-cb76-49a4-abbd-7ee148d8416d",
   "metadata": {},
   "source": [
    "# Xarray RangeIndex prototype\n",
    "\n",
    "Author: Benoît Bovy ([benbovy](https://github.com/benbovy)) - December 2023"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "id": "78210461-b73b-4499-8b5e-89e9c257f4ff",
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import Any, Hashable, Mapping, Self\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import xarray as xr\n",
    "from xarray.indexes import PandasIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "79f51030-1b4d-4e7d-922c-31ffc869cbc7",
   "metadata": {},
   "outputs": [],
   "source": [
    "class RangeIndex(PandasIndex):\n",
    "    \"\"\"A range index.\n",
    "\n",
    "    This Xarray index uses a `pandas.RangeIndex` internally.\n",
    "    It can be set from either:\n",
    "\n",
    "    - a coordinate encapsulating a `pandas.RangeIndex` instance\n",
    "    - a scalar coordinate (with \"start\", \"stop\" and \"step\" attributes)\n",
    "    - any arbitrary coordinate (validation is performed by default)\n",
    "\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(\n",
    "        self,\n",
    "        array: pd.RangeIndex,\n",
    "        dim: Hashable,\n",
    "        coord_dtype: Any = None,\n",
    "    ):\n",
    "        \"\"\"Wrap ``array`` as the index of dimension ``dim``.\n",
    "\n",
    "        ``array`` must already be a `pandas.RangeIndex` (checked with an\n",
    "        assertion); ``coord_dtype`` is forwarded to ``PandasIndex``.\n",
    "        \"\"\"\n",
    "        assert isinstance(array, pd.RangeIndex)\n",
    "        super().__init__(array, dim, coord_dtype=coord_dtype)\n",
    "\n",
    "    @classmethod\n",
    "    def from_variables(\n",
    "        cls,\n",
    "        variables: Mapping[Any, xr.Variable],\n",
    "        *,\n",
    "        options: Mapping[str, Any],\n",
    "    ) -> Self:\n",
    "        \"\"\"Create a new index from exactly one coordinate variable.\n",
    "\n",
    "        Options read from ``options``:\n",
    "\n",
    "        - \"dim\": dimension of the index built from a scalar coordinate\n",
    "          (defaults to the coordinate name)\n",
    "        - \"validate\": if True (default), check that an arbitrary 1-d\n",
    "          coordinate equals ``np.arange(start, stop, step)``\n",
    "\n",
    "        For an arbitrary 1-d coordinate, ``start`` is the first value and\n",
    "        ``step`` the difference between the first two values. An empty\n",
    "        coordinate gives ``pd.RangeIndex(start=0, stop=0, step=1)`` and a\n",
    "        single-element coordinate uses ``step=1``.\n",
    "\n",
    "        Raises ``ValueError`` if ``variables`` does not hold exactly one\n",
    "        variable, if that variable has more than one dimension, or if the\n",
    "        inferred step is zero. A failed validation raises the\n",
    "        ``AssertionError`` of ``np.testing.assert_array_equal``.\n",
    "        \"\"\"\n",
    "        if len(variables) != 1:\n",
    "            raise ValueError(\n",
    "                f\"RangeIndex only accepts one variable, found {len(variables)} variables\"\n",
    "            )\n",
    "\n",
    "        name, var = next(iter(variables.items()))\n",
    "\n",
    "        # case of a scalar coordinate\n",
    "        if var.ndim == 0:\n",
    "            idx = pd.RangeIndex(\n",
    "                start=var.attrs.get(\"start\"),\n",
    "                stop=var.attrs.get(\"stop\"),\n",
    "                step=var.attrs.get(\"step\"),\n",
    "            )\n",
    "            dim = options.get(\"dim\", name)\n",
    "            return cls(idx, dim)\n",
    "\n",
    "        if var.ndim != 1:\n",
    "            raise ValueError(\n",
    "                \"RangeIndex only accepts a 1-dimensional variable, \"\n",
    "                f\"variable {name!r} has {var.ndim} dimensions\"\n",
    "            )\n",
    "\n",
    "        # fastpath (variable encapsulates a pd.RangeIndex)\n",
    "        # TODO: calling var.to_index() may be expensive?\n",
    "        if isinstance(var.to_index(), pd.RangeIndex):\n",
    "            return super().from_variables(variables, options=options)\n",
    "\n",
    "        # case of a 1-d arbitrary coordinate\n",
    "        dim = var.dims[0]\n",
    "        var_data = var.data\n",
    "\n",
    "        # an empty coordinate has no first value to read: empty range\n",
    "        if var.size == 0:\n",
    "            return cls(pd.RangeIndex(start=0, stop=0, step=1), dim)\n",
    "\n",
    "        start = var_data[0]\n",
    "        # a single value does not determine a step: fall back to 1\n",
    "        step = var_data[1] - start if var.size > 1 else 1\n",
    "        if step == 0:\n",
    "            # fail early with a clear error instead of letting a zero\n",
    "            # step reach np.arange during validation\n",
    "            raise ValueError(\n",
    "                f\"cannot create a RangeIndex from variable {name!r}: \"\n",
    "                \"the step between its first two values is zero\"\n",
    "            )\n",
    "        stop = var_data[-1] + step\n",
    "        if options.get(\"validate\", True):\n",
    "            np.testing.assert_array_equal(var_data, np.arange(start, stop, step))\n",
    "        idx = pd.RangeIndex(start=start, stop=stop, step=step)\n",
    "        return cls(idx, dim)\n",
    "\n",
    "    @property\n",
    "    def start(self):\n",
    "        \"\"\"The ``start`` of the wrapped `pandas.RangeIndex`.\"\"\"\n",
    "        return self.index.start\n",
    "\n",
    "    @property\n",
    "    def stop(self):\n",
    "        \"\"\"The ``stop`` of the wrapped `pandas.RangeIndex`.\"\"\"\n",
    "        return self.index.stop\n",
    "\n",
    "    @property\n",
    "    def step(self):\n",
    "        \"\"\"The ``step`` of the wrapped `pandas.RangeIndex`.\"\"\"\n",
    "        return self.index.step\n",
    "\n",
    "    def _repr_inline_(self, max_width=0):\n",
    "        \"\"\"One-line summary of the index; ``max_width`` is not used.\"\"\"\n",
    "        return f\"RangeIndex(start={self.start}, stop={self.stop}, step={self.step})\"\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3332b4a6-dbfc-4380-abe9-66655c3a8593",
   "metadata": {},
   "source": [
    "## Case 1: set RangeIndex from a coordinate encapsulating a `pd.RangeIndex`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "id": "93bba47e-79ea-43c9-8363-152763c89760",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
<xarray.Dataset>\n", "Dimensions: (x: 10000)\n", "Coordinates:\n", " x (x) int64 0 1 2 3 4 5 6 7 ... 9993 9994 9995 9996 9997 9998 9999\n", "Data variables:\n", " *empty*
<xarray.Dataset>\n", "Dimensions: (x: 10000)\n", "Coordinates:\n", " * x (x) int64 0 1 2 3 4 5 6 7 ... 9993 9994 9995 9996 9997 9998 9999\n", "Data variables:\n", " *empty*
<xarray.Dataset>\n", "Dimensions: (x: 9)\n", "Coordinates:\n", " * x (x) int64 2 3 4 5 6 7 8 9 10\n", "Data variables:\n", " *empty*
<xarray.Dataset>\n", "Dimensions: (x: 3)\n", "Coordinates:\n", " * x (x) int64 4 5 6\n", "Data variables:\n", " *empty*
<xarray.Dataset>\n", "Dimensions: (x: 3)\n", "Coordinates:\n", " * x (x) int64 4 5 6\n", "Data variables:\n", " *empty*
<xarray.Dataset>\n", "Dimensions: ()\n", "Coordinates:\n", " x int64 1\n", "Data variables:\n", " *empty*
<xarray.Dataset>\n", "Dimensions: ()\n", "Coordinates:\n", " * x (x) int64 0 1 2 3 4 5 6 7 ... 9993 9994 9995 9996 9997 9998 9999\n", "Data variables:\n", " *empty*
<xarray.Dataset>\n", "Dimensions: ()\n", "Coordinates:\n", " x int64 1\n", "Data variables:\n", " *empty*
<xarray.Dataset>\n", "Dimensions: (x: 4)\n", "Coordinates:\n", " x (x) int64 0 1 2 3\n", "Data variables:\n", " *empty*
<xarray.Dataset>\n", "Dimensions: (x: 4)\n", "Coordinates:\n", " * x (x) int64 0 1 2 3\n", "Data variables:\n", " *empty*