1 2 3 4 5 6 | from typing import Any, Hashable, Mapping, Self import numpy as np import pandas as pd import xarray as xr from xarray.indexes import PandasIndex |
class RangeIndex(PandasIndex):
    """Xarray index backed by a ``pandas.RangeIndex``.

    It can be set from either:

    - a coordinate encapsulating a ``pandas.RangeIndex`` instance
    - a scalar coordinate (with "start", "stop" and "step" attributes)
    - any arbitrary 1-dimensional coordinate (validation is performed by
      default, see :meth:`from_variables`)

    """

    def __init__(
        self,
        array: pd.RangeIndex,
        dim: Hashable,
        coord_dtype: Any = None,
    ):
        """Wrap an existing ``pandas.RangeIndex``.

        Parameters
        ----------
        array : pd.RangeIndex
            The range index to wrap.
        dim : Hashable
            Name of the dimension the index is attached to.
        coord_dtype : Any, optional
            Forwarded unchanged to ``PandasIndex.__init__``.

        Raises
        ------
        TypeError
            If ``array`` is not a ``pandas.RangeIndex``.
        """
        # Explicit check rather than ``assert`` so that the validation is not
        # stripped when Python runs with ``-O``.
        if not isinstance(array, pd.RangeIndex):
            raise TypeError(
                "RangeIndex requires a pandas.RangeIndex instance, "
                f"got {type(array).__name__}"
            )
        super().__init__(array, dim, coord_dtype=coord_dtype)

    @classmethod
    def from_variables(
        cls,
        variables: Mapping[Any, xr.Variable],
        *,
        options: Mapping[str, Any],
    ) -> Self:
        """Build a ``RangeIndex`` from exactly one coordinate variable.

        Parameters
        ----------
        variables : Mapping[Any, xr.Variable]
            Must contain a single variable, either 0-d (scalar) or 1-d.
        options : Mapping[str, Any]
            Recognized keys:

            - ``"dim"``: dimension name used for a scalar coordinate
              (defaults to the variable name).
            - ``"validate"``: when true (the default), an arbitrary 1-d
              coordinate is compared against the range inferred from its
              first two values and its last value.

        Raises
        ------
        ValueError
            If more than one variable is given, if the variable has more
            than one dimension, or if the inferred step is zero.
        AssertionError
            If validation is enabled and the coordinate values are not
            evenly spaced.
        """
        if len(variables) != 1:
            raise ValueError(
                f"RangeIndex only accepts one variable, found {len(variables)} variables"
            )

        name, var = next(iter(variables.items()))

        # case of a scalar coordinate: the range is described by its attributes
        # (missing attributes are passed as None and handled by pandas)
        if var.ndim == 0:
            idx = pd.RangeIndex(
                start=var.attrs.get("start"),
                stop=var.attrs.get("stop"),
                step=var.attrs.get("step"),
            )
            dim = options.get("dim", name)
            return cls(idx, dim)

        if var.ndim != 1:
            raise ValueError(
                "RangeIndex only accepts a 1-dimensional variable, "
                f"variable {name!r} has {var.ndim} dimensions"
            )

        # fastpath (variable encapsulates a pd.RangeIndex)
        # TODO: calling var.to_index() may be expensive?
        if isinstance(var.to_index(), pd.RangeIndex):
            return super().from_variables(variables, options=options)

        # case of a 1-d arbitrary coordinate
        var_data = var.data
        dim = var.dims[0]
        size = var.shape[0]

        # An empty coordinate has no first element to infer a range from;
        # represent it as an empty range instead of failing with IndexError.
        if size == 0:
            return cls(pd.RangeIndex(start=0, stop=0, step=1), dim)

        start = var_data[0]
        # A single value carries no spacing information: fall back to a unit
        # step instead of indexing past the end of the data.
        step = var_data[1] - start if size > 1 else 1
        if step == 0:
            raise ValueError(
                f"cannot create a RangeIndex from variable {name!r}: "
                "its first two values are equal (zero step)"
            )
        stop = var_data[-1] + step

        if options.get("validate", True):
            # Raises AssertionError when the data is not exactly the range.
            np.testing.assert_array_equal(var_data, np.arange(start, stop, step))

        idx = pd.RangeIndex(start=start, stop=stop, step=step)
        return cls(idx, dim)

    @property
    def start(self) -> int:
        """First value of the underlying range."""
        return self.index.start

    @property
    def stop(self) -> int:
        """Exclusive end of the underlying range."""
        return self.index.stop

    @property
    def step(self) -> int:
        """Spacing between consecutive values of the underlying range."""
        return self.index.step

    def _repr_inline_(self, max_width=0) -> str:
        """Return the one-line representation (``max_width`` is not used)."""
        return f"RangeIndex(start={self.start}, stop={self.stop}, step={self.step})"
Case 1: set RangeIndex from a coordinate encapsulating a pd.RangeIndex¶
1 2 | ds = xr.Dataset(coords={"x": pd.RangeIndex(0, 10_000, 1)}).drop_indexes("x") ds |
<xarray.Dataset> Dimensions: (x: 10000) Coordinates: x (x) int64 0 1 2 3 4 5 6 7 ... 9993 9994 9995 9996 9997 9998 9999 Data variables: *empty*
1 2 | ds = ds.set_xindex("x", RangeIndex) ds |
<xarray.Dataset> Dimensions: (x: 10000) Coordinates: * x (x) int64 0 1 2 3 4 5 6 7 ... 9993 9994 9995 9996 9997 9998 9999 Data variables: *empty*
1 | ds.xindexes |
Indexes: x RangeIndex(start=0, stop=10000, step=1)
Selection works just like for any `pandas.Index`. In the results, the "x" coordinate also has a `RangeIndex`.
1 | ds.sel(x=slice(2, 10)) |
<xarray.Dataset> Dimensions: (x: 9) Coordinates: * x (x) int64 2 3 4 5 6 7 8 9 10 Data variables: *empty*
Alignment works too (with the default inner join, only the labels shared by both selections, 4 to 6, are kept):
1 2 3 | a1, a2 = xr.align(ds.sel(x=slice(0, 6)), ds.sel(x=slice(4, 12))) a1 |
<xarray.Dataset> Dimensions: (x: 3) Coordinates: * x (x) int64 4 5 6 Data variables: *empty*
1 | a2
|
<xarray.Dataset> Dimensions: (x: 3) Coordinates: * x (x) int64 4 5 6 Data variables: *empty*
Case 2: set RangeIndex from a scalar coordinate¶
1 2 | ds2 = xr.Dataset(coords={"x": ((), 1, {"start": 0, "stop": 10_000, "step": 1})}) ds2 |
<xarray.Dataset> Dimensions: () Coordinates: x int64 1 Data variables: *empty*
1 2 | ds2 = ds2.set_xindex("x", RangeIndex) ds2 |
<xarray.Dataset> Dimensions: () Coordinates: * x (x) int64 0 1 2 3 4 5 6 7 ... 9993 9994 9995 9996 9997 9998 9999 Data variables: *empty*
1 | ds2.xindexes |
Indexes: x RangeIndex(start=0, stop=10000, step=1)
Example of converting back to a scalar coordinate for serialization:
1 2 3 | idx = ds2.xindexes["x"] ds2.assign_coords(x=((), 1, {"start": idx.start, "stop": idx.stop, "step": idx.step})) |
<xarray.Dataset> Dimensions: () Coordinates: x int64 1 Data variables: *empty*
Case 3: set RangeIndex from an arbitrary coordinate¶
1 2 | ds3 = xr.Dataset(coords={"x": [0, 1, 2, 3]}).drop_indexes("x") ds3 |
<xarray.Dataset> Dimensions: (x: 4) Coordinates: x (x) int64 0 1 2 3 Data variables: *empty*
1 2 | ds3 = ds3.set_xindex("x", RangeIndex) ds3 |
<xarray.Dataset> Dimensions: (x: 4) Coordinates: * x (x) int64 0 1 2 3 Data variables: *empty*
1 2 | ds4 = xr.Dataset(coords={"x": [0, 1, 2, 5]}).drop_indexes("x") ds4.set_xindex("x", RangeIndex) |
--------------------------------------------------------------------------- AssertionError Traceback (most recent call last) Cell In[174], line 2 1 ds4 = xr.Dataset(coords={"x": [0, 1, 2, 5]}).drop_indexes("x") ----> 2 ds4.set_xindex("x", RangeIndex) File ~/Git/github/benbovy/xarray/xarray/core/dataset.py:4940, in Dataset.set_xindex(self, coord_names, index_cls, **options) 4934 raise ValueError( 4935 f"those coordinates already have an index: {indexed_coords}" 4936 ) 4938 coord_vars = {name: self._variables[name] for name in coord_names} -> 4940 index = index_cls.from_variables(coord_vars, options=options) 4942 new_coord_vars = index.create_variables(coord_vars) 4944 # special case for setting a pandas multi-index from level coordinates 4945 # TODO: remove it once we depreciate pandas multi-index dimension (tuple 4946 # elements) coordinate Cell In[164], line 64, in RangeIndex.from_variables(cls, variables, options) 62 stop = var_data[-1] + step 63 if options.get("validate", True): ---> 64 np.testing.assert_array_equal(var.data, np.arange(start, stop, step)) 65 idx = pd.RangeIndex(start=start, stop=stop, step=step) 66 return cls(idx, var.dims[0]) [... 
skipping hidden 1 frame] File ~/miniconda3/envs/xarray_dev/lib/python3.11/contextlib.py:81, in ContextDecorator.__call__.<locals>.inner(*args, **kwds) 78 @wraps(func) 79 def inner(*args, **kwds): 80 with self._recreate_cm(): ---> 81 return func(*args, **kwds) File ~/miniconda3/envs/xarray_dev/lib/python3.11/site-packages/numpy/testing/_private/utils.py:778, in assert_array_compare(comparison, x, y, err_msg, verbose, header, precision, equal_nan, equal_inf, strict) 772 reason = f'\n(dtypes {x.dtype}, {y.dtype} mismatch)' 773 msg = build_err_msg([x, y], 774 err_msg 775 + reason, 776 verbose=verbose, header=header, 777 names=('x', 'y'), precision=precision) --> 778 raise AssertionError(msg) 780 flagged = bool_(False) 781 if isnumber(x) and isnumber(y): AssertionError: Arrays are not equal (shapes (4,), (6,) mismatch) x: array([0, 1, 2, 5]) y: array([0, 1, 2, 3, 4, 5])
1 |