diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 7883db82..906a414d 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -4,6 +4,8 @@ Release Notes Upcoming Version ---------------- +* Add documentation about ``LinearExpression.where`` with ``drop=True``. Add ``BaseExpression.variable_names`` property. + **Features** *Inspect the solver after solving* @@ -52,11 +54,15 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** +* ``add_variables`` / ``add_constraints``: extends 0.7.0's coords-as-truth rule to ``lower``, ``upper`` and ``mask`` for every bound type and dim order. Pandas ``Series`` / ``DataFrame`` bounds or masks missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 <https://github.com/PyPSA/linopy/issues/709>`__); the variable's dimension order always follows ``coords`` (`#706 <https://github.com/PyPSA/linopy/issues/706>`__); bare-tuple coord entries (``coords=[(0, 1, 2)]``) now behave like lists. Mismatched values or extra dims raise ``ValueError`` with a labelled message; sparse-coord masks (formerly a v0.6.3 ``FutureWarning``, #580) raise ``ValueError``, and masks with dims not in the data raise ``ValueError`` instead of ``AssertionError``. +* ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 <https://github.com/PyPSA/linopy/issues/688>`__; pass ``reformulate_sos=True`` as a workaround. * ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously it was silently ignored with a warning. 
+* Fix Mosek interface to inspect both the basic and IPM solutions and pick the one with the better status, so that an optimal crossover solution is not discarded when IPM terminates with a (near-)Farkas certificate. **Breaking Changes** +* ``add_variables`` / ``add_constraints``: the v0.6.3 ``mask`` deprecations (#580) are now hard ``ValueError``\ s; an unnamed ``pd.MultiIndex`` in sequence-form ``coords`` raises ``TypeError`` unless paired with ``dims=[i]``. See Bug Fixes above. * ``available_solvers`` now lists all *installed* solvers, even ones without a working license. If you used it to decide "can I actually solve with X?", switch to ``linopy.licensed_solvers`` or ``SolverClass.license_status()``. * ``Model.solver_model`` and ``Model.solver_name`` are now read-only properties that delegate to ``model.solver``. You can't reassign them (only ``= None`` is allowed, which closes the solver), and ``solver_name`` is ``None`` before the first solve. * ``result.solution.primal`` and ``result.solution.dual`` are now ``numpy`` arrays indexed by linopy's integer labels (with ``NaN`` for slots without a value), instead of pandas Series keyed by variable/constraint name. If you accessed them by name, use ``model.variables[name].solution`` (or ``model.constraints[name].dual``) instead. @@ -64,6 +70,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Internal** +* ``linopy.common.as_dataarray`` is the single broadcasting primitive; strict subset-dim / coord-value checks live in ``validate_alignment`` (via ``align_to_coords`` in ``add_variables`` / ``add_constraints``). When ``coords`` is a mapping, extra keys beyond the positional ``dims`` are broadcast in rather than dropped. * Each ``Solver`` subclass now overrides at most three hooks: ``_build_direct`` (build the native model), ``_run_direct`` (run it), and ``_run_file`` (run the solver on an LP/MPS file). 
File-only solvers (CBC, GLPK, CPLEX, SCIP, Knitro, COPT, MindOpt) only override ``_run_file``. * New ``ConstraintLabelIndex`` cached on ``Model.constraints`` (mirrors the existing ``Variables.label_index``); ``ConstraintBase`` gains ``active_labels()`` and a ``range`` property; ``CSRConstraint`` exposes ``coords``. * ``linopy.common`` gains ``values_to_lookup_array``; the legacy pandas-based helpers ``series_to_lookup_array`` and ``lookup_vals`` are removed. diff --git a/linopy/common.py b/linopy/common.py index e9a38d29..b0e7a75d 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -9,7 +9,7 @@ import operator import os -from collections.abc import Callable, Generator, Hashable, Iterable, Sequence +from collections.abc import Callable, Generator, Hashable, Iterable, Mapping, Sequence from functools import cached_property, partial, reduce, wraps from pathlib import Path from typing import TYPE_CHECKING, Any, Generic, TypeVar, overload @@ -23,6 +23,7 @@ from xarray import Coordinates, DataArray, Dataset, apply_ufunc, broadcast from xarray import align as xr_align from xarray.core import dtypes, indexing +from xarray.core.coordinates import CoordinateValidationError from xarray.core.types import JoinOptions, T_Alignable from xarray.namedarray.utils import is_dict_like @@ -213,30 +214,20 @@ def numpy_to_dataarray( return DataArray(arr, coords=coords, dims=dims, **kwargs) -def as_dataarray( +def _as_dataarray_lax( arr: Any, coords: CoordsLike | None = None, dims: DimsLike | None = None, **kwargs: Any, ) -> DataArray: """ - Convert an object to a DataArray. + Type-dispatched DataArray conversion without any coords validation. - Parameters - ---------- - arr: - The input object. - coords (Union[dict, list, None]): - The coordinates for the DataArray. If None, default coordinates will be used. - dims (Union[list, None]): - The dimensions for the DataArray. If None, the dimensions will be automatically generated. 
- **kwargs: - Additional keyword arguments to be passed to the DataArray constructor. - - Returns - ------- - DataArray: - The converted DataArray. + This is the conversion primitive used by ``as_dataarray``: it picks the + right constructor for each supported input type but does not check the + result against ``coords``. Callers that need ``coords`` to govern the + output (dim order, shared-dim values, missing-dim expansion) should use + ``as_dataarray`` instead. """ if isinstance(arr, pd.Series | pd.DataFrame): arr = pandas_to_dataarray(arr, coords=coords, dims=dims, **kwargs) @@ -275,30 +266,350 @@ def as_dataarray( return arr -def broadcast_mask(mask: DataArray, labels: DataArray) -> DataArray: +def as_dataarray( + arr: Any, + coords: CoordsLike | None = None, + dims: DimsLike | None = None, + **kwargs: Any, +) -> DataArray: """ - Broadcast a boolean mask to match the shape of labels. + Convert ``arr`` to a DataArray and broadcast it against ``coords``. + + When ``coords`` carries named dimensions, the result is aligned with + those coords: + + - positional inputs (numpy, polars, unnamed pandas, scalar) are labeled + with the coord dim names by position; + - for every dim shared between ``arr`` and ``coords``, same-values- + different-order coordinates are reindexed to ``coords`` order; + - dims present in ``coords`` but not in ``arr`` are expanded to the + ``coords`` shape; + - the result is transposed to ``coords`` order. + + Dimensions present in ``arr`` but not in ``coords`` are preserved so + standard xarray broadcasting keeps working. Disagreeing coord values + on a shared dim (i.e. value sets that are not equal as sets) are + passed through unchanged: downstream xarray alignment decides how to + combine them. 
To enforce that ``arr.dims`` ⊆ ``coords.dims`` and that + shared coord values match, use ``validate_alignment`` (called + automatically for ``lower``, ``upper``, and ``mask`` in + :meth:`~linopy.model.Model.add_variables` and for ``mask`` in + :meth:`~linopy.model.Model.add_constraints`). - Ensures that mask dimensions are a subset of labels dimensions, broadcasts - the mask accordingly, and fills any NaN values (from missing coordinates) - with False while emitting a FutureWarning. + Parameters + ---------- + arr + Input scalar / list / numpy / polars / pandas / DataArray. + coords + Mapping of dim name → coord values, or a sequence of ``pd.Index`` + / unnamed sequences. ``None`` falls back to xarray's default + labeling (no broadcasting). + dims + Optional dim-names hint, used for positional inputs and to bias + pandas-axis interpretation. + **kwargs + Forwarded to the underlying DataArray construction. + + Returns + ------- + DataArray + Broadcast against ``coords`` (extra dims preserved). """ - assert set(mask.dims).issubset(labels.dims), ( - "Dimensions of mask not a subset of resulting labels dimensions." + if coords is None: + return _as_dataarray_lax(arr, coords, dims, **kwargs) + + if isinstance(coords, list | tuple) and any(isinstance(c, tuple) for c in coords): + # xarray reads bare `(a, b)` as `(dim_name, values)`; normalize so a + # coords entry passed as a tuple behaves identically to a list. + coords = [list(c) if isinstance(c, tuple) else c for c in coords] + + expected = _coords_to_dict(coords, dims=dims) + if not expected: + return _as_dataarray_lax(arr, coords, dims, **kwargs) + + if isinstance(arr, pd.Series | pd.DataFrame): + converted = _named_pandas_to_dataarray(arr) + if converted is not None: + arr = converted + + if not isinstance(arr, DataArray): + # numpy/polars/unnamed-pandas inputs are positional — their only + # meaningful information is the values; any axis labels are + # auto-generated. 
Default dims to coords' keys so the lax conversion + # labels axes correctly (instead of dim_0/dim_1), then re-assign + # coords from expected so positional inputs align to coords by + # position. A shape mismatch surfaces here as a clear xarray + # "conflicting sizes" error rather than a confusing + # "coordinates do not match" further down. + if dims is None: + dims = list(expected) + arr = _as_dataarray_lax(arr, coords, dims=dims, **kwargs) + # Skip MultiIndex dims — re-assigning a PandasMultiIndex coord emits + # a FutureWarning and isn't needed (the lax pass already used it). + arr = arr.assign_coords( + { + d: expected[d] + for d in arr.dims + if d in expected and not isinstance(arr.indexes.get(d), pd.MultiIndex) + } + ) + + for dim, coord_values in expected.items(): + if dim not in arr.dims: + continue + if isinstance(arr.indexes.get(dim), pd.MultiIndex): + continue + expected_idx = ( + coord_values + if isinstance(coord_values, pd.Index) + else pd.Index(coord_values) + ) + actual_idx = arr.coords[dim].to_index() + if actual_idx.equals(expected_idx): + continue + # Same values, different order → reindex to match expected order. + # Different value sets are left alone: downstream xarray alignment + # (e.g. xr.align in arithmetic) handles them. Callers needing strict + # value matching (add_variables / add_constraints) should use + # ``validate_alignment`` after this call. + if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( + expected_idx + ): + arr = arr.reindex({dim: expected_idx}) + + # expand_dims prepends new dimensions and their coordinate variables; + # the subsequent transpose restores coords order. Both are no-ops when + # the array already matches. Reconstruct so the DataArray's coords + # iteration order also follows coords (a Dataset built from this picks + # up its dim order from coord insertion). 
+ expand = {k: v for k, v in expected.items() if k not in arr.dims} + if expand: + arr = arr.expand_dims(expand) + + target_dims = tuple(d for d in expected if d in arr.dims) + tuple( + d for d in arr.dims if d not in expected ) - mask = mask.broadcast_like(labels) - if mask.isnull().any(): - warn( - "Mask contains coordinates not covered by the data dimensions. " - "Missing values will be filled with False (masked out). " - "In a future version, this will raise an error. " - "Use mask.reindex() or `linopy.align()` to explicitly handle missing " - "coordinates.", - FutureWarning, - stacklevel=3, + arr = arr.transpose(*target_dims) + + coord_order = [c for c in target_dims if c in arr.coords] + [ + c for c in arr.coords if c not in target_dims + ] + if list(arr.coords) != coord_order: + arr = DataArray( + arr.variable, + coords={c: arr.coords[c] for c in coord_order}, + name=arr.name, ) - mask = mask.fillna(False).astype(bool) - return mask + + return arr + + +def validate_alignment( + arr: DataArray, + coords: CoordsLike | None, + dims: DimsLike | None = None, + *, + label: str | None = None, +) -> None: + """ + Raise ``ValueError`` if ``arr`` is incompatible with ``coords``. + + ``arr`` is compatible with ``coords`` when both of the following hold: + + - every dim in ``arr.dims`` is also a dim in ``coords`` (no extras); + - for every dim shared between ``arr`` and ``coords``, the coord + values are equal. + + ``dims`` mirrors the ``dims`` argument of ``as_dataarray``: it names + unnamed entries in a sequence-form ``coords`` by position, so + ``coords=[[1, 2, 3]], dims=["x"]`` is enforced the same way as + ``coords={"x": [1, 2, 3]}``. + + ``label`` names the argument in error messages (e.g. ``"lower bound"``). + + No-op when ``coords`` is ``None`` or carries no named dimensions. 
+ """ + if coords is None: + return + expected = _coords_to_dict(coords, dims=dims) + if not expected: + return + subject = label or "Value" + expected_dims = set(expected) + extra = set(arr.dims) - expected_dims + if extra: + raise ValueError( + f"{subject} has dimension(s) {sorted(extra, key=str)} not declared in coords " + f"({sorted(expected_dims, key=str)}). Add them to coords or remove them from " + f"{subject.lower()}." + ) + for dim, coord_values in expected.items(): + if dim not in arr.dims: + continue + expected_is_mi = isinstance(coord_values, pd.MultiIndex) + actual_is_mi = isinstance(arr.indexes.get(dim), pd.MultiIndex) + if expected_is_mi or actual_is_mi: + if expected_is_mi and actual_is_mi: + if not arr.indexes[dim].equals(coord_values): + raise ValueError( + f"{subject}: MultiIndex for dimension {dim!r} does not " + f"match coords." + ) + continue + expected_idx = ( + coord_values + if isinstance(coord_values, pd.Index) + else pd.Index(coord_values) + ) + actual_idx = arr.coords[dim].to_index() + if not actual_idx.equals(expected_idx): + raise ValueError( + f"{subject}: coordinate values for dimension {dim!r} do not match " + f"coords — expected {expected_idx.tolist()}, got " + f"{actual_idx.tolist()}." + ) + + +def align_to_coords( + value: Any, + coords: CoordsLike | None, + *, + label: str, + **kwargs: Any, +) -> DataArray: + """ + Convert ``value`` with :func:`as_dataarray` and enforce the coords contract. + + Used by :meth:`~linopy.model.Model.add_variables` for ``lower``, ``upper``, + and ``mask``, and by :meth:`~linopy.model.Model.add_constraints` for + ``mask``. Raises :class:`ValueError` with a message that names ``label`` + when ``value`` cannot be aligned to ``coords``. Coords-parsing errors + propagate unchanged. 
+ """ + if coords is not None: + _coords_to_dict(coords, dims=kwargs.get("dims")) + try: + da = as_dataarray(value, coords, **kwargs) + except TypeError as err: + raise TypeError(f"{label} could not be aligned to coords: {err}") from err + except (ValueError, CoordinateValidationError) as err: + raise ValueError(f"{label} could not be aligned to coords: {err}") from err + validate_alignment(da, coords, dims=kwargs.get("dims"), label=label) + return da + + +def _coords_to_dict( + coords: Sequence[Sequence | pd.Index] | Mapping, + dims: DimsLike | None = None, +) -> dict[Hashable, Any]: + """ + Normalize coords to a dict mapping dim names to coordinate values. + + Container forms: + + - ``xarray.Coordinates`` → kept dim entries only (MultiIndex level + coords dropped). + - ``Mapping`` → returned as a shallow ``dict`` copy. + - sequence-of-entries → each entry handled per the rules below. + + Sequence-entry rules (``i`` is the position in ``coords``, ``dims[i]`` + is the matching entry in ``dims`` when one exists). An entry is + *unlabeled* if it's an unnamed ``pd.Index`` or a bare ``list`` / + ``tuple`` / ``range`` / ``ndarray``. + + +---------------------------------+-----------------------+-----------+ + | Entry | Naming source | Outcome | + +=================================+=======================+===========+ + | ``pd.Index`` with ``.name`` | ``.name`` | accepted | + +---------------------------------+-----------------------+-----------+ + | unlabeled entry | ``dims[i]`` | accepted | + +---------------------------------+-----------------------+-----------+ + | unlabeled entry | — (no ``dims[i]``) | skipped | + | | | — xarray | + | | | assigns | + | | | ``dim_0`` | + | | | etc. 
| + +---------------------------------+-----------------------+-----------+ + | ``pd.MultiIndex`` with ``.name``| ``.name`` | accepted | + +---------------------------------+-----------------------+-----------+ + | ``pd.MultiIndex`` w/o ``.name`` | ``dims[i]`` | accepted | + | | | (named on | + | | | a copy) | + +---------------------------------+-----------------------+-----------+ + | ``pd.MultiIndex`` w/o ``.name`` | — (no ``dims[i]``) | TypeError | + +---------------------------------+-----------------------+-----------+ + | anything else (e.g. DataArray) | — | TypeError | + +---------------------------------+-----------------------+-----------+ + """ + if isinstance(coords, Coordinates): + # Coordinates iterates over every coord variable, including + # MultiIndex level coords. Keep only the entries that are dims. + return {d: coords[d] for d in coords.dims if d in coords} + if isinstance(coords, Mapping): + return dict(coords) + dim_names: list[Any] | None = None + if dims is not None: + dim_names = list(dims) if isinstance(dims, list | tuple) else [dims] + result: dict[Hashable, Any] = {} + for i, c in enumerate(coords): + if isinstance(c, pd.MultiIndex): + name = c.name or ( + dim_names[i] if dim_names and i < len(dim_names) else None + ) + if name is None: + raise TypeError( + "MultiIndex coords entries must have .name set so " + "xarray can use it as the dimension name. Set it via " + "`idx.name = 'my_dim'`, or pass `dims=[...]` to name " + "entries by position." 
+ ) + if c.name is None: + c = c.copy() + c.name = name + result[name] = c + elif isinstance(c, pd.Index): + name = ( + c.name + if c.name + else (dim_names[i] if dim_names and i < len(dim_names) else None) + ) + if name is not None: + result[name] = c + elif isinstance(c, list | tuple | range | np.ndarray): + if dim_names and i < len(dim_names): + result[dim_names[i]] = pd.Index(c, name=dim_names[i]) + else: + raise TypeError( + f"coords entries must be pd.Index or an unnamed sequence " + f"(list / tuple / range / numpy.ndarray); got " + f"{type(c).__name__}. For an xarray DataArray coord, pass " + f"`variable.indexes[]` (a pd.Index) instead." + ) + return result + + +def _named_pandas_to_dataarray(arr: pd.Series | pd.DataFrame) -> DataArray | None: + """ + Convert a pandas Series or DataFrame with fully named axes to a DataArray. + + Returns ``None`` if any axis (or MultiIndex level) is unnamed or + non-string, so the caller can fall back to ``as_dataarray``. + """ + names = list(arr.index.names) + if isinstance(arr, pd.DataFrame): + names += list(arr.columns.names) + if any(not isinstance(n, str) for n in names): + return None + + if isinstance(arr, pd.DataFrame): + if isinstance(arr.index, pd.MultiIndex) or isinstance( + arr.columns, pd.MultiIndex + ): + arr = arr.stack(list(range(arr.columns.nlevels)), future_stack=True) + return arr.to_xarray() + return DataArray(arr) + + return arr.to_xarray() # TODO: rename to to_pandas_dataframe diff --git a/linopy/expressions.py b/linopy/expressions.py index 2ab0b8d3..674c987c 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -1844,7 +1844,7 @@ def from_rule( cls, model: Model, rule: Callable, - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None, + coords: Sequence[Sequence | pd.Index] | Mapping | None = None, ) -> LinearExpression: """ Create a linear expression from a rule and a set of coordinates. 
diff --git a/linopy/model.py b/linopy/model.py index 48a8200b..e374c101 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -20,7 +20,7 @@ import pandas as pd import xarray as xr from deprecation import deprecated -from numpy import inf, ndarray +from numpy import inf from pandas.core.frame import DataFrame from pandas.core.series import Series from xarray import DataArray, Dataset @@ -28,10 +28,10 @@ from linopy import solvers from linopy.common import ( + align_to_coords, as_dataarray, assign_multiindex_safe, best_int, - broadcast_mask, maybe_replace_signs, replace_by_map, to_path, @@ -112,73 +112,6 @@ logger = logging.getLogger(__name__) -def _coords_to_dict( - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping, -) -> dict[str, Any]: - """Normalize coords to a dict mapping dim names to coordinate values.""" - if isinstance(coords, Mapping): - return dict(coords) - # Sequence of indexes - result: dict[str, Any] = {} - for c in coords: - if isinstance(c, pd.Index) and c.name: - result[c.name] = c - return result - - -def _validate_dataarray_bounds(arr: Any, coords: Any) -> Any: - """ - Validate and expand DataArray bounds against explicit coords. - - If ``arr`` is not a DataArray, return it unchanged (``as_dataarray`` - will handle conversion). For DataArray inputs: - - - Raises ``ValueError`` if the array has dimensions not in coords. - - Raises ``ValueError`` if shared dimension coordinates don't match. - - Expands missing dimensions via ``expand_dims``. 
- """ - if not isinstance(arr, DataArray): - return arr - - expected = _coords_to_dict(coords) - if not expected: - return arr - - extra = set(arr.dims) - set(expected) - if extra: - raise ValueError(f"DataArray has extra dimensions not in coords: {extra}") - - for dim, coord_values in expected.items(): - if dim not in arr.dims: - continue - if isinstance(arr.indexes.get(dim), pd.MultiIndex): - continue - expected_idx = ( - coord_values - if isinstance(coord_values, pd.Index) - else pd.Index(coord_values) - ) - actual_idx = arr.coords[dim].to_index() - if not actual_idx.equals(expected_idx): - # Same values, different order → reindex to match expected order - if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( - expected_idx - ): - arr = arr.reindex({dim: expected_idx}) - else: - raise ValueError( - f"Coordinates for dimension '{dim}' do not match: " - f"expected {expected_idx.tolist()}, got {actual_idx.tolist()}" - ) - - # Expand missing dimensions - expand = {k: v for k, v in expected.items() if k not in arr.dims} - if expand: - arr = arr.expand_dims(expand) - - return arr - - class Model: """ Linear optimization model. @@ -657,9 +590,9 @@ def add_variables( self, lower: Any = -inf, upper: Any = inf, - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None, + coords: Sequence[Sequence | pd.Index] | Mapping | None = None, name: str | None = None, - mask: DataArray | ndarray | Series | None = None, + mask: MaskLike | None = None, binary: bool = False, integer: bool = False, semi_continuous: bool = False, @@ -682,12 +615,27 @@ def add_variables( upper : TYPE, optional Upper bound of the variable(s). Ignored if `binary` is True. The default is inf. - coords : list/xarray.Coordinates, optional - The coords of the variable array. - These are directly passed to the DataArray creation of - `lower` and `upper`. For every single combination of - coordinates a optimization variable is added to the model. - The default is None. 
+ coords : list/dict/xarray.Coordinates, optional + The coords of the variable array. When provided with **named + dimensions** (a ``Mapping``, ``xarray.Coordinates``, a + sequence of named ``pd.Index`` objects, or an unnamed + sequence paired with ``dims=`` in ``**kwargs``), ``coords`` + is the source of truth for the variable's dimensions, + order, and values. ``lower``, ``upper`` and ``mask`` are + aligned to this contract: + + - dims of every bound must be a subset of ``coords.dims``; + extra dims raise ``ValueError``; + - dim order in the variable always follows ``coords``; + - shared-dim coordinate values must equal ``coords``; same + values in a different order are auto-reindexed, different + value sets raise ``ValueError``; + - dims listed in ``coords`` but missing from a bound are + broadcast to ``coords`` shape. + + One optimization variable is added per combination of + coordinates. The default is ``None``, in which case the + shape is inferred from the bounds. name : str, optional Reference name of the added variables. The default None results in a name like "var1", "var2" etc. @@ -740,6 +688,67 @@ def add_variables( [7]: x[7] ∈ [0, inf] [8]: x[8] ∈ [0, inf] [9]: x[9] ∈ [0, inf] + + Strict coords-as-truth: a bound with an extra dim raises. + + >>> import xarray as xr + >>> m = Model() + >>> bad = xr.DataArray( + ... [[1.0, 2.0, 3.0]] * 2, + ... dims=["extra", "x"], + ... coords={"x": [0, 1, 2]}, + ... ) + >>> m.add_variables(lower=bad, coords=[pd.Index([0, 1, 2], name="x")], name="v") + Traceback (most recent call last): + ... + ValueError: lower bound has dimension(s) ['extra'] not declared in coords ... + + Strict coords-as-truth: a bound whose shared-dim values don't + match raises. + + >>> m = Model() + >>> wrong = xr.DataArray( + ... [1.0, 2.0, 3.0], dims=["x"], coords={"x": [10, 20, 30]} + ... ) + >>> m.add_variables( + ... lower=wrong, coords=[pd.Index([0, 1, 2], name="x")], name="v" + ... ) + Traceback (most recent call last): + ... 
+ ValueError: lower bound: coordinate values for dimension 'x' do not match coords ... + + Strict coords-as-truth, helpful side: a bound whose coord values + match ``coords`` only in a different order is auto-reindexed. + + >>> m = Model() + >>> reordered = xr.DataArray( + ... [3.0, 1.0, 2.0], dims=["x"], coords={"x": ["c", "a", "b"]} + ... ) + >>> v = m.add_variables( + ... lower=reordered, + ... coords=[pd.Index(["a", "b", "c"], name="x")], + ... name="r", + ... ) + >>> v.lower.values.tolist() + [1.0, 2.0, 3.0] + + Unnamed-coords sequence + ``dims=`` opts into the same strict + enforcement as a named index — extra dims still raise. + + >>> m = Model() + >>> m.add_variables(lower=bad, coords=[[0, 1, 2]], dims=["x"], name="w") + Traceback (most recent call last): + ... + ValueError: lower bound has dimension(s) ['extra'] not declared in coords ... + + The same strict contract applies to ``mask`` (including with + ``coords=[[...]], dims=[...]``). + + >>> m = Model() + >>> m.add_variables(mask=bad, coords=[[0, 1, 2]], dims=["x"], name="wm") + Traceback (most recent call last): + ... + ValueError: mask has dimension(s) ['extra'] not declared in coords ... """ if name is None: name = f"var{self._varnameCounter}" @@ -765,14 +774,12 @@ def add_variables( "Semi-continuous variables require a positive scalar lower bound." 
) - if coords is not None: - lower = _validate_dataarray_bounds(lower, coords) - upper = _validate_dataarray_bounds(upper, coords) - + lower_da = align_to_coords(lower, coords, label="lower bound", **kwargs) + upper_da = align_to_coords(upper, coords, label="upper bound", **kwargs) data = Dataset( { - "lower": as_dataarray(lower, coords, **kwargs), - "upper": as_dataarray(upper, coords, **kwargs), + "lower": lower_da, + "upper": upper_da, "labels": -1, } ) @@ -781,8 +788,12 @@ def add_variables( self._check_valid_dim_names(data) if mask is not None: - mask = as_dataarray(mask, coords=data.coords, dims=data.dims).astype(bool) - mask = broadcast_mask(mask, data.labels) + mask = align_to_coords( + mask, + coords if coords is not None else data.coords, + label="mask", + **kwargs, + ).astype(bool) # Auto-mask based on NaN in bounds (use numpy for speed) if self.auto_mask: @@ -891,7 +902,7 @@ def add_constraints( sign: SignLike | None = ..., rhs: ConstantLike | VariableLike | ExpressionLike | None = ..., name: str | None = ..., - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = ..., + coords: Sequence[Sequence | pd.Index] | Mapping | None = ..., mask: MaskLike | None = ..., freeze: Literal[False] = ..., ) -> Constraint: ... @@ -907,7 +918,7 @@ def add_constraints( sign: SignLike | None = ..., rhs: ConstantLike | VariableLike | ExpressionLike | None = ..., name: str | None = ..., - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = ..., + coords: Sequence[Sequence | pd.Index] | Mapping | None = ..., mask: MaskLike | None = ..., freeze: Literal[True] = ..., ) -> CSRConstraint: ... 
@@ -922,7 +933,7 @@ def add_constraints( sign: SignLike | None = None, rhs: ConstantLike | VariableLike | ExpressionLike | None = None, name: str | None = None, - coords: Sequence[Sequence | pd.Index | DataArray] | Mapping | None = None, + coords: Sequence[Sequence | pd.Index] | Mapping | None = None, mask: MaskLike | None = None, freeze: bool | None = None, ) -> ConstraintBase: @@ -1046,8 +1057,7 @@ def add_constraints( (data,) = xr.broadcast(data, exclude=[TERM_DIM]) if mask is not None: - mask = as_dataarray(mask, coords=data.coords, dims=data.dims).astype(bool) - mask = broadcast_mask(mask, data.labels) + mask = align_to_coords(mask, data.coords, label="mask").astype(bool) # Auto-mask based on null expressions or NaN RHS (use numpy for speed) if self.auto_mask: @@ -1428,7 +1438,7 @@ def calculate_block_maps(self) -> None: @overload def linexpr( - self, *args: Sequence[Sequence | pd.Index | DataArray] | Mapping + self, *args: Sequence[Sequence | pd.Index] | Mapping ) -> LinearExpression: ... @overload @@ -1441,7 +1451,7 @@ def linexpr( *args: tuple[ConstantLike, str | Variable | ScalarVariable] | ConstantLike | Callable - | Sequence[Sequence | pd.Index | DataArray] + | Sequence[Sequence | pd.Index] | Mapping, ) -> LinearExpression: """ diff --git a/linopy/piecewise.py b/linopy/piecewise.py index ccc265a7..25a0ce17 100644 --- a/linopy/piecewise.py +++ b/linopy/piecewise.py @@ -1006,20 +1006,18 @@ def _broadcast_points( lin_exprs = [_to_linexpr(e) for e in exprs] - target_dims: set[str] = set() - for le in lin_exprs: - target_dims.update(str(d) for d in le.coord_dims) - - missing = target_dims - skip - {str(d) for d in points.dims} - if not missing: - return points + point_dims = {str(d) for d in points.dims} + # Iterate exprs/dims in order; a set would give a hash-dependent, + # run-varying expanded dimension order. 
expand_map: dict[str, list] = {} - for d in missing: - for le in lin_exprs: + for le in lin_exprs: + for dim in le.coord_dims: + d = str(dim) + if d in skip or d in point_dims or d in expand_map: + continue if d in le.coords: - expand_map[str(d)] = list(le.coords[d].values) - break + expand_map[d] = list(le.coords[d].values) if expand_map: points = points.expand_dims(expand_map) diff --git a/linopy/sos_reformulation.py b/linopy/sos_reformulation.py index 1f17ee92..4abfb755 100644 --- a/linopy/sos_reformulation.py +++ b/linopy/sos_reformulation.py @@ -119,7 +119,7 @@ def reformulate_sos1( upper_name = f"{prefix}{name}_upper" card_name = f"{prefix}{name}_card" - coords = [var.coords[d] for d in var.dims] + coords = [var.indexes[d] for d in var.dims] y = model.add_variables(coords=coords, name=y_name, binary=True) model.add_constraints(var <= M * y, name=upper_name) @@ -173,9 +173,9 @@ def reformulate_sos2( card_name = f"{prefix}{name}_card" z_coords = [ - pd.Index(var.coords[sos_dim].values[:-1], name=sos_dim) + pd.Index(var.indexes[sos_dim][:-1], name=sos_dim) if d == sos_dim - else var.coords[d] + else var.indexes[d] for d in var.dims ] z = model.add_variables(coords=z_coords, name=z_name, binary=True) diff --git a/test/test_common.py b/test/test_common.py index 0c379a0b..4cfb6e46 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -6,6 +6,7 @@ """ from collections.abc import Callable +from typing import Any import numpy as np import pandas as pd @@ -18,6 +19,7 @@ from linopy import LinearExpression, Model, Variable from linopy.common import ( align, + align_to_coords, as_dataarray, assign_multiindex_safe, best_int, @@ -25,6 +27,7 @@ is_constant, iterate_slices, maybe_group_terms_polars, + validate_alignment, ) from linopy.testing import assert_linequal, assert_varequal from linopy.types import CoordsLike @@ -345,13 +348,15 @@ def test_as_dataarray_with_ndarray_coords_dict_dims_aligned() -> None: def 
test_as_dataarray_with_ndarray_coords_dict_set_dims_not_aligned() -> None: + """Coords is source of truth: extra coord entries broadcast into the result.""" target_dims = ("dim_0", "dim_1") target_coords = {"dim_0": ["a", "b"], "dim_2": ["A", "B"]} arr = np.array([[1, 2], [3, 4]]) da = as_dataarray(arr, coords=target_coords, dims=target_dims) - assert da.dims == target_dims + # dims labels the positional axes; coords adds dim_2 by broadcast. + assert set(da.dims) == {"dim_0", "dim_1", "dim_2"} assert list(da.coords["dim_0"].values) == ["a", "b"] - assert "dim_2" not in da.coords + assert list(da.coords["dim_2"].values) == ["A", "B"] def test_as_dataarray_with_number() -> None: @@ -483,6 +488,197 @@ def test_as_dataarray_with_unsupported_type() -> None: as_dataarray(lambda x: 1, dims=["dim1"], coords=[["a"]]) +def test_as_dataarray_preserves_extra_dims_for_broadcasting() -> None: + """Extra dims in the input are not rejected — they broadcast downstream.""" + arr = DataArray( + [[1, 2], [3, 4], [5, 6]], + dims=["a", "t"], + coords={"a": [0, 1, 2], "t": [10, 20]}, + ) + coords = {"a": [0, 1, 2]} + da = as_dataarray(arr, coords=coords) + assert set(da.dims) == {"a", "t"} + assert list(da.coords["t"].values) == [10, 20] + + +def test_as_dataarray_keeps_disjoint_shared_dim_values() -> None: + """Different value sets on a shared dim are passed through (xr.align handles).""" + arr = DataArray([1, 2, 3, 4, 5], dims=["a"], coords={"a": [0, 1, 2, 3, 4]}) + coords = {"a": [2, 3]} + da = as_dataarray(arr, coords=coords) + # No exception, no reindex; downstream alignment intersects. 
+ assert list(da.coords["a"].values) == [0, 1, 2, 3, 4] + + +def test_validate_alignment_rejects_extra_dims() -> None: + arr = DataArray( + [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} + ) + with pytest.raises(ValueError, match=r"not declared in coords"): + validate_alignment(arr, {"a": [0, 1]}) + + +def test_validate_alignment_rejects_value_mismatch() -> None: + arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) + with pytest.raises(ValueError, match="do not match coords"): + validate_alignment(arr, {"a": [10, 20, 30]}) + + +def test_validate_alignment_allows_subset_dims() -> None: + """arr.dims ⊂ coords.dims is fine (broadcasting fills the missing dim).""" + arr = DataArray([1, 2, 3], dims=["a"], coords={"a": [0, 1, 2]}) + validate_alignment(arr, {"a": [0, 1, 2], "b": [10, 20]}) # no raise + + +def test_validate_alignment_unnamed_coords_and_dims() -> None: + """coords=[[...]], dims=[...] enforces the same contract as a named mapping.""" + arr = DataArray([1, 2, 3], dims=["x"], coords={"x": [0, 1, 2]}) + validate_alignment(arr, [[0, 1, 2]], dims=["x"]) # no raise + + bad = DataArray( + [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [0, 1], "y": [0, 1]} + ) + with pytest.raises(ValueError, match=r"not declared in coords"): + validate_alignment(bad, [[0, 1]], dims=["x"]) + + +def test_validate_alignment_label_in_error() -> None: + arr = DataArray( + [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} + ) + with pytest.raises(ValueError, match=r"lower bound has dimension\(s\) \['b'\]"): + validate_alignment(arr, {"a": [0, 1]}, label="lower bound") + + +def test_align_to_coords_wraps_conversion_errors() -> None: + with pytest.raises(ValueError, match=r"lower bound could not be aligned"): + align_to_coords(np.array([1, 2]), {"x": [0, 1, 2]}, label="lower bound") + + +def test_align_to_coords_preserves_type_errors() -> None: + """Unsupported input types stay TypeError (don't become ValueError).""" + with 
pytest.raises(TypeError, match=r"lower bound could not be aligned"): + align_to_coords(lambda x: x, {"x": [0, 1, 2]}, label="lower bound") + + +def test_align_to_coords_does_not_relabel_coords_errors() -> None: + """Coords-side TypeError carries its own message, not the value label.""" + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + with pytest.raises(TypeError, match=r"MultiIndex.*must have \.name set"): + align_to_coords(np.array([1, 2, 3, 4]), [mi], label="lower bound") + + +class TestCoordsToDictRules: + """ + One test per row of the ``_coords_to_dict`` rules table. + + Each test name states the rule it pins; the assertions show the + expected outcome. Together they form the executable spec of how + sequence-form ``coords`` entries are named. + """ + + @staticmethod + def _parse(coords: Any, dims: Any = None) -> dict: + from linopy.common import _coords_to_dict + + return _coords_to_dict(coords, dims=dims) + + # -- container forms --------------------------------------------------- + + def test_mapping_is_returned_as_shallow_dict_copy(self) -> None: + src = {"x": [0, 1, 2], "y": [10, 20]} + result = self._parse(src) + assert result == src + assert result is not src + + def test_xarray_coordinates_keeps_only_dim_entries(self) -> None: + midx = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + coords = xr.Coordinates.from_pandas_multiindex(midx, "stacked") + result = self._parse(coords) + assert set(result) == {"stacked"} + + # -- pd.Index entries -------------------------------------------------- + + def test_named_pd_index_uses_its_name(self) -> None: + result = self._parse([pd.Index([0, 1, 2], name="x")]) + assert set(result) == {"x"} + + def test_unnamed_pd_index_with_dims_uses_dims(self) -> None: + result = self._parse([pd.Index([0, 1, 2])], dims=["x"]) + assert set(result) == {"x"} + + def test_unnamed_pd_index_without_dims_is_size_only(self) -> None: + # Same as a bare sequence: contributes no dim name; xarray 
assigns + # ``dim_0`` downstream. + assert self._parse([pd.Index([0, 1, 2])]) == {} + m = Model() + v = m.add_variables(coords=[pd.Index([0, 1, 2])]) + assert v.dims == ("dim_0",) + + # -- pd.MultiIndex entries -------------------------------------------- + + def test_named_multiindex_uses_its_name(self) -> None: + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + mi.name = "multi" + result = self._parse([mi]) + assert set(result) == {"multi"} + + def test_unnamed_multiindex_with_dims_uses_dims(self) -> None: + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + result = self._parse([mi], dims=["multi"]) + assert set(result) == {"multi"} + assert result["multi"].name == "multi" + assert mi.name is None # caller's MultiIndex not mutated + + def test_unnamed_multiindex_without_dims_raises(self) -> None: + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) + with pytest.raises(TypeError, match=r"MultiIndex.*must have \.name set"): + self._parse([mi]) + + # -- bare sequence entries -------------------------------------------- + + @pytest.mark.parametrize( + "entry", + [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], + ids=["list", "tuple", "range", "ndarray"], + ) + def test_bare_sequence_with_dims_uses_dims(self, entry: Any) -> None: + result = self._parse([entry], dims=["x"]) + assert set(result) == {"x"} + + @pytest.mark.parametrize( + "entry", + [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], + ids=["list", "tuple", "range", "ndarray"], + ) + def test_bare_sequence_without_dims_is_silently_skipped(self, entry: Any) -> None: + assert self._parse([entry]) == {} + + @pytest.mark.parametrize( + "entry", + [[0, 1, 2], (0, 1, 2), range(3), np.array([0, 1, 2])], + ids=["list", "tuple", "range", "ndarray"], + ) + def test_bare_sequence_without_dims_falls_through_to_xarray_dim_0( + self, entry: Any + ) -> None: + m = Model() + v = m.add_variables(coords=[entry]) + assert v.dims == ("dim_0",) + + 
# -- unsupported entries ---------------------------------------------- + + def test_dataarray_entry_raises(self) -> None: + with pytest.raises(TypeError, match=r"coords entries must be pd\.Index"): + self._parse([DataArray([0, 1, 2], dims=["x"])]) + + def test_unknown_type_entry_raises(self) -> None: + class Foo: ... + + with pytest.raises(TypeError, match=r"coords entries must be pd\.Index"): + self._parse([Foo()]) + + def test_best_int() -> None: # Test for int8 assert best_int(127) == np.int8 diff --git a/test/test_constraints.py b/test/test_constraints.py index 1667bfec..acc41b2e 100644 --- a/test/test_constraints.py +++ b/test/test_constraints.py @@ -258,20 +258,29 @@ def test_masked_constraints_broadcast() -> None: assert (m.constraints.labels.bc2[:, 0:5] != -1).all() assert (m.constraints.labels.bc2[:, 5:10] == -1).all() + # Pandas Series with named index missing a dim is broadcast to data.coords. + mask_pd = pd.Series( + [True, False, True] + [False] * 7, index=pd.RangeIndex(10, name="dim_0") + ) + m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc_pd", mask=mask_pd) + assert (m.constraints.labels.bc_pd[[0, 2], :] != -1).all() + assert (m.constraints.labels.bc_pd[[1, 3, 4, 5, 6, 7, 8, 9], :] == -1).all() + + # Mask with sparse coords (subset of data's coords) now raises instead of + # emitting a FutureWarning — the rule from the bounds path applies here too. 
mask3 = xr.DataArray( [True, True, False, False, False], dims=["dim_0"], coords={"dim_0": range(5)}, ) - with pytest.warns(FutureWarning, match="Missing values will be filled"): + with pytest.raises( + ValueError, match=r"mask: coordinate values for dimension 'dim_0'" + ): m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc3", mask=mask3) - assert (m.constraints.labels.bc3[0:2, :] != -1).all() - assert (m.constraints.labels.bc3[2:5, :] == -1).all() - assert (m.constraints.labels.bc3[5:10, :] == -1).all() # Mask with extra dimension not in data should raise mask4 = xr.DataArray([True, False], dims=["extra_dim"]) - with pytest.raises(AssertionError, match="not a subset"): + with pytest.raises(ValueError, match=r"mask has dimension\(s\) \['extra_dim'\]"): m.add_constraints(1 * x + 10 * y, EQUAL, 0, name="bc4", mask=mask4) diff --git a/test/test_piecewise_constraints.py b/test/test_piecewise_constraints.py index c44af394..72b57265 100644 --- a/test/test_piecewise_constraints.py +++ b/test/test_piecewise_constraints.py @@ -1383,6 +1383,23 @@ def test_broadcast_over_extra_dims(self) -> None: assert "generator" in delta.dims assert "time" in delta.dims + def test_broadcast_points_dim_order_follows_exprs(self) -> None: + """Expanded dims follow the expression dim order, not set ordering.""" + import xarray as xr + + from linopy.piecewise import BREAKPOINT_DIM, _broadcast_points + + m = Model() + coords = [ + pd.Index(["v0", "v1"], name="alpha"), + pd.Index(["w0", "w1"], name="beta"), + pd.Index([0, 1], name="gamma"), + ] + x = m.add_variables(coords=coords, name="x") + points = xr.DataArray([0, 1, 2, 3], dims=[BREAKPOINT_DIM]) + out = _broadcast_points(points, 1 * x) + assert out.dims == ("alpha", "beta", "gamma", BREAKPOINT_DIM) + # =========================================================================== # NaN masking diff --git a/test/test_repr.py b/test/test_repr.py index 0b8a6a6b..ebe9804c 100644 --- a/test/test_repr.py +++ b/test/test_repr.py @@ -40,6 +40,7 @@ 
multiindex = pd.MultiIndex.from_product( [list("asdfhjkg"), list("asdfghj")], names=["level_0", "level_1"] ) +multiindex.name = "multi" g = m.add_variables(coords=[multiindex], name="g") # create linear expression for each variable diff --git a/test/test_variable.py b/test/test_variable.py index b14b746e..4b89d318 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -419,42 +419,189 @@ def test_bound_types_with_coords( ) def test_dataarray_coord_mismatch(self, model: "Model", coords: Any) -> None: lower = DataArray([0, 0, 0], dims=["x"], coords={"x": [0, 1, 2]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match="lower bound.*do not match coords"): model.add_variables(lower=lower, coords=coords, name="x") def test_dataarray_coord_mismatch_upper(self, model: "Model") -> None: upper = DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match="upper bound.*do not match coords"): model.add_variables(upper=upper, coords=self.SEQ_COORDS, name="x") def test_dataarray_extra_dims(self, model: "Model") -> None: - lower = DataArray([[1, 2], [3, 4]], dims=["x", "y"]) - with pytest.raises(ValueError, match="extra dimensions"): + lower = DataArray( + [[1, 2], [3, 4], [5, 6]], dims=["x", "y"], coords={"x": [0, 1, 2]} + ) + with pytest.raises(ValueError, match=r"lower bound has dimension\(s\) \['y'\]"): model.add_variables(lower=lower, coords=self.DICT_COORDS, name="x") + def test_mask_extra_dims_with_unnamed_coords_and_dims(self, model: "Model") -> None: + """Mask is validated against coords + dims= like lower/upper.""" + mask = DataArray( + [[True, False], [True, False], [False, True]], + dims=["x", "extra"], + coords={"x": [0, 1, 2]}, + ) + with pytest.raises(ValueError, match=r"mask has dimension\(s\) \['extra'\]"): + model.add_variables( + mask=mask, + coords=[[0, 1, 2]], + dims=["x"], + name="m", + ) + + def 
test_dataarray_coord_reorder(self, model: "Model") -> None: + """A bound whose coords differ only in order is reindexed to coords.""" + lower = DataArray([3, 1, 2], dims=["x"], coords={"x": ["c", "a", "b"]}) + var = model.add_variables( + lower=lower, coords=[pd.Index(["a", "b", "c"], name="x")], name="x" + ) + assert (var.data.lower == [1, 2, 3]).all() + + def test_positional_bound_aligns_to_coords(self, model: "Model") -> None: + """ + Numpy / unnamed-pandas bounds align to coords positionally, + even when the input's auto-generated coord values would not match. + """ + coords = [pd.Index(list("abc"), name="x")] + # numpy array — no labels at all, positional alignment. + v_np = model.add_variables(upper=np.array([1, 2, 3]), coords=coords, name="np") + assert v_np.dims == ("x",) + assert (v_np.data.upper.sel(x="a") == 1).all() + assert (v_np.data.upper.sel(x="c") == 3).all() + # Unnamed Series — pandas index is auto-generated, ignored in favour + # of coords (positional alignment, principle: coords is source of truth). + v_s = model.add_variables( + upper=pd.Series([10, 20, 30]), coords=coords, name="s" + ) + assert v_s.dims == ("x",) + assert (v_s.data.upper.sel(x="a") == 10).all() + assert (v_s.data.upper.sel(x="c") == 30).all() + # Unnamed DataFrame — both axes positional. + v_df = model.add_variables( + upper=pd.DataFrame([[1, 2], [3, 4], [5, 6]]), + coords=[pd.Index(list("abc"), name="x"), pd.Index(list("xy"), name="y")], + name="df", + ) + assert v_df.dims == ("x", "y") + assert (v_df.data.upper.sel(x="a", y="x") == 1).all() + assert (v_df.data.upper.sel(x="c", y="y") == 6).all() + + def test_positional_bound_wrong_size_raises_clear_error( + self, model: "Model" + ) -> None: + """ + Shape mismatch on positional inputs surfaces as a size error, + not a 'coordinates do not match' error. 
+ """ + coords = [pd.Index(list("abc"), name="x")] + with pytest.raises(ValueError, match=r"upper bound could not be aligned"): + model.add_variables(upper=np.array([1, 2]), coords=coords, name="np_bad") + with pytest.raises(ValueError, match=r"upper bound could not be aligned"): + model.add_variables(upper=pd.Series([1, 2]), coords=coords, name="s_bad") + + def test_unnamed_pd_index_is_size_only(self, model: "Model") -> None: + bound = DataArray([1, 2, 3], dims=["dim_0"]) + var = model.add_variables(upper=bound, coords=[pd.Index([0, 1, 2])], name="x") + assert (var.upper == [1, 2, 3]).all() + # -- Broadcasting missing dims ----------------------------------------- - def test_dataarray_broadcast_missing_dim(self, model: "Model") -> None: + @pytest.mark.parametrize( + "bound", + [ + pytest.param( + DataArray([1, 2, 3], dims=["time"], coords={"time": range(3)}), + id="DataArray", + ), + pytest.param( + pd.Series(index=pd.RangeIndex(3, name="time"), data=[1, 2, 3]), + id="Series", + ), + pytest.param( + pd.DataFrame( + index=pd.RangeIndex(3, name="time"), + columns=pd.Index(["red"], name="colour"), + data=[[1], [2], [3]], + ), + id="DataFrame", + ), + pytest.param( + pd.Series( + index=pd.MultiIndex.from_product( + [pd.RangeIndex(3), ["red"]], names=("time", "colour") + ), + data=[1, 2, 3], + ), + id="Series-multiindex", + ), + pytest.param( + pd.DataFrame( + index=pd.RangeIndex(3, name="time"), + columns=pd.MultiIndex.from_product( + [["a", "b"], ["red"]], names=("space", "colour") + ), + data=[[1, 1], [2, 2], [3, 3]], + ), + id="DataFrame-multicolumns", + ), + pytest.param( + pd.DataFrame( + index=pd.MultiIndex.from_product( + [pd.RangeIndex(3), ["a", "b"]], names=("time", "space") + ), + columns=pd.Index(["red"], name="colour"), + data=[[1], [1], [2], [2], [3], [3]], + ), + id="DataFrame-multiindex", + ), + ], + ) + def test_bound_broadcast_missing_dim( + self, model: "Model", bound: DataArray | pd.Series | pd.DataFrame + ) -> None: + """Pandas / DataArray bounds 
missing dims are broadcast to coords.""" time = pd.RangeIndex(3, name="time") space = pd.Index(["a", "b"], name="space") - lower = DataArray([1, 2, 3], dims=["time"], coords={"time": range(3)}) - var = model.add_variables(lower=lower, coords=[time, space], name="x") - assert set(var.data.dims) == {"time", "space"} - assert var.data.sizes == {"time": 3, "space": 2} - # Verify broadcast filled with actual values, not NaN + colour = pd.Index(["red"], name="colour") + var = model.add_variables( + lower=-bound, upper=bound, coords=[time, space, colour], name="x" + ) + assert var.dims == ("time", "space", "colour") + assert var.data.lower.dims == ("time", "space", "colour") + assert var.data.upper.dims == ("time", "space", "colour") + assert var.data.sizes == {"time": 3, "space": 2, "colour": 1} assert not var.data.lower.isnull().any() - assert (var.data.lower.sel(space="a") == [1, 2, 3]).all() - assert (var.data.lower.sel(space="b") == [1, 2, 3]).all() - - # -- Special coord formats --------------------------------------------- + assert (var.data.lower.sel(space="a", colour="red") == [-1, -2, -3]).all() + assert (var.data.lower.sel(space="b", colour="red") == [-1, -2, -3]).all() + assert (var.data.upper.sel(space="a", colour="red") == [1, 2, 3]).all() - def test_multiindex_coords(self, model: "Model") -> None: - idx = pd.MultiIndex.from_product( - [[1, 2], ["a", "b"]], names=("level1", "level2") + @pytest.mark.parametrize( + "lower, upper", + [ + pytest.param(0, "da", id="scalar-lower+da-upper"), + pytest.param("da", 1, id="da-lower+scalar-upper"), + pytest.param("da", "da", id="da-lower+da-upper"), + ], + ) + def test_dataarray_broadcast_missing_dim_order( + self, model: "Model", lower: Any, upper: Any + ) -> None: + """Dimension order follows coords, not the type of the bounds (#706).""" + x = pd.Index(["a", "b", "c"], name="x") + y = pd.Index(["X", "Y"], name="y") + full = DataArray( + np.arange(6).reshape(3, 2), coords={"x": x, "y": y}, dims=["x", "y"] ) - idx.name 
= "multi" - var = model.add_variables(lower=0, upper=1, coords=[idx], name="x") - assert var.shape == (4,) + # bounds are DataArrays missing the 'y' dimension + da = full.sum("y") + lower = da if lower == "da" else lower + upper = da if upper == "da" else upper + var = model.add_variables(lower=lower, upper=upper, coords=[x, y], name="x") + assert var.dims == ("x", "y") + assert var.data.lower.dims == ("x", "y") + assert var.data.upper.dims == ("x", "y") + + # -- Special coord formats --------------------------------------------- def test_xarray_coordinates_object(self, model: "Model") -> None: time = pd.RangeIndex(3, name="time") @@ -527,7 +674,7 @@ def test_one_dataarray_mismatches_other_ok(self, model: "Model") -> None: """Only the mismatched bound should raise, regardless of the other.""" lower = DataArray([0, 0, 0], dims=["x"], coords={"x": [0, 1, 2]}) upper = DataArray([1, 1], dims=["x"], coords={"x": [10, 20]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match=r"upper bound.*do not match coords"): model.add_variables( lower=lower, upper=upper, coords=self.SEQ_COORDS, name="x" ) @@ -629,7 +776,7 @@ def test_reordered_coords_reindexed(self, model: "Model") -> None: def test_reordered_coords_different_values_raises(self, model: "Model") -> None: """Overlapping but not identical coord sets must still raise.""" lower = DataArray([10, 20], dims=["x"], coords={"x": ["a", "b"]}) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match=r"lower bound.*do not match coords"): model.add_variables(lower=lower, coords={"x": ["a", "c"]}, name="x") # -- String and datetime coordinates ----------------------------------- @@ -657,9 +804,60 @@ def test_string_coords_mismatch(self, model: "Model") -> None: lower = DataArray( [0, 0], dims=["region"], coords={"region": ["north", "south"]} ) - with pytest.raises(ValueError, match="do not match"): + with pytest.raises(ValueError, match=r"lower 
bound.*do not match coords"): model.add_variables( lower=lower, coords={"region": ["north", "south", "east"]}, name="x", ) + + +class TestAddVariablesMultiIndexCoords: + """MultiIndex-specific coord handling in add_variables.""" + + @pytest.fixture + def model(self) -> "Model": + return Model() + + @pytest.fixture + def midx(self) -> pd.MultiIndex: + mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=("l1", "l2")) + mi.name = "multi" + return mi + + def test_scalar_bounds(self, model: "Model", midx: pd.MultiIndex) -> None: + var = model.add_variables(lower=0, upper=1, coords=[midx], name="x") + assert var.shape == (4,) + assert var.dims == ("multi",) + + def test_dataarray_bound(self, model: "Model", midx: pd.MultiIndex) -> None: + bound = DataArray([1, 2, 3, 4], dims=["multi"], coords={"multi": midx}) + var = model.add_variables(upper=bound, coords=[midx], name="x") + assert var.shape == (4,) + assert (var.data.upper == [1, 2, 3, 4]).all() + + def test_dataarray_bound_broadcast( + self, model: "Model", midx: pd.MultiIndex + ) -> None: + time = pd.Index([10, 20, 30], name="time") + bound = DataArray([1, 2, 3, 4], dims=["multi"], coords={"multi": midx}) + var = model.add_variables( + lower=-bound, upper=bound, coords=[midx, time], name="x" + ) + assert var.dims == ("multi", "time") + assert var.shape == (4, 3) + assert (var.data.upper.sel(time=10) == [1, 2, 3, 4]).all() + + def test_without_name_raises(self, model: "Model") -> None: + midx = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=("l1", "l2")) + with pytest.raises(TypeError, match="MultiIndex.*must have .name set"): + model.add_variables(lower=0, upper=1, coords=[midx], name="x") + + def test_mismatched_multiindex_raises( + self, model: "Model", midx: pd.MultiIndex + ) -> None: + other = pd.MultiIndex.from_product([[0, 1], ["x", "y"]], names=("l1", "l2")) + other.name = "multi" + bound = DataArray([1, 2, 3, 4], dims=["multi"], coords={"multi": other}) + with pytest.raises(ValueError, 
match="MultiIndex.*does not match"): + model.add_variables(upper=bound, coords=[midx], name="x") diff --git a/test/test_variables.py b/test/test_variables.py index 37de6aff..e55ca680 100644 --- a/test/test_variables.py +++ b/test/test_variables.py @@ -123,20 +123,29 @@ def test_variables_mask_broadcast() -> None: assert (y.labels[:, 0:5] != -1).all() assert (y.labels[:, 5:10] == -1).all() + # Pandas Series with named index missing a dim is broadcast to data.coords. + mask_pd = pd.Series( + [True, False, True] + [False] * 7, index=pd.RangeIndex(10, name="dim_0") + ) + v = m.add_variables(lower, upper, name="v", mask=mask_pd) + assert (v.labels[[0, 2], :] != -1).all() + assert (v.labels[[1, 3, 4, 5, 6, 7, 8, 9], :] == -1).all() + + # Mask with sparse coords (subset of data's coords) now raises instead of + # emitting a FutureWarning — the rule from the bounds path applies here too. mask3 = xr.DataArray( [True, True, False, False, False], dims=["dim_0"], coords={"dim_0": range(5)}, ) - with pytest.warns(FutureWarning, match="Missing values will be filled"): - z = m.add_variables(lower, upper, name="z", mask=mask3) - assert (z.labels[0:2, :] != -1).all() - assert (z.labels[2:5, :] == -1).all() - assert (z.labels[5:10, :] == -1).all() + with pytest.raises( + ValueError, match=r"mask: coordinate values for dimension 'dim_0'" + ): + m.add_variables(lower, upper, name="z", mask=mask3) # Mask with extra dimension not in data should raise mask4 = xr.DataArray([True, False], dims=["extra_dim"]) - with pytest.raises(AssertionError, match="not a subset"): + with pytest.raises(ValueError, match=r"mask has dimension\(s\) \['extra_dim'\]"): m.add_variables(lower, upper, name="w", mask=mask4)