From 523b5e4d161d5830f7386a02362a605504dc03d2 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 13 Jun 2026 05:58:03 +0200 Subject: [PATCH 01/24] Preliminary plotting capability for b2view --- pyproject.toml | 2 + src/blosc2/b2view/app.py | 108 +++++++++++++++++++++++++++++++++++- src/blosc2/b2view/model.py | 49 ++++++++++++++++ tests/b2view/test_basics.py | 41 ++++++++++++++ tests/test_b2view_model.py | 53 ++++++++++++++++++ todo/b2view.md | 16 ++++++ 6 files changed, 268 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 237ce828..6266545f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,8 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html" [project.optional-dependencies] parquet = ["pyarrow"] +# in-terminal plots for the 'p' key in b2view +plot = ["textual-plotext"] [project.scripts] parquet-to-blosc2 = "blosc2.cli.parquet_to_blosc2:main" diff --git a/src/blosc2/b2view/app.py b/src/blosc2/b2view/app.py index 7e5557f0..5f75966a 100644 --- a/src/blosc2/b2view/app.py +++ b/src/blosc2/b2view/app.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any, ClassVar +import numpy as np from rich.markup import escape as markup_escape from textual.app import App, ComposeResult from textual.binding import Binding @@ -11,6 +12,11 @@ from textual.screen import ModalScreen from textual.widgets import DataTable, Footer, Header, Input, Static, Tree +try: + from textual_plotext import PlotextPlot +except ImportError: # plotting is optional + PlotextPlot = None + from blosc2.b2view.model import DataSliceLayout, StoreBrowser from blosc2.b2view.render import ( column_float_decimals, @@ -212,6 +218,7 @@ class HelpScreen(ModalScreen[None]): ("s / e (home / end)", "first / last column window"), ("c", "go to column index or name..."), ("/", "filter visible columns by substring (CTable)"), + ("p", "plot a whole-column overview (needs textual-plotext)"), ], ), ( @@ -425,6 +432,55 @@ def action_cancel(self) -> None: 
self.dismiss(None) +class PlotScreen(ModalScreen[None]): + """Modal plotting one numeric column of the data grid (textual-plotext).""" + + CSS = """ + PlotScreen { + align: center middle; + } + #plot-dialog { + width: 90%; + height: 80%; + border: thick $accent; + background: $surface; + padding: 1 2; + } + #plot-title { + text-style: bold; + height: 1; + } + #plot-widget { + height: 1fr; + } + """ + + BINDINGS: ClassVar = [ + ("escape", "close", "Close"), + ("q", "close", "Close"), + ("p", "close", "Close"), + ] + + def __init__(self, *, title: str, x, y): + super().__init__() + self.plot_title = title + self.x = list(x) + self.y = list(y) + + def compose(self) -> ComposeResult: + with Vertical(id="plot-dialog"): + yield Static(markup_escape(self.plot_title), id="plot-title") + yield PlotextPlot(id="plot-widget") + + def on_mount(self) -> None: + plt = self.query_one(PlotextPlot).plt + plt.plot(self.x, self.y, marker="braille") + plt.xlabel("row") + + def action_close(self) -> None: + self.dismiss(None) + + class B2ViewApp(App): """Browse TreeStore hierarchy and preview objects.""" @@ -464,6 +520,7 @@ class B2ViewApp(App): Binding("c", "go_to_column", "Go to column", show=False), Binding("f", "filter_rows", "Filter rows", show=False), Binding("slash", "filter_columns", "Filter columns", show=False), + Binding("p", "plot_column", "Plot column", show=False), Binding("d", "dim_cycle", "Dim mode", show=False), Binding("enter", "dim_toggle_nav", "Toggle nav", show=False), Binding("escape", "dim_exit", "Exit dim mode", show=False), @@ -513,7 +570,10 @@ def compose(self) -> ComposeResult: yield Static("", id="vlmetadata") with B2ViewPanel(id="data-pane") as data_pane: data_pane.border_title = "data" - data_pane.border_subtitle = "?(help) | d(im mode) | filter: f(rows) /(cols) | rows: t/b/g(oto) | cols: s/e/c(goto)" + data_pane.border_subtitle = ( + "?(help) | d(im mode) | filter: f(rows) /(cols) | " + "rows: t/b/g(oto) | cols: s/e/c(goto) | p(lot)" + ) yield Static("", 
id="data-header") with Horizontal(id="data-table-row"): yield BufferedDataTable(id="data-table", show_row_labels=True, zebra_stripes=True) @@ -1218,6 +1278,52 @@ def action_go_to_row(self) -> None: screen = GoToRowScreen(nrows=self.table_page["nrows"], current=current) self.push_screen(screen, self._go_to_row) + _PLOT_MAX_POINTS = 2000 + + def action_plot_column(self) -> None: + """p key — plot a downsampled overview of the whole cursor column.""" + if not self._in_data_grid(): + return + if PlotextPlot is None: + self.notify("Plotting needs the 'textual-plotext' package", severity="warning") + return + buffer = self.table_buffer or self.table_page + columns = buffer["columns"] + if not columns: + return + cursor_col = self.query_one("#data-table", DataTable).cursor_column + name = columns[min(max(0, cursor_col), len(columns) - 1)] + # Cheap numeric check on the already-loaded buffer; this also rejects + # expensive object columns before any whole-column strided read. + sample = np.asarray(buffer["data"][name]) + if sample.dtype.kind not in "iufb": + self.notify(f"Column {name!r} is not numeric", severity="warning") + return + + column: str | int | None + if buffer.get("source_kind") == "ctable": + column = name + elif name.isdigit(): # array grids label columns with global indices + column = int(name) + else: # 1-D arrays (single navigable dim) have one "value" column + column = None + series = self.browser.plot_series( + self.selected_path, column=column, layout=self._data_layout, max_points=self._PLOT_MAX_POINTS + ) + + x, y = series["x"], np.asarray(series["y"]) + if y.dtype.kind == "b": + y = y.astype(np.int64) + finite = np.isfinite(y.astype(np.float64)) + x, y = x[finite], y[finite] + if x.size == 0: + self.notify(f"Column {name!r} has no finite values to plot", severity="warning") + return + title = f"{self.selected_path} · {name} · {series['n']} rows" + if series["step"] > 1: + title += f" (step {series['step']})" + self.push_screen(PlotScreen(title=title, 
x=x, y=y)) + def action_go_to_column(self) -> None: if not self._in_data_grid(): return diff --git a/src/blosc2/b2view/model.py b/src/blosc2/b2view/model.py index 1bde09e0..733e0f9c 100644 --- a/src/blosc2/b2view/model.py +++ b/src/blosc2/b2view/model.py @@ -277,6 +277,55 @@ def preview( return {"message": "SChunk byte preview is not implemented yet."} return {"message": f"Preview is not supported for {kind!r} objects."} + def plot_series( + self, + path: str, + *, + column: str | int | None = None, + layout: DataSliceLayout | None = None, + max_points: int = 2000, + ) -> dict[str, Any]: + """Return a bounded ``{"x", "y", "n", "step"}`` overview of one series. + + The series is a CTable column (*column* is its name; an active row + filter is honored) or an array (*column* is the global index along + the column dimension of *layout*, or None for 1-D arrays). The whole + length is covered with a single strided blosc2 read of at most + *max_points* elements — the full data is never materialized. 
+ """ + path = self.normalize_path(path) + obj = self._get_object(path) + kind = object_kind(obj) + + if kind == "ctable": + obj = self._filter_views.get(path, obj) + n = len(obj) + step = max(1, -(-n // max_points)) + y = safe_asarray(obj[column][::step]) if n else np.empty(0) + elif kind in {"ndarray", "c2array"}: + shape = tuple(getattr(obj, "shape", ()) or ()) + ndim = len(shape) + if ndim == 0: + raise ValueError("Cannot plot a scalar") + row_dim = layout.navigable_dims[0] if layout is not None and layout.navigable_dims else 0 + n = shape[row_dim] + step = max(1, -(-n // max_points)) + idx: list[int | slice] = [] + for i in range(ndim): + if i == row_dim: + idx.append(slice(0, n, step)) + elif layout is not None and i in layout.fixed_values: + idx.append(layout.fixed_values[i]) + elif layout is not None and len(layout.navigable_dims) > 1 and i == layout.navigable_dims[1]: + idx.append(int(column)) + else: + idx.append(0) + y = np.asarray(obj[tuple(idx)]) if n else np.empty(0) + else: + raise ValueError(f"Cannot plot {kind!r} objects") + + return {"x": np.arange(0, n, step), "y": y, "n": n, "step": step} + def column_names(self, path: str) -> list[str] | None: """Return the column names for a CTable path, or None for other kinds. diff --git a/tests/b2view/test_basics.py b/tests/b2view/test_basics.py index e8773cfc..d3d7bdd9 100644 --- a/tests/b2view/test_basics.py +++ b/tests/b2view/test_basics.py @@ -495,6 +495,15 @@ async def test_ctable_column_paging(store_path): assert len(page["columns"]) < len(wide_columns) assert table.virtual_size.width <= table.size.width + # 'p' on a non-numeric column must not open a plot (notify only). + # This also passes when textual-plotext is not installed. 
+ await pilot.press("s") + await wait_for_table(pilot) + table.move_cursor(column=app.table_page["columns"].index("d")) + await pilot.press("p") + await pilot.pause() + assert type(app.screen).__name__ != "PlotScreen" + # ── CTable filtering ───────────────────────────────────────────────────── @@ -603,3 +612,35 @@ async def submit_column_filter(pattern: str) -> None: await wait_for_table(pilot) assert app.browser.get_column_filter("/level0/ctable") is None assert app.table_page["ncols"] == len(expected) + + +# ── Plotting ('p' key, optional textual-plotext) ───────────────────────── + + +async def test_plot_column(store_path): + """'p' plots a downsampled whole-array overview in a modal.""" + pytest.importorskip("textual_plotext") + from blosc2.b2view.app import PlotScreen + + app = B2ViewApp(store_path, start_path="/level0/leaf1", start_panel="data") + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + await focus_data_table(pilot) + + await pilot.press("p") + await pilot.pause() + assert isinstance(app.screen, PlotScreen) + screen = app.screen + + # The series covers the whole 1-D leaf, downsampled by striding + step = -(-LEAF1_LEN // app._PLOT_MAX_POINTS) + assert step > 1 # the leaf is larger than the point budget + assert list(screen.x) == list(range(0, LEAF1_LEN, step)) + np.testing.assert_allclose(screen.y, leaf1_values()[::step]) + assert "leaf1" in screen.plot_title + assert f"step {step}" in screen.plot_title + + # 'p' (like escape) closes the plot again + await pilot.press("p") + await pilot.pause() + assert not isinstance(app.screen, PlotScreen) diff --git a/tests/test_b2view_model.py b/tests/test_b2view_model.py index 3d782841..ac293e4a 100644 --- a/tests/test_b2view_model.py +++ b/tests/test_b2view_model.py @@ -229,3 +229,56 @@ def test_preview_array_high_dimensional_slice(): arr = np.arange(2 * 3 * 4).reshape(2, 3, 4) preview = preview_array(arr, max_rows=2, max_cols=3) np.testing.assert_array_equal(preview, arr[0, :2, 
:3]) + + +def test_plot_series_1d_strided_overview(tmp_path): + path = tmp_path / "plot1d.b2z" + n = 1000 + with blosc2.TreeStore(str(path), mode="w") as store: + store["/wave"] = blosc2.linspace(0, 1, num=n) + + with StoreBrowser(str(path)) as browser: + series = browser.plot_series("/wave", max_points=300) + assert series["n"] == n + assert series["step"] == 4 # ceil(1000 / 300) + np.testing.assert_array_equal(series["x"], np.arange(0, n, 4)) + np.testing.assert_allclose(series["y"], np.linspace(0, 1, n)[::4]) + + # Small arrays are returned whole (step 1) + small = browser.plot_series("/wave", max_points=n) + assert small["step"] == 1 + assert len(small["y"]) == n + + +def test_plot_series_2d_column_with_layout(tmp_path): + from blosc2.b2view.model import DataSliceLayout + + path = tmp_path / "plot2d.b2z" + values = np.linspace(0, 1, 200 * 8).reshape(200, 8) + with blosc2.TreeStore(str(path), mode="w") as store: + store["/grid"] = values + + with StoreBrowser(str(path)) as browser: + layout = DataSliceLayout.from_shape((200, 8)) + series = browser.plot_series("/grid", column=5, layout=layout, max_points=50) + assert series["n"] == 200 + assert series["step"] == 4 + np.testing.assert_allclose(series["y"], values[::4, 5]) + + +def test_plot_series_ctable_column_honors_row_filter(tmp_path): + path = tmp_path / "plotct.b2z" + with blosc2.TreeStore(str(path), mode="w") as store: + store["/table"] = make_ctable(100) + + with StoreBrowser(str(path)) as browser: + series = browser.plot_series("/table", column="y", max_points=1000) + assert series["n"] == 100 + assert series["step"] == 1 + np.testing.assert_allclose(series["y"], np.arange(100) * 1.5) + + # An active row filter restricts the plotted universe + browser.set_filter("/table", "x >= 50") + filtered = browser.plot_series("/table", column="y", max_points=1000) + assert filtered["n"] == 50 + np.testing.assert_allclose(filtered["y"], np.arange(50, 100) * 1.5) diff --git a/todo/b2view.md b/todo/b2view.md index 
1d1b540d..082093b4 100644 --- a/todo/b2view.md +++ b/todo/b2view.md @@ -21,6 +21,13 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of placeholder; offer on-demand decoding (e.g. a key to materialize the column, or decode just the cursor row). - [ ] SChunk preview is not implemented (`model.preview` returns a message). +- [ ] Plotting follow-ups for the `p` key: maybe a live mini-plot that + follows paging, or zoom into a row range from the plot modal. If + character resolution proves too coarse, `textual-image` can render + real matplotlib output on kitty/iTerm2/sixel terminals, degrading to + half-blocks elsewhere. Note: plain striding can alias periodic data; + a chunk-aggregated min/max envelope would be the audio-editor-style + fix. ### Testing @@ -59,6 +66,15 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of - 2026-06-12: `s`/`e` keys jump to the start/end column window (aliases of Home/End, which were undiscoverable); the data panel subtitle now lists all jump keys: `rows: t/b/g | cols: s/e`. +- 2026-06-12: `p` plots the cursor column (or a 1-D leaf) of the loaded row + buffer in a modal, via the optional `textual-plotext` package (new `plot` + extra); braille scatter, NaN/inf filtered, non-numeric columns and a + missing package just notify. Works headless in Pilot tests. +- 2026-06-12: The `p` plot now shows a downsampled overview of the *whole* + series (`StoreBrowser.plot_series`): a single strided blosc2 read of at + most ~2000 points (10 ms on a 10M-element array), never materializing the + full data; honors layout (fixed dims) for N-D arrays and active row + filters for CTables. - 2026-06-12: `?` opens a help screen listing all keys grouped by area (panels, tree, grid rows/columns, dim mode); shown in the footer, closed with esc/`?`/`q`. 
From 9c2d32f592a3db2ad2359766f04c4c8e2c25b389 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 13 Jun 2026 07:18:31 +0200 Subject: [PATCH 02/24] Acceleration path for NDArray getitem when strides are large --- src/blosc2/ndarray.py | 58 +++++++++++++++++- tests/ndarray/test_getitem.py | 107 ++++++++++++++++++++++++++++++++++ todo/b2view.md | 4 +- 3 files changed, 166 insertions(+), 3 deletions(-) diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py index 8945e250..18101f19 100644 --- a/src/blosc2/ndarray.py +++ b/src/blosc2/ndarray.py @@ -46,6 +46,13 @@ slice_to_chunktuple, ) +# Upper bound on the number of coordinates the strided-slice sparse-gather +# fast path will build (see NDArray._try_subsample_gather). Caps both the +# int64 coordinate array (~8 MB at this size) and the per-coordinate scattered +# copies, so a dense slice such as ``a[::2]`` falls back to the bulk path +# instead of materializing a huge coordinate set. +_SUBSAMPLE_GATHER_MAX_COORDS = 1_000_000 + # These functions in ufunc_map in ufunc_map_1param are implemented in numexpr and so we call # those instead (since numexpr uses multithreading it is faster) ufunc_map = { @@ -4272,9 +4279,56 @@ def set_oselection_numpy(self, key: list | np.ndarray, arr: NDArray) -> np.ndarr """ return super().set_oindex_numpy(key, arr) + def _try_subsample_gather(self, start, stop, step, shape): + """Fast path for a coarse strided read via the sparse-gather primitive. + + Returns a NumPy array of the (post-squeeze) *shape* when the selection + is a coarse subsample worth routing through ``b2nd_get_sparse_cbuffer`` + (decompressing only the blocks that actually hold a selected element), + or ``None`` to fall back to the dense per-chunk path. 
+ + Engages only when every step is positive, the result fits + ``_SUBSAMPLE_GATHER_MAX_COORDS``, and at least one axis strides by at + least its block extent (so whole blocks are skipped — the condition + under which gather decompresses strictly fewer blocks than the dense + bounding-box read). + """ + ndim = self.ndim + if builtins.any(s <= 0 for s in step): # negative steps keep the dense path + return None + + blocks = self.blocks + if not builtins.any(step[d] > 1 and step[d] >= blocks[d] for d in range(ndim)): + return None + + # Per-axis sample positions; matches the (sp-st-sign)//stp+1 count. + positions = [np.arange(start[d], stop[d], step[d]) for d in range(ndim)] + nelems = 1 + for p in positions: + nelems *= p.size + if not 0 < nelems <= _SUBSAMPLE_GATHER_MAX_COORDS: + return None + + # Flat C-order linear indices over the full real-axis grid. + flat = np.zeros((), dtype=np.int64) + cstride = 1 + for d in range(ndim - 1, -1, -1): + contrib = (positions[d].astype(np.int64) * cstride).reshape((-1,) + (1,) * (ndim - 1 - d)) + flat = flat + contrib + cstride *= self.shape[d] + flat = np.ascontiguousarray(flat).reshape(-1) + + return self._take_sparse_normalized(flat).reshape(shape) + def _get_set_nonunit_steps(self, _slice, out=None, value=None): start, stop, step, mask = _slice _get = out is not None + if _get: + # Coarse strided reads can skip whole blocks: route them through the + # sparse-gather primitive instead of decompressing full chunks. + gathered = self._try_subsample_gather(start, stop, step, out.shape) + if gathered is not None: + return gathered out = self if out is None else out # default return for setitem with no intersecting chunks if 0 in self.shape: return out @@ -4437,8 +4491,8 @@ def __getitem__( Parameters ---------- key: int, slice, sequence of (slices, int), array of bools, LazyExpr or str - The slice(s) to be retrieved. Note that step parameter is not yet honored - in slices. 
If a LazyExpr is provided, the expression is expected to be of + The slice(s) to be retrieved. Slice steps (including negative steps) + are honored. If a LazyExpr is provided, the expression is expected to be of boolean type, and the result will be another LazyExpr returning the values of this array where the expression is True. When key is a (nd-)array of bools, the result will be the values of ``self`` diff --git a/tests/ndarray/test_getitem.py b/tests/ndarray/test_getitem.py index 3092fa8f..2d4581de 100644 --- a/tests/ndarray/test_getitem.py +++ b/tests/ndarray/test_getitem.py @@ -655,3 +655,110 @@ def test_getitem_integer_array_still_uses_fancy_for_boolean(): expected = np.arange(12, dtype=np.int32).reshape(3, 4)[mask] result = a[mask] np.testing.assert_array_equal(result, expected) + + +# --------------------------------------------------------------------------- +# Strided-slice sparse-gather fast path (NDArray._try_subsample_gather) +# --------------------------------------------------------------------------- + +from unittest import mock # noqa: E402 + +from blosc2.ndarray import _SUBSAMPLE_GATHER_MAX_COORDS, get_ndarray_start_stop # noqa: E402 +from blosc2.utils import process_key # noqa: E402 + + +def _gather_gate(a, start, stop, step): + """Mirror the helper's eligibility gate using the array's actual blocks.""" + if any(s <= 0 for s in step): + return False + counts = [max(0, (sp - st - 1) // stp + 1) for st, sp, stp in zip(start, stop, step, strict=True)] + n = math.prod(counts) + if not 0 < n <= _SUBSAMPLE_GATHER_MAX_COORDS: + return False + return any(step[d] > 1 and step[d] >= a.blocks[d] for d in range(a.ndim)) + + +@pytest.mark.parametrize( + ("shape", "blocks", "key"), + [ + # 1-D coarse subsample: step >= block -> fast path + ((100_000,), (64,), np.s_[::1000]), + ((100_000,), (64,), np.s_[5:90_000:777]), + # 1-D fine stride: step < block -> dense fallback (still correct) + ((100_000,), (5000,), np.s_[::3]), + # negative step -> dense fallback + 
((100_000,), (64,), np.s_[::-1000]), + ((100_000,), (64,), np.s_[90_000:100:-321]), + # 2-D strided + integer axis (squeeze) and newaxis parity + ((200, 200), (8, 8), np.s_[::32, ::32]), + ((200, 200), (8, 8), np.s_[::32, 5]), + ((200, 200), (8, 8), np.s_[5, ::32]), + ((200, 200), (8, 8), np.s_[None, ::32, ::16]), + ((200, 200), (8, 8), np.s_[::32, :]), + # 3-D with a fixed leading index + ((8, 200, 200), (2, 8, 8), np.s_[3, ::32, ::32]), + ((8, 200, 200), (2, 8, 8), np.s_[::2, ::64, 7]), + ], +) +def test_getitem_strided_gather_matches_numpy(shape, blocks, key): + npa = np.arange(math.prod(shape), dtype=np.float64).reshape(shape) + a = blosc2.asarray(npa, blocks=blocks) + np.testing.assert_array_equal(a[key], npa[key]) + + +def _run_with_gather_spy(a, key): + """Return (result, gather_used).""" + orig = blosc2.NDArray._take_sparse_normalized + calls = [] + + def spy(self, indices, out=None): + calls.append(indices) + return orig(self, indices, out) + + with mock.patch.object(blosc2.NDArray, "_take_sparse_normalized", spy): + result = a[key] + return result, bool(calls) + + +@pytest.mark.parametrize( + ("shape", "blocks", "key"), + [ + ((100_000,), (64,), np.s_[::1000]), # step >= block -> gather + ((100_000,), (5000,), np.s_[::3]), # step < block -> dense + ((100_000,), (64,), np.s_[::-1000]), # negative -> dense + ((3_000_000,), (64,), np.s_[::2]), # > 1M coords -> dense + ((200, 200), (8, 8), np.s_[::32, 5]), + ((200, 200), (8, 8), np.s_[5, ::32]), + ], +) +def test_getitem_strided_gather_dispatch(shape, blocks, key): + npa = np.arange(math.prod(shape), dtype=np.float64).reshape(shape) + a = blosc2.asarray(npa, blocks=blocks) + key_, _mask = process_key(key, a.shape) + start, stop, step, _ = get_ndarray_start_stop(a.ndim, key_, a.shape) + expected_gather = _gather_gate(a, start, stop, step) + + result, used = _run_with_gather_spy(a, key) + np.testing.assert_array_equal(result, npa[key]) + assert used == expected_gather + + +def 
test_setitem_strided_does_not_use_gather(): + """The fast path is read-only; strided assignment must use the dense path.""" + npa = np.arange(100_000, dtype=np.float64) + a = blosc2.asarray(npa, blocks=(64,)) + vals = np.full(a[::1000].shape, -1.0) + + orig = blosc2.NDArray._take_sparse_normalized + calls = [] + + def spy(self, indices, out=None): + calls.append(indices) + return orig(self, indices, out) + + with mock.patch.object(blosc2.NDArray, "_take_sparse_normalized", spy): + a[::1000] = vals + assert calls == [] # gather not used for setitem + + npa[::1000] = vals + np.testing.assert_array_equal(a[:], npa) diff --git a/todo/b2view.md b/todo/b2view.md index 082093b4..dc0db9a7 100644 --- a/todo/b2view.md +++ b/todo/b2view.md @@ -27,7 +27,9 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of real matplotlib output on kitty/iTerm2/sixel terminals, degrading to half-blocks elsewhere. Note: plain striding can alias periodic data; a chunk-aggregated min/max envelope would be the audio-editor-style - fix. + fix. (The strided read that `plot_series` issues now hits the core + NDArray sparse-gather fast path automatically when step >= block, i.e. + for large arrays — see `NDArray._try_subsample_gather`.) ### Testing From 02ad87e39c66a27cd879fbef77518011506d5737 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 13 Jun 2026 07:35:33 +0200 Subject: [PATCH 03/24] Acceleration path for Column getitem when logical positions equal physical ones --- src/blosc2/ctable.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/blosc2/ctable.py b/src/blosc2/ctable.py index 2f6bd930..c0c022a7 100644 --- a/src/blosc2/ctable.py +++ b/src/blosc2/ctable.py @@ -866,6 +866,23 @@ def _resolve_live_positions(self) -> np.ndarray: return slp return np.where(self._valid_rows[:])[0] + def _has_identity_positions(self) -> bool: + """True when logical row ``k`` maps to physical row ``k`` for every row. 
+ + Holds for a base table with no column mask, no sorted/filtered view, and + no deletions. In that case a logical slice is a physical slice, so it + can be read straight from the underlying NDArray instead of resolving + and gathering explicit live positions. All checks are O(1) (cached + counts / lengths) — no validity scan is triggered. + """ + t = self._table + if self._mask is not None or t.base is not None: + return False + if getattr(t, "_cached_live_positions", None) is not None: + return False + n = t._known_n_rows() # cached live-row count, may be None + return n is not None and n == len(t._valid_rows) + def __getitem__(self, key: int | slice | list | np.ndarray): """Return values for the given logical index. @@ -911,6 +928,18 @@ def _values_from_key(self, key, *, check_stale: bool = True): # noqa: C901 return self._maybe_decode_timestamp_values(self._raw_col[int(pos_true)]) elif isinstance(key, slice): + # Identity fast path: when logical positions equal physical ones, a + # logical slice is a physical slice. Read it straight from the + # underlying NDArray, skipping the O(nrows) live-position scan and + # letting NDArray's strided-gather fast path handle coarse steps. + # Restricted to positive steps and plain stored columns; everything + # else falls through to the position-gather path below unchanged. 
+ if ( + (key.step is None or key.step > 0) + and not (self.is_computed or self.is_list or self.is_varlen_scalar or self.is_dictionary) + and self._has_identity_positions() + ): + return self._maybe_decode_timestamp_values(np.asarray(self._raw_col[key])) real_pos = self._resolve_live_positions() start, stop, step = key.indices(len(real_pos)) if start >= stop: From f3f5d7ff7f4e9119d83f52d4a1a604fe07d7a048 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 13 Jun 2026 07:43:47 +0200 Subject: [PATCH 04/24] Fix negative step in Column getitem that was returning [] --- src/blosc2/ctable.py | 14 +- tests/ctable/test_column_slice_fastpath.py | 184 +++++++++++++++++++++ 2 files changed, 191 insertions(+), 7 deletions(-) create mode 100644 tests/ctable/test_column_slice_fastpath.py diff --git a/src/blosc2/ctable.py b/src/blosc2/ctable.py index c0c022a7..e39b1a98 100644 --- a/src/blosc2/ctable.py +++ b/src/blosc2/ctable.py @@ -932,24 +932,24 @@ def _values_from_key(self, key, *, check_stale: bool = True): # noqa: C901 # logical slice is a physical slice. Read it straight from the # underlying NDArray, skipping the O(nrows) live-position scan and # letting NDArray's strided-gather fast path handle coarse steps. - # Restricted to positive steps and plain stored columns; everything - # else falls through to the position-gather path below unchanged. + # Plain stored columns only; everything else falls through to the + # position-gather path below. 
if ( - (key.step is None or key.step > 0) - and not (self.is_computed or self.is_list or self.is_varlen_scalar or self.is_dictionary) + not (self.is_computed or self.is_list or self.is_varlen_scalar or self.is_dictionary) and self._has_identity_positions() ): return self._maybe_decode_timestamp_values(np.asarray(self._raw_col[key])) real_pos = self._resolve_live_positions() - start, stop, step = key.indices(len(real_pos)) - if start >= stop: + # Apply the slice straight to the physical positions so that all + # slice semantics (including negative steps) follow NumPy. + selected_pos = real_pos[key] + if selected_pos.size == 0: if self.is_list or self.is_varlen_scalar or self.is_dictionary: return [] if self.is_ndarray: spec = self._table._schema.columns_by_name[self._col_name].spec return np.empty((0, *spec.item_shape), dtype=self.dtype) return np.array([], dtype=self.dtype) - selected_pos = real_pos[start:stop:step] # physical row positions if self.is_computed: lo, hi = int(selected_pos.min()), int(selected_pos.max()) chunk = np.asarray(self._raw_col[lo : hi + 1]) diff --git a/tests/ctable/test_column_slice_fastpath.py b/tests/ctable/test_column_slice_fastpath.py new file mode 100644 index 00000000..48c52e24 --- /dev/null +++ b/tests/ctable/test_column_slice_fastpath.py @@ -0,0 +1,184 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Identity-case fast path for Column strided slicing. 
+ +A clean base CTable (no column mask, no sorted/filtered view, no deletions) +has logical row k == physical row k, so a positive-step logical slice is a +physical slice and can be read straight from the underlying NDArray — skipping +the O(nrows) live-position scan and reaching NDArray's strided-gather fast path +(see ``Column._has_identity_positions`` / ``_values_from_key`` in ctable.py). + +These tests check that the fast path returns values identical to the +position-gather path, and that it engages only when it is safe to. +""" + +from dataclasses import dataclass +from unittest import mock + +import numpy as np +import pytest + +import blosc2 +from blosc2 import CTable +from blosc2.ctable import Column + + +@dataclass +class Row: + a: int = blosc2.field(blosc2.int64(), default=0) + b: float = blosc2.field(blosc2.float64(), default=0.0) + t: int = blosc2.field(blosc2.timestamp(), default=0) # exercises timestamp decode + + +def _make(n=1000, a_values=None): + arr = np.empty(n, dtype=[("a", " stop) +] + +NEGATIVE_SLICES = [ + np.s_[::-1], + np.s_[::-3], + np.s_[900:5:-7], + np.s_[-1::-1], + np.s_[5:900:-1], # empty (start < stop with negative step) +] + +ALL_SLICES = POSITIVE_SLICES + NEGATIVE_SLICES + + +def _force_slow(col, key): + """Read *key* with the identity fast path disabled (position-gather path).""" + with mock.patch.object(Column, "_has_identity_positions", return_value=False): + return np.asarray(col[key]) + + +def _resolve_spy(): + """Patch context + a list recording _resolve_live_positions calls.""" + calls = [] + orig = Column._resolve_live_positions + + def spy(self): + calls.append(self._col_name) + return orig(self) + + return mock.patch.object(Column, "_resolve_live_positions", spy), calls + + +# --------------------------------------------------------------------------- +# Correctness: fast path == position-gather path, and == NumPy +# --------------------------------------------------------------------------- + + 
+@pytest.mark.parametrize("key", ALL_SLICES) +@pytest.mark.parametrize("colname", ["a", "b", "t"]) +def test_identity_fast_path_matches_slow_path(key, colname): + table, _ = _make() + fast = np.asarray(table[colname][key]) + slow = _force_slow(table[colname], key) + np.testing.assert_array_equal(fast, slow) + + +@pytest.mark.parametrize("key", ALL_SLICES) +@pytest.mark.parametrize("colname", ["a", "b"]) +def test_identity_fast_path_matches_numpy(key, colname): + table, arr = _make() + np.testing.assert_array_equal(np.asarray(table[colname][key]), arr[colname][key]) + + +def test_timestamp_column_is_decoded_on_fast_path(): + table, _ = _make() + fast = np.asarray(table["t"][::100]) + assert np.issubdtype(fast.dtype, np.datetime64) # decode applied, not raw int64 + + +# --------------------------------------------------------------------------- +# Dispatch: the fast path engages only when positions are the identity +# --------------------------------------------------------------------------- + + +def test_clean_positive_step_skips_position_scan(): + table, _ = _make() + patcher, calls = _resolve_spy() + with patcher: + _ = table["a"][::100] + assert calls == [] # identity fast path: no live-position scan + + +def test_clean_negative_step_uses_fast_path(): + # Negative steps on a clean table are also the identity case now: the slice + # is read straight from the NDArray, no live-position scan. 
+ table, arr = _make() + patcher, calls = _resolve_spy() + with patcher: + result = np.asarray(table["a"][::-3]) + assert calls == [] + np.testing.assert_array_equal(result, arr["a"][::-3]) + + +@pytest.mark.parametrize("key", [np.s_[::10], np.s_[::-3], np.s_[15:2:-1]]) +def test_deletions_use_position_path_and_stay_correct(key): + table, arr = _make() + table.delete([3, 7, 50]) + expected = np.delete(arr, [3, 7, 50]) + patcher, calls = _resolve_spy() + with patcher: + result = np.asarray(table["a"][key]) + assert calls # identity broken by deletions + np.testing.assert_array_equal(result, expected["a"][key]) + + +@pytest.mark.parametrize("key", [np.s_[::5], np.s_[::-2], np.s_[::-1]]) +def test_filtered_view_uses_position_path_and_stays_correct(key): + table, arr = _make() + view = table.where("a >= 500") + expected = arr["b"][arr["a"] >= 500] + patcher, calls = _resolve_spy() + with patcher: + result = np.asarray(view["b"][key]) + assert calls # a view is not the identity case + np.testing.assert_allclose(result, expected[key]) + + +def test_materialized_sort_is_identity_and_correct(): + # Non-inplace sort_by returns a physically materialized table (base is None, + # no cached positions), so it is a legitimate identity case: the fast path + # is used and must return the rows in sorted order. 
+ rng = np.random.default_rng(0) + a_values = rng.permutation(1000) + table, arr = _make(a_values=a_values) + sorted_table = table.sort_by("a") + patcher, calls = _resolve_spy() + with patcher: + result = np.asarray(sorted_table["b"][::50]) + assert calls == [] # materialized sorted table is the identity case + order = np.argsort(arr["a"], kind="stable") + np.testing.assert_allclose(result, arr["b"][order][::50]) + + +def test_setitem_strided_unaffected(): + """The fast path is read-only; strided assignment is unchanged.""" + table, arr = _make() + table["a"][::100] = -1 + expected = arr["a"].copy() + expected[::100] = -1 + np.testing.assert_array_equal(np.asarray(table["a"][:]), expected) From eba181d921f7998077acfdb2e699881f49d78328 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 13 Jun 2026 08:45:40 +0200 Subject: [PATCH 05/24] Produce envelope (peak-preserving) plots by default --- src/blosc2/b2view/app.py | 32 ++++--- src/blosc2/b2view/model.py | 168 ++++++++++++++++++++++++++++++------ tests/b2view/test_basics.py | 14 +-- tests/test_b2view_model.py | 70 +++++++++++---- todo/b2view.md | 36 +++++--- 5 files changed, 247 insertions(+), 73 deletions(-) diff --git a/src/blosc2/b2view/app.py b/src/blosc2/b2view/app.py index 5f75966a..ee3c2cc2 100644 --- a/src/blosc2/b2view/app.py +++ b/src/blosc2/b2view/app.py @@ -461,11 +461,12 @@ class PlotScreen(ModalScreen[None]): ("p", "close", "Close"), ] - def __init__(self, *, title: str, x, y): + def __init__(self, *, title: str, x, ymin, ymax): super().__init__() self.plot_title = title self.x = list(x) - self.y = list(y) + self.ymin = list(ymin) + self.ymax = list(ymax) def compose(self) -> ComposeResult: with Vertical(id="plot-dialog"): @@ -474,7 +475,11 @@ def compose(self) -> ComposeResult: def on_mount(self) -> None: plt = self.query_one(PlotextPlot).plt - plt.plot(self.x, self.y, marker="braille") + # Draw the max (upper) and min (lower) envelope. 
When they coincide + # (a sampled series) this reads as a single line. + plt.plot(self.x, self.ymax, marker="braille") + if self.ymin != self.ymax: + plt.plot(self.x, self.ymin, marker="braille") plt.xlabel("row") def action_close(self) -> None: @@ -1311,18 +1316,21 @@ def action_plot_column(self) -> None: self.selected_path, column=column, layout=self._data_layout, max_points=self._PLOT_MAX_POINTS ) - x, y = series["x"], np.asarray(series["y"]) - if y.dtype.kind == "b": - y = y.astype(np.int64) - finite = np.isfinite(y.astype(np.float64)) - x, y = x[finite], y[finite] + x = np.asarray(series["x"]) + ymin = np.asarray(series["ymin"], dtype=np.float64) + ymax = np.asarray(series["ymax"], dtype=np.float64) + # Keep only buckets with finite extremes (drops all-NaN buckets). + finite = np.isfinite(ymin) & np.isfinite(ymax) + x, ymin, ymax = x[finite], ymin[finite], ymax[finite] if x.size == 0: self.notify(f"Column {name!r} has no finite values to plot", severity="warning") return - title = f"{self.selected_path} · {name} · {series['n']} rows" - if series["step"] > 1: - title += f" (step {series['step']})" - self.push_screen(PlotScreen(title=title, x=x, y=y)) + method = series.get("method") + descr = {"summary": "min/max envelope", "reduce": "min/max envelope"}.get( + method, "sampled — may miss extremes" + ) + title = f"{self.selected_path} · {name} · {series['n']} rows · {descr}" + self.push_screen(PlotScreen(title=title, x=x, ymin=ymin, ymax=ymax)) def action_go_to_column(self) -> None: if not self._in_data_grid(): diff --git a/src/blosc2/b2view/model.py b/src/blosc2/b2view/model.py index 733e0f9c..3fde5ce3 100644 --- a/src/blosc2/b2view/model.py +++ b/src/blosc2/b2view/model.py @@ -10,6 +10,50 @@ import blosc2 +# Above this uncompressed size, plot_series falls back to a strided sample +# instead of reading the whole series for an exact min/max envelope (the full +# read both materializes the data and costs O(n)). ~1 GB ≈ 125M float64. 
+_PLOT_FULL_READ_MAX_BYTES = 1_000_000_000 + + +def _minmax_buckets( + vmin: np.ndarray, vmax: np.ndarray, positions: np.ndarray, n: int, max_points: int +) -> dict[str, np.ndarray]: + """Reduce per-element (or per-block) min/max into <= *max_points* buckets. + + *vmin*/*vmax* are the per-source-unit minima/maxima, *positions* the global + row index of each unit's start, *n* the total row count. NaN units (already + NaN in *vmin*/*vmax*) are ignored within a bucket; an all-NaN bucket stays + NaN. Returns ``{"x", "ymin", "ymax"}`` with bucket-center x positions. + """ + nunits = vmin.shape[0] + if nunits == 0: + empty = np.empty(0) + return {"x": empty, "ymin": empty, "ymax": empty} + if nunits <= max_points: + starts = np.arange(nunits) + else: + group = -(-nunits // max_points) # ceil + starts = np.arange(0, nunits, group) + # NaN-aware reduceat: +inf/-inf neutralizes NaN units, then mapped back. + lo = np.where(np.isnan(vmin), np.inf, vmin) + hi = np.where(np.isnan(vmax), -np.inf, vmax) + ymin = np.minimum.reduceat(lo, starts) + ymax = np.maximum.reduceat(hi, starts) + ymin = np.where(np.isinf(ymin), np.nan, ymin) + ymax = np.where(np.isinf(ymax), np.nan, ymax) + x = np.minimum(positions[starts], max(0, n - 1)) + return {"x": x, "ymin": ymin, "ymax": ymax} + + +def _reduce_envelope(vals: np.ndarray, n: int, max_points: int) -> dict[str, np.ndarray]: + """Per-bucket min/max envelope of an in-memory 1-D series.""" + vals = np.asarray(vals) + if vals.shape[0] == 0: + empty = np.empty(0) + return {"x": empty, "ymin": empty, "ymax": empty} + return _minmax_buckets(vals, vals, np.arange(vals.shape[0]), n, max_points) + @dataclass(frozen=True) class NodeInfo: @@ -285,46 +329,120 @@ def plot_series( layout: DataSliceLayout | None = None, max_points: int = 2000, ) -> dict[str, Any]: - """Return a bounded ``{"x", "y", "n", "step"}`` overview of one series. + """Return a peak-preserving overview of one series for plotting. 
+ + The result is ``{"x", "ymin", "ymax", "n", "method"}`` with at most + *max_points* buckets; ``ymin``/``ymax`` are the per-bucket extremes so a + plotted envelope never hides a peak or trough. Three tiers, cheapest + first: + + - ``"summary"``: read precomputed per-block min/max from the column's + SUMMARY index — no data decompression (CTable columns, no active + filter, numeric). + - ``"reduce"``: read the whole series and reduce per bucket (exact, + but O(n) and bounded by ``_PLOT_FULL_READ_MAX_BYTES``). + - ``"sample"``: strided sample for series too large to read fully; this + may miss extremes, so callers should label it. The series is a CTable column (*column* is its name; an active row - filter is honored) or an array (*column* is the global index along - the column dimension of *layout*, or None for 1-D arrays). The whole - length is covered with a single strided blosc2 read of at most - *max_points* elements — the full data is never materialized. + filter is honored) or an array (*column* is the global index along the + column dimension of *layout*, or None for 1-D arrays). 
""" path = self.normalize_path(path) obj = self._get_object(path) kind = object_kind(obj) if kind == "ctable": - obj = self._filter_views.get(path, obj) - n = len(obj) - step = max(1, -(-n // max_points)) - y = safe_asarray(obj[column][::step]) if n else np.empty(0) - elif kind in {"ndarray", "c2array"}: + filtered = path in self._filter_views + view = self._filter_views.get(path, obj) + n = len(view) + if not filtered: + env = self._column_summary_envelope(obj, column, n, max_points) + if env is not None: + return {**env, "n": n, "method": "summary"} + itemsize = np.dtype(view[column].dtype).itemsize + if n * itemsize > _PLOT_FULL_READ_MAX_BYTES: + step = max(1, -(-n // max_points)) + y = safe_asarray(view[column][::step]) if n else np.empty(0) + return {"x": np.arange(0, n, step), "ymin": y, "ymax": y, "n": n, "method": "sample"} + vals = safe_asarray(view[column][:]) if n else np.empty(0) + return {**_reduce_envelope(vals, n, max_points), "n": n, "method": "reduce"} + + if kind in {"ndarray", "c2array"}: shape = tuple(getattr(obj, "shape", ()) or ()) ndim = len(shape) if ndim == 0: raise ValueError("Cannot plot a scalar") row_dim = layout.navigable_dims[0] if layout is not None and layout.navigable_dims else 0 n = shape[row_dim] - step = max(1, -(-n // max_points)) - idx: list[int | slice] = [] - for i in range(ndim): - if i == row_dim: - idx.append(slice(0, n, step)) - elif layout is not None and i in layout.fixed_values: - idx.append(layout.fixed_values[i]) - elif layout is not None and len(layout.navigable_dims) > 1 and i == layout.navigable_dims[1]: - idx.append(int(column)) - else: - idx.append(0) - y = np.asarray(obj[tuple(idx)]) if n else np.empty(0) - else: - raise ValueError(f"Cannot plot {kind!r} objects") - return {"x": np.arange(0, n, step), "y": y, "n": n, "step": step} + def _row_index(row_slice): + idx: list[int | slice] = [] + for i in range(ndim): + if i == row_dim: + idx.append(row_slice) + elif layout is not None and i in layout.fixed_values: 
+ idx.append(layout.fixed_values[i]) + elif ( + layout is not None + and len(layout.navigable_dims) > 1 + and i == layout.navigable_dims[1] + ): + idx.append(int(column)) + else: + idx.append(0) + return tuple(idx) + + itemsize = np.dtype(obj.dtype).itemsize + if n * itemsize > _PLOT_FULL_READ_MAX_BYTES: + step = max(1, -(-n // max_points)) + y = np.asarray(obj[_row_index(slice(0, n, step))]) if n else np.empty(0) + return {"x": np.arange(0, n, step), "ymin": y, "ymax": y, "n": n, "method": "sample"} + vals = np.asarray(obj[_row_index(slice(0, n))]) if n else np.empty(0) + return {**_reduce_envelope(vals, n, max_points), "n": n, "method": "reduce"} + + raise ValueError(f"Cannot plot {kind!r} objects") + + def _column_summary_envelope( + self, table: Any, column: str | int | None, n: int, max_points: int + ) -> dict[str, np.ndarray] | None: + """Build a min/max envelope from a column's SUMMARY index, or None. + + Reads precomputed per-block ``(min, max)`` from the index — no data + decompression. Returns None when there is no usable summary (non-string + column, no index, non-numeric, or unsupported level). 
+ """ + if not isinstance(column, str): + return None + try: + idx = table.index(column) + except Exception: + return None + if getattr(idx, "kind", None) != "summary": + return None + try: + desc = idx.descriptor + levels = desc.get("levels") or {} + level = "block" if "block" in levels else next(iter(levels), None) + if level is None or np.dtype(desc["dtype"]).kind not in "iuf": + return None + from blosc2.indexing import FLAG_ALL_NAN, _open_level_summary_handle + + handle = _open_level_summary_handle(idx._target_array(), desc, level) + bmin = np.asarray(handle["min"][:]) + bmax = np.asarray(handle["max"][:]) + flags = np.asarray(handle["flags"][:]) + except Exception: + return None + if bmin.shape[0] == 0: + return None + all_nan = (flags & FLAG_ALL_NAN) != 0 + if all_nan.any(): + bmin = np.where(all_nan, np.nan, bmin) + bmax = np.where(all_nan, np.nan, bmax) + block = int(desc["blocks"][0]) + positions = np.arange(bmin.shape[0]) * block + return _minmax_buckets(bmin, bmax, positions, n, max_points) def column_names(self, path: str) -> list[str] | None: """Return the column names for a CTable path, or None for other kinds. 
diff --git a/tests/b2view/test_basics.py b/tests/b2view/test_basics.py index d3d7bdd9..3d6d8e45 100644 --- a/tests/b2view/test_basics.py +++ b/tests/b2view/test_basics.py @@ -618,7 +618,7 @@ async def submit_column_filter(pattern: str) -> None: async def test_plot_column(store_path): - """'p' plots a downsampled whole-array overview in a modal.""" + """'p' plots a min/max envelope of the whole 1-D leaf in a modal.""" pytest.importorskip("textual_plotext") from blosc2.b2view.app import PlotScreen @@ -632,13 +632,13 @@ async def test_plot_column(store_path): assert isinstance(app.screen, PlotScreen) screen = app.screen - # The series covers the whole 1-D leaf, downsampled by striding - step = -(-LEAF1_LEN // app._PLOT_MAX_POINTS) - assert step > 1 # the leaf is larger than the point budget - assert list(screen.x) == list(range(0, LEAF1_LEN, step)) - np.testing.assert_allclose(screen.y, leaf1_values()[::step]) + # Bucketed envelope covering the whole leaf; bracketed by true extremes + assert 0 < len(screen.x) <= app._PLOT_MAX_POINTS + leaf = leaf1_values() + assert min(screen.ymin) <= leaf.min() + 1e-9 + assert max(screen.ymax) >= leaf.max() - 1e-9 assert "leaf1" in screen.plot_title - assert f"step {step}" in screen.plot_title + assert "envelope" in screen.plot_title # 'p' (like escape) closes the plot again await pilot.press("p") diff --git a/tests/test_b2view_model.py b/tests/test_b2view_model.py index ac293e4a..f4ba97bc 100644 --- a/tests/test_b2view_model.py +++ b/tests/test_b2view_model.py @@ -231,23 +231,56 @@ def test_preview_array_high_dimensional_slice(): np.testing.assert_array_equal(preview, arr[0, :2, :3]) -def test_plot_series_1d_strided_overview(tmp_path): +def test_plot_series_1d_envelope_captures_extremes(tmp_path): path = tmp_path / "plot1d.b2z" - n = 1000 + n = 100_000 + data = np.linspace(0, 1, num=n) + data[12345] = 9.0 # a spike between bucket samples + data[98765] = -9.0 with blosc2.TreeStore(str(path), mode="w") as store: - store["/wave"] = 
blosc2.linspace(0, 1, num=n) + store["/wave"] = blosc2.asarray(data) with StoreBrowser(str(path)) as browser: series = browser.plot_series("/wave", max_points=300) assert series["n"] == n - assert series["step"] == 4 # ceil(1000 / 300) - np.testing.assert_array_equal(series["x"], np.arange(0, n, 4)) - np.testing.assert_allclose(series["y"], np.linspace(0, 1, n)[::4]) + assert series["method"] == "reduce" # NDArray leaf, fits the read budget + assert len(series["x"]) <= 300 + # The envelope must contain the true global extremes, including spikes + assert np.isclose(np.nanmax(series["ymax"]), 9.0) + assert np.isclose(np.nanmin(series["ymin"]), -9.0) - # Small arrays are returned whole (step 1) + # Small arrays: one bucket per element, ymin == ymax == the values small = browser.plot_series("/wave", max_points=n) - assert small["step"] == 1 - assert len(small["y"]) == n + assert len(small["x"]) == n + np.testing.assert_allclose(small["ymin"], data) + np.testing.assert_allclose(small["ymax"], data) + + +def test_plot_series_uses_summary_index_when_available(tmp_path): + # A persisted CTable builds SUMMARY indexes on close; plot_series should + # read per-block min/max from the index (no data decompression). 
+ path = str(tmp_path / "plotsum.b2t") + n = 200_000 + data = np.linspace(-1.0, 1.0, n) + data[1234] = 7.0 # spikes the per-block summary must capture + data[199_001] = -7.0 + + @dataclasses.dataclass + class VRow: + v: float = blosc2.field(blosc2.float64(), default=0.0) + + arr = np.empty(n, dtype=[("v", " exact reduce + assert series["method"] in ("summary", "reduce") + assert np.isclose(np.nanmax(series["ymax"]), 99 * 1.5) + assert np.isclose(np.nanmin(series["ymin"]), 0.0) - # An active row filter restricts the plotted universe + # An active row filter restricts the plotted universe (and forces reduce) browser.set_filter("/table", "x >= 50") filtered = browser.plot_series("/table", column="y", max_points=1000) assert filtered["n"] == 50 - np.testing.assert_allclose(filtered["y"], np.arange(50, 100) * 1.5) + assert filtered["method"] == "reduce" + assert np.isclose(np.nanmin(filtered["ymin"]), 50 * 1.5) + assert np.isclose(np.nanmax(filtered["ymax"]), 99 * 1.5) diff --git a/todo/b2view.md b/todo/b2view.md index dc0db9a7..40b08c82 100644 --- a/todo/b2view.md +++ b/todo/b2view.md @@ -21,15 +21,16 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of placeholder; offer on-demand decoding (e.g. a key to materialize the column, or decode just the cursor row). - [ ] SChunk preview is not implemented (`model.preview` returns a message). -- [ ] Plotting follow-ups for the `p` key: maybe a live mini-plot that - follows paging, or zoom into a row range from the plot modal. If - character resolution proves too coarse, `textual-image` can render - real matplotlib output on kitty/iTerm2/sixel terminals, degrading to - half-blocks elsewhere. Note: plain striding can alias periodic data; - a chunk-aggregated min/max envelope would be the audio-editor-style - fix. (The strided read that `plot_series` issues now hits the core - NDArray sparse-gather fast path automatically when step >= block, i.e. - for large arrays — see `NDArray._try_subsample_gather`.) 
+- [ ] Plotting follow-ups for the `p` key: a live mini-plot that follows + paging, or zoom into a row range from the plot modal. If character + resolution proves too coarse, `textual-image` can render real matplotlib + output on kitty/iTerm2/sixel terminals, degrading to half-blocks + elsewhere. +- [ ] Tier-2 plot envelope (`_reduce_envelope`) materializes the series via + `obj[:]`, so it is bounded by `_PLOT_FULL_READ_MAX_BYTES` (~1 GB) and + falls back to a labeled strided sample above that. Lift the ceiling by + chunk-streaming the per-bucket min/max instead of reading the whole + series at once. ### Testing @@ -72,11 +73,18 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of buffer in a modal, via the optional `textual-plotext` package (new `plot` extra); braille scatter, NaN/inf filtered, non-numeric columns and a missing package just notify. Works headless in Pilot tests. -- 2026-06-12: The `p` plot now shows a downsampled overview of the *whole* - series (`StoreBrowser.plot_series`): a single strided blosc2 read of at - most ~2000 points (10 ms on a 10M-element array), never materializing the - full data; honors layout (fixed dims) for N-D arrays and active row - filters for CTables. +- 2026-06-12: The `p` plot shows a downsampled overview of the *whole* + series (`StoreBrowser.plot_series`); honors layout (fixed dims) for N-D + arrays and active row filters for CTables. +- 2026-06-13: `p` plot is now a peak-preserving **min/max envelope** (was + plain strided decimation, which aliased and hid extremes between samples). + `plot_series` returns `{x, ymin, ymax, n, method}` and picks the cheapest + tier: (1) `summary` — per-block min/max straight from the column's SUMMARY + index, no decompression (~44x faster, the big win for large persisted / + parquet columns; identity case only); (2) `reduce` — read + per-bucket + min/max, bounded by `_PLOT_FULL_READ_MAX_BYTES` (~1 GB); (3) `sample` — + labeled strided fallback above that ceiling. 
`PlotScreen` draws the + upper/lower envelope; the title states the method. - 2026-06-12: `?` opens a help screen listing all keys grouped by area (panels, tree, grid rows/columns, dim mode); shown in the footer, closed with esc/`?`/`q`. From cb051d2f3cc6326d921dfa563f96b6cd60a96db4 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 13 Jun 2026 08:58:40 +0200 Subject: [PATCH 06/24] Fix sidecar-handle cache collision for compact-store columns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include the sidecar's `path` in `_sidecar_handle_cache_key` so sibling columns in a compact (.b2z) store no longer collide on a single cache entry. Previously all columns shared the same `_array_key` (identical urlpath), so every column received the handle opened first — returning wrong data when the handle was read directly (e.g. plots, b2view). --- src/blosc2/indexing.py | 12 ++++--- tests/ctable/test_ctable_indexing.py | 51 ++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/blosc2/indexing.py b/src/blosc2/indexing.py index 411a2e55..011fec1d 100644 --- a/src/blosc2/indexing.py +++ b/src/blosc2/indexing.py @@ -970,8 +970,10 @@ def _clear_cached_data(array: blosc2.NDArray, token: str) -> None: _SIDECAR_HANDLE_CACHE.pop(key, None) -def _sidecar_handle_cache_key(array: blosc2.NDArray, token: str, category: str, name: str): - return (_array_key(array), token, category, name) +def _sidecar_handle_cache_key( + array: blosc2.NDArray, token: str, category: str, name: str, path: str | None = None +): + return (_array_key(array), token, category, name, path) def _sidecar_storage_category(category: str) -> str: @@ -984,12 +986,14 @@ def _invalidate_sidecar_cache_entries(array: blosc2.NDArray, token: str, categor categories = {storage_category, f"{storage_category}_handle"} for cache_category in categories: _DATA_CACHE.pop(_data_cache_key(array, token, cache_category, name), None) - 
_SIDECAR_HANDLE_CACHE.pop(_sidecar_handle_cache_key(array, token, cache_category, name), None) + prefix = _sidecar_handle_cache_key(array, token, cache_category, name) + for key in [k for k in _SIDECAR_HANDLE_CACHE if k[:4] == prefix[:4]]: + _SIDECAR_HANDLE_CACHE.pop(key, None) def _open_sidecar_handle(array: blosc2.NDArray, token: str, category: str, name: str, path: str | None): _purge_stale_persistent_caches() - cache_key = _sidecar_handle_cache_key(array, token, category, name) + cache_key = _sidecar_handle_cache_key(array, token, category, name, path) cached = _SIDECAR_HANDLE_CACHE.get(cache_key) if cached is not None: return cached diff --git a/tests/ctable/test_ctable_indexing.py b/tests/ctable/test_ctable_indexing.py index bea6d30e..b1d8231a 100644 --- a/tests/ctable/test_ctable_indexing.py +++ b/tests/ctable/test_ctable_indexing.py @@ -755,6 +755,57 @@ class Aligned: assert got == expected, f"index returned {got}, expected {expected} (scan)" +def test_sidecar_handle_cache_no_cross_column_collision(tmp_path): + """Regression: in a compact (.b2z) multi-column store, reading the SUMMARY + block sidecar handle for each column must return *that* column's data, not + a sibling's. + + The process-wide ``_SIDECAR_HANDLE_CACHE`` was keyed only by + ``(_array_key, token, category, name)``. In compact stores all columns + share the same urlpath → same ``_array_key``, so sibling columns collided + on a single cache entry and every column received the handle opened first. + Including ``path`` in the key fixes this. + """ + + @dataclasses.dataclass + class Distinct: + a: float = blosc2.field(blosc2.float64(), chunks=(500,), blocks=(250,)) + b: float = blosc2.field(blosc2.float64(), chunks=(500,), blocks=(250,)) + + n = 1000 + rng = np.random.default_rng(42) + # Non-overlapping ranges so we can trivially tell columns apart by max. 
+ a = (rng.random(n) * 10).astype(np.float64) # max ≈ 10 + b = (rng.random(n) * 100 + 100).astype(np.float64) # max ≈ 200, min ≥ 100 + + t = blosc2.CTable(Distinct) + t.extend(list(zip(a.tolist(), b.tolist(), strict=True))) + path = str(tmp_path / "distinct.b2z") + t.to_b2z(path) + + # Build SUMMARY indexes on both columns. + with blosc2.open(path, mode="a") as w: + w.create_index("a", kind=blosc2.IndexKind.SUMMARY) + w.create_index("b", kind=blosc2.IndexKind.SUMMARY) + + with blosc2.open(path) as r: + catalog = dict(r._get_index_catalog()) + nd_a = r["a"]._raw_col + nd_b = r["b"]._raw_col + + summary_a = blosc2.indexing._open_level_summary_handle(nd_a, catalog["a"], "block") + summary_b = blosc2.indexing._open_level_summary_handle(nd_b, catalog["b"], "block") + + max_a = summary_a["max"].max() + max_b = summary_b["max"].max() + + # Without the fix, both handles return the same (first-opened) column's data + # because the cache key collides. With the fix they must differ. + assert max_a != max_b, f"cross-column collision: both max values are {max_a}" + assert max_a < 11, f"column a max {max_a} outside expected range [0, 10]" + assert 100 <= max_b <= 200, f"column b max {max_b} outside expected range [100, 200]" + + @dataclasses.dataclass class _GranRow: # Small explicit grid so the SUMMARY index spans several chunks/blocks. From aa28c29cd557828a6edaaf5bfb62a3490c703dc2 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 13 Jun 2026 14:17:56 +0200 Subject: [PATCH 07/24] Enable cross-column index pruning in compact CTable queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compact (.b2z) stores share one urlpath, so the index store held only the first column's descriptor under "__self__" — multi-column predicates pruned by one column at best. 
Injecting all columns (per-column tokens + array_to_col threading) wasn't enough: the segment merge required left.base is right.base, so cross-column ANDs failed to merge and fell back to a full scan (~2x regression). Replace the base-identity check with a row-grid compatibility check (_grid_compatible_segment_plans): segments are row-aligned, so columns sharing level/segment_len/shape/row-count map every segment to the same rows and their candidate masks can be combined directly. _merge_segment_plans now intersects (AND) / unions (OR) across columns, with a safe fallback when grids differ (AND -> most-selective side, OR -> full scan). Cross-column AND now prunes instead of full-scanning; OR prunes when both sides are segment-selective. Adds tests for AND/OR pruning, correctness vs scan, and merge semantics. --- src/blosc2/ctable_indexing.py | 28 ++++-- src/blosc2/indexing.py | 139 +++++++++++++++++-------- tests/ctable/test_ctable_indexing.py | 145 +++++++++++++++++++++++++++ 3 files changed, 265 insertions(+), 47 deletions(-) diff --git a/src/blosc2/ctable_indexing.py b/src/blosc2/ctable_indexing.py index e2568743..90dac675 100644 --- a/src/blosc2/ctable_indexing.py +++ b/src/blosc2/ctable_indexing.py @@ -738,6 +738,16 @@ def create_index( # noqa: C901 value_epoch, _ = self._storage.get_epoch_counters() descriptor["built_value_epoch"] = value_epoch + if is_persistent: + # Use column name as token so sibling columns in compact stores get + # distinct keys in the shared _PERSISTENT_INDEXES store (all columns + # share the same urlpath yet must not collide on "__self__"). File + # paths were built with token="__self__" (omitted from the filename) + # and remain unchanged; only the in-memory key is affected. + # In-memory CTables already have unique _IN_MEMORY_INDEXES entries + # per column (keyed by id(array)), so no token change is needed. 
+ descriptor["token"] = col_name + catalog = self._get_index_catalog() catalog[col_name] = descriptor self._storage.save_index_catalog(catalog) @@ -1221,27 +1231,31 @@ def _try_index_where(self, expr_result: blosc2.LazyExpr) -> np.ndarray | None: # the upcoming query always loads the correct sidecar for this column. from blosc2.indexing import _clear_cached_data, _register_descriptor_owner - for _col_name, col_arr, descriptor in indexed_columns[:1]: + # Build column-name → array mapping so the planner can resolve the + # correct per-column token instead of the generic "__self__". + array_to_col = {} + for _col_name, col_arr, descriptor in indexed_columns: + array_to_col[id(col_arr)] = _col_name arr_key = _array_key(col_arr) if _is_persistent_array(col_arr): store = _PERSISTENT_INDEXES.get(arr_key) or _default_index_store() - if store["indexes"].get(descriptor["token"]) is not descriptor: - _clear_cached_data(col_arr, descriptor["token"]) - store["indexes"][descriptor["token"]] = descriptor + if store["indexes"].get(_col_name) is not descriptor: + _clear_cached_data(col_arr, _col_name) + store["indexes"][_col_name] = descriptor _PERSISTENT_INDEXES[arr_key] = store else: store = _IN_MEMORY_INDEXES.get(id(col_arr)) or _default_index_store() - store["indexes"][descriptor["token"]] = descriptor + store["indexes"][_col_name] = descriptor _IN_MEMORY_INDEXES[id(col_arr)] = store # Record the owning array so a sibling column sharing this urlpath # (and, after column alignment, the same shape/chunks) cannot match # this descriptor in _descriptor_for_target. 
- _register_descriptor_owner(col_arr, descriptor["token"]) + _register_descriptor_owner(col_arr, _col_name) where_dict = {"_where_x": primary_col_arr} merged_operands = {**operands, "_where_x": primary_col_arr} - plan = plan_query(expression, merged_operands, where_dict) + plan = plan_query(expression, merged_operands, where_dict, array_to_col=array_to_col) if not plan.usable: return None diff --git a/src/blosc2/indexing.py b/src/blosc2/indexing.py index 011fec1d..9d588c72 100644 --- a/src/blosc2/indexing.py +++ b/src/blosc2/indexing.py @@ -1276,12 +1276,14 @@ def _plain_value(value): return {key: _plain_value(value) for key, value in cparams.items()} -def _load_array_sidecar(array: blosc2.NDArray, token: str, category: str, name: str) -> np.ndarray: +def _load_array_sidecar( + array: blosc2.NDArray, token: str, category: str, name: str, path: str | None = None +) -> np.ndarray: cache_key = _data_cache_key(array, token, category, name) cached = _DATA_CACHE.get(cache_key) if cached is not None: return cached - handle = _SIDECAR_HANDLE_CACHE.get(_sidecar_handle_cache_key(array, token, category, name)) + handle = _SIDECAR_HANDLE_CACHE.get(_sidecar_handle_cache_key(array, token, category, name, path)) if handle is None: raise RuntimeError("in-memory index metadata is missing from the current process") data = _read_sidecar_span(handle, 0, int(handle.shape[0])) @@ -4095,14 +4097,14 @@ def _component_nbytes(array: blosc2.NDArray, descriptor: dict, component: IndexC if component.path is not None: return int(_open_sidecar_file(component.path, _INDEX_MMAP_MODE).nbytes) token = descriptor["token"] - return int(_load_array_sidecar(array, token, component.category, component.name).nbytes) + return int(_load_array_sidecar(array, token, component.category, component.name, component.path).nbytes) def _component_cbytes(array: blosc2.NDArray, descriptor: dict, component: IndexComponent) -> int: if component.path is not None: return int(_open_sidecar_file(component.path, 
_INDEX_MMAP_MODE).cbytes) token = descriptor["token"] - sidecar = _load_array_sidecar(array, token, component.category, component.name) + sidecar = _load_array_sidecar(array, token, component.category, component.name, component.path) kwargs = {} cparams = descriptor.get("cparams") if cparams is not None: @@ -4833,8 +4835,14 @@ def _descriptor_for(array: blosc2.NDArray, field: str | None) -> dict | None: return _descriptor_for_target(array, _field_target_descriptor(field)) -def _descriptor_for_target(array: blosc2.NDArray, target: dict) -> dict | None: +def _descriptor_for_target( + array: blosc2.NDArray, target: dict, array_to_col: dict | None = None +) -> dict | None: token = _target_token(target) + if array_to_col is not None and token == SELF_TARGET_NAME: + col_name = array_to_col.get(id(array)) + if col_name is not None: + token = col_name descriptor = _load_store(array)["indexes"].get(token) if descriptor is None or descriptor.get("stale", False): return None @@ -5175,12 +5183,14 @@ def _finest_level(descriptor: dict) -> str: return level_names[-1] -def _plan_segment_compare(node: ast.Compare, operands: dict) -> SegmentPredicatePlan | None: +def _plan_segment_compare( + node: ast.Compare, operands: dict, array_to_col: dict | None = None +) -> SegmentPredicatePlan | None: target = _target_from_compare(node, operands) if target is None: return None base, target_info, op, value = target - descriptor = _descriptor_for_target(base, target_info) + descriptor = _descriptor_for_target(base, target_info, array_to_col) if descriptor is None: return None level = _finest_level(descriptor) @@ -5202,19 +5212,45 @@ def _plan_segment_compare(node: ast.Compare, operands: dict) -> SegmentPredicate ) -def _same_segment_space(left: SegmentPredicatePlan, right: SegmentPredicatePlan) -> bool: +def _grid_compatible_segment_plans(left: SegmentPredicatePlan, right: SegmentPredicatePlan) -> bool: + """Whether two segment plans share an identical row→segment grid. 
+ + Segments are row-aligned (segment ``i`` always covers rows + ``[i*segment_len, (i+1)*segment_len)``), so two plans with the same level, + ``segment_len``, candidate-mask shape, and row count map every segment index + to the same rows — even when they belong to *different* columns. This is + what lets cross-column AND/OR intersect their per-segment candidate masks. + Compact-store columns that join the auto-aligned fast-eval grid share + ``segment_len``; columns excluded from alignment may not, and fall back. + """ return ( - left.base is right.base - and left.level == right.level + left.level == right.level and left.segment_len == right.segment_len and left.candidate_units.shape == right.candidate_units.shape + and int(left.base.shape[0]) == int(right.base.shape[0]) ) +def _segment_plan_scan_rows(plan: SegmentPredicatePlan) -> int: + """Estimated rows the downstream scan must visit for *plan* (selectivity).""" + return int(np.count_nonzero(plan.candidate_units)) * int(plan.segment_len) + + def _merge_segment_plans( left: SegmentPredicatePlan, right: SegmentPredicatePlan, op: str ) -> SegmentPredicatePlan | None: - if not _same_segment_space(left, right): + if not _grid_compatible_segment_plans(left, right): + # Different columns can carry indexes on incompatible grids (e.g. a + # column excluded from the aligned fast-eval set). We cannot combine + # their per-segment masks directly. + if op == "and": + # AND pruning by a *superset* of the true candidates is always + # correct, because the downstream scan re-evaluates the full + # predicate per row. Keep whichever single side prunes more so we + # never do worse than single-column pruning. + return left if _segment_plan_scan_rows(left) <= _segment_plan_scan_rows(right) else right + # OR cannot be pruned to one side's segments — the other column may + # match rows in segments this side discarded. Fall back to full scan. 
return None if op == "and": candidate_units = left.candidate_units & right.candidate_units @@ -5231,11 +5267,13 @@ def _merge_segment_plans( ) -def _plan_segment_boolop(node: ast.BoolOp, operands: dict) -> SegmentPredicatePlan | None: +def _plan_segment_boolop( + node: ast.BoolOp, operands: dict, array_to_col: dict | None = None +) -> SegmentPredicatePlan | None: op = "and" if isinstance(node.op, ast.And) else "or" if isinstance(node.op, ast.Or) else None if op is None: return None - plans = [_plan_segment_node(value, operands) for value in node.values] + plans = [_plan_segment_node(value, operands, array_to_col) for value in node.values] if op == "and": plans = [plan for plan in plans if plan is not None] if not plans: @@ -5252,7 +5290,9 @@ def _plan_segment_boolop(node: ast.BoolOp, operands: dict) -> SegmentPredicatePl return plan -def _plan_segment_bitop(node: ast.BinOp, operands: dict) -> SegmentPredicatePlan | None: +def _plan_segment_bitop( + node: ast.BinOp, operands: dict, array_to_col: dict | None = None +) -> SegmentPredicatePlan | None: if isinstance(node.op, ast.BitAnd): op = "and" elif isinstance(node.op, ast.BitOr): @@ -5260,8 +5300,8 @@ def _plan_segment_bitop(node: ast.BinOp, operands: dict) -> SegmentPredicatePlan else: return None - left = _plan_segment_node(node.left, operands) - right = _plan_segment_node(node.right, operands) + left = _plan_segment_node(node.left, operands, array_to_col) + right = _plan_segment_node(node.right, operands, array_to_col) if op == "and": if left is None: return right @@ -5273,22 +5313,26 @@ def _plan_segment_bitop(node: ast.BinOp, operands: dict) -> SegmentPredicatePlan return _merge_segment_plans(left, right, op) -def _plan_segment_node(node: ast.AST, operands: dict) -> SegmentPredicatePlan | None: +def _plan_segment_node( + node: ast.AST, operands: dict, array_to_col: dict | None = None +) -> SegmentPredicatePlan | None: if isinstance(node, ast.Compare): - return _plan_segment_compare(node, operands) + return 
_plan_segment_compare(node, operands, array_to_col) if isinstance(node, ast.BoolOp): - return _plan_segment_boolop(node, operands) + return _plan_segment_boolop(node, operands, array_to_col) if isinstance(node, ast.BinOp): - return _plan_segment_bitop(node, operands) + return _plan_segment_bitop(node, operands, array_to_col) return None -def _plan_exact_compare(node: ast.Compare, operands: dict) -> ExactPredicatePlan | None: +def _plan_exact_compare( + node: ast.Compare, operands: dict, array_to_col: dict | None = None +) -> ExactPredicatePlan | None: target = _target_from_compare(node, operands) if target is None: return None base, target_info, op, value = target - descriptor = _descriptor_for_target(base, target_info) + descriptor = _descriptor_for_target(base, target_info, array_to_col) if descriptor is None or descriptor.get("kind") not in {"bucket", "partial", "full", "opsi"}: return None try: @@ -5400,16 +5444,18 @@ def _merge_exact_plans( ) -def _plan_exact_conjunction(node: ast.AST, operands: dict) -> list[ExactPredicatePlan] | None: +def _plan_exact_conjunction( + node: ast.AST, operands: dict, array_to_col: dict | None = None +) -> list[ExactPredicatePlan] | None: if isinstance(node, ast.Compare): - plan = _plan_exact_compare(node, operands) + plan = _plan_exact_compare(node, operands, array_to_col) return None if plan is None else [plan] if isinstance(node, ast.BoolOp): if not isinstance(node.op, ast.And): return None plans = [] for value in node.values: - subplans = _plan_exact_conjunction(value, operands) + subplans = _plan_exact_conjunction(value, operands, array_to_col) if subplans is None: return None plans.extend(subplans) @@ -5417,8 +5463,8 @@ def _plan_exact_conjunction(node: ast.AST, operands: dict) -> list[ExactPredicat if isinstance(node, ast.BinOp): if not isinstance(node.op, ast.BitAnd): return None - left = _plan_exact_conjunction(node.left, operands) - right = _plan_exact_conjunction(node.right, operands) + left = 
_plan_exact_conjunction(node.left, operands, array_to_col) + right = _plan_exact_conjunction(node.right, operands, array_to_col) if left is None and right is None: return None if left is None: @@ -5429,10 +5475,12 @@ def _plan_exact_conjunction(node: ast.AST, operands: dict) -> list[ExactPredicat return None -def _plan_exact_boolop(node: ast.BoolOp, operands: dict) -> ExactPredicatePlan | None: +def _plan_exact_boolop( + node: ast.BoolOp, operands: dict, array_to_col: dict | None = None +) -> ExactPredicatePlan | None: if not isinstance(node.op, ast.And): return None - plans = [_plan_exact_node(value, operands) for value in node.values] + plans = [_plan_exact_node(value, operands, array_to_col) for value in node.values] if any(plan is None for plan in plans): return None plan = plans[0] @@ -5444,23 +5492,27 @@ def _plan_exact_boolop(node: ast.BoolOp, operands: dict) -> ExactPredicatePlan | return plan -def _plan_exact_bitop(node: ast.BinOp, operands: dict) -> ExactPredicatePlan | None: +def _plan_exact_bitop( + node: ast.BinOp, operands: dict, array_to_col: dict | None = None +) -> ExactPredicatePlan | None: if not isinstance(node.op, ast.BitAnd): return None - left = _plan_exact_node(node.left, operands) - right = _plan_exact_node(node.right, operands) + left = _plan_exact_node(node.left, operands, array_to_col) + right = _plan_exact_node(node.right, operands, array_to_col) if left is None or right is None: return None return _merge_exact_plans(left, right, "and") -def _plan_exact_node(node: ast.AST, operands: dict) -> ExactPredicatePlan | None: +def _plan_exact_node( + node: ast.AST, operands: dict, array_to_col: dict | None = None +) -> ExactPredicatePlan | None: if isinstance(node, ast.Compare): - return _plan_exact_compare(node, operands) + return _plan_exact_compare(node, operands, array_to_col) if isinstance(node, ast.BoolOp): - return _plan_exact_boolop(node, operands) + return _plan_exact_boolop(node, operands, array_to_col) if isinstance(node, ast.BinOp): 
- return _plan_exact_bitop(node, operands) + return _plan_exact_bitop(node, operands, array_to_col) return None @@ -6706,7 +6758,14 @@ def _plan_single_exact_query(exact_plan: ExactPredicatePlan) -> IndexPlan: return IndexPlan(False, "available positional index does not prune any units for this predicate") -def plan_query(expression: str, operands: dict, where: dict | None, *, use_index: bool = True) -> IndexPlan: +def plan_query( + expression: str, + operands: dict, + where: dict | None, + *, + use_index: bool = True, + array_to_col: dict | None = None, +) -> IndexPlan: if not use_index: return IndexPlan(False, "index usage disabled for this query") if where is None or len(where) != 1: @@ -6717,13 +6776,13 @@ def plan_query(expression: str, operands: dict, where: dict | None, *, use_index except SyntaxError: return IndexPlan(False, "expression is not valid Python syntax for planning") - exact_terms = _plan_exact_conjunction(tree.body, operands) + exact_terms = _plan_exact_conjunction(tree.body, operands, array_to_col) if exact_terms is not None and len(exact_terms) > 1: multi_exact_plan = _plan_multi_exact_query(exact_terms) if multi_exact_plan is not None: return multi_exact_plan - exact_plan = _plan_exact_node(tree.body, operands) + exact_plan = _plan_exact_node(tree.body, operands, array_to_col) if exact_plan is not None: exact_query_plan = _plan_single_exact_query(exact_plan) if exact_query_plan.usable: @@ -6738,7 +6797,7 @@ def plan_query(expression: str, operands: dict, where: dict | None, *, use_index if cross_col is not None: return cross_col - segment_plan = _plan_segment_node(tree.body, operands) + segment_plan = _plan_segment_node(tree.body, operands, array_to_col) if segment_plan is None: return IndexPlan(False, "no usable index was found for this predicate") diff --git a/tests/ctable/test_ctable_indexing.py b/tests/ctable/test_ctable_indexing.py index b1d8231a..2803aff3 100644 --- a/tests/ctable/test_ctable_indexing.py +++ 
b/tests/ctable/test_ctable_indexing.py @@ -1014,3 +1014,148 @@ def test_summary_chunk_skip_scoped_extraction(tmp_path): assert r.where(r.v > thr).nrows == int((v > thr).sum()) # A negative threshold matches everything (full-mask path). assert r.where(r.v > -1).nrows == n + + +# --------------------------------------------------------------------------- +# Cross-column SUMMARY-index pruning on compact (.b2z) stores +# --------------------------------------------------------------------------- + + +@dataclasses.dataclass +class _CrossRow: + # Small explicit grid so the SUMMARY index spans many block-segments and + # both columns are chunk/block aligned (shared row→segment grid). + a: float = blosc2.field(blosc2.float64(), chunks=(1000,), blocks=(250,)) + b: float = blosc2.field(blosc2.float64(), chunks=(1000,), blocks=(250,)) + + +def _build_cross_b2z(tmp_path, a, b, name): + """Compact .b2z with SUMMARY indexes on *both* columns.""" + t = blosc2.CTable(_CrossRow) + t.extend(list(zip(a.tolist(), b.tolist(), strict=True))) + path = str(tmp_path / name) + t.to_b2z(path) + with blosc2.open(path, mode="a") as w: + w.create_index("a", kind=blosc2.IndexKind.SUMMARY) + w.create_index("b", kind=blosc2.IndexKind.SUMMARY) + return path + + +def _spy_on_plans(monkeypatch): + """Patch ``plan_query`` to record every ``IndexPlan`` it returns. + + ``_try_index_where`` imports ``plan_query`` from ``blosc2.indexing`` at call + time, so patching the module attribute is picked up by the next query. 
+ """ + plans = [] + orig = blosc2.indexing.plan_query + + def spy(*args, **kwargs): + plan = orig(*args, **kwargs) + plans.append(plan) + return plan + + monkeypatch.setattr(blosc2.indexing, "plan_query", spy) + return plans + + +def test_cross_column_and_prunes_segments_compact_b2z(tmp_path, monkeypatch): + """Regression guard: an AND across two SUMMARY-indexed columns of a compact + store must combine their per-segment candidate masks (intersection) instead + of falling back to a full scan. See todo/multiple-indexes-in-queries.md — + the per-column-token change alone made the cross-column merge fail and + silently disable pruning.""" + n = 10_000 + a = np.arange(n, dtype=np.float64) # ascending → segment-selective + b = np.random.default_rng(0).random(n) * 1000.0 # random → non-selective + path = _build_cross_b2z(tmp_path, a, b, "and.b2z") + + expected = int(((a > n - 100) & (b > 500.0)).sum()) + assert expected > 0 # predicate must match real rows + + plans = _spy_on_plans(monkeypatch) + with blosc2.open(path) as r: + got = r.where(f"(a > {n - 100}) & (b > 500.0)").nrows + assert got == expected + + pruned = [p for p in plans if p.usable and p.selected_units < p.total_units] + assert pruned, "cross-column AND fell back to a full scan instead of pruning" + + +def test_cross_column_or_prunes_segments_compact_b2z(tmp_path, monkeypatch): + """An OR across two SUMMARY-indexed columns must union their candidate masks + (both sides segment-selective ⇒ still prunes), and stay correct.""" + n = 10_000 + a = np.arange(n, dtype=np.float64) # ascending → high values at high index + b = np.arange(n, 0, -1, dtype=np.float64) # descending → high values at low index + path = _build_cross_b2z(tmp_path, a, b, "or.b2z") + + expected = int(((a > n - 100) | (b > n - 100)).sum()) + assert expected > 0 + + plans = _spy_on_plans(monkeypatch) + with blosc2.open(path) as r: + got = r.where(f"(a > {n - 100}) | (b > {n - 100})").nrows + assert got == expected + + pruned = [p for p in 
plans if p.usable and p.selected_units < p.total_units] + assert pruned, "cross-column OR fell back to a full scan instead of pruning" + + +def test_cross_column_predicates_match_scan_compact_b2z(tmp_path): + """Cross-column AND/OR over two SUMMARY-indexed columns must match the + boolean-mask (no-index) result across selective, non-selective, empty, and + mixed-direction predicates.""" + n = 8_000 + rng = np.random.default_rng(3) + a = (rng.random(n) * 100).astype(np.float64) + b = (rng.random(n) * 100).astype(np.float64) + path = _build_cross_b2z(tmp_path, a, b, "corr.b2z") + + cases = [ + ("(a > 90) & (b > 10)", (a > 90) & (b > 10)), # selective ∧ non-selective + ("(a > 50) & (b > 50)", (a > 50) & (b > 50)), # both moderately selective + ("(a > 10) | (b > 90)", (a > 10) | (b > 90)), # OR + ("(a > 99.9) & (b > 99.9)", (a > 99.9) & (b > 99.9)), # near-empty intersection + ("(a < 5) & (b > 0)", (a < 5) & (b > 0)), # low-end selective + ] + with blosc2.open(path) as r: + for expr, mask in cases: + assert r.where(expr).nrows == int(mask.sum()), expr + + +def _seg_plan(units, *, base_nrows=1000, segment_len=250, level="block"): + import types + + from blosc2.indexing import SegmentPredicatePlan + + return SegmentPredicatePlan( + base=types.SimpleNamespace(shape=(base_nrows,)), + candidate_units=np.asarray(units, dtype=bool), + descriptor={"token": "x"}, + target={}, + field=None, + level=level, + segment_len=segment_len, + ) + + +def test_merge_segment_plans_intersection_union_and_fallback(): + """Unit-level guard for the cross-column merge semantics.""" + from blosc2.indexing import _merge_segment_plans + + left = _seg_plan([1, 1, 1, 0]) + right = _seg_plan([0, 1, 1, 1]) + + # Grid-compatible AND → intersection; OR → union. 
+ np.testing.assert_array_equal(_merge_segment_plans(left, right, "and").candidate_units, [0, 1, 1, 0]) + np.testing.assert_array_equal(_merge_segment_plans(left, right, "or").candidate_units, [1, 1, 1, 1]) + + # Incompatible grid (different segment_len): AND keeps the more selective + # side (fewer candidate rows = fewer nonzero units × segment_len), never a + # full scan; OR cannot prune safely → None. + coarse = _seg_plan([1, 1], segment_len=500) # 2 units selected + fine = _seg_plan([1, 0, 0, 0]) # 1 unit selected, finer grid + assert _merge_segment_plans(coarse, fine, "and") is fine # fine prunes more + assert _merge_segment_plans(fine, coarse, "and") is fine + assert _merge_segment_plans(coarse, fine, "or") is None From a2feb127f5b02f9001b556098b38edb75bf4a8e8 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 13 Jun 2026 14:27:14 +0200 Subject: [PATCH 08/24] Add notes on when summary indexes are not created automatically --- src/blosc2/ctable.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/blosc2/ctable.py b/src/blosc2/ctable.py index e39b1a98..90855efa 100644 --- a/src/blosc2/ctable.py +++ b/src/blosc2/ctable.py @@ -3067,10 +3067,23 @@ def __init__( ---------- create_summary_index: If ``True`` (default), SUMMARY indexes are automatically built for - all eligible scalar columns on :meth:`close`. These indexes are - extremely cheap to store (< 0.1% of column size) and accelerate - ``where()`` queries without any user action. Set to ``False`` to - disable. + all eligible scalar columns. These indexes are extremely cheap to + store (< 0.1% of column size) and accelerate ``where()`` queries + without any user action. Set to ``False`` to disable. + + The build is triggered by :meth:`close`, not by table creation, so + *when* it happens depends on the table's lifecycle: + + - **Persistent** tables (``urlpath=...``) are closed as part of + normal use, so they get these indexes and reopen with them. 
+ - A **purely in-memory** table is never closed automatically, so it + is *not* indexed unless you close it explicitly or use it as a + context manager (``with blosc2.CTable(...) as t:``). Otherwise + call :meth:`create_index` yourself. + + Note that :meth:`to_b2z` and :meth:`save` write live rows through a + logical copy and do **not** trigger the build; index the source + table (or the reopened result) explicitly if you need it. """ # Auto-size: if the caller didn't specify expected_size and new_data has a # known length, pre-allocate just enough (×2 for headroom, min 64). @@ -3191,7 +3204,13 @@ def __init__( self._save_n_rows_to_meta() def close(self) -> None: - """Close any persistent backing store held by this table.""" + """Close any persistent backing store held by this table. + + On the first close of a writable root table, this also builds the + automatic SUMMARY indexes (unless ``create_summary_index=False``); see + the ``create_summary_index`` parameter of :class:`CTable` for how this + interacts with in-memory vs. persistent tables. + """ storage = getattr(self, "_storage", None) # Persist row count for root tables so subsequent opens can skip # the _valid_rows intersection in where() for all-valid tables. From fd70fae169e323c50d1d79535f69b9ec2c9814bc Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 13 Jun 2026 14:59:42 +0200 Subject: [PATCH 09/24] Stream exact plot envelopes for large series in b2view plot_series capped the exact min/max envelope at _PLOT_FULL_READ_MAX_BYTES (~1 GB) and fell back to a strided sample above it, which can step over peaks the envelope exists to preserve. For local objects (CTable columns, N-D arrays), stream the series in bounded spans and accumulate per-bucket min/max instead. Since min/max are associative, arbitrary span boundaries reproduce the single-read result exactly, so the envelope stays peak-preserving (method="reduce") at O(span) memory. 
Remote c2arrays keep the labeled strided sample to avoid many network round-trips. Adds _minmax_buckets_streaming / _bucket_geometry / _stream_span (reads aligned to native chunks, ~_PLOT_STREAM_BUFFER_BYTES each) and model-level tests in tests/b2view/test_plot_model.py: exactness vs full read, a spike a sample would miss, all-NaN/int/edge cases, and remote-stays-sample. --- src/blosc2/b2view/model.py | 93 ++++++++++++++++--- tests/b2view/test_plot_model.py | 152 ++++++++++++++++++++++++++++++++ todo/b2view.md | 16 ++-- 3 files changed, 244 insertions(+), 17 deletions(-) create mode 100644 tests/b2view/test_plot_model.py diff --git a/src/blosc2/b2view/model.py b/src/blosc2/b2view/model.py index 3fde5ce3..866fd01d 100644 --- a/src/blosc2/b2view/model.py +++ b/src/blosc2/b2view/model.py @@ -10,11 +10,15 @@ import blosc2 -# Above this uncompressed size, plot_series falls back to a strided sample -# instead of reading the whole series for an exact min/max envelope (the full -# read both materializes the data and costs O(n)). ~1 GB ≈ 125M float64. +# Above this uncompressed size, plot_series does not read the whole series at +# once for an exact min/max envelope. Local objects are instead streamed in +# bounded spans (still exact); only remote c2arrays fall back to a strided +# sample to avoid many network round-trips. ~1 GB ≈ 125M float64. _PLOT_FULL_READ_MAX_BYTES = 1_000_000_000 +# Target size of a single streamed read in the exact-but-bounded envelope path. +_PLOT_STREAM_BUFFER_BYTES = 64_000_000 # ~64 MB + def _minmax_buckets( vmin: np.ndarray, vmax: np.ndarray, positions: np.ndarray, n: int, max_points: int @@ -55,6 +59,58 @@ def _reduce_envelope(vals: np.ndarray, n: int, max_points: int) -> dict[str, np. 
return _minmax_buckets(vals, vals, np.arange(vals.shape[0]), n, max_points) +def _bucket_geometry(n: int, max_points: int) -> tuple[int, int]: + """Return ``(group, nbuckets)`` for an *n*-row series cut into ``<= max_points`` + contiguous buckets, matching the grouping policy of :func:`_minmax_buckets` + so the streamed and full-read envelopes bucket rows identically.""" + if n <= 0: + return 1, 0 + group = 1 if n <= max_points else -(-n // max_points) # ceil + nbuckets = -(-n // group) # ceil + return group, nbuckets + + +def _minmax_buckets_streaming(read_chunk, n: int, max_points: int, *, span: int) -> dict[str, np.ndarray]: + """Exact per-bucket min/max envelope, read in row spans of at most *span*. + + Equivalent to reading the whole series and calling :func:`_reduce_envelope`, + but never holds more than *span* rows in memory. *read_chunk* is a callable + ``(start, stop) -> 1-D array`` for that row range. Because min/max are + associative, arbitrary span boundaries (including ones that fall inside a + bucket) yield a result identical to the single-read path. 
+ """ + group, nbuckets = _bucket_geometry(n, max_points) + if nbuckets == 0: + empty = np.empty(0) + return {"x": empty, "ymin": empty, "ymax": empty} + ymin = np.full(nbuckets, np.inf) + ymax = np.full(nbuckets, -np.inf) + span = max(1, int(span)) + for s in range(0, n, span): + e = min(s + span, n) + vals = np.asarray(read_chunk(s, e), dtype=float).ravel()[: e - s] + bidx = np.arange(s, e) // group # global bucket per row, non-decreasing + seg_starts = np.concatenate(([0], np.flatnonzero(np.diff(bidx)) + 1)) + buckets = bidx[seg_starts] # unique within this span (contiguous runs) + lo = np.where(np.isnan(vals), np.inf, vals) + hi = np.where(np.isnan(vals), -np.inf, vals) + ymin[buckets] = np.minimum(ymin[buckets], np.minimum.reduceat(lo, seg_starts)) + ymax[buckets] = np.maximum(ymax[buckets], np.maximum.reduceat(hi, seg_starts)) + ymin = np.where(np.isinf(ymin), np.nan, ymin) + ymax = np.where(np.isinf(ymax), np.nan, ymax) + x = np.minimum(np.arange(nbuckets) * group, max(0, n - 1)) + return {"x": x, "ymin": ymin, "ymax": ymax} + + +def _stream_span(n: int, itemsize: int, chunklen: int | None) -> int: + """Rows per streamed read: ~``_PLOT_STREAM_BUFFER_BYTES`` worth, aligned to + whole native chunks when possible (chunks are the decompression unit).""" + budget = max(1, _PLOT_STREAM_BUFFER_BYTES // max(1, itemsize)) + if chunklen and chunklen > 0: + return chunklen if chunklen >= budget else (budget // chunklen) * chunklen + return budget + + @dataclass(frozen=True) class NodeInfo: """Lightweight description of one TreeStore child.""" @@ -360,12 +416,17 @@ def plot_series( env = self._column_summary_envelope(obj, column, n, max_points) if env is not None: return {**env, "n": n, "method": "summary"} - itemsize = np.dtype(view[column].dtype).itemsize + col = view[column] + itemsize = np.dtype(col.dtype).itemsize if n * itemsize > _PLOT_FULL_READ_MAX_BYTES: - step = max(1, -(-n // max_points)) - y = safe_asarray(view[column][::step]) if n else np.empty(0) - return 
{"x": np.arange(0, n, step), "ymin": y, "ymax": y, "n": n, "method": "sample"} - vals = safe_asarray(view[column][:]) if n else np.empty(0) + # Local column: stream an exact envelope in bounded spans. + chunks = getattr(col, "chunks", None) + span = _stream_span(n, itemsize, chunks[0] if chunks else None) + env = _minmax_buckets_streaming( + lambda s, e: safe_asarray(col[s:e]), n, max_points, span=span + ) + return {**env, "n": n, "method": "reduce"} + vals = safe_asarray(col[:]) if n else np.empty(0) return {**_reduce_envelope(vals, n, max_points), "n": n, "method": "reduce"} if kind in {"ndarray", "c2array"}: @@ -395,9 +456,19 @@ def _row_index(row_slice): itemsize = np.dtype(obj.dtype).itemsize if n * itemsize > _PLOT_FULL_READ_MAX_BYTES: - step = max(1, -(-n // max_points)) - y = np.asarray(obj[_row_index(slice(0, n, step))]) if n else np.empty(0) - return {"x": np.arange(0, n, step), "ymin": y, "ymax": y, "n": n, "method": "sample"} + if kind == "c2array": + # Remote: stream of small reads would mean many round-trips; + # keep the labeled strided sample as the last resort. + step = max(1, -(-n // max_points)) + y = np.asarray(obj[_row_index(slice(0, n, step))]) if n else np.empty(0) + return {"x": np.arange(0, n, step), "ymin": y, "ymax": y, "n": n, "method": "sample"} + # Local N-D array: stream an exact envelope along the row dim. 
+ chunks = getattr(obj, "chunks", None) + span = _stream_span(n, itemsize, chunks[row_dim] if chunks else None) + env = _minmax_buckets_streaming( + lambda s, e: np.asarray(obj[_row_index(slice(s, e, 1))]), n, max_points, span=span + ) + return {**env, "n": n, "method": "reduce"} vals = np.asarray(obj[_row_index(slice(0, n))]) if n else np.empty(0) return {**_reduce_envelope(vals, n, max_points), "n": n, "method": "reduce"} diff --git a/tests/b2view/test_plot_model.py b/tests/b2view/test_plot_model.py new file mode 100644 index 00000000..113c541e --- /dev/null +++ b/tests/b2view/test_plot_model.py @@ -0,0 +1,152 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Unit tests for b2view's streamed plot envelope (no app session needed). + +``plot_series`` reads a peak-preserving min/max envelope. Below +``_PLOT_FULL_READ_MAX_BYTES`` it reads the series in one shot; above it, local +objects are streamed in bounded spans (still *exact*) and only remote c2arrays +fall back to a strided sample. These tests force the streamed path by lowering +the byte ceiling and assert it reproduces the full-read envelope exactly. 
+""" + +from __future__ import annotations + +import dataclasses + +import numpy as np +import pytest + +import blosc2 +from blosc2.b2view import model +from blosc2.b2view.model import ( + StoreBrowser, + _bucket_geometry, + _minmax_buckets_streaming, + _reduce_envelope, +) + +N = 20_000 +MAX_POINTS = 2000 + + +def _series(): + """A deterministic series with NaNs and a single sharp spike.""" + rng = np.random.default_rng(0) + vals = (rng.standard_normal(N) * 10).astype(np.float64) + vals[rng.integers(0, N, N // 50)] = np.nan # scattered NaN units + vals[1234] = 999.0 # a spike strided sampling is likely to miss + return vals + + +@pytest.fixture(scope="module") +def plot_store(tmp_path_factory): + """A TreeStore with a 1-D NDArray leaf and a CTable, sharing one series.""" + vals = _series() + path = str(tmp_path_factory.mktemp("plot") / "plot.b2z") + + @dataclasses.dataclass + class Row: + x: float = blosc2.field(blosc2.float64()) + + tstore = blosc2.TreeStore(path, mode="w") + try: + # Small chunks so the stream spans several native chunks. 
+ tstore["/leaf"] = blosc2.asarray(vals, chunks=(4096,)) + t = blosc2.CTable(Row, expected_size=N, validate=False) + t.extend({"x": vals}, validate=False) + tstore["/ctable"] = t + finally: + tstore.close() + return path, vals + + +def _force_stream(monkeypatch, *, buffer_bytes=20_000): + """Lower both ceilings so a small series exercises the streamed path with + spans that straddle bucket boundaries.""" + monkeypatch.setattr(model, "_PLOT_FULL_READ_MAX_BYTES", 1) + monkeypatch.setattr(model, "_PLOT_STREAM_BUFFER_BYTES", buffer_bytes) + + +def _assert_exact(env, vals): + expected = _reduce_envelope(np.asarray(vals), len(vals), MAX_POINTS) + np.testing.assert_array_equal(env["x"], expected["x"]) + np.testing.assert_allclose(env["ymin"], expected["ymin"], equal_nan=True) + np.testing.assert_allclose(env["ymax"], expected["ymax"], equal_nan=True) + + +def test_stream_envelope_matches_full_read_ndarray(plot_store, monkeypatch): + path, vals = plot_store + _force_stream(monkeypatch) + with StoreBrowser(path) as browser: + env = browser.plot_series("/leaf", max_points=MAX_POINTS) + assert env["method"] == "reduce" # exact, not a sample + assert env["n"] == N + _assert_exact(env, vals) + + +def test_stream_envelope_matches_full_read_ctable(plot_store, monkeypatch): + path, vals = plot_store + _force_stream(monkeypatch) + with StoreBrowser(path) as browser: + env = browser.plot_series("/ctable", column="x", max_points=MAX_POINTS) + assert env["method"] == "reduce" + assert env["n"] == N + _assert_exact(env, vals) + + +def test_stream_envelope_captures_spike_a_sample_would_miss(plot_store, monkeypatch): + path, vals = plot_store + _force_stream(monkeypatch) + with StoreBrowser(path) as browser: + env = browser.plot_series("/leaf", max_points=MAX_POINTS) + # The streamed envelope sees the spike... + assert np.nanmax(env["ymax"]) == pytest.approx(999.0) + # ...whereas the strided sample the old fallback used would step right over it. 
+ step = max(1, -(-N // MAX_POINTS)) + assert np.nanmax(vals[::step]) < 999.0 + + +def test_remote_c2array_falls_back_to_sample(plot_store, monkeypatch): + path, _ = plot_store + _force_stream(monkeypatch) + # Pretend the local leaf is a remote c2array: streaming it would mean many + # network round-trips, so plot_series must keep the labeled strided sample. + monkeypatch.setattr(model, "object_kind", lambda obj: "c2array") + with StoreBrowser(path) as browser: + env = browser.plot_series("/leaf", max_points=MAX_POINTS) + assert env["method"] == "sample" + + +@pytest.mark.parametrize("n", [0, 1, 7, MAX_POINTS, MAX_POINTS + 1, 12_345]) +def test_streaming_reducer_matches_full_read(n): + rng = np.random.default_rng(n) + vals = (rng.standard_normal(n) * 100).astype(np.float64) if n else np.empty(0) + if n: + vals[rng.integers(0, n, max(1, n // 50))] = np.nan + group = _bucket_geometry(n, MAX_POINTS)[0] + span = max(1, (group * 3) // 2 + 1) # awkward span: straddles buckets + streamed = _minmax_buckets_streaming(lambda s, e: vals[s:e], n, MAX_POINTS, span=span) + expected = _reduce_envelope(vals, n, MAX_POINTS) + np.testing.assert_array_equal(streamed["x"], expected["x"]) + np.testing.assert_allclose(streamed["ymin"], expected["ymin"], equal_nan=True) + np.testing.assert_allclose(streamed["ymax"], expected["ymax"], equal_nan=True) + + +def test_streaming_reducer_all_nan_bucket_stays_nan(): + vals = np.full(100, np.nan) + env = _minmax_buckets_streaming(lambda s, e: vals[s:e], 100, 10, span=7) + assert np.isnan(env["ymin"]).all() + assert np.isnan(env["ymax"]).all() + + +def test_streaming_reducer_integer_dtype(): + vals = np.arange(1000, dtype=np.int64) + env = _minmax_buckets_streaming(lambda s, e: vals[s:e], 1000, 100, span=33) + expected = _reduce_envelope(vals, 1000, 100) + np.testing.assert_array_equal(env["ymin"], expected["ymin"]) + np.testing.assert_array_equal(env["ymax"], expected["ymax"]) diff --git a/todo/b2view.md b/todo/b2view.md index 40b08c82..9d231cba 
100644 --- a/todo/b2view.md +++ b/todo/b2view.md @@ -26,12 +26,6 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of resolution proves too coarse, `textual-image` can render real matplotlib output on kitty/iTerm2/sixel terminals, degrading to half-blocks elsewhere. -- [ ] Tier-2 plot envelope (`_reduce_envelope`) materializes the series via - `obj[:]`, so it is bounded by `_PLOT_FULL_READ_MAX_BYTES` (~1 GB) and - falls back to a labeled strided sample above that. Lift the ceiling by - chunk-streaming the per-bucket min/max instead of reading the whole - series at once. - ### Testing - [ ] Visual regressions: consider `pytest-textual-snapshot` (SVG snapshots) @@ -39,6 +33,16 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of ## Done +- 2026-06-13: Tier-2 plot envelope is no longer capped at + `_PLOT_FULL_READ_MAX_BYTES` (~1 GB). Above the ceiling, **local** objects + (CTable columns, N-D arrays) are streamed in bounded spans + (`_minmax_buckets_streaming`, ~`_PLOT_STREAM_BUFFER_BYTES` per read, aligned + to native chunks) and the envelope stays **exact** (`method="reduce"`); only + remote `c2array`s still fall back to the labeled strided `sample` (streaming + would mean many round-trips). Min/max are associative, so arbitrary span + boundaries reproduce the single-read result bit-for-bit. Unit tests in + `tests/b2view/test_plot_model.py` (exactness vs full read, spike a sample + would miss, all-NaN/int/edge cases, remote-stays-sample). - 2026-06-12: Pilot-based test suite (`tests/b2view/test_basics.py`) with a deterministic store generator (`tests/b2view/tree_store_gen.py`); marker `tui`. 
From 52d77036fc2eb6a3afd7a5cef1f060eb87e49a5e Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 13 Jun 2026 17:25:05 +0200 Subject: [PATCH 10/24] Row paging re-aligns to the page grid after dim-mode single-row scrolls --- src/blosc2/b2view/app.py | 26 ++++++++++++++++++++------ tests/b2view/test_basics.py | 28 ++++++++++++++++++++++++++++ todo/b2view.md | 12 ++++++------ 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/src/blosc2/b2view/app.py b/src/blosc2/b2view/app.py index ee3c2cc2..8d4b6173 100644 --- a/src/blosc2/b2view/app.py +++ b/src/blosc2/b2view/app.py @@ -89,12 +89,12 @@ def action_cursor_left(self) -> None: super().action_cursor_left() def action_page_down(self) -> None: - if getattr(self.app, "page_table", lambda _: False)(1): + if getattr(self.app, "page_table", lambda *a, **k: False)(1, align=True): return super().action_page_down() def action_page_up(self) -> None: - if getattr(self.app, "page_table", lambda _: False)(-1): + if getattr(self.app, "page_table", lambda *a, **k: False)(-1, align=True): return super().action_page_up() @@ -1061,7 +1061,7 @@ def _update_data_table(self, data: dict, *, cursor_row: int = 0, cursor_col: int def _finish_table_page_load(self) -> None: self.loading_table_page = False - def page_table(self, direction: int) -> bool: + def page_table(self, direction: int, *, align: bool = False) -> bool: if self.loading_table_page or self.table_page is None: return False page = self.table_page @@ -1069,13 +1069,27 @@ def page_table(self, direction: int) -> bool: if direction > 0: if page["stop"] >= page["nrows"]: return False - data = self._load_table_page(self.selected_path, page["stop"]) + # An explicit page down re-aligns to the page grid: dim-mode + # single-row scrolls (_scroll_navigable_viewport) can leave `start` + # off a page_size boundary, and contiguous paging from `stop` would + # carry that offset forever. Snapping to the next page_size + # multiple mirrors how column paging re-fits on each page. 
For an + # already-aligned page this equals `stop`, so cursor-edge paging + # (align=False) is unchanged. + new_start = (page["start"] // page_size + 1) * page_size if align else page["stop"] + data = self._load_table_page(self.selected_path, new_start) cursor_row = 0 else: if page["start"] <= 0: return False - start = max(0, page["start"] - page_size) - data = self._load_table_page(self.selected_path, start) + if align: + # Previous grid line: floor for an off-grid start, start-page + # for an aligned one (ceil-div keeps aligned pages contiguous). + new_start = (-(-page["start"] // page_size) - 1) * page_size + else: + new_start = page["start"] - page_size + new_start = max(0, new_start) + data = self._load_table_page(self.selected_path, new_start) cursor_row = data["stop"] - data["start"] - 1 self._update_data_table(data, cursor_row=cursor_row) self._update_data_header(data) diff --git a/tests/b2view/test_basics.py b/tests/b2view/test_basics.py index 3d6d8e45..c9850849 100644 --- a/tests/b2view/test_basics.py +++ b/tests/b2view/test_basics.py @@ -276,6 +276,34 @@ async def test_2d_paging(store_path): assert page["col_stop"] == LEAF2_SHAPE[1] np.testing.assert_allclose(page["data"]["97"], expected[page["start"] : page["stop"], 97]) + # Row paging re-aligns after a dim-mode single-row scroll. Back to the + # top so the row window starts on a page_size boundary. + await pilot.press("t") + await wait_for_table(pilot) + assert app.table_page["start"] == 0 + page_size = app._table_page_size() + assert page_size < LEAF2_SHAPE[0] # several row pages exist + + # In dim mode the active (row) dim scrolls by one row, nudging the + # window off the page grid. 
+ await pilot.press("d") + assert app._dim_mode + await pilot.press("up") + await wait_for_table(pilot) + assert app.table_page["start"] == 1 # off-grid by one row + await pilot.press("escape") + assert not app._dim_mode + + # An explicit page down now snaps back onto the page grid instead of + # carrying the one-row offset (the bug), and page up returns to 0. + await pilot.press("pagedown") + await wait_for_table(pilot) + assert app.table_page["start"] == page_size + + await pilot.press("pageup") + await wait_for_table(pilot) + assert app.table_page["start"] == 0 + # ── 3-D array: dim mode navigation ─────────────────────────────────────── diff --git a/todo/b2view.md b/todo/b2view.md index 9d231cba..d1567d39 100644 --- a/todo/b2view.md +++ b/todo/b2view.md @@ -9,12 +9,6 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of ## Pending -### Navigation - -- [ ] Row paging can lose page alignment after dim-mode single-row scrolls - (`_scroll_navigable_viewport` shifts by 1); consider re-aligning on the - next page up/down, as column paging does now. - ### Data panel - [ ] CTable expensive columns (list/struct/object) show a `<...; skipped>` @@ -33,6 +27,12 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of ## Done +- 2026-06-13: Row paging re-aligns to the page grid after dim-mode single-row + scrolls. `_scroll_navigable_viewport` shifts `start` by one row, which used + to make every later page up/down carry that offset; `page_table` now takes + `align=` and an explicit page up/down (only — cursor-edge paging stays + contiguous) snaps `start` to the next page_size boundary in the paging direction, mirroring column + paging's per-page re-fit. Regression covered in `test_2d_paging`. - 2026-06-13: Tier-2 plot envelope is no longer capped at `_PLOT_FULL_READ_MAX_BYTES` (~1 GB).
Above the ceiling, **local** objects (CTable columns, N-D arrays) are streamed in bounded spans From 4a904c18f065f5c8a6c03fa255c074541fbef8fe Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 13 Jun 2026 17:41:10 +0200 Subject: [PATCH 11/24] Add row-range zoom to the b2view plot modal The 'p' plot showed only a whole-series min/max envelope, so detail within a region was bucketed away with no way to drill in. plot_series gains row_start/row_stop: the whole series still uses the fast SUMMARY tier, while a sub-range is read exactly (reduce/stream, or a strided sample only for large remote ranges) with x kept in absolute row coordinates. PlotScreen now holds a fetch closure + total n and re-queries the envelope on +/- (zoom about centre), left/right (pan), 0 (reset), and g (type an exact start:stop via the new PlotRangeScreen). A key-hint line and a '?'-help group advertise the keys. --- src/blosc2/b2view/app.py | 233 ++++++++++++++++++++++++++++---- src/blosc2/b2view/model.py | 132 ++++++++++++------ tests/b2view/test_basics.py | 49 +++++++ tests/b2view/test_plot_model.py | 28 ++++ todo/b2view.md | 19 ++- 5 files changed, 389 insertions(+), 72 deletions(-) diff --git a/src/blosc2/b2view/app.py b/src/blosc2/b2view/app.py index 8d4b6173..72191d4d 100644 --- a/src/blosc2/b2view/app.py +++ b/src/blosc2/b2view/app.py @@ -221,6 +221,15 @@ class HelpScreen(ModalScreen[None]): ("p", "plot a whole-column overview (needs textual-plotext)"), ], ), + ( + "Plot modal (after 'p')", + [ + ("+ / -", "zoom in / out about the centre"), + ("left / right", "pan the zoomed window"), + ("0", "reset to the whole series"), + ("g", "type an exact start:stop row range"), + ], + ), ( "Dim mode (N-D arrays)", [ @@ -432,8 +441,97 @@ def action_cancel(self) -> None: self.dismiss(None) +def _plot_view(series: dict) -> tuple[np.ndarray, np.ndarray, np.ndarray, str]: + """Turn a ``plot_series`` result into drawable arrays + a method label. 
+ + Drops all-NaN buckets (no finite extremes) and maps the read method to a + human description shown in the title. + """ + x = np.asarray(series["x"]) + ymin = np.asarray(series["ymin"], dtype=np.float64) + ymax = np.asarray(series["ymax"], dtype=np.float64) + finite = np.isfinite(ymin) & np.isfinite(ymax) + x, ymin, ymax = x[finite], ymin[finite], ymax[finite] + method = series.get("method") + descr = {"summary": "min/max envelope", "reduce": "min/max envelope"}.get( + method, "sampled — may miss extremes" + ) + return x, ymin, ymax, descr + + +class PlotRangeScreen(ModalScreen["tuple[int, int] | None"]): + """Small modal asking for an explicit ``start:stop`` row range.""" + + CSS = """ + PlotRangeScreen { + align: center middle; + } + #range-dialog { + width: 50; + height: auto; + border: thick $accent; + background: $surface; + padding: 1 2; + } + #range-title { + text-style: bold; + margin-bottom: 1; + } + """ + + BINDINGS: ClassVar = [("escape", "cancel", "Cancel")] + + def __init__(self, *, n: int, start: int, stop: int): + super().__init__() + self.n = n + self.start = start + self.stop = stop + + def compose(self) -> ComposeResult: + with Vertical(id="range-dialog"): + yield Static( + f"Row range start:stop within 0..{self.n} (current {self.start}:{self.stop})", + id="range-title", + ) + yield Input(placeholder="start:stop", id="range-input") + + def on_mount(self) -> None: + widget = self.query_one("#range-input", Input) + widget.value = f"{self.start}:{self.stop}" + widget.focus() + + def _parse(self, text: str) -> tuple[int, int] | None: + if ":" not in text: + return None + lo, hi = text.split(":", 1) + try: + start = int(lo) if lo.strip() else 0 + stop = int(hi) if hi.strip() else self.n + except ValueError: + return None + start = max(0, min(start, self.n)) + stop = max(0, min(stop, self.n)) + return None if stop <= start else (start, stop) + + def on_input_submitted(self, event: Input.Submitted) -> None: + parsed = 
self._parse(event.value.strip().replace("_", "")) + if parsed is None: + self.query_one("#range-title", Static).update("Enter a range as start:stop") + return + self.dismiss(parsed) + + def action_cancel(self) -> None: + self.dismiss(None) + + class PlotScreen(ModalScreen[None]): - """Modal plotting one numeric column of the data grid (textual-plotext).""" + """Modal plotting one numeric column; zoomable into a row sub-range. + + Keys: ``+``/``-`` zoom about the view centre, ``←``/``→`` pan, ``0`` reset to + the whole series, ``g`` type an exact ``start:stop`` range. Each change + re-fetches the envelope for the new range (exact for sub-ranges) via the + *fetch* closure, so zooming reveals detail the whole-series buckets hide. + """ CSS = """ PlotScreen { @@ -453,34 +551,114 @@ class PlotScreen(ModalScreen[None]): #plot-widget { height: 1fr; } + #plot-keys { + height: 1; + color: $text-muted; + } """ + _KEYS_HINT = "+/- zoom · ←/→ pan · 0 reset · g range · q close" + _MIN_WIDTH = 16 # smallest zoom window (rows), so the envelope still reads + BINDINGS: ClassVar = [ ("escape", "close", "Close"), ("q", "close", "Close"), ("p", "close", "Close"), + ("plus", "zoom_in", "Zoom in"), + ("equals_sign", "zoom_in", "Zoom in"), + ("minus", "zoom_out", "Zoom out"), + ("left", "pan_left", "Pan left"), + ("right", "pan_right", "Pan right"), + ("0", "reset_range", "Reset"), + ("g", "goto_range", "Range"), ] - def __init__(self, *, title: str, x, ymin, ymax): + def __init__(self, *, title_prefix: str, fetch, n: int, row_start: int, row_stop: int, series: dict): super().__init__() - self.plot_title = title + self.title_prefix = title_prefix + self._fetch = fetch + self.n = n + self.row_start = row_start + self.row_stop = row_stop + self._apply(series) + + def _apply(self, series: dict) -> None: + x, ymin, ymax, descr = _plot_view(series) self.x = list(x) self.ymin = list(ymin) self.ymax = list(ymax) + full = self.row_start == 0 and self.row_stop == self.n + rng = "" if full else f" 
· rows {self.row_start}:{self.row_stop}" + note = "" if self.x else " · (no finite values in range)" + self.plot_title = f"{self.title_prefix} · {self.n} rows{rng} · {descr}{note}" def compose(self) -> ComposeResult: with Vertical(id="plot-dialog"): yield Static(markup_escape(self.plot_title), id="plot-title") yield PlotextPlot(id="plot-widget") + yield Static(self._KEYS_HINT, id="plot-keys") def on_mount(self) -> None: - plt = self.query_one(PlotextPlot).plt - # Draw the max (upper) and min (lower) envelope. When they coincide - # (a sampled series) this reads as a single line. - plt.plot(self.x, self.ymax, marker="braille") - if self.ymin != self.ymax: - plt.plot(self.x, self.ymin, marker="braille") + self._redraw() + + def _redraw(self) -> None: + widget = self.query_one(PlotextPlot) + plt = widget.plt + plt.clear_figure() + if self.x: + # Upper (max) and lower (min) envelope; a single line when they + # coincide (a sampled series). + plt.plot(self.x, self.ymax, marker="braille") + if self.ymin != self.ymax: + plt.plot(self.x, self.ymin, marker="braille") plt.xlabel("row") + widget.refresh() + self.query_one("#plot-title", Static).update(markup_escape(self.plot_title)) + + def _set_range(self, start: int, stop: int) -> None: + start = max(0, min(int(start), self.n)) + stop = max(0, min(int(stop), self.n)) + if stop <= start or (start, stop) == (self.row_start, self.row_stop): + return + self.row_start, self.row_stop = start, stop + self._apply(self._fetch(start, stop)) + self._redraw() + + def _zoom(self, factor: float) -> None: + width = self.row_stop - self.row_start + center = (self.row_start + self.row_stop) // 2 + new_w = width // 2 if factor < 1 else width * 2 + new_w = max(min(self._MIN_WIDTH, self.n), min(self.n, new_w)) + start = max(0, min(center - new_w // 2, self.n - new_w)) + self._set_range(start, start + new_w) + + def _pan(self, direction: int) -> None: + width = self.row_stop - self.row_start + delta = max(1, width // 4) * direction + start = 
max(0, min(self.row_start + delta, self.n - width)) + self._set_range(start, start + width) + + def action_zoom_in(self) -> None: + self._zoom(0.5) + + def action_zoom_out(self) -> None: + self._zoom(2.0) + + def action_pan_left(self) -> None: + self._pan(-1) + + def action_pan_right(self) -> None: + self._pan(1) + + def action_reset_range(self) -> None: + self._set_range(0, self.n) + + def action_goto_range(self) -> None: + def _on_range(result: tuple[int, int] | None) -> None: + if result is not None: + self._set_range(*result) + + self.app.push_screen(PlotRangeScreen(n=self.n, start=self.row_start, stop=self.row_stop), _on_range) def action_close(self) -> None: self.dismiss(None) @@ -1326,25 +1504,34 @@ def action_plot_column(self) -> None: column = int(name) else: # 1-D arrays (single navigable dim) have one "value" column column = None - series = self.browser.plot_series( - self.selected_path, column=column, layout=self._data_layout, max_points=self._PLOT_MAX_POINTS - ) - x = np.asarray(series["x"]) - ymin = np.asarray(series["ymin"], dtype=np.float64) - ymax = np.asarray(series["ymax"], dtype=np.float64) - # Keep only buckets with finite extremes (drops all-NaN buckets). 
- finite = np.isfinite(ymin) & np.isfinite(ymax) - x, ymin, ymax = x[finite], ymin[finite], ymax[finite] + layout = self._data_layout + + def fetch(start: int, stop: int | None) -> dict: + return self.browser.plot_series( + self.selected_path, + column=column, + layout=layout, + max_points=self._PLOT_MAX_POINTS, + row_start=start, + row_stop=stop, + ) + + series = fetch(0, None) # whole series (uses the fast SUMMARY tier if any) + x, _ymin, _ymax, _descr = _plot_view(series) if x.size == 0: self.notify(f"Column {name!r} has no finite values to plot", severity="warning") return - method = series.get("method") - descr = {"summary": "min/max envelope", "reduce": "min/max envelope"}.get( - method, "sampled — may miss extremes" + self.push_screen( + PlotScreen( + title_prefix=f"{self.selected_path} · {name}", + fetch=fetch, + n=series["n"], + row_start=series["row_start"], + row_stop=series["row_stop"], + series=series, + ) ) - title = f"{self.selected_path} · {name} · {series['n']} rows · {descr}" - self.push_screen(PlotScreen(title=title, x=x, ymin=ymin, ymax=ymax)) def action_go_to_column(self) -> None: if not self._in_data_grid(): diff --git a/src/blosc2/b2view/model.py b/src/blosc2/b2view/model.py index 866fd01d..cd8800ca 100644 --- a/src/blosc2/b2view/model.py +++ b/src/blosc2/b2view/model.py @@ -384,25 +384,31 @@ def plot_series( column: str | int | None = None, layout: DataSliceLayout | None = None, max_points: int = 2000, + row_start: int = 0, + row_stop: int | None = None, ) -> dict[str, Any]: """Return a peak-preserving overview of one series for plotting. - The result is ``{"x", "ymin", "ymax", "n", "method"}`` with at most - *max_points* buckets; ``ymin``/``ymax`` are the per-bucket extremes so a - plotted envelope never hides a peak or trough. 
Three tiers, cheapest - first: + The result is ``{"x", "ymin", "ymax", "n", "row_start", "row_stop", + "method"}`` with at most *max_points* buckets; ``ymin``/``ymax`` are the + per-bucket extremes so a plotted envelope never hides a peak or trough, + ``n`` is the *total* series length and ``row_start``/``row_stop`` the + plotted range. Three tiers, cheapest first: - ``"summary"``: read precomputed per-block min/max from the column's - SUMMARY index — no data decompression (CTable columns, no active - filter, numeric). - - ``"reduce"``: read the whole series and reduce per bucket (exact, - but O(n) and bounded by ``_PLOT_FULL_READ_MAX_BYTES``). - - ``"sample"``: strided sample for series too large to read fully; this - may miss extremes, so callers should label it. - - The series is a CTable column (*column* is its name; an active row - filter is honored) or an array (*column* is the global index along the - column dimension of *layout*, or None for 1-D arrays). + SUMMARY index — no data decompression (whole series only; CTable + columns, no active filter, numeric). + - ``"reduce"``: read the (sub)series and reduce per bucket — exact, + O(range), streamed in bounded spans above ``_PLOT_FULL_READ_MAX_BYTES`` + for local objects. + - ``"sample"``: strided sample for a remote series too large to read + fully; this may miss extremes, so callers should label it. + + Pass *row_start*/*row_stop* to zoom into a sub-range (always read + exactly; ``x`` stays in absolute row coordinates). The series is a + CTable column (*column* is its name; an active row filter is honored) or + an array (*column* is the global index along the column dimension of + *layout*, or None for 1-D arrays). 
""" path = self.normalize_path(path) obj = self._get_object(path) @@ -412,22 +418,23 @@ def plot_series( filtered = path in self._filter_views view = self._filter_views.get(path, obj) n = len(view) - if not filtered: + start, stop = self._clamp_range(row_start, row_stop, n) + if start == 0 and stop == n and not filtered: env = self._column_summary_envelope(obj, column, n, max_points) if env is not None: - return {**env, "n": n, "method": "summary"} + return {**env, "n": n, "row_start": start, "row_stop": stop, "method": "summary"} col = view[column] - itemsize = np.dtype(col.dtype).itemsize - if n * itemsize > _PLOT_FULL_READ_MAX_BYTES: - # Local column: stream an exact envelope in bounded spans. - chunks = getattr(col, "chunks", None) - span = _stream_span(n, itemsize, chunks[0] if chunks else None) - env = _minmax_buckets_streaming( - lambda s, e: safe_asarray(col[s:e]), n, max_points, span=span - ) - return {**env, "n": n, "method": "reduce"} - vals = safe_asarray(col[:]) if n else np.empty(0) - return {**_reduce_envelope(vals, n, max_points), "n": n, "method": "reduce"} + chunks = getattr(col, "chunks", None) + return self._range_envelope( + lambda s, e, st=1: safe_asarray(col[s:e:st]), + start, + stop, + n, + np.dtype(col.dtype).itemsize, + chunks[0] if chunks else None, + remote=False, + max_points=max_points, + ) if kind in {"ndarray", "c2array"}: shape = tuple(getattr(obj, "shape", ()) or ()) @@ -436,6 +443,7 @@ def plot_series( raise ValueError("Cannot plot a scalar") row_dim = layout.navigable_dims[0] if layout is not None and layout.navigable_dims else 0 n = shape[row_dim] + start, stop = self._clamp_range(row_start, row_stop, n) def _row_index(row_slice): idx: list[int | slice] = [] @@ -454,26 +462,62 @@ def _row_index(row_slice): idx.append(0) return tuple(idx) - itemsize = np.dtype(obj.dtype).itemsize - if n * itemsize > _PLOT_FULL_READ_MAX_BYTES: - if kind == "c2array": - # Remote: stream of small reads would mean many round-trips; - # keep the 
labeled strided sample as the last resort. - step = max(1, -(-n // max_points)) - y = np.asarray(obj[_row_index(slice(0, n, step))]) if n else np.empty(0) - return {"x": np.arange(0, n, step), "ymin": y, "ymax": y, "n": n, "method": "sample"} - # Local N-D array: stream an exact envelope along the row dim. - chunks = getattr(obj, "chunks", None) - span = _stream_span(n, itemsize, chunks[row_dim] if chunks else None) - env = _minmax_buckets_streaming( - lambda s, e: np.asarray(obj[_row_index(slice(s, e, 1))]), n, max_points, span=span - ) - return {**env, "n": n, "method": "reduce"} - vals = np.asarray(obj[_row_index(slice(0, n))]) if n else np.empty(0) - return {**_reduce_envelope(vals, n, max_points), "n": n, "method": "reduce"} + chunks = getattr(obj, "chunks", None) + return self._range_envelope( + lambda s, e, st=1: np.asarray(obj[_row_index(slice(s, e, st))]), + start, + stop, + n, + np.dtype(obj.dtype).itemsize, + chunks[row_dim] if chunks else None, + remote=(kind == "c2array"), + max_points=max_points, + ) raise ValueError(f"Cannot plot {kind!r} objects") + @staticmethod + def _clamp_range(row_start: int, row_stop: int | None, n: int) -> tuple[int, int]: + start = 0 if row_start is None else max(0, min(int(row_start), n)) + stop = n if row_stop is None else max(0, min(int(row_stop), n)) + return (stop, start) if stop < start else (start, stop) + + def _range_envelope( + self, + read, + start: int, + stop: int, + n_total: int, + itemsize: int, + chunklen: int | None, + *, + remote: bool, + max_points: int, + ) -> dict[str, Any]: + """Envelope of rows ``[start, stop)`` via ``read(s, e, step=1)``, with ``x`` + in absolute row coordinates.
Reads the range exactly (reduce/stream), + falling back to a strided sample only for large *remote* ranges.""" + rng = stop - start + base = {"n": n_total, "row_start": start, "row_stop": stop} + if rng <= 0: + empty = np.empty(0) + return {"x": empty, "ymin": empty, "ymax": empty, **base, "method": "reduce"} + if rng * itemsize > _PLOT_FULL_READ_MAX_BYTES: + if remote: + step = max(1, -(-rng // max_points)) + y = np.asarray(read(start, stop, step)) + x = np.arange(start, stop, step) + m = min(len(x), len(y)) + return {"x": x[:m], "ymin": y[:m], "ymax": y[:m], **base, "method": "sample"} + span = _stream_span(rng, itemsize, chunklen) + env = _minmax_buckets_streaming( + lambda s, e: read(start + s, start + e), rng, max_points, span=span + ) + else: + env = _reduce_envelope(np.asarray(read(start, stop)), rng, max_points) + env["x"] = np.asarray(env["x"]) + start + return {**env, **base, "method": "reduce"} + def _column_summary_envelope( self, table: Any, column: str | int | None, n: int, max_points: int ) -> dict[str, np.ndarray] | None: diff --git a/tests/b2view/test_basics.py b/tests/b2view/test_basics.py index c9850849..328ec296 100644 --- a/tests/b2view/test_basics.py +++ b/tests/b2view/test_basics.py @@ -668,6 +668,55 @@ async def test_plot_column(store_path): assert "leaf1" in screen.plot_title assert "envelope" in screen.plot_title + # ── Zoom / pan / reset / exact range from the plot modal ───────── + from blosc2.b2view.app import PlotRangeScreen + + n = screen.n + assert (screen.row_start, screen.row_stop) == (0, n) + + # '+' zooms in about the centre: the window halves and re-centres. + await pilot.press("plus") + await pilot.pause() + assert screen.row_stop - screen.row_start == n // 2 + assert screen.row_start > 0 + assert "rows" in screen.plot_title + + # '-' zooms back out to the whole series. 
+ await pilot.press("minus") + await pilot.pause() + assert (screen.row_start, screen.row_stop) == (0, n) + + # Pan right shifts a zoomed window without changing its width. + await pilot.press("plus") + await pilot.pause() + width = screen.row_stop - screen.row_start + start_before = screen.row_start + await pilot.press("right") + await pilot.pause() + assert screen.row_start > start_before + assert screen.row_stop - screen.row_start == width + + # '0' resets to the whole series. + await pilot.press("0") + await pilot.pause() + assert (screen.row_start, screen.row_stop) == (0, n) + + # 'g' opens a range modal; an exact range zooms there and reads it exactly. + await pilot.press("g") + await pilot.pause() + assert isinstance(app.screen, PlotRangeScreen) + app.screen.query_one("#range-input", Input).value = "1000:2000" + await pilot.press("enter") + await pilot.pause() + assert isinstance(app.screen, PlotScreen) + screen = app.screen + assert (screen.row_start, screen.row_stop) == (1000, 2000) + sub = leaf1_values()[1000:2000] + assert min(screen.ymin) <= sub.min() + 1e-9 + assert max(screen.ymax) >= sub.max() - 1e-9 + assert min(screen.x) >= 1000 + assert max(screen.x) < 2000 + # 'p' (like escape) closes the plot again await pilot.press("p") await pilot.pause() diff --git a/tests/b2view/test_plot_model.py b/tests/b2view/test_plot_model.py index 113c541e..08812346 100644 --- a/tests/b2view/test_plot_model.py +++ b/tests/b2view/test_plot_model.py @@ -144,6 +144,34 @@ def test_streaming_reducer_all_nan_bucket_stays_nan(): assert np.isnan(env["ymax"]).all() +@pytest.mark.parametrize(("node", "column"), [("/leaf", None), ("/ctable", "x")]) +def test_plot_series_subrange_is_exact(plot_store, node, column): + path, vals = plot_store + s, e = 4000, 9000 + with StoreBrowser(path) as browser: + sub = browser.plot_series(node, column=column, max_points=MAX_POINTS, row_start=s, row_stop=e) + assert sub["n"] == N # total, not the range + assert (sub["row_start"], 
sub["row_stop"]) == (s, e) + expected = _reduce_envelope(vals[s:e], e - s, MAX_POINTS) + np.testing.assert_array_equal(sub["x"], np.asarray(expected["x"]) + s) # absolute x + np.testing.assert_allclose(sub["ymin"], expected["ymin"], equal_nan=True) + np.testing.assert_allclose(sub["ymax"], expected["ymax"], equal_nan=True) + + +def test_plot_series_range_clamps_and_orders(plot_store): + path, _ = plot_store + with StoreBrowser(path) as browser: + # row_stop past the end clamps to n + clamped = browser.plot_series("/leaf", row_stop=10 * N) + assert clamped["row_stop"] == N + # start > stop is swapped into a valid range + swapped = browser.plot_series("/leaf", row_start=5000, row_stop=1000) + assert (swapped["row_start"], swapped["row_stop"]) == (1000, 5000) + # an empty range yields no buckets + empty = browser.plot_series("/leaf", row_start=2000, row_stop=2000) + assert len(empty["x"]) == 0 + + def test_streaming_reducer_integer_dtype(): vals = np.arange(1000, dtype=np.int64) env = _minmax_buckets_streaming(lambda s, e: vals[s:e], 1000, 100, span=33) diff --git a/todo/b2view.md b/todo/b2view.md index d1567d39..9ba7b158 100644 --- a/todo/b2view.md +++ b/todo/b2view.md @@ -15,11 +15,11 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of placeholder; offer on-demand decoding (e.g. a key to materialize the column, or decode just the cursor row). - [ ] SChunk preview is not implemented (`model.preview` returns a message). -- [ ] Plotting follow-ups for the `p` key: a live mini-plot that follows - paging, or zoom into a row range from the plot modal. If character - resolution proves too coarse, `textual-image` can render real matplotlib - output on kitty/iTerm2/sixel terminals, degrading to half-blocks - elsewhere. 
+- [ ] Plotting follow-ups for the `p` key (remaining): a live mini-plot in the + data panel that follows paging; and, if braille resolution proves too + coarse, `textual-image` to render real matplotlib output on + kitty/iTerm2/sixel terminals, degrading to half-blocks elsewhere. + (Row-range zoom — done 2026-06-13.) ### Testing - [ ] Visual regressions: consider `pytest-textual-snapshot` (SVG snapshots) @@ -27,6 +27,15 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of ## Done +- 2026-06-13: The `p` plot modal is now zoomable into a row range. + `plot_series` gained `row_start`/`row_stop` (the whole series keeps the fast + SUMMARY tier; a sub-range is read exactly, with `x` in absolute rows). + `PlotScreen` holds a fetch closure + total `n` and re-queries on `+`/`-` + (zoom about centre), `←`/`→` (pan), `0` (reset), `g` (type an exact + `start:stop` via `PlotRangeScreen`); a key hint line and a `?`-help group + advertise the keys. Tests: `tests/b2view/test_plot_model.py` + (sub-range exactness, clamping/ordering) and the extended `test_plot_column` + Pilot journey. - 2026-06-13: Row paging re-aligns to the page grid after dim-mode single-row scrolls. `_scroll_navigable_viewport` shifts `start` by one row, which used to make every later page up/down carry that offset; `page_table` now takes From 435cf684bdef4cf75ad74f79489fd52792c0c897 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sun, 14 Jun 2026 14:40:28 +0200 Subject: [PATCH 12/24] Updated commit message (the test path reference changed): b2view: 'v' locks the data grid to the plotted row range Add a public CTable.slice(start, stop=None, /, *, copy=True): range- or slice-style bounds in live-row space; copy=False returns a zero-copy view (via _view_from_positions, like head/tail), copy=True a compact copy (via take), mirroring NDArray.slice. In the plot modal, 'v' now locks the data grid in place to the navigated row range instead of just jumping the cursor. 
For CTable sources the model registers a copy=False slice view per-path in _window_views (precedence over _filter_views, so it composes over an active filter); len(view) bounds paging for free. The app holds self.row_window, reloads in place via _enter/_exit_row_window, shows a WINDOW a:b header chip, and gains an esc unlock layer. NDArray plots still fall back to a cursor jump (follow-up: window them copy-free via the layout). --- src/blosc2/b2view/app.py | 107 +++++++++++++++++++++++++------ src/blosc2/b2view/model.py | 30 ++++++++- src/blosc2/ctable.py | 39 +++++++++++ tests/b2view/test_basics.py | 59 +++++++++++++++++ tests/ctable/test_ctable_take.py | 58 +++++++++++++++++ todo/b2view.md | 43 +++++++++++-- 6 files changed, 312 insertions(+), 24 deletions(-) diff --git a/src/blosc2/b2view/app.py b/src/blosc2/b2view/app.py index 72191d4d..2b337276 100644 --- a/src/blosc2/b2view/app.py +++ b/src/blosc2/b2view/app.py @@ -208,7 +208,7 @@ class HelpScreen(ModalScreen[None]): ("t / b", "first / last row"), ("g", "go to row..."), ("f", "filter rows (CTable)"), - ("escape", "clear the active filter"), + ("escape", "unlock a row window / clear the active filter"), ], ), ( @@ -228,6 +228,7 @@ class HelpScreen(ModalScreen[None]): ("left / right", "pan the zoomed window"), ("0", "reset to the whole series"), ("g", "type an exact start:stop row range"), + ("v", "lock the data grid to the current range (esc unlocks)"), ], ), ( @@ -524,13 +525,16 @@ def action_cancel(self) -> None: self.dismiss(None) -class PlotScreen(ModalScreen[None]): +class PlotScreen(ModalScreen["tuple[int, int] | None"]): """Modal plotting one numeric column; zoomable into a row sub-range. Keys: ``+``/``-`` zoom about the view centre, ``←``/``→`` pan, ``0`` reset to the whole series, ``g`` type an exact ``start:stop`` range. Each change re-fetches the envelope for the new range (exact for sub-ranges) via the *fetch* closure, so zooming reveals detail the whole-series buckets hide. 
+ + ``v`` dismisses with the current ``(row_start, row_stop)`` so the caller can + jump the data grid to the range you navigated to; closing dismisses ``None``. """ CSS = """ @@ -557,7 +561,7 @@ class PlotScreen(ModalScreen[None]): } """ - _KEYS_HINT = "+/- zoom · ←/→ pan · 0 reset · g range · q close" + _KEYS_HINT = "+/- zoom · ←/→ pan · 0 reset · g range · v view rows · q close" _MIN_WIDTH = 16 # smallest zoom window (rows), so the envelope still reads BINDINGS: ClassVar = [ @@ -571,6 +575,7 @@ class PlotScreen(ModalScreen[None]): ("right", "pan_right", "Pan right"), ("0", "reset_range", "Reset"), ("g", "goto_range", "Range"), + ("v", "view_range", "View rows"), ] def __init__(self, *, title_prefix: str, fetch, n: int, row_start: int, row_stop: int, series: dict): @@ -660,6 +665,10 @@ def _on_range(result: tuple[int, int] | None) -> None: self.app.push_screen(PlotRangeScreen(n=self.n, start=self.row_start, stop=self.row_stop), _on_range) + def action_view_range(self) -> None: + """v key — close the plot and jump the data grid to the current range.""" + self.dismiss((self.row_start, self.row_stop)) + def action_close(self) -> None: self.dismiss(None) @@ -734,6 +743,8 @@ def __init__( self._active_dim = 0 self._dim_mode = False self.loading_table_page = False + # Absolute (start, stop) of a locked row window from the plot's 'v' key. + self.row_window: tuple[int, int] | None = None def compose(self) -> ComposeResult: yield Header() @@ -873,6 +884,9 @@ def update_panels(self, path: str) -> None: self._data_layout = None self._active_dim = 0 self._dim_mode = False + # A locked row window does not survive navigating to a node. 
+ self.row_window = None + self.browser.clear_row_window(path) if info.kind == "group": data_header.display = False data_table_row.display = False @@ -1355,23 +1369,32 @@ def _update_data_header(self, data: dict) -> None: header_parts.append(f"rows {data['start']}:{data['stop']} of {data['nrows']}") if "col_start" in data: header_parts.append(f"cols {data['col_start']}:{data['col_stop']} of {data['ncols']}") - if data.get("source_kind") == "ctable" and self.browser is not None: - flt = self.browser.get_filter(self.selected_path) - col_flt = self.browser.get_column_filter(self.selected_path) - if flt: - total = self.browser.base_nrows(self.selected_path) - header_parts.append(f"filter: [bold]{markup_escape(flt)}[/bold] ({total} total)") - if col_flt: - total_cols = self.browser.base_ncols(self.selected_path) - header_parts.append(f"cols: [bold]{markup_escape(col_flt)}[/bold] ({total_cols} total)") - if flt or col_flt: - header_parts.append("clear") + header_parts.extend(self._window_and_filter_chips(data)) line = ", ".join(header_parts) if self._dim_mode and layout is not None: line = f"[reverse]{line}[/reverse]" self.query_one("#data-header", Static).update(line) + def _window_and_filter_chips(self, data: dict) -> list[str]: + """Header chips for a locked row window and any active CTable filters.""" + chips: list[str] = [] + if self.row_window is not None: + ws, we = self.row_window + chips.append(f"[reverse] WINDOW {ws}:{we} [/reverse]") + if data.get("source_kind") == "ctable" and self.browser is not None: + flt = self.browser.get_filter(self.selected_path) + col_flt = self.browser.get_column_filter(self.selected_path) + if flt: + total = self.browser.base_nrows(self.selected_path) + chips.append(f"filter: [bold]{markup_escape(flt)}[/bold] ({total} total)") + if col_flt: + total_cols = self.browser.base_ncols(self.selected_path) + chips.append(f"cols: [bold]{markup_escape(col_flt)}[/bold] ({total_cols} total)") + if flt or col_flt or self.row_window is not None: 
+ chips.append("unlock/clear") + return chips + def _make_global_scrollbar(self, *, start: int, stop: int, total: int, size: int, track: str) -> str: size = max(1, size) total = max(1, total) @@ -1530,9 +1553,54 @@ def fetch(start: int, stop: int | None) -> dict: row_start=series["row_start"], row_stop=series["row_stop"], series=series, - ) + ), + self._view_plot_range, ) + def _view_plot_range(self, span: tuple[int, int] | None) -> None: + """Lock the data grid to a row range chosen with 'v' in the plot modal. + + For CTable nodes the grid is replaced in place with a zero-copy + ``slice`` view of the range, so paging cannot leave it (``esc`` unlocks). + Other source kinds (e.g. plain NDArrays) fall back to a cursor jump until + their windowing lands. + """ + if span is None or self.table_page is None: + return + start, stop = span + if self.table_page.get("source_kind") == "ctable" and self.browser is not None: + self._enter_row_window(start, stop) + else: + self._go_to_row(start) + self.notify(f"Viewing rows {start}:{stop}") + + def _enter_row_window(self, start: int, stop: int) -> None: + """Replace the CTable grid with a locked [start:stop] window view.""" + try: + self.browser.set_row_window(self.selected_path, start, stop) + except Exception as exc: # pragma: no cover - defensive + self.notify(f"Could not lock rows: {exc}", severity="error") + return + self.row_window = (start, stop) + self.table_buffer = None + data = self._load_table_page(self.selected_path, 0) + self._update_data_table(data, cursor_row=0, cursor_col=0) + self._update_data_header(data) + self.query_one("#data-table", DataTable).focus() + self.notify(f"Locked to rows {start}:{stop} · esc to unlock") + + def _exit_row_window(self) -> None: + """Unlock the row window and restore the full CTable grid.""" + if self.row_window is None or self.browser is None: + return + self.browser.clear_row_window(self.selected_path) + self.row_window = None + self.table_buffer = None + data = 
self._load_table_page(self.selected_path, 0) + self._update_data_table(data, cursor_row=0, cursor_col=0) + self._update_data_header(data) + self.query_one("#data-table", DataTable).focus() + def action_go_to_column(self) -> None: if not self._in_data_grid(): return @@ -1840,16 +1908,19 @@ def action_dim_toggle_nav(self) -> None: self._dim_toggle() def action_dim_exit(self) -> None: - """Escape: exit dim mode, or clear an active CTable filter. + """Escape: exit dim mode, unlock a row window, or clear a CTable filter. - One layer per press: dim mode, then the row filter, then the - column filter. + One layer per press: dim mode, then the locked row window, then the + row filter, then the column filter. """ if self._dim_mode: self._dim_mode = False if self.table_page is not None: self._update_data_header(self.table_page) return + if self.row_window is not None: + self._exit_row_window() + return if ( not self._in_data_grid() or self.table_page.get("source_kind") != "ctable" diff --git a/src/blosc2/b2view/model.py b/src/blosc2/b2view/model.py index cd8800ca..4b1af736 100644 --- a/src/blosc2/b2view/model.py +++ b/src/blosc2/b2view/model.py @@ -239,6 +239,8 @@ def __init__(self, urlpath: str): # Per-path row filters for CTable nodes (path -> expr / where() view) self._filters: dict[str, str] = {} self._filter_views: dict[str, Any] = {} + # Per-path locked row windows for CTable nodes (path -> slice() view) + self._window_views: dict[str, Any] = {} # Per-path column filters (path -> substring pattern / matched names) self._column_filters: dict[str, str] = {} self._column_selections: dict[str, list[str]] = {} @@ -366,7 +368,12 @@ def preview( return preview_array_1d(obj, start=start, stop=stop) return preview_array(obj, slices=slices, max_rows=max_rows, max_cols=max_cols) if kind == "ctable": - obj = self._filter_views.get(path, obj) + # A locked row window (set by 'v') takes precedence; it is sliced + # from whatever was visible, so it already folds in any row filter. 
+ if path in self._window_views: + obj = self._window_views[path] + else: + obj = self._filter_views.get(path, obj) if columns is None: columns = self._column_selections.get(path) stop = min(start + max_rows, len(obj)) if stop is None else stop @@ -593,6 +600,27 @@ def get_filter(self, path: str) -> str | None: """Return the active filter expression for *path*, if any.""" return self._filters.get(self.normalize_path(path)) + def set_row_window(self, path: str, start: int, stop: int) -> int: + """Lock the CTable at *path* to live rows ``[start:stop]``; return its length. + + The window is a zero-copy :meth:`CTable.slice` view of whatever is + currently visible (so it composes over any active row filter). Paging + then cannot leave the range because the view reports only its own rows. + """ + path = self.normalize_path(path) + base = self._filter_views.get(path, self._get_object(path)) + view = base.slice(start, stop, copy=False) + self._window_views[path] = view + return len(view) + + def clear_row_window(self, path: str) -> None: + """Remove any locked row window from *path*.""" + self._window_views.pop(self.normalize_path(path), None) + + def get_row_window(self, path: str) -> bool: + """Return whether *path* currently has a locked row window.""" + return self.normalize_path(path) in self._window_views + def base_nrows(self, path: str) -> int: """Return the unfiltered row count of the CTable at *path*.""" return len(self._get_object(path)) diff --git a/src/blosc2/ctable.py b/src/blosc2/ctable.py index 90855efa..6107f563 100644 --- a/src/blosc2/ctable.py +++ b/src/blosc2/ctable.py @@ -5156,6 +5156,45 @@ def take(self, indices, /) -> CTable: result._last_pos = n - 1 if n > 0 else None return result + def slice(self, start, stop=None, /, *, copy: bool = True) -> CTable: + """Return a contiguous range of live (non-deleted) rows. 
+ + The range is given the way :func:`range` takes its bounds, either as a + single stop (``table.slice(stop)``), as start/stop integers + (``table.slice(start, stop)``), or as a Python ``slice`` + (``table.slice(slice(start, stop))``). Negative bounds count from the + end; ``step`` is not supported. + + Parameters + ---------- + start, stop: + Range bounds, interpreted as logical positions among the live rows. + copy: + When ``True`` (the default, mirroring :meth:`NDArray.slice`) a compact + copy of the range is returned. When ``False`` a zero-copy view is + returned instead, sharing the parent's column data (read-only, like + :meth:`head`/:meth:`tail`). + + Returns + ------- + out: :ref:`CTable` + The requested rows, re-indexed from 0. + """ + if isinstance(start, slice): + if stop is not None: + raise TypeError("pass either a slice or start/stop integers, not both") + key = start + else: + key = slice(0, start) if stop is None else slice(start, stop) + if key.step not in (None, 1): + raise ValueError("CTable.slice does not support a step") + lo, hi, _ = key.indices(self.nrows) + hi = max(lo, hi) + if copy: + return self.take(np.arange(lo, hi, dtype=np.int64)) + positions = self._live_positions_from_valid_rows_chunks()[lo:hi] + return self._view_from_positions(np.asarray(positions)) + def head(self, N: int = 5) -> CTable: """Return a view of the first *N* live rows (default 5).""" if N <= 0: diff --git a/tests/b2view/test_basics.py b/tests/b2view/test_basics.py index 328ec296..6f7d299a 100644 --- a/tests/b2view/test_basics.py +++ b/tests/b2view/test_basics.py @@ -717,7 +717,66 @@ async def test_plot_column(store_path): assert min(screen.x) >= 1000 assert max(screen.x) < 2000 + # 'v' closes the plot and jumps the data grid to the range start (1000), + # leaving the table navigable rather than clipping it to the range. 
+ await pilot.press("v") + await pilot.pause() + assert not isinstance(app.screen, PlotScreen) + table = app.query_one("#data-table", DataTable) + assert app.table_page["start"] + table.cursor_row == 1000 + # 'p' (like escape) closes the plot again await pilot.press("p") await pilot.pause() + assert isinstance(app.screen, PlotScreen) + await pilot.press("p") + await pilot.pause() assert not isinstance(app.screen, PlotScreen) + + +async def test_plot_view_locks_ctable_window(store_path): + """'v' on a CTable plot replaces the grid with a locked [start:stop] window.""" + pytest.importorskip("textual_plotext") + from blosc2.b2view.app import PlotScreen + + app = B2ViewApp(store_path, start_path="/level0/ctable", start_panel="data") + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + table = await focus_data_table(pilot) + assert app.table_page["nrows"] == NROWS + + # Plot column 'b' (== row index), then zoom to an exact 100:110 range. + table.move_cursor(column=app.table_page["columns"].index("b")) + await pilot.press("p") + await pilot.pause() + assert isinstance(app.screen, PlotScreen) + await pilot.press("g") + await pilot.pause() + app.screen.query_one("#range-input", Input).value = "100:110" + await pilot.press("enter") + await pilot.pause() + assert (app.screen.row_start, app.screen.row_stop) == (100, 110) + + # 'v' locks the grid to that window: the modal closes, the grid shows + # exactly those 10 rows (b == 100..109), re-indexed from 0. + await pilot.press("v") + await wait_for_table(pilot) + assert not isinstance(app.screen, PlotScreen) + assert app.row_window == (100, 110) + page = app.table_page + assert page["nrows"] == 10 + np.testing.assert_array_equal(page["data"]["b"], np.arange(100, 110)) + + # Paging cannot leave the window: 'b'(ottom) lands on its last row (109). 
+ await pilot.press("b") + await wait_for_table(pilot) + page = app.table_page + assert page["stop"] == 10 + assert page["data"]["b"][table.cursor_row] == 109 + + # 'esc' unlocks and restores the full table. + await pilot.press("escape") + await wait_for_table(pilot) + assert app.row_window is None + assert app.browser.get_row_window("/level0/ctable") is False + assert app.table_page["nrows"] == NROWS diff --git a/tests/ctable/test_ctable_take.py b/tests/ctable/test_ctable_take.py index 2e2bf7f8..3d36d81d 100644 --- a/tests/ctable/test_ctable_take.py +++ b/tests/ctable/test_ctable_take.py @@ -159,3 +159,61 @@ def test_top_level_take_rejects_axis_for_ctable_and_column(): blosc2.take(t, [0], axis=0) with pytest.raises(ValueError, match="axis"): blosc2.take(t["id"], [0], axis=0) + + +# ── CTable.slice (contiguous range of live rows; copy or zero-copy view) ── + + +def test_slice_range_styles_agree(): + """slice(stop), slice(start, stop) and slice(slice(...)) select the same rows.""" + t = make_table(10) + expected = np.arange(2, 6, dtype=np.int32) + np.testing.assert_array_equal(t.slice(2, 6)["id"][:], expected) + np.testing.assert_array_equal(t.slice(slice(2, 6))["id"][:], expected) + # single-arg form behaves like range(stop) + np.testing.assert_array_equal(t.slice(4)["id"][:], np.arange(0, 4, dtype=np.int32)) + + +def test_slice_negative_and_out_of_range_bounds_clamp(): + t = make_table(10) + # negative start counts from the end + np.testing.assert_array_equal(t.slice(-3, 10)["id"][:], np.arange(7, 10, dtype=np.int32)) + # stop past the end clamps; start past stop is empty + np.testing.assert_array_equal(t.slice(8, 999)["id"][:], np.arange(8, 10, dtype=np.int32)) + assert t.slice(6, 2).nrows == 0 + + +def test_slice_copy_false_is_a_zero_copy_view(): + t = make_table(8) + view = t.slice(2, 6, copy=False) + # Shares the parent's column storage (no copy) and re-indexes from 0. 
+ assert view._cols is t._cols + assert view.base is t + assert view.nrows == 4 + np.testing.assert_array_equal(view["id"][:], np.arange(2, 6, dtype=np.int32)) + + +def test_slice_copy_true_is_an_independent_compact_table(): + t = make_table(8) + sub = t.slice(2, 6) # copy=True by default + assert sub._cols is not t._cols + assert sub.nrows == 4 + np.testing.assert_array_equal(sub["id"][:], np.arange(2, 6, dtype=np.int32)) + + +def test_slice_skips_deleted_rows_in_logical_space(): + t = make_table(8) + t.delete(2) + t.delete(5) + # Live logical ids are [0, 1, 3, 4, 6, 7]; logical [1:4] -> ids [1, 3, 4]. + for copy in (True, False): + sub = t.slice(1, 4, copy=copy) + np.testing.assert_array_equal(sub["id"][:], np.array([1, 3, 4], dtype=np.int32)) + + +def test_slice_rejects_step_and_double_bounds(): + t = make_table(4) + with pytest.raises(ValueError, match="step"): + t.slice(slice(0, 4, 2)) + with pytest.raises(TypeError): + t.slice(slice(0, 4), 4) diff --git a/todo/b2view.md b/todo/b2view.md index 9ba7b158..61ce2284 100644 --- a/todo/b2view.md +++ b/todo/b2view.md @@ -15,11 +15,31 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of placeholder; offer on-demand decoding (e.g. a key to materialize the column, or decode just the cursor row). - [ ] SChunk preview is not implemented (`model.preview` returns a message). -- [ ] Plotting follow-ups for the `p` key (remaining): a live mini-plot in the - data panel that follows paging; and, if braille resolution proves too - coarse, `textual-image` to render real matplotlib output on - kitty/iTerm2/sixel terminals, degrading to half-blocks elsewhere. - (Row-range zoom — done 2026-06-13.) +- [ ] `m` key in the plot modal: render a high-res matplotlib view of the + *currently shown* range (the braille envelope stays the fast navigator; + `m` is the explicit drill-down for the range you zoomed/panned to). 
+ Plot the *raw* values of the window read exactly (so "high-res" means more + detail, not just more pixels of the same min/max buckets) — cap the window + so an un-zoomed million-row series doesn't trigger a huge read (require + zooming in, or fall back to the envelope above the cap). Display via + `textual-image` with a capability ladder: real image on kitty/iTerm2/sixel + → half-blocks elsewhere → "terminal can't show images, staying in braille" + message. Push it as a screen on top of `PlotScreen` so `q` returns to the + braille view with the zoom intact. Deps: add `textual-image` and promote + `matplotlib` from the dev group into the `plot` extra. Gated on need: + only worth it if the ~200-point braille resolution proves too coarse. +- [ ] Extend the `v` locked row window to NDArray sources. The CTable path + (no-copy `slice` view) shipped 2026-06-14; plain NDArray plots still fall + back to a cursor jump. Do it copy-free via the layout, not + `NDArray.slice` (which copies): clamp `DataSliceLayout`'s navigable row + dim to `[start, stop]`, offset paging by `start`, and report the windowed + length so the grid bounds match the CTable behaviour. Reuse the existing + `self.row_window` state and `esc`-unlock layer. +- [ ] Live mini-plot in the data panel that follows paging: a small, + always-visible braille plot of the current row window (or cursor column), + redrawn on paging — a sparkline companion to the table, vs. the one-shot + `p` modal. Reuses `plot_series`; the work is layout (find room in the + data panel) and wiring the redraw to the paging events. ### Testing - [ ] Visual regressions: consider `pytest-textual-snapshot` (SVG snapshots) @@ -27,6 +47,19 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of ## Done +- 2026-06-14: `v` in the plot modal locks the data grid to the navigated row + range (esc unlocks). 
Backed by a new public `CTable.slice(start, stop=None, + *, copy=True)` — `range`-style/`slice`-object bounds in live-row space, + `copy=False` returns a zero-copy view (via `_view_from_positions`, like + `head`/`tail`), `copy=True` a compact copy (via `take`), mirroring + `NDArray.slice`. b2view registers the `copy=False` view per-path in the + model's `_window_views` (precedence over `_filter_views`, so it composes over + an active filter); `len(view)` bounds paging for free. App holds + `self.row_window`; `_enter_row_window`/`_exit_row_window` reload in place, the + header shows a `WINDOW a:b` chip, and `action_dim_exit` gained the unlock + layer. NDArray plots still fall back to a cursor jump (see Pending). Tests: + `tests/ctable/test_ctable_slice.py` and `test_plot_view_locks_ctable_window` + (plus the cursor-jump fallback still covered in `test_plot_column`). - 2026-06-13: The `p` plot modal is now zoomable into a row range. `plot_series` gained `row_start`/`row_stop` (the whole series keeps the fast SUMMARY tier; a sub-range is read exactly, with `x` in absolute rows). From cf1be1b13c2066e2eaa5f4ebf6ff2e2dcaa27301 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sun, 14 Jun 2026 14:55:00 +0200 Subject: [PATCH 13/24] b2view: 'v' locks the data grid to the plotted row range Add a public CTable.slice(start, stop=None, /, *, copy=True): range- or slice-style bounds in live-row space; copy=False returns a zero-copy view (via _view_from_positions, like head/tail), copy=True a compact copy (via take), mirroring NDArray.slice. In the plot modal, 'v' now locks the data grid in place to the navigated row range instead of jumping the cursor; esc unlocks. CTable sources use a copy=False slice view registered per-path in the model's _window_views (precedence over _filter_views, so it composes over a filter); len(view) bounds paging for free. 
NDArray sources are clamped copy-free via the layout: DataSliceLayout gains a row_window field, preview_array_from_layout reports nrows = stop-start and offsets reads by start. The app holds self.row_window, reloads in place, shows a WINDOW a:b header chip, and gains an esc unlock layer in action_dim_exit. --- src/blosc2/b2view/app.py | 66 ++++++++++++++++++++++++------------- src/blosc2/b2view/model.py | 31 ++++++++++++++--- tests/b2view/test_basics.py | 27 ++++++++++++--- todo/b2view.md | 21 ++++++------ 4 files changed, 104 insertions(+), 41 deletions(-) diff --git a/src/blosc2/b2view/app.py b/src/blosc2/b2view/app.py index 2b337276..b03aa087 100644 --- a/src/blosc2/b2view/app.py +++ b/src/blosc2/b2view/app.py @@ -1176,7 +1176,10 @@ def _sync_layout_scroll(self, start: int, layout: DataSliceLayout) -> None: navigable = layout.navigable_dims if len(navigable) >= 1: row_dim = navigable[0] - total = layout.shape[row_dim] + # A locked window shortens the navigable row dim; scroll is + # window-relative, so clamp to the window length. + win_lo, win_hi = layout.row_window_bounds(row_dim) + total = win_hi - win_lo layout.row_start = max(0, min(start, total)) layout.row_stop = min(layout.row_start + self._table_page_size() * 10, total) if len(navigable) >= 2: @@ -1365,6 +1368,10 @@ def _update_data_header(self, data: dict) -> None: if self._dim_mode: header_parts.append("[reverse] DIM MODE [/reverse]") header_parts.append("←→dim ↑↓val fix/nav exit") + elif self.row_window is not None: + ws, we = self.row_window + header_parts.append(f"[reverse] WINDOW {ws}:{we} [/reverse]") + header_parts.append("unlock") else: header_parts.append(f"rows {data['start']}:{data['stop']} of {data['nrows']}") if "col_start" in data: @@ -1560,43 +1567,56 @@ def fetch(start: int, stop: int | None) -> dict: def _view_plot_range(self, span: tuple[int, int] | None) -> None: """Lock the data grid to a row range chosen with 'v' in the plot modal. 
- For CTable nodes the grid is replaced in place with a zero-copy - ``slice`` view of the range, so paging cannot leave it (``esc`` unlocks). - Other source kinds (e.g. plain NDArrays) fall back to a cursor jump until - their windowing lands. + The grid is replaced in place with just the range, so paging cannot + leave it (``esc`` unlocks). CTable nodes use a zero-copy ``slice`` view; + NDArray nodes narrow the layout's navigable row dim. Other source kinds + fall back to a cursor jump. """ if span is None or self.table_page is None: return start, stop = span - if self.table_page.get("source_kind") == "ctable" and self.browser is not None: - self._enter_row_window(start, stop) + kind = self.table_page.get("source_kind") + if kind == "ctable" and self.browser is not None: + self._enter_row_window(start, stop, backend="ctable") + elif kind in {"ndarray_slice", "ndarray2d"} and self._data_layout is not None: + self._enter_row_window(start, stop, backend="ndarray") else: self._go_to_row(start) self.notify(f"Viewing rows {start}:{stop}") - def _enter_row_window(self, start: int, stop: int) -> None: - """Replace the CTable grid with a locked [start:stop] window view.""" - try: - self.browser.set_row_window(self.selected_path, start, stop) - except Exception as exc: # pragma: no cover - defensive - self.notify(f"Could not lock rows: {exc}", severity="error") - return + def _enter_row_window(self, start: int, stop: int, *, backend: str) -> None: + """Replace the grid with a locked [start:stop] window (in place).""" + if backend == "ctable": + try: + self.browser.set_row_window(self.selected_path, start, stop) + except Exception as exc: # pragma: no cover - defensive + self.notify(f"Could not lock rows: {exc}", severity="error") + return + else: # ndarray: narrow the navigable row dim, scroll back to its top + self._data_layout.row_window = (start, stop) + self._data_layout.row_start = 0 + self._data_layout.row_stop = 0 self.row_window = (start, stop) - self.table_buffer = 
None - data = self._load_table_page(self.selected_path, 0) - self._update_data_table(data, cursor_row=0, cursor_col=0) - self._update_data_header(data) - self.query_one("#data-table", DataTable).focus() + self._reload_row_window(0) self.notify(f"Locked to rows {start}:{stop} · esc to unlock") def _exit_row_window(self) -> None: - """Unlock the row window and restore the full CTable grid.""" - if self.row_window is None or self.browser is None: + """Unlock the row window and restore the full grid.""" + if self.row_window is None: return - self.browser.clear_row_window(self.selected_path) + if self.browser is not None: + self.browser.clear_row_window(self.selected_path) + if self._data_layout is not None and self._data_layout.row_window is not None: + self._data_layout.row_window = None + self._data_layout.row_start = 0 + self._data_layout.row_stop = 0 self.row_window = None + self._reload_row_window(0) + + def _reload_row_window(self, start: int) -> None: + """Rebuild the data grid from scratch after a window change.""" self.table_buffer = None - data = self._load_table_page(self.selected_path, 0) + data = self._load_table_page(self.selected_path, start) self._update_data_table(data, cursor_row=0, cursor_col=0) self._update_data_header(data) self.query_one("#data-table", DataTable).focus() diff --git a/src/blosc2/b2view/model.py b/src/blosc2/b2view/model.py index 4b1af736..a509ce72 100644 --- a/src/blosc2/b2view/model.py +++ b/src/blosc2/b2view/model.py @@ -150,6 +150,11 @@ class DataSliceLayout: col_start: int = 0 col_stop: int = 0 + # Optional locked window (absolute [start, stop)) on the navigable row dim. + # When set, the grid sees a row dimension of length ``stop - start`` whose + # logical row 0 maps to absolute row ``start`` (see ``preview_array_from_layout``). 
+ row_window: tuple[int, int] | None = None + @classmethod def from_shape(cls, shape: tuple[int, ...]) -> DataSliceLayout: """Create a default layout: leading dims fixed at 0, last up-to-2 dims navigable.""" @@ -213,8 +218,22 @@ def copy_with( row_stop=self.row_stop if row_stop is None else row_stop, col_start=self.col_start if col_start is None else col_start, col_stop=self.col_stop if col_stop is None else col_stop, + row_window=self.row_window, ) + def row_window_bounds(self, row_dim: int | None) -> tuple[int, int]: + """Return the absolute [start, stop) extent of the navigable row dim. + + Narrowed to ``row_window`` when one is set; otherwise the full dim. + """ + full = self.shape[row_dim] if row_dim is not None else 1 + if row_dim is None or self.row_window is None: + return 0, full + w0, w1 = self.row_window + w0 = max(0, min(w0, full)) + w1 = max(w0, min(w1, full)) + return w0, w1 + def total_for_dim(self, dim: int) -> int: """Return the total size of *dim*.""" if 0 <= dim < len(self.shape): @@ -768,8 +787,11 @@ def preview_array_from_layout( row_dim = navigable[0] if len(navigable) >= 1 else None col_dim = navigable[1] if len(navigable) >= 2 else None - # Page sizes - nrows = shape[row_dim] if row_dim is not None else 1 + # Page sizes. A locked row window narrows the navigable row dim to + # [win_lo, win_hi): the grid sees only ``nrows`` rows (so paging cannot + # leave it) and every read is offset by ``win_lo``. + win_lo, win_hi = layout.row_window_bounds(row_dim) + nrows = (win_hi - win_lo) if row_dim is not None else 1 ncols = shape[col_dim] if col_dim is not None else 1 # Clamp fixed values @@ -789,9 +811,10 @@ def preview_array_from_layout( if i in fixed_values: idx.append(fixed_values[i]) elif row_dim is not None and i == row_dim: + # ``layout.row_start`` is window-relative; offset into the array. 
start = max(0, min(layout.row_start, nrows)) - stop = min(max(start, start + max_rows), nrows) - idx.append(slice(start, stop)) + stop = min(start + max_rows, nrows) + idx.append(slice(win_lo + start, win_lo + stop)) elif col_dim is not None and i == col_dim: col_start = max(0, min(layout.col_start, ncols)) col_stop = min(col_start + max_cols, ncols) diff --git a/tests/b2view/test_basics.py b/tests/b2view/test_basics.py index 6f7d299a..420d9f24 100644 --- a/tests/b2view/test_basics.py +++ b/tests/b2view/test_basics.py @@ -717,13 +717,32 @@ async def test_plot_column(store_path): assert min(screen.x) >= 1000 assert max(screen.x) < 2000 - # 'v' closes the plot and jumps the data grid to the range start (1000), - # leaving the table navigable rather than clipping it to the range. + # 'v' locks the 1-D array grid to the [1000:2000) window via the layout: + # the grid sees 1000 rows, re-indexed from 0, and logical row 0 reads + # absolute row 1000. Paging cannot leave the window. await pilot.press("v") - await pilot.pause() + await wait_for_table(pilot) assert not isinstance(app.screen, PlotScreen) table = app.query_one("#data-table", DataTable) - assert app.table_page["start"] + table.cursor_row == 1000 + assert app.row_window == (1000, 2000) + page = app.table_page + assert page["nrows"] == 1000 + assert page["start"] == 0 + assert page["data"]["value"][0] == pytest.approx(leaf1_values()[1000]) + + # 'b'(ottom) lands on the window's last row (absolute 1999), still inside. + await pilot.press("b") + await wait_for_table(pilot) + page = app.table_page + assert page["stop"] == 1000 + assert page["data"]["value"][table.cursor_row] == pytest.approx(leaf1_values()[1999]) + + # 'esc' unlocks and restores the full array. 
+ await pilot.press("escape") + await wait_for_table(pilot) + assert app.row_window is None + assert app._data_layout.row_window is None + assert app.table_page["nrows"] == LEAF1_LEN # 'p' (like escape) closes the plot again await pilot.press("p") diff --git a/todo/b2view.md b/todo/b2view.md index 61ce2284..d14f38d6 100644 --- a/todo/b2view.md +++ b/todo/b2view.md @@ -28,13 +28,6 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of braille view with the zoom intact. Deps: add `textual-image` and promote `matplotlib` from the dev group into the `plot` extra. Gated on need: only worth it if the ~200-point braille resolution proves too coarse. -- [ ] Extend the `v` locked row window to NDArray sources. The CTable path - (no-copy `slice` view) shipped 2026-06-14; plain NDArray plots still fall - back to a cursor jump. Do it copy-free via the layout, not - `NDArray.slice` (which copies): clamp `DataSliceLayout`'s navigable row - dim to `[start, stop]`, offset paging by `start`, and report the windowed - length so the grid bounds match the CTable behaviour. Reuse the existing - `self.row_window` state and `esc`-unlock layer. - [ ] Live mini-plot in the data panel that follows paging: a small, always-visible braille plot of the current row window (or cursor column), redrawn on paging — a sparkline companion to the table, vs. the one-shot @@ -47,6 +40,15 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of ## Done +- 2026-06-14: NDArray sources also support the `v` locked window, copy-free via + the layout (not `NDArray.slice`, which copies). `DataSliceLayout` gained a + `row_window` field + `row_window_bounds`; `preview_array_from_layout` narrows + the navigable row dim to `[w0, w1)` — it reports `nrows = w1 - w0` (so paging + is bounded) and offsets every read by `w0` (so logical row 0 reads absolute + `w0`). `_sync_layout_scroll` clamps scroll to the window length. 
+ `_view_plot_range` routes ctable→slice-view / ndarray→layout, and + `_enter`/`_exit_row_window` share a `_reload_row_window` helper. Covered by + the NDArray-leaf window assertions in the extended `test_plot_column`. - 2026-06-14: `v` in the plot modal locks the data grid to the navigated row range (esc unlocks). Backed by a new public `CTable.slice(start, stop=None, *, copy=True)` — `range`-style/`slice`-object bounds in live-row space, @@ -57,9 +59,8 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of an active filter); `len(view)` bounds paging for free. App holds `self.row_window`; `_enter_row_window`/`_exit_row_window` reload in place, the header shows a `WINDOW a:b` chip, and `action_dim_exit` gained the unlock - layer. NDArray plots still fall back to a cursor jump (see Pending). Tests: - `tests/ctable/test_ctable_slice.py` and `test_plot_view_locks_ctable_window` - (plus the cursor-jump fallback still covered in `test_plot_column`). + layer. Tests: `CTable.slice` cases in `tests/ctable/test_ctable_take.py` and + `test_plot_view_locks_ctable_window`. - 2026-06-13: The `p` plot modal is now zoomable into a row range. `plot_series` gained `row_start`/`row_stop` (the whole series keeps the fast SUMMARY tier; a sub-range is read exactly, with `x` in absolute rows). From fedfb1e868516cd9129db2bfe2f5c92bf38b784b Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sun, 14 Jun 2026 18:15:59 +0200 Subject: [PATCH 14/24] b2view: 'h' opens a high-res matplotlib view of the plotted range In the plot modal, 'h' renders the raw values of the currently shown range as a real image over the braille plot; q/esc/h return with the zoom intact. The braille envelope stays the fast navigator. model.read_series reads the exact values for [row_start, row_stop) (same series selection as plot_series, no bucketing). PlotScreen gains a raw_fetch closure and action_hires, capped at _HIRES_MAX_POINTS (50k; above that it asks you to zoom in). 
HiResPlotScreen renders matplotlib (Agg) to PNG and shows it via textual-image's auto Image (kitty/iTerm2/sixel -> half-cells), scaled to fill the dialog; a focusable VerticalScroll body keeps the screen's keys live, and it closes with pop_screen. Add textual-image and matplotlib to the 'plot' extra. --- pyproject.toml | 5 +- src/blosc2/b2view/app.py | 162 +++++++++++++++++++++++++++++++- src/blosc2/b2view/model.py | 58 ++++++++++++ tests/b2view/test_basics.py | 48 ++++++++++ tests/b2view/test_plot_model.py | 21 +++++ todo/b2view.md | 24 +++-- 6 files changed, 301 insertions(+), 17 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6266545f..7887b47d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,8 +52,9 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html" [project.optional-dependencies] parquet = ["pyarrow"] -# in-terminal plots for the 'p' key in b2view -plot = ["textual-plotext"] +# in-terminal plots for the 'p' key in b2view; 'h' adds a high-res matplotlib +# view rendered as a real image (kitty/iTerm2/sixel) or half-cells elsewhere +plot = ["textual-plotext", "textual-image", "matplotlib"] [project.scripts] parquet-to-blosc2 = "blosc2.cli.parquet_to_blosc2:main" diff --git a/src/blosc2/b2view/app.py b/src/blosc2/b2view/app.py index b03aa087..3c7c3172 100644 --- a/src/blosc2/b2view/app.py +++ b/src/blosc2/b2view/app.py @@ -2,6 +2,7 @@ from __future__ import annotations +import io from typing import TYPE_CHECKING, Any, ClassVar import numpy as np @@ -17,6 +18,13 @@ except ImportError: # plotting is optional PlotextPlot = None +try: + # Auto-selects the best terminal image protocol (kitty/iTerm2/sixel), + # degrading to colored half-cells; used by the high-res 'h' plot view. 
+ from textual_image.widget import Image as TextualImage +except ImportError: # high-res view is optional + TextualImage = None + from blosc2.b2view.model import DataSliceLayout, StoreBrowser from blosc2.b2view.render import ( column_float_decimals, @@ -229,6 +237,7 @@ class HelpScreen(ModalScreen[None]): ("0", "reset to the whole series"), ("g", "type an exact start:stop row range"), ("v", "lock the data grid to the current range (esc unlocks)"), + ("h", "high-res matplotlib image of the current range"), ], ), ( @@ -561,8 +570,9 @@ class PlotScreen(ModalScreen["tuple[int, int] | None"]): } """ - _KEYS_HINT = "+/- zoom · ←/→ pan · 0 reset · g range · v view rows · q close" + _KEYS_HINT = "+/- zoom · ←/→ pan · 0 reset · g range · v view rows · h hi-res · q close" _MIN_WIDTH = 16 # smallest zoom window (rows), so the envelope still reads + _HIRES_MAX_POINTS = 50_000 # above this, ask the user to zoom in first BINDINGS: ClassVar = [ ("escape", "close", "Close"), @@ -576,12 +586,24 @@ class PlotScreen(ModalScreen["tuple[int, int] | None"]): ("0", "reset_range", "Reset"), ("g", "goto_range", "Range"), ("v", "view_range", "View rows"), + ("h", "hires", "High-res"), ] - def __init__(self, *, title_prefix: str, fetch, n: int, row_start: int, row_stop: int, series: dict): + def __init__( + self, + *, + title_prefix: str, + fetch, + n: int, + row_start: int, + row_stop: int, + series: dict, + raw_fetch=None, + ): super().__init__() self.title_prefix = title_prefix self._fetch = fetch + self._raw_fetch = raw_fetch # (start, stop) -> {"x", "y", ...} raw read self.n = n self.row_start = row_start self.row_stop = row_stop @@ -669,10 +691,136 @@ def action_view_range(self) -> None: """v key — close the plot and jump the data grid to the current range.""" self.dismiss((self.row_start, self.row_stop)) + def action_hires(self) -> None: + """h key — open a high-res matplotlib image of the current raw range. 
+ + Pushed on top of this screen so ``q`` returns to the braille view with + the zoom intact. The braille envelope stays the fast navigator; this is + the drill-down once you have zoomed to a range worth seeing in detail. + """ + if self._raw_fetch is None or TextualImage is None or not _matplotlib_available(): + self.app.notify( + "High-res view needs the 'textual-image' and 'matplotlib' packages", + severity="warning", + ) + return + width = self.row_stop - self.row_start + if width > self._HIRES_MAX_POINTS: + self.app.notify( + f"Zoom in to ≤ {self._HIRES_MAX_POINTS} rows for a high-res view (now {width})", + severity="warning", + ) + return + series = self._raw_fetch(self.row_start, self.row_stop) + self.app.push_screen(HiResPlotScreen(title=self.plot_title, x=series["x"], y=series["y"])) + def action_close(self) -> None: self.dismiss(None) +def _matplotlib_available() -> bool: + """Whether matplotlib can be imported (the high-res view needs it).""" + try: + import matplotlib # noqa: F401 + except ImportError: + return False + return True + + +class HiResPlotScreen(ModalScreen[None]): + """A high-res matplotlib image of a raw series range, over the braille plot. + + Rendered with matplotlib (Agg) to a PNG and shown via ``textual-image``, + which auto-selects the best terminal protocol (kitty/iTerm2/sixel) and + degrades to colored half-cells elsewhere. ``q``/``esc``/``h`` return to the + braille view underneath with its zoom intact. 
+ """ + + CSS = """ + HiResPlotScreen { + align: center middle; + } + #hires-dialog { + width: 95%; + height: 90%; + border: thick $accent; + background: $surface; + padding: 1 2; + } + #hires-title { + text-style: bold; + height: 1; + } + #hires-body { + height: 1fr; + width: 1fr; + align: center middle; + } + #hires-image { + width: 100%; + height: 100%; + } + #hires-keys { + height: 1; + color: $text-muted; + } + """ + + _KEYS_HINT = "q/esc/h · back to braille" + + BINDINGS: ClassVar = [ + ("escape", "close", "Close"), + ("q", "close", "Close"), + ("h", "close", "Close"), + ] + + def __init__(self, *, title: str, x, y): + super().__init__() + self._title = title + self._x = x + self._y = y + + def compose(self) -> ComposeResult: + with Vertical(id="hires-dialog"): + yield Static(markup_escape(self._title), id="hires-title") + # A VerticalScroll is focusable, so the screen's key bindings fire + # (the image widget itself is not focusable). + yield VerticalScroll(id="hires-body") + yield Static(self._KEYS_HINT, id="hires-keys") + + def on_mount(self) -> None: + body = self.query_one("#hires-body", VerticalScroll) + body.focus() + try: + png = self._render_png() + except Exception as exc: # pragma: no cover - defensive + body.mount(Static(f"Could not render: {exc}")) + return + body.mount(TextualImage(io.BytesIO(png), id="hires-image")) + + def _render_png(self) -> bytes: + import matplotlib + + matplotlib.use("Agg") + import matplotlib.pyplot as plt + + fig, ax = plt.subplots(figsize=(12, 6), dpi=110) + ax.plot(self._x, self._y, linewidth=0.8, color="#1f77b4") + ax.set_xlabel("row") + ax.margins(x=0) + ax.grid(True, alpha=0.3) + fig.tight_layout() + buf = io.BytesIO() + fig.savefig(buf, format="png") + plt.close(fig) + return buf.getvalue() + + def action_close(self) -> None: + # pop_screen (not dismiss): this screen is pushed without a result + # callback, so it returns to the braille PlotScreen with its zoom intact. 
+ self.app.pop_screen() + + class B2ViewApp(App): """Browse TreeStore hierarchy and preview objects.""" @@ -1547,6 +1695,15 @@ def fetch(start: int, stop: int | None) -> dict: row_stop=stop, ) + def raw_fetch(start: int, stop: int | None) -> dict: + return self.browser.read_series( + self.selected_path, + column=column, + layout=layout, + row_start=start, + row_stop=stop, + ) + series = fetch(0, None) # whole series (uses the fast SUMMARY tier if any) x, _ymin, _ymax, _descr = _plot_view(series) if x.size == 0: @@ -1560,6 +1717,7 @@ def fetch(start: int, stop: int | None) -> dict: row_start=series["row_start"], row_stop=series["row_stop"], series=series, + raw_fetch=raw_fetch, ), self._view_plot_range, ) diff --git a/src/blosc2/b2view/model.py b/src/blosc2/b2view/model.py index a509ce72..e84ba29d 100644 --- a/src/blosc2/b2view/model.py +++ b/src/blosc2/b2view/model.py @@ -502,6 +502,64 @@ def _row_index(row_slice): raise ValueError(f"Cannot plot {kind!r} objects") + def read_series( + self, + path: str, + *, + column: str | int | None = None, + layout: DataSliceLayout | None = None, + row_start: int = 0, + row_stop: int | None = None, + ) -> dict[str, Any]: + """Return the *raw* values of one series over ``[row_start, row_stop)``. + + Same series selection as :meth:`plot_series` (CTable column honoring an + active filter, or an array column via *layout*) but with no bucketing — + every value is read exactly, for the high-res ``h`` view. The result is + ``{"x", "y", "n", "row_start", "row_stop"}`` with ``x`` in absolute row + coordinates. This reads exactly what is asked, so callers must bound the + range first (see ``PlotScreen._HIRES_MAX_POINTS``).
+ """ + path = self.normalize_path(path) + obj = self._get_object(path) + kind = object_kind(obj) + + if kind == "ctable": + view = self._filter_views.get(path, obj) + n = len(view) + start, stop = self._clamp_range(row_start, row_stop, n) + y = safe_asarray(view[column][start:stop]) + elif kind in {"ndarray", "c2array"}: + shape = tuple(getattr(obj, "shape", ()) or ()) + ndim = len(shape) + if ndim == 0: + raise ValueError("Cannot plot a scalar") + row_dim = layout.navigable_dims[0] if layout is not None and layout.navigable_dims else 0 + n = shape[row_dim] + start, stop = self._clamp_range(row_start, row_stop, n) + # Same column/fixed-dim selection as plot_series' array branch. + idx: list[int | slice] = [] + for i in range(ndim): + if i == row_dim: + idx.append(slice(start, stop)) + elif layout is not None and i in layout.fixed_values: + idx.append(layout.fixed_values[i]) + elif layout is not None and len(layout.navigable_dims) > 1 and i == layout.navigable_dims[1]: + idx.append(int(column)) + else: + idx.append(0) + y = np.asarray(obj[tuple(idx)]) + else: + raise ValueError(f"Cannot plot {kind!r} objects") + + return { + "x": np.arange(start, stop), + "y": y, + "n": n, + "row_start": start, + "row_stop": stop, + } + @staticmethod def _clamp_range(row_start: int, row_stop: int | None, n: int) -> tuple[int, int]: start = 0 if row_start is None else max(0, min(int(row_start), n)) diff --git a/tests/b2view/test_basics.py b/tests/b2view/test_basics.py index 420d9f24..23a41157 100644 --- a/tests/b2view/test_basics.py +++ b/tests/b2view/test_basics.py @@ -799,3 +799,51 @@ async def test_plot_view_locks_ctable_window(store_path): assert app.row_window is None assert app.browser.get_row_window("/level0/ctable") is False assert app.table_page["nrows"] == NROWS + + +async def test_plot_hires_view(store_path): + """'h' opens a high-res matplotlib image over the braille plot; 'q' returns.""" + pytest.importorskip("textual_plotext") + pytest.importorskip("textual_image") + 
pytest.importorskip("matplotlib") + from blosc2.b2view.app import HiResPlotScreen, PlotScreen + + app = B2ViewApp(store_path, start_path="/level0/ctable", start_panel="data") + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + table = await focus_data_table(pilot) + table.move_cursor(column=app.table_page["columns"].index("b")) + + await pilot.press("p") + await pilot.pause() + assert isinstance(app.screen, PlotScreen) + + # Force a tiny cap so the un-zoomed series (300 rows) is "too large": + # 'h' asks the user to zoom in and stays on the braille plot. + app.screen._HIRES_MAX_POINTS = 50 + await pilot.press("h") + await pilot.pause() + assert isinstance(app.screen, PlotScreen) # not opened + + # Zoom to a small range, then 'h' opens the high-res image screen. + await pilot.press("g") + await pilot.pause() + app.screen.query_one("#range-input", Input).value = "100:140" + await pilot.press("enter") + await pilot.pause() + plot = app.screen + assert (plot.row_start, plot.row_stop) == (100, 140) + + await pilot.press("h") + await pilot.pause() + assert isinstance(app.screen, HiResPlotScreen) + # The image rendered and mounted (UnicodeImage in a headless test). + from blosc2.b2view.app import TextualImage + + assert app.screen.query_one("#hires-image", TextualImage) is not None + + # 'q' returns to the braille plot with the zoom intact. 
+ await pilot.press("q") + await pilot.pause() + assert app.screen is plot + assert (plot.row_start, plot.row_stop) == (100, 140) diff --git a/tests/b2view/test_plot_model.py b/tests/b2view/test_plot_model.py index 08812346..2db447bc 100644 --- a/tests/b2view/test_plot_model.py +++ b/tests/b2view/test_plot_model.py @@ -172,6 +172,27 @@ def test_plot_series_range_clamps_and_orders(plot_store): assert len(empty["x"]) == 0 +@pytest.mark.parametrize(("node", "column"), [("/leaf", None), ("/ctable", "x")]) +def test_read_series_returns_exact_raw_values(plot_store, node, column): + """read_series is the unbucketed counterpart of plot_series (for the 'h' view).""" + path, vals = plot_store + s, e = 4000, 9000 + with StoreBrowser(path) as browser: + raw = browser.read_series(node, column=column, row_start=s, row_stop=e) + assert raw["n"] == N # total, not the range + assert (raw["row_start"], raw["row_stop"]) == (s, e) + np.testing.assert_array_equal(raw["x"], np.arange(s, e)) # absolute rows + np.testing.assert_array_equal(raw["y"], vals[s:e]) # NaNs compare equal by position + + +def test_read_series_clamps_range(plot_store): + path, vals = plot_store + with StoreBrowser(path) as browser: + clamped = browser.read_series("/leaf", row_stop=10 * N) + assert clamped["row_stop"] == N + assert clamped["y"].shape == (N,) + + def test_streaming_reducer_integer_dtype(): vals = np.arange(1000, dtype=np.int64) env = _minmax_buckets_streaming(lambda s, e: vals[s:e], 1000, 100, span=33) diff --git a/todo/b2view.md b/todo/b2view.md index d14f38d6..8ae80958 100644 --- a/todo/b2view.md +++ b/todo/b2view.md @@ -15,19 +15,6 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of placeholder; offer on-demand decoding (e.g. a key to materialize the column, or decode just the cursor row). - [ ] SChunk preview is not implemented (`model.preview` returns a message). 
-- [ ] `m` key in the plot modal: render a high-res matplotlib view of the - *currently shown* range (the braille envelope stays the fast navigator; - `m` is the explicit drill-down for the range you zoomed/panned to). - Plot the *raw* values of the window read exactly (so "high-res" means more - detail, not just more pixels of the same min/max buckets) — cap the window - so an un-zoomed million-row series doesn't trigger a huge read (require - zooming in, or fall back to the envelope above the cap). Display via - `textual-image` with a capability ladder: real image on kitty/iTerm2/sixel - → half-blocks elsewhere → "terminal can't show images, staying in braille" - message. Push it as a screen on top of `PlotScreen` so `q` returns to the - braille view with the zoom intact. Deps: add `textual-image` and promote - `matplotlib` from the dev group into the `plot` extra. Gated on need: - only worth it if the ~200-point braille resolution proves too coarse. - [ ] Live mini-plot in the data panel that follows paging: a small, always-visible braille plot of the current row window (or cursor column), redrawn on paging — a sparkline companion to the table, vs. the one-shot @@ -40,6 +27,17 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of ## Done +- 2026-06-14: `h` in the plot modal opens a high-res matplotlib image of the + current raw range, over the braille plot (`q`/`esc`/`h` return with the zoom + intact). `model.read_series` reads the exact values for `[row_start, row_stop)` + (same series selection as `plot_series`, no bucketing); `PlotScreen` gets a + `raw_fetch` closure and an `action_hires` that caps the window at + `_HIRES_MAX_POINTS` (50k — else "zoom in" notice). `HiResPlotScreen` renders + matplotlib (Agg) to a PNG and shows it via `textual-image`'s auto `Image` + (kitty/iTerm2/sixel → half-cells); a focusable `VerticalScroll` body keeps the + screen's keys live, and it closes with `pop_screen` (pushed without a result + callback). 
Deps: `textual-image` + `matplotlib` added to the `plot` extra. + Tests: `read_series` cases in `test_plot_model.py` and `test_plot_hires_view`. - 2026-06-14: NDArray sources also support the `v` locked window, copy-free via the layout (not `NDArray.slice`, which copies). `DataSliceLayout` gained a `row_window` field + `row_window_bounds`; `preview_array_from_layout` narrows From 679dde997c21c9e094ff81b33908176eba488399 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 15 Jun 2026 07:26:09 +0200 Subject: [PATCH 15/24] CTable: full-table to_string(), table-valued repr, display options Mirror pandas/polars display conventions: - CTable.to_string() now renders the whole table by default (all rows and columns), like pandas DataFrame.to_string(). New max_rows/max_width params truncate on demand. Decouple __str__ from to_string so str/print/repr stay truncated per the global options. - repr(t) now shows the same truncated table as str(t) instead of the one-line CTable<...> summary; the summary remains on t.info. - set_printoptions gains display_width (None=auto terminal, -1=all columns, int=fixed budget); display_rows now accepts -1 (all rows). - Add blosc2.printoptions(...) context manager (set + restore on exit). Behaviour changes: to_string() returns the full table (was truncated), and repr() returns the table (was the summary). --- RELEASE_NOTES.md | 18 ++++ src/blosc2/__init__.py | 2 + src/blosc2/ctable.py | 124 +++++++++++++++++++++----- tests/ctable/test_column.py | 7 +- tests/ctable/test_getitem_access.py | 79 ++++++++++++++++ tests/ctable/test_vlstring_vlbytes.py | 5 +- 6 files changed, 211 insertions(+), 24 deletions(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 7f949a30..a95a78de 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -4,6 +4,24 @@ XXX version-specific blurb XXX +### CTable display + +- **`CTable.to_string()` now renders the whole table by default** (every row and + every column), like `pandas`' `DataFrame.to_string()`. 
New `max_rows` and + `max_width` parameters truncate on demand. *Behaviour change*: previously + `to_string()` returned the truncated view; code that relied on that should + pass `max_rows=`/`max_width=` (or use `str()`). +- **`repr(ctable)` now shows the same truncated table as `str(ctable)`** + (pandas/polars convention), instead of the one-line `CTable<…>` summary. The + compact summary remains available via `ctable.info`. +- **New display options** in `set_printoptions`: `display_width` controls the + column-fitting width budget (`None` = auto-detect terminal, `-1` = show all + columns, positive int = fixed budget), and `display_rows` now accepts `-1` to + show all rows (`0` still shows none). +- **New `blosc2.printoptions(...)` context manager** temporarily sets the display + options and restores them on exit, e.g. + `with blosc2.printoptions(display_rows=-1, display_width=-1): print(t)`. + ## Changes from 4.4.3 to 4.4.5 Note: 4.4.4 was skipped due to a failure during the release process. diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py index 5a039f5f..c1015f9b 100644 --- a/src/blosc2/__init__.py +++ b/src/blosc2/__init__.py @@ -642,6 +642,7 @@ def _raise(exc): get_null_policy, get_printoptions, null_policy, + printoptions, set_printoptions, ) from .groupby import CTableGroupBy, group_reduce @@ -1075,5 +1076,6 @@ def _raise(exc): "get_null_policy", "get_printoptions", "null_policy", + "printoptions", "set_printoptions", ] diff --git a/src/blosc2/ctable.py b/src/blosc2/ctable.py index 6107f563..568ee90e 100644 --- a/src/blosc2/ctable.py +++ b/src/blosc2/ctable.py @@ -173,9 +173,14 @@ def sentinel_for_arrow_type(self, pa, pa_type): DEFAULT_NULL_POLICY = NullPolicy() _NULL_POLICY = contextvars.ContextVar("blosc2_null_policy", default=DEFAULT_NULL_POLICY) +# Sentinel for set_printoptions params whose valid value includes ``None`` +# (so ``None`` can be set explicitly rather than meaning "leave unchanged"). 
+_UNSET = object() + _CTABLE_PRINT_OPTIONS: dict[str, Any] = { "display_index": True, "display_rows": 60, + "display_width": None, "display_precision": 6, "fancy": False, } @@ -193,20 +198,30 @@ def set_printoptions( *, display_index: bool | None = None, display_rows: int | None = None, + display_width: int | None = _UNSET, display_precision: int | None = None, fancy: bool | None = None, ) -> None: """Set global display options for :class:`CTable` string representations. + These options affect ``str(ctable)``/``repr(ctable)``/``print(ctable)`` (the + interactive, truncated view). ``display_rows`` and ``display_width`` do *not* + affect :meth:`CTable.to_string`, which renders every row and column by default. + Parameters ---------- display_index: - Whether ``str(ctable)`` should include a pandas-like logical row index + Whether the display should include a pandas-like logical row index column. ``None`` leaves the current setting unchanged. display_rows: - Maximum number of rows allowed before truncating to a compact head/tail - view (five first and five last rows, when possible). ``None`` leaves - the current setting unchanged. + Maximum number of rows shown before truncating to a compact head/tail + view (five first and five last rows, when possible). ``-1`` shows all + rows, ``0`` shows none. ``None`` leaves the current setting unchanged. + display_width: + Character budget used to decide how many columns fit before truncating + the middle ones with ``...``. ``None`` (the default) auto-detects the + terminal width, ``-1`` shows all columns, a positive int sets a fixed + budget. Omit the argument to leave the current setting unchanged. display_precision: Number of digits after the decimal point for floating-point values in table displays. Trailing zeros are trimmed.
``None`` leaves the @@ -222,9 +237,20 @@ def set_printoptions( raise TypeError("display_index must be a bool or None") _CTABLE_PRINT_OPTIONS["display_index"] = display_index if display_rows is not None: - if not isinstance(display_rows, int) or isinstance(display_rows, bool) or display_rows < 0: - raise TypeError("display_rows must be a non-negative int or None") + if not isinstance(display_rows, int) or isinstance(display_rows, bool) or display_rows < -1: + raise TypeError("display_rows must be -1 (all), a non-negative int, or None") _CTABLE_PRINT_OPTIONS["display_rows"] = display_rows + if display_width is not _UNSET: + if not ( + display_width is None + or ( + isinstance(display_width, int) + and not isinstance(display_width, bool) + and display_width >= -1 + ) + ): + raise TypeError("display_width must be None (auto), -1 (all), or a non-negative int") + _CTABLE_PRINT_OPTIONS["display_width"] = display_width if display_precision is not None: if ( not isinstance(display_precision, int) @@ -244,6 +270,25 @@ def get_printoptions() -> dict[str, Any]: return dict(_CTABLE_PRINT_OPTIONS) +@contextlib.contextmanager +def printoptions(**kwargs: Any): + """Temporarily set :class:`CTable` display options, restored on exit. + + Accepts the same keyword arguments as :func:`set_printoptions`. 
Handy for a + one-off full dump, e.g.:: + + with blosc2.printoptions(display_rows=-1, display_width=-1): + print(ctable) + """ + saved = dict(_CTABLE_PRINT_OPTIONS) + try: + set_printoptions(**kwargs) + yield + finally: + _CTABLE_PRINT_OPTIONS.clear() + _CTABLE_PRINT_OPTIONS.update(saved) + + @contextlib.contextmanager def null_policy(policy: NullPolicy): """Temporarily set the default policy for CTable null sentinel inference.""" @@ -3849,7 +3894,7 @@ def _display_positions(self, display_rows: int | None = None): else: valid_np = self._valid_rows[:] all_pos = np.where(valid_np)[0] - if nrows <= display_rows: + if display_rows < 0 or nrows <= display_rows: # -1 (or any negative) shows all rows return all_pos, np.array([], dtype=all_pos.dtype), 0 preview_rows = min(10, display_rows) @@ -3875,16 +3920,22 @@ def _display_widths(self, col_names: list[str] | None = None) -> dict[str, int]: return widths def _display_columns( - self, *, display_index: bool = False, index_width: int = 0 + self, *, display_index: bool = False, index_width: int = 0, max_width: int | None = None ) -> tuple[list[str], int]: - """Return terminal-width-friendly display columns and hidden count.""" + """Return width-friendly display columns and hidden count. + + *max_width* is the character budget for column fitting: ``None`` or a + negative value shows all columns (no truncation); a positive int caps it. 
+ """ col_names = list(self.col_names) + if max_width is None or max_width < 0: # unlimited: show every column + return col_names, 0 widths = self._display_widths(col_names) widths["..."] = 3 total_width = sum(widths[n] + 2 for n in col_names) + 2 * max(0, len(col_names) - 1) if display_index: total_width += index_width + 2 + 2 - term_width = shutil.get_terminal_size((120, 20)).columns + term_width = max_width if total_width <= term_width or len(col_names) <= 2: return col_names, 0 @@ -4208,11 +4259,32 @@ def _display_lines_without_index( ) return lines - def to_string(self, *, display_index: bool | None = None, index_name: str = "") -> str: + def to_string( + self, + *, + max_rows: int | None = None, + max_width: int | None = None, + display_index: bool | None = None, + index_name: str = "", + ) -> str: """Return a tabular string representation of the table. + By default (``max_rows=None``, ``max_width=None``) this renders the + *whole* table — every row and every column — like ``pandas``' + ``DataFrame.to_string()``. This is independent of the global + :func:`blosc2.set_printoptions`; those only affect the truncated + ``str``/``repr``/``print`` view. + Parameters ---------- + max_rows: + Maximum number of rows before truncating to a compact head/tail + view. ``None`` (default) shows all rows; ``-1`` also means all, + ``0`` shows none, a positive int caps it. + max_width: + Character budget for column fitting. ``None`` (default) or ``-1`` + shows all columns; a positive int truncates the middle ones with + ``...`` to fit. display_index: Whether to include a pandas-like logical row index column. 
If ``None`` (default), use the global value configured with @@ -4229,21 +4301,26 @@ def to_string(self, *, display_index: bool | None = None, index_name: str = "") nrows = self._n_rows ncols = len(self.col_names) - head_pos, tail_pos, hidden = self._display_positions() + rows_arg = -1 if max_rows is None else max_rows # None ⇒ all rows + head_pos, tail_pos, hidden = self._display_positions(rows_arg) # Memoise per-column sparse gathers for the duration of this render so # the repeated (column, head_pos/tail_pos) lookups across precision, # width and row formatting only touch storage once. head_pos/tail_pos # stay referenced below, so keying the cache on their id() is safe. self._display_fetch_cache = {} try: - return self._to_string_body(display_index, index_name, nrows, ncols, head_pos, tail_pos, hidden) + return self._to_string_body( + display_index, index_name, nrows, ncols, head_pos, tail_pos, hidden, max_width + ) finally: self._display_fetch_cache = None - def _to_string_body(self, display_index, index_name, nrows, ncols, head_pos, tail_pos, hidden) -> str: + def _to_string_body( + self, display_index, index_name, nrows, ncols, head_pos, tail_pos, hidden, max_width=None + ) -> str: index_width = self._display_index_width(nrows, hidden, index_name) if display_index else 0 display_cols, hidden_cols = self._display_columns( - display_index=display_index, index_width=index_width + display_index=display_index, index_width=index_width, max_width=max_width ) # Warm the fetch cache with a single combined head+tail gather per column. 
# head_pos/tail_pos land in different blocks, but folding them into one @@ -4289,13 +4366,20 @@ def _to_string_body(self, display_index, index_name, nrows, ncols, head_pos, tai return "\n".join(lines) def __str__(self) -> str: - """Pandas-style tabular display with column names, dtypes, and a row count footer.""" - return self.to_string() + """Pandas-style tabular display, truncated per :func:`blosc2.set_printoptions`.""" + opts = _CTABLE_PRINT_OPTIONS + width = opts["display_width"] + if width is None: # auto: fit to the current terminal + width = shutil.get_terminal_size((120, 20)).columns + return self.to_string(max_rows=opts["display_rows"], max_width=width) def __repr__(self) -> str: - """Short ``CTable(N rows, X compressed)`` summary string.""" - cols = ", ".join(self.col_names) - return f"CTable<{cols}>({self._n_rows:,} rows, {_fmt_bytes(self.cbytes)} compressed)" + """Same truncated table as ``str`` (pandas/polars convention). + + The compact ``CTable(N rows, …)`` summary is available via + :attr:`info`. + """ + return self.__str__() def __len__(self): """Return the number of live (non-deleted) rows.""" diff --git a/tests/ctable/test_column.py b/tests/ctable/test_column.py index 328c480f..ec1784f7 100644 --- a/tests/ctable/test_column.py +++ b/tests/ctable/test_column.py @@ -783,9 +783,12 @@ def test_repr_contains_col_names_and_row_count(): assert "20" in r -def test_repr_is_single_line(): +def test_repr_matches_str_tabular(): + # repr mirrors str: the truncated table (pandas/polars convention), not a + # one-line summary. The compact summary lives on `info`. 
t = CTable(Row, new_data=DATA20) - assert "\n" not in repr(t) + assert repr(t) == str(t) + assert "\n" in repr(t) def test_column_repr_shows_preview_values(): diff --git a/tests/ctable/test_getitem_access.py b/tests/ctable/test_getitem_access.py index dae0552c..728d73cd 100644 --- a/tests/ctable/test_getitem_access.py +++ b/tests/ctable/test_getitem_access.py @@ -99,6 +99,85 @@ def test_display_precision_printoption_formats_float_values(): ) +def _wide_table(nrows=200): + @dataclass + class WideRow: + c00: int = blosc2.field(blosc2.int64()) + c01: int = blosc2.field(blosc2.int64()) + c02: int = blosc2.field(blosc2.int64()) + c03: int = blosc2.field(blosc2.int64()) + c04: int = blosc2.field(blosc2.int64()) + c05: int = blosc2.field(blosc2.int64()) + c06: int = blosc2.field(blosc2.int64()) + c07: int = blosc2.field(blosc2.int64()) + + return CTable(WideRow, new_data=[tuple(range(i, i + 8)) for i in range(nrows)]) + + +def test_to_string_is_full_by_default(): + """Bare to_string() shows every row and column, ignoring the global options.""" + t = _wide_table(nrows=200) + blosc2.set_printoptions(display_rows=20, display_width=40) # would truncate str() + try: + full = t.to_string() + # All 200 rows present (header + 200 + footer), no row/col ellipsis. + assert "...; rows hidden" not in full + assert full.splitlines()[-1].strip().startswith("[200 rows") + assert "c00" in full # first column shown + assert "c07" in full # last column shown + # str() still truncates per the options. + assert str(t).count("\n") < full.count("\n") + finally: + blosc2.set_printoptions(display_rows=60, display_width=None) + + +def test_to_string_max_rows_and_max_width_truncate(): + t = _wide_table(nrows=200) + truncated = t.to_string(max_rows=10, max_width=40) + assert truncated.count("\n") < t.to_string().count("\n") + # A narrow width budget drops middle columns with an ellipsis column. + assert any(line.strip().startswith("...") or " ... 
" in line for line in truncated.splitlines()) + + +def test_display_rows_minus_one_shows_all(): + t = _wide_table(nrows=120) + with blosc2.printoptions(display_rows=-1): + assert str(t).count("\n") >= 120 # all rows, no head/tail collapse + + +def test_display_width_minus_one_shows_all_columns(): + t = _wide_table(nrows=5) + with blosc2.printoptions(display_width=-1): + header = str(t).splitlines()[0] + assert "c00" in header # every column shown, regardless of terminal + assert "c07" in header + + +def test_printoptions_context_manager_restores(): + before = blosc2.get_printoptions() + with blosc2.printoptions(display_rows=-1, display_width=-1, display_precision=2): + inside = blosc2.get_printoptions() + assert inside["display_rows"] == -1 + assert inside["display_width"] == -1 + assert blosc2.get_printoptions() == before + + +def test_set_printoptions_validates_new_options(): + with pytest.raises(TypeError): + blosc2.set_printoptions(display_rows=-2) + with pytest.raises(TypeError): + blosc2.set_printoptions(display_width=-2) + with pytest.raises(TypeError): + blosc2.set_printoptions(display_width=1.5) + # display_width=None (auto) is settable and round-trips. + blosc2.set_printoptions(display_width=-1) + try: + blosc2.set_printoptions(display_width=None) + assert blosc2.get_printoptions()["display_width"] is None + finally: + blosc2.set_printoptions(display_width=None) + + def test_getitem_string_column(): t = CTable(AccessRow, new_data=DATA) col = t["id"] diff --git a/tests/ctable/test_vlstring_vlbytes.py b/tests/ctable/test_vlstring_vlbytes.py index d1230000..5353359c 100644 --- a/tests/ctable/test_vlstring_vlbytes.py +++ b/tests/ctable/test_vlstring_vlbytes.py @@ -600,5 +600,6 @@ def test_ctable_vlstring_str_display(): def test_ctable_vlstring_repr(): ct = blosc2.CTable(VLRow, new_data=ROWS) r = repr(ct) - assert "CTable" in r - assert "5" in r + # repr is now the tabular view (same as str); the footer carries the count. 
+ assert r == str(ct) + assert "5 rows" in r From bd3a83a27e24b5cdd795eaf2b0f7ced0a0bc7100 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 15 Jun 2026 07:49:50 +0200 Subject: [PATCH 16/24] CTable.to_csv(): return the CSV string when no path is given Make the path argument optional: with no path (or path=None), nothing is written and the CSV text is returned as a string, like pandas' DataFrame.to_csv(). Passing a path still writes the file and returns None; the returned string is byte-for-byte identical to the file content. No index column is written (matching polars and CTable's index-less data model). --- RELEASE_NOTES.md | 10 ++++ src/blosc2/ctable.py | 59 ++++++++++++++++++++---- tests/ctable/test_csv_interop.py | 15 +++++- tests/ctable/test_getitem_access.py | 25 ++++++++-- tests/ctable/test_select_describe_cov.py | 5 +- tests/ctable/test_vlstring_vlbytes.py | 4 +- 6 files changed, 102 insertions(+), 16 deletions(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index a95a78de..5b52cab6 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -11,6 +11,10 @@ XXX version-specific blurb XXX `max_width` parameters truncate on demand. *Behaviour change*: previously `to_string()` returned the truncated view; code that relied on that should pass `max_rows=`/`max_width=` (or use `str()`). +- **The `[N rows x M columns]` dimensions footer now follows pandas**: omitted by + `to_string()` (pass `show_dimensions=True` to force it), and shown by + `str`/`repr`/`print` only when the view is actually truncated. Previously it + was always appended. - **`repr(ctable)` now shows the same truncated table as `str(ctable)`** (pandas/polars convention), instead of the one-line `CTable<…>` summary. The compact summary remains available via `ctable.info`. @@ -22,6 +26,12 @@ XXX version-specific blurb XXX options and restores them on exit, e.g. `with blosc2.printoptions(display_rows=-1, display_width=-1): print(t)`. 
+### CTable I/O + +- **`CTable.to_csv()` now accepts no path**, returning the CSV as a string like + `pandas`' `DataFrame.to_csv()`. Passing a path still writes the file (and + returns `None`); the returned string is byte-for-byte the same as the file. + ## Changes from 4.4.3 to 4.4.5 Note: 4.4.4 was skipped due to a failure during the release process. diff --git a/src/blosc2/ctable.py b/src/blosc2/ctable.py index 568ee90e..9c8f7b35 100644 --- a/src/blosc2/ctable.py +++ b/src/blosc2/ctable.py @@ -4264,6 +4264,7 @@ def to_string( *, max_rows: int | None = None, max_width: int | None = None, + show_dimensions: bool | str = False, display_index: bool | None = None, index_name: str = "", ) -> str: @@ -4285,6 +4286,11 @@ def to_string( Character budget for column fitting. ``None`` (default) or ``-1`` shows all columns; a positive int truncates the middle ones with ``...`` to fit. + show_dimensions: + Whether to append a ``[N rows x M columns]`` footer. ``False`` + (default) omits it, matching ``pandas``' ``to_string()``; ``True`` + always shows it; ``"truncate"`` shows it only when the view is + truncated (the behaviour of ``str``/``repr``). display_index: Whether to include a pandas-like logical row index column. 
If ``None`` (default), use the global value configured with @@ -4310,13 +4316,30 @@ def to_string( self._display_fetch_cache = {} try: return self._to_string_body( - display_index, index_name, nrows, ncols, head_pos, tail_pos, hidden, max_width + display_index, + index_name, + nrows, + ncols, + head_pos, + tail_pos, + hidden, + max_width, + show_dimensions, ) finally: self._display_fetch_cache = None def _to_string_body( - self, display_index, index_name, nrows, ncols, head_pos, tail_pos, hidden, max_width=None + self, + display_index, + index_name, + nrows, + ncols, + head_pos, + tail_pos, + hidden, + max_width=None, + show_dimensions=False, ) -> str: index_width = self._display_index_width(nrows, hidden, index_name) if display_index else 0 display_cols, hidden_cols = self._display_columns( @@ -4362,7 +4385,11 @@ def _to_string_body( ) if sep is not None: lines.append(sep) - lines.extend(self._display_footer(nrows, ncols, hidden, hidden_cols, fancy)) + # pandas convention: to_string() omits the dimensions footer + # (show_dimensions=False); str/repr show it only when truncated. + truncated = hidden > 0 or hidden_cols > 0 + if show_dimensions is True or (show_dimensions == "truncate" and truncated): + lines.extend(self._display_footer(nrows, ncols, hidden, hidden_cols, fancy)) return "\n".join(lines) def __str__(self) -> str: @@ -4371,7 +4398,7 @@ def __str__(self) -> str: width = opts["display_width"] if width is None: # auto: fit to the current terminal width = shutil.get_terminal_size((120, 20)).columns - return self.to_string(max_rows=opts["display_rows"], max_width=width) + return self.to_string(max_rows=opts["display_rows"], max_width=width, show_dimensions="truncate") def __repr__(self) -> str: """Same truncated table as ``str`` (pandas/polars convention). 
@@ -7278,8 +7305,8 @@ def _limited_batches(batch_iter, limit: int): # CSV interop # ------------------------------------------------------------------ - def to_csv(self, path: str, *, header: bool = True, sep: str = ",") -> None: - """Write all live rows to a CSV file. + def to_csv(self, path: str | None = None, *, header: bool = True, sep: str = ",") -> str | None: + """Write all live rows to CSV. Uses Python's stdlib ``csv`` module — no extra dependency required. Fixed-shape ndarray column cells are serialised as JSON arrays for @@ -7288,13 +7315,21 @@ def to_csv(self, path: str, *, header: bool = True, sep: str = ",") -> None: Parameters ---------- path: - Destination file path. Created or overwritten. + Destination file path (created or overwritten). If ``None`` (the + default), nothing is written and the CSV is returned as a string, + like ``pandas``' ``DataFrame.to_csv()``. header: If ``True`` (default), write column names as the first row. sep: Field delimiter. Defaults to ``","``; use ``"\\t"`` for TSV. + + Returns + ------- + str or None + The CSV text when *path* is ``None``, otherwise ``None``. 
""" import csv + import io n = len(self) arrays: list = [] @@ -7313,13 +7348,21 @@ def to_csv(self, path: str, *, header: bool = True, sep: str = ",") -> None: else: arrays.append(col[:]) - with open(path, "w", newline="") as f: + def _write(f) -> None: writer = csv.writer(f, delimiter=sep) if header: writer.writerow(self.col_names) for row in zip(*arrays, strict=True): writer.writerow(row) + if path is None: + buf = io.StringIO(newline="") + _write(buf) + return buf.getvalue() + with open(path, "w", newline="") as f: + _write(f) + return None + @staticmethod def _csv_ndarray_col_to_array(raw: list[str], col) -> np.ndarray: """Convert a list of JSON-array CSV strings to a stacked ndarray for an ndarray column.""" diff --git a/tests/ctable/test_csv_interop.py b/tests/ctable/test_csv_interop.py index 811f559f..9a2063c9 100644 --- a/tests/ctable/test_csv_interop.py +++ b/tests/ctable/test_csv_interop.py @@ -45,10 +45,23 @@ def table10(): def test_to_csv_creates_file(table10, tmp_csv): - table10.to_csv(tmp_csv) + ret = table10.to_csv(tmp_csv) + assert ret is None # writing to a path returns None assert os.path.exists(tmp_csv) +def test_to_csv_no_path_returns_string(table10, tmp_csv): + # pandas-style: no path -> return the CSV text instead of writing a file. + text = table10.to_csv() + assert isinstance(text, str) + assert text.splitlines()[0] == "id,score,active,label" + assert len(text.splitlines()) == 11 # header + 10 data rows + # The returned string matches what gets written to a file. 
+ table10.to_csv(tmp_csv) + with open(tmp_csv, newline="") as f: + assert f.read() == text + + def test_to_csv_header_row(table10, tmp_csv): table10.to_csv(tmp_csv) with open(tmp_csv) as f: diff --git a/tests/ctable/test_getitem_access.py b/tests/ctable/test_getitem_access.py index 728d73cd..85b664dd 100644 --- a/tests/ctable/test_getitem_access.py +++ b/tests/ctable/test_getitem_access.py @@ -120,13 +120,19 @@ def test_to_string_is_full_by_default(): blosc2.set_printoptions(display_rows=20, display_width=40) # would truncate str() try: full = t.to_string() - # All 200 rows present (header + 200 + footer), no row/col ellipsis. - assert "...; rows hidden" not in full - assert full.splitlines()[-1].strip().startswith("[200 rows") + lines = full.splitlines() + # All 200 rows present (header + 200 data rows), no dimensions footer + # (pandas convention) and no row/col ellipsis. + assert len(lines) == 201 + assert "rows x" not in full # no dimensions footer + assert "columns]" not in full + assert lines[-1].split()[0] == "199" # last data row assert "c00" in full # first column shown assert "c07" in full # last column shown # str() still truncates per the options. assert str(t).count("\n") < full.count("\n") + # opt-in footer for those who want it + assert t.to_string(show_dimensions=True).splitlines()[-1].strip().startswith("[200 rows") finally: blosc2.set_printoptions(display_rows=60, display_width=None) @@ -139,6 +145,19 @@ def test_to_string_max_rows_and_max_width_truncate(): assert any(line.strip().startswith("...") or " ... " in line for line in truncated.splitlines()) +def test_dimensions_footer_follows_pandas(): + """No footer from to_string() or an untruncated str(); shown when truncated.""" + small = _wide_table(nrows=5) + assert "rows x" not in small.to_string() # to_string omits it (pandas) + # Pin the width so columns never truncate; then the only question is rows. 
+ with blosc2.printoptions(display_width=-1): + assert "rows x" not in str(small) # nothing truncated -> no footer + assert str(small) == repr(small) + + big = _wide_table(nrows=200) # 200 > display_rows default 60 -> rows truncated + assert "[200 rows x 8 columns]" in str(big) # truncated -> footer shown + + def test_display_rows_minus_one_shows_all(): t = _wide_table(nrows=120) with blosc2.printoptions(display_rows=-1): diff --git a/tests/ctable/test_select_describe_cov.py b/tests/ctable/test_select_describe_cov.py index 7d00349e..eb2109e0 100644 --- a/tests/ctable/test_select_describe_cov.py +++ b/tests/ctable/test_select_describe_cov.py @@ -129,8 +129,9 @@ def test_to_string_display_index(): assert "row" in lines[0] assert lines[1].lstrip().startswith("0") - assert lines[-3].lstrip().startswith("9") - assert lines[-2] == "" + # to_string() omits the dimensions footer (pandas convention), so the last + # line is the final data row. + assert lines[-1].lstrip().startswith("9") def test_global_printoptions_display_index(): diff --git a/tests/ctable/test_vlstring_vlbytes.py b/tests/ctable/test_vlstring_vlbytes.py index 5353359c..106261fe 100644 --- a/tests/ctable/test_vlstring_vlbytes.py +++ b/tests/ctable/test_vlstring_vlbytes.py @@ -600,6 +600,6 @@ def test_ctable_vlstring_str_display(): def test_ctable_vlstring_repr(): ct = blosc2.CTable(VLRow, new_data=ROWS) r = repr(ct) - # repr is now the tabular view (same as str); the footer carries the count. + # repr is now the tabular view (same as str); a small table shows no footer. 
assert r == str(ct) - assert "5 rows" in r + assert "id" in r.splitlines()[0] # column header present From 35022928adea8841a27c817d344729dbacc42515 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 15 Jun 2026 09:56:38 +0200 Subject: [PATCH 17/24] b2view: 'enter' decodes a skipped CTable cell on demand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expensive CTable columns (list/struct/object/ndarray) render a `<...; skipped>` placeholder to keep table navigation responsive. Pressing Enter on such a cell now decodes just that one cell into a CellDetailScreen modal (pretty-printed, scrollable, esc/q/enter to return) — the table stays underneath with its position intact. --- src/blosc2/b2view/app.py | 105 ++++++++++++++++++++++++++++++++++++ src/blosc2/b2view/model.py | 22 ++++++++ tests/b2view/test_basics.py | 48 +++++++++++++++++ tests/test_b2view_model.py | 35 ++++++++++++ todo/b2view.md | 20 ++++--- 5 files changed, 222 insertions(+), 8 deletions(-) diff --git a/src/blosc2/b2view/app.py b/src/blosc2/b2view/app.py index 3c7c3172..d4b9b902 100644 --- a/src/blosc2/b2view/app.py +++ b/src/blosc2/b2view/app.py @@ -121,6 +121,8 @@ def action_select_cursor(self) -> None: if getattr(app, "_dim_mode", False): getattr(app, "action_dim_toggle_nav", lambda: None)() return + if getattr(app, "_inspect_cursor_cell", lambda: False)(): + return super().action_select_cursor() def _wheel_step(self) -> int: @@ -227,6 +229,7 @@ class HelpScreen(ModalScreen[None]): ("c", "go to column index or name..."), ("/", "filter visible columns by substring (CTable)"), ("p", "plot a whole-column overview (needs textual-plotext)"), + ("enter", "decode a skipped cell (list/struct/object column)"), ], ), ( @@ -821,6 +824,75 @@ def action_close(self) -> None: self.app.pop_screen() +class CellDetailScreen(ModalScreen[None]): + """Pretty-printed view of a single decoded CTable cell. 
+ + Reached with Return on an expensive (list/struct/object/ndarray) column + whose grid cell shows a ``<...; skipped>`` placeholder; the value is decoded + on demand. The table stays underneath with its position intact (esc/q/enter + return). + """ + + CSS = """ + CellDetailScreen { + align: center middle; + } + #cell-dialog { + width: 80%; + height: auto; + max-height: 90%; + border: thick $accent; + background: $surface; + padding: 1 2; + } + #cell-title { + text-style: bold; + height: 1; + } + #cell-body { + height: auto; + max-height: 1fr; + margin-top: 1; + } + #cell-keys { + height: 1; + color: $text-muted; + margin-top: 1; + } + """ + + BINDINGS: ClassVar = [ + ("escape", "close", "Close"), + ("q", "close", "Close"), + ("enter", "close", "Close"), + ] + + def __init__(self, *, row: int, name: str, label: str, value: Any): + super().__init__() + self._row = row + self._name = name + self._label = label + self._value = value + + def compose(self) -> ComposeResult: + import pprint + + title = f"row {self._row} · {self._name} ({self._label})" + text = pprint.pformat(self._value, width=100, sort_dicts=False) + with Vertical(id="cell-dialog"): + yield Static(markup_escape(title), id="cell-title") + # A VerticalScroll is focusable, so the screen's key bindings fire. + with VerticalScroll(id="cell-body"): + yield Static(markup_escape(text)) + yield Static("esc/q · close", id="cell-keys") + + def on_mount(self) -> None: + self.query_one("#cell-body", VerticalScroll).focus() + + def action_close(self) -> None: + self.app.pop_screen() + + class B2ViewApp(App): """Browse TreeStore hierarchy and preview objects.""" @@ -1655,6 +1727,39 @@ def action_go_to_row(self) -> None: _PLOT_MAX_POINTS = 2000 + def _inspect_cursor_cell(self) -> bool: + """Return on a skipped CTable cell: decode just that cell into a modal. + + Returns True when the key was consumed (the cursor sat on an expensive + ``<...; skipped>`` cell), so the data-table's default select handler is + skipped. 
Anything else (numeric/text cells, non-CTable grids) returns + False and falls through. + """ + if not self._in_data_grid() or self.table_page is None or self.browser is None: + return False + if self.table_page.get("source_kind") != "ctable": + return False + # skipped_columns lives on the buffer; _slice_table_buffer drops it. + skipped = (self.table_buffer or {}).get("skipped_columns") or {} + if not skipped: + return False + columns = self.table_page["columns"] + table = self.query_one("#data-table", DataTable) + cursor_col = table.cursor_column + if not (0 <= cursor_col < len(columns)): + return False + name = columns[cursor_col] + if name not in skipped: + return False + row = self.table_page["start"] + table.cursor_row + try: + value = self.browser.read_cell(self.selected_path, name, row) + except Exception as exc: # pragma: no cover - defensive + self.notify(f"Could not decode cell: {exc}", severity="error") + return True # we owned the key; surface the failure + self.push_screen(CellDetailScreen(row=row, name=name, label=skipped[name], value=value)) + return True + def action_plot_column(self) -> None: """p key — plot a downsampled overview of the whole cursor column.""" if not self._in_data_grid(): diff --git a/src/blosc2/b2view/model.py b/src/blosc2/b2view/model.py index e84ba29d..f71c1201 100644 --- a/src/blosc2/b2view/model.py +++ b/src/blosc2/b2view/model.py @@ -560,6 +560,28 @@ def read_series( "row_stop": stop, } + def read_cell(self, path: str, column: str, row: int) -> Any: + """Decode a single CTable cell — the on-demand path for expensive columns. + + *row* is in the same live-row space as :meth:`preview` (it mirrors the + window/filter view precedence), so the row the grid shows is the cell + that gets decoded. Returns the native Python value (list/dict/array/…), + not a NumPy-wrapped one, so callers can pretty-print its structure. 
+ """ + path = self.normalize_path(path) + obj = self._get_object(path) + if object_kind(obj) == "ctable": + # Same precedence as preview(): a locked row window wins over a + # filter view, so the visible row index resolves the same cell. + if path in self._window_views: + obj = self._window_views[path] + else: + obj = self._filter_views.get(path, obj) + values = obj[column][row : row + 1] + if len(values) == 0: + raise IndexError(f"row {row} is out of range") + return values[0] + @staticmethod def _clamp_range(row_start: int, row_stop: int | None, n: int) -> tuple[int, int]: start = 0 if row_start is None else max(0, min(int(row_start), n)) diff --git a/tests/b2view/test_basics.py b/tests/b2view/test_basics.py index 23a41157..609308ea 100644 --- a/tests/b2view/test_basics.py +++ b/tests/b2view/test_basics.py @@ -847,3 +847,51 @@ async def test_plot_hires_view(store_path): await pilot.pause() assert app.screen is plot assert (plot.row_start, plot.row_stop) == (100, 140) + + +# ── Expensive (skipped) CTable cell: decode on demand with Enter ───────── + + +async def test_enter_decodes_skipped_cell(tmp_path): + """Enter on a ``<...; skipped>`` list cell opens the decoded-cell modal.""" + import dataclasses + + from blosc2.b2view.app import CellDetailScreen + + @dataclasses.dataclass + class TaggedRow: + id: int = blosc2.field(blosc2.int32()) + tags: list[int] = blosc2.field(blosc2.list(blosc2.int64(), nullable=True)) # noqa: RUF009 + + path = str(tmp_path / "tagged.b2z") + rows = [(i, list(range(i + 1))) for i in range(6)] + with blosc2.TreeStore(path, mode="w") as store: + store["/t"] = blosc2.CTable(TaggedRow, new_data=rows) + + app = B2ViewApp(path, start_path="/t", start_panel="data") + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + table = await focus_data_table(pilot) + + # The list column is a placeholder in the grid. 
+ assert "tags" in (app.table_buffer.get("skipped_columns") or {}) + + # Enter on the cheap 'id' column does nothing special (no modal). + table.move_cursor(row=2, column=app.table_page["columns"].index("id")) + await pilot.press("enter") + await pilot.pause() + assert not isinstance(app.screen, CellDetailScreen) + + # Enter on the skipped 'tags' cell decodes just that row into a modal. + table.move_cursor(row=2, column=app.table_page["columns"].index("tags")) + await pilot.press("enter") + await pilot.pause() + assert isinstance(app.screen, CellDetailScreen) + assert app.screen._value == [0, 1, 2] # row 2 of the generator above + assert app.screen._row == 2 + + # esc returns to the table with its position intact. + await pilot.press("escape") + await pilot.pause() + assert not isinstance(app.screen, CellDetailScreen) + assert table.cursor_row == 2 diff --git a/tests/test_b2view_model.py b/tests/test_b2view_model.py index f4ba97bc..9bf5b247 100644 --- a/tests/test_b2view_model.py +++ b/tests/test_b2view_model.py @@ -176,6 +176,41 @@ def info_items(self): assert preview["data"]["path"].tolist() == [""] * 3 +@dataclasses.dataclass +class TaggedRow: + id: int = blosc2.field(blosc2.int32()) + tags: list[int] = blosc2.field(blosc2.list(blosc2.int64(), nullable=True)) # noqa: RUF009 + + +def test_read_cell_decodes_expensive_column_on_demand(tmp_path): + path = tmp_path / "tagged.b2z" + rows = [(0, [0]), (1, [1, 10]), (2, [2, 20, 200]), (3, None)] + with blosc2.TreeStore(str(path), mode="w") as store: + store["/t"] = blosc2.CTable(TaggedRow, new_data=rows) + + with StoreBrowser(str(path)) as browser: + # The expensive list column is a placeholder in the preview, ... + preview = browser.preview("/t", max_cols=2) + assert "tags" in preview["skipped_columns"] + # ... but read_cell decodes the exact cell the grid row points at. 
+ assert browser.read_cell("/t", "tags", 2) == [2, 20, 200] + assert browser.read_cell("/t", "tags", 0) == [0] + assert browser.read_cell("/t", "tags", 3) is None + + +def test_read_cell_honors_filter_view_row_space(tmp_path): + path = tmp_path / "tagged_filter.b2z" + rows = [(0, [0]), (1, [1, 10]), (2, [2, 20]), (3, [3, 30])] + with blosc2.TreeStore(str(path), mode="w") as store: + store["/t"] = blosc2.CTable(TaggedRow, new_data=rows) + + with StoreBrowser(str(path)) as browser: + browser.set_filter("/t", "id >= 2") # live view is rows [2, 3] + # read_cell row 0 must resolve to the first *visible* row (id == 2). + assert browser.read_cell("/t", "tags", 0) == [2, 20] + assert browser.read_cell("/t", "tags", 1) == [3, 30] + + def test_ctable_preview_preserves_ragged_nested_values(): class Column: def __init__(self, values): diff --git a/todo/b2view.md b/todo/b2view.md index 8ae80958..9fe7c8bb 100644 --- a/todo/b2view.md +++ b/todo/b2view.md @@ -11,15 +11,7 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of ### Data panel -- [ ] CTable expensive columns (list/struct/object) show a `<...; skipped>` - placeholder; offer on-demand decoding (e.g. a key to materialize the - column, or decode just the cursor row). - [ ] SChunk preview is not implemented (`model.preview` returns a message). -- [ ] Live mini-plot in the data panel that follows paging: a small, - always-visible braille plot of the current row window (or cursor column), - redrawn on paging — a sparkline companion to the table, vs. the one-shot - `p` modal. Reuses `plot_series`; the work is layout (find room in the - data panel) and wiring the redraw to the paging events. 
### Testing - [ ] Visual regressions: consider `pytest-textual-snapshot` (SVG snapshots) @@ -27,6 +19,18 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of ## Done +- 2026-06-15: Expensive CTable cells (list/struct/object/ndarray columns) show a + `<...; skipped>` placeholder; **Enter** on such a cell now decodes just that + one cell on demand into a `CellDetailScreen` modal (pretty-printed, scrollable, + esc/q/enter to return — the table keeps its position). Backed by + `StoreBrowser.read_cell(path, column, row)`, which mirrors `preview`'s + window/filter view precedence so the visible row resolves the same cell. + `BufferedDataTable.action_select_cursor` falls through to + `B2ViewApp._inspect_cursor_cell` when not in dim mode (skipped cells only, + else the default select); `skipped_columns` is read from `table_buffer` (it is + dropped by `_slice_table_buffer`). Tests: `read_cell` cases in + `test_b2view_model.py` (decode + filter-view row space) and the Pilot + `test_enter_decodes_skipped_cell`. - 2026-06-14: `h` in the plot modal opens a high-res matplotlib image of the current raw range, over the braille plot (`q`/`esc`/`h` return with the zoom intact). `model.read_series` reads the exact values for `[row_start, row_stop)` From ceae8440aa513faaf71c1855737d47131cd76942 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 15 Jun 2026 10:19:46 +0200 Subject: [PATCH 18/24] b2view: SChunk preview as a paged hex dump The data panel showed a "not implemented" stub for SChunk nodes. Render them instead as an xxd-style hex dump, modeled as a grid preview so all the existing row navigation applies unchanged. 
--- src/blosc2/b2view/app.py | 22 ++++++++++++-- src/blosc2/b2view/model.py | 59 ++++++++++++++++++++++++++++++++++++- tests/b2view/test_basics.py | 51 ++++++++++++++++++++++++++++++++ tests/test_b2view_model.py | 55 ++++++++++++++++++++++++++++++++++ todo/b2view.md | 14 ++++++++- 5 files changed, 196 insertions(+), 5 deletions(-) diff --git a/src/blosc2/b2view/app.py b/src/blosc2/b2view/app.py index d4b9b902..645af7c5 100644 --- a/src/blosc2/b2view/app.py +++ b/src/blosc2/b2view/app.py @@ -1198,8 +1198,9 @@ def _is_table_preview(data) -> bool: @staticmethod def _uses_grid_preview(info) -> bool: - # 1D, 2D, 3D+ NDArray/C2Array all use grid preview - return info.kind == "ctable" or ( + # 1D, 2D, 3D+ NDArray/C2Array all use grid preview; SChunk uses it for + # the paged hex dump (rows of 16 bytes). + return info.kind in {"ctable", "schunk"} or ( info.kind in {"ndarray", "c2array"} and info.metadata.get("ndim", 0) >= 1 ) @@ -1423,6 +1424,11 @@ def _slice_table_buffer(self, start: int, page_size: int) -> dict: "columns": buffer["columns"], "hidden_columns": buffer["hidden_columns"], "data": {name: values[offset : offset + count] for name, values in buffer["data"].items()}, + **( + {"row_labels": buffer["row_labels"][offset : offset + count]} + if "row_labels" in buffer + else {} + ), **{ key: buffer[key] for key in ( @@ -1434,6 +1440,8 @@ def _slice_table_buffer(self, start: int, page_size: int) -> dict: "slice_indices", "n_slices_per_dim", "viewport_width", + "nbytes", + "typesize", ) if key in buffer }, @@ -1455,13 +1463,16 @@ def _update_data_table(self, data: dict, *, cursor_row: int = 0, cursor_col: int source = buffer if buffer is not None and buffer["columns"] == data["columns"] else data decimals = {name: column_float_decimals(source["data"][name]) for name in data["columns"]} nrows = data["stop"] - data["start"] + # SChunk hex dumps carry explicit (hex byte-offset) row labels; + # everything else labels the gutter with the logical row number. 
+ row_labels = data.get("row_labels") for i in range(nrows): table.add_row( *[ format_cell(data["data"][name][i], float_decimals=decimals[name]) for name in data["columns"] ], - label=str(data["start"] + i), + label=row_labels[i] if row_labels is not None else str(data["start"] + i), ) nrows = data["stop"] - data["start"] cursor_row = min(max(0, cursor_row), max(0, nrows - 1)) @@ -1592,6 +1603,11 @@ def _update_data_header(self, data: dict) -> None: ws, we = self.row_window header_parts.append(f"[reverse] WINDOW {ws}:{we} [/reverse]") header_parts.append("unlock") + elif data.get("source_kind") == "schunk": + # The hex dump is paged in 16-byte rows; report it in bytes. + header_parts.append(f"hex dump · {data.get('nbytes', 0)} bytes") + if data.get("typesize", 1) > 1: + header_parts.append(f"typesize {data['typesize']}") else: header_parts.append(f"rows {data['start']}:{data['stop']} of {data['nrows']}") if "col_start" in data: diff --git a/src/blosc2/b2view/model.py b/src/blosc2/b2view/model.py index f71c1201..2995b042 100644 --- a/src/blosc2/b2view/model.py +++ b/src/blosc2/b2view/model.py @@ -400,7 +400,8 @@ def preview( obj, start=start, stop=stop, columns=columns, max_cols=max_cols, col_start=col_start ) if kind == "schunk": - return {"message": "SChunk byte preview is not implemented yet."} + stop = start + max_rows if stop is None else stop + return preview_schunk(obj, start=start, stop=stop) return {"message": f"Preview is not supported for {kind!r} objects."} def plot_series( @@ -1136,6 +1137,62 @@ def preview_ctable( } +def schunk_row_geometry(typesize: int) -> tuple[int, int]: + """Return ``(items_per_row, bytes_per_row)`` for the hex dump. + + Bytes are grouped into ``typesize``-wide items (so a 4-byte typesize shows + 32-bit words); ``items_per_row`` is chosen so a row is ~16 bytes wide, and + never below one whole item. 
+ """ + typesize = max(1, int(typesize or 1)) + items_per_row = max(1, 16 // typesize) + return items_per_row, items_per_row * typesize + + +def preview_schunk(obj: Any, *, start: int = 0, stop: int = 20) -> dict[str, Any]: + """Return a bounded ``xxd``-style hex dump of an SChunk's raw bytes. + + Each grid row is one ``bytes_per_row`` span (a multiple of ``typesize``); + *start*/*stop* are in those row units, so the existing row-paging machinery + applies unchanged. Only the visible byte span is read (``obj[a:b]``), so a + multi-GB SChunk previews instantly. The byte offset is the row label. + """ + nbytes = int(getattr(obj, "nbytes", 0) or 0) + typesize = max(1, int(getattr(obj, "typesize", 1) or 1)) + items_per_row, bytes_per_row = schunk_row_geometry(typesize) + total_rows = (nbytes + bytes_per_row - 1) // bytes_per_row + start = max(0, start) + stop = min(max(start, stop), total_rows) + byte_start = start * bytes_per_row + byte_stop = min(stop * bytes_per_row, nbytes) + raw = bytes(obj[byte_start:byte_stop]) if byte_stop > byte_start else b"" + hex_width = items_per_row * typesize * 2 + (items_per_row - 1) + hex_col: list[str] = [] + ascii_col: list[str] = [] + labels: list[str] = [] + for r in range(stop - start): + chunk = raw[r * bytes_per_row : (r + 1) * bytes_per_row] + items = [chunk[k : k + typesize].hex() for k in range(0, len(chunk), typesize)] + hex_col.append(" ".join(items).ljust(hex_width)) + ascii_col.append("".join(chr(b) if 0x20 <= b <= 0x7E else "." 
for b in chunk)) + labels.append(format(byte_start + r * bytes_per_row, "08x")) + return { + "start": start, + "stop": stop, + "nrows": total_rows, + "columns": ["hex", "ascii"], + "hidden_columns": 0, + "row_labels": labels, + "data": { + "hex": np.array(hex_col, dtype=object), + "ascii": np.array(ascii_col, dtype=object), + }, + "source_kind": "schunk", + "typesize": typesize, + "nbytes": nbytes, + } + + def is_expensive_ctable_column(obj: Any, name: str) -> bool: """Return whether previewing a CTable column is likely row-by-row expensive.""" try: diff --git a/tests/b2view/test_basics.py b/tests/b2view/test_basics.py index 609308ea..41a62677 100644 --- a/tests/b2view/test_basics.py +++ b/tests/b2view/test_basics.py @@ -895,3 +895,54 @@ class TaggedRow: await pilot.pause() assert not isinstance(app.screen, CellDetailScreen) assert table.cursor_row == 2 + + +# ── SChunk: paged hex dump in the data grid ────────────────────────────── + + +async def test_schunk_hex_dump_paging(tmp_path): + """An SChunk node renders a paged hex+ascii dump with byte-offset labels.""" + path = str(tmp_path / "raw.b2z") + # 4 KiB of a repeating 0..255 ramp, so values at any offset are predictable. + payload = bytes(range(256)) * 16 + with blosc2.TreeStore(path, mode="w") as store: + store["/raw"] = blosc2.SChunk(chunksize=2**16, data=payload) + + app = B2ViewApp(path, start_path="/raw", start_panel="data") + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + table = await focus_data_table(pilot) + + page = app.table_page + assert page["source_kind"] == "schunk" + assert page["columns"] == ["hex", "ascii"] + assert page["nrows"] == len(payload) // 16 # 16 bytes/row + assert page["start"] == 0 + first_stop = page["stop"] + assert first_stop < page["nrows"] # bigger than one viewport + + # Row 0 is bytes 0x00..0x0f; the gutter shows the hex byte offset. 
+ assert page["row_labels"][0] == "00000000" + assert page["data"]["hex"][0] == "00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f" + + # The header reports the dump in bytes, not "rows". + from textual.widgets import Static + + header = app.query_one("#data-header", Static).render() + assert f"{len(payload)} bytes" in str(header) + + # Page forward by stepping off the last visible row; offsets keep going. + table.move_cursor(row=first_stop - 1) + await pilot.press("down") + await wait_for_table(pilot) + page = app.table_page + assert page["start"] == first_stop + assert page["row_labels"][0] == format(first_stop * 16, "08x") + + # 'b' jumps to the last row of the dump. + await pilot.press("b") + await wait_for_table(pilot) + page = app.table_page + assert page["stop"] == page["nrows"] + last_offset = (page["nrows"] - 1) * 16 + assert page["row_labels"][-1] == format(last_offset, "08x") diff --git a/tests/test_b2view_model.py b/tests/test_b2view_model.py index 9bf5b247..aca9d4c7 100644 --- a/tests/test_b2view_model.py +++ b/tests/test_b2view_model.py @@ -12,6 +12,8 @@ preview_array_1d, preview_array_2d, preview_ctable, + preview_schunk, + schunk_row_geometry, ) from blosc2.b2view.render import make_preview_renderables @@ -211,6 +213,59 @@ def test_read_cell_honors_filter_view_row_space(tmp_path): assert browser.read_cell("/t", "tags", 1) == [3, 30] +def test_schunk_row_geometry_groups_by_typesize(): + assert schunk_row_geometry(1) == (16, 16) + assert schunk_row_geometry(2) == (8, 16) + assert schunk_row_geometry(4) == (4, 16) + assert schunk_row_geometry(8) == (2, 16) + assert schunk_row_geometry(3) == (5, 15) # rows stay a whole multiple of typesize + assert schunk_row_geometry(32) == (1, 32) # never below one whole item + + +def test_preview_schunk_hex_dump_bytes_and_offsets(): + data = bytes(range(256)) + s = blosc2.SChunk(chunksize=2**16, data=data) + p = preview_schunk(s, start=0, stop=4) + + assert p["source_kind"] == "schunk" + assert p["columns"] == ["hex", 
"ascii"] + assert p["nrows"] == 16 # 256 bytes / 16 per row + assert p["nbytes"] == 256 + # Row 0 is bytes 0x00..0x0f, byte-offset label in hex. + assert p["row_labels"][:2] == ["00000000", "00000010"] + assert p["data"]["hex"][0] == "00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f" + # Printable ASCII renders; non-printable bytes become dots. + assert p["data"]["ascii"][0] == "." * 16 + assert p["data"]["ascii"][3] == "0123456789:;<=>?" # bytes 0x30..0x3f + + +def test_preview_schunk_groups_hex_by_typesize(): + s = blosc2.SChunk(chunksize=2**16, cparams={"typesize": 4}, data=bytes(range(32))) + p = preview_schunk(s, start=0, stop=2) + assert p["typesize"] == 4 + # 4-byte items, no inter-byte spaces inside an item. + assert p["data"]["hex"][0] == "00010203 04050607 08090a0b 0c0d0e0f" + + +def test_preview_schunk_paging_reads_only_the_window(tmp_path): + path = tmp_path / "raw.b2z" + with blosc2.TreeStore(str(path), mode="w") as store: + store["/raw"] = blosc2.SChunk(chunksize=2**16, data=bytes(range(256))) + + with StoreBrowser(str(path)) as browser: + # A later page resolves the right byte offsets without reading earlier rows. + page = browser.preview("/raw", start=10, stop=12) + assert page["row_labels"] == ["000000a0", "000000b0"] # rows 10, 11 → bytes 160, 176 + assert page["data"]["hex"][0].startswith("a0 a1 a2 a3") + + +def test_preview_schunk_empty(): + s = blosc2.SChunk(chunksize=2**16) + p = preview_schunk(s, start=0, stop=20) + assert p["nrows"] == 0 + assert list(p["data"]["hex"]) == [] + + def test_ctable_preview_preserves_ragged_nested_values(): class Column: def __init__(self, values): diff --git a/todo/b2view.md b/todo/b2view.md index 9fe7c8bb..07a33440 100644 --- a/todo/b2view.md +++ b/todo/b2view.md @@ -11,7 +11,6 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of ### Data panel -- [ ] SChunk preview is not implemented (`model.preview` returns a message). 
### Testing - [ ] Visual regressions: consider `pytest-textual-snapshot` (SVG snapshots) @@ -19,6 +18,19 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of ## Done +- 2026-06-15: SChunk preview — a paged, `xxd`-style hex dump in the data grid + (was an unimplemented-message stub). `preview_schunk` reads only the visible + byte span (`obj[a:b]`) and returns the standard preview dict with + `source_kind="schunk"`, two columns (`hex` | `ascii`) and a new `row_labels` + field (hex byte offsets shown in the gutter); each grid row is one + `bytes_per_row` span, so the existing row paging / `t`/`b` / `g`oto / scrollbar + all apply unchanged and a multi-GB SChunk previews instantly. Hex bytes are + grouped into `typesize`-wide items (`schunk_row_geometry` picks ~16 bytes/row, + never below one whole item). `_uses_grid_preview` now routes schunk to the + grid; `_slice_table_buffer` carries `row_labels`/`nbytes`/`typesize`; + `_update_data_table` uses `row_labels` for the gutter; the header reads + "hex dump · N bytes (typesize ...)". Tests: `preview_schunk` cases in + `test_b2view_model.py` and the Pilot `test_schunk_hex_dump_paging`. - 2026-06-15: Expensive CTable cells (list/struct/object/ndarray columns) show a `<...; skipped>` placeholder; **Enter** on such a cell now decodes just that one cell on demand into a `CellDetailScreen` modal (pretty-printed, scrollable, From 39d94cf5927bf5a1b0c5949034ce6143c594c0e1 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 15 Jun 2026 10:49:15 +0200 Subject: [PATCH 19/24] Rename the b2view high-res extra: plot -> hires With the braille 'p' plot now built in (textual-plotext is a core dep), the extra only covers the high-res 'h' image view, so `plot` was misleading. Rename it to `hires`. Unreleased (4.4.6.dev0), so no deprecation needed. 
--- doc/getting_started/b2view.rst | 5 ++++- doc/getting_started/installation.rst | 32 ++++++++++++++++++++++++++++ pyproject.toml | 9 +++++--- todo/b2view.md | 8 +++---- 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/doc/getting_started/b2view.rst b/doc/getting_started/b2view.rst index 75e98c83..c5c80357 100644 --- a/doc/getting_started/b2view.rst +++ b/doc/getting_started/b2view.rst @@ -8,7 +8,10 @@ metadata and vlmeta of the selected node, and a paged view of the data itself — NDArrays of any dimensionality as well as CTables. ``b2view`` is installed with python-blosc2; no extra dependencies are -needed. +needed, including the in-terminal braille plot (the ``p`` key). Only the +high-resolution image view (the ``h`` key) needs the ``hires`` extra — +``pip install "blosc2[hires]"``. See :doc:`installation` for the list of +extras. Step 1 — Create a sample store ------------------------------ diff --git a/doc/getting_started/installation.rst b/doc/getting_started/installation.rst index a6eb5672..b1f1262c 100644 --- a/doc/getting_started/installation.rst +++ b/doc/getting_started/installation.rst @@ -16,6 +16,38 @@ Conda conda install -c conda-forge python-blosc2 +Optional features (extras) +++++++++++++++++++++++++++ + +The base install already includes everything needed for compression, the +array machinery, and the :doc:`b2view ` terminal browser. A few +heavier, feature-specific dependencies are kept out of it and grouped +into *extras* that you opt into with the ``blosc2[extra]`` syntax: + +.. list-table:: + :header-rows: 1 + :widths: 18 82 + + * - Extra + - Adds + * - ``hires`` + - The high-resolution image view in b2view (the ``h`` key), which + renders a real ``matplotlib`` image in the terminal + (``textual-image``, ``matplotlib``). The lightweight braille plot + (the ``p`` key) is built in and needs no extra. + * - ``parquet`` + - The ``parquet-to-blosc2`` converter (``pyarrow``); see + :doc:`parquet_to_blosc2`. 
+ +Install one or more extras by listing them in brackets (quote the +argument in shells like ``zsh`` that treat brackets specially): + +.. code-block:: console + + pip install "blosc2[hires]" # b2view high-res view (h key) + pip install "blosc2[parquet]" # the Parquet converter + pip install "blosc2[hires,parquet]" # both at once + Source code +++++++++++ diff --git a/pyproject.toml b/pyproject.toml index 7887b47d..ba44a912 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ dependencies = [ "requests", "rich", "textual", + "textual-plotext", "threadpoolctl; platform_machine != 'wasm32'", ] version = "4.4.6.dev0" @@ -52,9 +53,11 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html" [project.optional-dependencies] parquet = ["pyarrow"] -# in-terminal plots for the 'p' key in b2view; 'h' adds a high-res matplotlib -# view rendered as a real image (kitty/iTerm2/sixel) or half-cells elsewhere -plot = ["textual-plotext", "textual-image", "matplotlib"] +# The b2view TUI and its in-terminal braille plot (the 'p' key) are core deps, +# so they work out of the box. This extra only adds the high-res 'h' view, +# which renders a real matplotlib image (kitty/iTerm2/sixel, or half-cells +# elsewhere) — matplotlib is the heavy part, hence the opt-in. +hires = ["textual-image", "matplotlib"] [project.scripts] parquet-to-blosc2 = "blosc2.cli.parquet_to_blosc2:main" diff --git a/todo/b2view.md b/todo/b2view.md index 07a33440..a664652d 100644 --- a/todo/b2view.md +++ b/todo/b2view.md @@ -52,7 +52,7 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of matplotlib (Agg) to a PNG and shows it via `textual-image`'s auto `Image` (kitty/iTerm2/sixel → half-cells); a focusable `VerticalScroll` body keeps the screen's keys live, and it closes with `pop_screen` (pushed without a result - callback). Deps: `textual-image` + `matplotlib` added to the `plot` extra. + callback). Deps: `textual-image` + `matplotlib` in the `hires` extra. 
Tests: `read_series` cases in `test_plot_model.py` and `test_plot_hires_view`. - 2026-06-14: NDArray sources also support the `v` locked window, copy-free via the layout (not `NDArray.slice`, which copies). `DataSliceLayout` gained a @@ -131,9 +131,9 @@ Tests live in `tests/b2view/` (marker `tui`); see the note at the top of Home/End, which were undiscoverable); the data panel subtitle now lists all jump keys: `rows: t/b/g | cols: s/e`. - 2026-06-12: `p` plots the cursor column (or a 1-D leaf) of the loaded row - buffer in a modal, via the optional `textual-plotext` package (new `plot` - extra); braille scatter, NaN/inf filtered, non-numeric columns and a - missing package just notify. Works headless in Pilot tests. + buffer in a modal, via the `textual-plotext` package (a core dep); + braille scatter, NaN/inf filtered, non-numeric columns and a missing + package just notify. Works headless in Pilot tests. - 2026-06-12: The `p` plot shows a downsampled overview of the *whole* series (`StoreBrowser.plot_series`); honors layout (fixed dims) for N-D arrays and active row filters for CTables. From fa0276dcd8ae482ac864da8454d80c05162d9833 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 15 Jun 2026 11:16:00 +0200 Subject: [PATCH 20/24] b2view: brand the status chips yellow (was white reverse video) The WINDOW / DIM MODE indicators used plain [reverse] (white) reverse video. 
Switch them to the brand accent (yellow) so they match the theme: - new _accent_chip() helper renders dark-text-on-yellow via theme vars - WINDOW chips (zoomed/locked header and filter-chip path) use it - the dim-mode full-line highlight becomes dark-on-yellow; the DIM MODE label is an inverted cutout (yellow-on-dark) so it still stands out --- src/blosc2/b2view/app.py | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/blosc2/b2view/app.py b/src/blosc2/b2view/app.py index 645af7c5..363caeec 100644 --- a/src/blosc2/b2view/app.py +++ b/src/blosc2/b2view/app.py @@ -11,6 +11,7 @@ from textual.binding import Binding from textual.containers import Horizontal, Vertical, VerticalScroll from textual.screen import ModalScreen +from textual.theme import Theme from textual.widgets import DataTable, Footer, Header, Input, Static, Tree try: @@ -48,6 +49,25 @@ # Source kinds whose data grid supports horizontal (column) paging. _COL_PAGED_KINDS = frozenset({"ndarray2d", "ndarray_slice", "ctable"}) +# Blosc2-branded palette layered over Textual's default dark canvas: only the +# logo colors are overridden (background/surface/panel stay None so they derive +# the same near-black as textual-dark). Turquoise is used for all borders and +# scrollbars (deep blue is too dark to read on the dark canvas), with yellow as +# the accent for the focused pane's border. 
+BLOSC2_THEME = Theme( + name="blosc2", + primary="#007a86", # turquoise + secondary="#007a86", # turquoise (deep blue reads poorly on a dark canvas) + accent="#df9e00", # yellow + foreground="#e0e0e0", # match textual-dark's foreground + dark=True, +) + + +def _accent_chip(text: str) -> str: + """A reverse-video status chip in the brand accent (dark text on yellow).""" + return f"[$background on $accent] {text} [/]" + class B2ViewPanel(Vertical): """Pane container that can be maximized.""" @@ -907,8 +927,8 @@ class B2ViewApp(App): #data-header { height: auto; padding: 0 1; } #data-table-row { height: 1fr; } #data-table { width: 1fr; height: 1fr; } - #row-scrollbar { width: 1; height: 1fr; color: $accent; } - #col-scrollbar { height: 1; width: 1fr; color: $accent; } + #row-scrollbar { width: 1; height: 1fr; color: $primary; } + #col-scrollbar { height: 1; width: 1fr; color: $primary; } #meta-scroll, #vlmeta-scroll, #data-scroll { height: 1fr; padding: 0 1; } #tree-pane:focus-within, #meta-pane:focus-within, #vlmeta-pane:focus-within, #data-pane:focus-within { border: heavy $accent; } B2ViewPanel.-maximized, @@ -998,6 +1018,8 @@ def compose(self) -> ComposeResult: yield Footer() def on_mount(self) -> None: + self.register_theme(BLOSC2_THEME) + self.theme = "blosc2" self.browser = StoreBrowser(self.urlpath) tree = self.query_one("#tree", Tree) tree.root.data = "/" @@ -1597,11 +1619,13 @@ def _update_data_header(self, data: dict) -> None: header_parts.append(part) if self._dim_mode: - header_parts.append("[reverse] DIM MODE [/reverse]") + # The whole line is accent-reversed below; this chip is a cutout + # (accent text on the dark canvas) so it stands out against it. 
+ header_parts.append("[$accent on $background] DIM MODE [/]") header_parts.append("←→dim ↑↓val fix/nav exit") elif self.row_window is not None: ws, we = self.row_window - header_parts.append(f"[reverse] WINDOW {ws}:{we} [/reverse]") + header_parts.append(_accent_chip(f"WINDOW {ws}:{we}")) header_parts.append("unlock") elif data.get("source_kind") == "schunk": # The hex dump is paged in 16-byte rows; report it in bytes. @@ -1616,7 +1640,7 @@ def _update_data_header(self, data: dict) -> None: line = ", ".join(header_parts) if self._dim_mode and layout is not None: - line = f"[reverse]{line}[/reverse]" + line = f"[$background on $accent]{line}[/]" self.query_one("#data-header", Static).update(line) def _window_and_filter_chips(self, data: dict) -> list[str]: @@ -1624,7 +1648,7 @@ def _window_and_filter_chips(self, data: dict) -> list[str]: chips: list[str] = [] if self.row_window is not None: ws, we = self.row_window - chips.append(f"[reverse] WINDOW {ws}:{we} [/reverse]") + chips.append(_accent_chip(f"WINDOW {ws}:{we}")) if data.get("source_kind") == "ctable" and self.browser is not None: flt = self.browser.get_filter(self.selected_path) col_flt = self.browser.get_column_filter(self.selected_path) From ac348482dd1a572617c6743ed4331aa84dbff24e Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 15 Jun 2026 11:27:52 +0200 Subject: [PATCH 21/24] b2view: fix data panel not focused with --path + --panel data Startup focus was applied on a fixed 0.05s timer that raced the node selection; when selection won, its tree.focus() pulled focus back to the tree, so `b2view store /path --panel data` left the grid unfocused. Replace the timer with a one-shot flag applied at the end of update_panels, once the data panel's display and contents have settled, so focus lands deterministically on the requested panel (the grid for a leaf, the preview scroll for a group). 
--- src/blosc2/b2view/app.py | 25 +++++++++++++++++++------ tests/b2view/test_basics.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/src/blosc2/b2view/app.py b/src/blosc2/b2view/app.py index 363caeec..d8c8ba2a 100644 --- a/src/blosc2/b2view/app.py +++ b/src/blosc2/b2view/app.py @@ -983,6 +983,9 @@ def __init__( self._active_dim = 0 self._dim_mode = False self.loading_table_page = False + # One-shot: apply the --panel start focus after the first update_panels, + # once the data panel's display/contents have settled (see update_panels). + self._apply_focus_on_next_update = False # Absolute (start, stop) of a locked row window from the plot's 'v' key. self.row_window: tuple[int, int] | None = None @@ -1028,15 +1031,18 @@ def on_mount(self) -> None: self.query_one("#data-table-row", Horizontal).display = False self.query_one("#col-scrollbar", Static).display = False + # Focus the requested start panel after the first update_panels has set + # up the data panel (its display and contents), so 'data' lands on the + # populated grid instead of racing the node selection. 
+ self._apply_focus_on_next_update = True if self.start_path and self.start_path != "/": self._navigate_to_path(self.start_path) else: self.call_after_refresh(self.update_panels, "/") - tree.focus() - # Override focus after render settles, when starting panel is not the tree - if self.start_panel != "tree": - self.set_timer(0.05, lambda: self._focus_panel_by_name(self.start_panel)) + def _apply_start_focus(self) -> None: + """Focus the panel requested on startup (the --panel option).""" + self._focus_panel_by_name(self.start_panel) def _focus_panel_by_name(self, name: str) -> None: """Focus a panel by its user-facing name.""" @@ -1077,11 +1083,12 @@ def _navigate_to_path(self, path: str) -> None: found.expand() node = found - # Selecting the node fires NodeSelected → on_tree_node_selected → update_panels + # Selecting the node fires NodeSelected → on_tree_node_selected → + # update_panels, which applies the one-shot start-panel focus once the + # data panel is populated (see _apply_focus_on_next_update). def _do_select(): tree.select_node(node) tree.scroll_to_node(node) - tree.focus() self.call_after_refresh(_do_select) @@ -1177,6 +1184,12 @@ def update_panels(self, path: str) -> None: self._update_vlmeta(vlmeta_pane, vlmeta_widget, None) self._reset_panel_scroll() + # The data panel's display/contents are now settled; apply the one-shot + # startup focus (deferred one frame so the target widget is rendered). 
+ if self._apply_focus_on_next_update: + self._apply_focus_on_next_update = False + self.call_after_refresh(self._apply_start_focus) + @staticmethod def _format_vlmeta_value(value: Any) -> str: """Format a vlmeta value for display.""" diff --git a/tests/b2view/test_basics.py b/tests/b2view/test_basics.py index 41a62677..c60c3d52 100644 --- a/tests/b2view/test_basics.py +++ b/tests/b2view/test_basics.py @@ -119,6 +119,36 @@ def _assert_ctable_window_values(page, expected): # ── Tree and panel focus navigation ────────────────────────────────────── +async def _wait_focus(pilot, expected_id: str) -> str | None: + """Pause until the focused widget is *expected_id* (or give up).""" + for _ in range(30): + await pilot.pause() + if getattr(pilot.app.focused, "id", None) == expected_id: + break + return getattr(pilot.app.focused, "id", None) + + +async def test_start_panel_focus_with_path(store_path): + """``--panel`` focuses the right widget on startup, even with a ``--path``. + + Regression: the data panel was left unfocused when both a starting path + and ``--panel data`` were given (a timer raced the node selection, which + pulled focus back to the tree). + """ + # The bug case: data panel on a leaf must focus the data grid itself. + app = B2ViewApp(store_path, start_path="/level0/leaf1", start_panel="data") + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + assert await _wait_focus(pilot, "data-table") == "data-table" + + # Other panels still land where asked. 
+ for panel, expected in [("meta", "meta-scroll"), ("tree", "tree")]: + app = B2ViewApp(store_path, start_path="/level0/leaf1", start_panel=panel) + async with app.run_test(size=TERM_SIZE) as pilot: + await wait_for_table(pilot) + assert await _wait_focus(pilot, expected) == expected + + async def test_tree_and_panel_focus(store_path): """Tab cycles the panels; Down/Enter in the tree selects nodes.""" app = B2ViewApp(store_path) From 0cdd259f83eced974ed8c70f71070805ec6f9356 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 15 Jun 2026 11:52:48 +0200 Subject: [PATCH 22/24] Start building wasm wheels for PyPI --- .github/workflows/cibuildwheels.yml | 54 ++++++++++++++++++++++++++++- src/blosc2/b2view/cli.py | 13 +++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cibuildwheels.yml b/.github/workflows/cibuildwheels.yml index 601b63cf..771880ea 100644 --- a/.github/workflows/cibuildwheels.yml +++ b/.github/workflows/cibuildwheels.yml @@ -127,8 +127,60 @@ jobs: ./wheelhouse/*.tar.gz + build_wheels_wasm: + name: Build WASM/Pyodide wheels for ${{ matrix.p_ver }} + runs-on: ubuntu-latest + env: + CIBW_BUILD: ${{ matrix.cibw_build }} + # WASM/Pyodide has no SIMD/runtime CPU detection; disable optimised paths + CMAKE_ARGS: "-DWITH_ZLIB_OPTIM=OFF -DWITH_OPTIM=OFF -DWITH_RUNTIME_CPU_DETECTION=OFF" + CIBW_TEST_COMMAND: "pytest {project}/tests" + # cp314 targets the 2026 ABI, built against a prerelease Pyodide (314.x) + CIBW_ENABLE: ${{ matrix.cibw_enable }} + strategy: + fail-fast: false + matrix: + include: + # Python 3.13 -> pyemscripten_2025_0 (Pyodide 0.29.x, stable) + - p_ver: "3.13" + cibw_build: "cp313-*" + cibw_enable: "" + artifact_name: "wasm-pyodide-cp313" + # Python 3.14 -> pyemscripten_2026_0 (Pyodide 314.x, prerelease) + - p_ver: "3.14" + cibw_build: "cp314-*" + cibw_enable: "pyodide-prerelease" + artifact_name: "wasm-pyodide-cp314" + steps: + - name: Checkout repo + uses: actions/checkout@v6 + + - name: Set up Python + 
uses: actions/setup-python@v6 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y cmake + + - name: Install cibuildwheel + run: pip install "cibuildwheel==4.1.*" + + - name: Build wheels + # Testing is performed automatically by cibuildwheel (via node). + # platform=pyodide can only be set via the CLI flag, not an env var. + run: cibuildwheel --platform pyodide + + - uses: actions/upload-artifact@v7 + with: + name: ${{ matrix.artifact_name }} + path: ./wheelhouse/*.whl + + upload_pypi: - needs: [ build_wheels] + needs: [ build_wheels, build_wheels_wasm ] runs-on: ubuntu-latest # Only upload wheels when tagging (typically a release) if: startsWith(github.event.ref, 'refs/tags') diff --git a/src/blosc2/b2view/cli.py b/src/blosc2/b2view/cli.py index 61403011..db0d5f66 100644 --- a/src/blosc2/b2view/cli.py +++ b/src/blosc2/b2view/cli.py @@ -28,6 +28,19 @@ def build_parser() -> argparse.ArgumentParser: def main(argv: list[str] | None = None) -> int: args = build_parser().parse_args(argv) + + import blosc2 + + if blosc2.IS_WASM: + print( + "b2view is an interactive terminal UI and is not supported in the " + "Pyodide/WebAssembly build of blosc2:\nthere is no terminal driver " + "(termios) available in this environment.\n" + "Run b2view from a native (CPython) install instead.", + file=sys.stderr, + ) + return 1 + try: from blosc2.b2view.app import B2ViewApp except ImportError as exc: From 955abe8ca8af670e4568e931c19cd7a5efb1bf2c Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 15 Jun 2026 12:15:30 +0200 Subject: [PATCH 23/24] WASM build for cp313 pinned to Pyodide 0.29.3: fixes a regression using 0.29.4.
--- .github/workflows/cibuildwheels.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cibuildwheels.yml b/.github/workflows/cibuildwheels.yml index 771880ea..186432d9 100644 --- a/.github/workflows/cibuildwheels.yml +++ b/.github/workflows/cibuildwheels.yml @@ -135,21 +135,23 @@ jobs: # WASM/Pyodide has no SIMD/runtime CPU detection; disable optimised paths CMAKE_ARGS: "-DWITH_ZLIB_OPTIM=OFF -DWITH_OPTIM=OFF -DWITH_RUNTIME_CPU_DETECTION=OFF" CIBW_TEST_COMMAND: "pytest {project}/tests" - # cp314 targets the 2026 ABI, built against a prerelease Pyodide (314.x) - CIBW_ENABLE: ${{ matrix.cibw_enable }} + # Pin the Pyodide version explicitly per target for reproducible builds. + # cp313 stays on 0.29.3 (the version wasm.yml proves good): 0.29.4, the + # cibuildwheel 4.1 default, regresses SChunk get_slice on WASM. + CIBW_PYODIDE_VERSION: ${{ matrix.pyodide_version }} strategy: fail-fast: false matrix: include: - # Python 3.13 -> pyemscripten_2025_0 (Pyodide 0.29.x, stable) + # Python 3.13 -> pyemscripten_2025_0 - p_ver: "3.13" cibw_build: "cp313-*" - cibw_enable: "" + pyodide_version: "0.29.3" artifact_name: "wasm-pyodide-cp313" - # Python 3.14 -> pyemscripten_2026_0 (Pyodide 314.x, prerelease) + # Python 3.14 -> pyemscripten_2026_0 - p_ver: "3.14" cibw_build: "cp314-*" - cibw_enable: "pyodide-prerelease" + pyodide_version: "314.0.0" artifact_name: "wasm-pyodide-cp314" steps: - name: Checkout repo From dc71431adfef1bae5b11295177143039c7638456 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 15 Jun 2026 13:23:00 +0200 Subject: [PATCH 24/24] Now, plot_series gives a locked row window precedence over the row filter --- src/blosc2/b2view/model.py | 33 ++++++++++++++++++++++++--------- tests/b2view/test_plot_model.py | 21 +++++++++++++++++++++ 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/src/blosc2/b2view/model.py b/src/blosc2/b2view/model.py index 2995b042..804beae6 100644 --- 
a/src/blosc2/b2view/model.py +++ b/src/blosc2/b2view/model.py @@ -433,20 +433,29 @@ def plot_series( Pass *row_start*/*row_stop* to zoom into a sub-range (always read exactly; ``x`` stays in absolute row coordinates). The series is a - CTable column (*column* is its name; an active row filter is honored) or - an array (*column* is the global index along the column dimension of - *layout*, or None for 1-D arrays). + CTable column (*column* is its name; a locked row window takes + precedence, otherwise an active row filter is honored) or an array + (*column* is the global index along the column dimension of *layout*, + or None for 1-D arrays). """ path = self.normalize_path(path) obj = self._get_object(path) kind = object_kind(obj) if kind == "ctable": - filtered = path in self._filter_views - view = self._filter_views.get(path, obj) + # A locked row window (set by 'v') takes precedence over any row + # filter, mirroring preview()/read_cell(): a plot shows exactly the + # rows the grid is showing. The SUMMARY fast-path spans the whole + # column, so it is only valid when neither narrows the series. + if path in self._window_views: + view = self._window_views[path] + narrowed = True + else: + view = self._filter_views.get(path, obj) + narrowed = path in self._filter_views n = len(view) start, stop = self._clamp_range(row_start, row_stop, n) - if start == 0 and stop == n and not filtered: + if start == 0 and stop == n and not narrowed: env = self._column_summary_envelope(obj, column, n, max_points) if env is not None: return {**env, "n": n, "row_start": start, "row_stop": stop, "method": "summary"} @@ -514,8 +523,9 @@ def read_series( ) -> dict[str, Any]: """Return the *raw* values of one series over ``[row_start, row_stop)``. 
- Same series selection as :meth:`plot_series` (CTable column honoring an - active filter, or an array column via *layout*) but with no bucketing — + Same series selection as :meth:`plot_series` (CTable column honoring a + locked row window then an active filter, or an array column via + *layout*) but with no bucketing — every value is read exactly, for the high-res ``h`` view. The result is ``{"x", "y", "n", "row_start", "row_stop"}`` with ``x`` in absolute row coordinates. This reads exactly what is asked, so callers must bound the @@ -526,7 +536,12 @@ def read_series( kind = object_kind(obj) if kind == "ctable": - view = self._filter_views.get(path, obj) + # Honor a locked row window first, then any row filter, matching + # preview()/read_cell() so the hi-res view tracks the visible grid. + if path in self._window_views: + view = self._window_views[path] + else: + view = self._filter_views.get(path, obj) n = len(view) start, stop = self._clamp_range(row_start, row_stop, n) y = safe_asarray(view[column][start:stop]) diff --git a/tests/b2view/test_plot_model.py b/tests/b2view/test_plot_model.py index 2db447bc..db6e078f 100644 --- a/tests/b2view/test_plot_model.py +++ b/tests/b2view/test_plot_model.py @@ -193,6 +193,27 @@ def test_read_series_clamps_range(plot_store): assert clamped["y"].shape == (N,) +def test_locked_row_window_confines_plot_and_read_series(plot_store): + """A locked row window (the 'v' action) takes precedence over the full + series in both plot_series and read_series, matching preview()/read_cell() + (PR #663 review): a plot/hi-res of a windowed CTable shows only its rows.""" + path, vals = plot_store + lo, hi = 1000, 1500 + with StoreBrowser(path) as browser: + browser.set_row_window("/ctable", lo, hi) + + env = browser.plot_series("/ctable", column="x", max_points=MAX_POINTS) + assert env["n"] == hi - lo # window length, not the full series + assert env["method"] != "summary" # whole-column fast-path disabled + expected = 
_reduce_envelope(vals[lo:hi], hi - lo, MAX_POINTS) + np.testing.assert_allclose(env["ymin"], expected["ymin"], equal_nan=True) + np.testing.assert_allclose(env["ymax"], expected["ymax"], equal_nan=True) + + raw = browser.read_series("/ctable", column="x") + assert raw["n"] == hi - lo + np.testing.assert_array_equal(raw["y"], vals[lo:hi]) + + def test_streaming_reducer_integer_dtype(): vals = np.arange(1000, dtype=np.int64) env = _minmax_buckets_streaming(lambda s, e: vals[s:e], 1000, 100, span=33)