From 786df5b1647becaa1204dbf3c92a2ac5ea5585cd Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Tue, 25 Feb 2025 23:47:50 +0100 Subject: [PATCH 1/5] support vsifile IO support --- tilebench/__init__.py | 59 ++++++++++++++++++++++++++++++++++++---- tilebench/middleware.py | 16 ++++++----- tilebench/scripts/cli.py | 9 ++++++ 3 files changed, 71 insertions(+), 13 deletions(-) diff --git a/tilebench/__init__.py b/tilebench/__init__.py index 3f5c70a..454efe7 100644 --- a/tilebench/__init__.py +++ b/tilebench/__init__.py @@ -19,7 +19,7 @@ log.add(sys.stderr, format=fmt) -def parse_logs(logs: List[str]) -> Dict[str, Any]: +def parse_rasterio_io_logs(logs: List[str]) -> Dict[str, Any]: """Parse Rasterio and CURL logs.""" # HEAD head_requests = len([line for line in logs if "CURL_INFO_HEADER_OUT: HEAD" in line]) @@ -53,6 +53,45 @@ def parse_logs(logs: List[str]) -> Dict[str, Any]: } +def parse_vsifile_io_logs(logs: List[str]) -> Dict[str, Any]: + """Parse VSIFILE IO logs.""" + # HEAD + head_requests = len( + [line for line in logs if "VSIFILE_INFO_HEADER_OUT: HEAD" in line] + ) + head_summary = { + "count": head_requests, + } + + # GET + all_get_requests = len( + [line for line in logs if "VSIFILE_INFO_HEADER_OUT: GET" in line] + ) + + get_requests = [line for line in logs if "VSIFILE: Downloading: " in line] + + get_values_str = [] + for get in get_requests: + get_values_str.extend(get.split("VSIFILE: Downloading: ")[1].split(", ")) + + get_values = [list(map(int, r.split("-"))) for r in get_values_str] + data_transfer = sum([j - i + 1 for i, j in get_values]) + + get_summary = { + "count": all_get_requests, + "bytes": data_transfer, + "ranges": get_values_str, + } + + warp_kernel = [line.split(" ")[-2:] for line in logs if "GDALWarpKernel" in line] + + return { + "HEAD": head_summary, + "GET": get_summary, + "WarpKernels": warp_kernel, + } + + def profile( kernels: bool = False, add_to_return: bool = False, @@ -60,18 +99,21 @@ def profile( raw: bool = False, cprofile: bool = False, config: Optional[Dict] = None, + io="rasterio", ): """Profiling.""" + if io not in ["rasterio", "vsifile"]: + raise ValueError(f"Unsupported {io} IO backend") def wrapper(func: Callable): """Wrap a function.""" def wrapped_f(*args, **kwargs): """Wrapped function.""" - rio_stream = StringIO() - logger = logging.getLogger("rasterio") + io_stream = StringIO() + logger = logging.getLogger(io) logger.setLevel(logging.DEBUG) - handler = logging.StreamHandler(rio_stream) + handler = logging.StreamHandler(io_stream) logger.addHandler(handler) gdal_config = config or {} @@ -88,10 +130,15 @@ def wrapped_f(*args, **kwargs): logger.removeHandler(handler) handler.close() - logs = rio_stream.getvalue().splitlines() + logs = io_stream.getvalue().splitlines() profile_lines = [p for p in profile_stream.getvalue().splitlines() if p] - results = parse_logs(logs) + results = {} + if io == "vsifile": + results.update(parse_vsifile_io_logs(logs)) + else: + results.update(parse_rasterio_io_logs(logs)) + results["Timing"] = t.elapsed if cprofile: diff --git a/tilebench/middleware.py b/tilebench/middleware.py index 3238769..5eeadb2 100644 --- a/tilebench/middleware.py +++ b/tilebench/middleware.py @@ -10,7 +10,7 @@ from starlette.requests import Request from starlette.types import ASGIApp, Message, Receive, Scope, Send -from tilebench import parse_logs +from tilebench import parse_rasterio_io_logs class VSIStatsMiddleware(BaseHTTPMiddleware): @@ -21,11 +21,13 @@ def __init__( app: ASGIApp, config: Optional[Dict] = None, exclude_paths: Optional[List] = None, + io: str = "rasterio", ) -> None: """Init Middleware.""" super().__init__(app) self.config: Dict = config or {} self.exclude_paths: List = exclude_paths or [] + self.io_backend = io async def dispatch(self, request: Request, call_next): """Add VSI stats in headers.""" @@ -33,10 +35,10 @@ async def dispatch(self, request: Request, call_next): if request.scope["path"] in self.exclude_paths: return await call_next(request) - rio_stream = StringIO() - logger = logging.getLogger("rasterio") + io_stream = StringIO() + logger = logging.getLogger(self.io_backend) logger.setLevel(logging.DEBUG) - handler = logging.StreamHandler(rio_stream) + handler = logging.StreamHandler(io_stream) logger.addHandler(handler) gdal_config = {"CPL_DEBUG": "ON", "CPL_CURL_VERBOSE": "TRUE"} @@ -46,10 +48,10 @@ async def dispatch(self, request: Request, call_next): logger.removeHandler(handler) handler.close() - if rio_stream: - rio_lines = rio_stream.getvalue().splitlines() + if io_stream: + io_lines = io_stream.getvalue().splitlines() - results = parse_logs(rio_lines) + results = parse_rasterio_io_logs(io_lines) head_results = "head;count={count}".format(**results["HEAD"]) get_results = "get;count={count};size={bytes}".format(**results["GET"]) ranges_results = "ranges; values={}".format( diff --git a/tilebench/scripts/cli.py b/tilebench/scripts/cli.py index 65b60f5..c46265f 100644 --- a/tilebench/scripts/cli.py +++ b/tilebench/scripts/cli.py @@ -103,6 +103,13 @@ def cli(): callback=options_to_dict, help="Reader Options.", ) +@click.option( + "--io", + "io_backend", + type=click.Choice(["vsifile", "rasterio"], case_sensitive=True), + help="IO Backend Options.", + default="rasterio", +) def profile( input, tile, @@ -115,6 +122,7 @@ def profile( tms, config, reader_params, + io_backend, ): """Profile Reader Tile read.""" tilematrixset = default_tms @@ -166,6 +174,7 @@ def profile( raw=add_stdout, cprofile=add_cprofile, config=config, + io=io_backend, ) def _read_tile(src_path: str, x: int, y: int, z: int, tilesize: int = 256): with DstReader(src_path, tms=tilematrixset, **reader_params) as cog: From 17a2449b3e7d484a8163df957fb55583051da926 Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Tue, 25 Feb 2025 23:50:38 +0100 Subject: [PATCH 2/5] add support in middleware --- tilebench/middleware.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tilebench/middleware.py b/tilebench/middleware.py index 5eeadb2..7f5ced5 100644 --- a/tilebench/middleware.py +++ b/tilebench/middleware.py @@ -10,7 +10,7 @@ from starlette.requests import Request from starlette.types import ASGIApp, Message, Receive, Scope, Send -from tilebench import parse_rasterio_io_logs +from tilebench import parse_rasterio_io_logs, parse_vsifile_io_logs class VSIStatsMiddleware(BaseHTTPMiddleware): @@ -27,6 +27,10 @@ def __init__( super().__init__(app) self.config: Dict = config or {} self.exclude_paths: List = exclude_paths or [] + + if io not in ["rasterio", "vsifile"]: + raise ValueError(f"Unsupported {io} IO backend") + self.io_backend = io async def dispatch(self, request: Request, call_next): @@ -49,9 +53,14 @@ async def dispatch(self, request: Request, call_next): handler.close() if io_stream: - io_lines = io_stream.getvalue().splitlines() + logs = io_stream.getvalue().splitlines() + + results = {} + if self.io_backend == "vsifile": + results.update(parse_vsifile_io_logs(logs)) + else: + results.update(parse_rasterio_io_logs(logs)) - results = parse_rasterio_io_logs(io_lines) head_results = "head;count={count}".format(**results["HEAD"]) get_results = "get;count={count};size={bytes}".format(**results["GET"]) ranges_results = "ranges; values={}".format( From 07c0d5665c5c9d783484ad55a8c4ff4a2dcf2d0c Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Wed, 26 Feb 2025 00:05:48 +0100 Subject: [PATCH 3/5] update vsifile logs --- tilebench/__init__.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tilebench/__init__.py b/tilebench/__init__.py index 454efe7..950224e 100644 --- a/tilebench/__init__.py +++ b/tilebench/__init__.py @@ -56,17 +56,13 @@ def parse_rasterio_io_logs(logs: List[str]) -> Dict[str, Any]: def parse_vsifile_io_logs(logs: List[str]) -> Dict[str, Any]: """Parse VSIFILE IO logs.""" # HEAD - head_requests = len( - [line for line in logs if "VSIFILE_INFO_HEADER_OUT: HEAD" in line] - ) + head_requests = len([line for line in logs if "VSIFILE_INFO: HEAD" in line]) head_summary = { "count": head_requests, } # GET - all_get_requests = len( - [line for line in logs if "VSIFILE_INFO_HEADER_OUT: GET" in line] - ) + all_get_requests = len([line for line in logs if "VSIFILE_INFO: GET" in line]) get_requests = [line for line in logs if "VSIFILE: Downloading: " in line] From 0d1c6aada19a1c77c8fb6cc891508e04d6c63c60 Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Wed, 26 Feb 2025 18:05:34 +0100 Subject: [PATCH 4/5] fix and add support in viz --- CHANGES.md | 4 ++++ tilebench/scripts/cli.py | 10 +++++++++- tilebench/viz.py | 10 ++++++---- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d0c3757..3fe73dc 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,7 @@ +## 0.15.0 (2025-02-26) + +* add support for `VSIFile` backend (https://github.com/developmentseed/tilebench/pull/27) + ## 0.14.0 (2025-01-06) * remove `python 3.8` support diff --git a/tilebench/scripts/cli.py b/tilebench/scripts/cli.py index c46265f..34c30f9 100644 --- a/tilebench/scripts/cli.py +++ b/tilebench/scripts/cli.py @@ -323,7 +323,14 @@ def random(input, zoom, reader, tms, reader_params): callback=options_to_dict, help="Reader Options.", ) -def viz(src_path, port, host, server_only, reader, config, reader_params): +@click.option( + "--io", + "io_backend", + type=click.Choice(["vsifile", "rasterio"], case_sensitive=True), + help="IO Backend Options.", + default="rasterio", +) +def viz(src_path, port, host, server_only, reader, config, reader_params, io_backend): """WEB UI to visualize VSI statistics for a web mercator tile requests.""" if reader: module, classname = reader.rsplit(".", 1) @@ -342,6 +349,7 @@ def viz(src_path, port, host, server_only, reader, config, reader_params): port=port, host=host, config=config, + io_backend=io_backend, ) if not server_only: click.echo(f"Viewer started at {application.template_url}", err=True) diff --git a/tilebench/viz.py b/tilebench/viz.py index 3afd862..737ba3d 100644 --- a/tilebench/viz.py +++ b/tilebench/viz.py @@ -131,6 +131,7 @@ class TileDebug: port: int = attr.ib(default=8080) host: str = attr.ib(default="127.0.0.1") config: Dict = attr.ib(default=dict) + io_backend: str = attr.ib(default="rasterio") router: Optional[APIRouter] = attr.ib(init=False) @@ -215,6 +216,7 @@ def tile( add_to_return=True, raw=False, config=self.config, + io=self.io_backend, ) def _read_tile(src_path: str, x: int, y: int, z: int): with self.reader(src_path, **self.reader_params) as src_dst: @@ -295,10 +297,10 @@ def info(): } ] - try: - ovr = src_dst.dataset.overviews(1) - except Exception: - ovr = [] + try: + ovr = src_dst.dataset.overviews(1) + except Exception: + ovr = [] info["overviews"] = len(ovr) From 6494f0e257f9b508a7e1c05e3b28fbd6470ac581 Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Thu, 27 Feb 2025 10:43:47 +0100 Subject: [PATCH 5/5] add tests --- CHANGES.md | 2 +- pyproject.toml | 1 + tests/test_middleware.py | 99 ++++++++++++++++++++++++++++++---------- tests/test_reader.py | 27 +++++++++++ tests/test_viz.py | 99 ++++++++++++++++++++++++++++++---------- 5 files changed, 181 insertions(+), 47 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 3fe73dc..143932d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,4 @@ -## 0.15.0 (2025-02-26) +## 0.15.0 (2025-02-27) * add support for `VSIFile` backend (https://github.com/developmentseed/tilebench/pull/27) diff --git a/pyproject.toml b/pyproject.toml index 33bb87e..be3e1d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ test = [ "pytest-cov", "pytest-asyncio", "requests", + "vsifile", ] dev = [ "pre-commit", diff --git a/tests/test_middleware.py b/tests/test_middleware.py index fcf4a26..c2fb0a0 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -1,8 +1,10 @@ """Tests for tilebench.""" +import rasterio from fastapi import FastAPI from rio_tiler.io import Reader from starlette.testclient import TestClient +from vsifile.rasterio import opener from tilebench.middleware import NoCacheMiddleware, VSIStatsMiddleware @@ -33,26 +35,77 @@ def tile(): def skip(): return "I've been skipped" - client = TestClient(app) - - response = client.get("/info") - assert response.status_code == 200 - assert response.headers["content-type"] == "application/json" - assert response.headers["Cache-Control"] == "no-cache" - assert response.headers["VSI-Stats"] - stats = response.headers["VSI-Stats"] - assert "head;count=" in stats - assert "get;count=" in stats - - response = client.get("/tile") - assert response.status_code == 200 - assert response.headers["content-type"] == "application/json" - assert response.headers["VSI-Stats"] - stats = response.headers["VSI-Stats"] - assert "head;count=" in stats - assert "get;count=" in stats - - response = client.get("/skip") - assert response.status_code == 200 - assert response.headers["content-type"] == "application/json" - assert "VSI-Stats" not in response.headers + with TestClient(app) as client: + response = client.get("/info") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + assert response.headers["Cache-Control"] == "no-cache" + assert response.headers["VSI-Stats"] + stats = response.headers["VSI-Stats"] + assert "head;count=" in stats + assert "get;count=" in stats + + response = client.get("/tile") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + assert response.headers["VSI-Stats"] + stats = response.headers["VSI-Stats"] + assert "head;count=" in stats + assert "get;count=" in stats + + response = client.get("/skip") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + assert "VSI-Stats" not in response.headers + + +def test_middleware_vsifile(): + """Simple test.""" + app = FastAPI() + app.add_middleware(NoCacheMiddleware) + app.add_middleware( + VSIStatsMiddleware, config={}, exclude_paths=["/skip"], io="vsifile" + ) + + @app.get("/info") + def head(): + """Get info.""" + with rasterio.open(COG_PATH, opener=opener) as src: + with Reader(None, dataset=src) as cog: + cog.info() + return "I got info" + + @app.get("/tile") + def tile(): + """Read tile.""" + with rasterio.open(COG_PATH, opener=opener) as src: + with Reader(None, dataset=src) as cog: + cog.tile(36460, 52866, 17) + return "I got tile" + + @app.get("/skip") + def skip(): + return "I've been skipped" + + with TestClient(app) as client: + response = client.get("/info") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + assert response.headers["Cache-Control"] == "no-cache" + assert response.headers["VSI-Stats"] + stats = response.headers["VSI-Stats"] + assert "head;count=" in stats + assert "get;count=" in stats + + response = client.get("/tile") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + assert response.headers["VSI-Stats"] + stats = response.headers["VSI-Stats"] + assert "head;count=" in stats + assert "get;count=" in stats + + response = client.get("/skip") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + assert "VSI-Stats" not in response.headers diff --git a/tests/test_reader.py b/tests/test_reader.py index 323265b..956e5b2 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -1,6 +1,8 @@ """Tests for tilebench.""" +import rasterio from rio_tiler.io import Reader +from vsifile.rasterio import opener from tilebench import profile as profiler @@ -41,3 +43,28 @@ def _read_tile(src_path: str, x: int, y: int, z: int, tilesize: int = 256): assert stats.get("GET") assert stats.get("Timing") assert stats.get("WarpKernels") + + +def test_vsifile(): + """Checkout profile output.""" + + @profiler( + kernels=True, + add_to_return=True, + quiet=True, + config={"GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR"}, + io="vsifile", + ) + def _read_tile(src_path: str, x: int, y: int, z: int, tilesize: int = 256): + with rasterio.open(src_path, opener=opener) as src: + with Reader(None, dataset=src) as cog: + return cog.tile(x, y, z, tilesize=tilesize) + + (data, mask), stats = _read_tile(COG_PATH, 36460, 52866, 17) + assert data.shape + assert mask.shape + assert stats + assert "HEAD" in stats + assert stats.get("GET") + assert stats.get("Timing") + assert "WarpKernels" in stats diff --git a/tests/test_viz.py b/tests/test_viz.py index b39b766..1c81f84 100644 --- a/tests/test_viz.py +++ b/tests/test_viz.py @@ -1,6 +1,10 @@ """Tests for tilebench.""" +import attr +import rasterio +from rio_tiler.io import Reader from starlette.testclient import TestClient +from vsifile.rasterio import opener from tilebench.viz import TileDebug @@ -17,26 +21,75 @@ def test_viz(): assert app.endpoint == "http://127.0.0.1:8080" assert app.template_url == "http://127.0.0.1:8080" - client = TestClient(app.app) - - response = client.get("/tiles/17/36460/52866") - assert response.status_code == 200 - assert response.headers["content-type"] == "application/json" - assert response.headers["Cache-Control"] == "no-cache" - assert response.headers["VSI-Stats"] - stats = response.headers["VSI-Stats"] - assert "head;count=" in stats - assert "get;count=" in stats - - response = client.get("/info.geojson") - assert response.status_code == 200 - assert response.headers["content-type"] == "application/geo+json" - assert "VSI-Stats" not in response.headers - - response = client.get("/tiles.geojson?ovr_level=0") - assert response.status_code == 200 - assert response.headers["content-type"] == "application/geo+json" - - response = client.get("/tiles.geojson?ovr_level=1") - assert response.status_code == 200 - assert response.headers["content-type"] == "application/geo+json" + with TestClient(app.app) as client: + response = client.get("/tiles/17/36460/52866") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + assert response.headers["Cache-Control"] == "no-cache" + assert response.headers["VSI-Stats"] + stats = response.headers["VSI-Stats"] + assert "head;count=" in stats + assert "get;count=" in stats + + response = client.get("/info.geojson") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/geo+json" + assert "VSI-Stats" not in response.headers + + response = client.get("/tiles.geojson?ovr_level=0") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/geo+json" + + response = client.get("/tiles.geojson?ovr_level=1") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/geo+json" + + +def test_viz_vsifile(): + """Should work as expected (create TileServer object).""" + + @attr.s + class VSIReader(Reader): + """Rasterio Reader with VSIFILE opener.""" + + dataset = attr.ib(default=None, init=False) # type: ignore + + def __attrs_post_init__(self): + """Use vsifile.rasterio.opener as Python file opener.""" + self.dataset = self._ctx_stack.enter_context( + rasterio.open(self.input, opener=opener) + ) + super().__attrs_post_init__() + + app = TileDebug( + src_path=COG_PATH, + config={"GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR"}, + reader=VSIReader, + io_backend="vsifile", + ) + assert app.port == 8080 + assert app.endpoint == "http://127.0.0.1:8080" + assert app.template_url == "http://127.0.0.1:8080" + + with TestClient(app.app) as client: + response = client.get("/tiles/17/36460/52866") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + assert response.headers["Cache-Control"] == "no-cache" + assert response.headers["VSI-Stats"] + stats = response.headers["VSI-Stats"] + assert "head;count=" in stats + assert "get;count=" in stats + + response = client.get("/info.geojson") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/geo+json" + assert "VSI-Stats" not in response.headers + + response = client.get("/tiles.geojson?ovr_level=0") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/geo+json" + + response = client.get("/tiles.geojson?ovr_level=1") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/geo+json"