Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support vsifile IO support #27

Merged
merged 5 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.15.0 (2025-02-27)

* add support for `VSIFile` backend (https://github.com/developmentseed/tilebench/pull/27)

## 0.14.0 (2025-01-06)

* remove `python 3.8` support
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ test = [
"pytest-cov",
"pytest-asyncio",
"requests",
"vsifile",
]
dev = [
"pre-commit",
Expand Down
99 changes: 76 additions & 23 deletions tests/test_middleware.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Tests for tilebench."""

import rasterio
from fastapi import FastAPI
from rio_tiler.io import Reader
from starlette.testclient import TestClient
from vsifile.rasterio import opener

from tilebench.middleware import NoCacheMiddleware, VSIStatsMiddleware

Expand Down Expand Up @@ -33,26 +35,77 @@ def tile():
def skip():
return "I've been skipped"

client = TestClient(app)

response = client.get("/info")
assert response.status_code == 200
assert response.headers["content-type"] == "application/json"
assert response.headers["Cache-Control"] == "no-cache"
assert response.headers["VSI-Stats"]
stats = response.headers["VSI-Stats"]
assert "head;count=" in stats
assert "get;count=" in stats

response = client.get("/tile")
assert response.status_code == 200
assert response.headers["content-type"] == "application/json"
assert response.headers["VSI-Stats"]
stats = response.headers["VSI-Stats"]
assert "head;count=" in stats
assert "get;count=" in stats

response = client.get("/skip")
assert response.status_code == 200
assert response.headers["content-type"] == "application/json"
assert "VSI-Stats" not in response.headers
with TestClient(app) as client:
response = client.get("/info")
assert response.status_code == 200
assert response.headers["content-type"] == "application/json"
assert response.headers["Cache-Control"] == "no-cache"
assert response.headers["VSI-Stats"]
stats = response.headers["VSI-Stats"]
assert "head;count=" in stats
assert "get;count=" in stats

response = client.get("/tile")
assert response.status_code == 200
assert response.headers["content-type"] == "application/json"
assert response.headers["VSI-Stats"]
stats = response.headers["VSI-Stats"]
assert "head;count=" in stats
assert "get;count=" in stats

response = client.get("/skip")
assert response.status_code == 200
assert response.headers["content-type"] == "application/json"
assert "VSI-Stats" not in response.headers


def test_middleware_vsifile():
    """Check the middleware response headers when reads go through vsifile."""
    app = FastAPI()
    app.add_middleware(NoCacheMiddleware)
    app.add_middleware(
        VSIStatsMiddleware, config={}, exclude_paths=["/skip"], io="vsifile"
    )

    @app.get("/info")
    def head():
        """Get info."""
        with rasterio.open(COG_PATH, opener=opener) as dataset:
            with Reader(None, dataset=dataset) as reader:
                reader.info()
                return "I got info"

    @app.get("/tile")
    def tile():
        """Read tile."""
        with rasterio.open(COG_PATH, opener=opener) as dataset:
            with Reader(None, dataset=dataset) as reader:
                reader.tile(36460, 52866, 17)
                return "I got tile"

    @app.get("/skip")
    def skip():
        return "I've been skipped"

    def _check_stats_header(resp):
        # The header must be present, non-empty and report both request kinds.
        header = resp.headers["VSI-Stats"]
        assert header
        assert "head;count=" in header
        assert "get;count=" in header

    with TestClient(app) as client:
        info_resp = client.get("/info")
        assert info_resp.status_code == 200
        assert info_resp.headers["content-type"] == "application/json"
        assert info_resp.headers["Cache-Control"] == "no-cache"
        _check_stats_header(info_resp)

        tile_resp = client.get("/tile")
        assert tile_resp.status_code == 200
        assert tile_resp.headers["content-type"] == "application/json"
        _check_stats_header(tile_resp)

        # "/skip" is listed in exclude_paths, so no stats header is expected.
        skip_resp = client.get("/skip")
        assert skip_resp.status_code == 200
        assert skip_resp.headers["content-type"] == "application/json"
        assert "VSI-Stats" not in skip_resp.headers
27 changes: 27 additions & 0 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Tests for tilebench."""

import rasterio
from rio_tiler.io import Reader
from vsifile.rasterio import opener

from tilebench import profile as profiler

Expand Down Expand Up @@ -41,3 +43,28 @@ def _read_tile(src_path: str, x: int, y: int, z: int, tilesize: int = 256):
assert stats.get("GET")
assert stats.get("Timing")
assert stats.get("WarpKernels")


def test_vsifile():
    """Profile a tile read done through the vsifile opener and check the stats."""

    @profiler(
        kernels=True,
        add_to_return=True,
        quiet=True,
        config={"GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR"},
        io="vsifile",
    )
    def _read_tile(src_path: str, x: int, y: int, z: int, tilesize: int = 256):
        with rasterio.open(src_path, opener=opener) as dataset:
            with Reader(None, dataset=dataset) as reader:
                return reader.tile(x, y, z, tilesize=tilesize)

    # add_to_return=True: the call yields the tile result plus the stats.
    tile_result, stats = _read_tile(COG_PATH, 36460, 52866, 17)
    data, mask = tile_result
    assert data.shape
    assert mask.shape
    assert stats
    # HEAD and WarpKernels only need to be present; GET and Timing must be truthy.
    assert "HEAD" in stats
    assert stats.get("GET")
    assert stats.get("Timing")
    assert "WarpKernels" in stats
99 changes: 76 additions & 23 deletions tests/test_viz.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
"""Tests for tilebench."""

import attr
import rasterio
from rio_tiler.io import Reader
from starlette.testclient import TestClient
from vsifile.rasterio import opener

from tilebench.viz import TileDebug

Expand All @@ -17,26 +21,75 @@ def test_viz():
assert app.endpoint == "http://127.0.0.1:8080"
assert app.template_url == "http://127.0.0.1:8080"

client = TestClient(app.app)

response = client.get("/tiles/17/36460/52866")
assert response.status_code == 200
assert response.headers["content-type"] == "application/json"
assert response.headers["Cache-Control"] == "no-cache"
assert response.headers["VSI-Stats"]
stats = response.headers["VSI-Stats"]
assert "head;count=" in stats
assert "get;count=" in stats

response = client.get("/info.geojson")
assert response.status_code == 200
assert response.headers["content-type"] == "application/geo+json"
assert "VSI-Stats" not in response.headers

response = client.get("/tiles.geojson?ovr_level=0")
assert response.status_code == 200
assert response.headers["content-type"] == "application/geo+json"

response = client.get("/tiles.geojson?ovr_level=1")
assert response.status_code == 200
assert response.headers["content-type"] == "application/geo+json"
with TestClient(app.app) as client:
response = client.get("/tiles/17/36460/52866")
assert response.status_code == 200
assert response.headers["content-type"] == "application/json"
assert response.headers["Cache-Control"] == "no-cache"
assert response.headers["VSI-Stats"]
stats = response.headers["VSI-Stats"]
assert "head;count=" in stats
assert "get;count=" in stats

response = client.get("/info.geojson")
assert response.status_code == 200
assert response.headers["content-type"] == "application/geo+json"
assert "VSI-Stats" not in response.headers

response = client.get("/tiles.geojson?ovr_level=0")
assert response.status_code == 200
assert response.headers["content-type"] == "application/geo+json"

response = client.get("/tiles.geojson?ovr_level=1")
assert response.status_code == 200
assert response.headers["content-type"] == "application/geo+json"


def test_viz_vsifile():
    """Build a TileDebug app on the vsifile backend and exercise its endpoints."""

    @attr.s
    class VSIReader(Reader):
        """Reader whose dataset is opened with ``vsifile.rasterio.opener``."""

        dataset = attr.ib(default=None, init=False)  # type: ignore

        def __attrs_post_init__(self):
            """Open ``self.input`` with the vsifile opener, then run parent setup."""
            opened = rasterio.open(self.input, opener=opener)
            # Entered on the reader's own context stack rather than a local `with`.
            self.dataset = self._ctx_stack.enter_context(opened)
            super().__attrs_post_init__()

    app = TileDebug(
        src_path=COG_PATH,
        config={"GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR"},
        reader=VSIReader,
        io_backend="vsifile",
    )
    assert app.port == 8080
    assert app.endpoint == "http://127.0.0.1:8080"
    assert app.template_url == "http://127.0.0.1:8080"

    with TestClient(app.app) as client:
        tile_resp = client.get("/tiles/17/36460/52866")
        assert tile_resp.status_code == 200
        assert tile_resp.headers["content-type"] == "application/json"
        assert tile_resp.headers["Cache-Control"] == "no-cache"
        stats = tile_resp.headers["VSI-Stats"]
        assert stats
        assert "head;count=" in stats
        assert "get;count=" in stats

        info_resp = client.get("/info.geojson")
        assert info_resp.status_code == 200
        assert info_resp.headers["content-type"] == "application/geo+json"
        assert "VSI-Stats" not in info_resp.headers

        # Same checks for the full-resolution level (0) and first overview (1).
        for ovr_level in (0, 1):
            grid_resp = client.get(f"/tiles.geojson?ovr_level={ovr_level}")
            assert grid_resp.status_code == 200
            assert grid_resp.headers["content-type"] == "application/geo+json"
55 changes: 49 additions & 6 deletions tilebench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
log.add(sys.stderr, format=fmt)


def parse_logs(logs: List[str]) -> Dict[str, Any]:
def parse_rasterio_io_logs(logs: List[str]) -> Dict[str, Any]:
"""Parse Rasterio and CURL logs."""
# HEAD
head_requests = len([line for line in logs if "CURL_INFO_HEADER_OUT: HEAD" in line])
Expand Down Expand Up @@ -53,25 +53,63 @@ def parse_logs(logs: List[str]) -> Dict[str, Any]:
}


def parse_vsifile_io_logs(logs: List[str]) -> Dict[str, Any]:
    """Summarize the HEAD/GET activity found in VSIFILE log lines.

    Args:
        logs: log lines captured while the profiled code was running.

    Returns:
        A dict with three keys: ``HEAD`` (request count), ``GET`` (request
        count, total bytes covered by the downloaded ranges and the raw
        ``start-end`` range strings) and ``WarpKernels`` (the last two
        space-separated tokens of every line mentioning ``GDALWarpKernel``).

    """
    download_marker = "VSIFILE: Downloading: "

    head_count = sum(1 for entry in logs if "VSIFILE_INFO: HEAD" in entry)
    get_count = sum(1 for entry in logs if "VSIFILE_INFO: GET" in entry)

    # One "Downloading" line may carry several ", "-separated ranges.
    ranges = [
        chunk
        for entry in logs
        if download_marker in entry
        for chunk in entry.split(download_marker)[1].split(", ")
    ]

    bounds = [[int(part) for part in chunk.split("-")] for chunk in ranges]

    # Each "start-end" range is counted inclusively, hence the +1.
    transferred = 0
    for first, last in bounds:
        transferred += last - first + 1

    kernels = [entry.split(" ")[-2:] for entry in logs if "GDALWarpKernel" in entry]

    return {
        "HEAD": {"count": head_count},
        "GET": {
            "count": get_count,
            "bytes": transferred,
            "ranges": ranges,
        },
        "WarpKernels": kernels,
    }


def profile(
kernels: bool = False,
add_to_return: bool = False,
quiet: bool = False,
raw: bool = False,
cprofile: bool = False,
config: Optional[Dict] = None,
io="rasterio",
):
"""Profiling."""
if io not in ["rasterio", "vsifile"]:
raise ValueError(f"Unsupported {io} IO backend")

def wrapper(func: Callable):
"""Wrap a function."""

def wrapped_f(*args, **kwargs):
"""Wrapped function."""
rio_stream = StringIO()
logger = logging.getLogger("rasterio")
io_stream = StringIO()
logger = logging.getLogger(io)
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler(rio_stream)
handler = logging.StreamHandler(io_stream)
logger.addHandler(handler)

gdal_config = config or {}
Expand All @@ -88,10 +126,15 @@ def wrapped_f(*args, **kwargs):
logger.removeHandler(handler)
handler.close()

logs = rio_stream.getvalue().splitlines()
logs = io_stream.getvalue().splitlines()
profile_lines = [p for p in profile_stream.getvalue().splitlines() if p]

results = parse_logs(logs)
results = {}
if io == "vsifile":
results.update(parse_vsifile_io_logs(logs))
else:
results.update(parse_rasterio_io_logs(logs))

results["Timing"] = t.elapsed

if cprofile:
Expand Down
Loading