Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

### Added

- Environment variable `EXCLUDED_FROM_ITEMS` to exclude specific fields from STAC item responses. Accepts a comma-separated list of fully qualified field names (e.g., `properties.auth:schemes,properties.storage:schemes`). [#518](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/518)
- Added validator for `REDIS_MAX_CONNECTIONS` to handle empty or null-like values ("", "null", None) and return None instead. [#519](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/519)

### Changed
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,8 @@ You can customize additional settings in your `.env` file:
| `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional |
| `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. When disabled, searches only by start_datetime/end_datetime range. | `true` | Optional |
| `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional |
| `EXCLUDED_FROM_ITEMS` | Comma-separated list of fields to exclude from STAC item responses (single item, item collection, and search results). Use dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). | `None` | Optional |


> [!NOTE]
> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.
Expand Down
13 changes: 11 additions & 2 deletions stac_fastapi/core/stac_fastapi/core/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import abc
import logging
import os
from copy import deepcopy
from typing import Any, List, Optional

Expand All @@ -10,7 +11,7 @@

from stac_fastapi.core.datetime_utils import now_to_rfc3339_str
from stac_fastapi.core.models.links import CollectionLinks
from stac_fastapi.core.utilities import get_bool_env
from stac_fastapi.core.utilities import get_bool_env, get_excluded_from_items
from stac_fastapi.types import stac as stac_types
from stac_fastapi.types.links import ItemLinks, resolve_links

Expand Down Expand Up @@ -108,7 +109,7 @@ def db_to_stac(cls, item: dict, base_url: str) -> stac_types.Item:
else:
assets = item.get("assets", {})

return stac_types.Item(
stac_item = stac_types.Item(
type="Feature",
stac_version=item.get("stac_version", ""),
stac_extensions=item.get("stac_extensions", []),
Expand All @@ -121,6 +122,14 @@ def db_to_stac(cls, item: dict, base_url: str) -> stac_types.Item:
assets=assets,
)

excluded_fields = os.getenv("EXCLUDED_FROM_ITEMS")
if excluded_fields:
for field_path in excluded_fields.split(","):
if field_path := field_path.strip():
get_excluded_from_items(stac_item, field_path)

return stac_item
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. let's allow for a list of private data fields. PRIVATE_DATA_FIELDS
  2. most Users will have private fields listed under properties so we need to account for nested fields



class CollectionSerializer(Serializer):
"""Serialization methods for STAC collections."""
Expand Down
17 changes: 17 additions & 0 deletions stac_fastapi/core/stac_fastapi/core/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,20 @@ def dict_deep_update(merge_to: Dict[str, Any], merge_from: Dict[str, Any]) -> No
dict_deep_update(merge_to[k], merge_from[k])
else:
merge_to[k] = v


def get_excluded_from_items(obj: dict, field_path: str) -> None:
    """Delete the field addressed by a dotted path from a dictionary, in place.

    Args:
        obj: Dictionary to modify.
        field_path: Dot-separated key path such as ``properties.private_data``.
            The last segment is the key to delete; the segments before it
            name the nested dictionaries to descend through.

    Returns:
        None. ``obj`` is left unchanged when a segment along the path is
        missing, when a segment resolves to something that is not a
        dictionary, or when the final key is absent.
    """
    segments = field_path.split(".")
    leaf = segments.pop()

    node = obj
    for key in segments:
        # A missing key yields a throwaway empty dict, so the walk continues
        # harmlessly and the final pop below becomes a no-op.
        node = node.get(key, {})
        if not isinstance(node, dict):
            return

    node.pop(leaf, None)
53 changes: 53 additions & 0 deletions stac_fastapi/tests/api/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1657,3 +1657,56 @@ async def test_use_datetime_false(app_client, load_test_data, txn_client, monkey

assert "test-item-datetime-only" not in found_ids
assert "test-item-start-end-only" in found_ids


@pytest.mark.asyncio
async def test_hide_private_data_from_item(app_client, txn_client, load_test_data):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Everything is looking really good. Let's just add a test case with a field like properties.confidential to make sure that nested fields are being excluded.

Copy link
Collaborator Author

@YuriZmytrakov YuriZmytrakov Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you @jonhealy1 I have expanded the test with a test case properties.private_data.

os.environ["EXCLUDED_FROM_ITEMS"] = "private_data,properties.private_data"

test_collection = load_test_data("test_collection.json")
test_collection_id = "test-collection-private-data"
test_collection["id"] = test_collection_id
await create_collection(txn_client, test_collection)

test_item = load_test_data("test_item.json")
test_item_id = "test-item-private-data"
test_item["id"] = test_item_id
test_item["collection"] = test_collection_id
test_item["private_data"] = {"secret": "sensitive_info"}
test_item["properties"]["private_data"] = {"secret": "sensitive_info"}
await create_item(txn_client, test_item)

# Test /collections/{collection_id}/items
resp = await app_client.get(f"/collections/{test_collection_id}/items")
assert resp.status_code == 200
resp_json = resp.json()
item = resp_json["features"][0]
assert "private_data" not in item
assert "private_data" not in item["properties"]

# Test /collections/{collection_id}/items/{item_id}
resp = await app_client.get(
f"/collections/{test_collection_id}/items/{test_item_id}"
)
assert resp.status_code == 200
resp_json = resp.json()
assert "private_data" not in resp_json
assert "private_data" not in resp_json["properties"]

# Test GET /search
resp = await app_client.get(f"/search?collections={test_collection_id}")
assert resp.status_code == 200
resp_json = resp.json()
item = resp_json["features"][0]
assert "private_data" not in item
assert "private_data" not in item["properties"]

# Test POST /search
resp = await app_client.post("/search", json={"collections": [test_collection_id]})
assert resp.status_code == 200
resp_json = resp.json()
item = resp_json["features"][0]
assert "private_data" not in item
assert "private_data" not in item["properties"]

del os.environ["EXCLUDED_FROM_ITEMS"]