From 5dacbcac26a1e992440345aa2db81940eaf43a04 Mon Sep 17 00:00:00 2001 From: "Kevin A. Mitchell" Date: Thu, 19 Feb 2026 20:30:09 -0600 Subject: [PATCH 1/7] models: Add validators to handle demo redacted values in PDF metadata - Introduced `BeforeValidator` to process demo redacted values for various fields in `_internal.py` and `public.py`. - Added `demo_value_sanitizers` module to centralize logic for detecting and replacing demo redacted values with appropriate defaults: - For boolean fields: replace with `True` or `False`. - For integer fields: replace with `0`. - For file IDs: replace with a placeholder UUID. - Applied validators to metadata properties, ensuring consistent handling of demo data input. Assisted-by: Codex --- src/pdfrest/models/_demo_value_sanitizers.py | 86 ++++++++++++++++++++ src/pdfrest/models/_internal.py | 7 +- src/pdfrest/models/public.py | 26 ++++++ 3 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 src/pdfrest/models/_demo_value_sanitizers.py diff --git a/src/pdfrest/models/_demo_value_sanitizers.py b/src/pdfrest/models/_demo_value_sanitizers.py new file mode 100644 index 0000000..56e3aeb --- /dev/null +++ b/src/pdfrest/models/_demo_value_sanitizers.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import logging +import re +from typing import Any + +from pydantic import ValidationInfo + +LOGGER = logging.getLogger("pdfrest.models") + +_DEMO_UUID = "00000000-0000-4000-8000-000000000000" +_REDACTED_X_PATTERN = re.compile(r"^[Xx-]{8,}$") + + +def _field_name(info: ValidationInfo) -> str: + return info.field_name or "" + + +def _looks_like_demo_redaction(value: Any) -> bool: + if not isinstance(value, str): + return False + if _looks_like_generate_redacted_string(value): + return True + return bool(_REDACTED_X_PATTERN.fullmatch(value)) + + +def _looks_like_generate_redacted_string(value: str) -> bool: + """Detect strings redacted by PDFCloud-API generateRedactedString. 
+ + The upstream redactor preserves the first two characters and replaces all + non-whitespace characters after that with '*'. + """ + if len(value) < 3: + return False + tail = value[2:] + if "*" not in tail: + return False + return all(char == "*" or char.isspace() for char in tail) + + +def _log_replacement(original: Any, replacement: Any, info: ValidationInfo) -> None: + LOGGER.warning( + "Demo value %s detected in %s; replaced with %s", + original, + _field_name(info), + replacement, + ) + + +def _demo_bool_or_passthrough( + value: Any, info: ValidationInfo, *, replacement: bool +) -> Any: + if value is None or isinstance(value, bool): + return value + if _looks_like_demo_redaction(value): + _log_replacement(value, replacement, info) + return replacement + return value + + +def demo_bool_false_or_passthrough(value: Any, info: ValidationInfo) -> Any: + return _demo_bool_or_passthrough(value, info, replacement=False) + + +def demo_bool_true_or_passthrough(value: Any, info: ValidationInfo) -> Any: + return _demo_bool_or_passthrough(value, info, replacement=True) + + +def demo_file_id_or_passthrough(value: Any, info: ValidationInfo) -> Any: + if value is None: + return value + if _looks_like_demo_redaction(value): + replacement = _DEMO_UUID + _log_replacement(value, replacement, info) + return replacement + return value + + +def demo_int_or_passthrough(value: Any, info: ValidationInfo) -> Any: + if value is None or isinstance(value, int): + return value + if _looks_like_demo_redaction(value): + replacement = 0 + _log_replacement(value, replacement, info) + return replacement + return value diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 918f24b..85bd11f 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -45,6 +45,7 @@ WatermarkHorizontalAlignment, WatermarkVerticalAlignment, ) +from ._demo_value_sanitizers import demo_file_id_or_passthrough from .public import PdfRestFile, PdfRestFileID 
PdfConvertColorProfile = PdfPresetColorProfile | Literal["custom"] @@ -2584,7 +2585,11 @@ class PdfRestRawUploadedFile(BaseModel): """ name: Annotated[str, Field(description="The name of the file")] - id: Annotated[PdfRestFileID, Field(description="The id of the file")] + id: Annotated[ + PdfRestFileID, + BeforeValidator(demo_file_id_or_passthrough), + Field(description="The id of the file"), + ] output_url: Annotated[ list[HttpUrl] | HttpUrl | None, Field(description="The url of the unzipped file", alias="outputUrl"), diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index da316d3..26ff77b 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -11,6 +11,7 @@ AliasChoices, AwareDatetime, BaseModel, + BeforeValidator, ConfigDict, Field, HttpUrl, @@ -20,6 +21,12 @@ from pydantic_core import CoreSchema from typing_extensions import override +from ._demo_value_sanitizers import ( + demo_bool_false_or_passthrough, + demo_bool_true_or_passthrough, + demo_int_or_passthrough, +) + __all__ = ( "ExtractTextResponse", "ExtractedTextDocument", @@ -851,6 +858,7 @@ class PdfRestInfoResponse(BaseModel): tagged: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether structure tags are present in the PDF " "document. The result is true or false." 
@@ -860,6 +868,7 @@ class PdfRestInfoResponse(BaseModel): image_only: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description=( "Indicates whether the document is 'image only,' meaning it consists " @@ -995,6 +1004,7 @@ class PdfRestInfoResponse(BaseModel): contains_annotations: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description=( "Indicates whether the PDF document contains annotations such as " @@ -1007,6 +1017,7 @@ class PdfRestInfoResponse(BaseModel): contains_signature: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether the PDF contains any digital signatures. " "The result is true or false." @@ -1028,6 +1039,7 @@ class PdfRestInfoResponse(BaseModel): file_size: Annotated[ int | None, + BeforeValidator(demo_int_or_passthrough), Field( description="The size of the PDF file in bytes. The result is an integer." ), @@ -1042,6 +1054,7 @@ class PdfRestInfoResponse(BaseModel): restrict_permissions_set: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description=( "Indicates whether the PDF file has restricted permissions, such as " @@ -1054,6 +1067,7 @@ class PdfRestInfoResponse(BaseModel): contains_xfa: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether the PDF contains XFA forms. The result is " "true or false." @@ -1063,6 +1077,7 @@ class PdfRestInfoResponse(BaseModel): contains_acroforms: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether the PDF contains Acroforms. The result is " "true or false." @@ -1072,6 +1087,7 @@ class PdfRestInfoResponse(BaseModel): contains_javascript: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether the PDF contains JavaScript. The result is " "true or false." 
@@ -1081,6 +1097,7 @@ class PdfRestInfoResponse(BaseModel): contains_transparency: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether the PDF contains transparent objects. The " "result is true or false." @@ -1090,6 +1107,7 @@ class PdfRestInfoResponse(BaseModel): contains_embedded_file: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether the PDF contains one or more embedded " "files. The result is true or false." @@ -1099,6 +1117,7 @@ class PdfRestInfoResponse(BaseModel): uses_embedded_fonts: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether the PDF contains fully embedded fonts. " "The result is true or false." @@ -1108,6 +1127,7 @@ class PdfRestInfoResponse(BaseModel): uses_nonembedded_fonts: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether the PDF contains non-embedded fonts. The " "result is true or false." @@ -1117,6 +1137,7 @@ class PdfRestInfoResponse(BaseModel): pdfa: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether the document conforms to the PDF/A " "standard. The result is true or false." @@ -1126,6 +1147,7 @@ class PdfRestInfoResponse(BaseModel): pdfua_claim: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether the document claims to conform to the " "PDF/UA standard. The result is true or false." @@ -1135,6 +1157,7 @@ class PdfRestInfoResponse(BaseModel): pdfe_claim: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether the document claims to conform to the " "PDF/E standard. The result is true or false." 
@@ -1144,6 +1167,7 @@ class PdfRestInfoResponse(BaseModel): pdfx_claim: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description="Indicates whether the document claims to conform to the " "PDF/X standard. The result is true or false." @@ -1153,6 +1177,7 @@ class PdfRestInfoResponse(BaseModel): requires_password_to_open: Annotated[ bool | None, + BeforeValidator(demo_bool_false_or_passthrough), Field( description=( "Indicates whether the PDF requires a password to open. The result " @@ -1165,6 +1190,7 @@ class PdfRestInfoResponse(BaseModel): all_queries_processed: Annotated[ bool, + BeforeValidator(demo_bool_true_or_passthrough), Field( validation_alias=AliasChoices( "all_queries_processed", "allQueriesProcessed" From ae24ce65a681da9e94316e36cbb90c9438ffd56c Mon Sep 17 00:00:00 2001 From: "Kevin A. Mitchell" Date: Thu, 19 Feb 2026 20:32:26 -0600 Subject: [PATCH 2/7] tests: Add tests for sanitizing and replacing demo redacted values - Added `test_query_pdf_info_demo_redacted_booleans_replaced` in `test_query_pdf_info.py` to validate detection and replacement of demo redacted boolean and integer fields with appropriate values. - Added `test_unzip_file_demo_redacted_id_replaced_and_logged` in `test_unzip_file.py` to verify sanitization of redacted file IDs and logging of replacements. - Ensured proper log messages were emitted for all demo value replacements. 
Assisted-by: Codex --- tests/test_query_pdf_info.py | 78 ++++++++++++++++++++++++++++++++++++ tests/test_unzip_file.py | 55 +++++++++++++++++++++++++ 2 files changed, 133 insertions(+) diff --git a/tests/test_query_pdf_info.py b/tests/test_query_pdf_info.py index 8e4fe88..eb87a9f 100644 --- a/tests/test_query_pdf_info.py +++ b/tests/test_query_pdf_info.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import logging from collections.abc import Sequence import httpx @@ -238,3 +239,80 @@ def handler(request: httpx.Request) -> httpx.Response: assert isinstance(response, PdfRestInfoResponse) assert response.tagged is True assert response.all_queries_processed is True + + +def test_query_pdf_info_demo_redacted_booleans_replaced( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + caplog.set_level(logging.WARNING, logger="pdfrest.models") + input_file = make_pdf_file(str(PdfRestFileID.generate())) + + def handler(request: httpx.Request) -> httpx.Response: + if request.method != "POST" or request.url.path != "/pdf-info": + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + return httpx.Response( + 200, + json={ + "inputId": str(input_file.id), + "tagged": "fa***", + "image_only": "fa***", + "contains_annotations": "fa***", + "contains_signature": "fa***", + "file_size": "25***", + "restrict_permissions_set": "fa***", + "contains_xfa": "fa***", + "contains_acroforms": "fa***", + "contains_javascript": "fa***", + "contains_transparency": "fa***", + "contains_embedded_file": "fa***", + "uses_embedded_fonts": "fa***", + "uses_nonembedded_fonts": "fa***", + "pdfa": "fa***", + "pdfua_claim": "fa***", + "pdfe_claim": "fa***", + "pdfx_claim": "fa***", + "requires_password_to_open": "fa***", + "allQueriesProcessed": "tr**", + }, + ) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as 
client: + response = client.query_pdf_info( + input_file, + queries=ALL_PDF_INFO_QUERIES, + ) + + assert response.tagged is False + assert response.image_only is False + assert response.contains_annotations is False + assert response.contains_signature is False + assert response.file_size == 0 + assert response.restrict_permissions_set is False + assert response.contains_xfa is False + assert response.contains_acroforms is False + assert response.contains_javascript is False + assert response.contains_transparency is False + assert response.contains_embedded_file is False + assert response.uses_embedded_fonts is False + assert response.uses_nonembedded_fonts is False + assert response.pdfa is False + assert response.pdfua_claim is False + assert response.pdfe_claim is False + assert response.pdfx_claim is False + assert response.requires_password_to_open is False + assert response.all_queries_processed is True + assert "Demo value fa*** detected in tagged; replaced with False" in caplog.text + assert "Demo value 25*** detected in file_size; replaced with 0" in caplog.text + assert "Demo value fa*** detected in pdfe_claim; replaced with False" in caplog.text + assert "Demo value fa*** detected in pdfx_claim; replaced with False" in caplog.text + assert ( + "Demo value fa*** detected in requires_password_to_open; replaced with False" + in caplog.text + ) + assert ( + "Demo value tr** detected in all_queries_processed; replaced with True" + in caplog.text + ) diff --git a/tests/test_unzip_file.py b/tests/test_unzip_file.py index 25d5a6a..573eda8 100644 --- a/tests/test_unzip_file.py +++ b/tests/test_unzip_file.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import logging import httpx import pytest @@ -16,6 +17,8 @@ build_file_info_payload, ) +DEMO_REPLACEMENT_ID = "00000000-0000-4000-8000-000000000000" + def make_zip_file(file_id: str, name: str = "archive.zip") -> PdfRestFile: return PdfRestFile.model_validate( @@ -302,3 +305,55 @@ async def 
test_async_unzip_file_single_input(monkeypatch: pytest.MonkeyPatch) -> async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: with pytest.raises(ValidationError, match="at most 1 item"): await client.unzip_file([first, second]) + + +def test_unzip_file_demo_redacted_id_replaced_and_logged( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + caplog.set_level(logging.WARNING, logger="pdfrest.models") + zip_file = make_zip_file(str(PdfRestFileID.generate())) + redacted_id = "XXXXXXXXX-XXXXXXXXX-XXXX-XXXXXXXXXXXX" + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/unzip": + return httpx.Response( + 200, + json={ + "inputId": [zip_file.id], + "files": [ + { + "name": "inner.txt", + "id": redacted_id, + "outputUrl": ( + "https://api.pdfrest.com/resource/" + f"{redacted_id}?format=file" + ), + } + ], + }, + ) + if ( + request.method == "GET" + and request.url.path == f"/resource/{DEMO_REPLACEMENT_ID}" + ): + return httpx.Response( + 200, + json=build_file_info_payload( + DEMO_REPLACEMENT_ID, + "inner.txt", + "text/plain", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.unzip_file(zip_file) + + assert response.output_file.id == DEMO_REPLACEMENT_ID + assert ( + "Demo value XXXXXXXXX-XXXXXXXXX-XXXX-XXXXXXXXXXXX detected in id; " + "replaced with 00000000-0000-4000-8000-000000000000" in caplog.text + ) From f79108b84bd38d87ef3063b44c45df771547de3b Mon Sep 17 00:00:00 2001 From: "Kevin A. 
Mitchell" Date: Thu, 19 Feb 2026 20:37:23 -0600 Subject: [PATCH 3/7] docs: Add guidance on handling demo redacted values in API responses - Documented how demo/free-tier keys lead to masked or redacted values in API responses and detailed the replacement behavior for such values. - Added examples of fields with replacement logic and default values: - Boolean fields replaced with `True` or specific defaults. - Integer fields replaced with `0`. - File IDs replaced with placeholder UUIDs for consistency. - Included a logging configuration example for tracking redacted value replacements in Python applications. Assisted-by: Codex --- docs/getting-started.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/getting-started.md b/docs/getting-started.md index f5e053e..6215d95 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -66,6 +66,39 @@ For the official Cloud onboarding flow, see: interactively and generating starter code samples before integrating them into your project. +### Demo keys and redacted values + +If you are using a demo/free-tier key, some API responses may include redacted +values (for example `fa***`, `tr**`, masked strings, or placeholder IDs). 
+ +To keep response models parseable, the SDK replaces certain known demo-redacted +values in a few response fields: + +- `PdfRestInfoResponse` boolean fields (replaced with `False` when redacted): + `tagged`, `image_only`, `contains_annotations`, `contains_signature`, + `restrict_permissions_set`, `contains_xfa`, `contains_acroforms`, + `contains_javascript`, `contains_transparency`, `contains_embedded_file`, + `uses_embedded_fonts`, `uses_nonembedded_fonts`, `pdfa`, `pdfua_claim`, + `pdfe_claim`, `pdfx_claim`, `requires_password_to_open` +- `PdfRestInfoResponse.file_size` -> replaced with `0` when redacted +- `PdfRestInfoResponse.all_queries_processed` -> replaced with `True` when redacted +- unzip response file IDs are sanitized before file-info lookup, so + `PdfRestFileBasedResponse.output_file.id` may be the placeholder UUID + `00000000-0000-4000-8000-000000000000` when demo IDs are redacted + +When a replacement happens, the SDK logs a warning in this format: + +`Demo value <value> detected in <field>; replaced with <replacement>` + +To see these warnings in your app, configure Python logging (example): + +```python +import logging + +logging.basicConfig(level=logging.WARNING) +logging.getLogger("pdfrest.models").setLevel(logging.WARNING) +``` + ## 3. Add a short example program Create `quickstart.py`: From c12ddf4cb50ea5e99879fd7a9a639b1bd3da7f47 Mon Sep 17 00:00:00 2001 From: "Kevin A. Mitchell" Date: Fri, 20 Feb 2026 09:45:06 -0600 Subject: [PATCH 4/7] client: Log and test demo restriction messages in API responses - Added `_log_demo_restriction_messages` to log when API responses include demo mode restriction messages in specific fields (`message`, `warning`, `keyMessage`) for both sync and async clients. - Introduced `_is_demo_restriction_message` utility to identify demo-related restriction messages based on known patterns. - Updated documentation in `getting-started.md` with examples of these log messages and how to configure logging for monitoring. 
- Created new test cases to validate the detection and logging of demo restriction messages: - Tests ensure messages are logged once even if duplicated across fields. - Verified async and sync clients handle and log these cases consistently. Assisted-by: Codex --- docs/getting-started.md | 6 ++ src/pdfrest/client.py | 40 +++++++++- tests/test_client.py | 161 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 206 insertions(+), 1 deletion(-) diff --git a/docs/getting-started.md b/docs/getting-started.md index 6215d95..cded885 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -90,6 +90,11 @@ When a replacement happens, the SDK logs a warning in this format: `Demo value <value> detected in <field>; replaced with <replacement>` + +When the API returns a demo restriction body message (for example the free-tier +"watermarked or redacted" notice in `message`), the SDK also logs: + +`Demo mode restriction message in response <request> field=<field>: <message>` + To see these warnings in your app, configure Python logging (example): ```python @@ -97,6 +102,7 @@ import logging logging.basicConfig(level=logging.WARNING) logging.getLogger("pdfrest.models").setLevel(logging.WARNING) +logging.getLogger("pdfrest.client").setLevel(logging.WARNING) ``` ## 3. 
Add a short example program diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index b3b3f6a..ae2365f 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -192,6 +192,7 @@ BACKOFF_JITTER_SECONDS = 0.1 RETRYABLE_STATUS_CODES = {408, 425, 429, 499} _SUCCESSFUL_DELETION_MESSAGE = "successfully deleted" +_DEMO_RESTRICTION_MESSAGE_FIELDS = ("message", "warning", "keyMessage") HttpMethod = Literal["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"] @@ -263,6 +264,17 @@ def _parse_retry_after_header(header_value: str | None) -> float | None: return seconds if seconds > 0 else 0.0 +def _is_demo_restriction_message(value: str) -> bool: + normalized = value.strip().casefold() + if not normalized: + return False + return ( + "watermarked or redacted" in normalized + and "free account" in normalized + and "upgrade your plan" in normalized + ) + + FileContent = IO[bytes] | bytes | str FileTuple2 = tuple[str | None, FileContent] FileTuple3 = tuple[str | None, FileContent, str | None] @@ -836,11 +848,13 @@ def _handle_response(self, response: httpx.Response) -> Any: f"{getattr(request, 'method', 'UNKNOWN')} {getattr(request, 'url', '')}" ) if response.is_success: + payload = self._decode_json(response) + self._log_demo_restriction_messages(payload, request_label) if self._logger.isEnabledFor(logging.DEBUG): self._logger.debug( "Response %s status=%s", request_label, response.status_code ) - return self._decode_json(response) + return payload message, error_payload = self._extract_error_details(response) retry_after = _parse_retry_after_header(response.headers.get("Retry-After")) @@ -888,6 +902,30 @@ def _decode_json(self, response: httpx.Response) -> Any: response_content=response.text, ) from exc + def _log_demo_restriction_messages(self, payload: Any, request_label: str) -> None: + if not isinstance(payload, Mapping): + return + + typed_payload = cast(Mapping[str, Any], payload) + emitted_messages: set[str] = set() + for field_name in 
_DEMO_RESTRICTION_MESSAGE_FIELDS: + value = typed_payload.get(field_name) + if not isinstance(value, str): + continue + message = value.strip() + if not _is_demo_restriction_message(message): + continue + normalized_message = message.casefold() + if normalized_message in emitted_messages: + continue + emitted_messages.add(normalized_message) + self._logger.warning( + "Demo mode restriction message in response %s field=%s: %s", + request_label, + field_name, + message, + ) + @staticmethod def _extract_error_details( response: httpx.Response, diff --git a/tests/test_client.py b/tests/test_client.py index f7cc6c0..c0380c9 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -25,6 +25,11 @@ VALID_API_KEY = "12345678-1234-1234-1234-123456789abc" ANOTHER_VALID_API_KEY = "abcdefab-cdef-abcd-efab-cdefabcdef12" ASYNC_API_KEY = "fedcba98-7654-3210-fedc-ba9876543210" +DEMO_RESTRICTION_MESSAGE = ( + "Output has been watermarked or redacted. This API request was processed " + "with a free account. Visit https://pdfrest.com/pricing/ to upgrade your " + "plan and receive outputs without watermarks or redactions." 
+) def _build_up_response() -> dict[str, Any]: @@ -709,6 +714,107 @@ def handler(_: httpx.Request) -> httpx.Response: assert exc.value.response_content == "not-json" +def test_client_logs_demo_restriction_message_warning( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + monkeypatch.setenv("PDFREST_API_KEY", VALID_API_KEY) + caplog.set_level("WARNING", logger="pdfrest.client") + + def handler(_: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={**_build_up_response(), "message": DEMO_RESTRICTION_MESSAGE}, + ) + + transport = httpx.MockTransport(handler) + with PdfRestClient(transport=transport) as client: + _ = client.up() + + assert "Demo mode restriction message in response" in caplog.text + assert "field=message" in caplog.text + assert DEMO_RESTRICTION_MESSAGE in caplog.text + + +@pytest.mark.parametrize( + ("field_name", "body_value"), + [ + pytest.param("message", DEMO_RESTRICTION_MESSAGE, id="message"), + pytest.param("warning", DEMO_RESTRICTION_MESSAGE, id="warning"), + pytest.param("keyMessage", DEMO_RESTRICTION_MESSAGE, id="key-message"), + ], +) +def test_client_logs_demo_restriction_message_warning_all_fields( + monkeypatch: pytest.MonkeyPatch, + caplog: pytest.LogCaptureFixture, + field_name: str, + body_value: str, +) -> None: + monkeypatch.setenv("PDFREST_API_KEY", VALID_API_KEY) + caplog.set_level("WARNING", logger="pdfrest.client") + + def handler(_: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={**_build_up_response(), field_name: body_value}, + ) + + transport = httpx.MockTransport(handler) + with PdfRestClient(transport=transport) as client: + _ = client.up() + + assert "Demo mode restriction message in response" in caplog.text + assert f"field={field_name}" in caplog.text + assert DEMO_RESTRICTION_MESSAGE in caplog.text + + +def test_client_logs_demo_restriction_message_once_when_duplicated( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) 
-> None: + monkeypatch.setenv("PDFREST_API_KEY", VALID_API_KEY) + caplog.set_level("WARNING", logger="pdfrest.client") + + def handler(_: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={ + **_build_up_response(), + "message": DEMO_RESTRICTION_MESSAGE, + "warning": DEMO_RESTRICTION_MESSAGE, + "keyMessage": DEMO_RESTRICTION_MESSAGE, + }, + ) + + transport = httpx.MockTransport(handler) + with PdfRestClient(transport=transport) as client: + _ = client.up() + + demo_logs = [ + record.message + for record in caplog.records + if "Demo mode restriction message in response" in record.message + ] + assert len(demo_logs) == 1 + + +def test_client_does_not_log_non_demo_key_message_warning( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + monkeypatch.setenv("PDFREST_API_KEY", VALID_API_KEY) + caplog.set_level("WARNING", logger="pdfrest.client") + + def handler(_: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={**_build_up_response(), "keyMessage": "This is a test key"}, + ) + + transport = httpx.MockTransport(handler) + with PdfRestClient(transport=transport) as client: + _ = client.up() + + assert "Demo mode restriction message in response" not in caplog.text + + @pytest.mark.asyncio async def test_async_client_raises_for_non_json_success_response( monkeypatch: pytest.MonkeyPatch, @@ -728,6 +834,61 @@ def handler(_: httpx.Request) -> httpx.Response: assert exc.value.response_content == "not-json" +@pytest.mark.asyncio +async def test_async_client_logs_demo_restriction_message_warning( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + monkeypatch.setenv("PDFREST_API_KEY", ASYNC_API_KEY) + caplog.set_level("WARNING", logger="pdfrest.client") + + def handler(_: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={**_build_up_response(), "message": DEMO_RESTRICTION_MESSAGE}, + ) + + transport = httpx.MockTransport(handler) + async with 
AsyncPdfRestClient(transport=transport) as client: + _ = await client.up() + + assert "Demo mode restriction message in response" in caplog.text + assert "field=message" in caplog.text + assert DEMO_RESTRICTION_MESSAGE in caplog.text + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("field_name", "body_value"), + [ + pytest.param("message", DEMO_RESTRICTION_MESSAGE, id="message"), + pytest.param("warning", DEMO_RESTRICTION_MESSAGE, id="warning"), + pytest.param("keyMessage", DEMO_RESTRICTION_MESSAGE, id="key-message"), + ], +) +async def test_async_client_logs_demo_restriction_message_warning_all_fields( + monkeypatch: pytest.MonkeyPatch, + caplog: pytest.LogCaptureFixture, + field_name: str, + body_value: str, +) -> None: + monkeypatch.setenv("PDFREST_API_KEY", ASYNC_API_KEY) + caplog.set_level("WARNING", logger="pdfrest.client") + + def handler(_: httpx.Request) -> httpx.Response: + return httpx.Response( + 200, + json={**_build_up_response(), field_name: body_value}, + ) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(transport=transport) as client: + _ = await client.up() + + assert "Demo mode restriction message in response" in caplog.text + assert f"field={field_name}" in caplog.text + assert DEMO_RESTRICTION_MESSAGE in caplog.text + + def test_client_uses_text_for_non_json_error_payload( monkeypatch: pytest.MonkeyPatch, ) -> None: From 8cb1536bb49fb46f9943ac987ff263993d83ddcf Mon Sep 17 00:00:00 2001 From: "Kevin A. Mitchell" Date: Fri, 20 Feb 2026 11:24:30 -0600 Subject: [PATCH 5/7] client: Add demo fallback handling for 404 file-info lookups - Introduced `_is_demo_fallback_file_id` and `_build_demo_fallback_file` to detect and return placeholder metadata for missing demo fallback files. - Updated `file_info` and `async_file_info` methods to return fallback data when a 404 error is encountered with a demo fallback file ID. - Improved logging to notify users when fallback data is being returned. 
tests: Add coverage for demo fallback file handling - Added tests to validate sync and async handling of demo fallback files. - Verified proper logging when returning placeholder metadata for 404 cases. Assisted-by: Codex --- src/pdfrest/client.py | 46 +++++++++++++++++++- tests/test_unzip_file.py | 91 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+), 2 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index ae2365f..7c66e19 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -193,6 +193,11 @@ RETRYABLE_STATUS_CODES = {408, 425, 429, 499} _SUCCESSFUL_DELETION_MESSAGE = "successfully deleted" _DEMO_RESTRICTION_MESSAGE_FIELDS = ("message", "warning", "keyMessage") +_DEMO_FALLBACK_FILE_ID = "00000000-0000-4000-8000-000000000000" +_DEMO_FALLBACK_FILE_URL = "https://pdfrest.com/demo-redacted" +_DEMO_FALLBACK_MIME_TYPE = "application/octet-stream" +_DEMO_FALLBACK_FILE_NAME = "demo-redacted.bin" +_DEMO_FALLBACK_FILE_SIZE = 1 HttpMethod = Literal["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"] @@ -320,6 +325,23 @@ def _extract_uploaded_file_ids(payload: Any) -> list[str]: return file_ids +def _is_demo_fallback_file_id(file_id: str) -> bool: + return file_id.strip().lower() == _DEMO_FALLBACK_FILE_ID + + +def _build_demo_fallback_file(file_id: str) -> PdfRestFile: + return PdfRestFile.model_validate( + { + "id": file_id, + "name": _DEMO_FALLBACK_FILE_NAME, + "url": _DEMO_FALLBACK_FILE_URL, + "type": _DEMO_FALLBACK_MIME_TYPE, + "size": _DEMO_FALLBACK_FILE_SIZE, + "modified": datetime.now(timezone.utc), + } + ) + + def _handle_deletion_failures(response: PdfRestDeletionResponse) -> None: failures: list[PdfRestDeleteError] = [] for file_id, result in response.deletion_responses.items(): @@ -1198,7 +1220,17 @@ def fetch_file_info( extra_headers=extra_headers, timeout=timeout, ) - payload = self._send_request(request) + try: + payload = self._send_request(request) + except PdfRestApiError as exc: + if 
exc.status_code == 404 and _is_demo_fallback_file_id(file_id): + self._logger.warning( + "Demo fallback file id %s was not found during file-info lookup; " + "returning placeholder metadata.", + file_id, + ) + return _build_demo_fallback_file(file_id) + raise return PdfRestFile.model_validate(payload) @@ -1473,7 +1505,17 @@ async def fetch_file_info( extra_headers=extra_headers, timeout=timeout, ) - payload = await self._send_request(request) + try: + payload = await self._send_request(request) + except PdfRestApiError as exc: + if exc.status_code == 404 and _is_demo_fallback_file_id(file_id): + self._logger.warning( + "Demo fallback file id %s was not found during file-info lookup; " + "returning placeholder metadata.", + file_id, + ) + return _build_demo_fallback_file(file_id) + raise return PdfRestFile.model_validate(payload) diff --git a/tests/test_unzip_file.py b/tests/test_unzip_file.py index 573eda8..ca5969f 100644 --- a/tests/test_unzip_file.py +++ b/tests/test_unzip_file.py @@ -357,3 +357,94 @@ def handler(request: httpx.Request) -> httpx.Response: "Demo value XXXXXXXXX-XXXXXXXXX-XXXX-XXXXXXXXXXXX detected in id; " "replaced with 00000000-0000-4000-8000-000000000000" in caplog.text ) + + +def test_unzip_file_demo_fallback_file_info_on_404( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + caplog.set_level(logging.WARNING, logger="pdfrest.client") + zip_file = make_zip_file(str(PdfRestFileID.generate())) + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/unzip": + return httpx.Response( + 200, + json={ + "inputId": [zip_file.id], + "files": [ + { + "name": "inner.txt", + "id": DEMO_REPLACEMENT_ID, + "outputUrl": None, + } + ], + }, + ) + if ( + request.method == "GET" + and request.url.path == f"/resource/{DEMO_REPLACEMENT_ID}" + ): + return httpx.Response(404, json={"error": "The file does not exist."}) + 
msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.unzip_file(zip_file) + + assert response.output_file.id == DEMO_REPLACEMENT_ID + assert response.output_file.name == "demo-redacted.bin" + assert str(response.output_file.url) == "https://pdfrest.com/demo-redacted" + assert response.output_file.type == "application/octet-stream" + assert response.output_file.size == 1 + assert ( + "Demo fallback file id 00000000-0000-4000-8000-000000000000 was not found " + "during file-info lookup; returning placeholder metadata." in caplog.text + ) + + +@pytest.mark.asyncio +async def test_async_unzip_file_demo_fallback_file_info_on_404( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + caplog.set_level(logging.WARNING, logger="pdfrest.client") + zip_file = make_zip_file(str(PdfRestFileID.generate())) + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/unzip": + return httpx.Response( + 200, + json={ + "inputId": [zip_file.id], + "files": [ + { + "name": "inner-async.txt", + "id": DEMO_REPLACEMENT_ID, + "outputUrl": None, + } + ], + }, + ) + if ( + request.method == "GET" + and request.url.path == f"/resource/{DEMO_REPLACEMENT_ID}" + ): + return httpx.Response(404, json={"error": "The file does not exist."}) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.unzip_file(zip_file) + + assert response.output_file.id == DEMO_REPLACEMENT_ID + assert response.output_file.name == "demo-redacted.bin" + assert str(response.output_file.url) == 
"https://pdfrest.com/demo-redacted" + assert response.output_file.type == "application/octet-stream" + assert response.output_file.size == 1 + assert ( + "Demo fallback file id 00000000-0000-4000-8000-000000000000 was not found " + "during file-info lookup; returning placeholder metadata." in caplog.text + ) From da22e3e463f0b35704679df67d00ae2468ce55a8 Mon Sep 17 00:00:00 2001 From: "Kevin A. Mitchell" Date: Fri, 20 Feb 2026 11:54:42 -0600 Subject: [PATCH 6/7] models: Clamp demo redacted values to parseable constants - Updated `_demo_value_sanitizers.py` to enforce parseable-but-useless replacements for demo-redacted boolean-like strings to maintain operability while preserving data obscurity. - Added comments clarifying the intent of replacements in demo mode. - Updated AGENTS.md to document the redaction approach for demo/free-tier values. Assisted-by: Codex --- AGENTS.md | 4 ++++ src/pdfrest/models/_demo_value_sanitizers.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index cf46ec1..47b632d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -173,6 +173,10 @@ pdfRest wire quirks (for example, splitting comma-separated values or serializing only the first uploaded file ID), not re-implement constraint logic already expressed by Pydantic field types/annotations. +- For demo/free-tier redactions, favor parseable-but-useless replacements over + reconstructing likely true values. The SDK should remain operable (no parsing + crashes) while preserving demo mode’s intent of withholding useful output + fidelity. - Prefer reusable validator factories that take parameters (for example allowed-value/extension helpers with keyword-configured fallbacks) over bespoke one-off validator functions tied to a single field. 
diff --git a/src/pdfrest/models/_demo_value_sanitizers.py b/src/pdfrest/models/_demo_value_sanitizers.py index 56e3aeb..06db8ee 100644 --- a/src/pdfrest/models/_demo_value_sanitizers.py +++ b/src/pdfrest/models/_demo_value_sanitizers.py @@ -53,6 +53,9 @@ def _demo_bool_or_passthrough( if value is None or isinstance(value, bool): return value if _looks_like_demo_redaction(value): + # Intentionally clamp demo-redacted bool-like strings to a configured + # constant. The goal is parseability without restoring potentially + # meaningful signal that demo mode is designed to obscure. _log_replacement(value, replacement, info) return replacement return value From 3a5b42e07e434a218d140589bb4da072ce95bc68 Mon Sep 17 00:00:00 2001 From: "Kevin A. Mitchell" Date: Fri, 20 Feb 2026 12:24:34 -0600 Subject: [PATCH 7/7] pyproject: Update to 1.0.1 for demo bug fixes --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a022a92..423355e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pdfrest" -version = "1.0.0" +version = "1.0.1" description = "Python client library for interacting with the PDFRest API" readme = "README.md" authors = [ diff --git a/uv.lock b/uv.lock index 4e6372b..e889725 100644 --- a/uv.lock +++ b/uv.lock @@ -961,7 +961,7 @@ wheels = [ [[package]] name = "pdfrest" -version = "1.0.0" +version = "1.0.1" source = { editable = "." } dependencies = [ { name = "exceptiongroup" },