Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,10 @@
pdfRest wire quirks (for example, splitting comma-separated values or
serializing only the first uploaded file ID), not re-implement constraint
logic already expressed by Pydantic field types/annotations.
- For demo/free-tier redactions, favor parseable-but-useless replacements over
reconstructing likely true values. The SDK should remain operable (no parsing
crashes) while preserving demo mode’s intent of withholding useful output
fidelity.
- Prefer reusable validator factories that take parameters (for example
allowed-value/extension helpers with keyword-configured fallbacks) over
bespoke one-off validator functions tied to a single field.
Expand Down
39 changes: 39 additions & 0 deletions docs/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,45 @@ For the official Cloud onboarding flow, see:
interactively and generating starter code samples before integrating them
into your project.

### Demo keys and redacted values

If you are using a demo/free-tier key, some API responses may include redacted
values (for example `fa***`, `tr**`, masked strings, or placeholder IDs).

To keep response models parseable, the SDK replaces certain known demo-redacted
values in a few response fields:

- `PdfRestInfoResponse` boolean fields:
`tagged`, `image_only`, `contains_annotations`, `contains_signature`,
`restrict_permissions_set`, `contains_xfa`, `contains_acroforms`,
`contains_javascript`, `contains_transparency`, `contains_embedded_file`,
`uses_embedded_fonts`, `uses_nonembedded_fonts`, `pdfa`, `pdfua_claim`,
`pdfe_claim`, `pdfx_claim`, `requires_password_to_open`
- `PdfRestInfoResponse.file_size` -> replaced with `0` when redacted
- `PdfRestInfoResponse.all_queries_processed` -> replaced with `True` when redacted
- unzip response file IDs are sanitized before file-info lookup, so
`PdfRestFileBasedResponse.output_file.id` may be the placeholder UUID
`00000000-0000-4000-8000-000000000000` when demo IDs are redacted

When a replacement happens, the SDK logs a warning in this format:

`Demo value <val> detected in <field-name>; replaced with <replacement>`

When the API returns a demo restriction body message (for example the free-tier
"watermarked or redacted" notice in `message`), the SDK also logs:

`Demo mode restriction message in response <METHOD URL> field=<field>: <message>`

To see these warnings in your app, configure Python logging (example):

```python
import logging

logging.basicConfig(level=logging.WARNING)
logging.getLogger("pdfrest.models").setLevel(logging.WARNING)
logging.getLogger("pdfrest.client").setLevel(logging.WARNING)
```

## 3. Add a short example program

Create `quickstart.py`:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "pdfrest"
version = "1.0.0"
version = "1.0.1"
description = "Python client library for interacting with the PDFRest API"
readme = "README.md"
authors = [
Expand Down
86 changes: 83 additions & 3 deletions src/pdfrest/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,12 @@
BACKOFF_JITTER_SECONDS = 0.1
RETRYABLE_STATUS_CODES = {408, 425, 429, 499}
_SUCCESSFUL_DELETION_MESSAGE = "successfully deleted"
# Top-level response fields scanned for the demo restriction notice.
_DEMO_RESTRICTION_MESSAGE_FIELDS = ("message", "warning", "keyMessage")
# File ID recognized as the demo placeholder; a 404 from the file-info lookup
# for this ID yields placeholder metadata instead of an error.
_DEMO_FALLBACK_FILE_ID = "00000000-0000-4000-8000-000000000000"
# Placeholder metadata used when building the fallback PdfRestFile.
_DEMO_FALLBACK_FILE_URL = "https://pdfrest.com/demo-redacted"
_DEMO_FALLBACK_MIME_TYPE = "application/octet-stream"
_DEMO_FALLBACK_FILE_NAME = "demo-redacted.bin"
_DEMO_FALLBACK_FILE_SIZE = 1


HttpMethod = Literal["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"]
Expand Down Expand Up @@ -263,6 +269,17 @@ def _parse_retry_after_header(header_value: str | None) -> float | None:
return seconds if seconds > 0 else 0.0


def _is_demo_restriction_message(value: str) -> bool:
normalized = value.strip().casefold()
if not normalized:
return False
return (
"watermarked or redacted" in normalized
and "free account" in normalized
and "upgrade your plan" in normalized
)


FileContent = IO[bytes] | bytes | str
FileTuple2 = tuple[str | None, FileContent]
FileTuple3 = tuple[str | None, FileContent, str | None]
Expand Down Expand Up @@ -308,6 +325,23 @@ def _extract_uploaded_file_ids(payload: Any) -> list[str]:
return file_ids


def _is_demo_fallback_file_id(file_id: str) -> bool:
    """Return True when *file_id* equals the demo fallback file ID.

    Surrounding whitespace and letter case are ignored in the comparison.
    """
    candidate = file_id.strip().lower()
    return candidate == _DEMO_FALLBACK_FILE_ID


def _build_demo_fallback_file(file_id: str) -> PdfRestFile:
    """Build a placeholder ``PdfRestFile`` that carries *file_id*.

    The name, URL, MIME type and size come from the module's demo fallback
    constants, and ``modified`` is set to the current UTC time.
    """
    placeholder_fields = {
        "id": file_id,
        "name": _DEMO_FALLBACK_FILE_NAME,
        "url": _DEMO_FALLBACK_FILE_URL,
        "type": _DEMO_FALLBACK_MIME_TYPE,
        "size": _DEMO_FALLBACK_FILE_SIZE,
        "modified": datetime.now(timezone.utc),
    }
    return PdfRestFile.model_validate(placeholder_fields)


def _handle_deletion_failures(response: PdfRestDeletionResponse) -> None:
failures: list[PdfRestDeleteError] = []
for file_id, result in response.deletion_responses.items():
Expand Down Expand Up @@ -836,11 +870,13 @@ def _handle_response(self, response: httpx.Response) -> Any:
f"{getattr(request, 'method', 'UNKNOWN')} {getattr(request, 'url', '')}"
)
if response.is_success:
payload = self._decode_json(response)
self._log_demo_restriction_messages(payload, request_label)
if self._logger.isEnabledFor(logging.DEBUG):
self._logger.debug(
"Response %s status=%s", request_label, response.status_code
)
return self._decode_json(response)
return payload

message, error_payload = self._extract_error_details(response)
retry_after = _parse_retry_after_header(response.headers.get("Retry-After"))
Expand Down Expand Up @@ -888,6 +924,30 @@ def _decode_json(self, response: httpx.Response) -> Any:
response_content=response.text,
) from exc

def _log_demo_restriction_messages(self, payload: Any, request_label: str) -> None:
    """Warn about demo restriction notices found in a decoded response.

    Only mapping payloads are inspected. Each field named in
    ``_DEMO_RESTRICTION_MESSAGE_FIELDS`` is checked, and a notice that
    appears in several fields (compared case-insensitively) is logged once.
    """
    if not isinstance(payload, Mapping):
        return

    fields = cast(Mapping[str, Any], payload)
    already_logged: set[str] = set()
    for field_name in _DEMO_RESTRICTION_MESSAGE_FIELDS:
        raw = fields.get(field_name)
        if not isinstance(raw, str):
            continue
        text = raw.strip()
        dedup_key = text.casefold()
        if dedup_key in already_logged or not _is_demo_restriction_message(text):
            continue
        already_logged.add(dedup_key)
        self._logger.warning(
            "Demo mode restriction message in response %s field=%s: %s",
            request_label,
            field_name,
            text,
        )

@staticmethod
def _extract_error_details(
response: httpx.Response,
Expand Down Expand Up @@ -1160,7 +1220,17 @@ def fetch_file_info(
extra_headers=extra_headers,
timeout=timeout,
)
payload = self._send_request(request)
try:
payload = self._send_request(request)
except PdfRestApiError as exc:
if exc.status_code == 404 and _is_demo_fallback_file_id(file_id):
self._logger.warning(
"Demo fallback file id %s was not found during file-info lookup; "
"returning placeholder metadata.",
file_id,
)
return _build_demo_fallback_file(file_id)
raise
return PdfRestFile.model_validate(payload)


Expand Down Expand Up @@ -1435,7 +1505,17 @@ async def fetch_file_info(
extra_headers=extra_headers,
timeout=timeout,
)
payload = await self._send_request(request)
try:
payload = await self._send_request(request)
except PdfRestApiError as exc:
if exc.status_code == 404 and _is_demo_fallback_file_id(file_id):
self._logger.warning(
"Demo fallback file id %s was not found during file-info lookup; "
"returning placeholder metadata.",
file_id,
)
return _build_demo_fallback_file(file_id)
raise
return PdfRestFile.model_validate(payload)


Expand Down
89 changes: 89 additions & 0 deletions src/pdfrest/models/_demo_value_sanitizers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from __future__ import annotations

import logging
import re
from typing import Any

from pydantic import ValidationInfo

# Logger that receives the demo-value replacement warnings.
LOGGER = logging.getLogger("pdfrest.models")

# Replacement ID substituted for file IDs that look demo-redacted.
_DEMO_UUID = "00000000-0000-4000-8000-000000000000"
# Matches values made up entirely of eight or more "X", "x" or "-" characters.
_REDACTED_X_PATTERN = re.compile(r"^[Xx-]{8,}$")


def _field_name(info: ValidationInfo) -> str:
return info.field_name or "<unknown>"


def _looks_like_demo_redaction(value: Any) -> bool:
    """Report whether *value* is a string in one of the known redacted shapes.

    Non-string values never match. A string matches when it has the
    prefix-plus-asterisks shape or consists solely of the characters accepted
    by ``_REDACTED_X_PATTERN``.
    """
    if not isinstance(value, str):
        return False
    return _looks_like_generate_redacted_string(value) or (
        _REDACTED_X_PATTERN.fullmatch(value) is not None
    )


def _looks_like_generate_redacted_string(value: str) -> bool:
"""Detect strings redacted by PDFCloud-API generateRedactedString.

The upstream redactor preserves the first two characters and replaces all
non-whitespace characters after that with '*'.
"""
if len(value) < 3:
return False
tail = value[2:]
if "*" not in tail:
return False
return all(char == "*" or char.isspace() for char in tail)


def _log_replacement(original: Any, replacement: Any, info: ValidationInfo) -> None:
    """Log a warning that a demo-redacted value was swapped for a stand-in.

    The message names the original value, the field being validated and the
    replacement that was substituted.
    """
    field = _field_name(info)
    LOGGER.warning(
        "Demo value %s detected in %s; replaced with %s", original, field, replacement
    )


def _demo_bool_or_passthrough(
value: Any, info: ValidationInfo, *, replacement: bool
) -> Any:
if value is None or isinstance(value, bool):
return value
if _looks_like_demo_redaction(value):
# Intentionally clamp demo-redacted bool-like strings to a configured
# constant. The goal is parseability without restoring potentially
# meaningful signal that demo mode is designed to obscure.
_log_replacement(value, replacement, info)
return replacement
return value


def demo_bool_false_or_passthrough(value: Any, info: ValidationInfo) -> Any:
    """Validator hook: swap a demo-redacted boolean for ``False``."""
    sanitized = _demo_bool_or_passthrough(value, info, replacement=False)
    return sanitized
Comment thread
datalogics-kam marked this conversation as resolved.


def demo_bool_true_or_passthrough(value: Any, info: ValidationInfo) -> Any:
    """Validator hook: swap a demo-redacted boolean for ``True``."""
    sanitized = _demo_bool_or_passthrough(value, info, replacement=True)
    return sanitized
Comment thread
datalogics-kam marked this conversation as resolved.


def demo_file_id_or_passthrough(value: Any, info: ValidationInfo) -> Any:
    """Replace a demo-redacted file ID with ``_DEMO_UUID``; pass others through.

    ``None`` and values that do not look demo-redacted are returned
    unchanged. A replacement is logged as a warning.
    """
    if value is not None and _looks_like_demo_redaction(value):
        _log_replacement(value, _DEMO_UUID, info)
        return _DEMO_UUID
    return value


def demo_int_or_passthrough(value: Any, info: ValidationInfo) -> Any:
    """Replace a demo-redacted integer with ``0``; pass other values through.

    ``None``, existing ``int`` instances and values that do not look
    demo-redacted are returned unchanged. A replacement is logged as a
    warning.
    """
    needs_inspection = value is not None and not isinstance(value, int)
    if needs_inspection and _looks_like_demo_redaction(value):
        zero = 0
        _log_replacement(value, zero, info)
        return zero
    return value
7 changes: 6 additions & 1 deletion src/pdfrest/models/_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
WatermarkHorizontalAlignment,
WatermarkVerticalAlignment,
)
from ._demo_value_sanitizers import demo_file_id_or_passthrough
from .public import PdfRestFile, PdfRestFileID

PdfConvertColorProfile = PdfPresetColorProfile | Literal["custom"]
Expand Down Expand Up @@ -2584,7 +2585,11 @@ class PdfRestRawUploadedFile(BaseModel):
"""

name: Annotated[str, Field(description="The name of the file")]
id: Annotated[PdfRestFileID, Field(description="The id of the file")]
id: Annotated[
PdfRestFileID,
BeforeValidator(demo_file_id_or_passthrough),
Field(description="The id of the file"),
]
output_url: Annotated[
list[HttpUrl] | HttpUrl | None,
Field(description="The url of the unzipped file", alias="outputUrl"),
Expand Down
Loading