diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index b108882..7a71690 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,3 +1,14 @@ +Repository-local references: + +- [AGENTS.md](../AGENTS.md) +- [docs/documentation-maintenance-checklist.md](../docs/documentation-maintenance-checklist.md) +- [docs/repo-map.md](../docs/repo-map.md) +- [docs/domain-model.md](../docs/domain-model.md) +- [docs/invariants.md](../docs/invariants.md) +- [src/excelalchemy/README.md](../src/excelalchemy/README.md) +- [tests/README.md](../tests/README.md) +- [examples/README.md](../examples/README.md) + ## Summary - Describe the user-facing or engineering goal of this change. @@ -15,6 +26,7 @@ ## Checklist -- [ ] I updated documentation when behavior or workflows changed. +- [ ] I updated documentation when behavior, workflows, or design changed. +- [ ] I updated the relevant repository-local knowledge docs listed above, following `docs/documentation-maintenance-checklist.md`. - [ ] I did not include generated files or local-only artifacts. - [ ] I confirmed this change does not require additional release steps. diff --git a/AGENTS.md b/AGENTS.md index 5a5f014..112065b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,142 +1,264 @@ # AGENTS.md -## Repository purpose - -- ExcelAlchemy is a schema-driven Python library for typed Excel import/export workflows built around Pydantic models. -- The main user flows in this repo are: template generation, workbook import validation, result workbook rendering, storage-backed upload/download, and backend/API integration. - -## Core architecture overview - -- Public facade: `excelalchemy.ExcelAlchemy`. -- Public config objects: `ImporterConfig`, `ExporterConfig`, `ImportMode`. -- Public schema metadata entry points: `FieldMeta(...)` and `Annotated[..., ExcelMeta(...)]`. 
-- Main internal collaborators live under `src/excelalchemy/core/`: - - schema/layout - - header parsing/validation - - row aggregation - - import execution - - workbook rendering - - storage gateway resolution -- Public import result surface: `ImportResult`, `CellErrorMap`, `RowIssueMap`. -- Storage is modeled as the `ExcelStorage` protocol. -- Current 2.x reality: custom storage adapters return `WorksheetTable`; examples import it from `excelalchemy.core.table`. - -## Important directories - -- `src/excelalchemy/`: package source. -- `src/excelalchemy/core/`: internal orchestration and workflow components. -- `src/excelalchemy/codecs/`: built-in field codecs and codec base classes. -- `src/excelalchemy/metadata.py`, `config.py`, `results.py`, `exceptions.py`: stable public modules. -- `src/excelalchemy/_primitives/`: internal helpers, constants, payloads, deprecation utilities. -- `src/excelalchemy/types/`, `exc.py`, `identity.py`, `header_models.py`, `const.py`: compatibility/deprecation layer retained in 2.x. -- `tests/contracts/`: public behavior and contract tests. -- `tests/integration/`: workflow and example integration tests. -- `tests/unit/`: focused unit coverage. -- `tests/support/`: test fixtures, helpers, and in-memory storage/test models. -- `examples/`: runnable examples used as user-facing reference material. -- `examples/fastapi_reference/`: copyable FastAPI-oriented reference project. -- `docs/`: usage, architecture, integration, locale, and release documentation. -- `docs/releases/`: release notes/checklists. -- `scripts/`: smoke checks and generated-doc/example asset helpers. -- `files/example-outputs/`: generated outputs referenced by docs and smoke tests. -- `.github/workflows/`: CI and PyPI publish workflows. - -## Public API boundaries - -- Prefer imports from `excelalchemy` package root for common public types. 
-- Additional stable public modules: +## Related docs + +- [README.md](README.md) for the user-facing overview and supported workflows. +- [docs/repo-map.md](docs/repo-map.md) for directory-level navigation. +- [docs/domain-model.md](docs/domain-model.md) for the library's main concepts and relationships. +- [docs/invariants.md](docs/invariants.md) for behavior that should not drift accidentally. +- [src/excelalchemy/README.md](src/excelalchemy/README.md) for implementation-oriented package structure. +- [tests/README.md](tests/README.md) for where behavior is protected. +- [examples/README.md](examples/README.md) for how examples should be interpreted. +- [plans/README.md](plans/README.md), [tech_debt/README.md](tech_debt/README.md), and [adr/README.md](adr/README.md) for execution planning, debt tracking, and architecture records. + +## 1. Repository goal + +- `ExcelAlchemy` is a schema-driven Python library for typed Excel import/export workflows built around Pydantic models. +- The repository centers on these user-facing flows: + - template generation + - workbook import validation + - result workbook rendering + - storage-backed upload/download + - backend/API integration + +## 2. 
Core domains and major components + +- Package source: `src/excelalchemy/` +- Public facade and public surface: + - `src/excelalchemy/__init__.py` + - `src/excelalchemy/config.py` + - `src/excelalchemy/metadata.py` + - `src/excelalchemy/results.py` + - `src/excelalchemy/exceptions.py` + - `src/excelalchemy/artifacts.py` +- Internal workflow orchestration: + - `src/excelalchemy/core/alchemy.py` + - `src/excelalchemy/core/import_session.py` + - `src/excelalchemy/core/schema.py` + - `src/excelalchemy/core/headers.py` + - `src/excelalchemy/core/rows.py` + - `src/excelalchemy/core/executor.py` + - `src/excelalchemy/core/rendering.py` + - `src/excelalchemy/core/writer.py` + - `src/excelalchemy/core/storage.py` + - `src/excelalchemy/core/storage_protocol.py` + - `src/excelalchemy/core/storage_minio.py` + - `src/excelalchemy/core/table.py` +- Field semantics and type adapters: + - `src/excelalchemy/codecs/` +- Pydantic integration boundary: + - `src/excelalchemy/helper/pydantic.py` +- Locale and messages: + - `src/excelalchemy/i18n/messages.py` +- Internal primitives and compatibility helpers: + - `src/excelalchemy/_primitives/` + - `src/excelalchemy/types/` + - `src/excelalchemy/exc.py` + - `src/excelalchemy/identity.py` + - `src/excelalchemy/header_models.py` + - `src/excelalchemy/const.py` + - `src/excelalchemy/util/convertor.py` + +## 3. 
Main entry points + +- Main user entry point: + - `excelalchemy.ExcelAlchemy` +- Main config entry points: + - `excelalchemy.ImporterConfig` + - `excelalchemy.ExporterConfig` + - `excelalchemy.ImportMode` +- Main schema metadata entry points: + - `excelalchemy.FieldMeta(...)` + - `excelalchemy.ExcelMeta(...)` +- Main result entry points: + - `excelalchemy.ImportResult` + - `excelalchemy.CellErrorMap` + - `excelalchemy.RowIssueMap` +- Main storage extension point: + - `excelalchemy.ExcelStorage` +- Main runnable references: + - `examples/README.md` + - `examples/employee_import_workflow.py` + - `examples/export_workflow.py` + - `examples/custom_storage.py` + - `examples/fastapi_reference/README.md` + +## 4. Public API vs internal implementation + +- Prefer these stable public modules in new code and docs: + - `excelalchemy` - `excelalchemy.config` - `excelalchemy.metadata` - `excelalchemy.results` - `excelalchemy.exceptions` - `excelalchemy.codecs` -- Treat these as internal implementation details unless a task is explicitly about internals: +- Treat these as internal implementation details unless the task is explicitly about internals: - `excelalchemy.core.*` - `excelalchemy.helper.*` - `excelalchemy.i18n.*` - `excelalchemy._primitives.*` -- 2.x compatibility-only imports: +- Treat these as 2.x compatibility-only imports: - `excelalchemy.exc` - `excelalchemy.identity` - `excelalchemy.header_models` - `excelalchemy.types.*` - `excelalchemy.util.convertor` -- In new code/docs, prefer: - - `storage=...` over legacy `minio=...`, `bucket_name=...`, `url_expires=...` - - `worksheet_table`, `header_table`, `cell_error_map`, `row_error_map` over old aliases like `df`, `header_df`, `cell_errors`, `row_errors` - -## Coding style and naming conventions - -- Python support target is `>=3.12`; CI runs `3.12`, `3.13`, and `3.14`. -- Use `uv` for setup, test, lint, type-check, build, and smoke commands. -- Ruff is the formatter/linter. Configured line length is `120`. 
-- Pyright is used for static typing; many source files are listed as strict. -- The codebase uses modern typing syntax consistent with Python 3.12+. -- Tests are behavior-oriented; prefer contract coverage for public behavior over implementation-only assertions. -- Docs and examples are practical and backend-oriented; avoid marketing language and unsupported claims. - -## Testing commands - -- `uv run pytest --cov=excelalchemy --cov-report=term-missing:skip-covered tests` -- `uv run python scripts/smoke_package.py` -- `uv run python scripts/smoke_examples.py` -- `uv run python scripts/smoke_docs_assets.py` -- `uv run python scripts/smoke_api_payload_snapshot.py` - -## Lint and type-check commands - -- `uv run ruff format --check .` -- `uv run ruff check .` -- `uv run pyright` - -## Documentation update expectations - -- Update docs when public behavior, examples, payload shapes, migration guidance, or compatibility guidance changes. -- Keep `README.md`, `README-pypi.md`, `docs/`, and `examples/` aligned when the onboarding story changes. -- If example output changes intentionally, regenerate captured assets: - - `uv run python scripts/generate_example_output_assets.py` -- Keep docs smoke checks passing after doc/example changes. -- If import failure payload shape changes intentionally, update the generated snapshot in `files/example-outputs/` and keep `scripts/smoke_api_payload_snapshot.py` passing. - -## Rules for making changes - -- Preserve stable public API modules unless the task explicitly includes a deprecation/migration change. 
-- Add or update tests near the affected behavior: - - `tests/contracts/` for public contracts - - `tests/integration/` for workflows/examples - - `tests/unit/` for focused logic -- Prefer the helper constructors already used across docs/examples: +- Current 2.x storage seam: + - `ExcelStorage` is public + - custom `read_excel_table(...)` implementations currently return `WorksheetTable` from `src/excelalchemy/core/table.py` + - treat that as a narrow extension seam, not as a general reason to import `excelalchemy.core.*` in application code +- In new code and docs, prefer: + - `storage=...` over `minio=...`, `bucket_name=...`, `url_expires=...` + - `worksheet_table` over `df` + - `header_table` over `header_df` + - `cell_error_map` over `cell_errors` + - `row_error_map` over `row_errors` + +## 5. Important constraints and invariants + +- Stable public behavior is protected primarily by: + - `tests/contracts/` + - `tests/integration/` +- Examples are part of the user-facing contract: + - `tests/integration/test_examples_smoke.py` + - `scripts/smoke_examples.py` +- Result payloads and API-facing shapes are treated as stable 2.x surfaces: + - `src/excelalchemy/results.py` + - `docs/result-objects.md` + - `docs/api-response-cookbook.md` + - `scripts/smoke_api_payload_snapshot.py` + - `files/example-outputs/import-failure-api-payload.json` +- Storage is a protocol boundary, not a Minio-only architecture: + - `src/excelalchemy/core/storage_protocol.py` + - `src/excelalchemy/core/storage.py` + - `src/excelalchemy/core/storage_minio.py` +- The repo has explicitly moved away from pandas-first internals: + - runtime workbook tables use `WorksheetTable` + - see `src/excelalchemy/core/table.py` +- Locale behavior is explicit: + - workbook-facing display locale supports `zh-CN` and `en` + - runtime exceptions/messages are English-first in 2.x + - see `docs/locale.md` +- Formula handling is server-side and `openpyxl`-based: + - the library reads stored workbook values and does 
not run Excel + - see `docs/limitations.md` +- Backward compatibility is active in 2.x: + - deprecated modules and aliases still exist + - legacy Minio config still works but emits deprecation warnings + - see `MIGRATIONS.md`, `docs/public-api.md`, and `tests/unit/test_deprecation_policy.py` + +## 6. Safe modification areas + +- Documentation and cross-links: + - `README.md` + - `README-pypi.md` + - `docs/*.md` + - `examples/README.md` + - `examples/fastapi_reference/README.md` +- Example scripts and reference app when the public usage story changes: + - `examples/*.py` + - `examples/fastapi_reference/*` +- Tests and fixtures that clarify intended behavior: + - `tests/contracts/` + - `tests/integration/` + - `tests/unit/` + - `tests/support/` +- Isolated field-type behavior when the change is codec-specific: + - one module under `src/excelalchemy/codecs/` + - matching tests under `tests/unit/codecs/` + +## 7. Areas requiring extra caution + +- Package root exports: + - `src/excelalchemy/__init__.py` + - changes here affect public imports directly +- Public config, metadata, results, and exceptions: + - `src/excelalchemy/config.py` + - `src/excelalchemy/metadata.py` + - `src/excelalchemy/results.py` + - `src/excelalchemy/exceptions.py` +- Core orchestration: + - `src/excelalchemy/core/alchemy.py` + - `src/excelalchemy/core/import_session.py` + - `src/excelalchemy/core/schema.py` + - `src/excelalchemy/core/headers.py` + - `src/excelalchemy/core/rows.py` + - `src/excelalchemy/core/executor.py` +- Storage boundary and compatibility behavior: + - `src/excelalchemy/core/storage.py` + - `src/excelalchemy/core/storage_protocol.py` + - `src/excelalchemy/core/storage_minio.py` + - `tests/contracts/test_storage_contract.py` + - `tests/unit/test_config_options.py` +- Compatibility shims and deprecation policy: + - `src/excelalchemy/types/` + - `src/excelalchemy/exc.py` + - `src/excelalchemy/identity.py` + - `src/excelalchemy/header_models.py` + - 
`src/excelalchemy/util/convertor.py` + - `tests/unit/test_deprecation_policy.py` +- Locale and message wording: + - `src/excelalchemy/i18n/messages.py` + - `docs/locale.md` +- Generated outputs and docs smoke dependencies: + - `files/example-outputs/` + - `scripts/generate_example_output_assets.py` + - `scripts/smoke_docs_assets.py` + - `scripts/smoke_api_payload_snapshot.py` + +## 8. Preferred workflow for making changes + +- Start from the repo docs that define the area you are changing: + - `README.md` + - `docs/architecture.md` + - `docs/public-api.md` + - `docs/result-objects.md` + - `docs/api-response-cookbook.md` + - `docs/locale.md` + - `docs/limitations.md` +- Read the matching contract and integration tests before changing behavior: + - `tests/contracts/` + - `tests/integration/` +- Prefer the recommended 2.x constructors and API shapes already used in examples: - `ImporterConfig.for_create` - `ImporterConfig.for_update` - `ImporterConfig.for_create_or_update` - `ExporterConfig.for_storage` -- Keep examples runnable from the repo root. -- Keep locale behavior explicit: - - workbook-facing display locale supports `zh-CN` and `en` - - docs say runtime exceptions/messages are English-first in 2.x -- When touching compatibility behavior, also check `MIGRATIONS.md`, `docs/public-api.md`, and deprecation tests. +- Keep new code on stable public imports unless the task is explicitly internal. 
+- When behavior changes, update: + - source + - the nearest tests + - the docs/examples that teach that behavior + - generated assets or smoke expectations if needed +- Validate with the repo’s normal commands: + - `uv run ruff format --check .` + - `uv run ruff check .` + - `uv run pyright` + - `uv run pytest --cov=excelalchemy --cov-report=term-missing:skip-covered tests` + - `uv run python scripts/smoke_package.py` + - `uv run python scripts/smoke_examples.py` + - `uv run python scripts/smoke_docs_assets.py` + - `uv run python scripts/smoke_api_payload_snapshot.py` -## Things to avoid +## 9. Documentation that must be updated when behavior changes -- Do not present internal modules as stable application-facing API in docs/examples unless the current repo requires it and the limitation is stated clearly. -- Do not reintroduce pandas-first workflows; the repo explicitly moved to `openpyxl + WorksheetTable`. -- Do not hard-wire Minio into core architecture changes; storage is a protocol boundary. -- Do not remove compatibility imports or old facade aliases casually; 2.x still tests and documents them. -- Do not invent new docs-site or release workflows; this repo currently uses Markdown files plus smoke scripts and GitHub workflows. +- Update `README.md` and `README-pypi.md` when the onboarding story, examples, or recommended API shape changes. +- Update `docs/public-api.md` when stable vs internal vs compatibility boundaries change. +- Update `MIGRATIONS.md` when migration guidance, deprecated paths, or recommended replacements change. +- Update `docs/architecture.md` when component responsibilities or workflow boundaries change. +- Update `docs/result-objects.md` and `docs/api-response-cookbook.md` when result objects or payload shapes change. +- Update `docs/locale.md` when locale behavior or message policy changes. +- Update `docs/limitations.md` or `docs/performance.md` when operational constraints or runtime expectations change. 
+- Update `examples/README.md` and `examples/fastapi_reference/README.md` when examples or reference layouts change. +- Regenerate `files/example-outputs/` with `scripts/generate_example_output_assets.py` when captured example output changes intentionally. +- Keep `scripts/smoke_docs_assets.py` and `scripts/smoke_api_payload_snapshot.py` passing after doc or payload changes. -## Examples and backward compatibility +## 10. Prohibited or discouraged changes -- Examples are part of the user-facing contract; `tests/integration/test_examples_smoke.py` and `scripts/smoke_examples.py` exercise them. -- `examples/fastapi_reference/` is the most complete backend integration reference in the repo. -- Keep example response shapes aligned with: - - `docs/result-objects.md` - - `docs/api-response-cookbook.md` -- For new examples and docs, prefer the recommended 2.x path: - - public imports from stable modules - - explicit `storage=...` - - new facade inspection names -- Backward compatibility is active in 2.x: - - deprecated modules still exist - - legacy Minio config still works but emits deprecation warnings - - old import-inspection aliases still exist -- If a change touches backward-compatible behavior, check tests under `tests/unit/test_deprecation_policy.py`, `tests/unit/test_config_options.py`, and relevant contract/integration coverage. +- Do not present internal modules under `excelalchemy.core.*`, `excelalchemy.helper.*`, `excelalchemy.i18n.*`, or `excelalchemy._primitives.*` as stable application-facing API in docs or examples. +- Do not reintroduce pandas-first internals or docs; the repo explicitly uses `openpyxl + WorksheetTable`. +- Do not hard-wire Minio into the core architecture; storage is modeled as `ExcelStorage`. +- Do not remove compatibility imports, deprecated module paths, or old facade aliases casually; 2.x still documents and tests them. +- Do not switch new docs/examples back to legacy config names when `storage=...` is sufficient. 
+- Do not invent new documentation-site, release, or smoke-test workflows that are not already represented by `docs/`, `scripts/`, and `.github/workflows/`. +- Do not make claims in docs/examples that conflict with `docs/limitations.md`, `docs/locale.md`, or the tested result payloads. diff --git a/CHANGELOG.md b/CHANGELOG.md index e3e2ce3..9c21792 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,64 @@ All notable changes to this project will be documented in this file. The format is inspired by Keep a Changelog and versioned according to PEP 440. +## [2.3.0] - 2026-04-23 + +This release continues the stable 2.x line with a more complete import +workflow: clearer template guidance before upload, lightweight structural +preflight before execution, synchronous lifecycle visibility during import, and +compact remediation-oriented payloads after failures. + +### Added + +- Added additive template UX metadata support through `hint=` and + `example_value=` so generated header comments can provide clearer workbook + input guidance +- Added `ExcelAlchemy.preflight_import(...)` for lightweight structural import + validation before full import execution +- Added `ImportPreflightResult` and `ImportPreflightStatus` as stable public + preflight result types +- Added additive `on_event=` support on `ExcelAlchemy.import_data(...)` for + synchronous import lifecycle callbacks +- Added `build_frontend_remediation_payload(...)` for compact retry-oriented + remediation payloads alongside the existing import result surfaces +- Added a dedicated `WorksheetNotFoundError` exception so sheet-missing + preflight classification does not rely on backend-specific message parsing + +### Changed + +- Extended the import workflow so applications can combine template guidance, + preflight validation, lifecycle event observation, and remediation payloads + without replacing the existing full import API +- Kept `ExcelAlchemy.import_data(...)` as the full validation and execution + path while 
clarifying that `preflight_import(...)` is structural only +- Updated storage-backed workbook reading so preflight maps only explicit + worksheet-missing failures to `SHEET_MISSING` and re-raises unrelated + storage/runtime failures +- Refined template comment rendering for single-staff guidance formatting +- Expanded contract coverage for: + - preflight header validation + - missing and extra field handling + - row-count estimation + - import lifecycle event payloads + - remediation payload behavior + +### Documentation + +- Updated `README.md`, `README-pypi.md`, and onboarding docs to describe the + additive template guidance metadata +- Updated `docs/getting-started.md` with practical preflight usage guidance and + a `preflight -> import` workflow example +- Updated `docs/public-api.md` and `docs/result-objects.md` to document + `preflight_import(...)`, `ImportPreflightResult`, lifecycle callbacks, and + remediation payload helpers +- Updated `docs/architecture.md`, `docs/domain-model.md`, examples, and + reference-app guidance to reflect the broader import workflow story +- Added design plans under `plans/` for: + - template UX metadata v1 + - job-friendly import lifecycle events v1 + - import preflight v1 + - front-end remediation payload v1 + ## [2.2.8] - 2026-04-05 This release continues the stable 2.x line with a clearer integration reading diff --git a/README-pypi.md b/README-pypi.md index 127b498..4d8677b 100644 --- a/README-pypi.md +++ b/README-pypi.md @@ -10,9 +10,20 @@ ExcelAlchemy turns Pydantic models into typed workbook contracts: - render workbook-facing output in `zh-CN` or `en` - keep storage pluggable through `ExcelStorage` -The current stable release is `2.2.8`, which continues the 2.x line with a clearer integration roadmap, stronger import-failure payload smoke verification, and more direct install-time validation of the FastAPI reference app. 
+The current stable release is `2.3.0`, which continues the 2.x line with a +more complete import workflow: clearer template guidance before upload, +lightweight structural preflight before execution, synchronous lifecycle +visibility during import, and remediation-oriented payloads after failures. -[GitHub Repository](https://github.com/RayCarterLab/ExcelAlchemy) · [Full README](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/README.md) · [Getting Started](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/getting-started.md) · [Integration Roadmap](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/integration-roadmap.md) · [Result Objects](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/result-objects.md) · [API Response Cookbook](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/api-response-cookbook.md) · [Examples Showcase](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/examples-showcase.md) · [Architecture](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/architecture.md) · [Migration Notes](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/MIGRATIONS.md) +At the top level, that import workflow is: + +- template authoring +- preflight gate +- import runtime +- result intelligence +- artifact and delivery + +[GitHub Repository](https://github.com/RayCarterLab/ExcelAlchemy) · [Full README](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/README.md) · [Getting Started](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/getting-started.md) · [Integration Roadmap](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/integration-roadmap.md) · [Platform Architecture](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/platform-architecture.md) · [Runtime Model](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/runtime-model.md) · [Integration Blueprints](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/integration-blueprints.md) · [Result 
Objects](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/result-objects.md) · [API Response Cookbook](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/api-response-cookbook.md) · [Examples Showcase](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/examples-showcase.md) · [Architecture](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/architecture.md) · [Migration Notes](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/MIGRATIONS.md) ## Screenshots @@ -69,7 +80,12 @@ class Importer(BaseModel): email: Annotated[ Email, Field(min_length=10), - ExcelMeta(label='Email', order=1, hint='Use your work email'), + ExcelMeta( + label='Email', + order=1, + hint='Use your work email', + example_value='alice@company.com', + ), ] @@ -77,6 +93,10 @@ alchemy = ExcelAlchemy(ImporterConfig(Importer, locale='en')) template = alchemy.download_template_artifact(filename='people-template.xlsx') ``` +This template metadata is additive: it leaves the worksheet layout alone and +improves the generated header comment with both guidance text and a concrete +example value. + ## Example Outputs These fixed outputs are generated from the repository examples by @@ -154,6 +174,9 @@ for configured selection fields. 
## Learn More - [Full project README](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/README.md) +- [Platform architecture](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/platform-architecture.md) +- [Runtime model](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/runtime-model.md) +- [Integration blueprints](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/integration-blueprints.md) - [Architecture notes](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/architecture.md) - [Locale policy](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/docs/locale.md) - [Migration notes](https://github.com/RayCarterLab/ExcelAlchemy/blob/main/MIGRATIONS.md) diff --git a/README.md b/README.md index d243b54..ce89034 100755 --- a/README.md +++ b/README.md @@ -6,7 +6,11 @@ ![Lint](https://img.shields.io/badge/lint-ruff-D7FF64) ![Typing](https://img.shields.io/badge/typing-pyright-2C6BED) -[中文 README](README_cn.md) · [About](ABOUT.md) · [Getting Started](docs/getting-started.md) · [Integration Roadmap](docs/integration-roadmap.md) · [Result Objects](docs/result-objects.md) · [API Response Cookbook](docs/api-response-cookbook.md) · [Architecture](docs/architecture.md) · [Examples Showcase](docs/examples-showcase.md) · [Public API](docs/public-api.md) · [Locale Policy](docs/locale.md) · [Limitations](docs/limitations.md) · [Performance](docs/performance.md) · [Changelog](CHANGELOG.md) · [Migration Notes](MIGRATIONS.md) +[中文 README](README_cn.md) · [About](ABOUT.md) · [Getting Started](docs/getting-started.md) · [Integration Roadmap](docs/integration-roadmap.md) · [Platform Architecture](docs/platform-architecture.md) · [Runtime Model](docs/runtime-model.md) · [Integration Blueprints](docs/integration-blueprints.md) · [Result Objects](docs/result-objects.md) · [API Response Cookbook](docs/api-response-cookbook.md) · [Architecture](docs/architecture.md) · [Examples Showcase](docs/examples-showcase.md) · [Public 
API](docs/public-api.md) · [Locale Policy](docs/locale.md) · [Limitations](docs/limitations.md) · [Performance](docs/performance.md) · [Changelog](CHANGELOG.md) · [Migration Notes](MIGRATIONS.md) + +Repository guides: [AGENTS.md](AGENTS.md) · [Repository Map](docs/repo-map.md) · [Domain Model](docs/domain-model.md) · [Invariants](docs/invariants.md) · [Package Guide](src/excelalchemy/README.md) · [Test Guide](tests/README.md) · [Examples Guide](examples/README.md) + +Engineering records: [Plans](plans/README.md) · [Technical Debt](tech_debt/README.md) · [ADRs](adr/README.md) ExcelAlchemy is a schema-driven Python library for Excel import and export workflows. It turns Pydantic models into typed workbook contracts: generate templates, validate uploads, map failures back to rows @@ -16,12 +20,26 @@ This repository is also a design artifact. It documents a series of deliberate engineering choices: `src/` layout, Pydantic v2 migration, pandas removal, pluggable storage, `uv`-based workflows, and locale-aware workbook output. -The current stable release is `2.2.8`, which continues the ExcelAlchemy 2.x line with a clearer integration roadmap, stronger import-failure payload smoke verification, and more direct install-time validation of the FastAPI reference app. +The current stable release is `2.3.0`, which continues the ExcelAlchemy 2.x +line with a more complete import workflow: clearer template guidance before +upload, lightweight structural preflight before execution, synchronous +lifecycle visibility during import, and remediation-oriented payloads after +failures. 
+ +For the platform-layer architecture of that workflow, see: + +- [`docs/platform-architecture.md`](docs/platform-architecture.md) +- [`docs/runtime-model.md`](docs/runtime-model.md) +- [`docs/integration-blueprints.md`](docs/integration-blueprints.md) ## At a Glance - Build Excel templates directly from typed Pydantic schemas +- Guide users with workbook-facing input hints and examples +- Run lightweight structural preflight checks before full import - Validate uploaded workbooks and write failures back to rows and cells +- Observe import lifecycle progress through additive callbacks +- Build remediation-oriented payloads for retry workflows - Keep storage pluggable through `ExcelStorage` - Render workbook-facing text in `zh-CN` or `en` - Stay lightweight at runtime with `openpyxl` instead of pandas @@ -67,7 +85,12 @@ class Importer(BaseModel): email: Annotated[ Email, Field(min_length=10), - ExcelMeta(label='Email', order=1, hint='Use your work email'), + ExcelMeta( + label='Email', + order=1, + hint='Use your work email', + example_value='alice@company.com', + ), ] @@ -75,9 +98,113 @@ alchemy = ExcelAlchemy(ImporterConfig(Importer, locale='en')) template = alchemy.download_template_artifact(filename='people-template.xlsx') ``` +This template metadata is additive: it keeps the worksheet layout unchanged and +adds clearer header comments for spreadsheet users, such as a free-form hint +and a concrete example value. + For browser downloads, prefer `template.as_bytes()` with a `Blob`, or return the bytes from your backend with `Content-Disposition: attachment`. A top-level navigation to a long `data:` URL is less reliable in modern browsers. +## Import Workflow + +ExcelAlchemy is designed to work as a product-ready import layer rather than +only a row-validation helper. 
+ +The top-level import workflow in the 2.x line is: + +- template authoring +- preflight gate +- import runtime +- result intelligence +- artifact and delivery + +In practical terms, that usually means: + +- generate a template with workbook-facing guidance +- run `preflight_import(...)` as a lightweight structural gate +- run `import_data(..., on_event=...)` for full validation and execution +- inspect `ImportResult`, `CellErrorMap`, and `RowIssueMap` +- build remediation-oriented payloads if the import fails +- deliver template or result workbook artifacts through the configured storage seam + +For the platform view and runtime sequence behind this workflow, see: + +- [`docs/platform-architecture.md`](docs/platform-architecture.md) +- [`docs/runtime-model.md`](docs/runtime-model.md) +- [`docs/integration-blueprints.md`](docs/integration-blueprints.md) + +Use `preflight_import(...)` when you want a fast answer to: + +- does the configured sheet exist +- do the workbook headers match the schema +- is the workbook structurally importable + +Use `import_data(...)` when you want the full workflow: + +- row validation +- create / update callback execution +- result workbook rendering +- structured row and cell failure output + +Short example: + +```python +from pydantic import BaseModel + +from excelalchemy import ExcelAlchemy, Email, FieldMeta, ImporterConfig, Number, String +from excelalchemy.results import build_frontend_remediation_payload + + +class EmployeeImporter(BaseModel): + full_name: String = FieldMeta(label='Full name', order=1, hint='Use the legal name') + age: Number = FieldMeta(label='Age', order=2) + work_email: Email = FieldMeta(label='Work email', order=3, example_value='alice@company.com') + + +async def create_employee(row: dict[str, object], context: dict[str, object] | None) -> dict[str, object]: + return row + + +alchemy = ExcelAlchemy( + ImporterConfig.for_create( + EmployeeImporter, + creator=create_employee, + storage=storage, + locale='en', 
+ ) +) + +template = alchemy.download_template_artifact(filename='employee-template.xlsx') + +preflight = alchemy.preflight_import('employees.xlsx') +if not preflight.is_valid: + response = {'preflight': preflight.to_api_payload()} +else: + events: list[dict[str, object]] = [] + result = await alchemy.import_data( + 'employees.xlsx', + 'employees-result.xlsx', + on_event=events.append, + ) + + response = { + 'result': result.to_api_payload(), + 'events': events, + 'remediation': build_frontend_remediation_payload( + result=result, + cell_error_map=alchemy.cell_error_map, + row_error_map=alchemy.row_error_map, + ), + } +``` + +This keeps one clear separation: + +- template authoring and preflight help before execution +- import runtime handles real validation and persistence +- result intelligence helps API and frontend retry flows after failure +- artifact and delivery expose files and URLs after the run + ## When To Use / When Not To Use / Limitations & Gotchas ### When To Use @@ -180,6 +307,10 @@ flowchart TD ``` See the full breakdown in [docs/architecture.md](docs/architecture.md). +For the integration-oriented platform view layered above those components, see +[docs/platform-architecture.md](docs/platform-architecture.md), +[docs/runtime-model.md](docs/runtime-model.md), and +[docs/integration-blueprints.md](docs/integration-blueprints.md). ## Workflow @@ -258,6 +389,7 @@ Import workflow output: ```text Employee import workflow completed +Preflight: VALID Result: SUCCESS Success rows: 1 Failed rows: 0 @@ -485,6 +617,9 @@ More detail is documented in [ABOUT.md](ABOUT.md). 
- [README.md](README.md): product + design overview - [README_cn.md](README_cn.md): Chinese usage-oriented guide - [ABOUT.md](ABOUT.md): engineering rationale and evolution notes +- [docs/platform-architecture.md](docs/platform-architecture.md): import platform capability model +- [docs/runtime-model.md](docs/runtime-model.md): runtime sequence across the import workflow +- [docs/integration-blueprints.md](docs/integration-blueprints.md): backend/frontend integration patterns - [docs/architecture.md](docs/architecture.md): component map and boundaries - [docs/limitations.md](docs/limitations.md): practical fit, limitations, and gotchas - [docs/performance.md](docs/performance.md): operational guidance for large files, memory, and backend guardrails diff --git a/README_cn.md b/README_cn.md index a4bb39f..3ba0080 100644 --- a/README_cn.md +++ b/README_cn.md @@ -1,11 +1,24 @@ # ExcelAlchemy -[English README](./README.md) · [项目说明](./ABOUT.md) · [快速开始](./docs/getting-started.md) · [接入路线图](./docs/integration-roadmap.md) · [结果对象](./docs/result-objects.md) · [架构文档](./docs/architecture.md) · [Locale Policy](./docs/locale.md) · [Changelog](./CHANGELOG.md) · [迁移说明](./MIGRATIONS.md) +[English README](./README.md) · [项目说明](./ABOUT.md) · [快速开始](./docs/getting-started.md) · [接入路线图](./docs/integration-roadmap.md) · [平台架构](./docs/platform-architecture.md) · [运行时模型](./docs/runtime-model.md) · [集成蓝图](./docs/integration-blueprints.md) · [结果对象](./docs/result-objects.md) · [架构文档](./docs/architecture.md) · [Locale Policy](./docs/locale.md) · [Changelog](./CHANGELOG.md) · [迁移说明](./MIGRATIONS.md) ExcelAlchemy 是一个面向 Excel 导入导出的 schema-first Python 库。 它的核心思路不是“读写表格文件”,而是“把 Excel 当成一种带约束的业务契约”。 -当前稳定发布版本是 `2.2.8`,它在稳定的 ExcelAlchemy 2.x 线上继续加强了接入路线图、失败导入 API 载荷的 smoke 校验,以及对 FastAPI 参考应用的安装后真实可用验证。 +当前稳定发布版本是 `2.3.0`。在稳定的 ExcelAlchemy 2.x 线上,它进一步补齐了更完整的导入工作流: + +- 上传前更清晰的模板引导 +- 执行前更轻量的结构化 preflight gate +- 导入过程中的同步 lifecycle visibility +- 导入失败后的 remediation-oriented payload + 
+从平台层视角看,这条导入链路可以概括为: + +- 模板编排 +- preflight gate +- 导入运行时 +- 结果智能 +- 制品与交付 你用 Pydantic 模型定义结构,用 `FieldMeta` 定义 Excel 元数据,用显式的导入/导出流程去完成模板生成、数据校验、错误回写和后端集成。 @@ -90,6 +103,12 @@ flowchart LR H --> I ``` +如果你想先看平台层文档,而不是内部组件图,可以继续看: + +- [`docs/platform-architecture.md`](./docs/platform-architecture.md) +- [`docs/runtime-model.md`](./docs/runtime-model.md) +- [`docs/integration-blueprints.md`](./docs/integration-blueprints.md) + ## 安装 ```bash @@ -359,6 +378,9 @@ Minio 只是一个默认实现,真正稳定的接口应该是 `ExcelStorage` - [README.md](./README.md): 英文首页,偏作品集表达 - [README_cn.md](./README_cn.md): 中文说明页,偏使用和理解 - [ABOUT.md](./ABOUT.md): 设计原则、迁移记录、架构取舍 +- [docs/platform-architecture.md](./docs/platform-architecture.md): 导入平台层能力模型 +- [docs/runtime-model.md](./docs/runtime-model.md): 导入工作流的运行时顺序 +- [docs/integration-blueprints.md](./docs/integration-blueprints.md): 后端 / 前端接入蓝图 - [docs/architecture.md](./docs/architecture.md): 组件边界与扩展点 ## 开发 diff --git a/adr/0001-facade-outside-focused-collaborators-inside.md b/adr/0001-facade-outside-focused-collaborators-inside.md new file mode 100644 index 0000000..c51620c --- /dev/null +++ b/adr/0001-facade-outside-focused-collaborators-inside.md @@ -0,0 +1,62 @@ +# ADR 0001: Facade Outside, Focused Collaborators Inside + +## Status + +- `accepted` +- Inference note: this ADR is inferred from current repository design, documentation, and code layout. No prior ADR file for this decision exists in the repository. + +## Context + +- The repository exposes a compact public entry point through `ExcelAlchemy`. +- Internal work is split across focused collaborators for: + - schema/layout + - header parsing and validation + - row aggregation + - import execution + - rendering + - storage +- The repository documentation presents this split as a deliberate architectural shape rather than an accidental implementation detail. + +## Decision + +- Keep `ExcelAlchemy` as the small public workflow facade. 
+- Keep major workflow responsibilities in focused internal collaborators under `src/excelalchemy/core/` instead of collapsing them into one large coordinator class. + +## Consequences + +- The top-level API stays compact and easier to document through `excelalchemy` and the stable public modules. +- Internal workflow components can evolve more independently. +- Behavior changes often require coordinated edits across multiple internal modules and their matching tests. +- Repository navigation depends on clear architecture docs and package-local guidance because the implementation is intentionally distributed. + +## Evidence + +- `ABOUT.md` + - explicitly names “Facade Outside, Components Inside” as an architecture decision +- `docs/architecture.md` + - maps `ExcelAlchemy` to `ExcelSchemaLayout`, `ExcelHeaderParser`, `ExcelHeaderValidator`, `RowAggregator`, `ImportExecutor`, `ExcelRenderer`, and `ExcelStorage` +- `src/excelalchemy/core/alchemy.py` + - keeps the facade methods and delegates core work to collaborators +- `src/excelalchemy/core/schema.py` +- `src/excelalchemy/core/headers.py` +- `src/excelalchemy/core/rows.py` +- `src/excelalchemy/core/executor.py` +- `src/excelalchemy/core/rendering.py` +- `src/excelalchemy/core/storage.py` + +## Uncertainty + +- The repository strongly supports this decision as the current design. +- This ADR does not claim to reconstruct the original chronological discussion that led to it; it only records the decision as it is visible today. 
+ +## Relevant paths + +- `ABOUT.md` +- `docs/architecture.md` +- `src/excelalchemy/core/alchemy.py` +- `src/excelalchemy/core/schema.py` +- `src/excelalchemy/core/headers.py` +- `src/excelalchemy/core/rows.py` +- `src/excelalchemy/core/executor.py` +- `src/excelalchemy/core/rendering.py` +- `src/excelalchemy/core/storage.py` diff --git a/adr/0002-metadata-owns-excel-semantics-separate-from-pydantic-internals.md b/adr/0002-metadata-owns-excel-semantics-separate-from-pydantic-internals.md new file mode 100644 index 0000000..14536bd --- /dev/null +++ b/adr/0002-metadata-owns-excel-semantics-separate-from-pydantic-internals.md @@ -0,0 +1,62 @@ +# ADR 0002: Metadata Owns Excel Semantics, Separate From Pydantic Internals + +## Status + +- `accepted` +- Inference note: this ADR is inferred from current repository design, documentation, and code layout. No prior ADR file for this decision exists in the repository. + +## Context + +- The repository treats workbook semantics as a first-class concern: + - labels + - order + - comments and hints + - options + - formatting hints + - import constraints +- The docs describe a shift away from tighter coupling to Pydantic internals, especially during the Pydantic v2 migration. +- The implementation now uses a metadata layer plus a Pydantic adapter boundary rather than making Pydantic field internals the owner of Excel-specific behavior. + +## Decision + +- Keep Excel-specific semantics in the metadata layer centered on `src/excelalchemy/metadata.py`. +- Keep Pydantic integration behind the adapter layer in `src/excelalchemy/helper/pydantic.py`. +- Preserve `FieldMetaInfo` as the 2.x compatibility-facing metadata object while the internal model remains layered. + +## Consequences + +- Workbook semantics remain explicit and controlled by ExcelAlchemy rather than by framework internals. +- Framework upgrades are less likely to require invasive changes across the whole runtime path. 
+- The implementation carries some complexity because the 2.x public surface still exposes `FieldMetaInfo` while internal code is moving toward layered metadata objects. +- Metadata, schema extraction, and validation-message normalization must stay aligned across multiple files. + +## Evidence + +- `ABOUT.md` + - explicitly states “Excel Metadata Owns Excel Semantics” and “Pydantic Is an Adapter Boundary” +- `docs/architecture.md` + - describes metadata as a stable public layer and Pydantic integration as a separate adapter +- `src/excelalchemy/metadata.py` + - defines `FieldMetaInfo` as a compatibility facade over: + - `DeclaredFieldMeta` + - `RuntimeFieldBinding` + - `WorkbookPresentationMeta` + - `ImportConstraints` +- `src/excelalchemy/helper/pydantic.py` + - extracts metadata from Pydantic models and maps validation errors into ExcelAlchemy errors +- `tests/contracts/test_pydantic_contract.py` + - verifies that Excel metadata stays outside direct `FieldInfo` subclassing and that validation is mapped into `ExcelCellError` and `ExcelRowError` + +## Uncertainty + +- The repository clearly shows the current separation and its rationale. +- This ADR does not claim that every internal consumer already uses the layered metadata API uniformly; the codebase itself still documents `FieldMetaInfo` as a 2.x compatibility facade. 
+ +## Relevant paths + +- `ABOUT.md` +- `docs/architecture.md` +- `src/excelalchemy/metadata.py` +- `src/excelalchemy/helper/pydantic.py` +- `tests/contracts/test_pydantic_contract.py` +- `tests/unit/test_field_metadata.py` diff --git a/adr/0003-keep-2x-compatibility-shims-as-migration-layer-scheduled-for-3x-removal.md b/adr/0003-keep-2x-compatibility-shims-as-migration-layer-scheduled-for-3x-removal.md new file mode 100644 index 0000000..b826cc0 --- /dev/null +++ b/adr/0003-keep-2x-compatibility-shims-as-migration-layer-scheduled-for-3x-removal.md @@ -0,0 +1,74 @@ +# ADR 0003: Keep 2.x Compatibility Shims, But Treat Them As A Migration Layer Scheduled For 3.0 Removal + +## Status + +- `accepted` +- Inference note: this ADR is inferred from current repository design, migration docs, and tests. No prior ADR file for this decision exists in the repository. + +## Context + +- The repository currently supports both: + - preferred public modules and names + - older compatibility import paths and aliases +- The migration and public-API docs explicitly describe this as a 2.x compatibility policy, not the preferred long-term design. +- Tests exist specifically to verify deprecation warnings and replacement guidance. + +## Decision + +- Keep compatibility shims and alias paths active throughout the 2.x line. +- Treat them as a migration layer rather than as preferred public API. +- Keep deprecation warnings explicit and point users toward the preferred replacements. +- Treat removal as a 3.0 concern rather than removing compatibility paths casually during 2.x maintenance. + +## Consequences + +- Existing users have a smoother migration path into the newer public module layout. +- The repository must carry extra maintenance cost in source, docs, and tests for deprecated paths. +- Public guidance must continue to distinguish “still works” from “recommended”. 
+- New code and docs should prefer: + - `excelalchemy` + - `excelalchemy.config` + - `excelalchemy.metadata` + - `excelalchemy.results` + - `excelalchemy.exceptions` + - `excelalchemy.codecs` + - `storage=...` + - `worksheet_table`, `header_table`, `cell_error_map`, `row_error_map` + +## Evidence + +- `docs/public-api.md` + - explicitly separates stable public modules from compatibility modules +- `MIGRATIONS.md` + - states that `excelalchemy.types.*` remains available in 2.x and is scheduled for removal in ExcelAlchemy 3.0 + - states that old import-inspection names still work in 2.x but clearer names are preferred +- `docs/architecture.md` + - includes a compatibility policy section for 2.x imports +- `src/excelalchemy/types/` +- `src/excelalchemy/exc.py` +- `src/excelalchemy/identity.py` +- `src/excelalchemy/header_models.py` +- `src/excelalchemy/util/convertor.py` + - implement the compatibility layer directly in the package +- `tests/unit/test_deprecation_policy.py` + - verifies deprecation warnings and replacement targets +- `src/excelalchemy/core/alchemy.py` + - still exposes compatibility property aliases such as `df`, `header_df`, `cell_errors`, and `row_errors` + +## Uncertainty + +- The repository explicitly documents the 3.0 removal direction for some compatibility paths. +- This ADR does not infer a more detailed deprecation timeline or exact 3.0 scope beyond what is already written in the repository. 
+ +## Relevant paths + +- `docs/public-api.md` +- `MIGRATIONS.md` +- `docs/architecture.md` +- `src/excelalchemy/types/` +- `src/excelalchemy/exc.py` +- `src/excelalchemy/identity.py` +- `src/excelalchemy/header_models.py` +- `src/excelalchemy/util/convertor.py` +- `src/excelalchemy/core/alchemy.py` +- `tests/unit/test_deprecation_policy.py` diff --git a/adr/README.md b/adr/README.md new file mode 100644 index 0000000..182677e --- /dev/null +++ b/adr/README.md @@ -0,0 +1,75 @@ +# Architecture Decision Records + +This directory is for architecture decision records for this repository. +Use an ADR when a decision changes or clarifies the long-term shape of the codebase. + +## Related docs + +- [../AGENTS.md](../AGENTS.md) for repository-local change guidance. +- [../docs/domain-model.md](../docs/domain-model.md) for the concepts ADRs usually shape. +- [../docs/invariants.md](../docs/invariants.md) for the constraints ADRs often establish or explain. +- [../plans/README.md](../plans/README.md) for execution plans that may produce an ADR. +- [../tech_debt/README.md](../tech_debt/README.md) for debt items that may motivate or result from an architectural decision. + +## When to create an ADR + +Create an ADR when the decision affects one or more of these areas: + +- stable public API direction +- internal architecture boundaries +- storage architecture +- metadata and schema model design +- result payload structure +- compatibility and deprecation direction +- locale or message-layer policy +- testing or smoke-verification strategy when it changes repository-wide expectations + +Do not create an ADR for: + +- small local refactors +- obvious bug fixes +- one-off implementation choices that do not establish a lasting repository pattern + +## Expected structure + +Each ADR should include: + +- Title + - Short and specific. 
+- Status + - For example: + - `proposed` + - `accepted` + - `superseded` + - `rejected` +- Context + - What repository problem or pressure led to the decision. +- Decision + - The choice that was made. +- Consequences + - What becomes easier, harder, or required because of the decision. +- Relevant paths + - The main code, docs, tests, examples, or scripts affected. +- Related records + - Optional links to plans, debt entries, migrations, or superseding ADRs. + +## Practical guidance + +- Keep ADRs short and concrete. +- Write them so a future maintainer can understand why the repository looks the way it does. +- Prefer explicit repository references such as: + - `src/excelalchemy/config.py` + - `src/excelalchemy/metadata.py` + - `src/excelalchemy/results.py` + - `docs/public-api.md` + - `MIGRATIONS.md` + +## Repository alignment + +In this repository, likely ADR-worthy topics include: + +- why a behavior is public vs internal +- why `storage=...` is preferred over legacy Minio fields +- why compatibility shims remain in 2.x +- why result payloads or naming conventions changed +- why a workflow moved between `core/`, `helper/`, or public modules diff --git a/docs/api-response-cookbook.md b/docs/api-response-cookbook.md index 7e3cac7..77a1cda 100644 --- a/docs/api-response-cookbook.md +++ b/docs/api-response-cookbook.md @@ -43,6 +43,24 @@ Application logs use named loggers such as `excelalchemy.codecs`, on `code`, `message_key`, `message`, and `display_message` instead of raw log output. 
+If your frontend needs a more task-oriented retry experience, you can add the
+optional remediation payload alongside the existing stable result payloads:
+
+```python
+from excelalchemy.results import build_frontend_remediation_payload
+
+response = {
+    'result': result.to_api_payload(),
+    'cell_errors': alchemy.cell_error_map.to_api_payload(),
+    'row_errors': alchemy.row_error_map.to_api_payload(),
+    'remediation': build_frontend_remediation_payload(
+        result=result,
+        cell_error_map=alchemy.cell_error_map,
+        row_error_map=alchemy.row_error_map,
+    ),
+}
+```
+
 ## 1. Success Response
 
 Use this when the import completed without header or data failures.
 
@@ -270,7 +288,83 @@ def build_excel_import_response(alchemy, result):
 
 This keeps your route layer thin and your API contract stable.
 
-## 5. Frontend Mapping Ideas
+## 5. Frontend Remediation Example
+
+Use this when the frontend needs one compact section for retry guidance instead
+of deriving everything from `cell_errors` and `row_errors`.
+
+```json
+{
+  "result": {
+    "result": "DATA_INVALID",
+    "is_success": false,
+    "is_header_invalid": false,
+    "is_data_invalid": true,
+    "summary": {
+      "success_count": 0,
+      "fail_count": 1,
+      "result_workbook_url": "memory://employee-import-result.xlsx"
+    }
+  },
+  "remediation": {
+    "result": {
+      "result": "DATA_INVALID",
+      "is_success": false,
+      "is_header_invalid": false,
+      "is_data_invalid": true
+    },
+    "remediation": {
+      "needs_remediation": true,
+      "affected_row_count": 1,
+      "affected_field_count": 1,
+      "affected_code_count": 1,
+      "header_issue_count": 0,
+      "result_workbook_available": true,
+      "suggested_action": "Correct the invalid rows and re-upload the workbook.",
+      "fix_hint": "Download the result workbook and review the highlighted rows before re-uploading."
+ }, + "by_field": [ + { + "field_label": "Age", + "unique_label": "Age", + "error_count": 1, + "codes": ["ExcelCellError"], + "suggested_action": "Review the highlighted cells, correct the invalid values, and re-upload the workbook." + } + ], + "by_code": [ + { + "code": "ExcelCellError", + "error_count": 1, + "suggested_action": "Review the highlighted cells, correct the invalid values, and re-upload the workbook." + } + ], + "items": [ + { + "scope": "cell", + "code": "ExcelCellError", + "field_label": "Age", + "row_number_for_humans": 1, + "column_number_for_humans": 4, + "display_message": "【Age】Invalid input; enter a number.", + "suggested_action": "Review the highlighted cells, correct the invalid values, and re-upload the workbook." + } + ] + } +} +``` + +Frontend usage: + +- use `remediation.remediation.needs_remediation` to decide whether to show a + retry-focused panel +- use `remediation.remediation.suggested_action` for the primary call to action +- use `remediation.by_field` for field-oriented fix panels or filters +- use `remediation.by_code` for grouped badges or “most common issue” views +- use `remediation.items` when the UI needs one compact issue list with + optional `fix_hint` + +## 6. Frontend Mapping Ideas Common patterns: @@ -284,7 +378,7 @@ Common patterns: - use `message` when you want plain text without workbook decoration - use `display_message` when you want ready-to-render text -## 6. Related Reading +## 7. Related Reading - [`docs/result-objects.md`](result-objects.md) - [`examples/fastapi_reference/README.md`](../examples/fastapi_reference/README.md) diff --git a/docs/architecture.md b/docs/architecture.md index bb6af66..b09acf2 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -1,5 +1,11 @@ # Architecture +This page is the internal component view of the repository. 
+If you want the integration-oriented platform view introduced for the v2.4 +documentation slice, see [`docs/platform-architecture.md`](platform-architecture.md). +If you want the runtime sequence on top of that platform view, see +[`docs/runtime-model.md`](runtime-model.md). + ## Component Map ```mermaid @@ -46,6 +52,8 @@ flowchart LR - owns the user-facing workflow - coordinates import/export operations - keeps the top-level API compact +- exposes `import_data(..., on_event=...)` as an additive progress-reporting + hook for import runs ### Schema @@ -77,6 +85,15 @@ flowchart LR - dispatches create/update/upsert logic - isolates backend execution from parsing concerns +### Import Session + +`src/excelalchemy/core/import_session.py` + +- owns one import run's lifecycle and mutable runtime state +- emits structured lifecycle events when `on_event=...` is supplied +- keeps those events on the same synchronous path as header validation, row + execution, and result workbook rendering + ### Rendering `src/excelalchemy/core/rendering.py` diff --git a/docs/documentation-maintenance-checklist.md b/docs/documentation-maintenance-checklist.md new file mode 100644 index 0000000..6a0b41c --- /dev/null +++ b/docs/documentation-maintenance-checklist.md @@ -0,0 +1,162 @@ +# Documentation Maintenance Checklist + +Use this checklist to keep repository-local knowledge aligned with code changes. +It is intentionally lightweight and specific to the current `ExcelAlchemy` repository. + +## Related docs + +- [../AGENTS.md](../AGENTS.md) for repository-local editing guidance. +- [repo-map.md](repo-map.md) for the top-level repository map. +- [domain-model.md](domain-model.md) for core concepts and relationships. +- [invariants.md](invariants.md) for behavior that should not drift accidentally. +- [../src/excelalchemy/README.md](../src/excelalchemy/README.md) for the main package implementation guide. 
+- [../tests/README.md](../tests/README.md) and [../examples/README.md](../examples/README.md) for executable contract surfaces.
+- [../plans/README.md](../plans/README.md), [../tech_debt/README.md](../tech_debt/README.md), and [../adr/README.md](../adr/README.md) for planning, debt tracking, and architecture records.
+
+## Use this checklist when
+
+- a PR changes public API shape or recommended usage
+- a PR changes internal module boundaries or workflow structure
+- a PR changes examples, tests, or generated example outputs
+- a PR changes locale behavior, result payloads, storage behavior, or compatibility paths
+
+## Update `AGENTS.md` when
+
+- the recommended public import surface changes:
+  - `src/excelalchemy/__init__.py`
+  - `src/excelalchemy/config.py`
+  - `src/excelalchemy/metadata.py`
+  - `src/excelalchemy/results.py`
+  - `src/excelalchemy/exceptions.py`
+- the repo’s main entry points, safe edit zones, or caution areas change
+- the preferred workflow, validation commands, or documentation update expectations change
+- the current 2.x guidance changes for:
+  - `storage=...` vs legacy Minio fields
+  - compatibility imports under `src/excelalchemy/types/`, `src/excelalchemy/exc.py`, `src/excelalchemy/identity.py`, `src/excelalchemy/header_models.py`, `src/excelalchemy/util/convertor.py`
+  - the `WorksheetTable` storage seam in `src/excelalchemy/core/table.py`
+
+## Update `docs/repo-map.md` when
+
+- a top-level directory is added, removed, renamed, or repurposed
+- a new root-level file becomes an important starting point
+- a new major source area appears under `src/excelalchemy/`
+- a new docs, examples, tests, scripts, or asset path becomes important for understanding the repo
+- the recommended starting points change materially for:
+  - public API
+  - import flow
+  - export/template generation
+  - storage integration
+  - tests
+
+## Update `docs/domain-model.md` when
+
+- a new core concept becomes part of the library vocabulary
+- an existing 
concept is renamed, split, merged, or removed +- responsibilities move between major collaborators such as: + - `src/excelalchemy/core/schema.py` + - `src/excelalchemy/core/headers.py` + - `src/excelalchemy/core/rows.py` + - `src/excelalchemy/core/executor.py` + - `src/excelalchemy/core/rendering.py` + - `src/excelalchemy/core/storage_protocol.py` + - `src/excelalchemy/helper/pydantic.py` +- the public vs internal distinction changes for a concept +- import, template, export, or storage lifecycle steps change in a way users or maintainers need to reason about + +## Update `docs/invariants.md` when + +- a public behavior becomes newly stable or stops being stable +- a result state, payload shape, fallback rule, or compatibility expectation changes +- a new rule is enforced by tests or docs and should be treated as intentional behavior +- locale behavior changes for: + - runtime messages + - workbook display text + - compatibility constants in `src/excelalchemy/const.py` +- storage contract expectations change, especially around: + - `ExcelStorage` + - `WorksheetTable` + - upload payload shape + - explicit storage precedence over legacy Minio fields + +## Update module README files when + +- update `src/excelalchemy/README.md` when: + - responsibilities move between public modules and internal modules + - major internal flows change + - the recommended edit points for public API, import validation, export/template generation, storage, or locale behavior change +- update `tests/README.md` when: + - test directories are reorganized + - a different test layer becomes the right place for a class of changes + - new smoke checks or fixture areas become part of the normal workflow +- update `examples/README.md` when: + - examples are added, removed, regrouped, or reclassified + - an example becomes a compatibility example instead of a recommended example + - example changes imply updates to `files/example-outputs/` or smoke scripts + +## Create a new ADR when + +- a change 
establishes or changes a lasting repository pattern +- the decision affects: + - stable public API direction + - internal architecture boundaries + - storage architecture + - metadata and schema design + - result payload structure + - compatibility and deprecation direction + - locale or message-layer policy +- the reasoning should outlive a single PR or execution plan + +Use `adr/README.md` for the expected structure. + +## Create a new execution plan when + +- the work spans multiple areas such as: + - `src/excelalchemy/` + - `tests/` + - `docs/` + - `examples/` + - `scripts/` + - `files/example-outputs/` +- the work needs explicit sequencing, checkpoints, or risk tracking +- the change is large enough that a PR description is not sufficient + +Use `plans/README.md` for status conventions and logging format. + +## Record technical debt when + +- the change exposes a known compromise that will remain after the PR +- the repo carries a temporary workaround that adds maintenance cost +- the recommended public API and the current implementation still diverge +- documentation, examples, tests, or smoke scripts must stay synchronized through awkward manual steps + +Use `tech_debt/README.md` and include: + +- impact +- current workaround +- desired fix +- priority +- relevant paths + +## Verify before merging a PR + +- `AGENTS.md`, `docs/repo-map.md`, `docs/domain-model.md`, and `docs/invariants.md` still match the changed code paths. +- `src/excelalchemy/README.md`, `tests/README.md`, and `examples/README.md` still describe the current implementation and workflow shape. +- `docs/public-api.md` is updated if public-vs-internal or compatibility guidance changed. +- `MIGRATIONS.md` is updated if deprecation or migration guidance changed. +- `docs/result-objects.md` and `docs/api-response-cookbook.md` are updated if result payloads changed. +- `docs/locale.md` is updated if locale-visible behavior changed. 
+- `examples/` and `files/example-outputs/` are updated if examples or captured outputs changed. +- Run the repo’s normal verification commands for the affected area: + - `uv run ruff format --check .` + - `uv run ruff check .` + - `uv run pyright` + - `uv run pytest --cov=excelalchemy --cov-report=term-missing:skip-covered tests` + - `uv run python scripts/smoke_package.py` + - `uv run python scripts/smoke_examples.py` + - `uv run python scripts/smoke_docs_assets.py` + - `uv run python scripts/smoke_api_payload_snapshot.py` + +## Short rule of thumb + +- If a code change alters how someone should navigate, reason about, or safely modify this repo, update the matching repository-local knowledge document in the same PR. diff --git a/docs/domain-model.md b/docs/domain-model.md new file mode 100644 index 0000000..d432936 --- /dev/null +++ b/docs/domain-model.md @@ -0,0 +1,266 @@ +# Domain Model + +This document names the core concepts used by `ExcelAlchemy` and shows how they relate to each other. +It is based on the repository as it exists today. + +For directory-level navigation, see [`docs/repo-map.md`](repo-map.md). +For component structure, see [`docs/architecture.md`](architecture.md). + +## Related docs + +- [repo-map.md](repo-map.md) for where these concepts live in the repository. +- [invariants.md](invariants.md) for the constraints that govern these concepts. +- [../src/excelalchemy/README.md](../src/excelalchemy/README.md) for the package-level implementation view. +- [../tests/README.md](../tests/README.md) for where the model is protected by tests. + +## 1. Core concepts and entities + +| Concept | Primary files | Responsibility | Visibility | +| --- | --- | --- | --- | +| `ExcelAlchemy` facade | `src/excelalchemy/__init__.py`, `src/excelalchemy/core/alchemy.py` | Main workflow entry point for template generation, import, export, and upload. 
| Public | +| Schema/model contract | User Pydantic models referenced by `ImporterConfig` and `ExporterConfig` | Defines the workbook-facing data shape. Fields carry Excel-specific codec types and metadata. | Public concept | +| `ImporterConfig` | `src/excelalchemy/config.py` | Configures import models, callbacks, import mode, locale, and storage. | Public | +| `ExporterConfig` | `src/excelalchemy/config.py` | Configures export model, locale, storage, and export conversion behavior. | Public | +| `ImportMode` | `src/excelalchemy/config.py` | Selects `CREATE`, `UPDATE`, or `CREATE_OR_UPDATE` import behavior. | Public | +| `FieldMeta(...)` / `ExcelMeta(...)` | `src/excelalchemy/metadata.py` | Declare workbook labels, ordering, hints, options, required-ness, formatting hints, and import constraints. | Public | +| `FieldMetaInfo` and layered metadata | `src/excelalchemy/metadata.py` | Hold resolved field metadata during execution. `FieldMetaInfo` is the compatibility facade over the layered metadata objects. | Internal runtime concept with compatibility role | +| Field codec | `src/excelalchemy/codecs/base.py`, `src/excelalchemy/codecs/*.py` | Owns Excel-facing parsing, display formatting, normalization, and header-comment behavior for a field type. | Public extension surface | +| Composite field codec | `src/excelalchemy/codecs/base.py`, `src/excelalchemy/codecs/date_range.py`, `src/excelalchemy/codecs/number_range.py`, `src/excelalchemy/codecs/organization.py`, `src/excelalchemy/codecs/staff.py`, `src/excelalchemy/codecs/tree.py` | Expands one logical field into multiple worksheet columns. | Public extension surface | +| Schema layout | `src/excelalchemy/core/schema.py` | Flattens model fields into an Excel-facing ordered layout with unique labels and keys. | Internal | +| Header model | `src/excelalchemy/_primitives/header_models.py` | Represents one parsed workbook header, including parent/child label relationships for merged headers. 
| Internal | +| Header parser | `src/excelalchemy/core/headers.py` | Detects simple vs merged headers and turns header rows into normalized header objects. | Internal | +| Header validator | `src/excelalchemy/core/headers.py` | Compares workbook headers to schema layout and produces `ValidateHeaderResult`. | Internal | +| Worksheet table | `src/excelalchemy/core/table.py` | Lightweight internal 2D table abstraction used for workbook import/export flow instead of pandas. | Internal, but important to understand | +| Import session | `src/excelalchemy/core/import_session.py` | Owns one import run’s lifecycle, state, counts, header table, worksheet table, and result rendering decisions. | Internal | +| Import session snapshot | `src/excelalchemy/core/import_session.py` | Immutable summary of the current import session phase and counts. | Internal | +| Import lifecycle event callback | `src/excelalchemy/core/alchemy.py`, `src/excelalchemy/core/import_session.py` | Optional per-run callback passed to `ExcelAlchemy.import_data(...)` for synchronous lifecycle events. | Public concept | +| Row aggregator | `src/excelalchemy/core/rows.py` | Reconstructs flattened worksheet rows back into model-shaped payloads. | Internal | +| Import issue tracker | `src/excelalchemy/core/rows.py` | Maps cell and row issues back into workbook coordinates and result columns. | Internal | +| Import executor | `src/excelalchemy/core/executor.py` | Validates row payloads and dispatches configured create/update/upsert callbacks. | Internal | +| Pydantic adapter | `src/excelalchemy/helper/pydantic.py` | Extracts metadata from Pydantic models and converts Pydantic validation output into ExcelAlchemy row/cell issues. | Internal boundary | +| Renderer | `src/excelalchemy/core/rendering.py` | Converts worksheet tables and metadata into workbook outputs for templates, exports, and import results. 
| Internal | +| Writer | `src/excelalchemy/core/writer.py` | Applies workbook-level formatting, comments, colors, and result columns. | Internal | +| `ExcelStorage` | `src/excelalchemy/core/storage_protocol.py` | Defines the storage protocol for reading workbook tables and uploading rendered workbooks. | Public extension surface | +| Storage gateway resolution | `src/excelalchemy/core/storage.py` | Chooses the storage implementation for a config, including missing-storage fallback. | Internal | +| `MinioStorageGateway` | `src/excelalchemy/core/storage_minio.py` | Built-in `ExcelStorage` implementation for Minio-compatible object storage. | Concrete implementation | +| `ExcelArtifact` | `src/excelalchemy/artifacts.py` | Wraps a rendered workbook as bytes, base64, or data URL. | Public | +| `ValidateHeaderResult` | `src/excelalchemy/results.py` | Represents header-only validation outcome. | Public result type | +| `ImportResult` | `src/excelalchemy/results.py` | Represents the top-level outcome of an import run. | Public result type | +| `CellErrorMap` | `src/excelalchemy/results.py` | Structured cell-level issue map with workbook coordinates and API helpers. | Public result type | +| `RowIssueMap` | `src/excelalchemy/results.py` | Structured row-level issue map with summaries and API helpers. | Public result type | + +## 2. Responsibilities + +### Declaration responsibilities + +- Pydantic models define the logical data contract for import and export. +- `FieldMeta(...)` and `ExcelMeta(...)` define the workbook-facing contract: + - label + - order + - comments and hints + - options + - formatting hints + - import-side constraints +- Field codecs define how a field behaves in a workbook: + - how it is described in header comments + - how workbook input is parsed + - how values are normalized for validation + - how values are rendered back for display + +### Execution responsibilities + +- `ExcelAlchemy` turns a config and schema into a usable workflow object. 
+- `ExcelAlchemy.import_data(..., on_event=...)` can report lifecycle progress + to a job or service layer while keeping the import itself synchronous. +- `ExcelSchemaLayout` turns schema declarations into a flattened Excel layout. +- `ExcelHeaderParser` and `ExcelHeaderValidator` decide whether an uploaded workbook matches that layout. +- `RowAggregator` reconstructs model-shaped data from worksheet rows. +- `ImportExecutor` validates and dispatches row payloads through configured callbacks. +- `ImportIssueTracker` preserves workbook-coordinate visibility for failures. + +### Output responsibilities + +- `ExcelRenderer` and `writer.py` produce: + - templates + - exports + - import result workbooks +- `ImportResult`, `CellErrorMap`, and `RowIssueMap` expose structured programmatic results. +- `ExcelArtifact` exposes workbook output in transport-friendly forms. + +### Integration responsibilities + +- `ExcelStorage` is the boundary for reading and uploading workbooks. +- `MinioStorageGateway` is the built-in concrete backend. +- `helper/pydantic.py` is the boundary between Pydantic and ExcelAlchemy-specific metadata/error handling. + +## 3. Relationships between concepts + +- `ImporterConfig` or `ExporterConfig` is passed into `ExcelAlchemy`. +- The config points to one or more schema models. +- Schema model fields are declared with Excel-specific codec types and metadata. +- `helper/pydantic.py` extracts those declarations into runtime field metadata. +- `ExcelSchemaLayout` organizes that metadata into an ordered Excel-facing layout. +- The layout drives: + - template generation + - header validation + - row aggregation + - error ordering + - export column selection +- `ExcelStorage` provides workbook input as `WorksheetTable` and accepts rendered workbook output for upload. 
+- During import: + - `ImportSession` coordinates the lifecycle + - an optional `on_event` callback can observe lifecycle milestones inline + - `ExcelHeaderParser` parses header rows + - `ExcelHeaderValidator` validates them against `ExcelSchemaLayout` + - `RowAggregator` reconstructs row payloads + - `ImportExecutor` validates and dispatches rows + - `ImportIssueTracker` accumulates row and cell issues + - `ExcelRenderer` writes a result workbook when needed +- `ImportIssueTracker` feeds the public `CellErrorMap` and `RowIssueMap`. +- `ImportResult` summarizes the overall import status and any result workbook URL. +- For template and export paths, `ExcelRenderer` produces workbook output that can be returned directly as a data URL, wrapped as `ExcelArtifact`, or uploaded through storage. + +## 4. Public-facing vs internal concepts + +### Public-facing concepts + +- `ExcelAlchemy` +- `ImporterConfig` +- `ExporterConfig` +- `ImportMode` +- schema models declared with Pydantic +- `FieldMeta(...)` +- `ExcelMeta(...)` +- built-in codecs under `src/excelalchemy/codecs/` +- codec extension base classes: + - `ExcelFieldCodec` + - `CompositeExcelFieldCodec` +- `ExcelStorage` +- `ExcelArtifact` +- `ValidateHeaderResult` +- `ImportResult` +- `CellErrorMap` +- `RowIssueMap` +- `ExcelAlchemy.import_data(..., on_event=...)` + +### Internal concepts + +- `ExcelSchemaLayout` +- `ExcelHeader` +- `ExcelHeaderParser` +- `ExcelHeaderValidator` +- `WorksheetTable` +- `WorksheetRow` +- `ImportSession` +- `ImportSessionSnapshot` +- `ImportSessionPhase` +- `RowAggregator` +- `ImportIssueTracker` +- `ImportExecutor` +- `ExcelRenderer` +- `writer.py` +- storage gateway resolution in `src/excelalchemy/core/storage.py` +- Pydantic adaptation in `src/excelalchemy/helper/pydantic.py` +- primitives under `src/excelalchemy/_primitives/` + +### Bridge or compatibility concepts + +- `FieldMetaInfo` + - Important runtime object in the implementation. 
+ - Not the main declaration entry point for new code. +- Compatibility modules kept for the 2.x line: + - `src/excelalchemy/types/` + - `src/excelalchemy/exc.py` + - `src/excelalchemy/identity.py` + - `src/excelalchemy/header_models.py` + - `src/excelalchemy/util/convertor.py` + +## 5. Important lifecycle and flow concepts + +### Import flow + +The import flow is the richest lifecycle in the repository. + +- Start point: + - `ExcelAlchemy.import_data(...)` + - implemented in `src/excelalchemy/core/alchemy.py` +- Optional public progress hook: + - `ExcelAlchemy.import_data(..., on_event=...)` + - emits simple event dictionaries during the same synchronous import run +- Runtime owner: + - `ImportSession` + - `src/excelalchemy/core/import_session.py` +- Main lifecycle phases: + - `INITIALIZED` + - `WORKBOOK_LOADED` + - `HEADERS_VALIDATED` + - `ROWS_PREPARED` + - `ROWS_EXECUTED` + - `RESULT_RENDERED` + - `COMPLETED` +- Decision points: + - header valid or not via `ValidateHeaderResult` + - row valid or not through `ImportExecutor` + - overall result via `ValidateResult`: + - `HEADER_INVALID` + - `DATA_INVALID` + - `SUCCESS` +- Event vocabulary: + - `started` + - `header_validated` + - `row_processed` + - `completed` + - `failed` +- Workbook-facing row result concept: + - `ValidateRowResult` + - values: + - `SUCCESS` + - `FAIL` + +### Template generation flow + +- Start points: + - `ExcelAlchemy.download_template(...)` + - `ExcelAlchemy.download_template_artifact(...)` +- Core idea: + - schema contract -> field metadata -> schema layout -> worksheet table -> renderer -> workbook output +- Main components: + - `src/excelalchemy/core/alchemy.py` + - `src/excelalchemy/core/schema.py` + - `src/excelalchemy/core/rendering.py` + - `src/excelalchemy/core/writer.py` + +### Export flow + +- Start points: + - `ExcelAlchemy.export(...)` + - `ExcelAlchemy.export_artifact(...)` + - `ExcelAlchemy.export_upload(...)` +- Core idea: + - export model + export rows -> selected output 
keys -> worksheet table -> renderer -> artifact or uploaded workbook +- Main components: + - `src/excelalchemy/core/alchemy.py` + - `src/excelalchemy/core/schema.py` + - `src/excelalchemy/core/rendering.py` + - `src/excelalchemy/core/writer.py` + - `src/excelalchemy/core/storage_protocol.py` + - `src/excelalchemy/core/storage.py` + +### Storage integration flow + +- Start point: + - `storage=...` on `ImporterConfig` or `ExporterConfig` +- Core idea: + - input workbooks are read as `WorksheetTable` + - rendered workbooks are uploaded and returned as URLs + - custom storage readers currently use `src/excelalchemy/core/table.py` for that table shape +- Main components: + - `src/excelalchemy/core/storage_protocol.py` + - `src/excelalchemy/core/storage.py` + - `src/excelalchemy/core/storage_minio.py` + - `src/excelalchemy/core/table.py` + - `examples/custom_storage.py` + +## Mental model in one sentence + +- `ExcelAlchemy` treats an Excel workbook as a typed contract derived from Pydantic models, then routes that contract through layout, parsing, validation, execution, rendering, and storage boundaries. diff --git a/docs/examples-showcase.md b/docs/examples-showcase.md index db2bb5f..d3ff2f6 100644 --- a/docs/examples-showcase.md +++ b/docs/examples-showcase.md @@ -30,6 +30,7 @@ see Best entry point if you want to understand the core story: - generate a workbook template +- run a lightweight structural preflight - accept a filled workbook - validate the upload - create domain rows @@ -43,6 +44,7 @@ Fixed output: ```text Employee import workflow completed +Preflight: VALID Result: SUCCESS Success rows: 1 Failed rows: 0 diff --git a/docs/getting-started.md b/docs/getting-started.md index 7a7ebbb..116a14b 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -4,6 +4,11 @@ This page is the fastest way to get productive with ExcelAlchemy. If you want screenshots and fixed workflow outputs first, see [`docs/examples-showcase.md`](examples-showcase.md). 
+If you want the platform-layer architecture of the import workflow, see +[`docs/platform-architecture.md`](platform-architecture.md), +[`docs/runtime-model.md`](runtime-model.md), +and +[`docs/integration-blueprints.md`](integration-blueprints.md). If you want the full public surface and compatibility boundaries, see [`docs/public-api.md`](public-api.md). If you want to understand the result objects and how to surface them through an @@ -75,8 +80,39 @@ class EmployeeImporter(BaseModel): age: Annotated[Number, Field(ge=18), ExcelMeta(label='Age', order=2)] ``` +If you want generated templates to give users a more concrete example before +upload, you can add template UX metadata such as `hint` and `example_value`: + +```python +class EmployeeImporter(BaseModel): + work_email: Annotated[ + String, + Field(min_length=8), + ExcelMeta( + label='Work email', + order=3, + hint='Use your company email address', + example_value='alice@company.com', + ), + ] +``` + +This is additive. It does not change import behavior or worksheet layout. It +only adds a more helpful header comment in the generated template, for example: + +- `Hint: Use your company email address` +- `Example: alice@company.com` + ## 4. Pick The Workflow You Need +Top-level import workflow: + +1. template authoring +2. preflight gate +3. import runtime +4. result intelligence +5. 
artifact and delivery + Import-only create flow: ```python @@ -170,12 +206,14 @@ These two documents explain: If you are integrating ExcelAlchemy into a web backend, the recommended public result surface is: +- `ImportPreflightResult` - `ImportResult` - `alchemy.cell_error_map` - `alchemy.row_error_map` These objects let you return: +- a lightweight preflight summary before import - a high-level import summary - row-level error summaries - cell-level coordinates for UI highlighting @@ -185,3 +223,44 @@ See: - [`docs/result-objects.md`](result-objects.md) - [`docs/api-response-cookbook.md`](api-response-cookbook.md) - [`examples/fastapi_reference/README.md`](../examples/fastapi_reference/README.md) + +## 9. Use Preflight Before Import When You Need A Quick Structural Check + +Use `preflight_import(...)` when you want a fast answer to: + +- does the configured sheet exist +- do the headers match the schema +- does the workbook look structurally importable +- about how many rows would a later import process + +Use `import_data(...)` when you want the full workflow: + +- row validation +- create / update callback execution +- cell and row error maps +- result workbook rendering and upload + +Typical backend flow: + +```python +preflight = alchemy.preflight_import('employees.xlsx') + +if not preflight.is_valid: + return { + 'preflight': preflight.to_api_payload(), + } + +result = await alchemy.import_data('employees.xlsx', 'employees-result.xlsx') + +return { + 'preflight': preflight.to_api_payload(), + 'result': result.to_api_payload(), + 'cell_errors': alchemy.cell_error_map.to_api_payload(), + 'row_errors': alchemy.row_error_map.to_api_payload(), +} +``` + +Keep this distinction in mind: + +- preflight is lightweight and structural +- import is the full validation and execution path diff --git a/docs/integration-blueprints.md b/docs/integration-blueprints.md new file mode 100644 index 0000000..4ac23a1 --- /dev/null +++ b/docs/integration-blueprints.md @@ -0,0 +1,189 
@@
+# Integration Blueprints
+
+This page shows practical integration blueprints for the v2.4 import platform
+layer.
+These are not new product surfaces.
+They are recommended ways to compose the current 2.x capabilities.
+
+If you want the platform capability map, see
+[`docs/platform-architecture.md`](platform-architecture.md).
+If you want the runtime sequence view, see
+[`docs/runtime-model.md`](runtime-model.md).
+If you want detailed result payload shapes, see
+[`docs/result-objects.md`](result-objects.md) and
+[`docs/api-response-cookbook.md`](api-response-cookbook.md).
+
+## Blueprint 1: Backend Worker Import Flow
+
+Use this blueprint when your application already has a worker, queue, or job
+runner and you want ExcelAlchemy to remain the import engine inside that worker.
+
+This is an application-level worker blueprint.
+It is not a claim that ExcelAlchemy ships a worker framework.
+
+### Recommended shape
+
+```mermaid
+flowchart LR
+    U[Spreadsheet User] --> API[Upload API]
+    API --> PF[Preflight Gate]
+    PF -->|valid| Q["App Queue / Worker Trigger"]
+    PF -->|invalid| PFR[Preflight Response]
+
+    Q --> W[Backend Worker]
+    W --> EA["ExcelAlchemy import_data(..., on_event=...)"]
+    EA --> RES["ImportResult + Issue Maps"]
+    EA --> OUT[Result Workbook Upload]
+    RES --> APIRES["API Status Store / Polling Endpoint"]
+    OUT --> APIRES
+```
+
+### Why this fits the current platform
+
+- preflight stays a lightweight synchronous gate
+- the worker owns application scheduling and retries
+- `import_data(..., on_event=...)` remains the real runtime entry point
+- lifecycle events can update application job status inline
+- result intelligence remains post-import and machine-readable
+
+### Recommended responsibilities
+
+Upload API:
+
+- accept the workbook reference
+- run `preflight_import(...)`
+- reject obvious structural failures early
+- enqueue only structurally importable workbooks
+
+Backend worker:
+
+- call `import_data(..., on_event=...)`
+- update job status 
using lifecycle events
+- persist `ImportResult`
+- persist row/cell issue payloads when needed
+- expose result workbook URL when one is produced
+
+Application status endpoint:
+
+- return job state
+- return final result payloads
+- optionally expose remediation payloads for UI consumers
+
+### Important boundaries
+
+- the queue, worker, retry policy, and job persistence belong to the
+  application
+- ExcelAlchemy remains synchronous inside the worker execution
+- do not describe this as an ExcelAlchemy job subsystem
+
+## Blueprint 2: Frontend Remediation Flow
+
+Use this blueprint when a frontend needs both high-level outcome information and
+compact retry guidance after a failed import.
+
+```mermaid
+flowchart TD
+    U[Spreadsheet User] --> FE[Frontend]
+    FE --> API[Backend API]
+    API --> PF["preflight_import(...)"]
+
+    PF -->|invalid| PRE[ImportPreflightResult payload]
+    PF -->|valid| RUN["import_data(...)"]
+
+    RUN --> R[ImportResult]
+    RUN --> C[CellErrorMap]
+    RUN --> RI[RowIssueMap]
+    RUN --> RP["build_frontend_remediation_payload(...)"]
+    RUN --> URL[result workbook URL]
+
+    PRE --> FE
+    R --> FE
+    C --> FE
+    RI --> FE
+    RP --> FE
+    URL --> FE
+```
+
+### Recommended frontend use
+
+Preflight response:
+
+- decide whether the workbook is structurally importable
+- show blocking sheet/header problems before full execution
+
+Result response:
+
+- use `ImportResult` for high-level outcome and counts
+- use `CellErrorMap` for precise field/cell UI highlighting
+- use `RowIssueMap` for list/table summaries
+- use the remediation payload for concise retry guidance
+- use the result workbook URL when the user should download the annotated file
+
+### Recommended response layering
+
+- `preflight`
+  - structural gate result only
+- `result`
+  - overall import outcome
+- `cell_errors`
+  - precise workbook-coordinate issues
+- `row_errors`
+  - grouped row summaries
+- `remediation`
+  - optional condensed retry guidance
+
+### Important boundaries
+
+- remediation 
payloads are additive and opt-in +- they do not replace the stable result payloads +- they should stay conservative and avoid overstating automatic fix guidance + +## Blueprint 3: Artifact Delivery Flow + +Use this blueprint when delivery of the template artifact and result workbook is +part of the integration design. + +```mermaid +flowchart LR + A[Application] --> T[Template Artifact Request] + T --> EA[ExcelAlchemy] + EA --> TA[ExcelArtifact] + TA --> DL[Browser Download or API Response] + + A --> I[Import Runtime] + I --> RW[Rendered Result Workbook] + RW --> ST[ExcelStorage Upload] + ST --> URL[Result Workbook URL] + URL --> API[API Response or Worker Status] +``` + +### What this highlights + +- template authoring and result delivery share rendering primitives +- artifact delivery is a platform stage even though it depends on earlier + stages +- storage remains a seam rather than a mandated backend + +## Choosing The Right Blueprint + +- use the backend worker blueprint when your application already has queued or + long-running import orchestration +- use the frontend remediation blueprint when the UI needs compact retry + guidance after validation failures +- use the artifact delivery blueprint when file delivery semantics are a first + class part of the integration + +In all three cases, keep the same platform order: + +1. template authoring +2. preflight gate +3. import runtime +4. result intelligence +5. artifact and delivery + +## Recommended Reading + +- [`docs/platform-architecture.md`](platform-architecture.md) +- [`docs/runtime-model.md`](runtime-model.md) +- [`docs/result-objects.md`](result-objects.md) +- [`docs/api-response-cookbook.md`](api-response-cookbook.md) diff --git a/docs/integration-roadmap.md b/docs/integration-roadmap.md index a9d8187..d102377 100644 --- a/docs/integration-roadmap.md +++ b/docs/integration-roadmap.md @@ -10,6 +10,12 @@ large workbooks, and round-trip limits, see [`docs/limitations.md`](limitations.md). 
If you want screenshots and captured workflow output first, see [`docs/examples-showcase.md`](examples-showcase.md). +If you want the new platform-layer architecture view first, see +[`docs/platform-architecture.md`](platform-architecture.md). +If you want the runtime sequence and blueprint-style integration guidance, see +[`docs/runtime-model.md`](runtime-model.md) +and +[`docs/integration-blueprints.md`](integration-blueprints.md). ## 1. If You Are Integrating ExcelAlchemy For The First Time @@ -24,6 +30,7 @@ Recommended order: Focus on these concepts first: - stable import paths +- template authoring -> preflight gate -> import runtime -> result intelligence -> artifact/delivery - schema declaration style - `storage=...` as the recommended backend integration path - the difference between import, create-or-update, and export workflows @@ -33,30 +40,43 @@ Focus on these concepts first: Recommended order: -1. [`docs/result-objects.md`](result-objects.md) -2. [`docs/api-response-cookbook.md`](api-response-cookbook.md) -3. [`examples/fastapi_reference/README.md`](../examples/fastapi_reference/README.md) -4. [`docs/public-api.md`](public-api.md) +1. [`docs/platform-architecture.md`](platform-architecture.md) +2. [`docs/integration-blueprints.md`](integration-blueprints.md) +3. [`docs/result-objects.md`](result-objects.md) +4. [`docs/api-response-cookbook.md`](api-response-cookbook.md) +5. [`examples/fastapi_reference/README.md`](../examples/fastapi_reference/README.md) +6. 
[`docs/public-api.md`](public-api.md) Focus on these objects: +- `ImportPreflightResult` - `ImportResult` - `CellErrorMap` - `RowIssueMap` +Use the platform docs first when you need to decide: + +- where to put preflight versus full runtime execution +- whether result payload shaping belongs in the API layer or UI layer +- how template generation and result delivery fit the same integration story + Use these payload helpers directly in your API layer: +- `ExcelAlchemy.preflight_import(...)` when the endpoint wants a lightweight structural gate before the real import - `ImportResult.to_api_payload()` - `CellErrorMap.to_api_payload()` - `RowIssueMap.to_api_payload()` +- `build_frontend_remediation_payload(...)` when the client wants compact retry guidance ## 3. If You Are Building Frontend Error Displays Recommended order: -1. [`docs/result-objects.md`](result-objects.md) -2. [`docs/api-response-cookbook.md`](api-response-cookbook.md) -3. [`examples/fastapi_reference/README.md`](../examples/fastapi_reference/README.md) +1. [`docs/platform-architecture.md`](platform-architecture.md) +2. [`docs/integration-blueprints.md`](integration-blueprints.md) +3. [`docs/result-objects.md`](result-objects.md) +4. [`docs/api-response-cookbook.md`](api-response-cookbook.md) +5. [`examples/fastapi_reference/README.md`](../examples/fastapi_reference/README.md) Focus on these payload fields: @@ -77,6 +97,19 @@ And these grouped or summary helpers: - `grouped.messages_by_row` - `grouped.messages_by_code` +If the frontend wants a compact retry-oriented payload instead of deriving its +own remediation summary, also inspect: + +- `build_frontend_remediation_payload(...)` +- `remediation.suggested_action` +- `remediation.fix_hint` + +Use the blueprint doc when the frontend flow depends on: + +- a preflight-first upload experience +- result workbook download links +- compact remediation guidance in addition to the stable result surfaces + ## 4. 
If You Want Copyable Reference Code Start here: diff --git a/docs/invariants.md b/docs/invariants.md new file mode 100644 index 0000000..3c5ed06 --- /dev/null +++ b/docs/invariants.md @@ -0,0 +1,166 @@ +# Invariants + +This document records important invariants and behavioral constraints that are visible in the repository today. +It is based on source code, tests, and existing documentation. + +For API boundaries, see [`docs/public-api.md`](public-api.md). +For domain vocabulary, see [`docs/domain-model.md`](domain-model.md). +For component structure, see [`docs/architecture.md`](architecture.md). + +## Related docs + +- [../AGENTS.md](../AGENTS.md) for repository-local guidance on changing behavior safely. +- [repo-map.md](repo-map.md) for a directory-level map of the code and docs behind these constraints. +- [domain-model.md](domain-model.md) for the concepts these invariants attach to. +- [../src/excelalchemy/README.md](../src/excelalchemy/README.md) for the implementation view of the main package. +- [../tests/README.md](../tests/README.md) for where these invariants are enforced. + +## 1. Public API invariants + +- **Stable public imports come from the package root and a small set of public modules.** + - Why it matters: new code and documentation should prefer the supported public surface instead of internal modules that may change without notice. + - Relevant files: `docs/public-api.md`, `src/excelalchemy/__init__.py`, `src/excelalchemy/config.py`, `src/excelalchemy/metadata.py`, `src/excelalchemy/results.py`, `src/excelalchemy/exceptions.py` + +- **`ExcelStorage` and `storage=...` are the recommended 2.x backend integration contract.** + - Why it matters: storage is modeled as a protocol boundary, and new code should not treat Minio-specific config as the primary architecture. 
+ - Relevant files: `docs/public-api.md`, `MIGRATIONS.md`, `src/excelalchemy/core/storage_protocol.py`, `src/excelalchemy/config.py` + +- **`ImportResult`, `CellErrorMap`, and `RowIssueMap` are first-class public result surfaces.** + - Why it matters: API and frontend integrations are expected to build on these objects and their helper methods rather than internal runtime state. + - Relevant files: `docs/result-objects.md`, `docs/api-response-cookbook.md`, `src/excelalchemy/results.py`, `tests/contracts/test_result_contract.py` + +- **The recommended import inspection names are `worksheet_table`, `header_table`, `cell_error_map`, and `row_error_map`.** + - Why it matters: these names are the forward-looking 2.x terminology; older aliases exist for compatibility but are not the preferred naming path. + - Relevant files: `docs/public-api.md`, `MIGRATIONS.md`, `src/excelalchemy/core/alchemy.py`, `src/excelalchemy/core/import_session.py` + +- **Template/export APIs return explicit Excel payload types.** + - Why it matters: callers can rely on `download_template()` and `export()` returning prefixed data URLs, while `download_template_artifact()` and `export_artifact()` return binary-friendly `ExcelArtifact` objects. + - Relevant files: `src/excelalchemy/core/abstract.py`, `src/excelalchemy/core/alchemy.py`, `src/excelalchemy/artifacts.py`, `tests/contracts/test_template_contract.py`, `tests/contracts/test_export_contract.py` + +## 2. Behavioral invariants + +- **Template generation does not require a configured storage backend.** + - Why it matters: template rendering is a pure render path; only import workbook reads and upload paths depend on storage. + - Relevant files: `tests/contracts/test_storage_contract.py`, `src/excelalchemy/core/alchemy.py`, `src/excelalchemy/core/storage.py` + +- **A header-invalid import ends the flow without uploading a result workbook.** + - Why it matters: invalid headers short-circuit before row execution and result-workbook upload. 
+ - Relevant files: `tests/contracts/test_import_contract.py`, `src/excelalchemy/core/import_session.py`, `src/excelalchemy/results.py` + +- **A data-invalid import uploads a result workbook and reports a download URL.** + - Why it matters: row-level failures are expected to produce a workbook that can be returned to users for correction. + - Relevant files: `tests/contracts/test_import_contract.py`, `tests/contracts/test_storage_contract.py`, `src/excelalchemy/core/import_session.py` + +- **Import result workbooks prepend result and reason columns and mark failures visually.** + - Why it matters: the library’s failure feedback is workbook-facing, not just API-facing. + - Relevant files: `tests/contracts/test_import_contract.py`, `tests/contracts/test_core_components_contract.py`, `src/excelalchemy/core/rows.py`, `src/excelalchemy/core/writer.py` + +- **Explicit `storage` takes precedence over legacy Minio settings when both are supplied.** + - Why it matters: this preserves the recommended storage abstraction path while still allowing legacy compatibility fields to coexist during migration. + - Relevant files: `tests/contracts/test_storage_contract.py`, `tests/unit/test_config_options.py`, `src/excelalchemy/core/storage.py`, `src/excelalchemy/config.py` + +- **Uploaded workbook payloads remain binary `.xlsx` content, not prefixed data URLs.** + - Why it matters: storage backends are expected to receive workbook bytes, not browser-oriented `data:` URL strings. + - Relevant files: `tests/contracts/test_storage_contract.py`, `src/excelalchemy/core/storage_minio.py`, `src/excelalchemy/util/file.py` + +- **Each import run reloads workbook state and tracks a fresh session snapshot.** + - Why it matters: the facade is long-lived, but import runtime state is one-shot and should not leak from one run into the next. 
+ - Relevant files: `tests/contracts/test_import_contract.py`, `src/excelalchemy/core/import_session.py`, `src/excelalchemy/core/alchemy.py` + +- **Generated templates do not rely on Excel data-validation rules.** + - Why it matters: workbook guidance is encoded in comments, formatting, and runtime validation rather than native Excel validation lists/rules. + - Relevant files: `tests/contracts/test_template_contract.py`, `src/excelalchemy/core/rendering.py`, `src/excelalchemy/core/writer.py` + +## 3. Data and contract invariants + +- **Excel metadata is attached to Pydantic fields without turning the field object into `FieldMetaInfo`.** + - Why it matters: metadata remains decoupled from Pydantic field internals, which is part of the repository’s Pydantic v2 boundary design. + - Relevant files: `tests/contracts/test_pydantic_contract.py`, `src/excelalchemy/metadata.py`, `src/excelalchemy/helper/pydantic.py` + +- **Schema extraction flattens composite fields into ordered unique labels, keys, and offsets.** + - Why it matters: merged headers, error targeting, and row reconstruction all depend on this flattened layout being stable and explicit. + - Relevant files: `tests/contracts/test_pydantic_contract.py`, `tests/contracts/test_core_components_contract.py`, `src/excelalchemy/core/schema.py`, `src/excelalchemy/metadata.py` + +- **Repeated child labels are valid when they belong to different parent labels.** + - Why it matters: merged headers are identified by parent-plus-child identity, not by child label alone. + - Relevant files: `tests/contracts/test_core_components_contract.py`, `src/excelalchemy/core/headers.py`, `src/excelalchemy/_primitives/header_models.py` + +- **Row aggregation reconstructs composite columns back into parent-shaped payloads.** + - Why it matters: flattened worksheet columns are not the final import payload; they must be grouped back into the logical model structure before validation and callbacks. 
+ - Relevant files: `tests/contracts/test_core_components_contract.py`, `src/excelalchemy/core/rows.py` + +- **Storage readers are expected to return `WorksheetTable` from `src/excelalchemy/core/table.py` and preserve merged-header gaps as empty cells.** + - Why it matters: header parsing depends on structural empties being preserved instead of collapsed away, and custom storage implementations follow the same table contract in the current 2.x line. + - Relevant files: `tests/contracts/test_storage_contract.py`, `src/excelalchemy/core/storage_protocol.py`, `src/excelalchemy/core/storage_minio.py`, `src/excelalchemy/core/table.py`, `examples/custom_storage.py` + +- **`ImportResult` has exactly three top-level result states: `SUCCESS`, `HEADER_INVALID`, and `DATA_INVALID`.** + - Why it matters: downstream integrations and status helpers rely on this fixed result vocabulary. + - Relevant files: `src/excelalchemy/results.py`, `tests/contracts/test_result_contract.py` + +- **`ImportResult.from_validate_header_result(...)` is only valid for failed header validation.** + - Why it matters: header validation and import execution are separate phases, and the conversion helper is intentionally constrained to invalid-header outcomes. + - Relevant files: `tests/contracts/test_result_contract.py`, `src/excelalchemy/results.py` + +- **Pydantic field-level validation errors become `ExcelCellError`; model-level validation errors become `ExcelRowError`.** + - Why it matters: the library preserves the distinction between cell-specific problems and row-wide business-rule failures. + - Relevant files: `tests/contracts/test_pydantic_contract.py`, `src/excelalchemy/helper/pydantic.py`, `src/excelalchemy/exceptions.py` + +- **Missing-field and field-format validation messages are normalized into workbook-facing ExcelAlchemy errors.** + - Why it matters: the repo treats error wording as part of the import contract rather than exposing raw Pydantic messages directly. 
+ - Relevant files: `tests/contracts/test_pydantic_contract.py`, `src/excelalchemy/helper/pydantic.py` + +## 4. Localization and formatting invariants + +- **Runtime exception messages are English-first in the 2.x line.** + - Why it matters: Python-facing error text is intentionally stabilized in English even when workbook-facing text is localized. + - Relevant files: `docs/locale.md`, `MIGRATIONS.md`, `src/excelalchemy/i18n/messages.py` + +- **Workbook display locales currently support `zh-CN` and `en`, with `zh-CN` as the default workbook locale.** + - Why it matters: workbook text is locale-aware, but the supported locale set is intentionally narrow and explicit today. + - Relevant files: `docs/locale.md`, `src/excelalchemy/config.py`, `src/excelalchemy/i18n/messages.py` + +- **Workbook-facing text falls back to the workbook default locale, while runtime and diagnostics fall back to English.** + - Why it matters: different message layers have different fallback policies, and callers should not assume one global locale switch covers all output. + - Relevant files: `docs/locale.md`, `src/excelalchemy/i18n/messages.py` + +- **Workbook display locale controls import instructions, header comments, result/reason labels, row status text, and workbook-facing value text.** + - Why it matters: localization is not limited to a few labels; it affects the visible workbook contract. + - Relevant files: `docs/locale.md`, `tests/contracts/test_template_contract.py`, `tests/contracts/test_import_contract.py`, `src/excelalchemy/core/alchemy.py`, `src/excelalchemy/core/writer.py` + +- **Developer diagnostics use named loggers and stable English log messages.** + - Why it matters: logs are treated as a developer/operator surface distinct from API payloads and workbook display text. 
+ - Relevant files: `docs/locale.md`, `tests/unit/test_diagnostics_logging.py`, `src/excelalchemy/_primitives/diagnostics.py`, `src/excelalchemy/codecs/base.py` + +- **Required template headers are visually distinguished and annotated with comments.** + - Why it matters: user guidance in the workbook is part of the contract, not an optional extra. + - Relevant files: `tests/contracts/test_template_contract.py`, `src/excelalchemy/metadata.py`, `src/excelalchemy/core/writer.py`, `src/excelalchemy/const.py` + +- **Import result workbooks highlight failed cells in red.** + - Why it matters: workbook feedback must remain visible and actionable for users correcting invalid rows. + - Relevant files: `tests/contracts/test_import_contract.py`, `src/excelalchemy/core/writer.py`, `src/excelalchemy/const.py` + +- **`excelalchemy.const` compatibility constants represent stable `zh-CN` defaults, not the full locale policy.** + - Why it matters: locale-aware behavior should be driven by config locale, not by reading those constants as the source of truth. + - Relevant files: `docs/locale.md`, `src/excelalchemy/const.py`, `src/excelalchemy/config.py` + +## 5. Backward compatibility expectations + +- **Deprecated compatibility modules remain available in the 2.x line but emit explicit deprecation warnings.** + - Why it matters: migration paths are still supported, but callers are expected to move toward the newer public module layout. + - Relevant files: `docs/public-api.md`, `MIGRATIONS.md`, `tests/unit/test_deprecation_policy.py`, `src/excelalchemy/types/`, `src/excelalchemy/exc.py`, `src/excelalchemy/identity.py`, `src/excelalchemy/header_models.py` + +- **The `excelalchemy.types.*` compatibility namespace is scheduled for removal in ExcelAlchemy 3.0.** + - Why it matters: it is preserved for 2.x migrations only and should not be treated as a long-term public namespace. 
+ - Relevant files: `docs/public-api.md`, `MIGRATIONS.md`, `tests/unit/test_deprecation_policy.py` + +- **Legacy Minio config fields still work in 2.x, but they emit deprecation warnings and are no longer the recommended path.** + - Why it matters: existing integrations continue to function, but new code should move to `storage=...`. + - Relevant files: `MIGRATIONS.md`, `docs/public-api.md`, `tests/unit/test_config_options.py`, `src/excelalchemy/config.py` + +- **Older import-inspection aliases remain available in 2.x.** + - Why it matters: `df`, `header_df`, `cell_errors`, and `row_errors` still work as compatibility paths, even though the clearer names are preferred in new code. + - Relevant files: `docs/public-api.md`, `MIGRATIONS.md`, `src/excelalchemy/core/alchemy.py`, `src/excelalchemy/core/import_session.py` + +- **Compatibility warnings are expected to point to replacement import paths.** + - Why it matters: the deprecation layer is not just a warning mechanism; it is part of the migration guidance built into the codebase. + - Relevant files: `tests/unit/test_deprecation_policy.py`, `src/excelalchemy/_primitives/deprecation.py`, `src/excelalchemy/util/convertor.py` diff --git a/docs/platform-architecture.md b/docs/platform-architecture.md new file mode 100644 index 0000000..32bc9e2 --- /dev/null +++ b/docs/platform-architecture.md @@ -0,0 +1,264 @@ +# Platform Architecture + +This page describes the import platform layer in ExcelAlchemy 2.x. +It sits above the internal component map documented in +[`docs/architecture.md`](architecture.md). + +Use this page when you want to answer: + +- what the import platform capabilities are +- how those capabilities compose into one workflow +- which public APIs belong to each capability layer +- how the platform view differs from the internal component view + +If you want the internal module breakdown, see +[`docs/architecture.md`](architecture.md). 
+If you want the runtime sequence in more detail, see +[`docs/runtime-model.md`](runtime-model.md). +If you want integration examples and blueprint-style guidance, see +[`docs/integration-blueprints.md`](integration-blueprints.md). + +## Platform Model + +ExcelAlchemy’s import platform is best understood as five capability stages: + +1. template authoring +2. preflight gate +3. import runtime +4. result intelligence +5. artifact and delivery + +These stages are layered on top of the existing facade-and-collaborators design. +They do not replace the internal architecture. They group the current public +capabilities into an integration-oriented model. + +```mermaid +flowchart LR + A[Template Authoring] + B[Preflight Gate] + C[Import Runtime] + D[Result Intelligence] + E[Artifact and Delivery] + + A --> B --> C --> D --> E + + S[ExcelStorage] + L[Locale] + F[ExcelAlchemy Facade] + + F --- A + F --- B + F --- C + F --- D + F --- E + + S --- B + S --- C + S --- E + L --- A + L --- E +``` + +## Capability Layers + +### 1. Template Authoring + +Purpose: + +- define the workbook contract before upload +- make the template self-explanatory for spreadsheet users + +Primary public surfaces: + +- schema models +- `FieldMeta(...)` +- `ExcelMeta(...)` +- `ExcelAlchemy.download_template(...)` +- `ExcelAlchemy.download_template_artifact(...)` + +Current capability boundary: + +- additive workbook guidance such as `hint` and `example_value` +- workbook-facing labels, ordering, and field semantics +- no row execution +- no upload validation + +Internal alignment: + +- `src/excelalchemy/metadata.py` +- `src/excelalchemy/core/schema.py` +- `src/excelalchemy/core/rendering.py` +- `src/excelalchemy/core/writer.py` +- `src/excelalchemy/codecs/` + +### 2. 
Preflight Gate + +Purpose: + +- answer whether a workbook is structurally importable before full execution + +Primary public surfaces: + +- `ExcelAlchemy.preflight_import(...)` +- `ImportPreflightResult` + +Current capability boundary: + +- sheet existence +- header validity +- lightweight structural checks +- estimated row count +- no row-level validation +- no callback execution +- no remediation payload construction + +Internal alignment: + +- `src/excelalchemy/core/preflight.py` +- `src/excelalchemy/core/headers.py` +- `src/excelalchemy/core/schema.py` +- `src/excelalchemy/core/storage_protocol.py` + +### 3. Import Runtime + +Purpose: + +- execute the real import flow +- keep runtime visibility additive and synchronous + +Primary public surfaces: + +- `ExcelAlchemy.import_data(..., on_event=...)` +- `ImporterConfig.for_create(...)` +- `ImporterConfig.for_update(...)` +- `ImporterConfig.for_create_or_update(...)` +- `ImportMode` + +Current capability boundary: + +- row preparation and validation +- create/update/create-or-update dispatch +- inline lifecycle events +- result workbook rendering decisions +- no job framework +- no streaming runtime model + +Internal alignment: + +- `src/excelalchemy/core/import_session.py` +- `src/excelalchemy/core/executor.py` +- `src/excelalchemy/core/rows.py` +- `src/excelalchemy/helper/pydantic.py` + +### 4. 
Result Intelligence + +Purpose: + +- turn one import run into structured signals for APIs, admin tools, and + frontend remediation flows + +Primary public surfaces: + +- `ImportResult` +- `CellErrorMap` +- `RowIssueMap` +- `build_frontend_remediation_payload(...)` + +Current capability boundary: + +- top-level outcome classification +- header issue exposure +- cell-level and row-level issue inspection +- grouped summaries and API payload helpers +- conservative, opt-in remediation guidance + +Internal alignment: + +- `src/excelalchemy/results.py` +- issue production paths in `src/excelalchemy/core/rows.py` +- execution result mapping in `src/excelalchemy/core/executor.py` + +### 5. Artifact and Delivery + +Purpose: + +- deliver platform outputs to callers, storage backends, and downstream systems + +Primary public surfaces: + +- `ExcelArtifact` +- template artifact helpers +- result workbook URL on `ImportResult` +- `ExcelStorage` + +Current capability boundary: + +- template bytes or artifact delivery +- result workbook upload and URL return +- storage-backed input and output handoff +- no storage product lock-in + +Internal alignment: + +- `src/excelalchemy/artifacts.py` +- `src/excelalchemy/core/rendering.py` +- `src/excelalchemy/core/writer.py` +- `src/excelalchemy/core/storage_protocol.py` +- `src/excelalchemy/core/storage.py` + +## Relationship To The Internal Architecture + +The platform model is not a second implementation architecture. +It is a reader-facing view of the current system. 
+ +```mermaid +flowchart TD + P[Platform Layer] + P --> A[Template Authoring] + P --> B[Preflight Gate] + P --> C[Import Runtime] + P --> D[Result Intelligence] + P --> E[Artifact and Delivery] + + A --> A1[metadata.py] + A --> A2[schema.py] + A --> A3[rendering.py / writer.py] + + B --> B1[preflight.py] + B --> B2[headers.py] + B --> B3[storage_protocol.py] + + C --> C1[alchemy.py] + C --> C2[import_session.py] + C --> C3[executor.py] + C --> C4[rows.py] + C --> C5[helper/pydantic.py] + + D --> D1[results.py] + D --> D2[rows.py] + D --> D3[executor.py] + + E --> E1[artifacts.py] + E --> E2[rendering.py / writer.py] + E --> E3[storage.py] +``` + +Use the platform view when you are integrating the library. +Use the internal view when you are changing implementation behavior. + +## What This Page Does Not Claim + +- It does not introduce a new async or job execution model. +- It does not claim that preflight replaces import execution. +- It does not claim that remediation is part of the runtime pipeline. +- It does not promote internal modules as stable application-facing APIs. +- It does not change the 2.x compatibility boundaries documented in + [`docs/public-api.md`](public-api.md). + +## Recommended Reading + +- [`docs/runtime-model.md`](runtime-model.md) +- [`docs/integration-blueprints.md`](integration-blueprints.md) +- [`docs/public-api.md`](public-api.md) +- [`docs/result-objects.md`](result-objects.md) +- [`docs/architecture.md`](architecture.md) diff --git a/docs/public-api.md b/docs/public-api.md index 04991e9..717c9fe 100644 --- a/docs/public-api.md +++ b/docs/public-api.md @@ -6,6 +6,10 @@ ones should be treated as internal implementation details. If you want the quickest path into the library, start with [`docs/getting-started.md`](getting-started.md). +If you want the import platform model first, see +[`docs/platform-architecture.md`](platform-architecture.md) +and +[`docs/runtime-model.md`](runtime-model.md). 
If you want a role-based reading path, see [`docs/integration-roadmap.md`](integration-roadmap.md). If you want concrete repository examples, see @@ -17,6 +21,23 @@ If you want result-object guidance for backend or frontend integration, see If you want copyable backend response shapes, see [`docs/api-response-cookbook.md`](api-response-cookbook.md). +## Import Platform At A Glance + +The stable public import workflow in the 2.x line is: + +1. template authoring +2. preflight gate +3. import runtime +4. result intelligence +5. artifact and delivery + +This page documents the public APIs that participate in those stages. +It does not replace the more detailed platform docs: + +- [`docs/platform-architecture.md`](platform-architecture.md) +- [`docs/runtime-model.md`](runtime-model.md) +- [`docs/integration-blueprints.md`](integration-blueprints.md) + ## Stable Public Modules These modules are the recommended import paths for application code: @@ -30,9 +51,12 @@ These modules are the recommended import paths for application code: - `excelalchemy.metadata` Public metadata entry points such as `FieldMeta(...)`, `ExcelMeta(...)`, and `PatchFieldMeta`. + Template guidance metadata such as `hint=` and `example_value=` is part of + this additive public surface. - `excelalchemy.results` Structured import result models such as `ImportResult`, - `ValidateResult`, and `ValidateHeaderResult`. + `ValidateResult`, `ValidateHeaderResult`, `ImportPreflightResult`, and + `ImportPreflightStatus`. - `excelalchemy.exceptions` Stable exception module for `ConfigError`, `ExcelCellError`, `ExcelRowError`, and `ProgrammaticError`. @@ -47,6 +71,12 @@ These modules are the recommended import paths for application code: The recommended backend configuration pattern in the 2.x line. - `ExcelArtifact` The recommended return shape when you need bytes, base64, or data URLs. 
+- `ExcelAlchemy.import_data(..., on_event=...)` + The additive public hook for synchronous import lifecycle events during one + import run. +- `ExcelAlchemy.preflight_import(...)` + The additive public hook for lightweight structural validation before full + import execution. - import inspection names: Prefer `worksheet_table`, `header_table`, `cell_error_map`, and `row_error_map` when reading import-run state from the facade. @@ -55,6 +85,60 @@ These modules are the recommended import paths for application code: when you need frontend-friendly or API-friendly validation output. The stable helper set also includes `records()`, `summary_by_field()`, `summary_by_row()`, and `summary_by_code()` where applicable. + For a compact retry-oriented payload, `excelalchemy.results` also exposes + `build_frontend_remediation_payload(...)` as an additive helper. + +## Stable Public Surface By Platform Stage + +### Template authoring + +- schema models declared with Pydantic +- `FieldMeta(...)` +- `ExcelMeta(...)` +- template generation methods on `ExcelAlchemy` +- `ExcelArtifact` when you need template bytes, base64, or data URLs + +### Preflight gate + +- `ExcelAlchemy.preflight_import(...)` +- `ImportPreflightResult` +- `ImportPreflightStatus` + +### Import runtime + +- `ExcelAlchemy.import_data(..., on_event=...)` +- `ImporterConfig` +- `ImportMode` +- `ImporterConfig.for_create(...)` +- `ImporterConfig.for_update(...)` +- `ImporterConfig.for_create_or_update(...)` + +Important boundary: + +- `on_event=...` is an additive synchronous observability hook +- it is not a separate async or job execution model + +### Result intelligence + +- `ImportResult` +- `CellErrorMap` +- `RowIssueMap` +- `build_frontend_remediation_payload(...)` +- facade inspection names: + `worksheet_table`, `header_table`, `cell_error_map`, `row_error_map` + +Important boundary: + +- remediation payloads are opt-in additions on top of the stable result + surfaces +- they do not replace 
`ImportResult.to_api_payload()` or the issue-map payloads
+
+### Artifact and delivery
+
+- `ExcelArtifact`
+- `ExcelStorage`
+- `storage=...`
+- result workbook URL exposure through `ImportResult`

 ## Compatibility Modules In 2.x

@@ -109,11 +193,41 @@ For most application code, these are the recommended import paths:

 - `from excelalchemy.results import ...`
   Use this if you need result models or richer error-map helper types directly.

+For synchronous job-style progress reporting, you can attach an event callback
+to the existing import call:
+
+```python
+job_state = {'status': 'pending', 'processed_rows': 0, 'total_rows': 0}
+
+def handle_import_event(event: dict[str, object]) -> None:
+    if event['event'] == 'started':
+        job_state['status'] = 'running'
+    elif event['event'] == 'row_processed':
+        job_state['processed_rows'] = event['processed_row_count']
+        job_state['total_rows'] = event['total_row_count']
+    elif event['event'] == 'completed':
+        job_state['status'] = 'completed'
+        job_state['result'] = event['result']
+    elif event['event'] == 'failed':
+        job_state['status'] = 'failed'
+
+result = await alchemy.import_data(
+    'employees.xlsx',
+    'employee-import-result.xlsx',
+    on_event=handle_import_event,
+)
+```
+
+This is still one inline import run, not a background job. The callback runs
+inline during normal header validation, row execution, and result rendering,
+which makes it useful for service-layer progress tracking without introducing
+a new execution model.
+ If you are building API responses from import failures, the recommended public result helpers are: - `CellErrorMap.to_api_payload()` - `RowIssueMap.to_api_payload()` +- `build_frontend_remediation_payload(...)` - `CellErrorMap.records()` - `RowIssueMap.records()` - `CellErrorMap.summary_by_field()` diff --git a/docs/repo-map.md b/docs/repo-map.md new file mode 100644 index 0000000..1fec2b5 --- /dev/null +++ b/docs/repo-map.md @@ -0,0 +1,365 @@ +# Repository Map + +This file is a compact map of the `ExcelAlchemy` repository. +It is meant to help both humans and coding agents find the right files before making changes. + +## Related docs + +- [../README.md](../README.md) for the public-facing overview. +- [../AGENTS.md](../AGENTS.md) for repo-local editing guidance. +- [domain-model.md](domain-model.md) for the core concepts behind these directories. +- [invariants.md](invariants.md) for behavior that should stay stable. +- [../src/excelalchemy/README.md](../src/excelalchemy/README.md) for the implementation structure inside the main package. +- [../tests/README.md](../tests/README.md) and [../examples/README.md](../examples/README.md) for the executable contract surface. + +## Top-Level Layout + +- `src/` + - Main package source for the library. + - Most code changes should start here. +- `tests/` + - Contract, integration, and unit coverage. + - Use this to confirm intended behavior before changing code. +- `examples/` + - Runnable reference workflows and a small FastAPI reference app. + - These are part of the user-facing contract, not throwaway demos. +- `docs/` + - Markdown documentation for architecture, public API, result objects, limits, performance, and onboarding. +- `scripts/` + - Smoke checks and asset-generation helpers used to validate docs, examples, and package behavior. +- `files/` + - Example workbooks and generated example outputs used by docs and smoke scripts. +- `images/` + - Screenshots used by `README.md` and `README-pypi.md`. 
+- `.github/` + - CI, publish workflows, and issue/PR templates. + +## Important Root Files + +- `README.md` + - Main project overview and best entry point for understanding the library. +- `README-pypi.md` + - PyPI-facing summary; should stay aligned with the main onboarding story. +- `README_cn.md` + - Chinese-language README. +- `ABOUT.md` + - Design rationale, architectural intent, and evolution notes. +- `MIGRATIONS.md` + - Compatibility and upgrade guidance for the 2.x line. +- `CHANGELOG.md` + - Release history and notable behavior/documentation changes. +- `AGENTS.md` + - Agent-focused guidance for safe navigation and modification. +- `pyproject.toml` + - Packaging, dependencies, Ruff, Pyright, and pytest configuration. +- `uv.lock` + - Locked dependency state for `uv`. + +## Package Source: `src/excelalchemy/` + +- `src/excelalchemy/__init__.py` + - Main public package surface. + - Re-exports the facade, configs, codecs, result objects, exceptions, and common types. +- `src/excelalchemy/config.py` + - Public configuration objects: + - `ImporterConfig` + - `ExporterConfig` + - `ImportMode` +- `src/excelalchemy/metadata.py` + - Public field metadata entry points: + - `FieldMeta(...)` + - `ExcelMeta(...)` + - Also contains the layered metadata model behind `FieldMetaInfo`. +- `src/excelalchemy/results.py` + - Public import result objects and API-friendly error maps: + - `ImportResult` + - `CellErrorMap` + - `RowIssueMap` +- `src/excelalchemy/exceptions.py` + - Public exceptions such as `ConfigError`, `ExcelCellError`, `ExcelRowError`, and `ProgrammaticError`. +- `src/excelalchemy/artifacts.py` + - Public `ExcelArtifact` wrapper for bytes, data URLs, and related helpers. + +## Internal Implementation: `src/excelalchemy/core/` + +- `src/excelalchemy/core/alchemy.py` + - Main facade implementation behind `excelalchemy.ExcelAlchemy`. + - Good starting point for understanding how import, export, template generation, and storage fit together. 
+- `src/excelalchemy/core/import_session.py` + - One-shot import runtime state and lifecycle. + - Central to the import execution path. +- `src/excelalchemy/core/schema.py` + - Builds flattened Excel-facing schema layout from Pydantic models. +- `src/excelalchemy/core/headers.py` + - Parses and validates simple and merged workbook headers. +- `src/excelalchemy/core/rows.py` + - Aggregates worksheet rows back into model-shaped payloads and tracks row/cell issues. +- `src/excelalchemy/core/executor.py` + - Validates row payloads and dispatches create/update/create-or-update callbacks. +- `src/excelalchemy/core/rendering.py` + - High-level rendering entry points for templates, exports, and import result workbooks. +- `src/excelalchemy/core/writer.py` + - Lower-level workbook writing logic used by rendering. +- `src/excelalchemy/core/storage_protocol.py` + - `ExcelStorage` protocol; the main storage extension point. +- `src/excelalchemy/core/storage.py` + - Storage gateway resolution and fallback behavior. +- `src/excelalchemy/core/storage_minio.py` + - Built-in Minio-backed storage implementation. +- `src/excelalchemy/core/table.py` + - Internal `WorksheetTable` abstraction used instead of pandas. + +## Field Codecs: `src/excelalchemy/codecs/` + +- `src/excelalchemy/codecs/base.py` + - Base codec abstractions: + - `ExcelFieldCodec` + - `CompositeExcelFieldCodec` +- `src/excelalchemy/codecs/*.py` + - Built-in field codec implementations behind public aliases such as `Email`, `NumberRange`, and `DateRange`: + - `string.py` + - `number.py` + - `date.py` + - `date_range.py` + - `money.py` + - `email.py` + - `phone_number.py` + - `url.py` + - `radio.py` + - `multi_checkbox.py` + - `organization.py` + - `staff.py` + - `tree.py` +- `src/excelalchemy/codecs/__init__.py` + - Small registry helpers for choice-oriented codecs. + +## Internal Support Modules + +- `src/excelalchemy/helper/pydantic.py` + - Pydantic adaptation boundary. 
+ - Important for metadata extraction and mapping validation errors back into ExcelAlchemy errors. +- `src/excelalchemy/i18n/messages.py` + - Runtime and workbook display messages. + - Important when changing wording, message keys, or locale behavior. +- `src/excelalchemy/_primitives/` + - Internal constants, identity wrappers, payload aliases, diagnostics, and deprecation helpers. + - Important files include: + - `src/excelalchemy/_primitives/constants.py` + - `src/excelalchemy/_primitives/identity.py` + - `src/excelalchemy/_primitives/payloads.py` + - `src/excelalchemy/_primitives/diagnostics.py` + - `src/excelalchemy/_primitives/deprecation.py` + - `src/excelalchemy/_primitives/header_models.py` + +## Compatibility Layer + +- `src/excelalchemy/types/` + - Deprecated compatibility namespace retained in the 2.x line. +- `src/excelalchemy/exc.py` + - Compatibility shim for `excelalchemy.exceptions`. +- `src/excelalchemy/identity.py` + - Compatibility shim for root-level identity exports. +- `src/excelalchemy/header_models.py` + - Compatibility shim for internal header models. +- `src/excelalchemy/const.py` + - Compatibility/low-level constants surface. +- `src/excelalchemy/util/convertor.py` + - Deprecated compatibility shim for `excelalchemy.util.converter`. + +These compatibility paths remain in the 2.x line, but they are not the preferred starting points for new code. + +## Documentation: `docs/` + +- `docs/getting-started.md` + - Fastest path for new users. +- `docs/public-api.md` + - Stable public modules vs compatibility vs internal modules. +- `docs/architecture.md` + - Component map and workflow map. +- `docs/result-objects.md` + - Import result objects and API-facing error maps. +- `docs/api-response-cookbook.md` + - Example backend response shapes. +- `docs/locale.md` + - Locale policy for workbook-facing text and runtime messages. +- `docs/limitations.md` + - Practical runtime limits and non-goals. 
+- `docs/performance.md` + - Operational expectations and performance notes. +- `docs/integration-roadmap.md` + - Role-based reading path. +- `docs/tool-comparison.md` + - Positioning against other tool categories. +- `docs/repo-map.md` + - This repository map. +- `docs/releases/` + - Release notes for specific versions. + +## Examples: `examples/` + +- `examples/README.md` + - Recommended reading order for examples. +- `examples/annotated_schema.py` + - Modern `Annotated[..., ExcelMeta(...)]` declaration style. +- `examples/employee_import_workflow.py` + - Core import workflow. +- `examples/create_or_update_import.py` + - Create-or-update import mode. +- `examples/export_workflow.py` + - Export flow and artifact upload behavior. +- `examples/custom_storage.py` + - Minimal custom `ExcelStorage` example. + - Also shows the current 2.x storage seam where readers return `WorksheetTable` from `src/excelalchemy/core/table.py`. +- `examples/minio_storage.py` + - Built-in Minio path example for the current 2.x line. + - Uses internal storage modules to demonstrate the built-in Minio compatibility path, not the preferred new-code API shape. +- `examples/fastapi_upload.py` + - Single-file FastAPI integration sketch. +- `examples/fastapi_reference/` + - More complete reference layout for a backend integration: + - `app.py` + - `models.py` + - `schemas.py` + - `responses.py` + - `presenters.py` + - `services.py` + - `storage.py` + - `README.md` + +## Tests: `tests/` + +- `tests/contracts/` + - Public behavior and compatibility contracts. + - Best starting point for understanding what should remain stable. +- `tests/integration/` + - Workflow-level and example-level tests. +- `tests/unit/` + - Focused logic tests for codecs, metadata, config, diagnostics, and other helpers. +- `tests/support/` + - Shared test models, fixtures, in-memory storage, and workbook helpers. +- `tests/files/` + - Workbook fixtures used by tests. 
+ +## Scripts: `scripts/` + +- `scripts/smoke_package.py` + - Installed-package smoke test. +- `scripts/smoke_examples.py` + - Example smoke test. +- `scripts/smoke_docs_assets.py` + - Docs and generated asset smoke test. +- `scripts/smoke_api_payload_snapshot.py` + - Stable payload snapshot smoke test. +- `scripts/generate_example_output_assets.py` + - Regenerates captured example output assets. +- `scripts/generate_portfolio_assets.py` + - Generates portfolio/demo assets. + +## Supporting Assets + +- `files/example-outputs/` + - Generated text and JSON outputs referenced by docs and smoke scripts. +- `files/*.xlsx` + - Example workbook assets. +- `images/` + - Screenshots shown in README files. + +## Public API vs Internal Implementation + +- Public API starting points: + - `src/excelalchemy/__init__.py` + - `src/excelalchemy/config.py` + - `src/excelalchemy/metadata.py` + - `src/excelalchemy/results.py` + - `src/excelalchemy/exceptions.py` + - `src/excelalchemy/codecs/` +- Internal implementation starting points: + - `src/excelalchemy/core/alchemy.py` + - `src/excelalchemy/core/import_session.py` + - `src/excelalchemy/core/schema.py` + - `src/excelalchemy/core/headers.py` + - `src/excelalchemy/core/rows.py` + - `src/excelalchemy/core/executor.py` + +## Most Important Code Paths + +- Public facade and configuration: + - `src/excelalchemy/__init__.py` + - `src/excelalchemy/config.py` + - `src/excelalchemy/metadata.py` + - `src/excelalchemy/results.py` +- Import flow: + - `src/excelalchemy/core/alchemy.py` + - `src/excelalchemy/core/import_session.py` + - `src/excelalchemy/core/headers.py` + - `src/excelalchemy/core/rows.py` + - `src/excelalchemy/core/executor.py` + - `src/excelalchemy/helper/pydantic.py` +- Export and template generation: + - `src/excelalchemy/core/alchemy.py` + - `src/excelalchemy/core/schema.py` + - `src/excelalchemy/core/rendering.py` + - `src/excelalchemy/core/writer.py` + - `src/excelalchemy/codecs/` +- Storage integration: + - 
`src/excelalchemy/core/storage_protocol.py` + - `src/excelalchemy/core/storage.py` + - `src/excelalchemy/core/storage_minio.py` + - `src/excelalchemy/core/table.py` + - `examples/custom_storage.py` +- Result payloads and API responses: + - `src/excelalchemy/results.py` + - `docs/result-objects.md` + - `docs/api-response-cookbook.md` + +## Likely Starting Points for Common Tasks + +- Understanding the public API: + - `README.md` + - `docs/getting-started.md` + - `docs/public-api.md` + - `src/excelalchemy/__init__.py` + - `src/excelalchemy/config.py` + - `src/excelalchemy/metadata.py` + - `src/excelalchemy/results.py` + +- Understanding import flow: + - `docs/architecture.md` + - `examples/employee_import_workflow.py` + - `src/excelalchemy/core/alchemy.py` + - `src/excelalchemy/core/import_session.py` + - `src/excelalchemy/core/headers.py` + - `src/excelalchemy/core/rows.py` + - `src/excelalchemy/core/executor.py` + - `tests/contracts/test_import_contract.py` + +- Understanding export and template generation: + - `examples/export_workflow.py` + - `src/excelalchemy/core/alchemy.py` + - `src/excelalchemy/core/schema.py` + - `src/excelalchemy/core/rendering.py` + - `src/excelalchemy/core/writer.py` + - `tests/contracts/test_template_contract.py` + - `tests/contracts/test_export_contract.py` + +- Understanding storage integration: + - `docs/public-api.md` + - `src/excelalchemy/core/storage_protocol.py` + - `src/excelalchemy/core/storage.py` + - `src/excelalchemy/core/storage_minio.py` + - `src/excelalchemy/core/table.py` + - `examples/custom_storage.py` + - `tests/contracts/test_storage_contract.py` + +- Understanding tests: + - `tests/contracts/` + - `tests/integration/` + - `tests/unit/` + - `tests/support/` + - Start with: + - `tests/contracts/test_import_contract.py` + - `tests/contracts/test_export_contract.py` + - `tests/contracts/test_template_contract.py` + - `tests/contracts/test_storage_contract.py` + - `tests/contracts/test_result_contract.py` + - 
`tests/contracts/test_pydantic_contract.py` diff --git a/docs/result-objects.md b/docs/result-objects.md index 40f16ee..2e7613e 100644 --- a/docs/result-objects.md +++ b/docs/result-objects.md @@ -14,15 +14,24 @@ If you want copyable success / failure / header-invalid response shapes, see The most important public result objects are: +- `ImportPreflightResult` - `ImportResult` - `CellErrorMap` - `RowIssueMap` +The public results module also exposes an additive remediation helper: + +- `build_frontend_remediation_payload(...)` + You can import them from: ```python from excelalchemy import ImportResult -from excelalchemy.results import CellErrorMap, RowIssueMap +from excelalchemy.results import ( + CellErrorMap, + RowIssueMap, + build_frontend_remediation_payload, +) ``` ## Error Payload Layers @@ -294,6 +303,100 @@ response = { } ``` +## `ImportPreflightResult` + +`ImportPreflightResult` is the high-level summary of one lightweight structural +preflight run. + +Useful fields include: + +- `status` + Overall status such as `VALID`, `HEADER_INVALID`, `SHEET_MISSING`, or + `STRUCTURE_INVALID`. +- `sheet_name` + The configured worksheet name used for preflight. +- `sheet_exists` + Whether the configured worksheet was found. +- `has_merged_header` + Whether the header block was detected as merged when readable. +- `estimated_row_count` + Estimated number of data rows for a later import run. +- `structural_issue_codes` + Stable machine-readable codes for non-header structural failures. + +Typical usage: + +```python +result = alchemy.preflight_import('employees.xlsx') + +if result.is_valid: + ... +``` + +Use preflight when you need a quick structural gate before the real import. + +Practical cases: + +- reject uploads that are missing the target sheet +- stop early when headers do not match the schema +- show a lightweight “looks importable” response before running row validation + +Do not treat preflight as a replacement for `import_data(...)`. 
+ +Preflight does not do: + +- row-level validation +- create / update execution +- cell or row error collection +- result workbook generation + +Useful helpers: + +- `is_valid` +- `is_header_invalid` +- `is_sheet_missing` +- `is_structure_invalid` +- `to_api_payload()` + +Example payload: + +```json +{ + "status": "HEADER_INVALID", + "is_valid": false, + "is_header_invalid": true, + "is_sheet_missing": false, + "is_structure_invalid": false, + "sheet": { + "name": "Sheet1", + "exists": true, + "has_merged_header": false + }, + "summary": { + "estimated_row_count": 3, + "structural_issue_codes": [] + }, + "header_issues": { + "is_required_missing": true, + "missing_required": ["Age"], + "missing_primary": [], + "unrecognized": ["Unexpected Column"], + "duplicated": [] + } +} +``` + +Simple workflow: + +```python +preflight = alchemy.preflight_import('employees.xlsx') + +if not preflight.is_valid: + return {'preflight': preflight.to_api_payload()} + +result = await alchemy.import_data('employees.xlsx', 'employees-result.xlsx') +``` + This gives you: - a stable top-level import summary @@ -307,6 +410,109 @@ For concrete success, data-invalid, and header-invalid API response examples, see [`docs/api-response-cookbook.md`](api-response-cookbook.md). +## Front-end Remediation Payload + +`build_frontend_remediation_payload(...)` is an additive serializer for +frontend-oriented remediation flows. It does not replace the default +`to_api_payload()` helpers and does not change their output shapes. + +Use it when you want a thinner payload that answers: + +- does the user need to fix anything before retrying? +- what is the next suggested action? +- which fields or codes are the best remediation entry points? +- are there any conservative fix hints available for known issue patterns? 
+ +Recommended usage: + +```python +from excelalchemy.results import build_frontend_remediation_payload + +result = await alchemy.import_data('employees.xlsx', 'employee-import-result.xlsx') + +remediation = build_frontend_remediation_payload( + result=result, + cell_error_map=alchemy.cell_error_map, + row_error_map=alchemy.row_error_map, +) +``` + +The remediation payload contains these top-level sections: + +- `result` + - the stable `ImportResult.to_api_payload()` output +- `remediation` + - a compact remediation summary with counts, workbook availability, and an + optional top-level `suggested_action` / `fix_hint` +- `by_field` + - field-oriented remediation summaries derived from cell-level issues +- `by_code` + - code-oriented remediation summaries derived from row-level issue grouping +- `items` + - compact issue entries for frontend rendering + +Example shape: + +```json +{ + "result": { + "result": "DATA_INVALID", + "is_success": false, + "is_header_invalid": false, + "is_data_invalid": true + }, + "remediation": { + "needs_remediation": true, + "affected_row_count": 1, + "affected_field_count": 1, + "affected_code_count": 1, + "header_issue_count": 0, + "result_workbook_available": true, + "suggested_action": "Correct the invalid rows and re-upload the workbook.", + "fix_hint": "Download the result workbook and review the highlighted rows before re-uploading." + }, + "by_field": [ + { + "field_label": "Email", + "unique_label": "Email", + "error_count": 1, + "codes": ["valid_email_required"], + "suggested_action": "Enter a complete email address and re-upload the workbook.", + "fix_hint": "Use a format such as name@example.com." + } + ], + "by_code": [ + { + "code": "valid_email_required", + "error_count": 1, + "suggested_action": "Enter a complete email address and re-upload the workbook.", + "fix_hint": "Use a format such as name@example.com." 
+ } + ], + "items": [ + { + "scope": "cell", + "code": "valid_email_required", + "field_label": "Email", + "row_number_for_humans": 1, + "column_number_for_humans": 2, + "display_message": "【Email】Enter a valid email address, such as name@example.com", + "suggested_action": "Enter a complete email address and re-upload the workbook.", + "fix_hint": "Use a format such as name@example.com." + } + ] +} +``` + +Important constraints: + +- the remediation payload is additive and opt-in +- existing payload helpers remain the primary stable result contracts +- `suggested_action` and `fix_hint` are intentionally conservative +- unknown issue patterns may omit remediation hints entirely +- v1 uses existing result objects and issue metadata; it does not add automatic + fixing or change the import pipeline + ## Workbook Feedback vs API Feedback ExcelAlchemy is designed so the workbook result and the API response can tell diff --git a/docs/runtime-model.md b/docs/runtime-model.md new file mode 100644 index 0000000..0a5396b --- /dev/null +++ b/docs/runtime-model.md @@ -0,0 +1,194 @@ +# Runtime Model + +This page describes the runtime sequence of the ExcelAlchemy import platform. +It explains how the public workflow moves from authoring through delivery +without redefining the underlying component architecture. + +If you want the platform capability map first, see +[`docs/platform-architecture.md`](platform-architecture.md). +If you want integration-oriented blueprints, see +[`docs/integration-blueprints.md`](integration-blueprints.md). +If you want the internal component view, see +[`docs/architecture.md`](architecture.md). + +## Runtime Sequence + +The recommended mental model is: + +1. template authoring +2. preflight gate +3. import runtime +4. result intelligence +5. artifact and delivery + +This sequence reflects current 2.x behavior. +It is not a promise of new orchestration primitives. 
+ +```mermaid +sequenceDiagram + participant App as Application + participant Facade as ExcelAlchemy + participant Storage as ExcelStorage + participant Runtime as Import Runtime + participant Results as Result Intelligence + + App->>Facade: define schema + metadata + App->>Facade: download_template_artifact(...) + Facade-->>App: template artifact + + App->>Facade: preflight_import(input_excel_name) + Facade->>Storage: read workbook table + Facade-->>App: ImportPreflightResult + + alt preflight valid + App->>Facade: import_data(..., on_event=...) + Facade->>Storage: read workbook table + Facade->>Runtime: validate + execute rows + Runtime-->>App: lifecycle events + Runtime->>Results: build result state + Results-->>App: ImportResult + issue maps + Results->>Storage: upload result workbook if produced + Storage-->>App: result workbook URL + else preflight invalid + Facade-->>App: stop before runtime execution + end +``` + +## Stage Boundaries + +### Template Authoring + +Runtime role: + +- none for uploaded workbook execution +- prepares the workbook contract and user guidance ahead of time + +Important constraints: + +- template guidance is additive +- it does not add a separate template engine +- it does not validate uploaded data by itself + +### Preflight Gate + +Runtime role: + +- fast structural decision point before full import execution + +What it does: + +- reads the workbook through configured storage +- checks sheet presence +- checks header/structure compatibility +- estimates row count + +What it does not do: + +- row-level validation +- callback dispatch +- result workbook generation +- remediation computation + +### Import Runtime + +Runtime role: + +- owns the real import execution path + +What it does: + +- loads workbook data +- parses and validates headers +- reconstructs row payloads +- validates through the configured runtime path +- dispatches create/update/upsert behavior +- emits inline lifecycle events when `on_event=...` is supplied + +What 
it does not do: + +- create a separate background execution framework +- stream rows as a separate public runtime model + +### Result Intelligence + +Runtime role: + +- turns execution state into stable post-import surfaces + +What it does: + +- classify the run outcome +- expose header issues +- expose row and cell issue maps +- support grouped summaries +- support optional remediation-oriented payload shaping + +What it does not do: + +- change import execution behavior +- automatically fix workbooks +- replace the default stable payloads + +### Artifact and Delivery + +Runtime role: + +- deliver the artifacts and URLs created by earlier stages + +What it does: + +- return template artifacts +- upload result workbooks when applicable +- expose result workbook URLs through the public result surface + +What it does not do: + +- define the storage product choice +- require Minio as the architecture + +## How The Runtime Model Sits On The Existing Facade + +The facade remains the public orchestration boundary. +The platform runtime model is layered on top of that boundary rather than +beside it. 
+ +```mermaid +flowchart TD + A[Application] + A --> B[ExcelAlchemy Facade] + + B --> C[Template Methods] + B --> D["preflight_import(...)"] + B --> E["import_data(..., on_event=...)"] + + D --> F[Structural Gate Path] + E --> G[Import Session Path] + + G --> H[ImportResult] + G --> I[CellErrorMap / RowIssueMap] + H --> J[Artifact and Delivery] +``` + +This is why the recommended documentation split is: + +- platform architecture docs for capability and sequence +- internal architecture docs for collaborator ownership + +## Runtime Maturity And Boundaries + +The current platform is mature enough to document as a coherent model, but the +doc set should stay precise about scope: + +- lifecycle events are additive synchronous callbacks +- preflight is lightweight and structural +- remediation payloads are opt-in and conservative +- artifact delivery depends on the configured storage seam +- large imports may still need worker-style application orchestration outside + the library + +## Recommended Reading + +- [`docs/platform-architecture.md`](platform-architecture.md) +- [`docs/integration-blueprints.md`](integration-blueprints.md) +- [`docs/result-objects.md`](result-objects.md) +- [`docs/api-response-cookbook.md`](api-response-cookbook.md) diff --git a/examples/README.md b/examples/README.md index c653a28..e6e1997 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,103 +1,128 @@ # Examples -These examples are organized as a recommended learning path rather than a flat list. - -If you want a single public-facing page that combines screenshots, -representative workflows, and captured outputs, see -[`docs/examples-showcase.md`](../docs/examples-showcase.md). - -If you want a copyable reference layout rather than a single script, see -[`examples/fastapi_reference/`](fastapi_reference/README.md). - -## Recommended Reading Order - -1. `annotated_schema.py` - - Start here if you want to learn the declaration style first.
- - Shows the modern `Annotated[..., Field(...), ExcelMeta(...)]` pattern. - - Best for: first-time readers who want to understand the schema declaration style. - - Output: prints the generated template filename and the declared field labels. - - Captured output: [`files/example-outputs/annotated-schema.txt`](../files/example-outputs/annotated-schema.txt) -2. `employee_import_workflow.py` - - Read this next if you want to understand the core import story. - - Shows template generation, workbook upload, import execution, and result reporting. - - Best for: backend developers implementing the basic import path. - - Output: prints success and failure counts, created row count, and uploaded result artifacts. - - Captured output: [`files/example-outputs/employee-import-workflow.txt`](../files/example-outputs/employee-import-workflow.txt) -3. `create_or_update_import.py` - - Read this after the basic import flow. - - Shows `ImporterConfig.for_create_or_update(...)` with `is_data_exist`, `creator`, and `updater`. - - Best for: admin tools and synchronization flows that mix creates and updates. - - Output: prints created row count, updated row count, and the final import result. - - Captured output: [`files/example-outputs/create-or-update-import.txt`](../files/example-outputs/create-or-update-import.txt) -4. `export_workflow.py` - - Read this once the import flow is clear. - - Shows artifact generation, export uploads, and a custom storage-backed export task. - - Best for: download centers and reporting tasks that need workbook artifacts and upload URLs. - - Output: prints artifact filename, byte size, upload URL, and uploaded object names. - - Captured output: [`files/example-outputs/export-workflow.txt`](../files/example-outputs/export-workflow.txt) -5. `custom_storage.py` - - Read this when you want to implement your own `ExcelStorage`. - - Keeps the example minimal and focused on the protocol boundary. 
- - Best for: teams wiring ExcelAlchemy into their own object storage layer. - - Output: prints the in-memory upload URL and uploaded byte count. - - Captured output: [`files/example-outputs/custom-storage.txt`](../files/example-outputs/custom-storage.txt) -6. `date_and_range_fields.py` - - Read this if you want to understand workbook-friendly date, date range, number range, and money fields. - - Best for: data-entry templates with compensation, effective dates, or range fields. - - Output: prints the generated template filename and the exported field labels. - - Captured output: [`files/example-outputs/date-and-range-fields.txt`](../files/example-outputs/date-and-range-fields.txt) -7. `selection_fields.py` - - Read this if your domain uses approval forms, assignments, ownership trees, or selection-heavy templates. - - Best for: approval forms, personnel assignment forms, and selection-heavy business templates. - - Output: prints the generated template filename and the declared selection field labels. - - Captured output: [`files/example-outputs/selection-fields.txt`](../files/example-outputs/selection-fields.txt) -8. `minio_storage.py` - - Read this if you need the built-in Minio path in the current 2.x line. - - This reflects the current 2.x compatibility-based Minio path rather than a future 3.x-only storage story. - - Best for: teams already using the built-in Minio compatibility path in 2.x. - - Output: prints the gateway type and confirms the built-in Minio path. -9. `fastapi_upload.py` - - Read this last as a web integration example. - - It is useful once the import and storage examples already make sense. - - Best for: backend teams exposing template download and workbook import over HTTP. - - Output: prints the import result, created row count, uploaded result artifacts, and registered FastAPI routes. -10. `fastapi_reference/` - - Read this if you want a copyable minimal reference project rather than a single-file integration sketch. 
- - Shows a split between route, request/response schema, service, response builder, storage, and workbook schema layers. - - Best for: teams integrating ExcelAlchemy into a real FastAPI backend. - - Output: prints the import result, created row count, uploaded artifacts, registered route paths, and structured response sections. - - Captured output: [`files/example-outputs/fastapi-reference.txt`](../files/example-outputs/fastapi-reference.txt) - -## By Goal - -- Learn the declaration style: - - `annotated_schema.py` -- Learn the core import flow: - - `employee_import_workflow.py` - - `create_or_update_import.py` -- Learn export and storage integration: - - `export_workflow.py` - - `custom_storage.py` - - `minio_storage.py` -- Learn field families: - - `date_and_range_fields.py` - - `selection_fields.py` -- Learn web integration: - - `fastapi_upload.py` - - `fastapi_reference/` - -## Storage and Backend Integration - -- `custom_storage.py` - - Shows a minimal custom `ExcelStorage` implementation for export uploads. -- `export_workflow.py` - - Shows a realistic export flow with artifact generation and upload. -- `minio_storage.py` - - Shows the built-in Minio-backed storage path currently available in the 2.x line. -- `fastapi_upload.py` - - Shows a FastAPI integration sketch for template download and workbook import. -- `fastapi_reference/` - - Shows a minimal reference-project layout with route, service, storage, and schema modules. +This directory contains runnable examples for the current 2.x API shape. +Use them to understand how the library is intended to be used from application code. + +## Related docs + +- [../README.md](../README.md) for the main user-facing overview. +- [../docs/public-api.md](../docs/public-api.md) for the supported public import surface. +- [../docs/repo-map.md](../docs/repo-map.md) for where examples fit into the repository. 
+- [../src/excelalchemy/README.md](../src/excelalchemy/README.md) for the implementation details behind the public workflows shown here. +- [../tests/README.md](../tests/README.md) for the smoke and integration coverage that treats examples as contract material. + +## What These Examples Demonstrate + +- The recommended public API path: + - public imports from `excelalchemy` + - `storage=...` as the preferred backend integration path + - facade methods on `ExcelAlchemy` +- The main user-facing workflows in this repository: + - schema declaration + - template generation + - import validation + - create-or-update import + - export generation + - storage integration + - backend/API integration + +## Example Groups + +### Core workflow demos + +- `examples/annotated_schema.py` + - Demonstrates the modern `Annotated[..., Field(...), ExcelMeta(...)]` declaration style. + - Shows additive template UX metadata such as `hint=` and `example_value=`. + - Type: demo of the recommended declaration style. + +- `examples/employee_import_workflow.py` + - Demonstrates the basic import flow from template generation through preflight, lifecycle-event observation, and import result handling. + - Type: runnable workflow demo. + +- `examples/create_or_update_import.py` + - Demonstrates `ImporterConfig.for_create_or_update(...)`. + - Type: runnable workflow demo. + +- `examples/export_workflow.py` + - Demonstrates export artifact generation and upload behavior. + - Type: runnable workflow demo. + +### Field-family demos + +- `examples/date_and_range_fields.py` + - Demonstrates date, date range, number range, and money-style workbook fields. + - Type: focused field-behavior demo. + +- `examples/selection_fields.py` + - Demonstrates option-driven field families such as radio, tree, organization, and staff selection fields. + - Type: focused field-behavior demo. 
+ +### Storage and compatibility examples + +- `examples/custom_storage.py` + - Demonstrates a minimal custom `ExcelStorage` implementation. + - Type: reference integration example. + +- `examples/minio_storage.py` + - Demonstrates the built-in Minio path that still exists in the current 2.x line. + - Type: compatibility example, not the preferred new-code path. + +### Backend integration examples + +- `examples/fastapi_upload.py` + - Demonstrates a compact FastAPI integration sketch. + - Type: lightweight integration demo. + +- `examples/fastapi_reference/` + - Demonstrates a more structured FastAPI-oriented reference layout with route, service, response, schema, and storage separation. + - Type: reference integration example. + +## How These Examples Should Be Used + +- Use them as: + - runnable demonstrations of the current recommended API shape + - copyable starting points for integration work + - smoke-tested examples of how the public facade is expected to behave + +- Do not treat them as: + - internal architecture documentation + - the only supported way to structure an application + - a guarantee that every example pattern is equally preferred + +## What An Agent Should Infer + +- The examples are part of the user-facing contract of this repository. +- The examples generally reflect the preferred 2.x usage story. +- `examples/custom_storage.py` and `examples/fastapi_reference/` are the best examples for real integration patterns. +- `examples/custom_storage.py` is the narrow case where application-side example code currently touches `src/excelalchemy/core/table.py`, because `ExcelStorage.read_excel_table(...)` uses `WorksheetTable` in the 2.x line. +- `examples/minio_storage.py` is intentionally a compatibility-oriented example for the current 2.x line. + +## What An Agent Should Not Infer + +- Do not infer that internal modules under `src/excelalchemy/core/` are the intended application import path just because an example touches a low-level concept. 
+- Do not generalize the imports in `examples/minio_storage.py` into the recommended application API; that example exists to show the built-in Minio compatibility path. +- Do not infer that legacy Minio config fields are the preferred new-code path; the repo docs prefer `storage=...`. +- Do not infer that examples are exploratory or disposable; they are covered by smoke-style tests. +- Do not infer that example output text can change freely; docs and generated assets depend on it. + +## When Example Changes Require Other Updates + +- If example behavior changes, also inspect: + - `tests/integration/test_examples_smoke.py` + - `scripts/smoke_examples.py` + - `docs/examples-showcase.md` + - `README.md` + - `README-pypi.md` + +- If printed example output changes intentionally, also update: + - `files/example-outputs/` + - `scripts/generate_example_output_assets.py` + - `scripts/smoke_docs_assets.py` + +- If example API payloads or integration response shapes change, also inspect: + - `docs/result-objects.md` + - `docs/api-response-cookbook.md` + - `examples/fastapi_reference/README.md` ## How To Run @@ -116,31 +141,7 @@ uv run python examples/fastapi_upload.py uv run python -m examples.fastapi_reference.app ``` -If you want to run the FastAPI app itself, install FastAPI first and then run -your preferred ASGI server against `examples.fastapi_upload:app`. - -For the reference-project version, use: - -```bash -uv run uvicorn examples.fastapi_reference.app:app --reload -``` - -If you want to smoke-test the web integration without running a server, execute: - -```bash -uv run python examples/fastapi_upload.py -``` +If you want the visual showcase and captured outputs that correspond to these examples, see: -## Notes - -- The examples intentionally use in-memory storage so they stay self-contained. -- They are meant to show the recommended public API shape for the stable 2.x - line. 
-- If you want a production backend, prefer `storage=...` with - `MinioStorageGateway` or your own `ExcelStorage` implementation. -- The built-in `minio_storage.py` example reflects the current 2.x Minio path, - which still uses the compatibility configuration fields under the hood. -- The smoke tests in `tests/integration/test_examples_smoke.py` cover the main - example entry points directly. -- Fixed example output assets are generated by - `scripts/generate_example_output_assets.py`. +- `docs/examples-showcase.md` +- `files/example-outputs/` diff --git a/examples/annotated_schema.py b/examples/annotated_schema.py index 78f5f45..c00b60e 100644 --- a/examples/annotated_schema.py +++ b/examples/annotated_schema.py @@ -11,13 +11,18 @@ class EmployeeImporter(BaseModel): full_name: Annotated[ String, Field(min_length=2), - ExcelMeta(label='Full name', order=1, hint='Use the legal name'), + ExcelMeta(label='Full name', order=1, hint='Use the legal name', example_value='Alice Chen'), ] age: Annotated[Number, Field(ge=18), ExcelMeta(label='Age', order=2)] work_email: Annotated[ Email, Field(min_length=8), - ExcelMeta(label='Work email', order=3, hint='Use your company email address'), + ExcelMeta( + label='Work email', + order=3, + hint='Use your company email address', + example_value='alice.chen@company.com', + ), ] diff --git a/examples/employee_import_workflow.py b/examples/employee_import_workflow.py index d704c80..43b0227 100644 --- a/examples/employee_import_workflow.py +++ b/examples/employee_import_workflow.py @@ -13,6 +13,7 @@ ExcelStorage, FieldMeta, ImporterConfig, + ImportPreflightResult, ImportResult, Number, String, @@ -82,9 +83,23 @@ async def create_employee(row: dict[str, object], context: dict[str, object] | N return row -async def run_workflow() -> tuple[ImportResult, InMemoryImportStorage, dict[str, object]]: +async def run_workflow() -> tuple[ + ImportPreflightResult, + ImportResult, + InMemoryImportStorage, + dict[str, object], + list[dict[str, 
object]], +]: storage = InMemoryImportStorage() - context: dict[str, object] = {'created_rows': []} + context: dict[str, object] = { + 'created_rows': [], + 'job_progress': { + 'status': 'pending', + 'processed_rows': 0, + 'total_rows': 0, + }, + } + events: list[dict[str, object]] = [] alchemy = ExcelAlchemy( ImporterConfig.for_create( @@ -98,22 +113,53 @@ async def run_workflow() -> tuple[ImportResult, InMemoryImportStorage, dict[str, template = alchemy.download_template_artifact(filename='employee-template.xlsx') _build_import_fixture(storage, template.as_bytes()) - result = await alchemy.import_data('employee-import.xlsx', 'employee-import-result.xlsx') - return result, storage, context + preflight = alchemy.preflight_import('employee-import.xlsx') + assert preflight.is_valid + + def handle_import_event(event: dict[str, object]) -> None: + events.append(event) + job_progress = context['job_progress'] + assert isinstance(job_progress, dict) + + match event['event']: + case 'started': + job_progress['status'] = 'running' + case 'row_processed': + job_progress['processed_rows'] = event['processed_row_count'] + job_progress['total_rows'] = event['total_row_count'] + case 'completed': + job_progress['status'] = 'completed' + job_progress['result'] = event['result'] + job_progress['result_workbook_url'] = event['url'] + case 'failed': + job_progress['status'] = 'failed' + job_progress['error'] = event['error_message'] + + result = await alchemy.import_data( + 'employee-import.xlsx', + 'employee-import-result.xlsx', + on_event=handle_import_event, + ) + return preflight, result, storage, context, events def main() -> None: - result, storage, context = asyncio.run(run_workflow()) + preflight, result, storage, context, events = asyncio.run(run_workflow()) created_rows = context['created_rows'] + job_progress = context['job_progress'] assert isinstance(created_rows, list) + assert isinstance(job_progress, dict) print('Employee import workflow completed') + 
print(f'Preflight: {preflight.status}') print(f'Result: {result.result}') print(f'Success rows: {result.success_count}') print(f'Failed rows: {result.fail_count}') print(f'Result workbook URL: {result.url}') print(f'Created rows: {len(created_rows)}') print(f'Uploaded artifacts: {sorted(storage.uploaded)}') + print(f'Observed events: {[event["event"] for event in events]}') + print(f'Job progress: {job_progress}') if __name__ == '__main__': diff --git a/examples/fastapi_reference/README.md b/examples/fastapi_reference/README.md index f741401..eab99f2 100644 --- a/examples/fastapi_reference/README.md +++ b/examples/fastapi_reference/README.md @@ -160,6 +160,25 @@ Example JSON response: "by_code": [] } }, + "remediation": { + "result": { + "result": "SUCCESS", + "is_success": true, + "is_header_invalid": false, + "is_data_invalid": false + }, + "remediation": { + "needs_remediation": false, + "affected_row_count": 0, + "affected_field_count": 0, + "affected_code_count": 0, + "header_issue_count": 0, + "result_workbook_available": true + }, + "by_field": [], + "by_code": [], + "items": [] + }, "created_rows": 1, "uploaded_artifacts": [], "request": { @@ -177,6 +196,7 @@ structured result payload. Application code can then read: - uploaded result workbook names from `uploaded_artifacts` - cell-level frontend payloads from `cell_errors` - row-level frontend payloads from `row_errors` +- compact retry guidance from `remediation` In the reference app, those values live under `data`, while `ok` tells the client whether the route returned a success or an API-layer error envelope. 
@@ -218,11 +238,45 @@ Example validation-error response shape: } ] } + }, + "remediation": { + "result": { + "result": "DATA_INVALID", + "is_success": false, + "is_header_invalid": false, + "is_data_invalid": true + }, + "remediation": { + "needs_remediation": true, + "affected_row_count": 1, + "affected_field_count": 1, + "affected_code_count": 1, + "header_issue_count": 0, + "result_workbook_available": true, + "suggested_action": "Correct the invalid rows and re-upload the workbook." + }, + "by_field": [ + { + "field_label": "Email", + "unique_label": "Email", + "error_count": 1, + "codes": ["valid_email_required"], + "suggested_action": "Enter a complete email address and re-upload the workbook.", + "fix_hint": "Use a format such as name@example.com." + } + ] } } } ``` +The remediation payload is additive: + +- `result`, `cell_errors`, and `row_errors` stay available for full inspection +- `remediation` is the smaller retry-oriented view +- `suggested_action` and `fix_hint` are intentionally conservative and may be + omitted for unknown issue patterns + Example API-layer error response: ```json @@ -265,6 +319,7 @@ The demo entry point prints: - registered route paths - response sections - request tenant and structured summary keys +- remediation summary keys For a captured output artifact, see: diff --git a/examples/fastapi_reference/app.py b/examples/fastapi_reference/app.py index 6b11b48..57e0b97 100644 --- a/examples/fastapi_reference/app.py +++ b/examples/fastapi_reference/app.py @@ -82,6 +82,7 @@ def main() -> None: print(f'Request tenant: {response_payload.request.tenant_id}') print(f"Cell error summary keys: {sorted(response_payload.cell_errors['summary'].keys())}") print(f"Row error summary keys: {sorted(response_payload.row_errors['summary'].keys())}") + print(f"Remediation keys: {sorted(response_payload.remediation.keys())}") app = create_app() if importlib.util.find_spec('fastapi') is not None else None diff --git 
a/examples/fastapi_reference/responses.py b/examples/fastapi_reference/responses.py index 540bd57..dcf995b 100644 --- a/examples/fastapi_reference/responses.py +++ b/examples/fastapi_reference/responses.py @@ -2,6 +2,7 @@ from examples.fastapi_reference.schemas import EmployeeImportRequest, EmployeeImportResponse from excelalchemy import CellErrorMap, ImportResult, RowIssueMap +from excelalchemy.results import build_frontend_remediation_payload def build_import_response( @@ -19,6 +20,11 @@ def build_import_response( result=result.to_api_payload(), cell_errors=cell_error_map.to_api_payload(), row_errors=row_error_map.to_api_payload(), + remediation=build_frontend_remediation_payload( + result=result, + cell_error_map=cell_error_map, + row_error_map=row_error_map, + ), created_rows=created_rows, uploaded_artifacts=uploaded_artifacts, request=request, diff --git a/examples/fastapi_reference/schemas.py b/examples/fastapi_reference/schemas.py index a758d80..af1f54d 100644 --- a/examples/fastapi_reference/schemas.py +++ b/examples/fastapi_reference/schemas.py @@ -17,6 +17,7 @@ class EmployeeImportResponse(BaseModel): result: dict[str, object] = Field(description='High-level import result payload.') cell_errors: dict[str, object] = Field(description='Structured cell-level error payload.') row_errors: dict[str, object] = Field(description='Structured row-level error payload.') + remediation: dict[str, object] = Field(description='Compact remediation-oriented payload for frontend flows.') created_rows: int = Field(description='Number of created rows in the demo service.') uploaded_artifacts: list[str] = Field(description='Uploaded workbook artifact names.') request: EmployeeImportRequest = Field(description='Structured request metadata.') diff --git a/files/example-outputs/annotated-schema.txt b/files/example-outputs/annotated-schema.txt index 897a833..3f86314 100644 --- a/files/example-outputs/annotated-schema.txt +++ b/files/example-outputs/annotated-schema.txt @@ -1 +1 
@@ -Generated template: employee-template.xlsx (6803 bytes) +Generated template: employee-template.xlsx (6853 bytes) diff --git a/files/example-outputs/custom-storage.txt b/files/example-outputs/custom-storage.txt index 78204fc..18fbc73 100644 --- a/files/example-outputs/custom-storage.txt +++ b/files/example-outputs/custom-storage.txt @@ -1,2 +1,2 @@ memory://employees.xlsx -Uploaded bytes: 6813 +Uploaded bytes: 6836 diff --git a/files/example-outputs/date-and-range-fields.txt b/files/example-outputs/date-and-range-fields.txt index aa1a1ea..3e76258 100644 --- a/files/example-outputs/date-and-range-fields.txt +++ b/files/example-outputs/date-and-range-fields.txt @@ -1,2 +1,2 @@ -Generated template: compensation-template.xlsx (6862 bytes) +Generated template: compensation-template.xlsx (6882 bytes) Fields: Start date, Probation window, Salary band, Signing bonus diff --git a/files/example-outputs/employee-import-workflow.txt b/files/example-outputs/employee-import-workflow.txt index e3c3552..e59e149 100644 --- a/files/example-outputs/employee-import-workflow.txt +++ b/files/example-outputs/employee-import-workflow.txt @@ -1,7 +1,10 @@ Employee import workflow completed +Preflight: VALID Result: SUCCESS Success rows: 1 Failed rows: 0 Result workbook URL: None Created rows: 1 Uploaded artifacts: [] +Observed events: ['started', 'header_validated', 'row_processed', 'completed'] +Job progress: {'status': 'completed', 'processed_rows': 1, 'total_rows': 1, 'result': 'SUCCESS', 'result_workbook_url': None} diff --git a/files/example-outputs/export-workflow.txt b/files/example-outputs/export-workflow.txt index 3b3af8d..09cfbcd 100644 --- a/files/example-outputs/export-workflow.txt +++ b/files/example-outputs/export-workflow.txt @@ -1,5 +1,5 @@ Export workflow completed Artifact filename: employees-export.xlsx -Artifact bytes: 6893 +Artifact bytes: 6909 Upload URL: memory://employees-export-upload.xlsx Uploaded objects: ['employees-export-upload.xlsx'] diff --git 
a/files/example-outputs/fastapi-reference.txt b/files/example-outputs/fastapi-reference.txt index b12b511..463aa2c 100644 --- a/files/example-outputs/fastapi-reference.txt +++ b/files/example-outputs/fastapi-reference.txt @@ -6,7 +6,8 @@ Created rows: 1 Uploaded artifacts: [] Routes: ['/employee-imports', '/employee-template.xlsx'] Envelope sections: ['data', 'ok'] -Data sections: ['cell_errors', 'created_rows', 'request', 'result', 'row_errors', 'uploaded_artifacts'] +Data sections: ['cell_errors', 'created_rows', 'remediation', 'request', 'result', 'row_errors', 'uploaded_artifacts'] Request tenant: tenant-001 Cell error summary keys: ['by_code', 'by_field', 'by_row'] Row error summary keys: ['by_code', 'by_row'] +Remediation keys: ['by_code', 'by_field', 'items', 'remediation', 'result'] diff --git a/files/example-outputs/import-failure-api-payload.json b/files/example-outputs/import-failure-api-payload.json index ae4531b..5409568 100644 --- a/files/example-outputs/import-failure-api-payload.json +++ b/files/example-outputs/import-failure-api-payload.json @@ -119,6 +119,86 @@ ] } }, + "remediation": { + "by_code": [ + { + "code": "ExcelCellError", + "error_count": 1, + "row_indices": [ + 0 + ], + "row_numbers_for_humans": [ + 1 + ], + "suggested_action": "Review the highlighted cells, correct the invalid values, and re-upload the workbook.", + "unique_labels": [ + "Age" + ] + } + ], + "by_field": [ + { + "codes": [ + "ExcelCellError" + ], + "error_count": 1, + "field_label": "Age", + "parent_label": null, + "row_indices": [ + 0 + ], + "row_numbers_for_humans": [ + 1 + ], + "suggested_action": "Review the highlighted cells, correct the invalid values, and re-upload the workbook.", + "unique_label": "Age" + } + ], + "items": [ + { + "code": "ExcelCellError", + "column_index": 3, + "column_number_for_humans": 4, + "display_message": "\u3010Age\u3011Invalid input; enter a number.", + "field_label": "Age", + "message": "Invalid input; enter a number.", + 
"parent_label": null, + "row_index": 0, + "row_number_for_humans": 1, + "scope": "cell", + "suggested_action": "Review the highlighted cells, correct the invalid values, and re-upload the workbook.", + "unique_label": "Age" + } + ], + "remediation": { + "affected_code_count": 1, + "affected_field_count": 1, + "affected_row_count": 1, + "fix_hint": "Download the result workbook and review the highlighted rows before re-uploading.", + "header_issue_count": 0, + "needs_remediation": true, + "result_workbook_available": true, + "suggested_action": "Correct the invalid rows and re-upload the workbook." + }, + "result": { + "header_issues": { + "duplicated": [], + "is_required_missing": false, + "missing_primary": [], + "missing_required": [], + "unrecognized": [] + }, + "is_data_invalid": true, + "is_header_invalid": false, + "is_success": false, + "result": "DATA_INVALID", + "summary": { + "fail_count": 1, + "result_workbook_url": "memory://smoke-invalid-result.xlsx", + "success_count": 0 + } + } + }, "result": { "header_issues": { "duplicated": [], diff --git a/files/example-outputs/selection-fields.txt b/files/example-outputs/selection-fields.txt index 7987614..da3158f 100644 --- a/files/example-outputs/selection-fields.txt +++ b/files/example-outputs/selection-fields.txt @@ -1,2 +1,2 @@ -Generated template: selection-fields-template.xlsx (6942 bytes) +Generated template: selection-fields-template.xlsx (6953 bytes) Fields: Request type, Impacted teams, Owner organization, Partner organizations, Owner, Reviewers diff --git a/images/excelalchemy-hero.png b/images/excelalchemy-hero.png new file mode 100644 index 0000000..c55332b Binary files /dev/null and b/images/excelalchemy-hero.png differ diff --git a/plans/README.md b/plans/README.md new file mode 100644 index 0000000..9965ed5 --- /dev/null +++ b/plans/README.md @@ -0,0 +1,93 @@ +# Plans + +This directory is for execution plans for repository work. 
+Use it when the work is large enough that a short issue, PR description, or commit message is not enough. + +## Related docs + +- [../AGENTS.md](../AGENTS.md) for repository-local change guidance. +- [../docs/repo-map.md](../docs/repo-map.md) for the main directories plans usually touch. +- [../tech_debt/README.md](../tech_debt/README.md) for recording maintenance burdens discovered while planning. +- [../adr/README.md](../adr/README.md) for decisions that should become lasting architecture records instead of plan notes. + +## What belongs in a plan + +- Multi-step implementation work that spans: + - source code + - tests + - docs + - examples + - smoke scripts +- Changes that need explicit sequencing or checkpoints. +- Work that may affect public API, compatibility behavior, payload shape, or example outputs. +- Repository improvement work such as: + - migrations + - documentation restructures + - refactors across `src/excelalchemy/`, `tests/`, and `docs/` + +Do not use a plan for tiny one-file edits or obvious fixes with no coordination cost. + +## Status conventions + +Use one status per plan: + +- `planned` + - Work is defined but not started. +- `active` + - Work is in progress. +- `completed` + - Intended work is done and the plan remains as a record. +- `abandoned` + - Work was intentionally stopped or superseded. + +Put the status near the top of the file so it is easy to scan. + +## Recommended plan contents + +- Goal + - What the change is trying to achieve in this repository. +- Scope + - Which directories, modules, docs, examples, or scripts are expected to change. +- Non-goals + - What the plan is not trying to change. +- Steps + - Ordered execution steps. +- Risks or caution areas + - For example public API, locale behavior, compatibility shims, payload shape, or example assets. +- Verification + - Which tests, type checks, lint steps, and smoke scripts should pass. + +## Progress logs + +- Keep progress logs brief and append-only. 
+- Write each progress entry as: + - date + - short summary of what changed + - current blockers or next step if relevant +- Prefer concrete repository references such as: + - `src/excelalchemy/results.py` + - `tests/contracts/test_import_contract.py` + - `docs/public-api.md` + +## Decision logs + +- Record only plan-local decisions here. +- If a decision changes the repository’s architectural direction or public design, create an ADR under `adr/` instead of burying it in a plan. +- For plan-local decisions, include: + - the decision + - why it was made + - affected paths + - any follow-up work it creates + +## Repository alignment + +For this repository, plans should usually mention affected items from: + +- `src/excelalchemy/` +- `tests/contracts/` +- `tests/integration/` +- `tests/unit/` +- `examples/` +- `docs/` +- `scripts/` +- `files/example-outputs/` diff --git a/plans/v2-3-front-end-remediation-payload-v1-design.md b/plans/v2-3-front-end-remediation-payload-v1-design.md new file mode 100644 index 0000000..bfe50a6 --- /dev/null +++ b/plans/v2-3-front-end-remediation-payload-v1-design.md @@ -0,0 +1,308 @@ +# v2.3 Front-end Remediation Payload v1 Design Note + +Status: `planned` + +Related plan: + +- [v2-3-front-end-remediation-payload-v1.md](./v2-3-front-end-remediation-payload-v1.md) + +## Current Repository State + +The current repository already has a stable public result surface and strong verification anchors: + +- public result objects live in `src/excelalchemy/results.py` +- the recommended public import path already includes `excelalchemy.results` +- existing payload contracts are protected by: + - `tests/contracts/test_result_contract.py` + - `tests/unit/test_excel_exceptions.py` + - `scripts/smoke_api_payload_snapshot.py` + - `files/example-outputs/import-failure-api-payload.json` +- frontend/API guidance already centers on: + - `docs/result-objects.md` + - `docs/api-response-cookbook.md` + - `examples/fastapi_reference/` + +That means the safest 
design is to add a narrow, opt-in serializer in the existing public results layer and leave the default payload builders untouched. + +## Recommended Approach + +### Recommendation + +Use a single new public helper function in `excelalchemy.results`: + +```python +build_frontend_remediation_payload( + *, + result: ImportResult, + cell_error_map: CellErrorMap, + row_error_map: RowIssueMap, +) -> dict[str, object] +``` + +### Why this is the narrowest additive public entry point + +This is the smallest stable API surface because it: + +- adds one new public callable instead of changing multiple existing methods +- does not change `ImportResult`, `CellErrorMap`, or `RowIssueMap` method signatures +- does not require a new public module +- fits the current public API guidance, which already treats `excelalchemy.results` as the place for structured result helpers +- keeps package-root export optional rather than required + +### Public entry point decision + +The narrowest additive public entry point should be: + +- a module-level helper in `excelalchemy.results` + +It should not initially require a root export from `excelalchemy.__init__.py`. + +Reason: + +- `excelalchemy.results` is already a stable public module +- not exporting from the package root avoids widening the common import surface +- the feature is specialized enough that an explicit `from excelalchemy.results import ...` is appropriate + +## Where Remediation Summary Should Live + +### Recommended placement + +The remediation summary should live as a serializer/preset layer inside `src/excelalchemy/results.py`. + +### Why not attach it to existing result objects + +Attaching remediation methods directly to existing classes would enlarge the stable method surface of: + +- `ImportResult` +- `CellErrorMap` +- `RowIssueMap` + +That creates more compatibility burden than necessary for a v1 experiment. 
+ +It also creates an awkward ownership question: + +- `ImportResult` alone does not know enough about field- and row-level remediation +- `CellErrorMap` and `RowIssueMap` alone do not know enough about header-invalid outcomes + +A serializer that accepts all three public objects matches the current repository design better. + +### Why not create a separate helper module + +Creating something like `excelalchemy.remediation` or `excelalchemy.frontend` would introduce a brand-new public module for a single narrowly scoped feature. + +That would: + +- increase public API surface more than necessary +- require extra public API documentation and boundary work +- be harder to justify while the feature is still explicitly a v1 experiment + +## Source Of `suggested_action` / `fix_hint` In v1 + +### Recommended source hierarchy + +Use a strict, additive derivation order: + +1. `ImportResult` result state and header issue buckets for top-level remediation actions +2. `message_key` from underlying `ExcelCellError` / `ExcelRowError` when present +3. selected fallback `code` mappings where the code is already stable enough +4. 
no hint when the issue is not confidently recognized + +### Concrete v1 policy + +Top-level remediation guidance should come from: + +- `result.result` +- `result.is_header_invalid` +- `result.is_data_invalid` +- `result.missing_required` +- `result.missing_primary` +- `result.unrecognized` +- `result.duplicated` +- `result.url` + +Item-level and grouped remediation guidance should come from: + +- `error.message_key` first +- `error.code` second + +They should not come from: + +- free-text parsing of `message` +- workbook-facing `display_message` +- schema metadata that does not already participate in result construction +- any new pipeline-generated remediation fields + +### Why this is the safest v1 source + +This approach is narrow and testable because: + +- `message_key` is already explicit and structured +- `code` already appears in stable payloads +- header-invalid actions can be derived without new execution logic +- unknown errors can cleanly omit hints + +### Important v1 limitation + +The current repository still produces broad fallback codes such as `ExcelCellError` and `ExcelRowError` in some flows. That means v1 hint coverage should be intentionally sparse. High-confidence omissions are better than low-confidence generic advice. 
+ +## Best Verifier Anchors + +### Primary anchors + +These are the strongest existing verification anchors for this feature: + +- `tests/contracts/test_result_contract.py` + - best place to lock the new public helper entry point and its top-level payload contract +- `tests/unit/test_excel_exceptions.py` + - best place to verify issue-record-derived remediation payload details, mapping behavior, and omission behavior +- `scripts/smoke_api_payload_snapshot.py` + - best smoke check for end-to-end payload stability +- `files/example-outputs/import-failure-api-payload.json` + - canonical snapshot for the invalid-import payload example + +### Secondary anchors + +Use these only if the reference example adopts the new payload: + +- `examples/fastapi_reference/responses.py` +- `examples/fastapi_reference/schemas.py` +- `examples/fastapi_reference/README.md` +- `tests/integration/test_examples_smoke.py` +- `scripts/generate_example_output_assets.py` + +### Why these are the right anchors + +They align with the feature shape: + +- public API contract +- focused result serialization logic +- one canonical invalid-import payload snapshot +- one copyable backend integration example + +They avoid drifting into pipeline, storage, or compatibility verification that this feature does not need. 
+ +## Docs That Must Change If Implemented Correctly + +### Must change + +- `docs/result-objects.md` + - add the new remediation helper and explain where it fits relative to existing payload helpers +- `docs/api-response-cookbook.md` + - add one frontend-remediation-oriented response shape using the new helper +- `examples/fastapi_reference/README.md` + - show the new response section if the FastAPI reference adopts it + +### Conditional changes + +- `docs/public-api.md` + - only if the new helper is meant to be part of the documented stable helper set +- `docs/integration-roadmap.md` + - only if the frontend path should explicitly point readers to the remediation helper + +### Should not need changes + +- `README.md` +- `README-pypi.md` +- `MIGRATIONS.md` +- storage docs +- compatibility docs + +## Rejected Alternative + +### Rejected option + +Add a `preset=` or `mode=` parameter to existing serializers such as: + +- `ImportResult.to_api_payload(preset='frontend_remediation')` +- `CellErrorMap.to_api_payload(preset='frontend_remediation')` +- `RowIssueMap.to_api_payload(preset='frontend_remediation')` + +### Why reject it + +This looks compact, but it is the wrong tradeoff for this repository: + +- it changes stable public method signatures on first-class public objects +- it introduces branching behavior into serializers whose current contracts are already documented and smoke-tested +- it makes ownership unclear because remediation needs combined data from `ImportResult`, `CellErrorMap`, and `RowIssueMap` +- it increases the chance of future preset sprawl in already-core methods +- it is harder to explain and test than one standalone helper with one fixed output shape + +In short: + +- one new helper is easier to document, easier to snapshot, and easier to keep additive than adding “serializer modes” to three existing public objects + +## Precise Implementation Boundaries + +### In scope + +- one new public helper in `src/excelalchemy/results.py` +- small 
private mapping helpers in `src/excelalchemy/results.py` if needed +- contract tests for the new helper +- focused unit tests for hint derivation and omission behavior +- docs updates for result objects and API cookbook +- optional adoption in `examples/fastapi_reference/` +- snapshot smoke update if the canonical payload asset intentionally includes remediation output + +### Out of scope + +- changes to `src/excelalchemy/core/*` +- changes to the import execution pipeline +- new storage behavior +- compatibility cleanup +- broad package-root re-export expansion +- new public modules +- new async/job abstractions +- automatic remediation or workbook rewriting + +## Checklist Of Files Likely To Change + +### Core implementation + +- [ ] `src/excelalchemy/results.py` +- [ ] `src/excelalchemy/__init__.py` only if root export is intentionally added + +### Tests + +- [ ] `tests/contracts/test_result_contract.py` +- [ ] `tests/unit/test_excel_exceptions.py` +- [ ] `tests/integration/test_examples_smoke.py` only if the FastAPI reference example response changes + +### Docs and examples + +- [ ] `docs/result-objects.md` +- [ ] `docs/api-response-cookbook.md` +- [ ] `examples/fastapi_reference/responses.py` if the example adopts the helper +- [ ] `examples/fastapi_reference/schemas.py` if the example adopts the helper +- [ ] `examples/fastapi_reference/README.md` if the example adopts the helper +- [ ] `docs/public-api.md` only if the helper is added to the documented stable helper list +- [ ] `docs/integration-roadmap.md` only if the frontend reading path is updated + +### Smoke assets + +- [ ] `scripts/smoke_api_payload_snapshot.py` +- [ ] `files/example-outputs/import-failure-api-payload.json` +- [ ] `scripts/generate_example_output_assets.py` only if example output generation is extended +- [ ] `scripts/smoke_docs_assets.py` only if new stable doc fragments need smoke coverage + +## Checklist Of Verifiers That Must Pass + +- [ ] `uv run ruff format --check .` +- [ ] `uv 
run ruff check .` +- [ ] `uv run pyright` +- [ ] `uv run pytest --cov=excelalchemy --cov-report=term-missing:skip-covered tests` +- [ ] `uv run python scripts/smoke_package.py` +- [ ] `uv run python scripts/smoke_examples.py` +- [ ] `uv run python scripts/smoke_docs_assets.py` +- [ ] `uv run python scripts/smoke_api_payload_snapshot.py` + +## Final Design Decision + +For `Front-end remediation payload v1`, the preferred design is: + +- one opt-in helper function in `excelalchemy.results` +- implemented as a serializer/preset layer in `src/excelalchemy/results.py` +- driven by existing `ImportResult`, `CellErrorMap`, and `RowIssueMap` +- deriving hints from header issue buckets, `message_key`, and selected stable `code` mappings only +- verified primarily by result contract tests, payload-focused unit tests, and the existing invalid-import snapshot smoke path + +This is the smallest additive API surface with the strongest testability in the current repository. diff --git a/plans/v2-3-front-end-remediation-payload-v1.md b/plans/v2-3-front-end-remediation-payload-v1.md new file mode 100644 index 0000000..59c0751 --- /dev/null +++ b/plans/v2-3-front-end-remediation-payload-v1.md @@ -0,0 +1,320 @@ +# v2.3 Front-end Remediation Payload v1 + +Status: `planned` + +## Problem Statement + +ExcelAlchemy already exposes stable import result payloads through: + +- `ImportResult.to_api_payload()` +- `CellErrorMap.to_api_payload()` +- `RowIssueMap.to_api_payload()` + +Those payloads are useful and already frontend-capable, but they are still optimized around raw result inspection rather than remediation flow. 
Frontend consumers currently need to derive their own: + +- remediation-oriented aggregate summary +- suggested next action +- optional fix hints for known issue types +- compact payload shape tuned for “what should the user do next?” + +For `v2.3`, the experiment should stay narrow: add an additive frontend remediation payload layer on top of the existing result objects without changing the import pipeline, storage behavior, or current default payload contracts. + +## Goals + +- Add a frontend-oriented remediation payload capability on top of existing import result objects. +- Keep the feature additive and backward-compatible. +- Preserve the existing `to_api_payload()` default shapes. +- Provide an aggregated remediation summary derived from existing result data. +- Add optional `fix_hint` / `suggested_action` fields for known issue patterns. +- Provide one explicit serializer entry point for this payload so downstream API code can opt in intentionally. +- Update the minimal docs, example integration, and smoke assets needed to make the feature visible and verifiable. + +## Non-Goals + +- Automatic fixing or workbook mutation. +- Async jobs, background processing, or progress reporting. +- Import execution pipeline redesign. +- Storage redesign or storage-specific behavior. +- Global public API renaming. +- Broad refactors of result objects unrelated to remediation payloads. +- Expanding compatibility shims or deprecation behavior. + +## Proposed User-Visible Behavior + +### Existing payloads remain unchanged + +These existing serializers must keep their current behavior and documented shapes: + +- `ImportResult.to_api_payload()` +- `CellErrorMap.to_api_payload()` +- `RowIssueMap.to_api_payload()` + +### New additive remediation serializer + +Add one new public, frontend-focused serializer under `excelalchemy.results`. 
+ +Preferred direction: + +- introduce a standalone helper such as `build_frontend_remediation_payload(...)` + +Why this direction: + +- it avoids branching the stable default `to_api_payload()` behavior +- it keeps the remediation feature explicitly opt-in +- it avoids coupling the new payload too tightly to any one result object +- it keeps the implementation in the public results layer instead of the import pipeline + +The helper should accept the existing public result surfaces: + +- `ImportResult` +- `CellErrorMap` +- `RowIssueMap` + +### Remediation payload v1 contents + +The first version should be intentionally small and deterministic. The payload should include: + +- stable import status summary +- remediation aggregate summary +- top-level suggested next action +- optional item-level and summary-level `fix_hint` / `suggested_action` +- compact grouped data oriented around remediation, not full raw inspection + +The expected shape should be centered on sections like: + +- `result` + - stable overall status, likely derived from `ImportResult` +- `remediation` + - aggregate counts such as affected rows, affected fields, affected codes, and whether a result workbook is available + - top-level `suggested_action` + - top-level `fix_hint` when the issue is globally actionable, such as header mismatch +- `by_field` + - field-oriented remediation summaries derived from `CellErrorMap.summary_by_field()` +- `by_code` + - code-oriented remediation summaries derived from existing summary helpers +- `items` + - compact issue records with optional `fix_hint` / `suggested_action` + +Version 1 should not attempt to replace the existing rich payloads. It should be a thinner, frontend-task-oriented view built from them. 
+ +### Hint behavior + +Hints should be optional and best-effort: + +- use known `message_key` values first when present +- fall back to known `code` mappings where safe +- if no mapping is known, leave `fix_hint` / `suggested_action` unset in a consistent way + +Version 1 should not invent speculative hints for unknown errors. + +## Likely Code Areas Affected + +### Public result surfaces + +- `src/excelalchemy/results.py` + - primary implementation location for the remediation serializer and any small helper data structures +- `src/excelalchemy/__init__.py` + - only if the new serializer is exported from the package root + +### Public docs + +- `docs/result-objects.md` + - document the new remediation serializer alongside the existing result helpers +- `docs/api-response-cookbook.md` + - add a frontend-remediation-oriented response example +- `docs/public-api.md` + - update only if a new public helper is added to the stable public surface +- `docs/integration-roadmap.md` + - update only if the recommended frontend-reading path should explicitly mention the new helper + +### Example integration surface + +- `examples/fastapi_reference/responses.py` + - opt into the new remediation payload in the reference response builder +- `examples/fastapi_reference/schemas.py` + - extend the example response schema with a remediation field if the example adopts it +- `examples/fastapi_reference/README.md` + - document the new response section and example JSON + +### Tests + +- `tests/contracts/test_result_contract.py` + - contract coverage for the new public serializer +- `tests/unit/test_excel_exceptions.py` + - focused payload-shape and helper coverage for `CellErrorMap` / `RowIssueMap` driven remediation output +- `tests/integration/test_examples_smoke.py` + - only if the FastAPI reference example response shape changes + +### Smoke and generated assets + +- `scripts/smoke_api_payload_snapshot.py` + - extend or add a remediation snapshot assertion path +- 
`scripts/generate_example_output_assets.py` + - regenerate captured output if example payload output changes +- `scripts/smoke_docs_assets.py` + - update required fragments only if docs gain a stable remediation section reference +- `files/example-outputs/import-failure-api-payload.json` + - update only if the canonical failure payload asset intentionally includes remediation output + +## Test Strategy + +### Contract tests + +Add contract tests that verify: + +- the new remediation serializer is available from the intended public module +- existing `to_api_payload()` outputs are unchanged +- remediation payloads for `SUCCESS`, `HEADER_INVALID`, and `DATA_INVALID` are deterministic +- aggregate counts are derived correctly from existing result objects +- item ordering is deterministic enough for snapshot-style assertions + +### Unit tests + +Add focused tests that verify: + +- known `message_key` mappings produce `fix_hint` / `suggested_action` +- known `code` mappings work when `message_key` is absent +- unknown issues omit remediation hints consistently +- field-level and row-level summaries do not over-count duplicated records +- header-invalid remediation summaries do not require row or cell errors + +### Integration tests + +If the FastAPI reference app adopts the remediation payload: + +- extend the example smoke coverage to assert the new response section exists +- keep the reference app compile/runtime smoke path small and additive + +### Smoke validation + +Keep smoke verification narrow: + +- extend the API payload snapshot path rather than introducing a new smoke workflow +- update doc smoke only for stable, durable fragments +- avoid expanding smoke to unrelated examples + +## Docs, Examples, And Smoke Updates Required + +Required if the feature is implemented: + +- document the remediation helper in `docs/result-objects.md` +- add one cookbook example in `docs/api-response-cookbook.md` +- update `examples/fastapi_reference/README.md` to show the new 
response section +- update `examples/fastapi_reference/responses.py` and `schemas.py` if the reference app uses the new serializer +- update `scripts/smoke_api_payload_snapshot.py` +- update `files/example-outputs/import-failure-api-payload.json` if the canonical snapshot is expanded + +Likely not required unless scope grows: + +- `README.md` +- `README-pypi.md` +- compatibility docs such as `MIGRATIONS.md` +- storage docs + +## Risks And Open Questions + +### 1. Hint quality is constrained by current error metadata + +Many current payloads still expose broad fallback codes such as `ExcelCellError`. Useful remediation hints cannot assume a rich code taxonomy already exists. + +Planned constraint: + +- v1 hints should be sparse and high-confidence +- unknown issues should not receive guessed remediation text + +### 2. Message locale needs explicit policy + +Runtime/API text in 2.x is English-first, while workbook-facing text is locale-aware. + +Open question: + +- should remediation hint text follow the existing English-first API policy in v1, or should it attempt locale-aware text when the error originated from a `message_key`? + +Recommended v1 answer: + +- keep remediation hint text aligned with the current API/runtime policy unless there is a very low-cost locale-aware path + +### 3. Payload scope can sprawl quickly + +There is a real risk of turning a narrow serializer into a second full response model. + +Planned constraint: + +- v1 should add only summary, action, and compact remediation groupings +- it should not duplicate every section from the existing raw payloads + +### 4. Public API surface choice must stay conservative + +Adding a preset argument to the existing `to_api_payload()` methods is technically possible, but it increases branching in already-stable serializers. + +Planned decision: + +- prefer a new standalone remediation serializer helper over changing the existing serializer signatures + +### 5. 
Compatibility aliases should remain out of scope + +If package-root export changes or facade examples touch old aliases indirectly, treat that as a documentation and testing consideration, not as a reason to modify compatibility shims. + +## Phased Implementation Steps + +### Phase 1. Lock the v1 payload contract + +- define the remediation payload shape in `docs/result-objects.md` and/or the plan-local notes during implementation +- choose the exact public entry point name in `src/excelalchemy/results.py` +- define the initial hint mapping policy: + - `message_key` first + - selected fallback `code` mappings second + - unknown issues omitted +- define deterministic ordering for grouped sections + +### Phase 2. Implement the public serializer narrowly + +- add the remediation serializer in `src/excelalchemy/results.py` +- keep all existing serializers unchanged +- export from `src/excelalchemy/__init__.py` only if the helper is intended to be package-root public + +### Phase 3. Add contract and unit coverage + +- add contract tests for public availability and stable payload output +- add unit tests for mapping, omission, and aggregation behavior +- add regression coverage for success, header-invalid, and data-invalid cases + +### Phase 4. Update reference integration and docs + +- adopt the remediation payload in `examples/fastapi_reference/` +- document the new helper in result-object and API-cookbook docs +- keep example changes minimal and aligned with the preferred public import path + +### Phase 5. Refresh smoke assets + +- update the API payload snapshot path +- regenerate any affected example output asset +- keep `scripts/smoke_docs_assets.py` aligned if new durable doc fragments are introduced + +## Explicit Acceptance Criteria + +- A new additive remediation serializer exists in the public results layer. +- Existing `ImportResult.to_api_payload()`, `CellErrorMap.to_api_payload()`, and `RowIssueMap.to_api_payload()` contract outputs remain unchanged. 
+- The remediation serializer supports `SUCCESS`, `HEADER_INVALID`, and `DATA_INVALID` outcomes. +- The remediation payload includes an aggregated remediation summary. +- The remediation payload includes optional `fix_hint` and/or `suggested_action` fields for known issue patterns. +- Unknown issue patterns do not receive guessed remediation hints. +- The implementation does not modify the import execution pipeline. +- The implementation does not introduce async/job concepts. +- The implementation does not require storage changes. +- Contract tests cover the new public behavior. +- Relevant docs and the FastAPI reference example are updated if they opt into the new payload. +- Snapshot/smoke verification is updated and passes for the intended canonical example payload. + +## Verification + +When implementation starts, the verification set for this plan should include: + +- `uv run ruff format --check .` +- `uv run ruff check .` +- `uv run pyright` +- `uv run pytest --cov=excelalchemy --cov-report=term-missing:skip-covered tests` +- `uv run python scripts/smoke_package.py` +- `uv run python scripts/smoke_examples.py` +- `uv run python scripts/smoke_docs_assets.py` +- `uv run python scripts/smoke_api_payload_snapshot.py` diff --git a/plans/v2-3-import-preflight-v1-lightweight-structural-validation-design.md b/plans/v2-3-import-preflight-v1-lightweight-structural-validation-design.md new file mode 100644 index 0000000..95fc3bb --- /dev/null +++ b/plans/v2-3-import-preflight-v1-lightweight-structural-validation-design.md @@ -0,0 +1,397 @@ +# v2.3 Import Preflight v1: Lightweight Structural Validation + +Status: `planned` + +## Problem + +`ExcelAlchemy` currently has one import-oriented runtime path: + +- `ExcelAlchemy.import_data(...)` +- `ImportSession.run(...)` + +That path assumes: + +- storage-backed workbook loading +- async execution +- row-level validation and create/update callbacks +- optional result-workbook rendering and upload + +Users who only want to 
answer "is this workbook structurally importable?" must +still enter the full import shape or build ad hoc checks around internal +components. That is too heavy for API upload guards and pre-submit validation. + +The gap is especially clear for these checks: + +- does the configured sheet exist +- do the workbook headers match the schema +- is the workbook structurally usable for import +- roughly how many data rows would the import process + +## Goals + +- Add a lightweight preflight workflow that validates an import workbook before + full import execution. +- Keep the workflow additive and clearly separate from `import_data(...)`. +- Reuse existing header parsing and header validation behavior. +- Reuse the existing storage-backed workbook input seam so callers do not need + to materialize raw workbook bytes themselves. +- Support: + - sheet existence checks + - header validation + - basic workbook structure checks + - row count estimation +- Return a lightweight public result object suitable for API and service-layer + decisions. +- Avoid new storage surface area, result-workbook rendering, row execution, and + callback invocation. + +## Non-goals + +- No full import execution. +- No row-level validation or callback dispatch. +- No async API for v1. +- No job system, lifecycle events, or progress streaming. +- No new storage abstraction, storage-specific public API, or upload behavior. +- No template changes. +- No remediation payload integration. +- No attempt to infer deep workbook quality beyond structural importability. + +## API Options + +### Option A: Add `preflight_only=` to `import_data(...)` + +Example: + +```python +result = await alchemy.import_data( + input_excel_name, + output_excel_name, + preflight_only=True, +) +``` + +Reject this option. 
+ +Reasons: + +- it mixes a lightweight structural check into the full import API +- it still implies storage-backed input names and output names +- it keeps preflight async for no real benefit +- it invites branching inside `ImportSession.run(...)` and increases coupling + +### Option B: Reuse `ValidateHeaderResult` as the public result + +Example: + +```python +header_result = alchemy.preflight_import(input_excel_name) +``` + +Reject this option. + +Reasons: + +- `ValidateHeaderResult` only models header issues +- it cannot represent sheet-missing or structure-invalid outcomes cleanly +- it has no place for row count estimation +- stretching it would blur the difference between header validation and + workbook preflight + +### Option C: Add a new synchronous preflight method and result type + +Example: + +```python +result = alchemy.preflight_import(input_excel_name) +``` + +Recommend this option. + +Reasons: + +- additive public surface +- aligns with the existing facade input seam +- no async requirement +- clear separation from full import semantics +- enough room for sheet, structure, header, and row-count fields without + overloading `ImportResult` or `ValidateHeaderResult` + +## Recommended Design + +### Public API + +Add one new synchronous facade method: + +```python +def preflight_import(self, input_excel_name: str) -> ImportPreflightResult: + ... +``` + +Keep the first version intentionally narrow: + +- input is an existing workbook name resolved through configured `ExcelStorage` +- the configured schema sheet name remains the lookup target +- no output workbook name +- no async variant in v1 + +### Public Result Shape + +Add a lightweight public result model and status enum in +`src/excelalchemy/results.py`. 
+ +Recommended names: + +- `ImportPreflightStatus` +- `ImportPreflightResult` + +Recommended status values: + +- `VALID` +- `HEADER_INVALID` +- `SHEET_MISSING` +- `STRUCTURE_INVALID` + +Recommended result responsibilities: + +- report whether preflight passed +- expose the configured sheet name and whether it was found +- expose header issues using the same field groups already used by + `ValidateHeaderResult` / `ImportResult` +- expose `has_merged_header` when header rows were readable +- expose `estimated_row_count` +- expose a small, stable list of structural issue codes for non-header failures + +The result should stay lightweight: + +- no `url` +- no `success_count` / `fail_count` +- no row-error maps +- no cell-error maps +- no remediation hints + +### Internal Design + +Do not route preflight through `ImportSession.run(...)`. + +Instead, add a small dedicated internal path that reuses existing logic where it +already exists: + +- reuse `ExcelSchemaLayout` +- reuse `ExcelHeaderParser` +- reuse `ExcelHeaderValidator` +- reuse `WorksheetTable` +- reuse configured `ExcelStorage` for workbook reads only + +Recommended shape: + +- add a new internal helper or session-like object dedicated to preflight +- use the existing storage gateway read path to obtain `WorksheetTable` +- keep row-count estimation in the preflight path, not in `ImportSession` + +Clarification for the original "no storage" constraint: + +- preflight should not introduce a new storage API +- preflight should not upload or write artifacts +- preflight may still read the input workbook through configured `ExcelStorage`, + because that is already the repository's import input boundary + +### Structure Checks In Scope For v1 + +Keep "basic structure checks" small and explicit: + +- configured sheet exists +- storage can read the configured workbook into `WorksheetTable` +- enough rows exist to inspect the header block +- the header block is readable for simple or merged-header detection + +Do not 
add deeper checks such as: + +- per-row value validation +- field codec parsing across data rows +- business-rule execution +- workbook repair suggestions + +### Row Count Estimation + +The estimate should align with current import semantics instead of inventing a +new counting rule. + +Recommended rule: + +- read the worksheet through the configured storage gateway into + `WorksheetTable` +- detect merged vs simple headers with `ExcelHeaderParser` +- estimate data rows from the same header-offset logic the import session uses + +Target invariant: + +- for a workbook that later runs through `import_data(...)`, preflight + `estimated_row_count` should match `last_import_snapshot.data_row_count` + unless later implementation intentionally documents an edge-case difference + +### Duplication And Reuse Direction + +Prefer reusing the existing storage read seam and current header components +rather than introducing a raw-bytes API or a generic shared import engine. + +Do not duplicate header parsing or validation logic in the facade. + +Do not over-abstract the shared logic either. 
+ +Recommended boundary: + +- storage gateway is responsible for reading `WorksheetTable` +- preflight path is responsible for structural decisions +- `ExcelHeaderParser` and `ExcelHeaderValidator` stay responsible for header + semantics +- `ImportSession` remains responsible for full import execution only + +## Affected Modules + +Likely source changes: + +- `src/excelalchemy/core/alchemy.py` +- `src/excelalchemy/results.py` +- `src/excelalchemy/__init__.py` +- one new internal module for preflight orchestration under + `src/excelalchemy/core/`, if that keeps `alchemy.py` and `import_session.py` + clean +- possibly `src/excelalchemy/core/headers.py` for small reuse-oriented helpers + +Likely documentation changes: + +- `docs/public-api.md` +- `docs/result-objects.md` +- `docs/domain-model.md` +- `docs/architecture.md` +- `README.md` if the preflight workflow becomes part of the main onboarding + story + +Likely examples: + +- `examples/employee_import_workflow.py` or a new focused example +- `examples/fastapi_reference/` if the feature is presented as an upload guard + +Likely test updates: + +- `tests/contracts/test_import_contract.py` +- `tests/contracts/test_result_contract.py` +- one new unit-focused test module for preflight internals if needed +- example smoke tests if examples are updated + +## Test Strategy + +Contract tests should define the public behavior: + +- valid workbook returns `VALID` +- missing target sheet returns `SHEET_MISSING` +- unreadable or structurally unusable workbook returns `STRUCTURE_INVALID` +- invalid headers return `HEADER_INVALID` +- header issue lists preserve current ordering semantics +- `estimated_row_count` matches current import counting rules for simple and + merged headers +- no row-level execution side effects occur + +Unit tests should focus on isolated mechanics: + +- simple vs merged header detection under preflight +- structural issue classification +- row count estimation against existing import semantics + 
+Integration and smoke coverage should stay light: + +- add or update one example only if the public story is changed +- avoid broad smoke churn unless docs/examples are intentionally expanded + +## Risks + +### Duplicate structural logic + +Risk: + +- preflight could accidentally duplicate header handling or row-count rules from + `ImportSession` + +Mitigation: + +- reuse `ExcelHeaderParser` and `ExcelHeaderValidator` directly +- reuse the existing storage read seam instead of introducing a second input + mechanism +- extract only small focused helpers if duplication appears in count logic or + result mapping + +### Coupling preflight to the full import session + +Risk: + +- branching inside `ImportSession.run(...)` would mix sync preflight with async + import execution and increase maintenance burden + +Mitigation: + +- keep preflight as a dedicated path with targeted reuse of header and layout + helpers + +### Result-type overlap + +Risk: + +- `ImportResult`, `ValidateHeaderResult`, and a new preflight result can become + confusing if their responsibilities overlap + +Mitigation: + +- document the distinction clearly: + - `ValidateHeaderResult` = header-only internal/public helper result + - `ImportResult` = full import outcome + - `ImportPreflightResult` = lightweight structural importability result + +### Count drift + +Risk: + +- preflight row-count estimation could silently diverge from import-session row + counting, especially for merged headers + +Mitigation: + +- define tests that compare both paths on the same fixtures +- derive the estimate from the same header-offset rules already used in import + +## Phased Steps + +1. Define the public API and result-model contract. +2. Implement a dedicated read-only preflight path that uses the configured + storage gateway and reuses header parser and header + validator logic. +3. Add contract tests for statuses, header issue mapping, and row-count + estimation. +4. 
Update docs for the new public entry point and result object. +5. Add or update one example only if it improves the public workflow story. + +## Acceptance Criteria + +- The library exposes a new additive synchronous preflight entry point. +- Preflight accepts `input_excel_name` and reads through configured + `ExcelStorage`. +- Preflight does not execute create/update callbacks. +- Preflight does not produce result workbooks or upload anything. +- Preflight reports: + - sheet existence + - header validity + - basic structural validity + - estimated row count +- Header issue lists remain consistent with existing header validation behavior. +- The result type is documented as a stable public surface. +- Existing full-import APIs and result payloads remain unchanged. + +## Verification + +When implementation starts, run: + +- `uv run ruff format --check .` +- `uv run ruff check .` +- `uv run pyright` +- `uv run pytest --cov=excelalchemy --cov-report=term-missing:skip-covered tests` +- `uv run python scripts/smoke_package.py` +- `uv run python scripts/smoke_examples.py` diff --git a/plans/v2-3-job-friendly-import-v1-structured-lifecycle-events-design.md b/plans/v2-3-job-friendly-import-v1-structured-lifecycle-events-design.md new file mode 100644 index 0000000..cb07dfb --- /dev/null +++ b/plans/v2-3-job-friendly-import-v1-structured-lifecycle-events-design.md @@ -0,0 +1,293 @@ +# v2.3 Job-friendly Import v1: Structured Import Lifecycle Events + +Status: `planned` + +## Chosen API Design + +Use the existing import entry point and add one optional keyword-only callback: + +```python +await alchemy.import_data( + input_excel_name, + output_excel_name, + on_event=handle_event, +) +``` + +Recommended signature: + +```python +async def import_data( + self, + input_excel_name: str, + output_excel_name: str, + *, + on_event: Callable[[dict[str, object]], None] | None = None, +) -> ImportResult +``` + +This is the smallest additive API that satisfies the feature goal: + +- no 
new public method +- no config-level event registration +- no observer class +- no generator-based import mode +- no new persistence or background execution model + +The callback is per-run rather than attached to the `ExcelAlchemy` instance or +`ImporterConfig`, which keeps the surface narrow and avoids sticky runtime +state. + +## Why This Is The Simplest Viable Shape + +The repository already has one stable import control flow: + +- `ExcelAlchemy.import_data(...)` +- `ImportSession.run(...)` + +The design should layer observability onto that path, not create a second one. + +This callback shape is the narrowest fit because it: + +- keeps `import_data(...)` as the only import workflow entry point +- avoids widening `ImporterConfig` for a runtime-only concern +- avoids introducing a second public abstraction just to carry events +- is trivial to test with a local `events.append` +- does not require consumers to learn a different import API + +## Rejected Alternative + +Reject a generator-based API such as: + +```python +async for event in alchemy.stream_import_data(...): + ... +``` + +Reasoning: + +- it creates a second public import workflow beside `import_data(...)` +- it complicates the final-result path for callers that still need + `ImportResult` +- it likely requires expanding `src/excelalchemy/core/abstract.py` +- it is more surface area than this v1 needs + +Also reject a config-level callback on `ImporterConfig` for v1. + +Reasoning: + +- it broadens config surface for a per-run runtime concern +- it creates callback lifetime questions on long-lived `ExcelAlchemy` instances +- it is less explicit than passing the handler directly to the import call that + uses it + +## Event Structure + +Use plain event dictionaries for v1. + +Do not introduce a public event class hierarchy yet. + +Required common field: + +- `event` + +All other fields should be event-specific and only included when needed for +progress or outcome. 
+ +### Event Kinds + +- `started` +- `header_validated` +- `row_processed` +- `completed` +- `failed` + +### Recommended Event Payloads + +`started` + +```python +{'event': 'started'} +``` + +`header_validated` + +```python +{'event': 'header_validated', 'is_valid': True} +``` + +```python +{ + 'event': 'header_validated', + 'is_valid': False, + 'missing_required': [...], + 'missing_primary': [...], + 'unrecognized': [...], + 'duplicated': [...], +} +``` + +`row_processed` + +```python +{ + 'event': 'row_processed', + 'processed_row_count': 3, + 'total_row_count': 10, + 'success_count': 2, + 'fail_count': 1, +} +``` + +`completed` + +```python +{ + 'event': 'completed', + 'result': 'SUCCESS' | 'DATA_INVALID' | 'HEADER_INVALID', + 'success_count': 9, + 'fail_count': 1, + 'url': 'memory://result.xlsx' | None, +} +``` + +`failed` + +```python +{ + 'event': 'failed', + 'error_type': 'ValueError', + 'error_message': '...', +} +``` + +## Deliberate Non-Fields For v1 + +Do not add these unless implementation proves they are required: + +- no sequence number +- no timestamp +- no `import_mode` +- no `has_merged_header` +- no raw row payload +- no `CellErrorMap` / `RowIssueMap` payload copies +- no event object metadata beyond `event` + +This keeps the event contract small and lowers compatibility burden. + +## Integration With Existing Import Flow + +Fit event emission directly into the existing session lifecycle. 
+ +Recommended emission points: + +- emit `started` at the beginning of `ImportSession.run(...)` +- emit `header_validated` immediately after `_validate_header(...)` +- emit `row_processed` from `_execute_rows()` using existing counters +- emit `completed` immediately before returning `ImportResult` +- emit `failed` from a top-level `try/except` in `run(...)`, then re-raise + +Important semantic rule: + +- `HEADER_INVALID` is a normal `completed` outcome +- `DATA_INVALID` is a normal `completed` outcome +- only unexpected exceptions produce `failed` + +## How To Avoid Duplicating Logic + +Reuse existing runtime state instead of recomputing anything. + +Use: + +- `ValidateHeaderResult` for header-validation details +- existing row counters in `ImportSession` +- existing final `ImportResult` + +Do not duplicate behavior in: + +- `src/excelalchemy/core/executor.py` +- `src/excelalchemy/core/rows.py` +- `src/excelalchemy/results.py` + +The session should remain the sole place where lifecycle decisions are made and +where events are emitted. 
+ +## Exact Boundary Of Changes + +In scope: + +- add one optional `on_event=` parameter to `import_data(...)` +- thread that callback from facade to `ImportSession` +- add a small private emit helper inside `ImportSession` +- emit five event types from existing lifecycle points +- add contract tests for event order and payload shape +- document the new optional argument + +Out of scope: + +- new `stream_import_data(...)` method +- config-level observer registration +- public event classes +- config refactor +- executor refactor +- storage changes +- async/background job changes +- result payload changes + +## Files To Modify + +Source: + +- `src/excelalchemy/core/abstract.py` +- `src/excelalchemy/core/alchemy.py` +- `src/excelalchemy/core/import_session.py` + +Tests: + +- `tests/contracts/test_import_contract.py` + +Docs: + +- `docs/public-api.md` +- `docs/architecture.md` +- `docs/domain-model.md` + +Optional example update: + +- `examples/employee_import_workflow.py` + +Files that should stay untouched for this narrowed design: + +- `src/excelalchemy/config.py` +- `src/excelalchemy/results.py` +- `src/excelalchemy/__init__.py` + +## Verifier Checklist + +- `import_data(...)` works unchanged when `on_event` is omitted +- successful imports emit: + - `started` + - `header_validated` + - one or more `row_processed` + - `completed` +- header-invalid imports emit: + - `started` + - `header_validated` + - `completed` +- header-invalid imports emit no `row_processed` +- data-invalid imports emit `completed` with `result='DATA_INVALID'` and `url` +- unexpected exceptions emit `failed` and still re-raise the original exception +- existing `ImportResult`, `CellErrorMap`, `RowIssueMap`, and + `last_import_snapshot` behavior remains unchanged +- no API payload smoke assets need regeneration unless an example is changed + +## Verification Commands + +When implementation starts, run: + +- `uv run ruff format --check .` +- `uv run ruff check .` +- `uv run pyright` +- `uv run 
pytest --cov=excelalchemy --cov-report=term-missing:skip-covered tests` +- `uv run python scripts/smoke_package.py` +- `uv run python scripts/smoke_examples.py` diff --git a/plans/v2-3-template-ux-metadata-v1-design.md b/plans/v2-3-template-ux-metadata-v1-design.md new file mode 100644 index 0000000..700c3d6 --- /dev/null +++ b/plans/v2-3-template-ux-metadata-v1-design.md @@ -0,0 +1,237 @@ +# v2.3 Template UX Metadata v1 Design Note + +Status: `planned` + +## Recommended Approach + +The narrowest additive public entry point for `Template UX metadata v1` should +be one new optional metadata argument on the existing stable metadata +constructors: + +- `FieldMeta(..., example_value=...)` +- `ExcelMeta(..., example_value=...)` + +Recommended field: + +- `example_value: str | None = None` + +This should extend the existing metadata path, not introduce a new helper +module or template-specific wrapper API. + +### Why this is the recommended approach + +The current repository state already points to this design: + +- template guidance is already modeled as metadata plus header comments +- `WorkbookPresentationMeta` already owns workbook-facing comment metadata such + as `hint` +- `FieldMetaInfo` already exposes `presentation_meta` and `comment_*` accessors +- `download_template(sample_data=...)` already covers the separate use case of + visible example rows +- `docs/invariants.md` and `tests/contracts/test_template_contract.py` lock in + the rule that generated templates do not rely on Excel data-validation rules + +So the smallest stable API surface is: + +- extend existing metadata structures +- keep the render target as header comments only +- avoid any new runtime helper layer + +## Relationship To Existing `hint` + +`hint` and the new feature should be complementary, not merged. 
+ +Recommended semantics: + +- `hint` + - free-form instruction or rule text + - answers “what should the user keep in mind?” +- `example_value` + - concrete sample value text + - answers “what does a valid value look like?” + +Recommended rendering policy: + +- if both are present, render both +- keep them as separate comment lines +- do not infer one from the other +- do not overload `hint` to carry example semantics + +Recommended ordering inside comments: + +- keep existing comment ordering stable +- append the example line after existing hint-style text when present + +That makes `example_value` the most concrete final cue without changing the +meaning of `hint`. + +## Rendering Strategy Decision + +Use one rendering strategy only for v1: + +- workbook header comments + +This is the best fit among the candidate surfaces. + +### Why comments are best for v1 + +- already supported and stable in the repo +- already verified by template contract tests +- localized through the workbook display locale path +- additive without changing workbook structure +- easy to revert if needed + +### Rejected alternative + +Reject a visible helper row or dedicated example row area for v1. + +Reasoning: + +- the repo already has a visible sample-row path through + `download_template(sample_data=...)` +- adding a second visible example surface would blur the distinction between + field metadata and actual worksheet data +- it would change the worksheet layout and create more verifier churn than a + comment-only additive change + +I would also reject a dedicated instruction/help sheet for the same reason: it +creates a larger workbook contract change than this v1 needs, and the current +template UX contract is comment-centric rather than sheet-centric. 
+ +## Precise Implementation Boundaries + +Implementation should stay within these boundaries: + +- extend `WorkbookPresentationMeta` with one new optional value: + - `example_value` +- expose a comment-ready accessor in the existing metadata layer: + - `comment_example` +- thread the new field through: + - `FieldMetaInfo.__init__(...)` + - `_build_excel_metadata(...)` + - `ExcelMeta(...)` + - `FieldMeta(...)` +- expose the value through the existing `FieldMetaInfo` facade in the same + style as `hint` and `comment_hint` +- localize the workbook-facing example label through `src/excelalchemy/i18n/messages.py` +- update built-in codec `build_comment(...)` paths to include the example line + where comments are already produced + +Implementation should explicitly stay out of these areas: + +- no new package-root helper +- no new template serializer/helper module +- no change to `download_template(...)` arguments or sample-row semantics +- no new worksheet, hidden sheet, helper area, or validation rule surface +- no compatibility shim changes +- no import pipeline or storage behavior changes + +## Likely File Change Checklist + +Core implementation: + +- `src/excelalchemy/metadata.py` +- `src/excelalchemy/i18n/messages.py` +- built-in comment-producing codecs under `src/excelalchemy/codecs/` + +Most likely touched codecs: + +- `src/excelalchemy/codecs/string.py` +- `src/excelalchemy/codecs/date.py` +- `src/excelalchemy/codecs/number.py` +- `src/excelalchemy/codecs/boolean.py` +- `src/excelalchemy/codecs/radio.py` +- `src/excelalchemy/codecs/multi_checkbox.py` +- `src/excelalchemy/codecs/organization.py` +- `src/excelalchemy/codecs/staff.py` +- `src/excelalchemy/codecs/tree.py` +- `src/excelalchemy/codecs/date_range.py` + +Tests: + +- `tests/contracts/test_template_contract.py` +- `tests/unit/test_field_metadata.py` +- representative codec unit tests only where comment text is already asserted +- `tests/integration/test_examples_smoke.py` if an example is updated + 
+Docs and examples: + +- `docs/getting-started.md` +- `docs/public-api.md` +- `examples/annotated_schema.py` + +Smoke and generated assets: + +- `scripts/smoke_examples.py` if example output changes +- `scripts/smoke_docs_assets.py` if doc-facing assets change +- `files/example-outputs/` only if an updated example intentionally changes + captured output + +## Strongest Verifier Anchors + +The strongest existing verifier anchors are: + +### Public behavior + +- `tests/contracts/test_template_contract.py` + - strongest anchor for template comment behavior, locale-visible workbook + text, required styling, merged headers, and the invariant that templates do + not use Excel data validations + +### Metadata layering + +- `tests/unit/test_field_metadata.py` + - strongest anchor for deciding that v1 should extend existing metadata + structures rather than add a helper layer + - already asserts split metadata layers and `comment_hint` behavior + +### Examples + +- `tests/integration/test_examples_smoke.py` + - strongest anchor for making one example carry the new public usage story + +### Smoke assets + +- `scripts/smoke_examples.py` +- `scripts/smoke_docs_assets.py` + +`scripts/smoke_api_payload_snapshot.py` is not a relevant anchor for this +feature and should remain untouched. 
+ +## Docs That Must Change If Implemented Correctly + +These should change if the feature is implemented: + +- `docs/public-api.md` + - because `FieldMeta(...)` and `ExcelMeta(...)` are stable public entry + points and the new argument belongs on that public metadata surface +- `docs/getting-started.md` + - because it is the shortest schema declaration guide and should show the new + metadata in at least one concise example +- `examples/annotated_schema.py` + - because it is the clearest metadata-focused example surface in the repo + +These are likely optional rather than required: + +- `examples/README.md` + - only if the example description should explicitly mention template guidance +- `README.md` +- `README-pypi.md` + - only if the main snippet is intentionally updated to showcase the new field +- `docs/locale.md` + - only if the implementation adds enough workbook-facing wording that the + locale guide should explicitly mention example-comment labels + +## Final Recommendation + +Implement `Template UX metadata v1` as a small extension of the existing public +metadata layer: + +- add `example_value` to `FieldMeta(...)` and `ExcelMeta(...)` +- store it in `WorkbookPresentationMeta` +- expose a localized `comment_example` +- render it in header comments only + +This is the smallest additive API surface, it aligns with the current template +invariants, it keeps `hint` intact as a separate concept, and it has the +strongest existing verifier story in the repository. 
diff --git a/plans/v2-3-template-ux-metadata-v1.md b/plans/v2-3-template-ux-metadata-v1.md new file mode 100644 index 0000000..06d09f9 --- /dev/null +++ b/plans/v2-3-template-ux-metadata-v1.md @@ -0,0 +1,335 @@ +# v2.3 Template UX Metadata v1 + +Status: `planned` + +## Problem Statement + +ExcelAlchemy already generates user-facing workbook templates and already supports +template-side comments such as: + +- required vs optional guidance +- date/number formatting guidance +- option lists +- free-form `hint` + +That existing surface is useful, but it still leaves a common usability gap: +users may understand the rule while still not knowing what a valid value should +look like in practice. + +For `v2.3`, this experiment should stay narrow. The goal is not to redesign +template generation or introduce a spreadsheet validation engine. The goal is to +add one small, stable layer of template UX metadata so generated templates do +more to prevent user mistakes before upload. + +## Goals + +- Strengthen template-side user guidance in generated workbooks. +- Keep the feature additive and backward-compatible. +- Reuse the existing stable metadata and template-comment path. +- Make the new capability easy to verify through tests, docs, examples, and + smoke assets. +- Prefer one rendering strategy for v1 rather than several competing surfaces. + +## Non-Goals + +- Native Excel data-validation rules, dropdown matrices, or a validation engine. +- Async jobs, orchestration, or import remediation work. +- Import execution or runtime validation redesign. +- Broad schema metadata architecture redesign. +- Template layout redesign with extra sheets, guide tables, or hidden metadata + structures. +- Broad public API renaming or compatibility cleanup. +- Storage redesign. 
+ +## Recommended Narrow v1 Scope + +The recommended v1 slice is: + +- add one new optional field-level metadata value for example guidance +- thread it through the existing public metadata entry points +- render it only into existing header comments for generated templates + +Recommended new metadata field: + +- `example_value: str | None = None` + +Recommended public entry points: + +- `FieldMeta(..., example_value=...)` +- `ExcelMeta(..., example_value=...)` + +This keeps the feature explicit and small. It also avoids overloading the +existing sample-row path on `download_template(...)`, which already exists for +real row data rather than field metadata. + +## Likely User-Visible Behavior + +When a field declares `example_value`, the generated template header comment for +that field gains one additional localized line. + +Representative examples: + +- `示例:张三` +- `Example: alice@company.com` + +Behavior rules for v1: + +- if `example_value` is omitted, template output is unchanged +- no new rows, columns, or sheets are introduced +- import parsing and validation behavior remain unchanged +- the feature is purely workbook-guidance metadata +- template generation must still avoid native Excel data-validation rules + +## Candidate Rendering Strategies + +### Option 1. Header comment augmentation + +Render the new metadata into the existing per-field header comment. + +Why it fits: + +- aligns with the current invariant that template guidance is encoded in + comments and formatting +- reuses existing public metadata and writer/comment seams +- does not alter workbook structure +- is easy to verify in contract tests and locale tests + +### Option 2. Prefilled example rows + +Do not choose this for v1. + +Why not: + +- overlaps with the existing sample-row behavior on `download_template(...)` +- changes worksheet content rather than metadata only +- creates ambiguity about whether the values are placeholders, examples, or + real user-editable rows + +### Option 3. 
Native Excel validation or input-message rules + +Do not choose this for v1. + +Why not: + +- conflicts with the current repository invariant that generated templates do + not rely on Excel data-validation rules +- expands scope into validation policy rather than UX metadata +- would require a broader stable API and test matrix than this experiment + should own + +## Recommended Direction + +Implement only Option 1 in v1: + +- extend metadata with one optional `example_value` +- render it through existing header comments +- keep all other template behavior unchanged + +## Likely Code Areas Affected + +### Public metadata surface + +- `src/excelalchemy/metadata.py` + - add the new optional metadata input and thread it through the existing + workbook-presentation metadata path + +### Template comment rendering + +- `src/excelalchemy/codecs/*.py` + - update existing built-in comment builders that already compose field-level + template comments +- `src/excelalchemy/core/writer.py` + - inspect only if a small comment-assembly touch is needed; avoid unrelated + rendering refactors + +### Locale / workbook-facing text + +- `src/excelalchemy/i18n/messages.py` + - only if a localized label such as `Example:` / `示例:` needs to be added + +### Tests + +- `tests/contracts/test_template_contract.py` +- `tests/unit/test_field_metadata.py` +- representative codec-level unit tests only if existing comment assertions are + already the local convention + +### Docs / examples / smoke + +- `docs/getting-started.md` +- `docs/public-api.md` +- one metadata-focused example such as `examples/annotated_schema.py` +- `tests/integration/test_examples_smoke.py` +- `scripts/smoke_examples.py` +- `scripts/smoke_docs_assets.py` +- `files/example-outputs/` only if a captured example output changes + +## Test Strategy + +### Contract tests + +Add or extend contract coverage to verify: + +- generated templates still return the same payload/artifact types +- generated templates still contain no Excel 
data validations +- a field with `example_value` produces a header comment containing the example + line +- English locale templates render the English example label +- templates without `example_value` remain unchanged in the relevant assertions + +### Unit tests + +Add focused metadata tests to verify: + +- `FieldMeta(..., example_value=...)` stores the expected metadata +- `ExcelMeta(..., example_value=...)` stores the expected metadata +- any comment-ready example accessor is localized correctly +- blank or `None` example values do not create comment text + +### Example and smoke coverage + +Keep this narrow: + +- update one runnable example to show the feature +- keep example smoke passing +- regenerate captured outputs only if example stdout or durable example assets + intentionally change + +## Docs, Examples, And Smoke Updates Required + +If the feature is implemented, update the minimum repository-facing material +that teaches template metadata: + +- `docs/getting-started.md` + - add one concise metadata example using `example_value` +- `docs/public-api.md` + - document the new optional metadata field on the stable public metadata + surface +- `examples/annotated_schema.py` + - show one practical use of `example_value` +- `examples/README.md` + - only if the example description should explicitly mention template guidance +- `README.md` and `README-pypi.md` + - only if the chosen example snippet there is intentionally updated + +If example output changes intentionally, also update: + +- `files/example-outputs/` +- `scripts/generate_example_output_assets.py` +- `scripts/smoke_docs_assets.py` + +`scripts/smoke_api_payload_snapshot.py` should remain untouched unless this +experiment accidentally spills into API payload work, which it should not. + +## Risks And Open Questions + +### 1. 
Field naming should stay narrow + +Recommended decision: + +- use `example_value`, not a broader name like `example` + +Why: + +- avoids confusion with sample rows +- avoids implying richer structured examples in v1 + +### 2. Comment composition is distributed across codecs + +Risk: + +- built-in field codecs already own portions of comment rendering, so the + change should be applied conservatively rather than used as an excuse for a + shared comment abstraction refactor + +Planned response: + +- keep implementation boring and explicit +- touch only the codecs that already participate in template comment building + +### 3. Locale wording must follow workbook-facing policy + +Risk: + +- the example label is workbook-facing text, so it should follow the workbook + locale rather than the runtime/API English-first policy + +Planned response: + +- localize the label through the existing workbook-facing message path + +### 4. Scope can easily drift into validation UX + +Risk: + +- once template UX is in scope, it is tempting to add dropdowns, rules, + prefilled rows, guide sheets, or other Excel features + +Planned response: + +- keep v1 to one metadata field and one rendering surface +- record broader ideas in `tech_debt/` or a later plan instead of expanding + this slice + +## Phased Implementation Steps + +### Phase 1. Lock the metadata contract + +- add the new optional metadata field to the accepted v1 scope +- confirm that the public surface is limited to `FieldMeta(...)` and + `ExcelMeta(...)` + +### Phase 2. Implement narrow rendering + +- thread the metadata into the existing workbook-presentation metadata path +- render the example line in header comments only +- keep workbook structure and import behavior unchanged + +### Phase 3. Protect behavior with tests + +- extend template contract tests +- extend metadata unit tests +- add the minimum representative comment-rendering assertions needed + +### Phase 4. 
Update repository-facing materials + +- update the smallest useful set of docs and one runnable example +- refresh example outputs only if durable example artifacts change +- keep smoke scripts passing + +## Acceptance Criteria + +- `FieldMeta` accepts `example_value` without breaking existing call sites +- `ExcelMeta` accepts `example_value` without breaking existing call sites +- generated templates remain structurally unchanged except for an added comment + line when `example_value` is present +- generated templates still contain no native Excel data-validation rules +- workbook-facing example labels are localized for `zh-CN` and `en` +- templates without `example_value` preserve existing behavior +- tests cover the new comment behavior and the existing template invariants +- at least one user-facing example and the relevant metadata docs show the new + capability + +## Verification + +Expected verification once implementation exists: + +- `uv run ruff format --check .` +- `uv run ruff check .` +- `uv run pyright` +- `uv run pytest --cov=excelalchemy --cov-report=term-missing:skip-covered tests` +- `uv run python scripts/smoke_examples.py` +- `uv run python scripts/smoke_docs_assets.py` + +## Decision Log + +- Decision: keep v1 on the existing header-comment surface only. + - Why: this is the smallest additive path that matches current invariants and + avoids workbook-structure changes. + - Affected paths: `src/excelalchemy/metadata.py`, built-in codec comment + builders, template contract tests, metadata docs/examples. + +- Decision: prefer a single `example_value` field instead of a broader template + guidance model. + - Why: narrower stable API surface, clearer semantics, easier verification. + - Affected paths: public metadata constructors, metadata docs, example usage. 
diff --git a/plans/v2-4-import-platform-layer-design-note.md b/plans/v2-4-import-platform-layer-design-note.md new file mode 100644 index 0000000..d9078a6 --- /dev/null +++ b/plans/v2-4-import-platform-layer-design-note.md @@ -0,0 +1,505 @@ +# v2.4 Import Platform Layer Design Note + +Status: `planned` + +Related plan: + +- [v2-4-import-platform-layer-design.md](./v2-4-import-platform-layer-design.md) + +## Current Repository State + +The repository already has the main pieces needed for an import platform story. +The gap is mostly documentation shape, not missing architecture. + +Current alignment points: + +- the facade already separates full import from preflight: + - `ExcelAlchemy.import_data(...)` + - `ExcelAlchemy.preflight_import(...)` +- preflight already has a dedicated internal path: + - `src/excelalchemy/core/preflight.py` +- full import already has a dedicated runtime owner: + - `src/excelalchemy/core/import_session.py` +- template-side guidance already lives in the public metadata layer: + - `FieldMeta(...)` + - `ExcelMeta(...)` + - `WorkbookPresentationMeta` +- post-import consumption already lives in one public results layer: + - `ImportPreflightResult` + - `ImportResult` + - `CellErrorMap` + - `RowIssueMap` + - `build_frontend_remediation_payload(...)` +- the main drift is doc-level: + - `README.md` already tells an import workflow story + - `docs/architecture.md` still centers internal collaborators + - `docs/public-api.md` lists stable surfaces without fully grouping them as + one platform model + - `docs/result-objects.md` explains outcomes well, but not as one layer in a + larger platform architecture + +That means v2.4 should be documentation architecture consolidation, not a +system rewrite. + +## Recommended Documentation Architecture + +### Recommended decomposition + +The cleanest platform-layer decomposition of the current system is four primary +capability layers plus three cross-cutting seams. + +#### 1. 
Contract Authoring + +Responsibility: + +- define the workbook contract before upload +- make generated templates self-explanatory + +Public surfaces: + +- schema models +- `FieldMeta(...)` +- `ExcelMeta(...)` +- template generation methods on `ExcelAlchemy` + +Internal alignment: + +- `src/excelalchemy/metadata.py` +- `src/excelalchemy/core/schema.py` +- `src/excelalchemy/core/rendering.py` +- `src/excelalchemy/core/writer.py` +- `src/excelalchemy/codecs/` + +#### 2. Structural Gate + +Responsibility: + +- answer whether a workbook is structurally importable before full execution + +Public surfaces: + +- `ExcelAlchemy.preflight_import(...)` +- `ImportPreflightResult` + +Internal alignment: + +- `src/excelalchemy/core/preflight.py` +- `src/excelalchemy/core/headers.py` +- `src/excelalchemy/core/schema.py` +- `src/excelalchemy/core/storage_protocol.py` + +#### 3. Execution and Observability + +Responsibility: + +- run the real import workflow +- expose synchronous lifecycle visibility without introducing a job framework + +Public surfaces: + +- `ExcelAlchemy.import_data(..., on_event=...)` +- `ImporterConfig.for_create(...)` +- `ImporterConfig.for_update(...)` +- `ImporterConfig.for_create_or_update(...)` +- `ImportMode` + +Internal alignment: + +- `src/excelalchemy/core/import_session.py` +- `src/excelalchemy/core/executor.py` +- `src/excelalchemy/core/rows.py` +- `src/excelalchemy/helper/pydantic.py` + +#### 4. 
Outcome and Remediation + +Responsibility: + +- expose the outcome of one import run for API, admin, and frontend consumers +- support remediation flows after the import, not during it + +Public surfaces: + +- `ImportResult` +- `CellErrorMap` +- `RowIssueMap` +- `build_frontend_remediation_payload(...)` + +Internal alignment: + +- `src/excelalchemy/results.py` +- `src/excelalchemy/core/rendering.py` +- `src/excelalchemy/core/writer.py` +- issue production paths in `src/excelalchemy/core/rows.py` and + `src/excelalchemy/core/executor.py` + +#### Cross-cutting seams + +These should be documented as seams, not as top-level phases: + +- `ExcelStorage` + - workbook input and workbook output boundary +- locale + - workbook-facing text policy +- result workbook rendering + - bridge between failed execution and user remediation + +### Internal architecture docs vs platform architecture docs + +These two doc types should differ on purpose. + +#### Platform architecture docs should answer: + +- what capabilities exist in the import platform +- when each capability is used in the workflow +- which public APIs belong to each capability +- how backend and frontend integrators should think about the flow + +Platform docs should emphasize: + +- capability boundaries +- lifecycle order +- public surfaces +- integration patterns + +Platform docs should avoid: + +- file-by-file module ownership detail +- deep internal collaborator sequencing +- compatibility inventory beyond brief guardrails + +#### Internal architecture docs should answer: + +- which modules own each behavior +- how collaborators interact at runtime +- where to edit when behavior changes +- which internal seams are stable enough to preserve + +Internal docs should emphasize: + +- `core/*`, `helper/*`, `i18n/*`, `_primitives/*` +- component responsibilities +- change-impact mapping +- extension boundaries + +Internal docs should avoid: + +- retelling the high-level import platform story as if end users are the main 
+ audience + +### Official top-level concepts + +These should become the official top-level concepts in v2.4 docs: + +- `Import platform` +- `Contract authoring` +- `Template guidance` +- `Structural gate` +- `Execution` +- `Observability` +- `Outcome surfaces` +- `Remediation payload` +- `Storage seam` + +Recommended usage rules: + +- use `Contract authoring` for schema and template-facing guidance +- use `Template guidance` for additive workbook metadata such as `hint` and + `example_value` +- use `Structural gate` for `preflight_import(...)` only +- use `Execution` for the full import path +- use `Observability` for `on_event=...` only +- use `Outcome surfaces` for `ImportResult`, `CellErrorMap`, and `RowIssueMap` +- use `Remediation payload` only for the opt-in helper, not for all result + payloads +- use `Storage seam` for `ExcelStorage`, not as an import phase + +Terms to avoid as top-level concepts: + +- `job-friendly import` +- `pre-validation` as a synonym for the whole import system +- `frontend payloads` as the umbrella name for all result objects +- `internal architecture` as the primary reader-facing story + +### Which docs should remain, split, or mainly be relinked + +#### Should remain as distinct docs + +- `docs/public-api.md` + - still the stable-boundary reference +- `docs/result-objects.md` + - still the detailed result and payload reference +- `docs/api-response-cookbook.md` + - still the response-shape cookbook +- `docs/domain-model.md` + - still the named-concepts reference +- `src/excelalchemy/README.md` + - still the internal package guide +- `docs/integration-roadmap.md` + - still the role-based reading map + +#### Should be split in responsibility + +- `docs/architecture.md` + - should stop carrying both the platform story and the internal component + story + - recommended split: + - new `docs/import-platform.md` for capability-layer architecture + - retained `docs/architecture.md` for internal component architecture + +#### Should mainly 
be relinked and lightly reframed + +- `README.md` + - keep short platform summary, then relink to `docs/import-platform.md` +- `docs/getting-started.md` + - keep onboarding focus, then relink to platform/result docs +- `docs/integration-roadmap.md` + - keep as reading-path map; mostly relink to the new platform doc +- `docs/examples-showcase.md` + - keep visual/examples role; mainly relink rather than absorb architecture +- `README-pypi.md` + - keep concise; mostly relink, do not expand architecture detail +- `examples/README.md` + - keep example navigation role; only minor relinking if needed + +### Mermaid diagrams that are most useful + +Use three canonical diagrams and reuse them rather than inventing many +near-duplicates. + +#### 1. Capability-layer view + +Purpose: + +- explain the platform model at a glance + +Recommended content: + +- `Contract authoring` +- `Structural gate` +- `Execution and observability` +- `Outcome and remediation` +- side seams: + - `ExcelStorage` + - `Locale` + - `Result workbook rendering` + +Recommended home: + +- `docs/import-platform.md` + +#### 2. Runtime sequence view + +Purpose: + +- show the runtime order without exposing internal component detail first + +Recommended sequence: + +1. author schema and template guidance +2. generate template +3. upload workbook +4. run structural gate +5. run full import execution +6. emit lifecycle events during execution +7. produce result workbook and result surfaces +8. optionally build remediation payload + +Recommended home: + +- `docs/import-platform.md` +- optionally summarized in `README.md` + +#### 3. 
Integration blueprint view + +Purpose: + +- show how backend/API/frontend consumers connect to the platform surfaces + +Recommended nodes: + +- backend service +- `ExcelAlchemy` +- `ExcelStorage` +- spreadsheet user / uploaded workbook +- API response builder +- frontend/admin UI +- `ImportPreflightResult` +- `ImportResult` +- `CellErrorMap` +- `RowIssueMap` +- remediation payload + +Recommended home: + +- `docs/import-platform.md` +- `docs/api-response-cookbook.md` +- `docs/result-objects.md` + +## Code Structure That Already Aligns And Should Not Be Refactored + +These current boundaries already fit the proposed platform model well enough: + +- `src/excelalchemy/core/alchemy.py` + - one facade that exposes both preflight and full import +- `src/excelalchemy/core/preflight.py` + - dedicated structural-gate path +- `src/excelalchemy/core/import_session.py` + - dedicated runtime owner for full import and lifecycle events +- `src/excelalchemy/results.py` + - one public outcome/remediation surface +- `src/excelalchemy/metadata.py` + - already layered for declaration, presentation, and import constraints +- `src/excelalchemy/helper/pydantic.py` + - already isolates the Pydantic boundary +- `src/excelalchemy/core/storage_protocol.py` + - already models storage as a seam instead of a product decision +- `src/excelalchemy/core/rendering.py` and `src/excelalchemy/core/writer.py` + - already centralize workbook output concerns + +Specific non-refactor guidance: + +- do not merge preflight into `ImportSession` +- do not split `results.py` into new platform-specific public modules +- do not move template guidance out of `metadata.py` +- do not turn lifecycle events into a separate execution framework +- do not revisit compatibility-only modules as part of this documentation work + +## One Rejected Alternative + +### Rejected option + +Use `docs/architecture.md` as the single umbrella document for both: + +- user-facing platform architecture +- internal component architecture 
+- API integration story +- result-surface story + +### Why reject it + +This would keep the repository in the same ambiguous state: + +- the platform view would remain mixed with implementation detail +- the internal component map would keep competing with the public workflow story +- readers would still need to jump between sections to understand whether they + were reading: + - a capability model + - an implementation map + - an integration guide + +It also creates maintenance pressure: + +- every new import-facing capability would enlarge one already overloaded page +- diagrams would keep drifting between audiences +- `README.md`, `docs/public-api.md`, and `docs/result-objects.md` would still + need to restate architecture context anyway + +The cleaner design is: + +- new `docs/import-platform.md` for capability architecture +- retained `docs/architecture.md` for internal component architecture + +## Exact Scope Boundaries + +### In scope + +- define the platform-layer decomposition +- define the doc boundary between platform architecture and internal + architecture +- define official top-level terms +- define which docs remain, split, or mainly relink +- define the canonical Mermaid diagram set +- identify code structure that already aligns and should stay put +- document later file targets for the docs pass + +### Out of scope + +- code implementation +- public API additions or renames +- result payload changes +- `src/excelalchemy/core/` refactors +- async/job framework design +- storage redesign +- compatibility cleanup +- locale-policy changes +- example-script rewrites unless a doc update clearly requires them +- broad movement of content into new doc trees beyond the one new platform doc + +## Likely File Change Checklist + +### New doc + +- [ ] `docs/import-platform.md` + +### Docs to update directly + +- [ ] `README.md` +- [ ] `docs/architecture.md` +- [ ] `docs/domain-model.md` +- [ ] `docs/public-api.md` +- [ ] `docs/result-objects.md` +- [ ] 
`docs/api-response-cookbook.md` +- [ ] `docs/integration-roadmap.md` +- [ ] `src/excelalchemy/README.md` + +### Docs likely limited to relinking or light wording updates + +- [ ] `docs/getting-started.md` +- [ ] `docs/examples-showcase.md` +- [ ] `examples/README.md` +- [ ] `examples/fastapi_reference/README.md` +- [ ] `README-pypi.md` only if the public doc map changes should be reflected + +### Files that should usually stay untouched in this phase + +- [ ] `src/excelalchemy/core/alchemy.py` +- [ ] `src/excelalchemy/core/preflight.py` +- [ ] `src/excelalchemy/core/import_session.py` +- [ ] `src/excelalchemy/results.py` +- [ ] `src/excelalchemy/metadata.py` +- [ ] compatibility modules under `src/excelalchemy/types/` + +### Smoke and asset files only if doc references change materially + +- [ ] `scripts/smoke_docs_assets.py` +- [ ] `scripts/generate_example_output_assets.py` only if example capture links change +- [ ] `files/example-outputs/` only if doc-visible examples intentionally change + +## Verifier Checklist + +- [ ] The new design note clearly separates platform architecture from internal + component architecture. +- [ ] The recommended decomposition uses current public capabilities rather than + proposing a new runtime model. +- [ ] The official top-level concepts are few, reusable, and consistent with + existing docs. +- [ ] The recommendation introduces only one new primary architecture doc: + `docs/import-platform.md`. +- [ ] `docs/architecture.md` remains the internal/component architecture + reference. +- [ ] `docs/result-objects.md` remains the detailed result reference rather than + being absorbed into the platform doc. +- [ ] The recommended Mermaid set includes: + capability-layer view, runtime sequence view, integration blueprint view. +- [ ] The design note explicitly identifies modules that already align and + should not be refactored. 
+- [ ] The scope boundaries exclude code redesign, job frameworks, compatibility + cleanup, and broad refactors. +- [ ] The likely file checklist is documentation-first and additive. + +## Final Recommendation + +For v2.4, the cleanest design is a documentation architecture with one new +platform-level doc and no architectural rewrite: + +- add `docs/import-platform.md` as the capability-layer architecture page +- keep `docs/architecture.md` as the internal component map +- keep `docs/public-api.md`, `docs/result-objects.md`, and + `docs/api-response-cookbook.md` as separate reference docs +- standardize the top-level concepts around contract authoring, structural + gate, execution, observability, and outcome/remediation +- treat current code structure as already aligned enough to support this model + without refactoring + +This gives the repository one explicit import platform story while preserving +the current public API, compatibility posture, and internal architecture seams. diff --git a/plans/v2-4-import-platform-layer-design.md b/plans/v2-4-import-platform-layer-design.md new file mode 100644 index 0000000..71431b5 --- /dev/null +++ b/plans/v2-4-import-platform-layer-design.md @@ -0,0 +1,542 @@ +# v2.4 ExcelAlchemy Import Platform Layer Design Plan + +Status: `planned` + +## Problem Statement + +The repository already exposes a meaningful set of import-related capabilities, +but the current documentation still presents them mostly as: + +- isolated additive features +- internal components +- result-object details + +That leaves a gap at the platform level. + +Today, a reader can learn that ExcelAlchemy supports: + +- template-side UX metadata +- lightweight structural preflight +- synchronous import lifecycle events +- structured result objects and remediation payloads + +But the docs do not yet describe these as one coherent import platform model +with clear before-import, in-import, and after-import responsibilities. 
+ +The result is documentation drift risk: + +- users may treat preflight, lifecycle events, and remediation as unrelated + utilities instead of one import workflow story +- architecture docs may over-emphasize internal collaborators and under-explain + platform capabilities +- public API docs may describe stable surfaces without showing how they compose + into a recommended integration model + +For v2.4, the goal is to make the platform layer explicit in docs and planning +without changing current public behavior. + +## Goals + +- Define a platform-layer architecture model for the existing import + capabilities. +- Show how current public surfaces compose into one coherent import workflow. +- Preserve the current additive nature of: + - template-side UX metadata + - `preflight_import(...)` + - `import_data(..., on_event=...)` + - `ImportResult`, `CellErrorMap`, `RowIssueMap`, and + `build_frontend_remediation_payload(...)` +- Add a documentation model that sits above internal component architecture. +- Standardize terminology across README, architecture docs, public API docs, + and result-object docs. +- Keep the design compatible with current 2.x public API and compatibility + policy. + +## Non-goals + +- No code implementation in this plan. +- No new public import method, config object, event class, or result type. +- No refactor of `src/excelalchemy/core/`. +- No change to current storage architecture or `ExcelStorage` semantics. +- No result payload shape changes. +- No change to locale policy. +- No background-job, streaming, or async redesign. +- No attempt to merge preflight, execution, and remediation into one overloaded + API. +- No opportunistic cleanup of unrelated architecture or docs debt. 
+ +## Scope + +This plan covers: + +- platform-layer architecture framing +- documentation structure +- terminology alignment +- diagram planning +- phased rollout guidance for later implementation work + +This plan does not itself update any doc other than adding this plan file. + +## Current Repository Facts To Preserve + +- Template-side UX metadata already exists as additive workbook guidance via + public metadata declarations. +- Preflight already exists as a lightweight structural gate and is not a + replacement for full import execution. +- Import lifecycle events already exist as additive synchronous hooks on + `import_data(...)`. +- Result objects and remediation payloads already exist as post-import + consumption surfaces. +- Internal architecture docs already describe core collaborators, but they do + not yet make the platform-level capability model first-class. +- Stable public entry points remain rooted in: + - `excelalchemy` + - `excelalchemy.config` + - `excelalchemy.metadata` + - `excelalchemy.results` + - `excelalchemy.exceptions` + - `excelalchemy.codecs` + +## Recommended Platform-Layer Decomposition + +The recommended documentation model is a capability-oriented import platform +layer above the existing internal component map. + +### 1. Contract Authoring Layer + +Responsibility: + +- define the workbook contract before upload +- make the template self-explanatory without changing worksheet structure + +Current public surfaces: + +- `FieldMeta(...)` +- `ExcelMeta(...)` +- template generation methods on `ExcelAlchemy` + +Current capability framing: + +- schema declaration +- workbook-facing labels and ordering +- additive guidance such as `hint` and `example_value` + +Recommended platform name: + +- `Contract Authoring` + +### 2. 
Structural Gate Layer + +Responsibility: + +- answer whether a workbook is structurally importable before full execution + +Current public surfaces: + +- `ExcelAlchemy.preflight_import(...)` +- `ImportPreflightResult` + +Current capability framing: + +- sheet existence +- header validity +- lightweight structural validity +- estimated row count + +Recommended platform name: + +- `Structural Gate` + +### 3. Execution And Observability Layer + +Responsibility: + +- run the real import workflow +- expose additive inline lifecycle visibility without changing the execution + model + +Current public surfaces: + +- `ExcelAlchemy.import_data(..., on_event=...)` +- `ImportMode` +- `ImporterConfig.for_create(...)` +- `ImporterConfig.for_update(...)` +- `ImporterConfig.for_create_or_update(...)` + +Current capability framing: + +- row validation +- callback dispatch +- result-workbook generation +- synchronous lifecycle events + +Recommended platform name: + +- `Execution and Observability` + +### 4. Outcome And Remediation Layer + +Responsibility: + +- expose machine-readable and UI-friendly post-import outputs +- support API responses, admin review, and retry/remediation flows + +Current public surfaces: + +- `ImportResult` +- `CellErrorMap` +- `RowIssueMap` +- `build_frontend_remediation_payload(...)` + +Current capability framing: + +- top-level import outcome +- cell-level issue inspection +- row-level issue summaries +- remediation-oriented payload shaping + +Recommended platform name: + +- `Outcome and Remediation` + +### 5. 
Cross-cutting Integration Seams + +These are not separate import phases, but the platform docs should call them +out as cross-cutting seams: + +- `ExcelStorage` + - input workbook read boundary + - result workbook upload boundary +- locale + - workbook-facing display text + - not a separate platform capability +- result workbook rendering + - shared bridge between execution failures and user remediation + +### Recommended Documentation Rule + +Future docs should describe import capabilities in this order: + +1. contract authoring +2. structural gate +3. execution and observability +4. outcome and remediation + +Then map those capabilities down to internal components such as +`schema.py`, `headers.py`, `import_session.py`, `executor.py`, and +`results.py`. + +This keeps the platform story user-facing while preserving the current +component-level architecture docs. + +## Proposed New Architecture Docs + +### New doc + +- `docs/import-platform.md` + - purpose: + - define the platform-layer capability model + - show the before-import / in-import / after-import story + - link outward to `docs/public-api.md`, `docs/result-objects.md`, and + `docs/architecture.md` + - recommended sections: + - platform overview + - capability layers + - public surface by layer + - internal component mapping + - integration patterns for backend/API/frontend consumers + - boundaries and non-goals + +### Existing docs to update + +- `README.md` + - add a concise import-platform framing section and point to + `docs/import-platform.md` +- `docs/architecture.md` + - split the current content into: + - platform view + - component view + - keep component ownership intact +- `docs/domain-model.md` + - add platform-layer concepts as first-class domain concepts +- `docs/public-api.md` + - regroup import-related public surfaces by platform layer +- `docs/result-objects.md` + - position result objects explicitly as the outcome/remediation layer +- `docs/api-response-cookbook.md` + - align wording with 
the platform-layer terminology +- `docs/integration-roadmap.md` + - add a reading path for platform-oriented integrators +- `src/excelalchemy/README.md` + - explain which internal modules implement each platform capability + +## Terminology To Standardize + +Use these terms consistently in v2.4 docs: + +- `Import platform layer` + - the capability-oriented view above internal components +- `Contract authoring` + - schema declaration plus template-side workbook guidance +- `Template guidance` + - workbook-facing additive metadata such as `hint` and `example_value` +- `Structural gate` + - lightweight preflight before full execution +- `Execution and observability` + - full import plus synchronous lifecycle visibility +- `Outcome and remediation` + - post-import result objects, issue maps, and remediation payloads +- `Result surfaces` + - `ImportResult`, `CellErrorMap`, `RowIssueMap`, and related serializers +- `Remediation payload` + - the additive frontend-oriented payload helper, not the default result + contract +- `Internal component architecture` + - `core/*`, helper, renderer, writer, storage resolution, and related + collaborators + +Avoid these documentation drifts: + +- describing remediation as part of import execution +- describing lifecycle events as a job system +- describing preflight as validation equivalent to `import_data(...)` +- describing template guidance as a separate template engine +- presenting internal components as the primary platform story for end users + +## Suggested Mermaid Diagram Set + +The v2.4 docs should converge on a small, reusable diagram set rather than +ad hoc diagrams per page. + +### 1. Platform capability map + +Purpose: + +- show the four platform layers and the cross-cutting seams + +Recommended home: + +- `docs/import-platform.md` + +### 2. 
Import lifecycle sequence + +Purpose: + +- show the recommended order: + - template guidance + - preflight + - import execution + - result consumption + - remediation + +Recommended home: + +- `docs/import-platform.md` +- optionally summarized in `README.md` + +### 3. Public surface to capability map + +Purpose: + +- map stable public APIs to each platform layer + +Recommended home: + +- `docs/public-api.md` + +### 4. Capability to internal component map + +Purpose: + +- map platform layers to current internal modules without changing + implementation ownership + +Recommended home: + +- `docs/architecture.md` +- `src/excelalchemy/README.md` + +### 5. API and frontend consumption map + +Purpose: + +- show how `ImportPreflightResult`, `ImportResult`, `CellErrorMap`, + `RowIssueMap`, and remediation payloads serve backend and frontend flows + +Recommended home: + +- `docs/result-objects.md` +- `docs/api-response-cookbook.md` + +## Docs Likely Affected + +Primary docs: + +- `README.md` +- `docs/import-platform.md` (new) +- `docs/architecture.md` +- `docs/domain-model.md` +- `docs/public-api.md` +- `docs/result-objects.md` +- `docs/api-response-cookbook.md` +- `docs/integration-roadmap.md` +- `src/excelalchemy/README.md` + +Potentially affected, depending on final wording and links: + +- `README-pypi.md` +- `examples/README.md` +- `examples/fastapi_reference/README.md` +- `docs/getting-started.md` +- `docs/examples-showcase.md` + +## Risks And Open Questions + +### Risk: capability overlap wording + +Risk: + +- docs could blur the boundary between preflight, full import validation, and + remediation + +Mitigation: + +- define one short boundary statement per layer +- repeat it consistently across README, platform docs, and result docs + +### Risk: platform docs compete with component docs + +Risk: + +- adding a platform doc could duplicate `docs/architecture.md` instead of + clarifying it + +Mitigation: + +- keep `docs/import-platform.md` focused on capabilities and 
public surfaces +- keep `docs/architecture.md` focused on internal collaborators and ownership + +### Risk: accidental API signaling + +Risk: + +- a platform-layer framing could accidentally imply new abstractions or future + guarantees that do not exist today + +Mitigation: + +- anchor every layer in current public APIs only +- mark aspirational follow-up ideas separately in future plans or tech debt + +### Risk: terminology churn without implementation benefit + +Risk: + +- broad wording changes may create unnecessary review noise + +Mitigation: + +- standardize only the terms needed to describe the current import platform +- avoid renaming stable public APIs + +### Open question: one new platform doc or a broader architecture rewrite + +Recommendation: + +- start with one new `docs/import-platform.md` and targeted edits elsewhere +- avoid a large architecture-doc rewrite in one pass + +### Open question: should preflight and result objects stay in separate docs + +Recommendation: + +- yes +- keep detailed behavior in `docs/result-objects.md` +- use `docs/import-platform.md` to explain where those objects fit in the + broader workflow + +### Open question: should lifecycle events get a dedicated doc + +Recommendation: + +- not in this phase +- first group them under `Execution and Observability` +- create a dedicated doc later only if examples and integrations grow enough + +### Open question: how to handle newly discovered design gaps + +Recommendation: + +- do not expand v2.4 scope inside the docs pass +- record follow-up gaps in `plans/` or `tech_debt/` + +## Phased Implementation Plan + +### Phase 1. Approve the platform model + +- confirm the four-layer decomposition +- confirm the standardized terminology +- confirm the new-doc strategy centered on `docs/import-platform.md` + +### Phase 2. 
Add the platform-level doc + +- create `docs/import-platform.md` +- add the core capability map and lifecycle diagram +- link to existing public API, architecture, and result docs + +### Phase 3. Align current docs to the platform model + +- update `README.md` +- update `docs/architecture.md` +- update `docs/domain-model.md` +- update `docs/public-api.md` +- update `docs/result-objects.md` +- update `docs/api-response-cookbook.md` +- update `docs/integration-roadmap.md` +- update `src/excelalchemy/README.md` + +### Phase 4. Align examples and reading paths only where needed + +- update `examples/README.md` and `examples/fastapi_reference/README.md` only + if the new platform framing materially improves navigation +- avoid example script churn unless the public teaching story is unclear + +### Phase 5. Capture follow-up gaps separately + +- record any platform/code mismatches in `plans/` or `tech_debt/` +- do not fold unrelated design or code changes into the doc-alignment work + +## Acceptance Criteria + +- A v2.4 plan exists that defines the import platform layer as a coherent + architecture and documentation model. +- The plan preserves current public API and 2.x compatibility expectations. +- The recommended decomposition explicitly includes: + - contract authoring + - structural gate + - execution and observability + - outcome and remediation +- The plan names a concrete new architecture doc: + - `docs/import-platform.md` +- The plan specifies terminology to standardize across docs. +- The plan specifies a reusable Mermaid diagram set. +- The plan identifies the primary docs likely to be updated. +- The plan records risks and open questions without expanding scope into code + work. +- The phased plan keeps implementation additive, reviewable, and easy to + revert. +- The plan makes clear that preflight, lifecycle events, and remediation are + existing additive capabilities, not new v2.4 runtime features. 
+ +## Verification For Later Execution + +When implementation begins, validate the documentation pass with: + +- `uv run ruff format --check .` +- `uv run ruff check .` +- `uv run pyright` +- `uv run pytest --cov=excelalchemy --cov-report=term-missing:skip-covered tests` +- `uv run python scripts/smoke_package.py` +- `uv run python scripts/smoke_examples.py` +- `uv run python scripts/smoke_docs_assets.py` +- `uv run python scripts/smoke_api_payload_snapshot.py` diff --git a/scripts/generate_example_output_assets.py b/scripts/generate_example_output_assets.py index 30bb5b3..aab1958 100644 --- a/scripts/generate_example_output_assets.py +++ b/scripts/generate_example_output_assets.py @@ -69,6 +69,7 @@ async def _build_import_failure_api_payload() -> dict[str, object]: ) from excelalchemy import ExcelAlchemy, ImporterConfig + from excelalchemy.results import build_frontend_remediation_payload storage = InMemorySmokeStorage() importer = ExcelAlchemy( @@ -86,6 +87,11 @@ async def _build_import_failure_api_payload() -> dict[str, object]: 'result': result.to_api_payload(), 'cell_errors': importer.cell_error_map.to_api_payload(), 'row_errors': importer.row_error_map.to_api_payload(), + 'remediation': build_frontend_remediation_payload( + result=result, + cell_error_map=importer.cell_error_map, + row_error_map=importer.row_error_map, + ), } diff --git a/scripts/smoke_api_payload_snapshot.py b/scripts/smoke_api_payload_snapshot.py index 0aace68..75cbdd4 100644 --- a/scripts/smoke_api_payload_snapshot.py +++ b/scripts/smoke_api_payload_snapshot.py @@ -22,6 +22,7 @@ async def _build_snapshot_payload() -> dict[str, object]: ) from excelalchemy import ExcelAlchemy, ImporterConfig + from excelalchemy.results import build_frontend_remediation_payload storage = InMemorySmokeStorage() importer = ExcelAlchemy( @@ -39,6 +40,11 @@ async def _build_snapshot_payload() -> dict[str, object]: 'result': result.to_api_payload(), 'cell_errors': importer.cell_error_map.to_api_payload(), 
'row_errors': importer.row_error_map.to_api_payload(), + 'remediation': build_frontend_remediation_payload( + result=result, + cell_error_map=importer.cell_error_map, + row_error_map=importer.row_error_map, + ), } diff --git a/scripts/smoke_docs_assets.py b/scripts/smoke_docs_assets.py index dcd52ac..af7b251 100644 --- a/scripts/smoke_docs_assets.py +++ b/scripts/smoke_docs_assets.py @@ -17,11 +17,23 @@ ROOT / 'README.md': ( 'Choosing ExcelAlchemy', 'docs/tool-comparison.md', + 'example_value', + ), + DOCS_DIR / 'getting-started.md': ( + 'example_value', + 'header comment', + 'alice@company.com', + ), + DOCS_DIR / 'public-api.md': ( + 'FieldMeta(...)', + 'ExcelMeta(...)', + 'example_value=', ), DOCS_DIR / 'result-objects.md': ( 'ImportResult', 'CellErrorMap', 'RowIssueMap', + 'build_frontend_remediation_payload', 'docs/api-response-cookbook.md', 'display_message', ), diff --git a/scripts/smoke_package.py b/scripts/smoke_package.py index 6e76665..180a474 100644 --- a/scripts/smoke_package.py +++ b/scripts/smoke_package.py @@ -11,6 +11,7 @@ from excelalchemy import ExcelAlchemy, ExcelStorage, ExporterConfig, FieldMeta, ImporterConfig, Number, String, UrlStr from excelalchemy.core.table import WorksheetTable +from excelalchemy.results import build_frontend_remediation_payload class SmokeImporter(BaseModel): @@ -119,6 +120,11 @@ async def main() -> None: invalid_result_payload = invalid_result.to_api_payload() cell_payload = invalid_importer.cell_error_map.to_api_payload() row_payload = invalid_importer.row_error_map.to_api_payload() + remediation_payload = build_frontend_remediation_payload( + result=invalid_result, + cell_error_map=invalid_importer.cell_error_map, + row_error_map=invalid_importer.row_error_map, + ) assert invalid_result_payload['result'] == 'DATA_INVALID' assert invalid_result_payload['is_data_invalid'] is True assert invalid_result_payload['summary']['fail_count'] == 1 @@ -143,6 +149,10 @@ async def main() -> None: assert 
isinstance(first_row_issue['message'], str) and first_row_issue['message'] assert isinstance(first_row_issue['display_message'], str) and first_row_issue['display_message'] assert first_row_issue['row_number_for_humans'] == 1 + assert remediation_payload['remediation']['needs_remediation'] is True + assert remediation_payload['remediation']['affected_row_count'] >= 1 + assert remediation_payload['by_code'] + assert remediation_payload['items'] exporter = ExcelAlchemy(ExporterConfig.for_storage(SmokeImporter, storage=storage, locale='en')) artifact = exporter.export_artifact( diff --git a/src/excelalchemy/README.md b/src/excelalchemy/README.md new file mode 100644 index 0000000..6d8ebd0 --- /dev/null +++ b/src/excelalchemy/README.md @@ -0,0 +1,463 @@ +# `src/excelalchemy/` Package Guide + +This file explains the internal structure of the main package directory. +It is meant for developers and AI agents who need to change implementation details without confusing public API, compatibility layers, and internal collaborators. + +## Related docs + +- [../../README.md](../../README.md) for the public-facing overview. +- [../../AGENTS.md](../../AGENTS.md) for repository-local editing guidance. +- [../../docs/repo-map.md](../../docs/repo-map.md) for top-level repository navigation. +- [../../docs/domain-model.md](../../docs/domain-model.md) for the main concepts implemented here. +- [../../docs/invariants.md](../../docs/invariants.md) for important behavioral constraints. +- [../../tests/README.md](../../tests/README.md) for where this package's behavior is protected. + +## Role of This Package + +- `src/excelalchemy/` is the main library package. 
+- It contains: + - the stable public surface used by application code + - the internal orchestration that implements import, export, template generation, rendering, and storage integration + - compatibility modules retained for the 2.x line +- The package is organized around a small public facade and a set of focused internal collaborators. + +## High-Level Package Structure + +- `__init__.py` + - Main public re-export surface. + - If application-facing imports change, start here. +- `config.py` + - Public workflow configuration types. +- `metadata.py` + - Public metadata declarations plus the internal layered field-metadata model. +- `results.py` + - Public result models and API-friendly error maps. +- `exceptions.py` + - Public exception types. +- `artifacts.py` + - Public workbook artifact wrapper. +- `codecs/` + - Public field codecs and codec base classes. +- `core/` + - Internal workflow orchestration and execution. +- `helper/` + - Internal adapter layer, currently centered on Pydantic integration. +- `i18n/` + - Internal message and locale handling. +- `_primitives/` + - Internal low-level types, constants, payload aliases, diagnostics, and deprecation helpers. +- `types/`, `exc.py`, `identity.py`, `header_models.py`, `const.py`, `util/convertor.py` + - Compatibility-oriented modules retained in the 2.x line. 
+ +## Public Surface vs Internal Implementation + +### Public surface + +These modules are the stable public entry points documented in `docs/public-api.md`: + +- `src/excelalchemy/__init__.py` +- `src/excelalchemy/config.py` +- `src/excelalchemy/metadata.py` +- `src/excelalchemy/results.py` +- `src/excelalchemy/exceptions.py` +- `src/excelalchemy/codecs/` +- `src/excelalchemy/artifacts.py` + +### Internal implementation + +These modules implement behavior but are not the recommended import paths for application code: + +- `src/excelalchemy/core/` +- `src/excelalchemy/helper/` +- `src/excelalchemy/i18n/` +- `src/excelalchemy/_primitives/` + +### Compatibility-only surface + +These exist to support the 2.x line and should not be treated as preferred implementation entry points for new work: + +- `src/excelalchemy/types/` +- `src/excelalchemy/exc.py` +- `src/excelalchemy/identity.py` +- `src/excelalchemy/header_models.py` +- `src/excelalchemy/const.py` +- `src/excelalchemy/util/convertor.py` + +## Major Modules and Responsibilities + +### Public-facing root modules + +- `src/excelalchemy/__init__.py` + - Re-exports `ExcelAlchemy`, configs, codecs, result types, exception types, and common identity/value types. + - Changes here affect top-level user imports directly. + +- `src/excelalchemy/config.py` + - Defines: + - `ExcelMode` + - `ImportMode` + - `ImporterConfig` + - `ExporterConfig` + - normalized schema/behavior/storage option groupings + - Also contains legacy Minio compatibility handling and deprecation warnings. + +- `src/excelalchemy/metadata.py` + - Defines public declaration helpers: + - `FieldMeta(...)` + - `ExcelMeta(...)` + - Also defines the internal metadata layers behind `FieldMetaInfo`: + - `DeclaredFieldMeta` + - `RuntimeFieldBinding` + - `WorkbookPresentationMeta` + - `ImportConstraints` + - This file is central when changing field declaration behavior, workbook comments, formatting hints, or constraint overlay rules. 
+ +- `src/excelalchemy/results.py` + - Defines public result objects: + - `ImportResult` + - `ValidateHeaderResult` + - `ValidateResult` + - `ValidateRowResult` + - `CellErrorMap` + - `RowIssueMap` + - This is the main file for API payload shape and structured error access. + +- `src/excelalchemy/exceptions.py` + - Defines the public exception model: + - `ExcelAlchemyError` + - `ExcelCellError` + - `ExcelRowError` + - `ProgrammaticError` + - `ConfigError` + +- `src/excelalchemy/artifacts.py` + - Defines `ExcelArtifact`, which wraps rendered workbook content as bytes, base64, or a data URL. + +### `core/` internal orchestration + +- `src/excelalchemy/core/alchemy.py` + - Main facade implementation. + - Builds layout and storage, exposes the top-level workflow methods, and surfaces inspection properties like `worksheet_table` and `cell_error_map`. + - This is the first internal file to inspect when changing how the facade behaves. + +- `src/excelalchemy/core/import_session.py` + - Owns one import run’s runtime state. + - Tracks: + - workbook load state + - header table + - worksheet table + - issue maps + - execution counts + - result rendering state + - `ImportSessionSnapshot` + - This is the main import lifecycle owner. + +- `src/excelalchemy/core/schema.py` + - Converts extracted field metadata into `ExcelSchemaLayout`. + - Responsible for: + - layout ordering + - unique label/key indexing + - composite field expansion + - merged-header detection for selected output keys + +- `src/excelalchemy/core/headers.py` + - Header parsing and header validation. + - Responsible for: + - detecting simple vs merged headers + - normalizing parsed headers into `ExcelHeader` objects + - comparing workbook headers against schema layout + +- `src/excelalchemy/core/rows.py` + - Row reconstruction and issue tracking. + - `RowAggregator` groups flattened worksheet data back into model-shaped payloads. 
+ - `ImportIssueTracker` maps row/cell failures back to workbook coordinates and prepends result columns. + +- `src/excelalchemy/core/executor.py` + - Dispatches the actual import execution path. + - Responsible for: + - choosing create/update/create-or-update behavior + - validating reconstructed payloads + - invoking configured callbacks + - mapping failures into row/cell issues + +- `src/excelalchemy/core/rendering.py` + - High-level rendering entry points for templates, exports, and import result workbooks. + +- `src/excelalchemy/core/writer.py` + - Lower-level workbook writing details: + - comments + - fills/colors + - workbook rows/cells + - result/reason columns + +- `src/excelalchemy/core/storage_protocol.py` + - Defines the `ExcelStorage` protocol. + - This is the main storage extension boundary. + +- `src/excelalchemy/core/storage.py` + - Resolves configured storage into a concrete gateway. + - Also defines the missing-storage fallback path. + +- `src/excelalchemy/core/storage_minio.py` + - Built-in Minio-backed storage implementation. + +- `src/excelalchemy/core/table.py` + - Defines `WorksheetTable`, `WorksheetRow`, and related helpers. + - This is the internal table abstraction used instead of pandas. + +### `codecs/` field behavior + +- `src/excelalchemy/codecs/base.py` + - Defines: + - `ExcelFieldCodec` + - `CompositeExcelFieldCodec` + - fallback logging helpers + - Start here when changing the codec contract itself. + +- `src/excelalchemy/codecs/*.py` + - Built-in concrete field codecs such as: + - `string.py` + - `number.py` + - `date.py` + - `date_range.py` + - `email.py` + - `phone_number.py` + - `money.py` + - `radio.py` + - `multi_checkbox.py` + - `organization.py` + - `staff.py` + - `tree.py` + - `url.py` + +### Adapter, i18n, and primitive helpers + +- `src/excelalchemy/helper/pydantic.py` + - Isolates the Pydantic boundary. 
+ - Responsible for: + - extracting model metadata + - resolving codec types + - normalizing validation messages + - mapping Pydantic validation output to `ExcelCellError` and `ExcelRowError` + +- `src/excelalchemy/i18n/messages.py` + - Central message lookup and locale handling. + - Important when changing workbook-facing text, runtime error text, or locale policy. + +- `src/excelalchemy/_primitives/constants.py` + - Internal constants and enum-like definitions used across the package. + +- `src/excelalchemy/_primitives/identity.py` + - Internal typed wrappers for labels, keys, row indexes, column indexes, URLs, and related string-like identifiers. + +- `src/excelalchemy/_primitives/payloads.py` + - Shared payload type aliases for import/export/data-converter/callback paths. + +- `src/excelalchemy/_primitives/diagnostics.py` + - Developer-facing diagnostic logging helpers. + +- `src/excelalchemy/_primitives/deprecation.py` + - Deprecation warning helpers used by compatibility modules. + +- `src/excelalchemy/_primitives/header_models.py` + - Internal parsed-header model objects. + +## Major Internal Flows + +### Import validation flow + +The import path is implemented roughly in this order: + +1. `src/excelalchemy/core/alchemy.py` + - `ExcelAlchemy.import_data(...)` creates a new import session. +2. `src/excelalchemy/core/import_session.py` + - loads workbook data through storage + - builds header and worksheet state +3. `src/excelalchemy/core/headers.py` + - parses headers + - validates headers against the schema layout +4. `src/excelalchemy/core/rows.py` + - reconstructs model-shaped row payloads +5. `src/excelalchemy/core/executor.py` + - validates and dispatches create/update/upsert logic +6. `src/excelalchemy/helper/pydantic.py` + - adapts Pydantic validation into ExcelAlchemy issues +7. `src/excelalchemy/core/rows.py` + - records row/cell failures in workbook coordinates +8. 
`src/excelalchemy/core/rendering.py` and `src/excelalchemy/core/writer.py` + - render the import result workbook when rows fail +9. `src/excelalchemy/results.py` + - exposes the final result through `ImportResult`, `CellErrorMap`, and `RowIssueMap` + +### Template generation flow + +The template path is implemented roughly in this order: + +1. `src/excelalchemy/core/alchemy.py` + - selects output keys and header shape +2. `src/excelalchemy/core/schema.py` + - provides ordered layout and merged-header decisions +3. `src/excelalchemy/codecs/` + - provide comments, display formatting, and field-specific workbook semantics +4. `src/excelalchemy/core/rendering.py` +5. `src/excelalchemy/core/writer.py` + - produce the workbook output +6. `src/excelalchemy/artifacts.py` + - wraps the output when the artifact API is used + +### Export flow + +The export path is implemented roughly in this order: + +1. `src/excelalchemy/core/alchemy.py` + - accepts export rows and selected keys +2. `src/excelalchemy/core/schema.py` + - resolves output layout and merged-header needs +3. `src/excelalchemy/codecs/` + - format workbook-facing values +4. `src/excelalchemy/core/rendering.py` +5. `src/excelalchemy/core/writer.py` +6. 
`src/excelalchemy/core/storage_protocol.py` and `src/excelalchemy/core/storage.py` + - are used only when the upload path is chosen + +### Storage integration flow + +Storage-related behavior is split into three concerns: + +- contract: + - `src/excelalchemy/core/storage_protocol.py` +- resolution: + - `src/excelalchemy/core/storage.py` +- built-in Minio backend: + - `src/excelalchemy/core/storage_minio.py` + +The recommended 2.x design is: + +- config holds `storage=...` +- `build_storage_gateway(...)` resolves it +- import reads workbook data as `WorksheetTable` +- export/import-result uploads return a URL through the storage implementation +- custom storage readers currently use `src/excelalchemy/core/table.py` for that `WorksheetTable` contract + +## Where To Look When Changing Specific Behavior + +### Changing public API behavior + +Start here: + +- `src/excelalchemy/__init__.py` +- `src/excelalchemy/config.py` +- `src/excelalchemy/metadata.py` +- `src/excelalchemy/results.py` +- `src/excelalchemy/exceptions.py` +- `docs/public-api.md` +- `MIGRATIONS.md` +- `tests/contracts/` + +Use extra caution when changing: + +- exported names +- config constructor behavior +- result payload shape +- exception wording or exception type mapping +- compatibility aliases + +### Changing import validation behavior + +Start here: + +- `src/excelalchemy/core/import_session.py` +- `src/excelalchemy/core/headers.py` +- `src/excelalchemy/core/rows.py` +- `src/excelalchemy/core/executor.py` +- `src/excelalchemy/helper/pydantic.py` +- `src/excelalchemy/results.py` +- `tests/contracts/test_import_contract.py` +- `tests/contracts/test_core_components_contract.py` +- `tests/contracts/test_pydantic_contract.py` + +Typical examples: + +- header validation rules +- row reconstruction +- Pydantic error mapping +- create/update/upsert behavior +- result-workbook error placement + +### Changing export or template generation behavior + +Start here: + +- `src/excelalchemy/core/alchemy.py` +- 
`src/excelalchemy/core/schema.py` +- `src/excelalchemy/core/rendering.py` +- `src/excelalchemy/core/writer.py` +- `src/excelalchemy/codecs/` +- `tests/contracts/test_template_contract.py` +- `tests/contracts/test_export_contract.py` + +Typical examples: + +- workbook comments +- merged headers +- selected output keys +- workbook-facing display formatting +- artifact generation + +### Changing storage integration behavior + +Start here: + +- `src/excelalchemy/core/storage_protocol.py` +- `src/excelalchemy/core/storage.py` +- `src/excelalchemy/core/storage_minio.py` +- `src/excelalchemy/config.py` +- `examples/custom_storage.py` +- `tests/contracts/test_storage_contract.py` +- `tests/unit/test_config_options.py` + +Typical examples: + +- storage contract shape +- default gateway selection +- missing-storage behavior +- Minio compatibility behavior +- upload payload expectations + +### Changing locale-aware output behavior + +Start here: + +- `src/excelalchemy/i18n/messages.py` +- `src/excelalchemy/metadata.py` +- `src/excelalchemy/core/alchemy.py` +- `src/excelalchemy/core/writer.py` +- `docs/locale.md` +- `tests/contracts/test_template_contract.py` +- `tests/contracts/test_import_contract.py` + +Typical examples: + +- workbook instruction text +- header comments +- result/reason column labels +- row status text +- fallback locale behavior + +## Implementation Cautions + +- Do not treat compatibility modules under `src/excelalchemy/types/` and the root compatibility shims as preferred edit points for new behavior. +- Do not reintroduce pandas-style assumptions into the runtime path; this package now uses `WorksheetTable`. +- Do not hard-wire Minio into core workflow logic; storage is intentionally abstracted behind `ExcelStorage`. +- Treat `src/excelalchemy/core/table.py` as a narrow extension seam for current 2.x storage integrations, not as a general application import surface. 
+- If you change result payload shape, inspect: + - `src/excelalchemy/results.py` + - `docs/result-objects.md` + - `docs/api-response-cookbook.md` + - `scripts/smoke_api_payload_snapshot.py` + - `files/example-outputs/import-failure-api-payload.json` +- If you change docs-visible example behavior, inspect: + - `examples/` + - `files/example-outputs/` + - `scripts/generate_example_output_assets.py` + - `scripts/smoke_examples.py` + - `scripts/smoke_docs_assets.py` diff --git a/src/excelalchemy/__init__.py b/src/excelalchemy/__init__.py index c948d54..9843128 100644 --- a/src/excelalchemy/__init__.py +++ b/src/excelalchemy/__init__.py @@ -1,6 +1,6 @@ """A Python Library for Reading and Writing Excel Files""" -__version__ = '2.2.8' +__version__ = '2.3.0' from excelalchemy._primitives.constants import CharacterSet, DataRangeOption, DateFormat, Option from excelalchemy._primitives.deprecation import ExcelAlchemyDeprecationWarning from excelalchemy._primitives.identity import ( @@ -45,7 +45,13 @@ from excelalchemy.config import ExporterConfig, ImporterConfig, ImportMode from excelalchemy.core.alchemy import ExcelAlchemy from excelalchemy.core.storage_protocol import ExcelStorage -from excelalchemy.exceptions import ConfigError, ExcelCellError, ExcelRowError, ProgrammaticError +from excelalchemy.exceptions import ( + ConfigError, + ExcelCellError, + ExcelRowError, + ProgrammaticError, + WorksheetNotFoundError, +) from excelalchemy.helper.pydantic import extract_pydantic_model from excelalchemy.metadata import ExcelMeta, FieldMeta, PatchFieldMeta from excelalchemy.results import ( @@ -53,6 +59,8 @@ CellIssueRecord, CodeIssueSummary, FieldIssueSummary, + ImportPreflightResult, + ImportPreflightStatus, ImportResult, RowIssueMap, RowIssueRecord, @@ -94,6 +102,8 @@ 'FieldIssueSummary', 'FieldMeta', 'ImportMode', + 'ImportPreflightResult', + 'ImportPreflightStatus', 'ImportResult', 'ImporterConfig', 'Key', @@ -140,6 +150,7 @@ 'ValidateHeaderResult', 'ValidateResult', 
'ValidateRowResult', + 'WorksheetNotFoundError', 'extract_pydantic_model', 'flatten', ] diff --git a/src/excelalchemy/codecs/boolean.py b/src/excelalchemy/codecs/boolean.py index 3b5cbc6..7fb28f8 100644 --- a/src/excelalchemy/codecs/boolean.py +++ b/src/excelalchemy/codecs/boolean.py @@ -39,6 +39,7 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: [ declared.comment_required, presentation.comment_hint, + *([presentation.comment_example] if presentation.comment_example else []), ] ) diff --git a/src/excelalchemy/codecs/date.py b/src/excelalchemy/codecs/date.py index 3ce5700..587c144 100644 --- a/src/excelalchemy/codecs/date.py +++ b/src/excelalchemy/codecs/date.py @@ -41,6 +41,7 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: presentation.comment_date_format, presentation.comment_date_range_option, presentation.comment_hint, + *([presentation.comment_example] if presentation.comment_example else []), ] ) diff --git a/src/excelalchemy/codecs/date_range.py b/src/excelalchemy/codecs/date_range.py index 9a6ac3d..6d23946 100644 --- a/src/excelalchemy/codecs/date_range.py +++ b/src/excelalchemy/codecs/date_range.py @@ -62,6 +62,7 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: declared.comment_required, presentation.comment_date_format, dmsg(MessageKey.COMMENT_DATE_RANGE_START_NOT_AFTER_END, extra_hint=presentation.hint or ''), + *([presentation.comment_example] if presentation.comment_example else []), ] ) diff --git a/src/excelalchemy/codecs/multi_checkbox.py b/src/excelalchemy/codecs/multi_checkbox.py index 76297c1..e507d78 100644 --- a/src/excelalchemy/codecs/multi_checkbox.py +++ b/src/excelalchemy/codecs/multi_checkbox.py @@ -60,6 +60,7 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: presentation.comment_options, dmsg(MessageKey.COMMENT_SELECTION_MODE, value=dmsg(MessageKey.COMMENT_SELECTION_VALUE_MULTI)), presentation.comment_hint, + *([presentation.comment_example] if presentation.comment_example else []), ] ) diff 
--git a/src/excelalchemy/codecs/number.py b/src/excelalchemy/codecs/number.py index 412fc10..bd8f8d8 100644 --- a/src/excelalchemy/codecs/number.py +++ b/src/excelalchemy/codecs/number.py @@ -56,6 +56,7 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: presentation.comment_fraction_digits, dmsg(MessageKey.COMMENT_NUMBER_INPUT_RANGE, value=cls.__get_range_description__(field_meta)), presentation.comment_unit, + *([presentation.comment_example] if presentation.comment_example else []), ] ) diff --git a/src/excelalchemy/codecs/organization.py b/src/excelalchemy/codecs/organization.py index c54bd58..1e0511a 100644 --- a/src/excelalchemy/codecs/organization.py +++ b/src/excelalchemy/codecs/organization.py @@ -28,7 +28,11 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: else MessageKey.COMMENT_REQUIRED_VALUE_OPTIONAL ) return '\n'.join( - [dmsg(MessageKey.COMMENT_REQUIRED, value=dmsg(value_key)), dmsg(MessageKey.COMMENT_HINT, value=extra_hint)] + [ + dmsg(MessageKey.COMMENT_REQUIRED, value=dmsg(value_key)), + dmsg(MessageKey.COMMENT_HINT, value=extra_hint), + *([presentation.comment_example] if presentation.comment_example else []), + ] ) @classmethod @@ -64,6 +68,7 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: [ declared.comment_required, dmsg(MessageKey.COMMENT_HINT, value=presentation.hint or dmsg(MessageKey.MULTI_ORGANIZATION_HINT)), + *([presentation.comment_example] if presentation.comment_example else []), ] ) diff --git a/src/excelalchemy/codecs/radio.py b/src/excelalchemy/codecs/radio.py index aa210ae..c771bcd 100644 --- a/src/excelalchemy/codecs/radio.py +++ b/src/excelalchemy/codecs/radio.py @@ -61,6 +61,7 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: presentation.comment_options, dmsg(MessageKey.COMMENT_SELECTION_MODE, value=dmsg(MessageKey.COMMENT_SELECTION_VALUE_SINGLE)), presentation.comment_hint, + *([presentation.comment_example] if presentation.comment_example else []), ] ) diff --git 
a/src/excelalchemy/codecs/staff.py b/src/excelalchemy/codecs/staff.py index faba879..213cafc 100644 --- a/src/excelalchemy/codecs/staff.py +++ b/src/excelalchemy/codecs/staff.py @@ -28,7 +28,13 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: if declared.effective_required else MessageKey.COMMENT_REQUIRED_VALUE_OPTIONAL ) - return f'{dmsg(MessageKey.COMMENT_REQUIRED, value=dmsg(value_key))} \n{dmsg(MessageKey.COMMENT_HINT, value=extra_hint)}' + base_comment = ( + f'{dmsg(MessageKey.COMMENT_REQUIRED, value=dmsg(value_key))} ' + f'\n{dmsg(MessageKey.COMMENT_HINT, value=extra_hint)}' + ) + if presentation.comment_example: + return f'{base_comment}\n{presentation.comment_example}' + return base_comment @classmethod def parse_input(cls, value: object, field_meta: FieldMetaInfo) -> str: @@ -64,6 +70,7 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: [ declared.comment_required, dmsg(MessageKey.COMMENT_HINT, value=presentation.hint or dmsg(MessageKey.MULTI_STAFF_HINT)), + *([presentation.comment_example] if presentation.comment_example else []), ] ) diff --git a/src/excelalchemy/codecs/string.py b/src/excelalchemy/codecs/string.py index 516fe12..e79960e 100644 --- a/src/excelalchemy/codecs/string.py +++ b/src/excelalchemy/codecs/string.py @@ -93,6 +93,7 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: constraints.comment_max_length, dmsg(MessageKey.COMMENT_STRING_ALLOWED_CONTENT), presentation.comment_hint, + *([presentation.comment_example] if presentation.comment_example else []), ] ) diff --git a/src/excelalchemy/codecs/tree.py b/src/excelalchemy/codecs/tree.py index 2be5295..5b4c6af 100644 --- a/src/excelalchemy/codecs/tree.py +++ b/src/excelalchemy/codecs/tree.py @@ -25,6 +25,7 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: [ declared.comment_required, dmsg(MessageKey.COMMENT_HINT, value=presentation.hint or dmsg(MessageKey.SINGLE_TREE_HINT)), + *([presentation.comment_example] if presentation.comment_example else 
[]), ] ) @@ -68,7 +69,11 @@ def build_comment(cls, field_meta: FieldMetaInfo) -> str: else MessageKey.COMMENT_REQUIRED_VALUE_OPTIONAL ) return '\n'.join( - [dmsg(MessageKey.COMMENT_REQUIRED, value=dmsg(value_key)), dmsg(MessageKey.COMMENT_HINT, value=extra_hint)] + [ + dmsg(MessageKey.COMMENT_REQUIRED, value=dmsg(value_key)), + dmsg(MessageKey.COMMENT_HINT, value=extra_hint), + *([presentation.comment_example] if presentation.comment_example else []), + ] ) @classmethod diff --git a/src/excelalchemy/core/abstract.py b/src/excelalchemy/core/abstract.py index 7d0a717..b91bb8d 100644 --- a/src/excelalchemy/core/abstract.py +++ b/src/excelalchemy/core/abstract.py @@ -1,12 +1,12 @@ from abc import ABC, abstractmethod -from collections.abc import Sequence +from collections.abc import Callable, Sequence from pydantic import BaseModel from excelalchemy._primitives.identity import DataUrlStr, UrlStr from excelalchemy._primitives.payloads import ExportRowPayload from excelalchemy.artifacts import ExcelArtifact -from excelalchemy.results import ImportResult +from excelalchemy.results import ImportPreflightResult, ImportResult class ABCExcelAlchemy[ @@ -29,9 +29,19 @@ def download_template_artifact( """Render an import template and return a structured Excel artifact.""" @abstractmethod - async def import_data(self, input_excel_name: str, output_excel_name: str) -> ImportResult: + async def import_data( + self, + input_excel_name: str, + output_excel_name: str, + *, + on_event: Callable[[dict[str, object]], None] | None = None, + ) -> ImportResult: """Import workbook data and return a structured result.""" + @abstractmethod + def preflight_import(self, input_excel_name: str) -> ImportPreflightResult: + """Run lightweight structural validation for one workbook.""" + @abstractmethod def export(self, data: list[ExportRowPayload], keys: Sequence[str] | None = None) -> DataUrlStr: """Export rows and return the workbook as a data URL.""" diff --git a/src/excelalchemy/core/alchemy.py 
b/src/excelalchemy/core/alchemy.py index 34bdd36..8adbe9c 100644 --- a/src/excelalchemy/core/alchemy.py +++ b/src/excelalchemy/core/alchemy.py @@ -1,4 +1,4 @@ -from collections.abc import Sequence +from collections.abc import Callable, Sequence from typing import cast from pydantic import BaseModel @@ -22,6 +22,7 @@ from excelalchemy.core.abstract import ABCExcelAlchemy from excelalchemy.core.headers import ExcelHeaderParser, ExcelHeaderValidator from excelalchemy.core.import_session import ImportSession, ImportSessionSnapshot, build_import_result_field_meta +from excelalchemy.core.preflight import ImportPreflight from excelalchemy.core.rendering import ExcelRenderer from excelalchemy.core.schema import ExcelSchemaLayout from excelalchemy.core.storage import build_storage_gateway @@ -33,7 +34,7 @@ from excelalchemy.i18n.messages import display_message as dmsg from excelalchemy.i18n.messages import message as msg from excelalchemy.metadata import FieldMetaInfo -from excelalchemy.results import CellErrorMap, ImportResult, RowIssueMap +from excelalchemy.results import CellErrorMap, ImportPreflightResult, ImportResult, RowIssueMap from excelalchemy.util.file import flatten HEADER_HINT_LINE_COUNT = 1 @@ -144,14 +145,26 @@ def download_template_artifact( ) -> ExcelArtifact: return ExcelArtifact.from_data_url(self.download_template(sample_data), filename=filename) - async def import_data(self, input_excel_name: str, output_excel_name: str) -> ImportResult: + async def import_data( + self, + input_excel_name: str, + output_excel_name: str, + *, + on_event: Callable[[dict[str, object]], None] | None = None, + ) -> ImportResult: assert isinstance(self.config, ImporterConfig) if self.excel_mode != ExcelMode.IMPORT: raise ConfigError(msg(MessageKey.IMPORT_MODE_ONLY_METHOD)) session = self._new_import_session() self._last_import_session = session - return await session.run(input_excel_name, output_excel_name) + return await session.run(input_excel_name, output_excel_name, 
on_event=on_event) + + def preflight_import(self, input_excel_name: str) -> ImportPreflightResult: + assert isinstance(self.config, ImporterConfig) + if self.excel_mode != ExcelMode.IMPORT: + raise ConfigError(msg(MessageKey.IMPORT_MODE_ONLY_METHOD)) + return self._new_import_preflight().run(input_excel_name) def export(self, data: list[ExportRowPayload], keys: Sequence[str] | None = None) -> DataUrlStr: with use_display_locale(self.locale): @@ -372,6 +385,16 @@ def _new_import_session(self) -> ImportSession[ContextT, ImportCreateModelT, Imp context=self._context, ) + def _new_import_preflight(self) -> ImportPreflight[ContextT, ImportCreateModelT, ImportUpdateModelT]: + assert isinstance(self.config, ImporterConfig) + return ImportPreflight( + config=self.config, + layout=self._layout, + storage_gateway=self._storage_gateway, + header_parser=self._header_parser, + header_validator=self._header_validator, + ) + def _require_last_import_session(self) -> ImportSession[ContextT, ImportCreateModelT, ImportUpdateModelT]: if self._last_import_session is None: raise ConfigError(msg(MessageKey.WORKSHEET_TABLE_NOT_LOADED)) diff --git a/src/excelalchemy/core/import_session.py b/src/excelalchemy/core/import_session.py index 6a8f4b4..ad166e0 100644 --- a/src/excelalchemy/core/import_session.py +++ b/src/excelalchemy/core/import_session.py @@ -2,6 +2,7 @@ from __future__ import annotations +from collections.abc import Callable from dataclasses import dataclass, replace from enum import StrEnum from functools import cached_property @@ -9,6 +10,7 @@ from pydantic import BaseModel from excelalchemy._primitives.constants import REASON_COLUMN_KEY, RESULT_COLUMN_KEY +from excelalchemy._primitives.diagnostics import runtime_logger from excelalchemy._primitives.header_models import ExcelHeader from excelalchemy._primitives.identity import DataUrlStr, RowIndex, UniqueLabel, UrlStr from excelalchemy._primitives.payloads import FlatRowPayload, ModelRowPayload @@ -101,6 +103,7 @@ def 
__init__( self.row_aggregator = RowAggregator(self.layout, self.behavior.import_mode) self.executor = ImportExecutor(self.config, self.issue_tracker, lambda: self.context) self._snapshot = ImportSessionSnapshot() + self._on_event: Callable[[dict[str, object]], None] | None = None @property def cell_error_map(self) -> CellErrorMap: @@ -143,61 +146,82 @@ def extra_header_count_on_import(self) -> int: return 1 return 0 - async def run(self, input_excel_name: str, output_excel_name: str) -> ImportResult: + async def run( + self, + input_excel_name: str, + output_excel_name: str, + *, + on_event: Callable[[dict[str, object]], None] | None = None, + ) -> ImportResult: + self._on_event = on_event with use_display_locale(self.locale): - self._snapshot = replace( - self._snapshot, - phase=ImportSessionPhase.INITIALIZED, - input_excel_name=input_excel_name, - output_excel_name=output_excel_name, - rendered_result_workbook=False, - result=None, - data_row_count=0, - processed_row_count=0, - success_count=0, - fail_count=0, - ) - - validate_header = self._validate_header(input_excel_name) - if not validate_header.is_valid: - header_result = ImportResult.from_validate_header_result(validate_header) + try: self._snapshot = replace( self._snapshot, - phase=ImportSessionPhase.COMPLETED, - has_merged_header=self.input_excel_has_merged_header, - result=header_result.result, + phase=ImportSessionPhase.INITIALIZED, + input_excel_name=input_excel_name, + output_excel_name=output_excel_name, + rendered_result_workbook=False, + result=None, + data_row_count=0, + processed_row_count=0, + success_count=0, + fail_count=0, + ) + self._emit_event({'event': 'started'}) + + validate_header = self._validate_header(input_excel_name) + self._emit_event(self._header_validated_event(validate_header)) + if not validate_header.is_valid: + header_result = ImportResult.from_validate_header_result(validate_header) + self._snapshot = replace( + self._snapshot, + phase=ImportSessionPhase.COMPLETED, + 
has_merged_header=self.input_excel_has_merged_header, + result=header_result.result, + ) + self._emit_event(self._completed_event(header_result)) + return header_result + + self._prepare_rows_for_execution() + + all_success, success_count, fail_count = await self._execute_rows() + + url = None + if not all_success: + self._add_result_column() + content_with_prefix = self._render_import_result_excel() + url = self._upload_file(output_excel_name, content_with_prefix) + self._snapshot = replace( + self._snapshot, + phase=ImportSessionPhase.RESULT_RENDERED, + rendered_result_workbook=True, + ) + + import_result = ImportResult( + result=(ValidateResult.DATA_INVALID, ValidateResult.SUCCESS)[int(all_success)], + url=url, + success_count=success_count, + fail_count=fail_count, ) - return header_result - - self._prepare_rows_for_execution() - - all_success, success_count, fail_count = await self._execute_rows() - - url = None - if not all_success: - self._add_result_column() - content_with_prefix = self._render_import_result_excel() - url = self._upload_file(output_excel_name, content_with_prefix) self._snapshot = replace( self._snapshot, - phase=ImportSessionPhase.RESULT_RENDERED, - rendered_result_workbook=True, + phase=ImportSessionPhase.COMPLETED, + success_count=success_count, + fail_count=fail_count, + result=import_result.result, ) - - import_result = ImportResult( - result=(ValidateResult.DATA_INVALID, ValidateResult.SUCCESS)[int(all_success)], - url=url, - success_count=success_count, - fail_count=fail_count, - ) - self._snapshot = replace( - self._snapshot, - phase=ImportSessionPhase.COMPLETED, - success_count=success_count, - fail_count=fail_count, - result=import_result.result, - ) - return import_result + self._emit_event(self._completed_event(import_result)) + return import_result + except Exception as error: + self._emit_event( + { + 'event': 'failed', + 'error_type': type(error).__name__, + 'error_message': str(error), + } + ) + raise def 
_validate_header(self, input_excel_name: str) -> ValidateHeaderResult: self._load_workbook(input_excel_name) @@ -244,6 +268,15 @@ async def _execute_rows(self) -> tuple[bool, int, int]: processed_row_count += 1 all_success = all_success and success success_count, fail_count = (success_count + 1, fail_count) if success else (success_count, fail_count + 1) + self._emit_event( + { + 'event': 'row_processed', + 'processed_row_count': processed_row_count, + 'total_row_count': self._snapshot.data_row_count, + 'success_count': success_count, + 'fail_count': fail_count, + } + ) self._snapshot = replace( self._snapshot, @@ -290,6 +323,43 @@ def _add_result_column(self) -> None: extra_header_count_on_import=self.extra_header_count_on_import, ) + def _emit_event(self, event: dict[str, object]) -> None: + if self._on_event is None: + return + try: + self._on_event(event) + except Exception: + runtime_logger.exception( + 'Import lifecycle event handler raised an exception while processing event %s.', + event.get('event'), + ) + + @staticmethod + def _header_validated_event(validate_header: ValidateHeaderResult) -> dict[str, object]: + event: dict[str, object] = { + 'event': 'header_validated', + 'is_valid': validate_header.is_valid, + } + if not validate_header.is_valid: + event.update( + { + 'missing_required': [str(label) for label in validate_header.missing_required], + 'missing_primary': [str(label) for label in validate_header.missing_primary], + 'unrecognized': [str(label) for label in validate_header.unrecognized], + 'duplicated': [str(label) for label in validate_header.duplicated], + } + ) + return event + + def _completed_event(self, import_result: ImportResult) -> dict[str, object]: + return { + 'event': 'completed', + 'result': import_result.result.value, + 'success_count': import_result.success_count, + 'fail_count': import_result.fail_count, + 'url': import_result.url, + } + @property def df(self) -> WorksheetTable: """Backward-compatible alias for 
worksheet_table.""" diff --git a/src/excelalchemy/core/preflight.py b/src/excelalchemy/core/preflight.py new file mode 100644 index 0000000..786d850 --- /dev/null +++ b/src/excelalchemy/core/preflight.py @@ -0,0 +1,97 @@ +"""Lightweight workbook preflight for structural import checks.""" + +from __future__ import annotations + +from dataclasses import dataclass + +from pydantic import BaseModel + +from excelalchemy.config import ImporterConfig +from excelalchemy.core.headers import ExcelHeaderParser, ExcelHeaderValidator +from excelalchemy.core.schema import ExcelSchemaLayout +from excelalchemy.core.storage_protocol import ExcelStorage +from excelalchemy.core.table import WorksheetTable +from excelalchemy.exceptions import ConfigError, WorksheetNotFoundError +from excelalchemy.results import ( + ImportPreflightResult, + ImportPreflightStatus, +) + +HEADER_HINT_LINE_COUNT = 1 + + +@dataclass(slots=True) +class ImportPreflight[ + ContextT, + ImportCreateModelT: BaseModel, + ImportUpdateModelT: BaseModel, +]: + """Read-only structural validation for one workbook.""" + + config: ImporterConfig[ContextT, ImportCreateModelT, ImportUpdateModelT] + layout: ExcelSchemaLayout + storage_gateway: ExcelStorage + header_parser: ExcelHeaderParser + header_validator: ExcelHeaderValidator + + def run(self, input_excel_name: str) -> ImportPreflightResult: + sheet_name = self.config.schema_options.sheet_name + + try: + worksheet_table = self.storage_gateway.read_excel_table( + input_excel_name, + skiprows=HEADER_HINT_LINE_COUNT, + sheet_name=sheet_name, + ) + except ConfigError: + raise + except WorksheetNotFoundError: + return ImportPreflightResult( + status=ImportPreflightStatus.SHEET_MISSING, + sheet_name=sheet_name, + sheet_exists=False, + structural_issue_codes=[], + ) + + return self._validate_loaded_table(worksheet_table, sheet_name=sheet_name) + + def _validate_loaded_table(self, worksheet_table: WorksheetTable, *, sheet_name: str) -> ImportPreflightResult: + if 
len(worksheet_table) == 0: + return ImportPreflightResult( + status=ImportPreflightStatus.STRUCTURE_INVALID, + sheet_name=sheet_name, + sheet_exists=True, + structural_issue_codes=['header_row_missing'], + ) + + header_table = worksheet_table.head(2) + has_merged_header = self.header_parser.has_merged_header(header_table) + if has_merged_header and len(header_table) < 2: + return ImportPreflightResult( + status=ImportPreflightStatus.STRUCTURE_INVALID, + sheet_name=sheet_name, + sheet_exists=True, + has_merged_header=True, + structural_issue_codes=['merged_header_incomplete'], + ) + + try: + headers = self.header_parser.extract(header_table) + except Exception: + return ImportPreflightResult( + status=ImportPreflightStatus.STRUCTURE_INVALID, + sheet_name=sheet_name, + sheet_exists=True, + has_merged_header=has_merged_header, + structural_issue_codes=['header_block_unreadable'], + ) + + validate_header = self.header_validator.validate(headers, self.layout, self.config.behavior.import_mode) + estimated_row_count = max(0, len(worksheet_table) - 1 - int(has_merged_header)) + return ImportPreflightResult.from_validate_header_result( + validate_header, + sheet_name=sheet_name, + sheet_exists=True, + has_merged_header=has_merged_header, + estimated_row_count=estimated_row_count, + ) diff --git a/src/excelalchemy/core/storage_minio.py b/src/excelalchemy/core/storage_minio.py index 2325503..44131b1 100644 --- a/src/excelalchemy/core/storage_minio.py +++ b/src/excelalchemy/core/storage_minio.py @@ -15,7 +15,7 @@ from excelalchemy.config import ExporterConfig, ImporterConfig from excelalchemy.core.storage_protocol import ExcelStorage from excelalchemy.core.table import WorksheetTable -from excelalchemy.exceptions import ConfigError +from excelalchemy.exceptions import ConfigError, WorksheetNotFoundError from excelalchemy.i18n.messages import MessageKey from excelalchemy.i18n.messages import message as msg from excelalchemy.util.file import remove_excel_prefix @@ -52,7 +52,11 
@@ def read_excel_table(self, input_excel_name: str, *, skiprows: int, sheet_name: workbook = load_workbook(cast(BinaryIO, file_object), data_only=True) try: if sheet_name not in workbook.sheetnames: - raise ValueError(msg(MessageKey.WORKSHEET_NOT_FOUND, sheet_name=sheet_name)) + raise WorksheetNotFoundError( + msg(MessageKey.WORKSHEET_NOT_FOUND, sheet_name=sheet_name), + message_key=MessageKey.WORKSHEET_NOT_FOUND, + sheet_name=sheet_name, + ) worksheet = workbook[sheet_name] return self._worksheet_to_table(worksheet, skiprows=skiprows) finally: diff --git a/src/excelalchemy/exceptions.py b/src/excelalchemy/exceptions.py index 447a49c..0116626 100644 --- a/src/excelalchemy/exceptions.py +++ b/src/excelalchemy/exceptions.py @@ -160,3 +160,10 @@ class ConfigError(ExcelAlchemyError): def __repr__(self) -> str: return f"{type(self).__name__}(message='{self.message}', detail={self.detail!r})" + + +class WorksheetNotFoundError(ExcelAlchemyError): + """Raised when the configured worksheet does not exist in the workbook.""" + + def __repr__(self) -> str: + return f"{type(self).__name__}(message='{self.message}', detail={self.detail!r})" diff --git a/src/excelalchemy/i18n/messages.py b/src/excelalchemy/i18n/messages.py index 5860734..e53507d 100644 --- a/src/excelalchemy/i18n/messages.py +++ b/src/excelalchemy/i18n/messages.py @@ -109,6 +109,7 @@ class MessageKey(StrEnum): COMMENT_DATE_FORMAT = 'comment_date_format' COMMENT_DATE_RANGE_OPTION = 'comment_date_range_option' COMMENT_HINT = 'comment_hint' + COMMENT_EXAMPLE = 'comment_example' COMMENT_OPTIONS = 'comment_options' COMMENT_FRACTION_DIGITS = 'comment_fraction_digits' COMMENT_UNIT = 'comment_unit' @@ -293,6 +294,7 @@ class MessageKey(StrEnum): MessageKey.COMMENT_DATE_FORMAT: 'Format: date ({value})', MessageKey.COMMENT_DATE_RANGE_OPTION: 'Range: {value}', MessageKey.COMMENT_HINT: 'Hint: {value}', + MessageKey.COMMENT_EXAMPLE: 'Example: {value}', MessageKey.COMMENT_OPTIONS: 'Options: {value}', 
MessageKey.COMMENT_FRACTION_DIGITS: 'Fraction digits: {value}', MessageKey.COMMENT_UNIT: 'Unit: {value}', @@ -368,6 +370,7 @@ class MessageKey(StrEnum): MessageKey.COMMENT_DATE_FORMAT: '格式:日期({value})', MessageKey.COMMENT_DATE_RANGE_OPTION: '范围:{value}', MessageKey.COMMENT_HINT: '提示:{value}', + MessageKey.COMMENT_EXAMPLE: '示例:{value}', MessageKey.COMMENT_OPTIONS: '选项:{value}', MessageKey.COMMENT_FRACTION_DIGITS: '小数位数:{value}', MessageKey.COMMENT_UNIT: '单位:{value}', diff --git a/src/excelalchemy/metadata.py b/src/excelalchemy/metadata.py index 4cd9c45..597ed74 100644 --- a/src/excelalchemy/metadata.py +++ b/src/excelalchemy/metadata.py @@ -137,6 +137,7 @@ class WorkbookPresentationMeta: options: tuple[Option, ...] | None = None unit: str | None = None hint: str | None = None + example_value: str | None = None @property def comment_date_format(self) -> str: @@ -161,6 +162,12 @@ def comment_hint(self) -> str: return '' return dmsg(MessageKey.COMMENT_HINT, value=self.hint) + @property + def comment_example(self) -> str: + if self.example_value is None or not self.example_value.strip(): + return '' + return dmsg(MessageKey.COMMENT_EXAMPLE, value=self.example_value) + @property def comment_options(self) -> str: if self.options is None: @@ -293,6 +300,7 @@ def __init__( options: list[Option] | None = None, unit: str | None = None, hint: str | None = None, + example_value: str | None = None, ge: float | None = None, le: float | None = None, max_digits: int | None = None, @@ -321,6 +329,7 @@ def __init__( options=_normalize_options(options), unit=unit, hint=hint, + example_value=example_value, ) self.import_constraints = ImportConstraints( ge=ge, @@ -459,6 +468,10 @@ def comment_date_range_option(self) -> str: def comment_hint(self) -> str: return self.presentation_meta.comment_hint + @property + def comment_example(self) -> str: + return self.presentation_meta.comment_example + @property def comment_options(self) -> str: return self.presentation_meta.comment_options @@ 
-646,6 +659,14 @@ def hint(self) -> str | None: def hint(self, value: str | None) -> None: self.presentation_meta = replace(self.presentation_meta, hint=value) + @property + def example_value(self) -> str | None: + return self.presentation_meta.example_value + + @example_value.setter + def example_value(self, value: str | None) -> None: + self.presentation_meta = replace(self.presentation_meta, example_value=value) + @property def importer_ge(self) -> float | None: return self.import_constraints.ge @@ -800,6 +821,7 @@ def _build_excel_metadata( options: list[Option] | None = None, unit: str | None = None, hint: str | None = None, + example_value: str | None = None, ge: float | None = None, le: float | None = None, max_digits: int | None = None, @@ -825,6 +847,7 @@ def _build_excel_metadata( options=options, unit=unit, hint=hint, + example_value=example_value, ge=ge, le=le, max_digits=max_digits, @@ -853,6 +876,7 @@ def ExcelMeta( options: list[Option] | None = None, unit: str | None = None, hint: str | None = None, + example_value: str | None = None, ge: float | None = None, le: float | None = None, max_digits: int | None = None, @@ -879,6 +903,7 @@ def ExcelMeta( options=options, unit=unit, hint=hint, + example_value=example_value, ge=ge, le=le, max_digits=max_digits, @@ -910,6 +935,7 @@ def FieldMeta( options: list[Option] | None = None, unit: str | None = None, hint: str | None = None, + example_value: str | None = None, default_factory: FieldDefaultFactory | None = None, alias: str | None = None, title: str | None = None, @@ -950,6 +976,7 @@ def FieldMeta( options=options, unit=unit, hint=hint, + example_value=example_value, ge=ge, le=le, max_digits=max_digits, diff --git a/src/excelalchemy/results.py b/src/excelalchemy/results.py index 7218b03..3418b40 100644 --- a/src/excelalchemy/results.py +++ b/src/excelalchemy/results.py @@ -20,6 +20,63 @@ def _empty_labels() -> list[Label]: type RowIssue = ExcelRowError | ExcelCellError +@dataclass(slots=True, 
frozen=True) +class RemediationHint: + """Optional remediation hint data for frontend-oriented payloads.""" + + suggested_action: str | None = None + fix_hint: str | None = None + + def to_dict(self) -> dict[str, str]: + payload: dict[str, str] = {} + if self.suggested_action is not None: + payload['suggested_action'] = self.suggested_action + if self.fix_hint is not None: + payload['fix_hint'] = self.fix_hint + return payload + + +_REMEDIATION_HINTS_BY_MESSAGE_KEY: dict[MessageKey, RemediationHint] = { + MessageKey.VALID_EMAIL_REQUIRED: RemediationHint( + suggested_action='Enter a complete email address and re-upload the workbook.', + fix_hint='Use a format such as name@example.com.', + ), + MessageKey.INVALID_NUMBER_ENTER_NUMBER: RemediationHint( + suggested_action='Replace the invalid value with a numeric value and re-upload the workbook.', + fix_hint='Use digits only and avoid free-text values in this cell.', + ), + MessageKey.ENTER_NUMBER: RemediationHint( + suggested_action='Enter a numeric value and re-upload the workbook.', + fix_hint='Use digits only and avoid leaving the field as free text.', + ), + MessageKey.ENTER_NUMBER_EXPECTED_FORMAT: RemediationHint( + suggested_action='Correct the cell value to the expected numeric format and re-upload the workbook.', + fix_hint='Match the number format shown in the workbook guidance.', + ), + MessageKey.VALID_URL_REQUIRED: RemediationHint( + suggested_action='Enter a complete URL and re-upload the workbook.', + fix_hint='Use a format such as https://example.com.', + ), + MessageKey.VALID_PHONE_NUMBER_REQUIRED: RemediationHint( + suggested_action='Enter a valid phone number and re-upload the workbook.', + fix_hint='Use the expected phone number format for this field.', + ), + MessageKey.THIS_FIELD_IS_REQUIRED: RemediationHint( + suggested_action='Fill in the required field and re-upload the workbook.', + fix_hint='Required fields cannot be left blank.', + ), +} + +_REMEDIATION_HINTS_BY_CODE: dict[str, 
RemediationHint] = { + 'ExcelCellError': RemediationHint( + suggested_action='Review the highlighted cells, correct the invalid values, and re-upload the workbook.' + ), + 'ExcelRowError': RemediationHint( + suggested_action='Review the row-level validation message, correct the row, and re-upload the workbook.' + ), +} + + def _row_number_for_humans(row_index: RowIndex) -> int: return int(row_index) + 1 @@ -28,6 +85,53 @@ def _column_number_for_humans(column_index: ColumnIndex) -> int: return int(column_index) + 1 +def _merge_remediation_hints(*hints: RemediationHint) -> RemediationHint: + for hint in hints: + if hint.suggested_action is not None or hint.fix_hint is not None: + return hint + return RemediationHint() + + +def _hint_for_issue(error: RowIssue) -> RemediationHint: + message_hint = _REMEDIATION_HINTS_BY_MESSAGE_KEY.get(error.message_key) if error.message_key is not None else None + code_hint = _REMEDIATION_HINTS_BY_CODE.get(error.code) + return _merge_remediation_hints(message_hint or RemediationHint(), code_hint or RemediationHint()) + + +def _hint_for_issues(errors: Iterable[RowIssue]) -> RemediationHint: + for error in errors: + hint = _hint_for_issue(error) + if hint.suggested_action is not None or hint.fix_hint is not None: + return hint + return RemediationHint() + + +def _top_level_remediation_hint(result: 'ImportResult') -> RemediationHint: + if result.is_success: + return RemediationHint() + if result.is_header_invalid: + return RemediationHint( + suggested_action='Correct the workbook headers to match the template and retry the import.', + fix_hint='Use a fresh template or align missing, duplicated, and unrecognized headers before retrying.', + ) + if result.is_data_invalid: + fix_hint = ( + 'Download the result workbook and review the highlighted rows before re-uploading.' + if result.url is not None + else 'Review the invalid rows and field messages before re-uploading.' 
+ ) + return RemediationHint( + suggested_action='Correct the invalid rows and re-upload the workbook.', + fix_hint=fix_hint, + ) + return RemediationHint() + + +def _with_remediation_fields(payload: dict[str, object], hint: RemediationHint) -> dict[str, object]: + payload.update(hint.to_dict()) + return payload + + @dataclass(slots=True, frozen=True) class FieldIssueSummary: """Field-level issue summary suitable for frontends and dashboards.""" @@ -494,6 +598,119 @@ def is_required_missing(self) -> bool: return bool(self.missing_required) +class ImportPreflightStatus(StrEnum): + """High-level preflight result type.""" + + VALID = 'VALID' + HEADER_INVALID = 'HEADER_INVALID' + SHEET_MISSING = 'SHEET_MISSING' + STRUCTURE_INVALID = 'STRUCTURE_INVALID' + + +class ImportPreflightResult(BaseModel): + """Structured result returned from lightweight import preflight.""" + + status: ImportPreflightStatus = Field(description='Overall preflight result.') + sheet_name: str = Field(description='Configured worksheet name used for preflight.') + sheet_exists: bool = Field(description='Whether the configured worksheet was found.') + has_merged_header: bool | None = Field( + default=None, + description='Whether the workbook uses a merged two-row header when the header block was readable.', + ) + estimated_row_count: int = Field(default=0, description='Estimated number of data rows for a later import run.') + structural_issue_codes: list[str] = Field( + default_factory=list, + description='Stable structural issue codes emitted for non-header preflight failures.', + ) + + is_required_missing: bool = Field(default=False, description='Whether required headers are missing.') + missing_required: list[Label] = Field( + default_factory=_empty_labels, description='Required headers missing from the workbook.' + ) + missing_primary: list[Label] = Field( + default_factory=_empty_labels, description='Primary-key headers missing from the workbook.' 
+ ) + unrecognized: list[Label] = Field( + default_factory=_empty_labels, description='Headers present in the workbook but unknown to the schema.' + ) + duplicated: list[Label] = Field( + default_factory=_empty_labels, description='Headers that appear more than once in the workbook.' + ) + + @property + def is_valid(self) -> bool: + return self.status == ImportPreflightStatus.VALID + + @property + def is_header_invalid(self) -> bool: + return self.status == ImportPreflightStatus.HEADER_INVALID + + @property + def is_sheet_missing(self) -> bool: + return self.status == ImportPreflightStatus.SHEET_MISSING + + @property + def is_structure_invalid(self) -> bool: + return self.status == ImportPreflightStatus.STRUCTURE_INVALID + + def to_api_payload(self) -> dict[str, object]: + return { + 'status': self.status.value, + 'is_valid': self.is_valid, + 'is_header_invalid': self.is_header_invalid, + 'is_sheet_missing': self.is_sheet_missing, + 'is_structure_invalid': self.is_structure_invalid, + 'sheet': { + 'name': self.sheet_name, + 'exists': self.sheet_exists, + 'has_merged_header': self.has_merged_header, + }, + 'summary': { + 'estimated_row_count': self.estimated_row_count, + 'structural_issue_codes': list(self.structural_issue_codes), + }, + 'header_issues': { + 'is_required_missing': self.is_required_missing, + 'missing_required': [str(label) for label in self.missing_required], + 'missing_primary': [str(label) for label in self.missing_primary], + 'unrecognized': [str(label) for label in self.unrecognized], + 'duplicated': [str(label) for label in self.duplicated], + }, + } + + @classmethod + def from_validate_header_result( + cls, + result: ValidateHeaderResult, + *, + sheet_name: str, + sheet_exists: bool = True, + has_merged_header: bool | None = None, + estimated_row_count: int = 0, + ) -> 'ImportPreflightResult': + """Build a preflight result from a header-validation result.""" + if result.is_valid: + return cls( + status=ImportPreflightStatus.VALID, + 
sheet_name=sheet_name, + sheet_exists=sheet_exists, + has_merged_header=has_merged_header, + estimated_row_count=estimated_row_count, + ) + return cls( + status=ImportPreflightStatus.HEADER_INVALID, + sheet_name=sheet_name, + sheet_exists=sheet_exists, + has_merged_header=has_merged_header, + estimated_row_count=estimated_row_count, + is_required_missing=result.is_required_missing, + missing_required=result.missing_required, + missing_primary=result.missing_primary, + unrecognized=result.unrecognized, + duplicated=result.duplicated, + ) + + class ValidateResult(StrEnum): """High-level import result type.""" @@ -577,3 +794,87 @@ def from_validate_header_result(cls, result: ValidateHeaderResult) -> 'ImportRes duplicated=result.duplicated, missing_required=result.missing_required, ) + + +def build_frontend_remediation_payload( + *, + result: ImportResult, + cell_error_map: CellErrorMap, + row_error_map: RowIssueMap, +) -> dict[str, object]: + """Build a compact, remediation-oriented payload for frontend workflows.""" + + row_records = row_error_map.records() + cell_records = cell_error_map.records() + top_level_hint = _top_level_remediation_hint(result) + + by_field: list[dict[str, object]] = [] + for summary in cell_error_map.summary_by_field(): + summary_payload = summary.to_dict() + matching_errors = tuple( + record.error for record in cell_records if str(record.error.unique_label) == summary.unique_label + ) + by_field.append(_with_remediation_fields(summary_payload, _hint_for_issues(matching_errors))) + + by_code: list[dict[str, object]] = [] + for summary in row_error_map.summary_by_code(): + summary_payload = summary.to_dict() + matching_errors = tuple(record.error for record in row_records if record.error.code == summary.code) + by_code.append(_with_remediation_fields(summary_payload, _hint_for_issues(matching_errors))) + + items: list[dict[str, object]] = [] + for record in cell_records: + item_payload: dict[str, object] = { + 'scope': 'cell', + 'code': 
record.error.code, + 'message': record.error.message, + 'display_message': record.error.display_message, + 'row_index': int(record.row_index), + 'row_number_for_humans': _row_number_for_humans(record.row_index), + 'column_index': int(record.column_index), + 'column_number_for_humans': _column_number_for_humans(record.column_index), + 'field_label': str(record.error.label), + 'parent_label': None if record.error.parent_label is None else str(record.error.parent_label), + 'unique_label': str(record.error.unique_label), + } + if record.error.message_key is not None: + item_payload['message_key'] = record.error.message_key.value + items.append(_with_remediation_fields(item_payload, _hint_for_issue(record.error))) + + for record in row_records: + if isinstance(record.error, ExcelCellError): + continue + item_payload: dict[str, object] = { + 'scope': 'row', + 'code': record.error.code, + 'message': record.error.message, + 'display_message': record.error.display_message, + 'row_index': int(record.row_index), + 'row_number_for_humans': _row_number_for_humans(record.row_index), + } + if record.error.message_key is not None: + item_payload['message_key'] = record.error.message_key.value + items.append(_with_remediation_fields(item_payload, _hint_for_issue(record.error))) + + remediation_summary: dict[str, object] = { + 'needs_remediation': not result.is_success, + 'affected_row_count': len(row_error_map.summary_by_row()), + 'affected_field_count': len(cell_error_map.summary_by_field()), + 'affected_code_count': len(row_error_map.summary_by_code()), + 'header_issue_count': ( + len(result.missing_required) + + len(result.missing_primary) + + len(result.unrecognized) + + len(result.duplicated) + ), + 'result_workbook_available': result.url is not None, + } + _with_remediation_fields(remediation_summary, top_level_hint) + + return { + 'result': result.to_api_payload(), + 'remediation': remediation_summary, + 'by_field': by_field, + 'by_code': by_code, + 'items': items, + } diff 
--git a/tech_debt/README.md b/tech_debt/README.md new file mode 100644 index 0000000..74d7a2e --- /dev/null +++ b/tech_debt/README.md @@ -0,0 +1,72 @@ +# Technical Debt + +This directory is for explicit technical debt records in this repository. +Use it to capture implementation compromises that should be visible and actionable, not just known informally. + +## Related docs + +- [../AGENTS.md](../AGENTS.md) for repository-local change guidance. +- [../docs/repo-map.md](../docs/repo-map.md) for locating the affected files. +- [../plans/README.md](../plans/README.md) for execution plans tied to debt repayment work. +- [../adr/README.md](../adr/README.md) for the architecture decisions that may explain or constrain a debt item. + +## What qualifies as technical debt here + +- Compatibility code that is necessary in 2.x but adds maintenance cost. +- Temporary duplication or awkward layering in: + - `src/excelalchemy/` + - `docs/` + - `examples/` + - `tests/` +- Implementation patterns that are correct today but harder to maintain than the desired design. +- Gaps between the recommended public API and the current implementation reality. +- Missing automation or smoke coverage that creates avoidable risk. + +Do not use this directory for: + +- vague wishes +- feature requests with no implementation debt +- bugs that should be fixed immediately and do not represent a broader maintenance burden + +## Required fields for each debt entry + +Each entry should include: + +- Summary + - One short description of the debt. +- Impact + - What cost it creates in this repository. + - Examples: harder maintenance, compatibility drag, confusing API shape, duplicated docs/tests, fragile smoke behavior. +- Current workaround + - How the repository currently lives with the debt. +- Desired fix + - What the target state should be. 
+- Priority + - Use a simple priority label such as: + - `low` + - `medium` + - `high` +- Relevant paths + - Point to the code, tests, docs, or examples involved. + +## Practical guidance + +- Be concrete and repository-local. +- Prefer debt entries that point to specific files and seams such as: + - `src/excelalchemy/types/` + - `src/excelalchemy/core/storage_minio.py` + - `docs/public-api.md` + - `tests/unit/test_deprecation_policy.py` +- If the debt is tightly coupled to a planned piece of work, link the relevant plan under `plans/`. +- If the debt exists because of a deliberate architecture choice, link the relevant ADR under `adr/`. + +## Repository alignment + +Common debt categories in this repository are likely to involve: + +- 2.x compatibility shims and deprecation paths +- duplicated public vs compatibility naming +- result payload evolution and smoke snapshots +- example and docs synchronization cost +- storage abstraction vs legacy Minio behavior +- metadata layering and Pydantic boundary complexity diff --git a/tech_debt/compatibility-surface-sprawl-in-2x.md b/tech_debt/compatibility-surface-sprawl-in-2x.md new file mode 100644 index 0000000..4d571ba --- /dev/null +++ b/tech_debt/compatibility-surface-sprawl-in-2x.md @@ -0,0 +1,75 @@ +# Compatibility Surface Sprawl In 2.x + +## Summary + +The repository maintains the same conceptual surface through both preferred public APIs and multiple 2.x compatibility paths. + +## Impact + +- Increases maintenance cost across source, docs, and tests. +- Makes navigation noisier because “available” and “recommended” are not the same. +- Requires continued testing and documentation for deprecated import paths and old facade aliases. + +## Current workaround + +- The repository documents the preferred public modules and names in: + - `docs/public-api.md` + - `MIGRATIONS.md` +- Deprecated modules and aliases remain available but emit warnings where applicable. 
+- Tests explicitly protect the compatibility layer and deprecation guidance. + +## Desired fix + +- Continue narrowing usage toward the preferred public surface: + - `excelalchemy` + - `excelalchemy.config` + - `excelalchemy.metadata` + - `excelalchemy.results` + - `excelalchemy.exceptions` + - `excelalchemy.codecs` +- Reduce the amount of duplicated compatibility surface that must remain active in the runtime and documentation. +- Keep the deprecation path explicit and easy to remove when the 3.x line allows it. + +## Priority + +- `medium` + +## Evidence + +- `docs/public-api.md` + - distinguishes stable public modules from compatibility modules and internal modules +- `MIGRATIONS.md` + - states that `excelalchemy.types.*` remains available in 2.x and is scheduled for removal in 3.0 + - documents old vs preferred import-inspection names +- `src/excelalchemy/core/alchemy.py` + - still exposes compatibility aliases: + - `df` + - `header_df` + - `cell_errors` + - `row_errors` +- `src/excelalchemy/types/` + - compatibility namespace retained for migrations +- `src/excelalchemy/exc.py` +- `src/excelalchemy/identity.py` +- `src/excelalchemy/header_models.py` +- `src/excelalchemy/util/convertor.py` + - compatibility shims still exist in the package +- `tests/unit/test_deprecation_policy.py` + - verifies that compatibility imports still work and emit replacement guidance + +## Uncertainty + +- The repository clearly shows that compatibility surface exists and carries cost. +- The exact removal schedule beyond the documented `ExcelAlchemy 3.0` direction is not established here, so this record should not assume a more specific timeline. 
+ +## Relevant paths + +- `docs/public-api.md` +- `MIGRATIONS.md` +- `src/excelalchemy/core/alchemy.py` +- `src/excelalchemy/types/` +- `src/excelalchemy/exc.py` +- `src/excelalchemy/identity.py` +- `src/excelalchemy/header_models.py` +- `src/excelalchemy/util/convertor.py` +- `tests/unit/test_deprecation_policy.py` diff --git a/tech_debt/example-docs-and-generated-asset-synchronization-overhead.md b/tech_debt/example-docs-and-generated-asset-synchronization-overhead.md new file mode 100644 index 0000000..874dd7c --- /dev/null +++ b/tech_debt/example-docs-and-generated-asset-synchronization-overhead.md @@ -0,0 +1,72 @@ +# Example, Docs, And Generated-Asset Synchronization Overhead + +## Summary + +Repository examples are treated as part of the public contract, and their behavior is synchronized across runnable examples, captured outputs, showcase docs, smoke scripts, and README surfaces. + +## Impact + +- Raises the cost of changing example behavior, even for small user-facing wording changes. +- Increases the number of places that must remain aligned: + - example scripts + - captured output assets + - docs pages + - smoke scripts + - README surfaces +- Makes release verification stronger, but also more coupled. + +## Current workaround + +- Examples are smoke-tested directly. +- Captured outputs live under `files/example-outputs/`. +- Generation and validation scripts enforce consistency: + - `scripts/generate_example_output_assets.py` + - `scripts/smoke_examples.py` + - `scripts/smoke_docs_assets.py` +- Example-driven docs link to the generated assets and runnable examples. + +## Desired fix + +- Keep the examples contract strong while reducing duplicated synchronization points where practical. +- Prefer a smaller number of authoritative example-output sources and clearer update paths when behavior changes. +- Make it easier to change examples intentionally without manually touching many separate surfaces. 
+ +## Priority + +- `medium` + +## Evidence + +- `tests/integration/test_examples_smoke.py` + - runs the main example entry points directly +- `scripts/generate_example_output_assets.py` + - generates multiple captured output files plus `import-failure-api-payload.json` +- `scripts/smoke_docs_assets.py` + - asserts the presence of specific docs fragments and generated assets +- `docs/examples-showcase.md` + - embeds fixed outputs and links to generated assets +- `examples/README.md` + - describes examples as smoke-tested and points readers to `files/example-outputs/` +- `README.md` +- `README-pypi.md` + - both surface example-driven onboarding and fixed outputs +- `files/example-outputs/` + - stores generated example output artifacts that docs and smoke scripts depend on + +## Uncertainty + +- The repository clearly shows synchronization cost. +- The best long-term reduction strategy is not explicit here: the codebase does not say whether the intended fix is fewer generated assets, stronger generation automation, fewer duplicated docs surfaces, or some combination. + +## Relevant paths + +- `examples/README.md` +- `examples/` +- `docs/examples-showcase.md` +- `README.md` +- `README-pypi.md` +- `files/example-outputs/` +- `tests/integration/test_examples_smoke.py` +- `scripts/generate_example_output_assets.py` +- `scripts/smoke_examples.py` +- `scripts/smoke_docs_assets.py` diff --git a/tech_debt/v2-4-platform-doc-naming-drift.md b/tech_debt/v2-4-platform-doc-naming-drift.md new file mode 100644 index 0000000..6897a34 --- /dev/null +++ b/tech_debt/v2-4-platform-doc-naming-drift.md @@ -0,0 +1,39 @@ +# Platform Doc Naming Drift In v2.4 Planning Records + +## Summary + +The v2.4 planning documents still refer to `docs/import-platform.md`, while the +implemented documentation slice uses `docs/platform-architecture.md`. 
+ +## Impact + +- creates avoidable confusion when moving between planning records and the + current docs +- increases review friction for follow-up documentation work +- makes future cross-linking and checklist updates slightly harder + +## Current workaround + +- the repository-facing docs now consistently link to + `docs/platform-architecture.md` +- planning records still contain the older filename + +## Desired fix + +- align the v2.4 plan and design-note references with the implemented doc names: + - `docs/platform-architecture.md` + - `docs/runtime-model.md` + - `docs/integration-blueprints.md` +- keep one canonical naming scheme for the platform-layer docs + +## Priority + +`low` + +## Relevant paths + +- `plans/v2-4-import-platform-layer-design.md` +- `plans/v2-4-import-platform-layer-design-note.md` +- `docs/platform-architecture.md` +- `docs/runtime-model.md` +- `docs/integration-blueprints.md` diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..0280551 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,315 @@ +# Test Suite Guide + +This directory holds the repository’s main automated test suite. +It is organized around stable public behavior first, then workflow integration, then focused unit coverage. + +## Related docs + +- [../README.md](../README.md) for the public-facing workflow overview. +- [../docs/repo-map.md](../docs/repo-map.md) for where the tested code lives. +- [../docs/invariants.md](../docs/invariants.md) for the behaviors this suite is meant to lock down. +- [../src/excelalchemy/README.md](../src/excelalchemy/README.md) for the implementation structure behind the tests. +- [../examples/README.md](../examples/README.md) for the example workflows that integration and smoke coverage protect. 
+ +## How the test suite is organized + +### `tests/contracts/` + +- Purpose: + - protect the stable behavior that callers and downstream integrations rely on + - protect key contracts between the public facade and core collaborators +- Main files: + - `tests/contracts/test_template_contract.py` + - template payload shape, workbook comments, merged headers, required-field styling, locale-sensitive template output + - `tests/contracts/test_export_contract.py` + - export payload shape, selected key behavior, merged-header export behavior, workbook-facing values + - `tests/contracts/test_import_contract.py` + - import result status, header-invalid vs data-invalid behavior, result workbook upload behavior, workbook failure rendering, locale-sensitive result output + - `tests/contracts/test_storage_contract.py` + - storage gateway selection, missing-storage behavior, custom storage behavior, upload payload expectations, `WorksheetTable` reader behavior + - `tests/contracts/test_result_contract.py` + - `ImportResult`, `ValidateHeaderResult`, status helpers, API payload shape + - `tests/contracts/test_pydantic_contract.py` + - metadata extraction, Pydantic validation mapping, custom codec extension surface, `Annotated[..., ExcelMeta(...)]` declarations + - `tests/contracts/test_core_components_contract.py` + - schema layout, header parsing/validation, row aggregation, issue tracking column offsets + +### `tests/integration/` + +- Purpose: + - exercise broader end-to-end flows that cross multiple modules + - verify runnable examples and real workflow combinations +- Main files: + - `tests/integration/test_excelalchemy_workflows.py` + - end-to-end workflow coverage across import/export paths and many built-in field types + - `tests/integration/test_examples_smoke.py` + - smoke-style tests for repository examples and the FastAPI reference app + +### `tests/unit/` + +- Purpose: + - protect focused logic in individual modules or small subsystems + - keep regressions close to 
the code that changed +- Main files: + - `tests/unit/test_config_options.py` + - config normalization, helper constructors, storage option behavior, legacy Minio warnings + - `tests/unit/test_converters_and_schema_extraction.py` + - converter utilities and schema extraction details + - `tests/unit/test_deprecation_policy.py` + - compatibility import warnings and replacement guidance + - `tests/unit/test_diagnostics_logging.py` + - named logger behavior and developer-facing diagnostics + - `tests/unit/test_excel_exceptions.py` + - exception model and payload behavior + - `tests/unit/test_field_metadata.py` + - metadata layering, comments, inherited behavior, constraint overlay details + - `tests/unit/test_file_utils.py` + - workbook/data-url utility helpers + - `tests/unit/test_i18n_messages.py` + - message lookup and locale-sensitive text behavior + - `tests/unit/codecs/*.py` + - per-codec behavior for parse/normalize/render/comment logic + +### `tests/support/` + +- Purpose: + - shared fixtures and helpers used across contract, integration, and unit tests +- Important files: + - `tests/support/base.py` + - shared test base class + - `tests/support/contract_models.py` + - reusable Pydantic models and callbacks for tests + - `tests/support/mock_minio.py` + - local mock Minio behavior + - `tests/support/storage.py` + - in-memory storage test double + - `tests/support/registry.py` + - named workbook fixture registry + - `tests/support/workbook.py` + - workbook decoding, fill/color inspection, merge-range helpers, worksheet matrix helpers + +### `tests/files/` + +- Purpose: + - workbook fixtures used by tests +- Current examples: + - `tests/files/test_import_with_merge_header.xlsx` + - `tests/files/test_date_range_input.xlsx` + - `tests/files/test_date_range_missing_input_before.xlsx` + - `tests/files/test_date_range_missing_input_after.xlsx` + +## What kinds of behavior are protected by tests + +- Public API shape and recommended usage: + - facade methods + - config 
helper constructors + - result object payloads + - public exception behavior +- Schema and metadata behavior: + - `FieldMeta(...)` + - `ExcelMeta(...)` + - flattened layout ordering + - composite field expansion + - constraint overlay +- Import behavior: + - header validation + - row validation + - create/update/create-or-update execution + - result-workbook upload and rendering +- Export and template behavior: + - data URL vs artifact behavior + - workbook comments + - merged headers + - selected output keys + - workbook-facing value formatting +- Storage behavior: + - `ExcelStorage` contract behavior + - storage gateway selection + - `WorksheetTable` reader shape + - missing-storage failure mode + - Minio compatibility behavior + - custom storage behavior +- Localization behavior: + - template locale + - result-workbook locale + - message lookup and fallback behavior +- Compatibility behavior: + - deprecated import paths + - legacy naming aliases + - deprecation warnings +- Developer-facing diagnostics: + - named logger usage + - warning/info message consistency + +## Test types in this repository + +### Contract tests + +- Live under `tests/contracts/`. +- These are the highest-signal tests for public behavior. +- If a change would affect library consumers, examples, payload shape, or workbook output semantics, start here. + +### Integration tests + +- Live under `tests/integration/`. +- These cover multi-module flows and runnable repository examples. +- Use these when a change crosses facade, schema, codecs, storage, rendering, or example code. + +### Unit tests + +- Live under `tests/unit/`. +- These cover focused logic in one module or subsystem. +- Use these for metadata details, config normalization, exception behavior, i18n logic, utility helpers, and individual codecs. + +### Regression tests + +- There is no separate `tests/regression/` directory. 
+- Regression coverage is added next to the affected area: + - contract regressions go in `tests/contracts/` + - workflow regressions go in `tests/integration/` + - focused logic regressions go in `tests/unit/` + +### Smoke-style tests + +- Smoke-style tests inside `tests/` live mainly in: + - `tests/integration/test_examples_smoke.py` +- Additional smoke coverage exists outside `tests/` in: + - `scripts/smoke_examples.py` + - `scripts/smoke_package.py` + - `scripts/smoke_docs_assets.py` + - `scripts/smoke_api_payload_snapshot.py` + +## Where to add tests for specific changes + +### Public API changes + +- Add or update tests in: + - `tests/contracts/test_result_contract.py` + - `tests/contracts/test_storage_contract.py` + - `tests/contracts/test_template_contract.py` + - `tests/contracts/test_export_contract.py` + - `tests/contracts/test_import_contract.py` + - `tests/unit/test_config_options.py` + - `tests/unit/test_deprecation_policy.py` +- Use this area for: + - facade method behavior + - result payload shape + - config constructor behavior + - public naming changes + - compatibility-path changes + +### Schema and layout behavior + +- Add or update tests in: + - `tests/contracts/test_core_components_contract.py` + - `tests/contracts/test_pydantic_contract.py` + - `tests/unit/test_converters_and_schema_extraction.py` + - `tests/unit/test_field_metadata.py` +- Use this area for: + - field metadata extraction + - layout ordering + - merged-header shape + - composite field expansion + - schema converter behavior + +### Import validation behavior + +- Add or update tests in: + - `tests/contracts/test_import_contract.py` + - `tests/contracts/test_pydantic_contract.py` + - `tests/contracts/test_core_components_contract.py` + - `tests/integration/test_excelalchemy_workflows.py` +- Use this area for: + - header validation + - row validation + - Pydantic error mapping + - create/update/upsert flow behavior + - import session lifecycle behavior + +### Workbook rendering 
behavior + +- Add or update tests in: + - `tests/contracts/test_template_contract.py` + - `tests/contracts/test_export_contract.py` + - `tests/contracts/test_import_contract.py` + - `tests/support/workbook.py` helpers if you need new workbook assertions +- Use this area for: + - comments + - fills/colors + - result/reason columns + - merged-cell output + - workbook-facing display values + +### Localization behavior + +- Add or update tests in: + - `tests/contracts/test_template_contract.py` + - `tests/contracts/test_import_contract.py` + - `tests/unit/test_i18n_messages.py` +- Use this area for: + - workbook display locale + - runtime message lookup + - header comments and workbook instruction text + - result-workbook labels and row status text + +### Storage behavior + +- Add or update tests in: + - `tests/contracts/test_storage_contract.py` + - `tests/unit/test_config_options.py` + - `tests/integration/test_excelalchemy_workflows.py` if the change affects broader workflows + - `tests/support/storage.py` if a new storage test double is needed +- Use this area for: + - storage gateway selection + - reader return shape (`WorksheetTable`) + - `ExcelStorage` contract behavior + - upload payload expectations + - missing-storage errors + - legacy Minio compatibility + +## Useful shared helpers + +- Use `tests/support/contract_models.py` for reusable importer/exporter models and callbacks. +- Use `tests/support/storage.py` for in-memory storage-backed tests. +- Use `tests/support/workbook.py` for workbook assertions instead of reimplementing workbook parsing in each test. +- Use `tests/support/registry.py` and `tests/files/` for named workbook fixtures. 
+ +## Running tests + +- Full test suite: + - `uv run pytest --cov=excelalchemy --cov-report=term-missing:skip-covered tests` +- Common smoke checks related to tests: + - `uv run python scripts/smoke_package.py` + - `uv run python scripts/smoke_examples.py` + - `uv run python scripts/smoke_docs_assets.py` + - `uv run python scripts/smoke_api_payload_snapshot.py` + +## What an agent should check before opening a PR + +- The changed behavior is covered at the right level: + - `tests/contracts/` for public behavior + - `tests/integration/` for end-to-end/example behavior + - `tests/unit/` for focused logic +- Existing tests for the touched area still pass. +- New behavior has a regression test near the affected subsystem. +- If examples changed, check: + - `tests/integration/test_examples_smoke.py` + - `scripts/smoke_examples.py` +- If result payloads changed, check: + - `tests/contracts/test_result_contract.py` + - `docs/result-objects.md` + - `docs/api-response-cookbook.md` + - `scripts/smoke_api_payload_snapshot.py` +- If locale-visible workbook text changed, check: + - `tests/contracts/test_template_contract.py` + - `tests/contracts/test_import_contract.py` + - `tests/unit/test_i18n_messages.py` + - `docs/locale.md` +- If storage behavior changed, check: + - `tests/contracts/test_storage_contract.py` + - `tests/unit/test_config_options.py` + - `examples/custom_storage.py` +- If compatibility behavior changed, check: + - `tests/unit/test_deprecation_policy.py` + - `MIGRATIONS.md` + - `docs/public-api.md` diff --git a/tests/contracts/test_import_contract.py b/tests/contracts/test_import_contract.py index d3dd642..905b542 100644 --- a/tests/contracts/test_import_contract.py +++ b/tests/contracts/test_import_contract.py @@ -2,16 +2,433 @@ from typing import cast from minio import Minio -from openpyxl import load_workbook - -from excelalchemy import ExcelAlchemy, ImporterConfig, ValidateResult +from openpyxl import Workbook, load_workbook +from pydantic import 
BaseModel + +from excelalchemy import ( + ExcelAlchemy, + FieldMeta, + ImporterConfig, + ImportPreflightStatus, + String, + ValidateResult, + WorksheetNotFoundError, +) from excelalchemy.const import BACKGROUND_ERROR_COLOR, REASON_COLUMN_LABEL, RESULT_COLUMN_LABEL from excelalchemy.core.import_session import ImportSessionPhase -from tests.support import BaseTestCase, FileRegistry, get_fill_color, load_binary_excel_to_workbook +from excelalchemy.i18n.messages import MessageKey +from excelalchemy.i18n.messages import message as msg +from tests.support import ( + BaseTestCase, + FileRegistry, + InMemoryExcelStorage, + get_fill_color, + load_binary_excel_to_workbook, +) from tests.support.contract_models import MergedContractImporter, SimpleContractImporter, creator, failing_creator class TestImportContracts(BaseTestCase): + @staticmethod + def _build_workbook_bytes(*, sheet_name: str = 'Sheet1', rows: list[list[str | None]]) -> bytes: + workbook = Workbook() + worksheet = workbook.active + assert worksheet is not None + worksheet.title = sheet_name + + for row_index, row in enumerate(rows, start=1): + for column_index, value in enumerate(row, start=1): + worksheet.cell(row=row_index, column=column_index, value=value) + + buffer = io.BytesIO() + workbook.save(buffer) + workbook.close() + return buffer.getvalue() + + async def test_preflight_import_returns_valid_result_for_valid_workbook(self): + alchemy = ExcelAlchemy(ImporterConfig(SimpleContractImporter, creator=creator, minio=cast(Minio, self.minio))) + + result = alchemy.preflight_import(FileRegistry.TEST_SIMPLE_IMPORT) + + assert result.status == ImportPreflightStatus.VALID + assert result.sheet_name == 'Sheet1' + assert result.sheet_exists is True + assert result.has_merged_header is False + assert result.estimated_row_count == 1 + assert result.structural_issue_codes == [] + + async def test_preflight_import_returns_header_invalid_for_invalid_header(self): + alchemy = 
ExcelAlchemy(ImporterConfig(SimpleContractImporter, creator=creator, minio=cast(Minio, self.minio))) + + result = alchemy.preflight_import(FileRegistry.TEST_HEADER_INVALID_INPUT) + + assert result.status == ImportPreflightStatus.HEADER_INVALID + assert result.sheet_exists is True + assert result.estimated_row_count == 1 + assert set(result.unrecognized) == {'不存在的表头'} + assert '年龄' in set(result.missing_required) + assert result.missing_primary == [] + + async def test_preflight_import_reports_missing_primary_fields_in_update_mode(self): + class UpdatePrimaryKeyImporter(BaseModel): + employee_id: String = FieldMeta(label='员工编号', order=1, is_primary_key=True) + name: String = FieldMeta(label='姓名', order=2) + + workbook_bytes = self._build_workbook_bytes( + rows=[ + ['ignored hint'], + ['姓名'], + ['张三'], + ] + ) + storage = InMemoryExcelStorage(fixtures={'preflight-update-missing-primary.xlsx': workbook_bytes}) + alchemy = ExcelAlchemy(ImporterConfig.for_update(UpdatePrimaryKeyImporter, storage=storage)) + + result = alchemy.preflight_import('preflight-update-missing-primary.xlsx') + + assert result.status == ImportPreflightStatus.HEADER_INVALID + assert result.sheet_exists is True + assert result.estimated_row_count == 1 + assert result.missing_primary == ['员工编号'] + assert result.missing_required == [] + assert result.unrecognized == [] + assert result.duplicated == [] + + async def test_preflight_import_reports_extra_fields_without_masking_present_required_fields(self): + workbook_bytes = self._build_workbook_bytes( + rows=[ + ['ignored hint'], + [ + '年龄', + '姓名', + '地址', + '是否启用', + '出生日期', + '邮箱', + '价格', + '爱好', + '公司', + '经理', + '部门', + '电话', + '单选', + '老板', + '领导', + '团队', + '网址', + '额外列', + ], + [ + '18', + '张三', + '北京市', + '是', + '2021-01-01', + 'noreply@example.com', + '100', + '篮球', + '阿里巴巴', + '李四', + '研发部', + '13800138000', + '选项1', + '马云', + '张三', + '研发部', + 'https://www.baidu.com', + 'unexpected', + ], + ] + ) + storage = 
InMemoryExcelStorage(fixtures={'preflight-extra-field.xlsx': workbook_bytes}) + alchemy = ExcelAlchemy(ImporterConfig.for_create(SimpleContractImporter, creator=creator, storage=storage)) + + result = alchemy.preflight_import('preflight-extra-field.xlsx') + + assert result.status == ImportPreflightStatus.HEADER_INVALID + assert result.sheet_exists is True + assert result.estimated_row_count == 1 + assert result.missing_required == [] + assert result.missing_primary == [] + assert result.unrecognized == ['额外列'] + assert result.duplicated == [] + + async def test_preflight_import_returns_sheet_missing_when_target_sheet_is_absent(self): + workbook = Workbook() + worksheet = workbook.active + assert worksheet is not None + worksheet.title = 'OtherSheet' + worksheet['A1'] = 'ignored hint' + worksheet['A2'] = '年龄' + worksheet['B2'] = '姓名' + + buffer = io.BytesIO() + workbook.save(buffer) + workbook.close() + + input_name = 'contract-preflight-missing-sheet.xlsx' + buffer.seek(0) + self.minio.put_object(self.minio.bucket_name, input_name, buffer, len(buffer.getvalue())) + + alchemy = ExcelAlchemy(ImporterConfig(SimpleContractImporter, creator=creator, minio=cast(Minio, self.minio))) + + result = alchemy.preflight_import(input_name) + + assert result.status == ImportPreflightStatus.SHEET_MISSING + assert result.sheet_name == 'Sheet1' + assert result.sheet_exists is False + assert result.has_merged_header is None + assert result.estimated_row_count == 0 + assert result.structural_issue_codes == [] + + async def test_preflight_import_returns_sheet_missing_for_explicit_storage_sheet_not_found_error(self): + class MissingSheetStorage(InMemoryExcelStorage): + def read_excel_table(self, input_excel_name: str, *, skiprows: int, sheet_name: str): + raise WorksheetNotFoundError( + msg(MessageKey.WORKSHEET_NOT_FOUND, sheet_name=sheet_name), + message_key=MessageKey.WORKSHEET_NOT_FOUND, + sheet_name=sheet_name, + ) + + storage = MissingSheetStorage(fixtures={'ignored.xlsx': 
b'ignored'}) + alchemy = ExcelAlchemy(ImporterConfig.for_create(SimpleContractImporter, creator=creator, storage=storage)) + + result = alchemy.preflight_import('ignored.xlsx') + + assert result.status == ImportPreflightStatus.SHEET_MISSING + assert result.sheet_name == 'Sheet1' + assert result.sheet_exists is False + assert result.structural_issue_codes == [] + + async def test_preflight_import_returns_structure_invalid_for_missing_header_row(self): + workbook_bytes = self._build_workbook_bytes(rows=[['ignored hint']]) + storage = InMemoryExcelStorage(fixtures={'header-missing.xlsx': workbook_bytes}) + alchemy = ExcelAlchemy(ImporterConfig.for_create(SimpleContractImporter, creator=creator, storage=storage)) + + result = alchemy.preflight_import('header-missing.xlsx') + + assert result.status == ImportPreflightStatus.STRUCTURE_INVALID + assert result.sheet_name == 'Sheet1' + assert result.sheet_exists is True + assert result.has_merged_header is None + assert result.estimated_row_count == 0 + assert result.structural_issue_codes == ['header_row_missing'] + + async def test_preflight_import_reraises_unreadable_workbook_errors(self): + storage = InMemoryExcelStorage(fixtures={'broken.xlsx': b'not-a-valid-workbook'}) + alchemy = ExcelAlchemy(ImporterConfig.for_create(SimpleContractImporter, creator=creator, storage=storage)) + + with self.assertRaisesRegex(Exception, 'File is not a zip file'): + alchemy.preflight_import('broken.xlsx') + + async def test_preflight_import_reraises_unexpected_storage_errors(self): + class ExplodingPreflightStorage(InMemoryExcelStorage): + def read_excel_table(self, input_excel_name: str, *, skiprows: int, sheet_name: str): + raise RuntimeError('boom') + + storage = ExplodingPreflightStorage(fixtures={'ignored.xlsx': b'ignored'}) + alchemy = ExcelAlchemy(ImporterConfig.for_create(SimpleContractImporter, creator=creator, storage=storage)) + + with self.assertRaisesRegex(RuntimeError, 'boom'): + alchemy.preflight_import('ignored.xlsx') + + 
async def test_preflight_import_estimates_rows_for_merged_header_workbook(self): + alchemy = ExcelAlchemy(ImporterConfig(MergedContractImporter, creator=creator, minio=cast(Minio, self.minio))) + + result = alchemy.preflight_import(FileRegistry.TEST_IMPORT_WITH_MERGE_HEADER) + + assert result.status == ImportPreflightStatus.VALID + assert result.sheet_exists is True + assert result.has_merged_header is True + assert result.estimated_row_count == 1 + + async def test_preflight_import_estimates_rows_for_simple_header_with_multiple_data_rows(self): + workbook_bytes = self._build_workbook_bytes( + rows=[ + ['ignored hint'], + [ + '年龄', + '姓名', + '地址', + '是否启用', + '出生日期', + '邮箱', + '价格', + '爱好', + '公司', + '经理', + '部门', + '电话', + '单选', + '老板', + '领导', + '团队', + '网址', + ], + [ + '18', + '张三', + '北京市', + '是', + '2021-01-01', + 'noreply@example.com', + '100', + '篮球', + '阿里巴巴', + '李四', + '研发部', + '13800138000', + '选项1', + '马云', + '张三', + '研发部', + 'https://www.baidu.com', + ], + [ + '19', + '李四', + '上海市', + '否', + '2021-01-02', + 'person@example.com', + '200', + '足球', + '腾讯', + '王五', + '市场部', + '13900139000', + '选项2', + '马化腾', + '李四', + '市场部', + 'https://example.com', + ], + ] + ) + storage = InMemoryExcelStorage(fixtures={'preflight-two-rows.xlsx': workbook_bytes}) + alchemy = ExcelAlchemy(ImporterConfig.for_create(SimpleContractImporter, creator=creator, storage=storage)) + + result = alchemy.preflight_import('preflight-two-rows.xlsx') + + assert result.status == ImportPreflightStatus.VALID + assert result.sheet_exists is True + assert result.has_merged_header is False + assert result.estimated_row_count == 2 + + async def test_preflight_import_does_not_execute_row_callbacks_or_mutate_last_import_session(self): + context: dict[str, object] = {'created_rows': []} + + async def tracking_creator( + data: dict[str, object], runtime_context: dict[str, object] | None + ) -> dict[str, object]: + assert runtime_context is not None + created_rows = 
runtime_context.setdefault('created_rows', []) + assert isinstance(created_rows, list) + created_rows.append(data.copy()) + return data + + alchemy = ExcelAlchemy( + ImporterConfig.for_create(SimpleContractImporter, creator=tracking_creator, minio=cast(Minio, self.minio)) + ) + alchemy.add_context(context) + + result = alchemy.preflight_import(FileRegistry.TEST_SIMPLE_IMPORT) + + assert result.status == ImportPreflightStatus.VALID + assert context['created_rows'] == [] + assert alchemy.last_import_snapshot is None + + async def test_preflight_import_does_not_upload_or_populate_error_maps(self): + workbook_bytes = self._build_workbook_bytes( + rows=[ + ['ignored hint'], + [ + '年龄', + '姓名', + '地址', + '是否启用', + '出生日期', + '邮箱', + '价格', + '爱好', + '公司', + '经理', + '部门', + '电话', + '单选', + '老板', + '领导', + '团队', + '网址', + ], + [ + '18', + '张三', + '北京市', + '是', + '2021-01-01', + 'noreply@example.com', + '100', + '篮球', + '阿里巴巴', + '李四', + '研发部', + '13800138000', + '选项1', + '马云', + '张三', + '研发部', + 'https://www.baidu.com', + ], + ] + ) + storage = InMemoryExcelStorage(fixtures={'preflight-no-side-effects.xlsx': workbook_bytes}) + alchemy = ExcelAlchemy(ImporterConfig.for_create(SimpleContractImporter, creator=creator, storage=storage)) + + result = alchemy.preflight_import('preflight-no-side-effects.xlsx') + + assert result.status == ImportPreflightStatus.VALID + assert storage.uploaded == {} + assert alchemy.last_import_snapshot is None + assert alchemy.cell_error_map == {} + assert alchemy.row_error_map == {} + + async def test_import_data_emits_expected_success_events(self): + alchemy = ExcelAlchemy(ImporterConfig(SimpleContractImporter, creator=creator, minio=cast(Minio, self.minio))) + events: list[dict[str, object]] = [] + + result = await alchemy.import_data( + input_excel_name=FileRegistry.TEST_SIMPLE_IMPORT, + output_excel_name='contract-success-events.xlsx', + on_event=events.append, + ) + + assert result.result == ValidateResult.SUCCESS + assert [event['event'] for 
event in events] == [ + 'started', + 'header_validated', + 'row_processed', + 'completed', + ] + assert events[1] == { + 'event': 'header_validated', + 'is_valid': True, + } + assert events[2] == { + 'event': 'row_processed', + 'processed_row_count': 1, + 'total_row_count': 1, + 'success_count': 1, + 'fail_count': 0, + } + assert events[3] == { + 'event': 'completed', + 'result': 'SUCCESS', + 'success_count': 1, + 'fail_count': 0, + 'url': None, + } + async def test_import_data_returns_success_result_for_valid_workbook(self): alchemy = ExcelAlchemy(ImporterConfig(SimpleContractImporter, creator=creator, minio=cast(Minio, self.minio))) @@ -40,6 +457,44 @@ async def test_import_data_returns_header_invalid_result_for_invalid_header(self assert '年龄' in set(result.missing_required) assert output_name not in self.minio.storage + async def test_import_data_emits_expected_header_invalid_events(self): + alchemy = ExcelAlchemy(ImporterConfig(SimpleContractImporter, creator=creator, minio=cast(Minio, self.minio))) + events: list[dict[str, object]] = [] + + result = await alchemy.import_data( + input_excel_name=FileRegistry.TEST_HEADER_INVALID_INPUT, + output_excel_name='contract-header-invalid-events.xlsx', + on_event=events.append, + ) + + assert result.result == ValidateResult.HEADER_INVALID + assert [event['event'] for event in events] == [ + 'started', + 'header_validated', + 'completed', + ] + assert events[1]['event'] == 'header_validated' + assert events[1]['is_valid'] is False + missing_required = events[1]['missing_required'] + missing_primary = events[1]['missing_primary'] + unrecognized = events[1]['unrecognized'] + duplicated = events[1]['duplicated'] + assert isinstance(missing_required, list) + assert isinstance(missing_primary, list) + assert isinstance(unrecognized, list) + assert isinstance(duplicated, list) + assert '年龄' in missing_required + assert missing_primary == [] + assert unrecognized == ['不存在的表头'] + assert duplicated == [] + assert events[2] == { + 
'event': 'completed', + 'result': 'HEADER_INVALID', + 'success_count': 0, + 'fail_count': 0, + 'url': None, + } + async def test_import_data_reloads_workbook_state_on_each_run(self): alchemy = ExcelAlchemy(ImporterConfig(SimpleContractImporter, creator=creator, minio=cast(Minio, self.minio))) @@ -96,6 +551,67 @@ async def test_import_data_uploads_result_workbook_for_invalid_rows(self): assert result.url == f'excel/{output_name}' assert output_name in self.minio.storage + async def test_import_data_emits_expected_data_invalid_events(self): + output_name = 'contract-data-invalid-events.xlsx' + self.minio.storage.pop(output_name, None) + alchemy = ExcelAlchemy(ImporterConfig(SimpleContractImporter, creator=creator, minio=cast(Minio, self.minio))) + events: list[dict[str, object]] = [] + + result = await alchemy.import_data( + input_excel_name=FileRegistry.TEST_SIMPLE_IMPORT_WITH_ERROR, + output_excel_name=output_name, + on_event=events.append, + ) + + assert result.result == ValidateResult.DATA_INVALID + assert [event['event'] for event in events] == [ + 'started', + 'header_validated', + 'row_processed', + 'completed', + ] + assert events[2] == { + 'event': 'row_processed', + 'processed_row_count': 1, + 'total_row_count': 1, + 'success_count': 0, + 'fail_count': 1, + } + assert events[3] == { + 'event': 'completed', + 'result': 'DATA_INVALID', + 'success_count': 0, + 'fail_count': 1, + 'url': f'excel/{output_name}', + } + + async def test_import_data_emits_failed_event_for_unexpected_exception(self): + class ExplodingReadStorage(InMemoryExcelStorage): + def read_excel_table(self, input_excel_name: str, *, skiprows: int, sheet_name: str): + raise RuntimeError('boom') + + source_bytes = self.minio.storage[FileRegistry.TEST_SIMPLE_IMPORT]['data'].getvalue() + storage = ExplodingReadStorage(fixtures={FileRegistry.TEST_SIMPLE_IMPORT: source_bytes}) + alchemy = ExcelAlchemy(ImporterConfig.for_create(SimpleContractImporter, creator=creator, storage=storage)) + events: 
list[dict[str, object]] = [] + + with self.assertRaisesRegex(RuntimeError, 'boom'): + await alchemy.import_data( + input_excel_name=FileRegistry.TEST_SIMPLE_IMPORT, + output_excel_name='contract-failed-events.xlsx', + on_event=events.append, + ) + + assert [event['event'] for event in events] == [ + 'started', + 'failed', + ] + assert events[1] == { + 'event': 'failed', + 'error_type': 'RuntimeError', + 'error_message': 'boom', + } + async def test_import_result_workbook_returns_result_and_reason_columns(self): output_name = 'contract-data-invalid-columns.xlsx' self.minio.storage.pop(output_name, None) diff --git a/tests/contracts/test_result_contract.py b/tests/contracts/test_result_contract.py index 68e5375..3e445e0 100644 --- a/tests/contracts/test_result_contract.py +++ b/tests/contracts/test_result_contract.py @@ -1,7 +1,14 @@ import pytest -from excelalchemy import Label, ProgrammaticError, ValidateResult -from excelalchemy.results import ImportResult, ValidateHeaderResult +from excelalchemy import ImportPreflightStatus, Label, ProgrammaticError, ValidateResult +from excelalchemy.results import ( + CellErrorMap, + ImportPreflightResult, + ImportResult, + RowIssueMap, + ValidateHeaderResult, + build_frontend_remediation_payload, +) class TestResultContracts: @@ -131,3 +138,220 @@ def test_import_result_from_validate_header_result_rejects_valid_input(self): ImportResult.from_validate_header_result(validate_header) assert str(context.value) == 'ImportResult can only be built from an invalid header validation result' + + def test_import_preflight_result_from_validate_header_result_maps_invalid_header_fields(self): + validate_header = ValidateHeaderResult( + missing_required=[Label('年龄')], + missing_primary=[Label('邮箱')], + unrecognized=[Label('未知列')], + duplicated=[Label('姓名')], + is_valid=False, + ) + + result = ImportPreflightResult.from_validate_header_result( + validate_header, + sheet_name='Sheet1', + sheet_exists=True, + has_merged_header=False, + 
estimated_row_count=3, + ) + + assert result.status == ImportPreflightStatus.HEADER_INVALID + assert result.sheet_name == 'Sheet1' + assert result.sheet_exists is True + assert result.has_merged_header is False + assert result.estimated_row_count == 3 + assert result.is_required_missing is True + assert result.missing_required == [Label('年龄')] + assert result.missing_primary == [Label('邮箱')] + assert result.unrecognized == [Label('未知列')] + assert result.duplicated == [Label('姓名')] + + def test_import_preflight_result_to_api_payload_for_valid_case(self): + result = ImportPreflightResult( + status=ImportPreflightStatus.VALID, + sheet_name='Sheet1', + sheet_exists=True, + has_merged_header=False, + estimated_row_count=1, + ) + + assert result.to_api_payload() == { + 'status': 'VALID', + 'is_valid': True, + 'is_header_invalid': False, + 'is_sheet_missing': False, + 'is_structure_invalid': False, + 'sheet': { + 'name': 'Sheet1', + 'exists': True, + 'has_merged_header': False, + }, + 'summary': { + 'estimated_row_count': 1, + 'structural_issue_codes': [], + }, + 'header_issues': { + 'is_required_missing': False, + 'missing_required': [], + 'missing_primary': [], + 'unrecognized': [], + 'duplicated': [], + }, + } + + def test_import_preflight_result_status_helpers_remain_consistent(self): + valid = ImportPreflightResult(status=ImportPreflightStatus.VALID, sheet_name='Sheet1', sheet_exists=True) + header_invalid = ImportPreflightResult( + status=ImportPreflightStatus.HEADER_INVALID, + sheet_name='Sheet1', + sheet_exists=True, + ) + sheet_missing = ImportPreflightResult( + status=ImportPreflightStatus.SHEET_MISSING, + sheet_name='Sheet1', + sheet_exists=False, + ) + structure_invalid = ImportPreflightResult( + status=ImportPreflightStatus.STRUCTURE_INVALID, + sheet_name='Sheet1', + sheet_exists=False, + ) + + assert valid.is_valid is True + assert valid.is_header_invalid is False + assert valid.is_sheet_missing is False + assert valid.is_structure_invalid is False + + 
assert header_invalid.is_valid is False + assert header_invalid.is_header_invalid is True + assert header_invalid.is_sheet_missing is False + assert header_invalid.is_structure_invalid is False + + assert sheet_missing.is_valid is False + assert sheet_missing.is_header_invalid is False + assert sheet_missing.is_sheet_missing is True + assert sheet_missing.is_structure_invalid is False + + assert structure_invalid.is_valid is False + assert structure_invalid.is_header_invalid is False + assert structure_invalid.is_sheet_missing is False + assert structure_invalid.is_structure_invalid is True + + def test_build_frontend_remediation_payload_for_success_case(self): + result = ImportResult(result=ValidateResult.SUCCESS, success_count=1) + + payload = build_frontend_remediation_payload( + result=result, + cell_error_map=CellErrorMap(), + row_error_map=RowIssueMap(), + ) + + assert payload == { + 'result': result.to_api_payload(), + 'remediation': { + 'needs_remediation': False, + 'affected_row_count': 0, + 'affected_field_count': 0, + 'affected_code_count': 0, + 'header_issue_count': 0, + 'result_workbook_available': False, + }, + 'by_field': [], + 'by_code': [], + 'items': [], + } + + def test_build_frontend_remediation_payload_for_header_invalid_case(self): + result = ImportResult( + result=ValidateResult.HEADER_INVALID, + is_required_missing=True, + missing_required=[Label('年龄')], + missing_primary=[Label('邮箱')], + unrecognized=[Label('未知列')], + duplicated=[Label('姓名')], + ) + + payload = build_frontend_remediation_payload( + result=result, + cell_error_map=CellErrorMap(), + row_error_map=RowIssueMap(), + ) + + assert payload['result'] == result.to_api_payload() + assert payload['remediation'] == { + 'needs_remediation': True, + 'affected_row_count': 0, + 'affected_field_count': 0, + 'affected_code_count': 0, + 'header_issue_count': 4, + 'result_workbook_available': False, + 'suggested_action': 'Correct the workbook headers to match the template and retry the 
import.', + 'fix_hint': 'Use a fresh template or align missing, duplicated, and unrecognized headers before retrying.', + } + assert payload['by_field'] == [] + assert payload['by_code'] == [] + assert payload['items'] == [] + + def test_build_frontend_remediation_payload_does_not_change_existing_result_payloads(self): + result = ImportResult( + result=ValidateResult.DATA_INVALID, + success_count=2, + fail_count=1, + url='memory://result.xlsx', + ) + expected_result_payload = result.to_api_payload() + cell_error_map = CellErrorMap() + row_error_map = RowIssueMap() + + payload = build_frontend_remediation_payload( + result=result, + cell_error_map=cell_error_map, + row_error_map=row_error_map, + ) + + assert result.to_api_payload() == expected_result_payload + assert payload['result'] == expected_result_payload + assert cell_error_map.to_api_payload() == { + 'error_count': 0, + 'items': [], + 'by_row': {}, + 'facets': { + 'field_labels': [], + 'parent_labels': [], + 'unique_labels': [], + 'codes': [], + 'row_numbers_for_humans': [], + 'column_numbers_for_humans': [], + }, + 'grouped': { + 'messages_by_field': {}, + 'messages_by_row': {}, + 'messages_by_code': {}, + }, + 'summary': { + 'by_field': [], + 'by_row': [], + 'by_code': [], + }, + } + assert row_error_map.to_api_payload() == { + 'error_count': 0, + 'items': [], + 'by_row': {}, + 'facets': { + 'field_labels': [], + 'parent_labels': [], + 'unique_labels': [], + 'codes': [], + 'row_numbers_for_humans': [], + }, + 'grouped': { + 'messages_by_row': {}, + 'messages_by_code': {}, + }, + 'summary': { + 'by_row': [], + 'by_code': [], + }, + } diff --git a/tests/contracts/test_template_contract.py b/tests/contracts/test_template_contract.py index b05dd05..3c922bd 100644 --- a/tests/contracts/test_template_contract.py +++ b/tests/contracts/test_template_contract.py @@ -1,8 +1,9 @@ -from typing import cast +from typing import Annotated, cast from minio import Minio +from pydantic import BaseModel, Field -from 
excelalchemy import ExcelAlchemy, ImporterConfig +from excelalchemy import ExcelAlchemy, ExcelMeta, FieldMeta, ImporterConfig, String from excelalchemy.const import BACKGROUND_REQUIRED_COLOR, HEADER_HINT from tests.support import ( BaseTestCase, @@ -62,6 +63,22 @@ async def test_download_template_returns_simple_header_with_required_fill_and_co assert worksheet['A2'].comment is not None assert '必填性:必填' in worksheet['A2'].comment.text + async def test_download_template_renders_example_value_in_header_comment_only_when_declared(self): + class Importer(BaseModel): + full_name: String = FieldMeta(label='姓名', order=1, hint='填写法定姓名', example_value='张三') + nickname: String = FieldMeta(label='昵称', order=2) + + alchemy = ExcelAlchemy(ImporterConfig(Importer, creator=creator, minio=cast(Minio, self.minio))) + + workbook = decode_prefixed_excel_to_workbook(alchemy.download_template()) + worksheet = workbook['Sheet1'] + + assert worksheet['A2'].comment is not None + assert '提示:填写法定姓名' in worksheet['A2'].comment.text + assert '示例:张三' in worksheet['A2'].comment.text + assert worksheet['B2'].comment is not None + assert '示例:' not in worksheet['B2'].comment.text + async def test_download_template_returns_merged_header_with_expected_merge_ranges(self): alchemy = ExcelAlchemy(ImporterConfig(MergedContractImporter, creator=creator, minio=cast(Minio, self.minio))) @@ -102,3 +119,20 @@ async def test_download_template_supports_english_display_locale(self): assert worksheet['A1'].value.startswith('Import instructions:') assert worksheet['A2'].comment is not None assert 'Required: required' in worksheet['A2'].comment.text + + async def test_download_template_supports_english_example_value_comment(self): + class Importer(BaseModel): + full_name: Annotated[ + String, + Field(min_length=2), + ExcelMeta(label='Full name', order=1, hint='Use the legal name', example_value='Alice Chen'), + ] + + alchemy = ExcelAlchemy(ImporterConfig(Importer, creator=creator, minio=cast(Minio, self.minio), 
locale='en')) + + workbook = decode_prefixed_excel_to_workbook(alchemy.download_template()) + worksheet = workbook['Sheet1'] + + assert worksheet['A2'].comment is not None + assert 'Hint: Use the legal name' in worksheet['A2'].comment.text + assert 'Example: Alice Chen' in worksheet['A2'].comment.text diff --git a/tests/integration/test_examples_smoke.py b/tests/integration/test_examples_smoke.py index 4171af4..0d6258b 100644 --- a/tests/integration/test_examples_smoke.py +++ b/tests/integration/test_examples_smoke.py @@ -156,6 +156,7 @@ def test_fastapi_reference_project_main_runs_when_optional_dependency_is_availab assert 'Envelope sections:' in output assert 'Data sections:' in output assert 'Request tenant: tenant-001' in output + assert 'Remediation keys:' in output @pytest.mark.skipif(importlib.util.find_spec('minio') is None, reason='minio is not installed') @@ -259,6 +260,7 @@ def test_fastapi_reference_project_endpoints_work_when_optional_dependencies_are assert payload['data']['request']['tenant_id'] == 'tenant-001' assert payload['data']['cell_errors']['error_count'] == 0 assert payload['data']['row_errors']['error_count'] == 0 + assert payload['data']['remediation']['remediation']['needs_remediation'] is False missing_file_response = client.post('/employee-imports', data={'tenant_id': 'tenant-001'}) assert missing_file_response.status_code == 400 diff --git a/tests/support/storage.py b/tests/support/storage.py index 5a94e3e..29ee26f 100644 --- a/tests/support/storage.py +++ b/tests/support/storage.py @@ -6,6 +6,9 @@ from excelalchemy import UrlStr from excelalchemy.core.storage_protocol import ExcelStorage from excelalchemy.core.table import WorksheetTable +from excelalchemy.exceptions import WorksheetNotFoundError +from excelalchemy.i18n.messages import MessageKey +from excelalchemy.i18n.messages import message as msg class InMemoryExcelStorage(ExcelStorage): @@ -18,6 +21,12 @@ def __init__(self, fixtures: dict[str, bytes] | None = None): def 
read_excel_table(self, input_excel_name: str, *, skiprows: int, sheet_name: str) -> WorksheetTable: workbook = load_workbook(io.BytesIO(self.fixtures[input_excel_name]), data_only=True) try: + if sheet_name not in workbook.sheetnames: + raise WorksheetNotFoundError( + msg(MessageKey.WORKSHEET_NOT_FOUND, sheet_name=sheet_name), + message_key=MessageKey.WORKSHEET_NOT_FOUND, + sheet_name=sheet_name, + ) worksheet = workbook[sheet_name] rows = [ [None if value is None else str(value) for value in row] diff --git a/tests/unit/test_excel_exceptions.py b/tests/unit/test_excel_exceptions.py index 34f4269..9b94027 100644 --- a/tests/unit/test_excel_exceptions.py +++ b/tests/unit/test_excel_exceptions.py @@ -3,10 +3,14 @@ ConfigError, ExcelCellError, ExcelRowError, + ImportResult, Label, ProgrammaticError, RowIssueMap, + ValidateResult, ) +from excelalchemy.i18n.messages import MessageKey +from excelalchemy.results import build_frontend_remediation_payload from tests.support import BaseTestCase @@ -369,3 +373,248 @@ async def test_row_issue_map_supports_row_access_and_numbered_messages(self): assert issue_map.field_labels() == ('邮箱',) assert issue_map.codes() == ('ExcelCellError', 'ExcelRowError') assert issue_map.row_numbers_for_humans() == (1,) + + async def test_frontend_remediation_payload_uses_message_key_hints_for_cell_errors(self): + result = ImportResult(result=ValidateResult.DATA_INVALID, fail_count=1, url='memory://result.xlsx') + cell_error_map = CellErrorMap() + row_error_map = RowIssueMap() + error = ExcelCellError( + label=Label('邮箱'), + message='Enter a valid email address, such as name@example.com', + message_key=MessageKey.VALID_EMAIL_REQUIRED, + ) + + cell_error_map.add(0, 1, error) + row_error_map.add(0, error) + + payload = build_frontend_remediation_payload( + result=result, + cell_error_map=cell_error_map, + row_error_map=row_error_map, + ) + + assert payload['remediation'] == { + 'needs_remediation': True, + 'affected_row_count': 1, + 
'affected_field_count': 1, + 'affected_code_count': 1, + 'header_issue_count': 0, + 'result_workbook_available': True, + 'suggested_action': 'Correct the invalid rows and re-upload the workbook.', + 'fix_hint': 'Download the result workbook and review the highlighted rows before re-uploading.', + } + assert payload['by_field'] == [ + { + 'field_label': '邮箱', + 'parent_label': None, + 'unique_label': '邮箱', + 'error_count': 1, + 'row_indices': [0], + 'row_numbers_for_humans': [1], + 'codes': ['valid_email_required'], + 'suggested_action': 'Enter a complete email address and re-upload the workbook.', + 'fix_hint': 'Use a format such as name@example.com.', + } + ] + assert payload['by_code'] == [ + { + 'code': 'valid_email_required', + 'error_count': 1, + 'row_indices': [0], + 'row_numbers_for_humans': [1], + 'unique_labels': ['邮箱'], + 'suggested_action': 'Enter a complete email address and re-upload the workbook.', + 'fix_hint': 'Use a format such as name@example.com.', + } + ] + assert payload['items'] == [ + { + 'scope': 'cell', + 'code': 'valid_email_required', + 'message': 'Enter a valid email address, such as name@example.com', + 'display_message': '【邮箱】Enter a valid email address, such as name@example.com', + 'row_index': 0, + 'row_number_for_humans': 1, + 'column_index': 1, + 'column_number_for_humans': 2, + 'field_label': '邮箱', + 'parent_label': None, + 'unique_label': '邮箱', + 'message_key': 'valid_email_required', + 'suggested_action': 'Enter a complete email address and re-upload the workbook.', + 'fix_hint': 'Use a format such as name@example.com.', + } + ] + + async def test_frontend_remediation_payload_falls_back_to_code_hint_for_row_errors(self): + result = ImportResult(result=ValidateResult.DATA_INVALID, fail_count=1) + row_error_map = RowIssueMap() + row_error = ExcelRowError(message='Combination invalid') + + row_error_map.add(0, row_error) + + payload = build_frontend_remediation_payload( + result=result, + cell_error_map=CellErrorMap(), + 
row_error_map=row_error_map, + ) + + assert payload['by_field'] == [] + assert payload['by_code'] == [ + { + 'code': 'ExcelRowError', + 'error_count': 1, + 'row_indices': [0], + 'row_numbers_for_humans': [1], + 'unique_labels': [], + 'suggested_action': 'Review the row-level validation message, correct the row, and re-upload the workbook.', + } + ] + assert payload['items'] == [ + { + 'scope': 'row', + 'code': 'ExcelRowError', + 'message': 'Combination invalid', + 'display_message': 'Combination invalid', + 'row_index': 0, + 'row_number_for_humans': 1, + 'suggested_action': 'Review the row-level validation message, correct the row, and re-upload the workbook.', + } + ] + + async def test_frontend_remediation_payload_omits_optional_hints_when_no_mapping_exists(self): + result = ImportResult(result=ValidateResult.DATA_INVALID, fail_count=1) + cell_error_map = CellErrorMap() + row_error_map = RowIssueMap() + error = ExcelCellError( + label=Label('备注'), message='Custom validation failed', message_key=MessageKey.INVALID_INPUT + ) + + cell_error_map.add(0, 0, error) + row_error_map.add(0, error) + + payload = build_frontend_remediation_payload( + result=result, + cell_error_map=cell_error_map, + row_error_map=row_error_map, + ) + + assert payload['by_field'] == [ + { + 'field_label': '备注', + 'parent_label': None, + 'unique_label': '备注', + 'error_count': 1, + 'row_indices': [0], + 'row_numbers_for_humans': [1], + 'codes': ['invalid_input'], + } + ] + assert payload['by_code'] == [ + { + 'code': 'invalid_input', + 'error_count': 1, + 'row_indices': [0], + 'row_numbers_for_humans': [1], + 'unique_labels': ['备注'], + } + ] + assert payload['items'] == [ + { + 'scope': 'cell', + 'code': 'invalid_input', + 'message': 'Custom validation failed', + 'display_message': '【备注】Custom validation failed', + 'row_index': 0, + 'row_number_for_humans': 1, + 'column_index': 0, + 'column_number_for_humans': 1, + 'field_label': '备注', + 'parent_label': None, + 'unique_label': '备注', + 
'message_key': 'invalid_input', + } + ] + + async def test_frontend_remediation_payload_supports_mixed_issue_types_and_code_fallback_hints(self): + result = ImportResult(result=ValidateResult.DATA_INVALID, fail_count=2) + cell_error_map = CellErrorMap() + row_error_map = RowIssueMap() + cell_error = ExcelCellError(label=Label('年龄'), message='Invalid input; enter a number.') + row_error = ExcelRowError(message='Combination invalid') + + cell_error_map.add(0, 3, cell_error) + row_error_map.add(0, cell_error) + row_error_map.add(0, row_error) + + payload = build_frontend_remediation_payload( + result=result, + cell_error_map=cell_error_map, + row_error_map=row_error_map, + ) + + assert payload['remediation'] == { + 'needs_remediation': True, + 'affected_row_count': 1, + 'affected_field_count': 1, + 'affected_code_count': 2, + 'header_issue_count': 0, + 'result_workbook_available': False, + 'suggested_action': 'Correct the invalid rows and re-upload the workbook.', + 'fix_hint': 'Review the invalid rows and field messages before re-uploading.', + } + assert payload['by_field'] == [ + { + 'field_label': '年龄', + 'parent_label': None, + 'unique_label': '年龄', + 'error_count': 1, + 'row_indices': [0], + 'row_numbers_for_humans': [1], + 'codes': ['ExcelCellError'], + 'suggested_action': 'Review the highlighted cells, correct the invalid values, and re-upload the workbook.', + } + ] + assert payload['by_code'] == [ + { + 'code': 'ExcelCellError', + 'error_count': 1, + 'row_indices': [0], + 'row_numbers_for_humans': [1], + 'unique_labels': ['年龄'], + 'suggested_action': 'Review the highlighted cells, correct the invalid values, and re-upload the workbook.', + }, + { + 'code': 'ExcelRowError', + 'error_count': 1, + 'row_indices': [0], + 'row_numbers_for_humans': [1], + 'unique_labels': [], + 'suggested_action': 'Review the row-level validation message, correct the row, and re-upload the workbook.', + }, + ] + assert payload['items'] == [ + { + 'scope': 'cell', + 'code': 
'ExcelCellError', + 'message': 'Invalid input; enter a number.', + 'display_message': '【年龄】Invalid input; enter a number.', + 'row_index': 0, + 'row_number_for_humans': 1, + 'column_index': 3, + 'column_number_for_humans': 4, + 'field_label': '年龄', + 'parent_label': None, + 'unique_label': '年龄', + 'suggested_action': 'Review the highlighted cells, correct the invalid values, and re-upload the workbook.', + }, + { + 'scope': 'row', + 'code': 'ExcelRowError', + 'message': 'Combination invalid', + 'display_message': 'Combination invalid', + 'row_index': 0, + 'row_number_for_humans': 1, + 'suggested_action': 'Review the row-level validation message, correct the row, and re-upload the workbook.', + }, + ] diff --git a/tests/unit/test_field_metadata.py b/tests/unit/test_field_metadata.py index 8d27cfd..e19f643 100644 --- a/tests/unit/test_field_metadata.py +++ b/tests/unit/test_field_metadata.py @@ -229,6 +229,28 @@ class Importer(BaseModel): alchemy = self.build_alchemy(Importer) assert alchemy.ordered_field_meta[0].comment_hint == '提示:请输入邮箱' + async def test_comment_example_returns_configured_example_value(self): + class Importer(BaseModel): + email: Email = FieldMeta( + label='邮箱', + order=1, + example_value='name@example.com', + ) + + alchemy = self.build_alchemy(Importer) + assert alchemy.ordered_field_meta[0].comment_example == '示例:name@example.com' + + async def test_comment_example_omits_blank_example_value(self): + class Importer(BaseModel): + email: Email = FieldMeta( + label='邮箱', + order=1, + example_value=' ', + ) + + alchemy = self.build_alchemy(Importer) + assert alchemy.ordered_field_meta[0].comment_example == '' + async def test_comment_options_lists_available_option_names(self): class Importer(BaseModel): sex: Radio = FieldMeta( @@ -339,6 +361,21 @@ class Importer(BaseModel): assert field_meta.excel_codec is Email assert field_meta.comment_max_length == '最大长度:10' + async def test_excelmeta_supports_example_value_in_annotated_field_declarations(self): + 
class Importer(BaseModel): + email: Annotated[ + Email, + Field(max_length=10), + ExcelMeta(label='邮箱', order=1, example_value='name@example.com'), + ] + + alchemy = self.build_alchemy(Importer) + field_meta = alchemy.ordered_field_meta[0] + + assert field_meta.label == '邮箱' + assert field_meta.example_value == 'name@example.com' + assert field_meta.comment_example == '示例:name@example.com' + async def test_field_metadata_exposes_split_internal_layers(self): class Importer(BaseModel): email: Email = FieldMeta( @@ -346,6 +383,7 @@ class Importer(BaseModel): order=1, unique=True, hint='请输入邮箱', + example_value='name@example.com', max_length=10, ) @@ -357,6 +395,7 @@ class Importer(BaseModel): assert field_meta.runtime_binding.parent_label == '邮箱' assert field_meta.runtime_binding.parent_key == 'email' assert field_meta.presentation_meta.hint == '请输入邮箱' + assert field_meta.presentation_meta.example_value == 'name@example.com' assert field_meta.import_constraints.max_length == 10 assert field_meta.declared is field_meta.declared_meta assert field_meta.runtime is field_meta.runtime_binding