diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..3b7169b --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,36 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: +// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/docker-existing-dockerfile +{ + "build": { + "dockerfile": "Dockerfile", + "args": { + "IMAGE": "mcr.microsoft.com/devcontainers/base:ubuntu" + } + }, + + "features": {}, + + // Sets the run context to one level up instead of the .devcontainer folder. + "context": "..", + + // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename. + "dockerFile": "../Dockerfile", + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + "forwardPorts": [3000], + + // Uncomment the next line to run commands after the container is created - for example installing curl. + "postCreateCommand": "bash dev-bootstrap.sh", + + // Uncomment when using a ptrace-based debugger like C++, Go, and Rust + // "runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined" ], + + // Uncomment to use the Docker CLI from inside the container. See https://aka.ms/vscode-remote/samples/docker-from-docker. + // "mounts": [ "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" ], + + // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. + // "remoteUser": "vscode", + + // Set *default* container specific settings.json values on container create. 
+ // "settings": {}, +} diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..52b556e --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,15 @@ +# CODEOWNERS file +# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners + +# Default owners for everything in the repo +# These owners will be requested for review when someone opens a pull request +* @cloudandthings/maintainers + +# GitHub Actions workflows +/.github/workflows/ @cloudandthings/devops + +# Documentation +*.md @cloudandthings/documentation + +# Tests +/tests/ @cloudandthings/qa diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..555b80f --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,127 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. 
Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..6b3b17d --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,59 @@ +# Contributing + +When contributing to this repository, please first discuss the change you wish to make via issue, +email, or any other method with the owners of this repository before making a change. + +Please note we have a code of conduct, please follow it in all your interactions with the project. + +## Development environment + +We recommend [VSCode](https://code.visualstudio.com/) and [mise-en-place](https://mise.jdx.dev/). + +Once `mise` is installed and activated, it will create and activate a Python virtual environment +and install all required development tools defined in `mise.toml`. + +Install Python dependencies: + +```sh +uv sync --all-extras +``` + +### Reducing clutter + +To improve focus while developing, you may want to configure VSCode to hide all files beginning +with `.` from the Explorer view. Add `"**/.*"` to the `files.exclude` setting. 
+ +## Code quality + +This project uses [ruff](https://github.com/astral-sh/ruff) for linting and formatting, managed +via pre-commit. Run all hooks before committing: + +```sh +pre-commit run -a +``` + +## Pull Request Process + +1. Update the code, examples, and/or documentation where appropriate. +1. Follow [conventional commits](https://www.conventionalcommits.org/) for your commit messages. +1. Run pre-commit hooks locally: `pre-commit run -a` +1. Run tests locally: `mise test` +1. Create a pull request. +1. Once all CI checks pass, notify a reviewer. + +Once all outstanding comments and checklist items have been addressed, your contribution will be +merged. Merged PRs will be included in the next release. + +## Testing + +```sh +mise test +# or directly: +pytest tests/ +``` + +## Releases + +Releases are automated via [release-please](https://github.com/googleapis/release-please) based +on [conventional commits](https://www.conventionalcommits.org/). A release will bump the version +in `pyproject.toml`, generate a changelog entry, and publish the package to PyPI. diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..5873c16 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,30 @@ +name: Bug Report +description: Report a bug or unexpected behavior +title: "bug: " +labels: ["bug"] +body: + - type: textarea + id: description + attributes: + label: What happened? + description: What did you expect, and what did you get instead? + validations: + required: true + + - type: textarea + id: reproduction + attributes: + label: Steps to reproduce + render: python + placeholder: | + from dispatchio import ... 
+ validations: + required: true + + - type: input + id: versions + attributes: + label: dispatchio version / Python version + placeholder: "e.g., v0.1.0 / Python 3.11" + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..a41a5f1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: true +contact_links: + - name: Questions and Discussions + url: https://github.com/cloudandthings/python-dispatchio/discussions + about: Ask questions and discuss ideas with the community + - name: Security Vulnerability + url: https://github.com/cloudandthings/python-dispatchio/security/advisories/new + about: Report security vulnerabilities privately diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..3daaf4b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,22 @@ +name: Feature Request +description: Suggest an idea or enhancement +title: "feat: " +labels: ["enhancement"] +body: + - type: textarea + id: description + attributes: + label: What would you like? + description: Describe the feature and why it would be useful. + validations: + required: true + + - type: textarea + id: example + attributes: + label: Example usage + render: python + placeholder: | + from dispatchio import ... + validations: + required: false diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..507db49 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,11 @@ +## What does this PR do? 
+ + + +## Checklist + +- [ ] `pre-commit run -a` passes locally +- [ ] `mise test` passes locally +- [ ] CI checks pass + +Closes # diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..716de12 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,270 @@ +# GitHub Copilot Instructions + +## Repository Overview + +This is a **Terraform AWS Module Template** repository that serves as a starting point for creating new Terraform AWS modules. It provides a standardized structure, development environment, testing framework, and CI/CD workflows. + +**Key Purpose**: Template for creating well-structured, tested, and documented Terraform modules for AWS infrastructure. + +## Code Structure + +```text +. +├── main.tf # Main module resources +├── variables.tf # Module input variables +├── outputs.tf # Module outputs +├── locals.tf # Computed local values +├── terraform.tf # Terraform and provider version constraints +├── examples/ +│ └── basic/ # Example usage of the module +├── tests/ +│ ├── basic.tftest.hcl # Unit tests (mock providers, no AWS needed) +│ └── integration/ +│ └── basic.tftest.hcl # Integration tests (real AWS resources) +├── .github/ +│ ├── workflows/ # CI/CD workflows +│ └── CONTRIBUTING.md # Contribution guidelines +└── .pre-commit-config.yaml # Pre-commit hooks configuration +``` + +## Development Environment + +### Tool Management + +- **mise-en-place** (`mise.toml`): Manages all development tools and their versions — all pinned to specific versions for reproducibility. + - OpenTofu (primary dev tool, `tofu` binary) + - Terraform (secondary, for CI matrix validation) + - Python 3.13, terraform-docs, tflint, pre-commit, etc. + - Run `./dev-bootstrap.sh` to set up tools and pre-commit hooks. + +### Environment Setup Options + +1. **GitHub Codespaces**: Ready-to-use cloud environment +2. **Dev Containers**: Local containerized environment using Docker +3. 
**Local with mise**: Install mise and run `./dev-bootstrap.sh` +4. **Manual**: Install tools from `mise.toml` manually (not recommended) + +## Terraform and OpenTofu Compatibility + +This template targets both **Terraform** (≥ 1.5.0) and **OpenTofu** (≥ 1.6.0). + +- Module `required_version`: `>= 1.5.0, < 2.0.0` +- Never use `~> 1.x.y` patch constraints in modules — `~> 1.5.7` resolves to `>= 1.5.7, < 1.6.0`, which excludes OpenTofu. +- The primary dev binary is `tofu`; CI also validates with `terraform`. +- Avoid features exclusive to one tool. Safe shared baseline: TF/OpenTofu 1.6–1.8. + - Do not use: Terraform 1.10+ exclusive features (ephemeral resources, write-only attributes) + - Do not use: OpenTofu-exclusive features not yet in Terraform + +## Terraform Conventions + +### Module Structure + +- **Required variables**: Must have `naming_prefix` (string) for resource naming +- **Optional variables**: Common `tags` (map) for resource tagging +- **Locals**: Computed values in `locals.tf` +- **Resources**: Follow AWS best practices with consistent naming +- **Documentation**: Auto-generated using terraform-docs (do not edit manually) + +### Naming Patterns + +- Use snake_case for variables, resources, and locals +- Prefix resources with module-specific identifier +- Use `local.naming_prefix` for resource naming consistency + +### Version Constraints + +- Terraform/OpenTofu: `>= 1.5.0, < 2.0.0` +- AWS provider: `>= 5, < 7` +- Pin provider versions appropriately + +## Testing + +### Framework + +Tests use the native `terraform test` / `tofu test` framework (`.tftest.hcl` files), available in both tools from version 1.6 onwards. No Python environment required. + +### Unit tests (no AWS credentials) + +```sh +tofu test +# or: terraform test +``` + +Unit tests use `mock_provider` blocks so they can run anywhere without credentials. 
+ +### Integration tests (real AWS resources) + +```sh +AWS_PROFILE=your-profile tofu test -test-directory=tests/integration +``` + +### What to Test + +- Unit tests: all significant input variable combinations; mock providers cover plan-time logic. +- Integration tests: behaviour that needs real AWS (IAM evaluation, data source lookups, etc.). +- Cover bug fixes with a regression test. +- Ensure module behaves as expected before rolling out changes. + +## Code Quality & Linting + +### Pre-commit Hooks + +Run before every commit: `pre-commit run -a` + +**Enabled checks:** + +- **Terraform**: fmt, validate, tflint (with AWS ruleset), terraform-docs +- **Security**: checkov (Terraform framework), detect-aws-credentials, detect-private-key +- **General**: trailing whitespace, EOF fixer, merge conflicts +- **Python**: ruff, black +- **Shell**: shellcheck, shfmt +- **Spelling**: codespell + +### TFLint Configuration + +- Configuration in `.tflint.hcl` +- Includes the `tflint-ruleset-aws` plugin for AWS-specific checks +- Enforces naming conventions, documentation, module structure +- Must pass before merging + +### Documentation Generation + +- **terraform-docs** auto-generates README sections +- Configuration in `.tfdocs-config.yml` +- Generated sections between `<!-- BEGIN_TF_DOCS -->` and `<!-- END_TF_DOCS -->` +- Run via pre-commit hook: `python examples/terraform-docs.py` + +## Git & CI/CD + +### Branch Protection + +- Cannot commit directly to `main`, `master`, or `develop` +- Use feature branches for changes + +### Semantic Commits + +Follow conventional commit format: + +- `feat:` - New features +- `fix:` - Bug fixes +- `docs:` - Documentation changes +- `test:` - Test additions/changes +- `refactor:` - Code refactoring +- `ci:` - CI/CD changes +- `chore:` - Maintenance tasks (excluded from changelog) + +### CI Workflows + +1. **pre-commit-and-tests.yml**: Runs pre-commit, unit tests (ubuntu-latest, no AWS), and integration tests (CodeBuild + AWS) +2. 
**terraform-min-max.yml**: Validates with min/max Terraform versions and the pinned OpenTofu version +3. **pr-title.yml**: Validates PR titles follow conventions +4. **trivy-scan.yml**: Security scanning +5. **release.yml**: Automated releases using release-please + +### Dependency Updates + +Managed by **Renovate** (`renovate.json`), which covers: + +- GitHub Actions (with SHA digest pinning) +- Terraform providers +- Pre-commit hook versions +- `mise.toml` tool versions + +### Pull Request Process + +1. Update code, examples, and documentation +2. Run `pre-commit run -a` locally +3. Run `tofu test` locally +4. Ensure CI checks pass +5. Use semantic commit messages +6. Request review when ready + +## Common Patterns + +### Adding New Resources + +1. Define resource in `main.tf` +2. Add computed values to `locals.tf` +3. Add variables to `variables.tf` with descriptions and types +4. Add outputs to `outputs.tf` if needed +5. Update example in `examples/basic/` +6. Run `pre-commit run -a` to update docs +7. 
Add/update unit tests in `tests/basic.tftest.hcl` + +### Working with Examples + +- Keep examples minimal and focused +- Use `random_integer` for unique naming +- Example must be `tofu validate`-able +- Documentation auto-generated from code + +## Best Practices + +### Code Style + +- Follow existing patterns in the repository +- Use descriptive variable and resource names +- Add inline comments only for complex logic +- Let terraform-docs handle documentation + +### Security + +- Never commit AWS credentials +- Use IAM roles with OIDC for authentication in CI +- Keep sensitive data in AWS Secrets Manager/Parameter Store +- Security scanning via checkov (pre-commit) and trivy (CI) + +### Module Design + +- Keep modules focused and single-purpose +- Use sensible defaults for optional variables +- Make required variables truly required +- Provide clear, auto-generated documentation + +## Maintenance + +### Template Updates + +- Periodically review [terraform-aws-template](https://github.com/cloudandthings/terraform-aws-template) +- Sync improvements back to this template +- Dependency updates are automated via Renovate + +### Version Management + +- Use semantic versioning +- Automated via release-please based on commit messages +- CHANGELOG.md auto-generated + +## Common Commands + +```sh +# Setup environment +./dev-bootstrap.sh + +# Format and validate +tofu fmt -recursive +tofu validate + +# Run all pre-commit hooks +pre-commit run -a + +# Run unit tests (no AWS needed) +tofu test + +# Run integration tests (requires AWS credentials) +AWS_PROFILE=your-profile tofu test -test-directory=tests/integration + +# Generate documentation +python examples/terraform-docs.py + +# Check TFLint +tflint --config=.tflint.hcl +``` + +## Important Notes + +- This repository was created from [terraform-aws-template](https://github.com/cloudandthings/terraform-aws-template) +- The `null_resource.delete_me` in `main.tf` is a placeholder and should be deleted when implementing actual 
functionality +- Documentation between `BEGIN_TF_DOCS` and `END_TF_DOCS` is auto-generated — don't edit manually +- Tests run in isolated environments with unique resource naming to prevent conflicts +- All tool versions in `mise.toml` are pinned — update deliberately and verify compatibility diff --git a/.github/copilot-setup-steps.yml b/.github/copilot-setup-steps.yml new file mode 100644 index 0000000..036a6e0 --- /dev/null +++ b/.github/copilot-setup-steps.yml @@ -0,0 +1,8 @@ +steps: + - uses: jdx/mise-action@v3 + with: + cache: true + github_token: ${{ secrets.GITHUB_TOKEN }} + + - name: Setup development environment + run: ./dev-bootstrap.sh diff --git a/.github/workflows/pr-title.yml b/.github/workflows/pr-title.yml new file mode 100644 index 0000000..a1438ce --- /dev/null +++ b/.github/workflows/pr-title.yml @@ -0,0 +1,56 @@ +name: Validate PR title + +on: + pull_request_target: + types: + - opened + - edited + - synchronize + +jobs: + main: + name: 👀 Validate PR title + # For public repos use runs-on: ubuntu-latest + # For private repos use runs-on: codebuild-... + runs-on: ubuntu-latest + steps: + # Please look up the latest version from + # https://github.com/amannn/action-semantic-pull-request/releases + - uses: amannn/action-semantic-pull-request@v5 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + # Configure which types are allowed. + # Default: https://github.com/commitizen/conventional-commit-types + types: | + fix + feat + docs + ci + chore + # Configure that a scope must always be provided. + requireScope: false + # Configure additional validation for the subject based on a regex. + # This example ensures the subject starts with a character. + subjectPattern: ^[A-Za-z].+$ + # If `subjectPattern` is configured, you can use this property to override + # the default error message that is shown when the pattern doesn't match. + # The variables `subject` and `title` can be used within the message. 
+ subjectPatternError: | + The subject "{subject}" found in the pull request title "{title}" + didn't match the configured pattern. Please ensure that the subject + starts with a character. + # For work-in-progress PRs you can typically use draft pull requests + # from Github. However, private repositories on the free plan don't have + # this option and therefore this action allows you to opt-in to using the + # special "[WIP]" prefix to indicate this state. This will avoid the + # validation of the PR title and the pull request checks remain pending. + # Note that a second check will be reported if this is enabled. + wip: true + # See: https://github.com/amannn/action-semantic-pull-request#legacy-configuration + # If the PR only contains a single commit, the action will validate that + # it matches the configured pattern. + validateSingleCommit: true + # Related to `validateSingleCommit` you can opt-in to validate that the PR + # title matches a single commit to avoid confusion. + validateSingleCommitMatchesPrTitle: true diff --git a/.github/workflows/pre-commit-and-tests.yml b/.github/workflows/pre-commit-and-tests.yml new file mode 100644 index 0000000..38de2b3 --- /dev/null +++ b/.github/workflows/pre-commit-and-tests.yml @@ -0,0 +1,60 @@ +name: pre-commit & tests + +on: + workflow_dispatch: + pull_request: + branches: + - main + - master + - develop + +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true + +permissions: + contents: read + +defaults: + run: + shell: bash + +jobs: + #-------------------------------------------------------------- + # PRE-COMMIT + #-------------------------------------------------------------- + pre_commit: + name: 💍 pre-commit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: jdx/mise-action@v3 + with: + log_level: info + cache: true + + - name: pre-commit run + run: | + echo ...running pre-commit checks... 
+ pre-commit run --config .pre-commit-config.yaml --all-files --show-diff-on-failure + + #-------------------------------------------------------------- + # TESTS + #-------------------------------------------------------------- + tests: + name: 🧪 tests + needs: [pre_commit] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: jdx/mise-action@v3 + with: + cache: true + + - name: Install dependencies + run: uv sync --all-extras + + - name: Run tests + run: mise test diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..8d092aa --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,80 @@ +name: release + +# Overview ref: https://github.com/googleapis/release-please +# Configuration ref: https://github.com/googleapis/release-please-action +# release-type: python bumps `version` in pyproject.toml automatically. +# The publish job uses PyPI Trusted Publisher (OIDC) — no API token needed. +# Before the publish job will succeed, configure a Trusted Publisher on PyPI: +# https://docs.pypi.org/trusted-publishers/adding-a-trusted-publisher/ + +on: + workflow_dispatch: + push: + branches: + - main + - master + - develop + +jobs: + release: + name: 🙏 release-please + runs-on: ubuntu-latest + outputs: + release_created: ${{ steps.release-please.outputs.release_created }} + tag_name: ${{ steps.release-please.outputs.tag_name }} + steps: + - uses: actions/checkout@v4 + + - name: release-please + uses: googleapis/release-please-action@v4 + id: release-please + with: + release-type: python + + publish-testpypi: + name: 📦 publish to TestPyPI + needs: [release] + if: ${{ needs.release.outputs.release_created }} + runs-on: ubuntu-latest + environment: + name: testpypi + url: https://test.pypi.org/project/dispatchio/ + permissions: + id-token: write + steps: + - uses: actions/checkout@v4 + + - uses: jdx/mise-action@v3 + with: + cache: true + + - name: Build package + run: uv build + + - name: Publish 
to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://upload.test.pypi.org/legacy/ + + publish: + name: 📦 publish to PyPI + needs: [release, publish-testpypi] + if: ${{ needs.release.outputs.release_created }} + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/project/dispatchio/ + permissions: + id-token: write + steps: + - uses: actions/checkout@v4 + + - uses: jdx/mise-action@v3 + with: + cache: true + + - name: Build package + run: uv build + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/trivy-scan.yml b/.github/workflows/trivy-scan.yml new file mode 100644 index 0000000..ebe7d9f --- /dev/null +++ b/.github/workflows/trivy-scan.yml @@ -0,0 +1,35 @@ +name: Trivy Security Scan + +on: + workflow_dispatch: + pull_request: + branches: + - main + - master + - develop + push: + branches: + - main + - master + - develop + +jobs: + security-scan: + runs-on: ubuntu-latest + permissions: + contents: read + security-events: write # For uploading SARIF results + actions: read # Required for workflow run information + + steps: + - uses: actions/checkout@v4 + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@v0.35.0 + with: + scan-type: fs + scan-ref: '.' + format: 'table' + severity: 'HIGH,CRITICAL' + exit-code: 1 + scanners: vuln diff --git a/.gitignore b/.gitignore index 36f27ee..2915925 100644 --- a/.gitignore +++ b/.gitignore @@ -182,9 +182,9 @@ cython_debug/ .abstra/ # Visual Studio Code -# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore -# and can be added to the global gitignore or merged into this file. 
However, if you prefer, +# and can be added to the global gitignore or merged into this file. However, if you prefer, # you could uncomment the following to ignore the entire vscode folder .vscode/ diff --git a/.mise.toml b/.mise.toml index 1d680da..c33d057 100644 --- a/.mise.toml +++ b/.mise.toml @@ -6,10 +6,43 @@ # Run a task: mise run # Run with args: mise run -- +[env] +# Use the project name derived from the current directory +PROJECT_NAME = "{{ config_root | basename }}" + +# Automatic virtualenv activation +_.python.venv = { path = ".venv", create = true } + +[settings] +lockfile = true +python.uv_venv_auto = "create|source" + +[tasks.install] +description = "Install dependencies" +alias = "i" +run = "uv pip install -r requirements.txt" + +[tasks.test] +description = "Run tests" +run = "pytest tests/" + +[tasks.lint] +description = "Lint the code" +run = "ruff src/" + +[tasks.info] +description = "Print project information" +run = ''' +echo "Project: $PROJECT_NAME" +echo "Virtual Environment: $VIRTUAL_ENV" +''' + [tools] jq = "1.8.1" pre-commit = "4.5.1" python = "3.13" +uv = "latest" +ruff = "latest" starship = "1.24.2" trivy = "v0.69.3" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..d80471f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,63 @@ +# https://pre-commit.com/ + +exclude: | + (?x)^( + ^\.venv/.*$| + ^\.pytest_cache/.*$ + )$ + +default_language_version: + python: python3.13 + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: check-added-large-files + args: ["--maxkb=1024"] + - id: check-case-conflict + - id: check-json + exclude: devcontainer.json + - id: check-merge-conflict + - id: check-symlinks + - id: check-toml + - id: check-vcs-permalinks + - id: check-yaml + - id: debug-statements + - id: detect-private-key + - id: end-of-file-fixer + - id: mixed-line-ending + args: ["--fix=lf"] + - id: no-commit-to-branch + args: ["--branch", 
"develop", "--branch", "main", "--branch", "master"] + - id: trailing-whitespace + + - repo: https://github.com/codespell-project/codespell + rev: v2.4.2 + hooks: + - id: codespell + + - repo: https://github.com/asottile/pyupgrade + rev: v3.21.0 + hooks: + - id: pyupgrade + args: ["--py313-plus"] + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.11.8 + hooks: + - id: ruff + args: ["--fix", "--exit-non-zero-on-fix"] + - id: ruff-format + + # SECURITY + # Mirrors the trivy config scan in trivy-scan.yml CI workflow. + # Only the 'config' scan type is run here (IaC misconfig); the 'fs' vuln scan runs in CI only. + - repo: local + hooks: + - id: trivy-config + name: trivy config scan + entry: trivy config --severity HIGH,CRITICAL --exit-code 1 --config trivy.yaml . + language: system + pass_filenames: false + always_run: true diff --git a/.release-please-manifest.json b/.release-please-manifest.json new file mode 100644 index 0000000..73d3293 --- /dev/null +++ b/.release-please-manifest.json @@ -0,0 +1,3 @@ +{ + ".": "1.4.0" +} diff --git a/COOKBOOK.md b/COOKBOOK.md index 2abf9e0..093dc3f 100644 --- a/COOKBOOK.md +++ b/COOKBOOK.md @@ -31,7 +31,7 @@ Two jobs: 1. hello_world — runs immediately, prints a greeting. 2. goodbye_world — runs after hello_world is done for the same day. -The Job class creates two jobs, and a dependency between them. +The Job class creates two jobs, and a dependency between them. Configuration is loaded from dispatchio.toml in this directory. For example, default_cadence is set to DAILY so it doesn't have to be specified in the Job definitions. 
@@ -50,7 +50,7 @@ CONFIG_FILE = os.getenv("DISPATCHIO_CONFIG", str(BASE / "dispatchio.toml")) hello_world = Job.create( "hello_world", # default_cadence is set to DAILY in dispatchio.toml - # cadence=DAILY, + # cadence=DAILY, executor=PythonJob( script=str(BASE / "my_work.py"), function="hello_world", @@ -65,7 +65,7 @@ goodbye_world = Job.create( ), depends_on=hello_world, # default_cadence is set to DAILY in dispatchio.toml - # cadence=DAILY, + # cadence=DAILY, ) JOBS = [hello_world, goodbye_world] diff --git a/build_cookbook.py b/build_cookbook.py index d6a3618..46fe4f3 100644 --- a/build_cookbook.py +++ b/build_cookbook.py @@ -18,16 +18,16 @@ import tomllib from pathlib import Path -ROOT = Path(__file__).parent.parent +ROOT = Path(__file__).parent.parent EXAMPLES_DIR = ROOT / "examples" -DEFAULT_OUT = ROOT / "COOKBOOK.md" +DEFAULT_OUT = ROOT / "COOKBOOK.md" _FENCE_LANG = { - ".py": "python", + ".py": "python", ".toml": "toml", - ".sh": "bash", + ".sh": "bash", ".yaml": "yaml", - ".yml": "yaml", + ".yml": "yaml", ".json": "json", } @@ -85,7 +85,9 @@ def build(output: Path = DEFAULT_OUT) -> None: for toml_path in EXAMPLES_DIR.glob("*/example.toml"): with toml_path.open("rb") as fh: meta = tomllib.load(fh) - entries.append((meta.get("order", 999), toml_path.parent.name, toml_path.parent, meta)) + entries.append( + (meta.get("order", 999), toml_path.parent.name, toml_path.parent, meta) + ) entries.sort(key=lambda e: (e[0], e[1])) @@ -98,7 +100,14 @@ def build(output: Path = DEFAULT_OUT) -> None: if __name__ == "__main__": - parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument("--output", type=Path, default=DEFAULT_OUT, metavar="PATH", - help=f"destination file (default: {DEFAULT_OUT.name})") + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "--output", + type=Path, + default=DEFAULT_OUT, + 
metavar="PATH", + help=f"destination file (default: {DEFAULT_OUT.name})", + ) build(parser.parse_args().output) diff --git a/dispatchio/__init__.py b/dispatchio/__init__.py index 1dc0741..e56c757 100644 --- a/dispatchio/__init__.py +++ b/dispatchio/__init__.py @@ -101,7 +101,9 @@ def local_orchestrator( state=FilesystemStateStore(base / "state"), executors={ "subprocess": SubprocessExecutor(), - "python": PythonJobExecutor(reporter_env={"DISPATCHIO_DROP_DIR": str(completions)}), + "python": PythonJobExecutor( + reporter_env={"DISPATCHIO_DROP_DIR": str(completions)} + ), }, receiver=FilesystemReceiver(completions), **orchestrator_kwargs, diff --git a/dispatchio/alerts/base.py b/dispatchio/alerts/base.py index d67e01e..59517ad 100644 --- a/dispatchio/alerts/base.py +++ b/dispatchio/alerts/base.py @@ -13,7 +13,7 @@ import logging from datetime import datetime -from typing import Any, Protocol, runtime_checkable +from typing import Protocol, runtime_checkable from pydantic import BaseModel @@ -24,13 +24,14 @@ class AlertEvent(BaseModel): """Emitted by the orchestrator when an alert condition is triggered.""" - alert_on: AlertOn - job_name: str - run_id: str - channels: list[str] - detail: str | None = None + + alert_on: AlertOn + job_name: str + run_id: str + channels: list[str] + detail: str | None = None occurred_at: datetime - record: RunRecord | None = None # full run record for context + record: RunRecord | None = None # full run record for context @runtime_checkable diff --git a/dispatchio/cadence.py b/dispatchio/cadence.py index bd812ac..6d09a0c 100644 --- a/dispatchio/cadence.py +++ b/dispatchio/cadence.py @@ -24,24 +24,26 @@ class Frequency(str, Enum): - HOURLY = "hourly" - DAILY = "daily" - WEEKLY = "weekly" + HOURLY = "hourly" + DAILY = "daily" + WEEKLY = "weekly" MONTHLY = "monthly" class DateCadence(BaseModel): """Date-based cadence — one run per calendar period.""" - type: Literal["date"] = "date" + + type: Literal["date"] = "date" frequency: Frequency = 
Frequency.DAILY - offset: int = 0 + offset: int = 0 # 0 = current period, -1 = previous, -2 = two ago, … # Positive values reference future periods (rare). class FixedCadence(BaseModel): """Fixed run_id — for ad-hoc / event-driven jobs.""" - type: Literal["literal"] = "literal" + + type: Literal["literal"] = "literal" value: str @@ -50,8 +52,9 @@ class IncrementalCadence(BaseModel): Batch-triggered — run_id sourced from MetadataStore. Implementation deferred to Phase 4 (MetadataStore required). """ + type: Literal["incremental"] = "incremental" - key: str # MetadataStore key written by a discovery job + key: str # MetadataStore key written by a discovery job Cadence = Annotated[ @@ -64,11 +67,11 @@ class IncrementalCadence(BaseModel): # Convenience constants # --------------------------------------------------------------------------- -HOURLY = DateCadence(frequency=Frequency.HOURLY) -DAILY = DateCadence(frequency=Frequency.DAILY) -WEEKLY = DateCadence(frequency=Frequency.WEEKLY) +HOURLY = DateCadence(frequency=Frequency.HOURLY) +DAILY = DateCadence(frequency=Frequency.DAILY) +WEEKLY = DateCadence(frequency=Frequency.WEEKLY) MONTHLY = DateCadence(frequency=Frequency.MONTHLY) -YESTERDAY = DateCadence(frequency=Frequency.DAILY, offset=-1) -LAST_WEEK = DateCadence(frequency=Frequency.WEEKLY, offset=-1) +YESTERDAY = DateCadence(frequency=Frequency.DAILY, offset=-1) +LAST_WEEK = DateCadence(frequency=Frequency.WEEKLY, offset=-1) LAST_MONTH = DateCadence(frequency=Frequency.MONTHLY, offset=-1) diff --git a/dispatchio/cli/main.py b/dispatchio/cli/main.py index ef9ae58..d4c39ce 100644 --- a/dispatchio/cli/main.py +++ b/dispatchio/cli/main.py @@ -24,12 +24,10 @@ import sys from datetime import datetime, timezone from pathlib import Path -from typing import Optional import importlib.util import click -from pydantic import BaseModel from dispatchio.models import RunRecord, Status, TickResult from dispatchio.orchestrator import Orchestrator @@ -45,6 +43,7 @@ # Config 
resolution # --------------------------------------------------------------------------- + def _resolve_state_dir(state_dir: str | None) -> str | None: return state_dir or os.environ.get("DISPATCHIO_STATE_DIR") @@ -88,9 +87,11 @@ def _load_state(state_dir: str) -> FilesystemStateStore: # Shared options # --------------------------------------------------------------------------- + def _orch_option(f): return click.option( - "--orchestrator", "-o", + "--orchestrator", + "-o", default=None, help="module:attribute path to an Orchestrator, e.g. myproject.jobs:orchestrator", )(f) @@ -98,10 +99,11 @@ def _orch_option(f): def _state_option(f): return click.option( - "--state-dir", "-s", + "--state-dir", + "-s", default=None, help="Path to state directory (FilesystemStateStore root). " - "Env: DISPATCHIO_STATE_DIR", + "Env: DISPATCHIO_STATE_DIR", )(f) @@ -109,6 +111,7 @@ def _state_option(f): # Root group # --------------------------------------------------------------------------- + @click.group() def cli(): """Dispatchio — lightweight tick-based batch job orchestrator.""" @@ -118,13 +121,14 @@ def cli(): # tick # --------------------------------------------------------------------------- + @cli.command() @_orch_option @click.option( - "--reference-time", "-t", + "--reference-time", + "-t", default=None, - help="ISO-8601 datetime to use as the tick reference time. " - "Defaults to now (UTC).", + help="ISO-8601 datetime to use as the tick reference time. 
Defaults to now (UTC).", ) def tick(orchestrator: str | None, reference_time: str | None): """Run one orchestrator tick and print results.""" @@ -151,7 +155,9 @@ def tick(orchestrator: str | None, reference_time: str | None): for r in result.results: marker = _action_icon(r.action.value) detail = f" {r.detail}" if r.detail else "" - click.echo(f" {marker} {r.job_name}[{r.run_id}] → {r.action.value}{detail}") + click.echo( + f" {marker} {r.job_name}[{r.run_id}] → {r.action.value}{detail}" + ) click.echo() @@ -159,13 +165,19 @@ def tick(orchestrator: str | None, reference_time: str | None): # run (PythonJob entry point) # --------------------------------------------------------------------------- + @cli.command("run") @click.argument("entry_point", required=False, default=None) -@click.option("--script", default=None, help="Path to a Python script file.") -@click.option("--function", "function_name", default=None, - help="Function name within the script (required with --script).") -@click.option("--job-name", default=None, - help="Job name override. Defaults to the function name.") +@click.option("--script", default=None, help="Path to a Python script file.") +@click.option( + "--function", + "function_name", + default=None, + help="Function name within the script (required with --script).", +) +@click.option( + "--job-name", default=None, help="Job name override. Defaults to the function name." 
+) def run_command( entry_point: str | None, script: str | None, @@ -233,12 +245,14 @@ def run_command( # status # --------------------------------------------------------------------------- + @cli.command() @_state_option @click.option("--job", "-j", default=None, help="Filter by job name.") @click.option("--run-id", "-r", default=None, help="Filter by run_id.") @click.option( - "--status", "filter_status", + "--status", + "filter_status", default=None, type=click.Choice([s.value for s in Status]), help="Filter by status.", @@ -273,6 +287,7 @@ def status( # record set (manual override) # --------------------------------------------------------------------------- + @cli.group("record") def record_group(): """Manually inspect or override run records.""" @@ -282,8 +297,9 @@ def record_group(): @_state_option @click.argument("job_name") @click.argument("run_id") -@click.argument("new_status", metavar="STATUS", - type=click.Choice([s.value for s in Status])) +@click.argument( + "new_status", metavar="STATUS", type=click.Choice([s.value for s in Status]) +) @click.option("--reason", default=None, help="Error reason string.") def record_set( state_dir: str | None, @@ -317,6 +333,7 @@ def record_set( # heartbeat # --------------------------------------------------------------------------- + @cli.command() @_state_option @click.argument("job_name") @@ -336,25 +353,25 @@ def heartbeat(state_dir: str | None, job_name: str, run_id: str): # --------------------------------------------------------------------------- _STATUS_COLOURS = { - "done": ("green", "DONE "), - "submitted": ("cyan", "SUBMITTED"), - "running": ("blue", "RUNNING "), - "error": ("red", "ERROR "), - "lost": ("yellow", "LOST "), - "pending": ("white", "PENDING "), - "skipped": ("white", "SKIPPED "), + "done": ("green", "DONE "), + "submitted": ("cyan", "SUBMITTED"), + "running": ("blue", "RUNNING "), + "error": ("red", "ERROR "), + "lost": ("yellow", "LOST "), + "pending": ("white", "PENDING "), + "skipped": 
("white", "SKIPPED "), } _ACTION_ICONS = { - "submitted": "✓", - "retrying": "↺", - "marked_lost": "✗", - "marked_error": "✗", - "submission_failed": "✗", - "skipped_condition": "·", - "skipped_dependencies": "·", + "submitted": "✓", + "retrying": "↺", + "marked_lost": "✗", + "marked_error": "✗", + "submission_failed": "✗", + "skipped_condition": "·", + "skipped_dependencies": "·", "skipped_already_active": "·", - "skipped_already_done": "·", + "skipped_already_done": "·", } diff --git a/dispatchio/conditions.py b/dispatchio/conditions.py index 214f7cf..a13640b 100644 --- a/dispatchio/conditions.py +++ b/dispatchio/conditions.py @@ -40,18 +40,21 @@ class Condition(Protocol): The built-in condition types (TimeOfDayCondition, DayOfMonthCondition, etc.) satisfy this protocol automatically. Implement it for custom gates. """ - def is_met(self, reference_time: datetime, cadence: "Cadence") -> bool: ... + + def is_met(self, reference_time: datetime, cadence: Cadence) -> bool: ... # --------------------------------------------------------------------------- # Concrete condition types # --------------------------------------------------------------------------- + class TimeOfDayCondition(BaseModel): """Gate on wall-clock time. 
Natural fit for daily / weekly / monthly jobs.""" - type: Literal["time_of_day"] = "time_of_day" - after: time | None = None # ref.time() >= after - before: time | None = None # ref.time() < before + + type: Literal["time_of_day"] = "time_of_day" + after: time | None = None # ref.time() >= after + before: time | None = None # ref.time() < before @model_validator(mode="after") def _at_least_one(self) -> TimeOfDayCondition: @@ -59,8 +62,8 @@ def _at_least_one(self) -> TimeOfDayCondition: raise ValueError("at least one of 'after' or 'before' must be set") return self - def is_met(self, reference_time: datetime, cadence: "Cadence") -> bool: - ref_t = reference_time.time() # tzinfo stripped; always naive + def is_met(self, reference_time: datetime, cadence: Cadence) -> bool: + ref_t = reference_time.time() # tzinfo stripped; always naive if self.after is not None and ref_t < self.after: return False if self.before is not None and ref_t >= self.before: @@ -70,9 +73,10 @@ def is_met(self, reference_time: datetime, cadence: "Cadence") -> bool: class MinuteOfHourCondition(BaseModel): """Gate on minute within the current hour. 
Natural fit for hourly jobs.""" - type: Literal["minute_of_hour"] = "minute_of_hour" - after: int | None = None # ref.minute >= after (0–59) - before: int | None = None # ref.minute < before (0–59) + + type: Literal["minute_of_hour"] = "minute_of_hour" + after: int | None = None # ref.minute >= after (0–59) + before: int | None = None # ref.minute < before (0–59) @model_validator(mode="after") def _at_least_one(self) -> MinuteOfHourCondition: @@ -80,7 +84,7 @@ def _at_least_one(self) -> MinuteOfHourCondition: raise ValueError("at least one of 'after' or 'before' must be set") return self - def is_met(self, reference_time: datetime, cadence: "Cadence") -> bool: + def is_met(self, reference_time: datetime, cadence: Cadence) -> bool: m = reference_time.minute if self.after is not None and m < self.after: return False @@ -91,9 +95,10 @@ def is_met(self, reference_time: datetime, cadence: "Cadence") -> bool: class DayOfMonthCondition(BaseModel): """Gate on calendar day within the current month. Natural fit for monthly jobs.""" - type: Literal["day_of_month"] = "day_of_month" - after: int | None = None # ref.day >= after (1–31) - before: int | None = None # ref.day < before (1–31) + + type: Literal["day_of_month"] = "day_of_month" + after: int | None = None # ref.day >= after (1–31) + before: int | None = None # ref.day < before (1–31) @model_validator(mode="after") def _at_least_one(self) -> DayOfMonthCondition: @@ -101,7 +106,7 @@ def _at_least_one(self) -> DayOfMonthCondition: raise ValueError("at least one of 'after' or 'before' must be set") return self - def is_met(self, reference_time: datetime, cadence: "Cadence") -> bool: + def is_met(self, reference_time: datetime, cadence: Cadence) -> bool: d = reference_time.day if self.after is not None and d < self.after: return False @@ -112,10 +117,11 @@ def is_met(self, reference_time: datetime, cadence: "Cadence") -> bool: class DayOfWeekCondition(BaseModel): """Gate on day of week. 
Applies across any cadence.""" - type: Literal["day_of_week"] = "day_of_week" - on_days: list[int] # 0 = Mon … 6 = Sun - def is_met(self, reference_time: datetime, cadence: "Cadence") -> bool: + type: Literal["day_of_week"] = "day_of_week" + on_days: list[int] # 0 = Mon … 6 = Sun + + def is_met(self, reference_time: datetime, cadence: Cadence) -> bool: return reference_time.weekday() in self.on_days @@ -123,21 +129,24 @@ def is_met(self, reference_time: datetime, cadence: "Cadence") -> bool: # Composite conditions (defined before AnyCondition, rebuilt after) # --------------------------------------------------------------------------- + class AllOf(BaseModel): """All conditions must be met (logical AND).""" - type: Literal["all_of"] = "all_of" + + type: Literal["all_of"] = "all_of" conditions: list[AnyCondition] # resolved by model_rebuild() below - def is_met(self, reference_time: datetime, cadence: "Cadence") -> bool: + def is_met(self, reference_time: datetime, cadence: Cadence) -> bool: return all(c.is_met(reference_time, cadence) for c in self.conditions) class AnyOf(BaseModel): """At least one condition must be met (logical OR).""" - type: Literal["any_of"] = "any_of" + + type: Literal["any_of"] = "any_of" conditions: list[AnyCondition] # resolved by model_rebuild() below - def is_met(self, reference_time: datetime, cadence: "Cadence") -> bool: + def is_met(self, reference_time: datetime, cadence: Cadence) -> bool: return any(c.is_met(reference_time, cadence) for c in self.conditions) diff --git a/dispatchio/config/__init__.py b/dispatchio/config/__init__.py index 6cf7b1a..aea72d3 100644 --- a/dispatchio/config/__init__.py +++ b/dispatchio/config/__init__.py @@ -1,4 +1,9 @@ -from dispatchio.config.settings import DispatchioSettings, ReceiverSettings, StateSettings, SubmissionSettings +from dispatchio.config.settings import ( + DispatchioSettings, + ReceiverSettings, + StateSettings, + SubmissionSettings, +) from dispatchio.config.loader import load_config, 
orchestrator_from_config __all__ = [ diff --git a/dispatchio/config/loader.py b/dispatchio/config/loader.py index e72554a..9093c65 100644 --- a/dispatchio/config/loader.py +++ b/dispatchio/config/loader.py @@ -13,7 +13,11 @@ from pathlib import Path from typing import Any -from dispatchio.config.settings import DispatchioSettings, ReceiverSettings, StateSettings +from dispatchio.config.settings import ( + DispatchioSettings, + ReceiverSettings, + StateSettings, +) from dispatchio.config.sources.toml_ import TomlSource from dispatchio.executor import SubprocessExecutor, PythonJobExecutor from dispatchio.models import Job @@ -23,14 +27,15 @@ logger = logging.getLogger(__name__) -_CONFIG_ENV_VAR = "DISPATCHIO_CONFIG" -_SEARCH_PATHS = ["dispatchio.toml", "~/.dispatchio.toml"] +_CONFIG_ENV_VAR = "DISPATCHIO_CONFIG" +_SEARCH_PATHS = ["dispatchio.toml", "~/.dispatchio.toml"] # --------------------------------------------------------------------------- # Config file resolution # --------------------------------------------------------------------------- + def _find_config_file(path: str | Path | None) -> Path | None: """ Resolve a config file path using the lookup chain: @@ -53,9 +58,9 @@ def _find_config_file(path: str | Path | None) -> Path | None: if env_val.startswith("ssm://"): # SSM paths are handled by dispatchio[aws] — signal to caller raise NotImplementedError( - f"SSM config sources require dispatchio[aws]. " - f"Set DISPATCHIO_CONFIG to a local file path, or install " - f"dispatchio[aws] for SSM support." + "SSM config sources require dispatchio[aws]. " + "Set DISPATCHIO_CONFIG to a local file path, or install " + "dispatchio[aws] for SSM support." 
) p = Path(env_val).expanduser() if not p.exists(): @@ -95,6 +100,7 @@ def _read_toml(path: Path) -> dict[str, Any]: def _resolve_relative_paths(data: dict[str, Any], base_dir: Path) -> dict[str, Any]: """Resolve relative path strings in config relative to base_dir.""" import copy + data = copy.deepcopy(data) _PATH_FIELDS = {"state": ["root"], "receiver": ["drop_dir"]} for section, keys in _PATH_FIELDS.items(): @@ -111,6 +117,7 @@ def _resolve_relative_paths(data: dict[str, Any], base_dir: Path) -> dict[str, A # Public: load_config # --------------------------------------------------------------------------- + def load_config(path: str | Path | None = None) -> DispatchioSettings: """ Load DispatchioSettings by merging a config file, environment variables, @@ -172,8 +179,9 @@ def settings_customise_sources( # Public: orchestrator_from_config # --------------------------------------------------------------------------- + def orchestrator_from_config( - jobs: list[Job], + jobs: list[Job] | None = None, config: str | Path | DispatchioSettings | None = None, **orchestrator_kwargs, ) -> Orchestrator: @@ -185,7 +193,9 @@ def orchestrator_from_config( job definitions stay decoupled from environment-specific values. Args: - jobs: List of Jobs to evaluate on each tick. + jobs: Optional list of Jobs to evaluate on each tick. + If omitted, an empty orchestrator is created and jobs can be + added later via Orchestrator.add_job(s). 
config: One of: - None auto-discover config file (see load_config) - str / Path explicit path to a TOML config file @@ -200,6 +210,10 @@ def orchestrator_from_config( JOBS = [Job(name="etl", executor=SubprocessConfig(...))] orchestrator = orchestrator_from_config(JOBS) # reads dispatchio.toml + + # Orchestrator-first flow (dynamic registration): + # orchestrator = orchestrator_from_config() + # orchestrator.add_jobs(JOBS) """ if isinstance(config, DispatchioSettings): settings = config @@ -210,11 +224,11 @@ def orchestrator_from_config( reporter_env = _build_reporter_env(settings.receiver) return Orchestrator( - jobs=jobs, + jobs=jobs or [], state=_build_state(settings.state), executors={ "subprocess": SubprocessExecutor(), - "python": PythonJobExecutor(reporter_env=reporter_env), + "python": PythonJobExecutor(reporter_env=reporter_env), }, receiver=_build_receiver(settings.receiver), submit_concurrency=settings.submission.concurrency, @@ -229,6 +243,7 @@ def orchestrator_from_config( # Internal: backend construction # --------------------------------------------------------------------------- + def _configure_logging(level: str) -> None: logging.basicConfig( level=getattr(logging, level.upper(), logging.INFO), @@ -247,6 +262,7 @@ def _build_state(cfg: StateSettings): if cfg.backend == "dynamodb": try: from dispatchio_aws.state.dynamodb import DynamoDBStateStore # type: ignore[import] + return DynamoDBStateStore(table_name=cfg.table_name, region=cfg.region) except ImportError: raise ImportError( @@ -284,6 +300,7 @@ def _build_receiver(cfg: ReceiverSettings): if cfg.backend == "sqs": try: from dispatchio_aws.receiver.sqs import SQSReceiver # type: ignore[import] + return SQSReceiver(queue_url=cfg.queue_url, region=cfg.region) except ImportError: raise ImportError( diff --git a/dispatchio/config/settings.py b/dispatchio/config/settings.py index 435a774..fce3cdd 100644 --- a/dispatchio/config/settings.py +++ b/dispatchio/config/settings.py @@ -42,6 +42,7 @@ # 
Sub-models (plain BaseModel — not BaseSettings themselves) # --------------------------------------------------------------------------- + class StateSettings(BaseModel): """ State store backend configuration. @@ -50,14 +51,15 @@ class StateSettings(BaseModel): backend="memory" — in-process only, lost on restart; useful for tests. backend="dynamodb" — AWS DynamoDB; requires dispatchio[aws]. """ + backend: Literal["filesystem", "memory", "dynamodb"] = "filesystem" # filesystem root: str = ".dispatchio/state" # dynamodb (dispatchio[aws]) - table_name: str = "dispatchio-state" - region: str | None = None + table_name: str = "dispatchio-state" + region: str | None = None class ReceiverSettings(BaseModel): @@ -68,6 +70,7 @@ class ReceiverSettings(BaseModel): backend="sqs" — AWS SQS queue; requires dispatchio[aws]. backend="none" — no receiver; jobs must write directly to the state store. """ + backend: Literal["filesystem", "sqs", "none"] = "filesystem" # filesystem @@ -75,13 +78,14 @@ class ReceiverSettings(BaseModel): # sqs (dispatchio[aws]) queue_url: str | None = None - region: str | None = None + region: str | None = None # --------------------------------------------------------------------------- # Top-level settings # --------------------------------------------------------------------------- + class SubmissionSettings(BaseModel): """ Controls how jobs are submitted each tick. @@ -94,9 +98,10 @@ class SubmissionSettings(BaseModel): Not yet enforced by local executors; reserved for cloud executors (e.g. ECS) where the API call can be slow. 
""" - concurrency: int = 8 + + concurrency: int = 8 max_per_tick: int | None = None - timeout: float | None = None + timeout: float | None = None class DispatchioSettings(BaseSettings): @@ -126,17 +131,18 @@ class DispatchioSettings(BaseSettings): extra="ignore", ) - log_level: str = "INFO" - state: StateSettings = Field(default_factory=StateSettings) - receiver: ReceiverSettings = Field(default_factory=ReceiverSettings) - submission: SubmissionSettings = Field(default_factory=SubmissionSettings) - default_cadence: Any = "daily" + log_level: str = "INFO" + state: StateSettings = Field(default_factory=StateSettings) + receiver: ReceiverSettings = Field(default_factory=ReceiverSettings) + submission: SubmissionSettings = Field(default_factory=SubmissionSettings) + default_cadence: Any = "daily" # Accepts a frequency string ("daily", "monthly", etc.) or a full # Cadence dict. Coerced to a DateCadence by _coerce_cadence below. @model_validator(mode="after") - def _coerce_cadence(self) -> "DispatchioSettings": + def _coerce_cadence(self) -> DispatchioSettings: from dispatchio.cadence import DateCadence, Frequency + v = self.default_cadence if isinstance(v, str): self.default_cadence = DateCadence(frequency=Frequency(v)) diff --git a/dispatchio/executor/python_.py b/dispatchio/executor/python_.py index e6ffa27..67cb714 100644 --- a/dispatchio/executor/python_.py +++ b/dispatchio/executor/python_.py @@ -61,9 +61,14 @@ def submit( cmd = [sys.executable, "-m", "dispatchio", "run", cfg.entry_point] else: cmd = [ - sys.executable, "-m", "dispatchio", "run", - "--script", cfg.script, - "--function", cfg.function, + sys.executable, + "-m", + "dispatchio", + "run", + "--script", + cfg.script, + "--function", + cfg.function, ] env = {**os.environ, **self._reporter_env, "DISPATCHIO_RUN_ID": run_id} diff --git a/dispatchio/executor/subprocess_.py b/dispatchio/executor/subprocess_.py index 327ead9..8ec6385 100644 --- a/dispatchio/executor/subprocess_.py +++ 
b/dispatchio/executor/subprocess_.py @@ -24,7 +24,6 @@ class SubprocessExecutor: - def submit( self, job: Job, @@ -39,8 +38,8 @@ def submit( ) ctx = { - "job_name": job.name, - "run_id": run_id, + "job_name": job.name, + "run_id": run_id, "reference_time": reference_time.isoformat(), } diff --git a/dispatchio/models.py b/dispatchio/models.py index 0896e39..89fa877 100644 --- a/dispatchio/models.py +++ b/dispatchio/models.py @@ -9,11 +9,11 @@ from datetime import datetime, time from enum import Enum -from typing import TYPE_CHECKING, Annotated, Any, Literal +from typing import Annotated, Any, Literal from pydantic import BaseModel, Field, field_validator, model_validator -from dispatchio.cadence import Cadence, DateCadence, Frequency +from dispatchio.cadence import Cadence from dispatchio.conditions import AnyCondition @@ -113,9 +113,7 @@ class Dependency(BaseModel): required_status: Status = Status.DONE @classmethod - def from_job( - cls, job: "Job", required_status: Status = Status.DONE - ) -> "Dependency": + def from_job(cls, job: Job, required_status: Status = Status.DONE) -> Dependency: """ Create a Dependency from a Job, inheriting its cadence. 
@@ -233,7 +231,7 @@ class PythonJob(BaseModel): pythonpath: list[str] = Field(default_factory=list) @model_validator(mode="after") - def _check_entry(self) -> "PythonJob": + def _check_entry(self) -> PythonJob: has_entry = self.entry_point is not None has_script = self.script is not None and self.function is not None if not has_entry and not has_script: @@ -257,7 +255,7 @@ def _check_entry(self) -> "PythonJob": # --------------------------------------------------------------------------- -def _normalise_dep(item: "Job | Dependency") -> Dependency: +def _normalise_dep(item: Job | Dependency) -> Dependency: """Convert anything dep-like to a Dependency.""" if isinstance(item, Dependency): return item @@ -287,7 +285,7 @@ class Job(BaseModel): alerts: list[AlertCondition] = Field(default_factory=list) @model_validator(mode="after") - def _check_dependency_threshold(self) -> "Job": + def _check_dependency_threshold(self) -> Job: if self.dependency_mode == DependencyMode.THRESHOLD: if self.dependency_threshold is None or self.dependency_threshold <= 0: raise ValueError( @@ -320,9 +318,9 @@ def create( cls, name: str, executor: ExecutorConfig, - depends_on: "list[Job | Dependency] | Dependency | Job | None" = None, + depends_on: list[Job | Dependency] | Dependency | Job | None = None, **kwargs, - ) -> "Job": + ) -> Job: """ Alternate constructor to allow positional name or executor. 
diff --git a/dispatchio/orchestrator.py b/dispatchio/orchestrator.py index 926a097..9b42e33 100644 --- a/dispatchio/orchestrator.py +++ b/dispatchio/orchestrator.py @@ -28,9 +28,10 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass from datetime import datetime, timezone +from collections.abc import Iterable from dispatchio.alerts.base import AlertEvent, AlertHandler, LogAlertHandler -from dispatchio.cadence import DAILY, Cadence, DateCadence +from dispatchio.cadence import DAILY, Cadence from dispatchio.conditions import TimeOfDayCondition from dispatchio.executor.base import Executor from dispatchio.models import ( @@ -40,7 +41,6 @@ JobAction, Job, JobTickResult, - RetryPolicy, RunRecord, Status, TickResult, @@ -78,6 +78,10 @@ class Orchestrator: submit_timeout: Per-submission deadline (seconds) forwarded to executor.submit(). Not yet enforced by local executors; reserved for cloud executors where the API call can block. + strict_dependencies: If True, unresolved dependencies raise ValueError. + If False, unresolved deps warn so cross-orchestrator dependencies stay possible. + allow_runtime_mutation: If True, jobs can be added/removed after tick() has run. + If False, the job graph is frozen after the first tick.
""" def __init__( @@ -91,6 +95,8 @@ def __init__( max_submissions_per_tick: int | None = None, submit_timeout: float | None = None, default_cadence: Cadence = DAILY, + strict_dependencies: bool = True, + allow_runtime_mutation: bool = False, ) -> None: self.jobs = jobs self.state = state @@ -101,29 +107,102 @@ def __init__( self.max_submissions_per_tick = max_submissions_per_tick self.submit_timeout = submit_timeout self.default_cadence = default_cadence + self.strict_dependencies = strict_dependencies + self.allow_runtime_mutation = allow_runtime_mutation - self._job_index: dict[str, Job] = {j.name: j for j in jobs} - self._warn_unresolved_dependencies() + self._job_index: dict[str, Job] = {} + self._jobs_dirty = False + self._has_ticked = False - def _warn_unresolved_dependencies(self) -> None: + self._rebuild_job_index() + self._validate_dependencies() + + def add_job(self, job: Job) -> None: + """Add a job definition to the orchestrator.""" + self.add_jobs([job]) + + def add_jobs(self, jobs: Iterable[Job]) -> None: + """ + Add job definitions to the orchestrator. + + Duplicate job names are rejected immediately. + """ + self._ensure_mutation_allowed() + new_jobs = list(jobs) + self._validate_duplicate_job_names(self.jobs + new_jobs) + self.jobs.extend(new_jobs) + self._jobs_dirty = True + + def remove_job(self, job_name: str) -> Job: """ - Warn at construction time if any dependency refers to a job name that - is not registered in this orchestrator. + Remove a job by name and return the removed definition. - A warning (not an error) is raised so that cross-orchestrator - dependencies — where the upstream job is managed by a separate - Orchestrator instance — remain valid. + Raises KeyError when the job is not registered. 
""" + self._ensure_mutation_allowed() + for idx, job in enumerate(self.jobs): + if job.name == job_name: + removed = self.jobs.pop(idx) + self._jobs_dirty = True + return removed + raise KeyError(f"Unknown job: {job_name}") + + def _ensure_mutation_allowed(self) -> None: + if self._has_ticked and not self.allow_runtime_mutation: + raise RuntimeError( + "Job mutation is disabled after the first tick. " + "Set allow_runtime_mutation=True to enable dynamic changes." + ) + + def _rebuild_job_index(self) -> None: + self._validate_duplicate_job_names(self.jobs) + self._job_index = {j.name: j for j in self.jobs} + + def _validate_duplicate_job_names(self, jobs: list[Job]) -> None: + seen: set[str] = set() + duplicates: set[str] = set() + for job in jobs: + if job.name in seen: + duplicates.add(job.name) + seen.add(job.name) + if duplicates: + names = ", ".join(sorted(duplicates)) + raise ValueError(f"Duplicate job names found: {names}") + + def _unresolved_dependencies(self) -> list[tuple[str, str]]: + unresolved: list[tuple[str, str]] = [] for job in self.jobs: for dep in job.depends_on: if dep.job_name not in self._job_index: - logger.warning( - "Job %r depends on %r, which is not registered in this " - "orchestrator. If this is a cross-orchestrator dependency " - "this warning can be ignored.", - job.name, - dep.job_name, - ) + unresolved.append((job.name, dep.job_name)) + return unresolved + + def _validate_dependencies(self) -> None: + unresolved = self._unresolved_dependencies() + if not unresolved: + return + + if self.strict_dependencies: + detail = ", ".join(f"{job}->{dep}" for job, dep in unresolved) + raise ValueError( + f"Unresolved dependencies in orchestrator job graph: {detail}" + ) + + for job_name, dep_name in unresolved: + logger.warning( + "Job %r depends on %r, which is not registered in this " + "orchestrator. 
If this is a cross-orchestrator dependency " + "this warning can be ignored.", + job_name, + dep_name, + ) + + def _refresh_job_graph_if_dirty(self) -> None: + if not self._jobs_dirty: + return + self._rebuild_job_index() + self._validate_dependencies() + self._jobs_dirty = False # ------------------------------------------------------------------ # Public API @@ -140,6 +219,8 @@ def tick(self, reference_time: datetime | None = None) -> TickResult: if reference_time is None: reference_time = datetime.now(tz=timezone.utc) + self._refresh_job_graph_if_dirty() + result = TickResult(reference_time=reference_time) # Phase 1 — apply inbound completion events @@ -178,6 +259,7 @@ def tick(self, reference_time: datetime | None = None) -> TickResult: # None → silently skipped (e.g. SKIPPED status) # _PendingSubmission still in outcomes → was beyond the cap; deferred + self._has_ticked = True return result # ------------------------------------------------------------------ @@ -309,7 +391,6 @@ def _evaluate_job( cadence: Cadence, reference_time: datetime, ) -> JobTickResult | _PendingSubmission | None: - existing = self.state.get(job.name, run_id) if existing and existing.is_active(): @@ -383,8 +464,7 @@ def _plan_retry( if policy.retry_on and record.error_reason: if not any(pat in record.error_reason for pat in policy.retry_on): detail = ( - f"error_reason does not match retry_on patterns: " - f"{policy.retry_on}" + f"error_reason does not match retry_on patterns: {policy.retry_on}" ) self._emit_alert(AlertOn.ERROR, job, run_id, detail, record) return JobTickResult( @@ -564,7 +644,7 @@ def _dep_status_matches( def _check_dependencies( self, job: Job, run_id: str, reference_time: datetime - ) -> "JobTickResult | None": + ) -> JobTickResult | None: """ Returns a blocking JobTickResult if deps are not satisfied, or None if clear to proceed. For THRESHOLD unreachability, also writes a SKIPPED RunRecord to state. 
diff --git a/dispatchio/receiver/base.py b/dispatchio/receiver/base.py index 9473804..379965d 100644 --- a/dispatchio/receiver/base.py +++ b/dispatchio/receiver/base.py @@ -32,12 +32,13 @@ class CompletionEvent(BaseModel): error_reason — optional message when status=ERROR metadata — arbitrary key/value pairs stored on the RunRecord """ - job_name: str - run_id: str - status: Status - error_reason: str | None = None - metadata: dict[str, Any] = {} - occurred_at: datetime | None = None # defaults to now() if omitted + + job_name: str + run_id: str + status: Status + error_reason: str | None = None + metadata: dict[str, Any] = {} + occurred_at: datetime | None = None # defaults to now() if omitted @runtime_checkable diff --git a/dispatchio/receiver/filesystem.py b/dispatchio/receiver/filesystem.py index 8b4486b..2e0ee7c 100644 --- a/dispatchio/receiver/filesystem.py +++ b/dispatchio/receiver/filesystem.py @@ -32,7 +32,6 @@ class FilesystemReceiver: - def __init__(self, drop_dir: str | Path) -> None: self.drop_dir = Path(drop_dir) self.drop_dir.mkdir(parents=True, exist_ok=True) diff --git a/dispatchio/run_id.py b/dispatchio/run_id.py index c5aecdf..684a780 100644 --- a/dispatchio/run_id.py +++ b/dispatchio/run_id.py @@ -13,13 +13,14 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from dispatchio.cadence import Cadence, DateCadence, FixedCadence, Frequency + from dispatchio.cadence import Cadence # --------------------------------------------------------------------------- # Internal helpers # --------------------------------------------------------------------------- + def _subtract_months(dt: datetime, n: int) -> datetime: """ Apply a signed month offset to dt, clamping the day to the last of @@ -30,7 +31,7 @@ def _subtract_months(dt: datetime, n: int) -> datetime: n = 0 → no change """ month = dt.month - n - year = dt.year + (month - 1) // 12 + year = dt.year + (month - 1) // 12 month = ((month - 1) % 12) + 1 max_day = calendar.monthrange(year, month)[1] 
return dt.replace(year=year, month=month, day=min(dt.day, max_day)) @@ -40,7 +41,8 @@ def _subtract_months(dt: datetime, n: int) -> datetime: # Public API # --------------------------------------------------------------------------- -def resolve_run_id(cadence: "Cadence", reference_time: datetime) -> str: + +def resolve_run_id(cadence: Cadence, reference_time: datetime) -> str: """ Resolve a Cadence to a concrete run_id string. @@ -58,19 +60,23 @@ def resolve_run_id(cadence: "Cadence", reference_time: datetime) -> str: >>> resolve_run_id(HOURLY, ref) '2025011502' """ - from dispatchio.cadence import DateCadence, FixedCadence, Frequency, IncrementalCadence + from dispatchio.cadence import ( + DateCadence, + FixedCadence, + Frequency, + IncrementalCadence, + ) if isinstance(cadence, FixedCadence): return cadence.value if isinstance(cadence, IncrementalCadence): raise NotImplementedError( - "IncrementalCadence requires MetadataStore (Phase 4). " - f"Key: {cadence.key!r}" + f"IncrementalCadence requires MetadataStore (Phase 4). 
Key: {cadence.key!r}" ) if isinstance(cadence, DateCadence): - lookback = -cadence.offset # negative offset → positive lookback + lookback = -cadence.offset # negative offset → positive lookback freq = cadence.frequency if freq == Frequency.DAILY: @@ -89,29 +95,34 @@ def resolve_run_id(cadence: "Cadence", reference_time: datetime) -> str: raise ValueError(f"Cannot resolve cadence: {cadence!r}") # pragma: no cover -def describe_cadence(cadence: "Cadence") -> str: +def describe_cadence(cadence: Cadence) -> str: """Human-readable description of a cadence (used in CLI/log output).""" - from dispatchio.cadence import DateCadence, FixedCadence, Frequency, IncrementalCadence + from dispatchio.cadence import DateCadence, FixedCadence, IncrementalCadence if isinstance(cadence, FixedCadence): return f"literal '{cadence.value}'" if isinstance(cadence, IncrementalCadence): return f"incremental[{cadence.key!r}]" if isinstance(cadence, DateCadence): - freq = cadence.frequency.value + freq = cadence.frequency.value offset = cadence.offset if offset == 0: return { - "hourly": "current hour", "daily": "today", - "weekly": "current week", "monthly": "current month", + "hourly": "current hour", + "daily": "today", + "weekly": "current week", + "monthly": "current month", }.get(freq, freq) if offset == -1: return { - "hourly": "previous hour", "daily": "yesterday", - "weekly": "last week", "monthly": "last month", + "hourly": "previous hour", + "daily": "yesterday", + "weekly": "last week", + "monthly": "last month", }.get(freq, f"{freq} (offset {offset})") - n = -offset - unit = {"hourly": "hour", "daily": "day", - "weekly": "week", "monthly": "month"}[freq] + n = -offset + unit = {"hourly": "hour", "daily": "day", "weekly": "week", "monthly": "month"}[ + freq + ] return f"{n} {unit}{'s' if n > 1 else ''} ago" return repr(cadence) # pragma: no cover diff --git a/dispatchio/simulate.py b/dispatchio/simulate.py index 71aae19..ca44b89 100644 --- a/dispatchio/simulate.py +++ 
b/dispatchio/simulate.py @@ -29,11 +29,11 @@ def simulate( - orchestrator: "Orchestrator", + orchestrator: Orchestrator, *, tick_interval: float = 2.0, max_ticks: int = 20, - stop_when: "Callable[[StateStore, list[Job], str], bool] | None" = None, + stop_when: Callable[[StateStore, list[Job], str], bool] | None = None, reference_time: datetime | None = None, ) -> None: """ @@ -93,6 +93,6 @@ def simulate( log.warning("Reached max_ticks=%d without the stop condition being met.", max_ticks) -def _all_finished(store: "StateStore", jobs: "list[Job]", run_id: str) -> bool: +def _all_finished(store: StateStore, jobs: list[Job], run_id: str) -> bool: """Default stop condition: every job has a finished record for run_id.""" return all((rec := store.get(j.name, run_id)) and rec.is_finished() for j in jobs) diff --git a/dispatchio/state/base.py b/dispatchio/state/base.py index 4a5367d..07a9892 100644 --- a/dispatchio/state/base.py +++ b/dispatchio/state/base.py @@ -37,7 +37,7 @@ def heartbeat(self, job_name: str, run_id: str, at: datetime | None = None) -> N def list_records( self, job_name: str | None = None, - status: Status | None = None, + status: Status | None = None, ) -> list[RunRecord]: """ Return all records, optionally filtered by job_name and/or status. 
diff --git a/dispatchio/state/filesystem.py b/dispatchio/state/filesystem.py index 614adae..4d3284f 100644 --- a/dispatchio/state/filesystem.py +++ b/dispatchio/state/filesystem.py @@ -18,7 +18,6 @@ from __future__ import annotations -import json import os import tempfile from datetime import datetime, timezone @@ -28,7 +27,6 @@ class FilesystemStateStore: - def __init__(self, root: str | Path) -> None: self.root = Path(root) self.root.mkdir(parents=True, exist_ok=True) @@ -87,7 +85,7 @@ def heartbeat(self, job_name: str, run_id: str, at: datetime | None = None) -> N def list_records( self, job_name: str | None = None, - status: Status | None = None, + status: Status | None = None, ) -> list[RunRecord]: records: list[RunRecord] = [] diff --git a/dispatchio/state/memory.py b/dispatchio/state/memory.py index 5d5347f..9316d93 100644 --- a/dispatchio/state/memory.py +++ b/dispatchio/state/memory.py @@ -44,7 +44,7 @@ def heartbeat(self, job_name: str, run_id: str, at: datetime | None = None) -> N def list_records( self, job_name: str | None = None, - status: Status | None = None, + status: Status | None = None, ) -> list[RunRecord]: records = self._records.values() if job_name is not None: diff --git a/dispatchio/worker/harness.py b/dispatchio/worker/harness.py index 47f3dcf..1a0f1fc 100644 --- a/dispatchio/worker/harness.py +++ b/dispatchio/worker/harness.py @@ -38,8 +38,6 @@ def main(run_id: str) -> None: import os import sys import threading -import time -import traceback from collections.abc import Callable from typing import Any @@ -54,6 +52,7 @@ def main(run_id: str) -> None: # Heartbeat thread # --------------------------------------------------------------------------- + class _HeartbeatThread(threading.Thread): """ Posts a RUNNING event every `interval` seconds until stopped. 
@@ -69,21 +68,21 @@ def __init__( ) -> None: super().__init__(daemon=True, name=f"dispatchio-heartbeat-{job_name}") self._job_name = job_name - self._run_id = run_id + self._run_id = run_id self._reporter = reporter self._interval = interval - self._stop = threading.Event() + self._stop = threading.Event() def run(self) -> None: while not self._stop.wait(timeout=self._interval): try: - self._reporter.report( - self._job_name, self._run_id, Status.RUNNING - ) + self._reporter.report(self._job_name, self._run_id, Status.RUNNING) logger.debug("Heartbeat sent for %s/%s", self._job_name, self._run_id) except Exception: logger.warning( - "Heartbeat failed for %s/%s", self._job_name, self._run_id, + "Heartbeat failed for %s/%s", + self._job_name, + self._run_id, exc_info=True, ) @@ -95,6 +94,7 @@ def stop(self) -> None: # Argument parsing helpers # --------------------------------------------------------------------------- + def _arg_from_argv(flag: str) -> str | None: """Extract the value after `flag` in sys.argv, e.g. --run-id 20250115.""" argv = sys.argv @@ -106,11 +106,7 @@ def _arg_from_argv(flag: str) -> str | None: def _resolve_run_id(run_id: str | None) -> str: - value = ( - run_id - or _arg_from_argv("--run-id") - or os.environ.get("DISPATCHIO_RUN_ID") - ) + value = run_id or _arg_from_argv("--run-id") or os.environ.get("DISPATCHIO_RUN_ID") if not value: raise RuntimeError( "run_id is required. 
Pass it explicitly to run_job(), " @@ -128,10 +124,7 @@ def _resolve_reporter(reporter: Reporter | None) -> Reporter | None: if reporter is not None: return reporter - drop_dir = ( - _arg_from_argv("--drop-dir") - or os.environ.get("DISPATCHIO_DROP_DIR") - ) + drop_dir = _arg_from_argv("--drop-dir") or os.environ.get("DISPATCHIO_DROP_DIR") if drop_dir: return FilesystemReporter(drop_dir) @@ -146,6 +139,7 @@ def _resolve_reporter(reporter: Reporter | None) -> Reporter | None: # Public API # --------------------------------------------------------------------------- + def run_job( job_name: str, fn: Callable[[str], None], @@ -197,7 +191,9 @@ def run_job( fn(resolved_run_id) metadata = metadata_fn() if metadata_fn else {} - logger.info("Job completed successfully: job=%s run_id=%s", job_name, resolved_run_id) + logger.info( + "Job completed successfully: job=%s run_id=%s", job_name, resolved_run_id + ) if resolved_reporter is not None: resolved_reporter.report( @@ -208,13 +204,17 @@ def run_job( error_reason = f"{type(exc).__name__}: {exc}" logger.error( "Job failed: job=%s run_id=%s error=%s", - job_name, resolved_run_id, error_reason, + job_name, + resolved_run_id, + error_reason, ) logger.debug("Traceback:", exc_info=True) if resolved_reporter is not None: resolved_reporter.report( - job_name, resolved_run_id, Status.ERROR, + job_name, + resolved_run_id, + Status.ERROR, error_reason=error_reason, ) diff --git a/dispatchio/worker/reporter/filesystem.py b/dispatchio/worker/reporter/filesystem.py index e056376..555195c 100644 --- a/dispatchio/worker/reporter/filesystem.py +++ b/dispatchio/worker/reporter/filesystem.py @@ -24,7 +24,6 @@ class FilesystemReporter: - def __init__(self, drop_dir: str | Path) -> None: self.drop_dir = Path(drop_dir) self.drop_dir.mkdir(parents=True, exist_ok=True) @@ -53,5 +52,6 @@ def report( except Exception: logger.exception( "FilesystemReporter failed to write event for %s/%s", - job_name, run_id, + job_name, + run_id, ) diff --git 
a/docs/advanced_scheduling.md b/docs/advanced_scheduling.md index 43f3cae..b8e17ce 100644 --- a/docs/advanced_scheduling.md +++ b/docs/advanced_scheduling.md @@ -35,51 +35,69 @@ This has been implemented. ### Problem -Dispatchio's job list is currently fixed at `Orchestrator` construction time. +Dispatchio's job list was previously fixed at `Orchestrator` construction time. We have recently added some related behaviour which allows jobs to be added to an orchestrator, although it is not clear if this is done "per tick". + Generating jobs dynamically from metadata (e.g. "process all changed -entities for today") requires a way to re-derive the job list on each tick. +entities for today") could require a way to re-derive the job list on each tick. Unless there is another pattern / approach that should be used that perhaps works within the existing related behaviour. -### Proposed change +### Related behaviour + +Dynamic job graphs are now supported via explicit mutation APIs on +`Orchestrator`: + +- `add_job(job)` / `add_jobs(jobs)` +- `remove_job(job_name)` +- duplicate names raise `ValueError` +- dependency validation is re-run before `tick()` if and when the job graph changed. + +Two constructor flags control strictness: -Allow `jobs` to be a callable as well as a plain list. +- `strict_dependencies` (default `True`): unresolved dependencies raise when strict mode is enabled, or if False then warn (cross-orchestrator friendly) +- `allow_runtime_mutation` (default `False`): if enabled, jobs can be added or + removed after ticks have already run. + +This enables an orchestrator-first workflow from config: ```python -# Type alias (illustrative) -JobFactory = Callable[[datetime], list[Job]] +orchestrator = orchestrator_from_config( + config="dispatchio.toml", + allow_runtime_mutation=True, +) -class Orchestrator: - def __init__( - self, - jobs: list[Job] | JobFactory, - ... 
- ) +orchestrator.add_jobs(initial_jobs) + +# Later, after some ticks already ran: +orchestrator.add_job(dynamic_job) ``` -At the start of each tick, if `jobs` is callable, invoke it with -`reference_time` to obtain the current job list. The `_job_index` is -rebuilt from this list before evaluation begins. +`orchestrator_from_config` now also accepts no initial jobs, making this flow +natural for pipelines that discover work at runtime. -The factory receives `reference_time` so it can scope its queries to -the correct logical period. +### Alternative approach (factory callable) + +Callable job factories remain a valid future extension when teams want to +regenerate the full graph each tick from artifacts/metadata. With the current +APIs, many of those use cases can be handled by mutating the graph explicitly +instead of replacing it wholesale. ### Behaviour details | Concern | Behaviour | |---|---| -| Unresolved dependency warnings | Moved from `__init__` to the start of each tick when a factory is in use | +| Unresolved dependency warnings | Validated before `tick()` when the graph changed; warning or error depending on `strict_dependencies` | | Jobs that disappear between ticks | Their existing `RunRecord` in the state store is preserved; they are simply not evaluated that tick | | State store | Unchanged — dynamic jobs write the same `RunRecord` format as static jobs | -| `orchestrator_from_config` | Not affected — factories bypass config and are passed directly to `Orchestrator` | -| `_warn_unresolved_dependencies` | Still fires; for factories, fires per-tick | +| `orchestrator_from_config` | Can now create an empty orchestrator (`jobs` omitted) for orchestrator-first registration | +| Duplicate names | Rejected immediately with `ValueError` | -### Simple fan-in example +### Simple fan-in example (factory alternative) ```python -def job_factory(reference_time: datetime) -> list[Job]: +def job_factory(reference_time: datetime, artifacts: ArtifactStore) -> 
list[Job]: run_id = reference_time.strftime("%Y%m%d") - # Read which entities changed today (see Phase 4 for MetadataStore) - changed = metadata_store.get( + # Read which entities changed today (see Phase 4 for ArtifactStore) + changed = artifacts.read( job="discover_entities", run_id=run_id, key="entities", @@ -98,32 +116,63 @@ def job_factory(reference_time: datetime) -> list[Job]: return [*entity_jobs, collector] -orchestrator = Orchestrator(jobs=job_factory, ...) +orchestrator = Orchestrator(jobs=job_factory, artifact_store=FilesystemArtifactStore(...), ...) ``` ### Scope of change -- `dispatchio/orchestrator.py` — accept callable `jobs`; rebuild `_job_index` per tick when factory is in use -- `tests/test_orchestrator.py` — factory tests -- `examples/` — new example directory `dynamic_jobs` +- `dispatchio/orchestrator.py` — mutable APIs (`add_job`, `add_jobs`, `remove_job`), duplicate checks, pre-tick graph refresh +- `dispatchio/config/loader.py` — `orchestrator_from_config` now allows empty initial jobs +- `tests/test_orchestrator.py` — mutation lifecycle, strict dependencies, duplicate-name tests +- `tests/test_config.py` — empty-jobs factory test +- `examples/` — new example directory `dynamic_registration` --- -## Phase 4 — Metadata store +## Phase 4 — Artifact store ### Problem -Dynamic job factories need to read metadata that was written by a -previous job (e.g. a discovery job that determines which entities -changed). Currently there is no Dispatchio-native place to put this data. +Dynamic job factories need to read structured data that was written by a +previous job (e.g. a discovery job that determines which entities changed). +Currently there is no Dispatchio-native place to put this data. + +The name `MetadataStore` was considered and rejected — "metadata" in most +systems means data *about* jobs (status, timestamps), which is already +`StateStore`'s domain. 
What factories need is a store for named, structured +outputs produced by one job and consumed by another — the standard term in +data and CI/CD pipelines is *artifact*. `ArtifactStore` with `write`/`read` +method names is the clearest expression of this without implying a specific +value type or file-based storage. + +### Namespacing + +An `ArtifactStore` is constructed with a `namespace` (default `"default"`). +Multiple top-level `Orchestrator` instances that share the same backing +store can use distinct namespaces so that their artifact keys never collide: + +```python +orchestrator_a = Orchestrator(jobs=..., artifact_store=FilesystemArtifactStore( + path=ARTIFACT_DIR, namespace="pipeline_a" +)) +orchestrator_b = Orchestrator(jobs=..., artifact_store=FilesystemArtifactStore( + path=ARTIFACT_DIR, namespace="pipeline_b" +)) +``` + +The full internal key structure is `<namespace>/<job>/<run_id>/<key>`. +A factory reading artifacts never needs to know the namespace — it comes +from the store instance injected into the orchestrator. ### Proposed interface ```python -class MetadataStore(Protocol): - # Opinionated API (XCom-like): identify values by producer + logical key, +class ArtifactStore(Protocol): + namespace: str # set at construction; default "default" + + # Opinionated API: identify values by producer job + logical key, # scoped by run_id. - def push( + def write( self, value: Any, *, @@ -132,7 +181,7 @@ class MetadataStore(Protocol): key: str = "return_value", ) -> None: ... - def pull( + def read( self, *, job: str, @@ -141,9 +190,10 @@ class MetadataStore(Protocol): ) -> Any | None: ... # Escape hatch: direct key/value access for custom naming schemes. - def get(self, full_key: str) -> Any | None: ... - def put(self, full_key: str, value: Any) -> None: ... - def delete(self, full_key: str) -> None: ... + # Keys are always interpreted within the store's namespace. + def get(self, key: str) -> Any | None: ... + def put(self, key: str, value: Any) -> None: ... 
+ def delete(self, key: str) -> None: ... def list_keys(self, prefix: str = "") -> list[str]: ... ``` @@ -151,61 +201,123 @@ Values are any JSON-serialisable Python object. Dispatchio's default key strategy (inspired by Airflow XCom lookup semantics) is: -- `producer_job` + `run_id` + `key` +- `namespace` + `producer_job` + `run_id` + `key` - default `key` is `"return_value"` - default `run_id` in worker context comes from `DISPATCHIO_RUN_ID` +- default `namespace` in worker context comes from `DISPATCHIO_ARTIFACT_NAMESPACE` By default this resolves to a full key like -`"discover_entities/20260115/entities"`. +`"default/discover_entities/20260115/entities"`. Users who want a different convention can either: -- pass explicit `full_key` to `get/put/delete`, or -- configure a custom key strategy callable in metadata settings. +- pass explicit `key` to `get/put/delete` (it is still scoped to the namespace), or +- configure a custom key strategy callable in artifact settings. + +### Tick-level I/O caching + +The artifact store is read at tick start (by the job factory) and written +at tick end (by completion events received from workers). A naive +implementation that issues a network or filesystem call for every individual +`write`/`read` will become a bottleneck in fan-out scenarios with many +dynamic child jobs. -### Implementations +The preferred pattern is a **load/flush cycle** tied to the tick boundary: -Same progression as `StateStore`: +```python +class ArtifactStoreCache(ArtifactStore): + """ + Wraps any ArtifactStore. Loads the full namespace into memory at + construction (or on load()), buffers all writes, and flushes on flush() + or __exit__. Individual write/read calls touch only the in-memory dict. + """ + def __init__(self, store: ArtifactStore) -> None: ... + def load(self) -> None: ... # reads namespace from backing store once + def flush(self) -> None: ... # writes buffered changes back in one batch + def __enter__(self) -> "ArtifactStoreCache": ... 
+ def __exit__(self, *_: Any) -> None: ... # calls flush() +``` + +`Orchestrator` accepts an optional `ArtifactStoreCache` and, when present, +calls `load()` before the factory callable and `flush()` after phase 5 +(submissions). Implementations that do not use the cache can be passed +directly; the load/flush hooks become no-ops. + +The factory callable receives the orchestrator's artifact store directly +(as an injected argument), so user code never manages the cache lifecycle +manually: + +```python +def job_factory(reference_time: datetime, artifacts: ArtifactStore) -> list[Job]: + entities = artifacts.read(job="discover_entities", run_id=run_id, key="entities") or [] + ... +``` + +The updated factory type alias is: + +```python +JobFactory = Callable[[datetime, ArtifactStore], list[Job]] +``` + +### Backend recommendations | Class | Use case | |---|---| -| `MemoryMetadataStore` | Tests, in-process demos | -| `FilesystemMetadataStore` | Local dev (stores JSON files alongside state) | -| `DynamoDBMetadataStore` | Production AWS (future, same phase as DynamoDB StateStore) | +| `MemoryArtifactStore` | Tests, in-process demos | +| `FilesystemArtifactStore` | Local dev (JSON files, one file per key) | +| `SQLiteArtifactStore` | Production single-node and S3-synced deployments | +| `DynamoDBArtifactStore` | Distributed / multi-node deployments (future) | + +**SQLite is the preferred production backend** for the load/flush pattern. +Loading an entire namespace is a single `SELECT WHERE namespace = ?`; flushing +is a single `BEGIN … COMMIT` transaction. This compares very favourably to +DynamoDB, where a full-namespace scan is expensive and each write is an +individual `PutItem` call. + +For serverless / ephemeral orchestrators (Lambda, ECS task), the natural +pattern is: copy the SQLite file from S3 at tick start, load into +`ArtifactStoreCache`, run the tick, flush, upload back to S3. This keeps +I/O outside the hot path and keeps latency predictable. 
+ +DynamoDB remains appropriate when multiple orchestrator instances run +concurrently and write artifacts independently (true horizontal scaling); +at that point the load/flush cycle is no longer safe without additional +locking, and per-item access is the right model. ### Harness integration -For a job to write metadata it needs a reference to the store. -The cleanest approach is to inject the store path/config via an env var -(`DISPATCHIO_METADATA_DIR` for filesystem), analogous to how `DISPATCHIO_DROP_DIR` -is injected today. The worker then imports `MetadataStore` from dispatchio and -writes directly: +For a job to write artifacts it needs a reference to the store. +The cleanest approach is to inject the store path/config via env vars +(`DISPATCHIO_ARTIFACT_DIR` and `DISPATCHIO_ARTIFACT_NAMESPACE` for filesystem), +analogous to how `DISPATCHIO_DROP_DIR` is injected today. The worker then +imports `get_artifact_store` from dispatchio and writes directly: ```python # Inside a discovery job worker function -from dispatchio.metadata import get_metadata_store +from dispatchio.artifact import get_artifact_store def discover(run_id: str) -> None: - store = get_metadata_store() # reads DISPATCHIO_METADATA_DIR from env + store = get_artifact_store() # reads DISPATCHIO_ARTIFACT_DIR / NAMESPACE from env entities = query_database_for_changes(run_id) - # Default key becomes: discover_entities/<run_id>/entities - store.push(entities, producer_job="discover_entities", run_id=run_id, key="entities") + # Full key: <namespace>/discover_entities/<run_id>/entities + store.write(entities, job="discover_entities", run_id=run_id, key="entities") def build_jobs(run_id: str) -> list[str]: - store = get_metadata_store() - # Equivalent to XCom pull(task_id="discover_entities", key="entities") - return store.pull(producer_job="discover_entities", run_id=run_id, key="entities") or [] + store = get_artifact_store() + return store.read(job="discover_entities", run_id=run_id, key="entities") or [] ``` ### Scope of 
change -- `dispatchio/metadata/base.py` — `MetadataStore` protocol + `MetadataRecord` model -- `dispatchio/metadata/memory.py` — `MemoryMetadataStore` -- `dispatchio/metadata/filesystem.py` — `FilesystemMetadataStore` -- `dispatchio/metadata/__init__.py` — `get_metadata_store()` factory (reads env) + key strategy wiring -- `dispatchio/executor/python_.py` + `subprocess_.py` — inject `DISPATCHIO_METADATA_DIR` -- `dispatchio/config/settings.py` — optional `[dispatchio.metadata]` config section -- `dispatchio/__init__.py` — re-export `MetadataStore` +- `dispatchio/artifact/base.py` — `ArtifactStore` protocol + `ArtifactRecord` model + `ArtifactStoreCache` +- `dispatchio/artifact/memory.py` — `MemoryArtifactStore` +- `dispatchio/artifact/filesystem.py` — `FilesystemArtifactStore` +- `dispatchio/artifact/sqlite_.py` — `SQLiteArtifactStore` +- `dispatchio/artifact/__init__.py` — `get_artifact_store()` factory (reads env) + key strategy wiring +- `dispatchio/orchestrator.py` — accept `artifact_store` arg; inject into `JobFactory`; call `load()`/`flush()` around tick when `ArtifactStoreCache` is provided +- `dispatchio/executor/python_.py` + `subprocess_.py` — inject `DISPATCHIO_ARTIFACT_DIR` + `DISPATCHIO_ARTIFACT_NAMESPACE` +- `dispatchio/config/settings.py` — optional `[dispatchio.artifact]` config section +- `dispatchio/__init__.py` — re-export `ArtifactStore`, `ArtifactStoreCache` --- @@ -416,7 +528,7 @@ the execution `run_id`. 
| 1 — Extended schedule conditions | `Condition` Protocol + concrete types, orchestrator, tests, 1 example | None | | 2 — RunID abstraction | `Frequency`, `DateCadence`, `Cadence`, constants, `cadence` field, tests | None (independent of Phase 1) | | 3 — Factory callable | `Orchestrator` factory support, tests, 1 example | Phase 2 (jobs carry `cadence`) | -| 4 — Metadata store | New `dispatchio/metadata/` package, harness injection, 1 example | Phase 3 (factory reads from it) | +| 4 — Artifact store | New `dispatchio/artifact/` package, `ArtifactStoreCache`, namespace support, harness injection, 1 example | Phase 3 (factory reads from it) | | 5 — Cascading skip | Orchestrator change, tests | None | | 6 — Dependency modes | Model change, orchestrator, tests | Phase 5 (replaces it) | | 7 — Backfill helper | `simulate.py` extension, tests | Phase 1 (date stepping) | @@ -444,10 +556,22 @@ Phase 4 depends on Phase 3 in that its primary consumer is the factory pattern. the discovery job hasn't written metadata yet), the tick is a no-op. This is correct behaviour, but should it log a warning? -4. **Metadata store config** — should the metadata store be configured +4. **Artifact store config** — should the artifact store be configured under the same `dispatchio.toml` as the state store, or separately? Sharing the same section (with sub-keys) seems cleaner. -5. **Hierarchical option B (week_day expressions)** — worth a follow-up +5. **`ArtifactStoreCache` flush on error** — if an exception occurs mid-tick + after some writes have been buffered, should `flush()` still be called + (preserving partial results) or skipped (keeping the store clean)? + The safer default is to flush — partial artifacts are preferable to lost + artifacts because `write` is idempotent for a given (namespace, job, run_id, key). + +6. **SQLite + S3 sync protocol** — copy-on-open / upload-on-close works for + single-orchestrator deployments. 
If two orchestrators share a SQLite file + via S3, a last-writer-wins race is possible. This should be called out + clearly in docs; the DynamoDB backend is the answer when true concurrency + is required. + +7. **Hierarchical option B (week_day expressions)** — worth a follow-up design pass once the factory pattern is in use and we can see which mappings are written repeatedly. diff --git a/examples/cadence/jobs.py b/examples/cadence/jobs.py index d0ef169..1eecf74 100644 --- a/examples/cadence/jobs.py +++ b/examples/cadence/jobs.py @@ -39,7 +39,7 @@ orchestrator_from_config, ) -BASE = Path(__file__).parent +BASE = Path(__file__).parent CONFIG_FILE = os.getenv("DISPATCHIO_CONFIG", str(BASE / "dispatchio.toml")) # One run per calendar month — run_id = "202501", "202502", … diff --git a/examples/cadence/run.py b/examples/cadence/run.py index 6daa08d..39e294f 100644 --- a/examples/cadence/run.py +++ b/examples/cadence/run.py @@ -10,6 +10,7 @@ Run with: python examples/cadence/run.py """ + import sys import logging from datetime import datetime, timezone diff --git a/examples/conditions/run.py b/examples/conditions/run.py index 7310c97..d786475 100644 --- a/examples/conditions/run.py +++ b/examples/conditions/run.py @@ -11,6 +11,7 @@ Run with: python examples/conditions/run.py """ + import sys import logging from datetime import datetime, timezone diff --git a/examples/dependency_modes/jobs.py b/examples/dependency_modes/jobs.py index 02e7e38..023dffe 100644 --- a/examples/dependency_modes/jobs.py +++ b/examples/dependency_modes/jobs.py @@ -28,7 +28,6 @@ from pathlib import Path from dispatchio import ( - Dependency, DependencyMode, Job, PythonJob, diff --git a/examples/dependency_modes/my_work.py b/examples/dependency_modes/my_work.py index 60f62c0..a8ab5fc 100644 --- a/examples/dependency_modes/my_work.py +++ b/examples/dependency_modes/my_work.py @@ -23,10 +23,14 @@ def entity_c(run_id: str) -> None: def best_effort_collector(run_id: str) -> None: - 
print(f"best_effort_collector: all entities finished for {run_id}, collecting results.") + print( + f"best_effort_collector: all entities finished for {run_id}, collecting results." + ) time.sleep(0.2) def majority_collector(run_id: str) -> None: - print(f"majority_collector: threshold met for {run_id}, proceeding with majority results.") + print( + f"majority_collector: threshold met for {run_id}, proceeding with majority results." + ) time.sleep(0.2) diff --git a/examples/dynamic_registration/__init__.py b/examples/dynamic_registration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/dynamic_registration/dispatchio.toml b/examples/dynamic_registration/dispatchio.toml new file mode 100644 index 0000000..fd98de8 --- /dev/null +++ b/examples/dynamic_registration/dispatchio.toml @@ -0,0 +1,14 @@ +log_level = "INFO" +default_cadence = "daily" + +[state] +backend = "filesystem" +root = ".dispatchio/state" + +[receiver] +backend = "filesystem" +drop_dir = ".dispatchio/completions" + +[submission] +concurrency = 4 +max_per_tick = 10 diff --git a/examples/dynamic_registration/jobs.py b/examples/dynamic_registration/jobs.py new file mode 100644 index 0000000..a4197bd --- /dev/null +++ b/examples/dynamic_registration/jobs.py @@ -0,0 +1,65 @@ +""" +Dynamic registration example. + +This example shows an orchestrator-first flow: + 1. Build the orchestrator from config. + 2. Register jobs with add_jobs(). + 3. Optionally add more jobs after ticks have already run. 
+""" + +from __future__ import annotations + +import os +from pathlib import Path + +from dispatchio import DAILY, Dependency, Job, PythonJob, orchestrator_from_config + +BASE = Path(__file__).parent +CONFIG_FILE = os.getenv("DISPATCHIO_CONFIG", str(BASE / "dispatchio.toml")) + +orchestrator = orchestrator_from_config( + config=CONFIG_FILE, + allow_runtime_mutation=True, +) + + +def register_bootstrap_jobs() -> None: + """Register the initial pipeline jobs once.""" + existing = {job.name for job in orchestrator.jobs} + if "discover" in existing and "transform" in existing: + return + + discover_job = Job.create( + "discover", + PythonJob(script=str(BASE / "my_work.py"), function="discover"), + cadence=DAILY, + ) + transform_job = Job.create( + "transform", + PythonJob(script=str(BASE / "my_work.py"), function="transform"), + cadence=DAILY, + depends_on=[Dependency(job_name="discover", cadence=DAILY)], + ) + orchestrator.add_jobs([discover_job, transform_job]) + + +def register_entity_jobs(entities: list[str]) -> None: + """Register one job per entity after discovery completes.""" + existing = {job.name for job in orchestrator.jobs} + new_jobs: list[Job] = [] + + for entity in entities: + job_name = f"process_entity_{entity}" + if job_name in existing: + continue + new_jobs.append( + Job.create( + job_name, + PythonJob(script=str(BASE / "my_work.py"), function=job_name), + cadence=DAILY, + depends_on=[Dependency(job_name="transform", cadence=DAILY)], + ) + ) + + if new_jobs: + orchestrator.add_jobs(new_jobs) diff --git a/examples/dynamic_registration/my_work.py b/examples/dynamic_registration/my_work.py new file mode 100644 index 0000000..5a1e5bf --- /dev/null +++ b/examples/dynamic_registration/my_work.py @@ -0,0 +1,27 @@ +""" +Worker functions used by the dynamic registration example. 
+""" + +from __future__ import annotations + +import time + + +def discover(run_id: str) -> None: + print(f"discover finished for run_id={run_id}") + time.sleep(0.1) + + +def transform(run_id: str) -> None: + print(f"transform finished for run_id={run_id}") + time.sleep(0.1) + + +def process_entity_alpha(run_id: str) -> None: + print(f"entity alpha processed for run_id={run_id}") + time.sleep(0.1) + + +def process_entity_beta(run_id: str) -> None: + print(f"entity beta processed for run_id={run_id}") + time.sleep(0.1) diff --git a/examples/dynamic_registration/run.py b/examples/dynamic_registration/run.py new file mode 100644 index 0000000..8121b67 --- /dev/null +++ b/examples/dynamic_registration/run.py @@ -0,0 +1,49 @@ +""" +Dynamic registration demo runner. + +Run with: + python examples/dynamic_registration/run.py +""" + +from __future__ import annotations + +import logging +import sys +import time +from datetime import datetime, timezone +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parents[2])) + +from examples.dynamic_registration.jobs import ( # noqa: E402 + orchestrator, + register_bootstrap_jobs, + register_entity_jobs, +) + +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") +log = logging.getLogger(__name__) + + +if __name__ == "__main__": + reference_time = datetime.now(tz=timezone.utc) + + register_bootstrap_jobs() + + for tick_num in range(1, 8): + if tick_num == 3: + register_entity_jobs(["alpha", "beta"]) + log.info("Registered dynamic entity jobs after initial ticks") + + result = orchestrator.tick(reference_time=reference_time) + log.info("Tick %d", tick_num) + for event in result.results: + suffix = f" ({event.detail})" if event.detail else "" + log.info( + " %s[%s] -> %s%s", + event.job_name, + event.run_id, + event.action.value, + suffix, + ) + time.sleep(0.3) diff --git a/examples/hello_world/jobs.py b/examples/hello_world/jobs.py index 32ceb54..57e8229 100644 --- 
a/examples/hello_world/jobs.py +++ b/examples/hello_world/jobs.py @@ -5,7 +5,7 @@ 1. hello_world — runs immediately, prints a greeting. 2. goodbye_world — runs after hello_world is done for the same day. -The Job class creates two jobs, and a dependency between them. +The Job class creates two jobs, and a dependency between them. Configuration is loaded from dispatchio.toml in this directory. For example, default_cadence is set to DAILY so it doesn't have to be specified in the Job definitions. @@ -13,34 +13,35 @@ Run with: python examples/hello_world/run.py """ + import os from pathlib import Path from dispatchio import Job, PythonJob, orchestrator_from_config -BASE = Path(__file__).parent +BASE = Path(__file__).parent CONFIG_FILE = os.getenv("DISPATCHIO_CONFIG", str(BASE / "dispatchio.toml")) hello_world = Job.create( - "hello_world", - # default_cadence is set to DAILY in dispatchio.toml - # cadence=DAILY, - executor=PythonJob( - script=str(BASE / "my_work.py"), - function="hello_world", - ), - ) + "hello_world", + # default_cadence is set to DAILY in dispatchio.toml + # cadence=DAILY, + executor=PythonJob( + script=str(BASE / "my_work.py"), + function="hello_world", + ), +) goodbye_world = Job.create( - name="goodbye_world", - executor=PythonJob( - script=str(BASE / "my_work.py"), - function="goodbye_world", - ), - depends_on=hello_world, - # default_cadence is set to DAILY in dispatchio.toml - # cadence=DAILY, - ) + name="goodbye_world", + executor=PythonJob( + script=str(BASE / "my_work.py"), + function="goodbye_world", + ), + depends_on=hello_world, + # default_cadence is set to DAILY in dispatchio.toml + # cadence=DAILY, +) JOBS = [hello_world, goodbye_world] orchestrator = orchestrator_from_config(JOBS, config=CONFIG_FILE) diff --git a/examples/hello_world/run.py b/examples/hello_world/run.py index 945f3e4..8c62403 100644 --- a/examples/hello_world/run.py +++ b/examples/hello_world/run.py @@ -11,6 +11,7 @@ orchestrator.tick() """ + import sys import 
logging from pathlib import Path diff --git a/examples/subprocess_example/my_work.py b/examples/subprocess_example/my_work.py index fd0bdd2..1039b79 100644 --- a/examples/subprocess_example/my_work.py +++ b/examples/subprocess_example/my_work.py @@ -11,6 +11,7 @@ python my_work.py generate python my_work.py summarize """ + import sys from dispatchio.worker.harness import run_job diff --git a/examples/subprocess_example/run.py b/examples/subprocess_example/run.py index 5d733a0..99ab239 100644 --- a/examples/subprocess_example/run.py +++ b/examples/subprocess_example/run.py @@ -11,6 +11,7 @@ orchestrator.tick() """ + import sys import logging from pathlib import Path diff --git a/mise.lock b/mise.lock new file mode 100644 index 0000000..88cea2d --- /dev/null +++ b/mise.lock @@ -0,0 +1,348 @@ +# @generated - this file is auto-generated by `mise lock` https://mise.jdx.dev/dev-tools/mise-lock.html + +[[tools.fd]] +version = "v10.4.2" +backend = "aqua:sharkdp/fd" + +[tools.fd."platforms.linux-arm64"] +checksum = "sha256:f32d3657473fba74e2600babc8db0b93420d51169223b7e8143b2ed55d8fd9e8" +url = "https://github.com/sharkdp/fd/releases/download/v10.4.2/fd-v10.4.2-aarch64-unknown-linux-musl.tar.gz" + +[tools.fd."platforms.linux-arm64-musl"] +checksum = "sha256:f32d3657473fba74e2600babc8db0b93420d51169223b7e8143b2ed55d8fd9e8" +url = "https://github.com/sharkdp/fd/releases/download/v10.4.2/fd-v10.4.2-aarch64-unknown-linux-musl.tar.gz" + +[tools.fd."platforms.linux-x64"] +checksum = "sha256:e3257d48e29a6be965187dbd24ce9af564e0fe67b3e73c9bdcd180f4ec11bdde" +url = "https://github.com/sharkdp/fd/releases/download/v10.4.2/fd-v10.4.2-x86_64-unknown-linux-musl.tar.gz" + +[tools.fd."platforms.linux-x64-musl"] +checksum = "sha256:e3257d48e29a6be965187dbd24ce9af564e0fe67b3e73c9bdcd180f4ec11bdde" +url = "https://github.com/sharkdp/fd/releases/download/v10.4.2/fd-v10.4.2-x86_64-unknown-linux-musl.tar.gz" + +[tools.fd."platforms.macos-arm64"] +checksum = 
"sha256:623dc0afc81b92e4d4606b380d7bc91916ba7b97814263e554d50923a39e480a" +url = "https://github.com/sharkdp/fd/releases/download/v10.4.2/fd-v10.4.2-aarch64-apple-darwin.tar.gz" + +[tools.fd."platforms.windows-x64"] +checksum = "sha256:b2816e506390a89941c63c9187d58a3cc10e9a55f2ef0685f9ea0eccaf7c98c8" +url = "https://github.com/sharkdp/fd/releases/download/v10.4.2/fd-v10.4.2-x86_64-pc-windows-msvc.zip" + +[[tools.github-cli]] +version = "v2.89.0" +backend = "aqua:cli/cli" + +[tools.github-cli."platforms.linux-arm64"] +checksum = "sha256:9e64a623dfc242990aa5d9b3f507111149c4282f66b68eaad1dc79eeb13b9ce5" +url = "https://github.com/cli/cli/releases/download/v2.89.0/gh_2.89.0_linux_arm64.tar.gz" +provenance = "github-attestations" + +[tools.github-cli."platforms.linux-arm64-musl"] +checksum = "sha256:9e64a623dfc242990aa5d9b3f507111149c4282f66b68eaad1dc79eeb13b9ce5" +url = "https://github.com/cli/cli/releases/download/v2.89.0/gh_2.89.0_linux_arm64.tar.gz" +provenance = "github-attestations" + +[tools.github-cli."platforms.linux-x64"] +checksum = "sha256:d0422caade520530e76c1c558da47daebaa8e1203d6b7ff10ad7d6faba3490d8" +url = "https://github.com/cli/cli/releases/download/v2.89.0/gh_2.89.0_linux_amd64.tar.gz" +provenance = "github-attestations" + +[tools.github-cli."platforms.linux-x64-musl"] +checksum = "sha256:d0422caade520530e76c1c558da47daebaa8e1203d6b7ff10ad7d6faba3490d8" +url = "https://github.com/cli/cli/releases/download/v2.89.0/gh_2.89.0_linux_amd64.tar.gz" +provenance = "github-attestations" + +[tools.github-cli."platforms.macos-arm64"] +checksum = "sha256:2423d02ec0a2094898c378703a1b28a5846c08700f87461363857cb8cb3fda94" +url = "https://github.com/cli/cli/releases/download/v2.89.0/gh_2.89.0_macOS_arm64.zip" +provenance = "github-attestations" + +[tools.github-cli."platforms.macos-x64"] +checksum = "sha256:862e21cac6a71f81e7cd6e5127e3cd344f8537441ad2db94cd208319dd17b6e9" +url = "https://github.com/cli/cli/releases/download/v2.89.0/gh_2.89.0_macOS_amd64.zip" 
+provenance = "github-attestations" + +[tools.github-cli."platforms.windows-x64"] +checksum = "sha256:f3326f6406c98c99701b257d77e2b040eedf83d0176c2c2e9809aa339ceb467e" +url = "https://github.com/cli/cli/releases/download/v2.89.0/gh_2.89.0_windows_amd64.zip" +provenance = "github-attestations" + +[[tools.jq]] +version = "1.8.1" +backend = "aqua:jqlang/jq" + +[tools.jq."platforms.linux-arm64"] +checksum = "sha256:6bc62f25981328edd3cfcfe6fe51b073f2d7e7710d7ef7fcdac28d4e384fc3d4" +url = "https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-linux-arm64" +provenance = "github-attestations" + +[tools.jq."platforms.linux-arm64-musl"] +checksum = "sha256:6bc62f25981328edd3cfcfe6fe51b073f2d7e7710d7ef7fcdac28d4e384fc3d4" +url = "https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-linux-arm64" +provenance = "github-attestations" + +[tools.jq."platforms.linux-x64"] +checksum = "sha256:020468de7539ce70ef1bceaf7cde2e8c4f2ca6c3afb84642aabc5c97d9fc2a0d" +url = "https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-linux-amd64" +provenance = "github-attestations" + +[tools.jq."platforms.linux-x64-musl"] +checksum = "sha256:020468de7539ce70ef1bceaf7cde2e8c4f2ca6c3afb84642aabc5c97d9fc2a0d" +url = "https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-linux-amd64" +provenance = "github-attestations" + +[tools.jq."platforms.macos-arm64"] +checksum = "sha256:a9fe3ea2f86dfc72f6728417521ec9067b343277152b114f4e98d8cb0e263603" +url = "https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-macos-arm64" +provenance = "github-attestations" + +[tools.jq."platforms.macos-x64"] +checksum = "sha256:e80dbe0d2a2597e3c11c404f03337b981d74b4a8504b70586c354b7697a7c27f" +url = "https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-macos-amd64" +provenance = "github-attestations" + +[tools.jq."platforms.windows-x64"] +checksum = "sha256:23cb60a1354eed6bcc8d9b9735e8c7b388cd1fdcb75726b93bc299ef22dd9334" +url = 
"https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-windows-amd64.exe" +provenance = "github-attestations" + +[[tools.pre-commit]] +version = "4.5.1" +backend = "aqua:pre-commit/pre-commit" + +[tools.pre-commit."platforms.linux-arm64"] +checksum = "sha256:19d00ba35cbf9c04dc3736cd8bb641e8633f3eddd4cedde71809574ae68a2cd1" +url = "https://github.com/pre-commit/pre-commit/releases/download/v4.5.1/pre-commit-4.5.1.pyz" + +[tools.pre-commit."platforms.linux-arm64-musl"] +checksum = "sha256:19d00ba35cbf9c04dc3736cd8bb641e8633f3eddd4cedde71809574ae68a2cd1" +url = "https://github.com/pre-commit/pre-commit/releases/download/v4.5.1/pre-commit-4.5.1.pyz" + +[tools.pre-commit."platforms.linux-x64"] +checksum = "sha256:19d00ba35cbf9c04dc3736cd8bb641e8633f3eddd4cedde71809574ae68a2cd1" +url = "https://github.com/pre-commit/pre-commit/releases/download/v4.5.1/pre-commit-4.5.1.pyz" + +[tools.pre-commit."platforms.linux-x64-musl"] +checksum = "sha256:19d00ba35cbf9c04dc3736cd8bb641e8633f3eddd4cedde71809574ae68a2cd1" +url = "https://github.com/pre-commit/pre-commit/releases/download/v4.5.1/pre-commit-4.5.1.pyz" + +[tools.pre-commit."platforms.macos-arm64"] +checksum = "sha256:19d00ba35cbf9c04dc3736cd8bb641e8633f3eddd4cedde71809574ae68a2cd1" +url = "https://github.com/pre-commit/pre-commit/releases/download/v4.5.1/pre-commit-4.5.1.pyz" + +[tools.pre-commit."platforms.macos-x64"] +checksum = "sha256:19d00ba35cbf9c04dc3736cd8bb641e8633f3eddd4cedde71809574ae68a2cd1" +url = "https://github.com/pre-commit/pre-commit/releases/download/v4.5.1/pre-commit-4.5.1.pyz" + +[[tools.python]] +version = "3.13.12" +backend = "core:python" + +[tools.python."platforms.linux-arm64"] +checksum = "sha256:8abd16da242b56902a13ba1d6e3aaa0d6d19dfc71547f04e437f707fde179915" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.13.12+20260324-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" +provenance = "github-attestations" + 
+[tools.python."platforms.linux-arm64-musl"] +checksum = "sha256:8abd16da242b56902a13ba1d6e3aaa0d6d19dfc71547f04e437f707fde179915" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.13.12+20260324-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz" +provenance = "github-attestations" + +[tools.python."platforms.linux-x64"] +checksum = "sha256:91c8920a197c3f7c562612eb4537e18e0c52c1c918bb39912cbd0b1b3c154229" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.13.12+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +provenance = "github-attestations" + +[tools.python."platforms.linux-x64-musl"] +checksum = "sha256:91c8920a197c3f7c562612eb4537e18e0c52c1c918bb39912cbd0b1b3c154229" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.13.12+20260324-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz" +provenance = "github-attestations" + +[tools.python."platforms.macos-arm64"] +checksum = "sha256:3de7059c87c0e7604e3e9397a547020c161f65c8d51d358e230d3a7d3a456aaa" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.13.12+20260324-aarch64-apple-darwin-install_only_stripped.tar.gz" +provenance = "github-attestations" + +[tools.python."platforms.macos-x64"] +checksum = "sha256:7fd6a6162ef82c9df74eb1c71dad042a05960869789915b1203d61f14d3dc629" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.13.12+20260324-x86_64-apple-darwin-install_only_stripped.tar.gz" +provenance = "github-attestations" + +[tools.python."platforms.windows-x64"] +checksum = "sha256:b11194bd12122df60e24b08a71a4e5aaa1d15c2cf436f7f4f57bbea4f9a80462" +url = "https://github.com/astral-sh/python-build-standalone/releases/download/20260324/cpython-3.13.12+20260324-x86_64-pc-windows-msvc-install_only_stripped.tar.gz" +provenance = "github-attestations" + 
+[[tools.ripgrep]] +version = "15.1.0" +backend = "aqua:BurntSushi/ripgrep" + +[tools.ripgrep."platforms.linux-arm64"] +checksum = "sha256:2b661c6ef508e902f388e9098d9c4c5aca72c87b55922d94abdba830b4dc885e" +url = "https://github.com/BurntSushi/ripgrep/releases/download/15.1.0/ripgrep-15.1.0-aarch64-unknown-linux-gnu.tar.gz" + +[tools.ripgrep."platforms.linux-arm64-musl"] +checksum = "sha256:2b661c6ef508e902f388e9098d9c4c5aca72c87b55922d94abdba830b4dc885e" +url = "https://github.com/BurntSushi/ripgrep/releases/download/15.1.0/ripgrep-15.1.0-aarch64-unknown-linux-gnu.tar.gz" + +[tools.ripgrep."platforms.linux-x64"] +checksum = "sha256:1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599" +url = "https://github.com/BurntSushi/ripgrep/releases/download/15.1.0/ripgrep-15.1.0-x86_64-unknown-linux-musl.tar.gz" + +[tools.ripgrep."platforms.linux-x64-musl"] +checksum = "sha256:1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599" +url = "https://github.com/BurntSushi/ripgrep/releases/download/15.1.0/ripgrep-15.1.0-x86_64-unknown-linux-musl.tar.gz" + +[tools.ripgrep."platforms.macos-arm64"] +checksum = "sha256:378e973289176ca0c6054054ee7f631a065874a352bf43f0fa60ef079b6ba715" +url = "https://github.com/BurntSushi/ripgrep/releases/download/15.1.0/ripgrep-15.1.0-aarch64-apple-darwin.tar.gz" + +[tools.ripgrep."platforms.macos-x64"] +checksum = "sha256:64811cb24e77cac3057d6c40b63ac9becf9082eedd54ca411b475b755d334882" +url = "https://github.com/BurntSushi/ripgrep/releases/download/15.1.0/ripgrep-15.1.0-x86_64-apple-darwin.tar.gz" + +[tools.ripgrep."platforms.windows-x64"] +checksum = "sha256:124510b94b6baa3380d051fdf4650eaa80a302c876d611e9dba0b2e18d87493a" +url = "https://github.com/BurntSushi/ripgrep/releases/download/15.1.0/ripgrep-15.1.0-x86_64-pc-windows-msvc.zip" + +[[tools.ruff]] +version = "0.15.9" +backend = "aqua:astral-sh/ruff" + +[tools.ruff."platforms.linux-arm64"] +checksum = 
"sha256:e017dd0c1fd7475aaddc49bde8cddcee3c27d42f6ce139a96df0c1022e06d85b" +url = "https://github.com/astral-sh/ruff/releases/download/0.15.9/ruff-aarch64-unknown-linux-musl.tar.gz" +provenance = "github-attestations" + +[tools.ruff."platforms.linux-arm64-musl"] +checksum = "sha256:e017dd0c1fd7475aaddc49bde8cddcee3c27d42f6ce139a96df0c1022e06d85b" +url = "https://github.com/astral-sh/ruff/releases/download/0.15.9/ruff-aarch64-unknown-linux-musl.tar.gz" +provenance = "github-attestations" + +[tools.ruff."platforms.linux-x64"] +checksum = "sha256:e30e6e50dbf925b42335f28e2fa296d404294f294159b314dca47b88317fc477" +url = "https://github.com/astral-sh/ruff/releases/download/0.15.9/ruff-x86_64-unknown-linux-musl.tar.gz" +provenance = "github-attestations" + +[tools.ruff."platforms.linux-x64-musl"] +checksum = "sha256:e30e6e50dbf925b42335f28e2fa296d404294f294159b314dca47b88317fc477" +url = "https://github.com/astral-sh/ruff/releases/download/0.15.9/ruff-x86_64-unknown-linux-musl.tar.gz" +provenance = "github-attestations" + +[tools.ruff."platforms.macos-arm64"] +checksum = "sha256:013d878f17c625550e4a6b19235c22fc229639f66f563bb72cb2c896aeca11e8" +url = "https://github.com/astral-sh/ruff/releases/download/0.15.9/ruff-aarch64-apple-darwin.tar.gz" +provenance = "github-attestations" + +[tools.ruff."platforms.macos-x64"] +checksum = "sha256:7e0fe9daba25848f85cb3d43e47ecd7d23f14e92e8799f92c1bcd8319a4ce4f8" +url = "https://github.com/astral-sh/ruff/releases/download/0.15.9/ruff-x86_64-apple-darwin.tar.gz" +provenance = "github-attestations" + +[tools.ruff."platforms.windows-x64"] +checksum = "sha256:e38fddd19805bc8f7329003c2abdaf49d8ca9e5bc0c6702e8472e16f127bcd44" +url = "https://github.com/astral-sh/ruff/releases/download/0.15.9/ruff-x86_64-pc-windows-msvc.zip" +provenance = "github-attestations" + +[[tools.starship]] +version = "1.24.2" +backend = "aqua:starship/starship" + +[tools.starship."platforms.linux-arm64"] +checksum = 
"sha256:56b9ff412bbf374d29b99e5ac09a849124cb37a0a13121e8470df32de53c1ea6" +url = "https://github.com/starship/starship/releases/download/v1.24.2/starship-aarch64-unknown-linux-musl.tar.gz" + +[tools.starship."platforms.linux-arm64-musl"] +checksum = "sha256:56b9ff412bbf374d29b99e5ac09a849124cb37a0a13121e8470df32de53c1ea6" +url = "https://github.com/starship/starship/releases/download/v1.24.2/starship-aarch64-unknown-linux-musl.tar.gz" + +[tools.starship."platforms.linux-x64"] +checksum = "sha256:00ff3c1f8ffb59b5c15d4b44c076bcca04d92cf0055c86b916248c14f3ae714a" +url = "https://github.com/starship/starship/releases/download/v1.24.2/starship-x86_64-unknown-linux-musl.tar.gz" + +[tools.starship."platforms.linux-x64-musl"] +checksum = "sha256:00ff3c1f8ffb59b5c15d4b44c076bcca04d92cf0055c86b916248c14f3ae714a" +url = "https://github.com/starship/starship/releases/download/v1.24.2/starship-x86_64-unknown-linux-musl.tar.gz" + +[tools.starship."platforms.macos-arm64"] +checksum = "sha256:d3a0da21374962625a2ee992110979bc1fa33424d7b6aea58a70405e26544fd9" +url = "https://github.com/starship/starship/releases/download/v1.24.2/starship-aarch64-apple-darwin.tar.gz" + +[tools.starship."platforms.macos-x64"] +checksum = "sha256:237beb10cc970c4361536e9f9f434dfed755f8282c5cd951b6a7e3fcbda8e779" +url = "https://github.com/starship/starship/releases/download/v1.24.2/starship-x86_64-apple-darwin.tar.gz" + +[tools.starship."platforms.windows-x64"] +checksum = "sha256:d38424c595320e9639a276666347d5e44cb004de3972f40b9a5a1a6b88537f29" +url = "https://github.com/starship/starship/releases/download/v1.24.2/starship-x86_64-pc-windows-msvc.zip" + +[[tools.trivy]] +version = "v0.69.3" +backend = "aqua:aquasecurity/trivy" + +[tools.trivy."platforms.linux-arm64"] +checksum = "sha256:7e3924a974e912e57b4a99f65ece7931f8079584dae12eb7845024f97087bdfd" +url = "https://github.com/aquasecurity/trivy/releases/download/v0.69.3/trivy_0.69.3_Linux-ARM64.tar.gz" +provenance = "cosign" + 
+[tools.trivy."platforms.linux-arm64-musl"] +checksum = "sha256:7e3924a974e912e57b4a99f65ece7931f8079584dae12eb7845024f97087bdfd" +url = "https://github.com/aquasecurity/trivy/releases/download/v0.69.3/trivy_0.69.3_Linux-ARM64.tar.gz" +provenance = "cosign" + +[tools.trivy."platforms.linux-x64"] +checksum = "sha256:1816b632dfe529869c740c0913e36bd1629cb7688bd5634f4a858c1d57c88b75" +url = "https://github.com/aquasecurity/trivy/releases/download/v0.69.3/trivy_0.69.3_Linux-64bit.tar.gz" +provenance = "cosign" + +[tools.trivy."platforms.linux-x64-musl"] +checksum = "sha256:1816b632dfe529869c740c0913e36bd1629cb7688bd5634f4a858c1d57c88b75" +url = "https://github.com/aquasecurity/trivy/releases/download/v0.69.3/trivy_0.69.3_Linux-64bit.tar.gz" +provenance = "cosign" + +[tools.trivy."platforms.macos-arm64"] +checksum = "sha256:a2f2179afd4f8bb265ca3c7aefb56a666bc4a9a411663bc0f22c3549fbc643a5" +url = "https://github.com/aquasecurity/trivy/releases/download/v0.69.3/trivy_0.69.3_macOS-ARM64.tar.gz" +provenance = "cosign" + +[tools.trivy."platforms.macos-x64"] +checksum = "sha256:fec4a9f7569b624dd9d044fca019e5da69e032700edbb1d7318972c448ec2f4e" +url = "https://github.com/aquasecurity/trivy/releases/download/v0.69.3/trivy_0.69.3_macOS-64bit.tar.gz" +provenance = "cosign" + +[tools.trivy."platforms.windows-x64"] +checksum = "sha256:74362dc711383255308230ecbeb587eb1e4e83a8d332be5b0259afac6e0c2224" +url = "https://github.com/aquasecurity/trivy/releases/download/v0.69.3/trivy_0.69.3_windows-64bit.zip" +provenance = "cosign" + +[[tools.uv]] +version = "0.9.7" +backend = "aqua:astral-sh/uv" + +[tools.uv."platforms.linux-arm64"] +checksum = "sha256:f177397625eb7ca184a01a954c7d487d1cafee68e53cc8f2789a02f75c347a68" +url = "https://github.com/astral-sh/uv/releases/download/0.9.7/uv-aarch64-unknown-linux-musl.tar.gz" + +[tools.uv."platforms.linux-arm64-musl"] +checksum = "sha256:f177397625eb7ca184a01a954c7d487d1cafee68e53cc8f2789a02f75c347a68" +url = 
"https://github.com/astral-sh/uv/releases/download/0.9.7/uv-aarch64-unknown-linux-musl.tar.gz" + +[tools.uv."platforms.linux-x64"] +checksum = "sha256:611d1e4d340c6b78519891d37d512c184f58b5424aebecdbd983585ff659ff16" +url = "https://github.com/astral-sh/uv/releases/download/0.9.7/uv-x86_64-unknown-linux-musl.tar.gz" + +[tools.uv."platforms.linux-x64-musl"] +checksum = "sha256:611d1e4d340c6b78519891d37d512c184f58b5424aebecdbd983585ff659ff16" +url = "https://github.com/astral-sh/uv/releases/download/0.9.7/uv-x86_64-unknown-linux-musl.tar.gz" + +[tools.uv."platforms.macos-arm64"] +checksum = "sha256:35572b9619fc14d67fc1cd72582c3cfc5c9c66d97f310192e04f26fb3fe96005" +url = "https://github.com/astral-sh/uv/releases/download/0.9.7/uv-aarch64-apple-darwin.tar.gz" + +[tools.uv."platforms.macos-x64"] +checksum = "sha256:41946d87e1576c297d6d3cca88b089b6942b8777a5a25e70de1ef8c57b94b9cf" +url = "https://github.com/astral-sh/uv/releases/download/0.9.7/uv-x86_64-apple-darwin.tar.gz" + +[tools.uv."platforms.windows-x64"] +checksum = "sha256:5d250c32d3604e28dbe18dc65c668ff628c53e00dde2c642576e831e4a60da64" +url = "https://github.com/astral-sh/uv/releases/download/0.9.7/uv-x86_64-pc-windows-msvc.zip" diff --git a/pyproject.toml b/pyproject.toml index 4075f4a..43bb2bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ classifiers = [ "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Topic :: Office/Business :: Scheduling", - "Topic :: Utilities", + "Topic :: Utilities", ] dependencies = [ "pydantic>=2.0", @@ -53,3 +53,8 @@ packages = ["dispatchio"] [tool.pytest.ini_options] testpaths = ["tests"] + +[dependency-groups] +dev = [ + "pytest>=9.0.2", +] diff --git a/release-please-config.json b/release-please-config.json new file mode 100644 index 0000000..67a2be1 --- /dev/null +++ b/release-please-config.json @@ -0,0 +1,18 @@ +{ + "release-type": "simple", + "bump-minor-pre-major": true, + "bump-patch-for-minor-pre-major": true, + 
"changelog-sections": [ + {"type": "feat", "section": "Features", "hidden": false}, + {"type": "fix", "section": "Bug Fixes", "hidden": false}, + {"type": "perf", "section": "Performance Improvements", "hidden": false}, + {"type": "docs", "section": "Documentation", "hidden": false}, + {"type": "refactor", "section": "Code Refactoring", "hidden": false}, + {"type": "test", "section": "Tests", "hidden": false}, + {"type": "ci", "section": "Continuous Integration", "hidden": false}, + {"type": "chore", "section": "Miscellaneous", "hidden": true} + ], + "release-search-depth": 100, + "draft": false, + "prerelease": false +} diff --git a/renovate.json b/renovate.json new file mode 100644 index 0000000..0e55507 --- /dev/null +++ b/renovate.json @@ -0,0 +1,24 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": ["config:recommended"], + "labels": ["dependencies"], + "commitMessagePrefix": "chore:", + "schedule": ["every weekend"], + "packageRules": [ + { + "matchManagers": ["github-actions"], + "pinDigests": true, + "automerge": false + }, + { + "matchManagers": ["terraform"], + "groupName": "terraform providers", + "automerge": false + }, + { + "matchManagers": ["pre-commit"], + "groupName": "pre-commit hooks", + "automerge": false + } + ] +} diff --git a/scripts/build_cookbook.py b/scripts/build_cookbook.py index d6a3618..46fe4f3 100644 --- a/scripts/build_cookbook.py +++ b/scripts/build_cookbook.py @@ -18,16 +18,16 @@ import tomllib from pathlib import Path -ROOT = Path(__file__).parent.parent +ROOT = Path(__file__).parent.parent EXAMPLES_DIR = ROOT / "examples" -DEFAULT_OUT = ROOT / "COOKBOOK.md" +DEFAULT_OUT = ROOT / "COOKBOOK.md" _FENCE_LANG = { - ".py": "python", + ".py": "python", ".toml": "toml", - ".sh": "bash", + ".sh": "bash", ".yaml": "yaml", - ".yml": "yaml", + ".yml": "yaml", ".json": "json", } @@ -85,7 +85,9 @@ def build(output: Path = DEFAULT_OUT) -> None: for toml_path in EXAMPLES_DIR.glob("*/example.toml"): with 
toml_path.open("rb") as fh: meta = tomllib.load(fh) - entries.append((meta.get("order", 999), toml_path.parent.name, toml_path.parent, meta)) + entries.append( + (meta.get("order", 999), toml_path.parent.name, toml_path.parent, meta) + ) entries.sort(key=lambda e: (e[0], e[1])) @@ -98,7 +100,14 @@ def build(output: Path = DEFAULT_OUT) -> None: if __name__ == "__main__": - parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument("--output", type=Path, default=DEFAULT_OUT, metavar="PATH", - help=f"destination file (default: {DEFAULT_OUT.name})") + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "--output", + type=Path, + default=DEFAULT_OUT, + metavar="PATH", + help=f"destination file (default: {DEFAULT_OUT.name})", + ) build(parser.parse_args().output) diff --git a/tests/conftest.py b/tests/conftest.py index ca68c6c..a7196b8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,7 +6,6 @@ from datetime import datetime, timezone from dispatchio.state.memory import MemoryStateStore -from dispatchio.executor.subprocess_ import SubprocessExecutor from dispatchio.models import Job, SubprocessJob diff --git a/tests/test_conditions.py b/tests/test_conditions.py index 105ed70..296fdb5 100644 --- a/tests/test_conditions.py +++ b/tests/test_conditions.py @@ -24,15 +24,20 @@ ) # Convenience reference times -_MON_09_30 = datetime(2025, 1, 13, 9, 30, tzinfo=timezone.utc) # Monday, day 13, 09:30 -_WED_06_00 = datetime(2025, 1, 15, 6, 0, tzinfo=timezone.utc) # Wednesday, day 15, 06:00 -_SAT_23_59 = datetime(2025, 1, 18, 23, 59, tzinfo=timezone.utc) # Saturday, day 18, 23:59 +_MON_09_30 = datetime(2025, 1, 13, 9, 30, tzinfo=timezone.utc) # Monday, day 13, 09:30 +_WED_06_00 = datetime( + 2025, 1, 15, 6, 0, tzinfo=timezone.utc +) # Wednesday, day 15, 06:00 +_SAT_23_59 = datetime( + 2025, 1, 18, 23, 59, 
tzinfo=timezone.utc +) # Saturday, day 18, 23:59 # --------------------------------------------------------------------------- # TimeOfDayCondition # --------------------------------------------------------------------------- + class TestTimeOfDayCondition: def test_after_met_exactly(self): cond = TimeOfDayCondition(after=time(9, 30)) @@ -71,10 +76,11 @@ def test_requires_at_least_one_field(self): # MinuteOfHourCondition # --------------------------------------------------------------------------- + class TestMinuteOfHourCondition: def test_after_met(self): cond = MinuteOfHourCondition(after=30) - assert cond.is_met(_MON_09_30, DAILY) is True # minute=30 + assert cond.is_met(_MON_09_30, DAILY) is True # minute=30 def test_after_not_met(self): cond = MinuteOfHourCondition(after=31) @@ -102,10 +108,11 @@ def test_requires_at_least_one_field(self): # DayOfMonthCondition # --------------------------------------------------------------------------- + class TestDayOfMonthCondition: def test_after_met(self): cond = DayOfMonthCondition(after=13) - assert cond.is_met(_MON_09_30, DAILY) is True # day=13 + assert cond.is_met(_MON_09_30, DAILY) is True # day=13 def test_after_not_met(self): cond = DayOfMonthCondition(after=14) @@ -121,10 +128,10 @@ def test_before_at_boundary_excluded(self): def test_window(self): cond = DayOfMonthCondition(after=10, before=20) - assert cond.is_met(_MON_09_30, DAILY) is True # day=13 - assert cond.is_met(_SAT_23_59, DAILY) is True # day=18 + assert cond.is_met(_MON_09_30, DAILY) is True # day=13 + assert cond.is_met(_SAT_23_59, DAILY) is True # day=18 feb_1 = datetime(2025, 2, 1, tzinfo=timezone.utc) - assert cond.is_met(feb_1, DAILY) is False # day=1 + assert cond.is_met(feb_1, DAILY) is False # day=1 def test_cadence_agnostic(self): # DayOfMonthCondition doesn't use cadence — same result with MONTHLY @@ -140,15 +147,16 @@ def test_requires_at_least_one_field(self): # DayOfWeekCondition # 
--------------------------------------------------------------------------- + class TestDayOfWeekCondition: def test_weekday_included(self): cond = DayOfWeekCondition(on_days=[0, 1, 2, 3, 4]) - assert cond.is_met(_MON_09_30, DAILY) is True # Monday = 0 - assert cond.is_met(_WED_06_00, DAILY) is True # Wednesday = 2 + assert cond.is_met(_MON_09_30, DAILY) is True # Monday = 0 + assert cond.is_met(_WED_06_00, DAILY) is True # Wednesday = 2 def test_weekend_excluded(self): cond = DayOfWeekCondition(on_days=[0, 1, 2, 3, 4]) - assert cond.is_met(_SAT_23_59, DAILY) is False # Saturday = 5 + assert cond.is_met(_SAT_23_59, DAILY) is False # Saturday = 5 def test_specific_days(self): mon_wed_fri = DayOfWeekCondition(on_days=[0, 2, 4]) @@ -161,27 +169,34 @@ def test_specific_days(self): # AllOf (AND composite) # --------------------------------------------------------------------------- + class TestAllOf: def test_all_met(self): - cond = AllOf(conditions=[ - TimeOfDayCondition(after=time(9, 0)), - DayOfWeekCondition(on_days=[0, 1, 2, 3, 4]), - ]) + cond = AllOf( + conditions=[ + TimeOfDayCondition(after=time(9, 0)), + DayOfWeekCondition(on_days=[0, 1, 2, 3, 4]), + ] + ) assert cond.is_met(_MON_09_30, DAILY) is True def test_one_not_met(self): - cond = AllOf(conditions=[ - TimeOfDayCondition(after=time(9, 0)), - DayOfWeekCondition(on_days=[0, 1, 2, 3, 4]), - ]) - assert cond.is_met(_SAT_23_59, DAILY) is False # weekend + cond = AllOf( + conditions=[ + TimeOfDayCondition(after=time(9, 0)), + DayOfWeekCondition(on_days=[0, 1, 2, 3, 4]), + ] + ) + assert cond.is_met(_SAT_23_59, DAILY) is False # weekend def test_none_met(self): - cond = AllOf(conditions=[ - TimeOfDayCondition(after=time(10, 0)), - DayOfWeekCondition(on_days=[0, 1, 2, 3, 4]), - ]) - assert cond.is_met(_MON_09_30, DAILY) is False # before 10:00 + cond = AllOf( + conditions=[ + TimeOfDayCondition(after=time(10, 0)), + DayOfWeekCondition(on_days=[0, 1, 2, 3, 4]), + ] + ) + assert cond.is_met(_MON_09_30, DAILY) is 
False # before 10:00 def test_empty_is_true(self): assert AllOf(conditions=[]).is_met(_MON_09_30, DAILY) is True @@ -191,19 +206,24 @@ def test_empty_is_true(self): # AnyOf (OR composite) # --------------------------------------------------------------------------- + class TestAnyOf: def test_one_met(self): - cond = AnyOf(conditions=[ - TimeOfDayCondition(after=time(10, 0)), # not met (09:30) - DayOfWeekCondition(on_days=[0, 1, 2, 3, 4]), # met (Monday) - ]) + cond = AnyOf( + conditions=[ + TimeOfDayCondition(after=time(10, 0)), # not met (09:30) + DayOfWeekCondition(on_days=[0, 1, 2, 3, 4]), # met (Monday) + ] + ) assert cond.is_met(_MON_09_30, DAILY) is True def test_none_met(self): - cond = AnyOf(conditions=[ - TimeOfDayCondition(after=time(10, 0)), - DayOfWeekCondition(on_days=[5, 6]), # weekend only - ]) + cond = AnyOf( + conditions=[ + TimeOfDayCondition(after=time(10, 0)), + DayOfWeekCondition(on_days=[5, 6]), # weekend only + ] + ) assert cond.is_met(_MON_09_30, DAILY) is False # 09:30, Monday def test_empty_is_false(self): @@ -214,12 +234,15 @@ def test_empty_is_false(self): # Nested composites # --------------------------------------------------------------------------- + class TestNested: def test_allof_inside_anyof(self): - weekday_morning = AllOf(conditions=[ - TimeOfDayCondition(after=time(6, 0)), - DayOfWeekCondition(on_days=[0, 1, 2, 3, 4]), - ]) + weekday_morning = AllOf( + conditions=[ + TimeOfDayCondition(after=time(6, 0)), + DayOfWeekCondition(on_days=[0, 1, 2, 3, 4]), + ] + ) weekend_any = DayOfWeekCondition(on_days=[5, 6]) cond = AnyOf(conditions=[weekday_morning, weekend_any]) diff --git a/tests/test_config.py b/tests/test_config.py index 2ca9051..84a289b 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -5,22 +5,24 @@ from __future__ import annotations -import os import textwrap from pathlib import Path import pytest -from dispatchio.config.settings import DispatchioSettings, ReceiverSettings, StateSettings +from 
dispatchio.config.settings import ( + DispatchioSettings, + ReceiverSettings, + StateSettings, +) from dispatchio.config.loader import ( _find_config_file, - _read_toml, load_config, orchestrator_from_config, ) from dispatchio.models import Job, SubprocessJob from dispatchio.orchestrator import Orchestrator -from dispatchio.state import FilesystemStateStore, MemoryStateStore +from dispatchio.state import FilesystemStateStore from dispatchio.receiver import FilesystemReceiver @@ -28,6 +30,7 @@ # Fixtures # --------------------------------------------------------------------------- + def _toml(content: str, path: Path) -> Path: """Write TOML content to a file and return the path.""" path.write_text(textwrap.dedent(content)) @@ -46,6 +49,7 @@ def simple_job(): # Default settings # --------------------------------------------------------------------------- + class TestDefaultSettings: def test_log_level_default(self): s = DispatchioSettings() @@ -66,6 +70,7 @@ def test_receiver_backend_default(self): # Environment variable overrides # --------------------------------------------------------------------------- + class TestEnvVarOverrides: def test_top_level_env_var(self, monkeypatch): monkeypatch.setenv("DISPATCHIO_LOG_LEVEL", "DEBUG") @@ -95,22 +100,28 @@ def test_nested_receiver_drop_dir(self, monkeypatch): def test_case_insensitive(self, monkeypatch): monkeypatch.setenv("DISPATCHIO_LOG_LEVEL", "warning") s = DispatchioSettings() - assert s.log_level == "warning" # stored as-is; logging.getLevelName handles case + assert ( + s.log_level == "warning" + ) # stored as-is; logging.getLevelName handles case # --------------------------------------------------------------------------- # TOML loading # --------------------------------------------------------------------------- + class TestTomlLoading: def test_bare_toml_file(self, tmp_path): - f = _toml(""" + f = _toml( + """ log_level = "DEBUG" [state] backend = "memory" [receiver] backend = "none" - """, tmp_path / 
"dispatchio.toml") + """, + tmp_path / "dispatchio.toml", + ) s = load_config(f) assert s.log_level == "DEBUG" assert s.state.backend == "memory" @@ -118,7 +129,8 @@ def test_bare_toml_file(self, tmp_path): def test_dispatchio_section_in_toml(self, tmp_path): """[dispatchio] section should be extracted from a larger file.""" - f = _toml(""" + f = _toml( + """ [tool.something] foo = "bar" @@ -127,15 +139,20 @@ def test_dispatchio_section_in_toml(self, tmp_path): [dispatchio.state] backend = "memory" - """, tmp_path / "pyproject.toml") + """, + tmp_path / "pyproject.toml", + ) s = load_config(f) assert s.log_level == "WARNING" assert s.state.backend == "memory" def test_missing_keys_use_defaults(self, tmp_path): - f = _toml(""" + f = _toml( + """ log_level = "DEBUG" - """, tmp_path / "dispatchio.toml") + """, + tmp_path / "dispatchio.toml", + ) s = load_config(f) assert s.state.backend == "filesystem" # default preserved @@ -148,6 +165,7 @@ def test_file_not_found_raises(self): # Config file resolution # --------------------------------------------------------------------------- + class TestConfigFileResolution: def test_explicit_path_wins(self, tmp_path): f = _toml('log_level = "DEBUG"', tmp_path / "explicit.toml") @@ -197,33 +215,38 @@ def test_find_config_file_none_when_no_file(self, tmp_path, monkeypatch): # Priority: env vars override TOML # --------------------------------------------------------------------------- + class TestPriority: def test_env_overrides_toml(self, tmp_path, monkeypatch): f = _toml('log_level = "DEBUG"', tmp_path / "dispatchio.toml") monkeypatch.setenv("DISPATCHIO_LOG_LEVEL", "ERROR") s = load_config(f) - assert s.log_level == "ERROR" # env wins + assert s.log_level == "ERROR" # env wins def test_env_overrides_nested_toml(self, tmp_path, monkeypatch): - f = _toml(""" + f = _toml( + """ [state] backend = "filesystem" root = "/from/toml" - """, tmp_path / "dispatchio.toml") + """, + tmp_path / "dispatchio.toml", + ) 
monkeypatch.setenv("DISPATCHIO_STATE__ROOT", "/from/env") s = load_config(f) - assert s.state.root == "/from/env" # env wins + assert s.state.root == "/from/env" # env wins def test_toml_overrides_defaults(self, tmp_path): f = _toml('log_level = "WARNING"', tmp_path / "dispatchio.toml") s = load_config(f) - assert s.log_level == "WARNING" # toml wins over default "INFO" + assert s.log_level == "WARNING" # toml wins over default "INFO" # --------------------------------------------------------------------------- # orchestrator_from_config # --------------------------------------------------------------------------- + class TestOrchestratorFromConfig: def test_returns_orchestrator(self, simple_job, tmp_path): settings = DispatchioSettings( @@ -248,6 +271,7 @@ def test_memory_state_backend(self, simple_job): ) orch = orchestrator_from_config([simple_job], config=settings) from dispatchio.state.memory import MemoryStateStore + assert isinstance(orch.state, MemoryStateStore) def test_filesystem_receiver(self, simple_job, tmp_path): @@ -270,12 +294,15 @@ def test_no_receiver_when_none(self, simple_job): assert orch.receiver is None def test_accepts_path_to_toml(self, simple_job, tmp_path): - f = _toml(""" + f = _toml( + """ [state] backend = "memory" [receiver] backend = "none" - """, tmp_path / "dispatchio.toml") + """, + tmp_path / "dispatchio.toml", + ) orch = orchestrator_from_config([simple_job], config=f) assert isinstance(orch, Orchestrator) @@ -288,8 +315,17 @@ def test_jobs_are_passed_through(self, simple_job): assert len(orch.jobs) == 1 assert orch.jobs[0].name == "j" + def test_jobs_default_to_empty_list(self): + settings = DispatchioSettings( + state=StateSettings(backend="memory"), + receiver=ReceiverSettings(backend="none"), + ) + orch = orchestrator_from_config(config=settings) + assert len(orch.jobs) == 0 + def test_orchestrator_kwargs_forwarded(self, simple_job): from dispatchio.alerts.base import LogAlertHandler + handler = LogAlertHandler() settings = 
DispatchioSettings( state=StateSettings(backend="memory"), @@ -303,8 +339,9 @@ def test_orchestrator_kwargs_forwarded(self, simple_job): def test_unknown_state_backend_raises(self, simple_job): settings = DispatchioSettings.__new__(DispatchioSettings) object.__setattr__(settings, "log_level", "INFO") - object.__setattr__(settings, "state", - StateSettings.model_construct(backend="unknown")) # type: ignore + object.__setattr__( + settings, "state", StateSettings.model_construct(backend="unknown") + ) # type: ignore object.__setattr__(settings, "receiver", ReceiverSettings(backend="none")) with pytest.raises(ValueError, match="Unknown state backend"): orchestrator_from_config([simple_job], config=settings) diff --git a/tests/test_models.py b/tests/test_models.py index 6316711..b82ba21 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -5,8 +5,6 @@ from dispatchio.cadence import DAILY, MONTHLY from dispatchio.conditions import TimeOfDayCondition from dispatchio.models import ( - AlertCondition, - AlertOn, Dependency, HeartbeatPolicy, HttpJob, @@ -14,7 +12,6 @@ Job, JobTickResult, PythonJob, - RetryPolicy, RunRecord, Status, SubprocessJob, diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index d07fae6..4ef05aa 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -8,13 +8,11 @@ from __future__ import annotations from datetime import datetime, time, timedelta, timezone -from typing import Any -from unittest.mock import MagicMock import pytest -from dispatchio.alerts.base import AlertEvent, AlertHandler -from dispatchio.cadence import DAILY, MONTHLY, WEEKLY, YESTERDAY, DateCadence, Frequency +from dispatchio.alerts.base import AlertEvent +from dispatchio.cadence import DAILY, MONTHLY, YESTERDAY, DateCadence, Frequency from dispatchio.conditions import DayOfWeekCondition, TimeOfDayCondition from dispatchio.models import ( AlertCondition, @@ -152,7 +150,7 @@ def test_job_blocked_before_time(self): def 
test_job_runs_after_time(self): j = _job("timed", condition=TimeOfDayCondition(after=time(8, 0))) orch, store, executor = _make_orch([j]) - result = orch.tick(REF) # 09:00 >= 08:00 + orch.tick(REF) # 09:00 >= 08:00 assert len(executor.calls) == 1 def test_day_of_week_condition_blocks_on_wrong_day(self): @@ -181,7 +179,9 @@ def test_blocked_when_dependency_not_met(self): downstream = _job("down", depends_on=[Dependency(job_name="up", cadence=DAILY)]) orch, store, executor = _make_orch([upstream, downstream]) store.put(RunRecord(job_name="up", run_id="20250115", status=Status.SUBMITTED)) - orch2, store2, executor2 = _make_orch([downstream], store=store) + orch2, store2, executor2 = _make_orch( + [downstream], store=store, strict_dependencies=False + ) result = orch2.tick(REF) assert not any( r.job_name == "down" and r.action == JobAction.SUBMITTED @@ -190,7 +190,7 @@ def test_blocked_when_dependency_not_met(self): def test_unblocked_when_dependency_done(self): downstream = _job("down", depends_on=[Dependency(job_name="up", cadence=DAILY)]) - orch, store, executor = _make_orch([downstream]) + orch, store, executor = _make_orch([downstream], strict_dependencies=False) store.put(RunRecord(job_name="up", run_id="20250115", status=Status.DONE)) result = orch.tick(REF) assert any( @@ -204,7 +204,7 @@ def test_date_offset_dependency(self): downstream = _job( "down", depends_on=[Dependency(job_name="up", cadence=three_days_ago)] ) - orch, store, executor = _make_orch([downstream]) + orch, store, executor = _make_orch([downstream], strict_dependencies=False) # offset=-3 from Jan 15 = Jan 12 store.put(RunRecord(job_name="up", run_id="20250112", status=Status.DONE)) result = orch.tick(REF) @@ -214,7 +214,7 @@ def test_yesterday_shorthand(self): downstream = _job( "down", depends_on=[Dependency(job_name="up", cadence=YESTERDAY)] ) - orch, store, executor = _make_orch([downstream]) + orch, store, executor = _make_orch([downstream], strict_dependencies=False) 
store.put(RunRecord(job_name="up", run_id="20250114", status=Status.DONE)) result = orch.tick(REF) assert result.submitted()[0].job_name == "down" @@ -225,7 +225,7 @@ def test_monthly_dependency(self): cadence=MONTHLY, depends_on=[Dependency(job_name="monthly_load", cadence=MONTHLY)], ) - orch, store, executor = _make_orch([downstream]) + orch, store, executor = _make_orch([downstream], strict_dependencies=False) store.put( RunRecord(job_name="monthly_load", run_id="202501", status=Status.DONE) ) @@ -238,7 +238,7 @@ def test_cross_cadence_daily_depends_on_monthly(self): cadence=DAILY, depends_on=[Dependency(job_name="monthly_report", cadence=MONTHLY)], ) - orch, store, executor = _make_orch([daily]) + orch, store, executor = _make_orch([daily], strict_dependencies=False) store.put( RunRecord(job_name="monthly_report", run_id="202501", status=Status.DONE) ) @@ -253,7 +253,7 @@ def test_multiple_dependencies_all_must_be_met(self): Dependency(job_name="b", cadence=DAILY), ], ) - orch, store, executor = _make_orch([j]) + orch, store, executor = _make_orch([j], strict_dependencies=False) store.put(RunRecord(job_name="a", run_id="20250115", status=Status.DONE)) result = orch.tick(REF) assert len(result.submitted()) == 0 @@ -670,7 +670,7 @@ def test_warns_on_unknown_dependency(self, caplog): depends_on=[Dependency(job_name="external_job", cadence=DAILY)], ) with caplog.at_level(logging.WARNING, logger="dispatchio.orchestrator"): - _make_orch([j]) + _make_orch([j], strict_dependencies=False) assert any("external_job" in msg for msg in caplog.messages) def test_no_warning_when_dependency_is_known(self, caplog): @@ -693,6 +693,109 @@ def test_no_warning_for_job_with_no_dependencies(self, caplog): assert caplog.messages == [] +# --------------------------------------------------------------------------- +# Mutable job graph +# --------------------------------------------------------------------------- + + +class TestMutableJobGraph: + def 
test_duplicate_job_names_raise_in_constructor(self): + with pytest.raises(ValueError, match="Duplicate job names"): + _make_orch([_job("dup"), _job("dup")]) + + def test_add_jobs_rejects_duplicate_name(self): + orch, _, _ = _make_orch([_job("a")]) + with pytest.raises(ValueError, match="Duplicate job names"): + orch.add_job(_job("a")) + + def test_add_jobs_applies_on_next_tick(self): + orch, store, executor = _make_orch([_job("a")]) + orch.add_job(_job("b")) + + result = orch.tick(REF) + submitted_names = {r.job_name for r in result.submitted()} + assert submitted_names == {"a", "b"} + assert len(executor.calls) == 2 + assert store.get("b", "20250115") is not None + + def test_remove_job_stops_future_evaluation(self): + orch, _, executor = _make_orch([_job("a"), _job("b")]) + orch.remove_job("b") + + result = orch.tick(REF) + submitted_names = {r.job_name for r in result.submitted()} + assert submitted_names == {"a"} + assert len(executor.calls) == 1 + + def test_remove_unknown_job_raises_key_error(self): + orch, _, _ = _make_orch([_job("a")]) + with pytest.raises(KeyError, match="Unknown job"): + orch.remove_job("missing") + + def test_mutation_after_tick_disabled_by_default(self): + orch, _, _ = _make_orch([_job("a")]) + orch.tick(REF) + with pytest.raises(RuntimeError, match="allow_runtime_mutation=True"): + orch.add_job(_job("b")) + + def test_mutation_after_tick_allowed_when_enabled(self): + orch, _, executor = _make_orch( + [_job("a")], + allow_runtime_mutation=True, + ) + orch.tick(REF) + orch.add_job(_job("b")) + + later = REF + timedelta(days=1) + result = orch.tick(later) + assert any( + r.job_name == "b" and r.action == JobAction.SUBMITTED + for r in result.results + ) + assert any(call["job"] == "b" for call in executor.calls) + + def test_unresolved_dependency_validation_runs_after_graph_change(self, caplog): + import logging + + orch, _, _ = _make_orch( + [_job("a")], + allow_runtime_mutation=True, + strict_dependencies=False, + ) + orch.add_job( + 
_job( + "consumer", depends_on=[Dependency(job_name="external", cadence=DAILY)] + ) + ) + + with caplog.at_level(logging.WARNING, logger="dispatchio.orchestrator"): + orch.tick(REF) + assert any("external" in msg for msg in caplog.messages) + + def test_strict_dependencies_raise_in_constructor_by_default(self): + with pytest.raises(ValueError, match="Unresolved dependencies"): + _make_orch( + [ + _job( + "consumer", + depends_on=[Dependency(job_name="external", cadence=DAILY)], + ) + ] + ) + + def test_strict_dependencies_raise_after_graph_change(self): + orch, _, _ = _make_orch( + [_job("a")], allow_runtime_mutation=True, strict_dependencies=True + ) + orch.add_job( + _job( + "consumer", depends_on=[Dependency(job_name="external", cadence=DAILY)] + ) + ) + with pytest.raises(ValueError, match="Unresolved dependencies"): + orch.tick(REF) + + # --------------------------------------------------------------------------- # Dependency satisfaction modes # --------------------------------------------------------------------------- @@ -738,7 +841,7 @@ def test_all_finished_proceeds_when_all_terminal(self): ], dependency_mode=DependencyMode.ALL_FINISHED, ) - orch, store, executor = _make_orch([collector]) + orch, store, executor = _make_orch([collector], strict_dependencies=False) # one DONE, one ERROR — both finished store.put(RunRecord(job_name="entity_a", run_id="20250115", status=Status.DONE)) @@ -763,7 +866,7 @@ def test_all_finished_waits_when_some_still_running(self): ], dependency_mode=DependencyMode.ALL_FINISHED, ) - orch, store, executor = _make_orch([collector]) + orch, store, executor = _make_orch([collector], strict_dependencies=False) # entity_a done but entity_b still running store.put(RunRecord(job_name="entity_a", run_id="20250115", status=Status.DONE)) @@ -791,7 +894,7 @@ def test_threshold_proceeds_when_met(self): dependency_mode=DependencyMode.THRESHOLD, dependency_threshold=2, ) - orch, store, executor = _make_orch([collector]) + orch, store, executor 
= _make_orch([collector], strict_dependencies=False) # 2 of 3 done — threshold=2 met store.put(RunRecord(job_name="a", run_id="20250115", status=Status.DONE)) @@ -817,7 +920,7 @@ def test_threshold_waits_when_not_yet_met_but_reachable(self): dependency_mode=DependencyMode.THRESHOLD, dependency_threshold=2, ) - orch, store, executor = _make_orch([collector]) + orch, store, executor = _make_orch([collector], strict_dependencies=False) # 1 done, 1 running (still reachable), 1 not started store.put(RunRecord(job_name="a", run_id="20250115", status=Status.DONE)) @@ -844,7 +947,7 @@ def test_threshold_unreachable_marks_skipped(self): dependency_mode=DependencyMode.THRESHOLD, dependency_threshold=2, ) - orch, store, executor = _make_orch([collector]) + orch, store, executor = _make_orch([collector], strict_dependencies=False) # 1 done, 2 error — met=1, not_yet_finished=0 → 1+0 < 2, unreachable store.put(RunRecord(job_name="a", run_id="20250115", status=Status.DONE)) diff --git a/tests/test_run_id.py b/tests/test_run_id.py index 7351485..12f08fb 100644 --- a/tests/test_run_id.py +++ b/tests/test_run_id.py @@ -3,13 +3,22 @@ from datetime import datetime, timezone import pytest from dispatchio.cadence import ( - DAILY, HOURLY, MONTHLY, WEEKLY, YESTERDAY, LAST_MONTH, LAST_WEEK, - DateCadence, FixedCadence, Frequency, IncrementalCadence, + DAILY, + HOURLY, + MONTHLY, + WEEKLY, + YESTERDAY, + LAST_MONTH, + LAST_WEEK, + DateCadence, + FixedCadence, + Frequency, + IncrementalCadence, ) from dispatchio.run_id import describe_cadence, resolve_run_id -REF = datetime(2025, 1, 15, 9, 30, tzinfo=timezone.utc) # Wednesday +REF = datetime(2025, 1, 15, 9, 30, tzinfo=timezone.utc) # Wednesday class TestDailyResolution: diff --git a/tests/test_state.py b/tests/test_state.py index a3e6f59..99427e2 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -6,7 +6,6 @@ from datetime import datetime, timezone from pathlib import Path -import pytest from dispatchio.models import RunRecord, 
Status from dispatchio.state.filesystem import FilesystemStateStore @@ -21,6 +20,7 @@ def _make_record(job_name="job", run_id="20250115", status=Status.DONE, **kw): # Shared behaviour — run the same tests against both stores # --------------------------------------------------------------------------- + class SharedStateStoreBehaviour: """Mix-in. Subclasses provide self.store.""" diff --git a/tests/test_worker.py b/tests/test_worker.py index 4dfc420..5f8816d 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -7,7 +7,6 @@ from __future__ import annotations -import threading import time from typing import Any @@ -15,25 +14,27 @@ from dispatchio.models import Status from dispatchio.worker.harness import run_job, _HeartbeatThread -from dispatchio.worker.reporter.base import Reporter # --------------------------------------------------------------------------- # Spy reporter # --------------------------------------------------------------------------- + class SpyReporter: def __init__(self): self.calls: list[dict[str, Any]] = [] def report(self, job_name, run_id, status, *, error_reason=None, metadata=None): - self.calls.append({ - "job_name": job_name, - "run_id": run_id, - "status": status, - "error_reason": error_reason, - "metadata": metadata or {}, - }) + self.calls.append( + { + "job_name": job_name, + "run_id": run_id, + "status": status, + "error_reason": error_reason, + "metadata": metadata or {}, + } + ) def statuses(self) -> list[Status]: return [c["status"] for c in self.calls] @@ -43,6 +44,7 @@ def statuses(self) -> list[Status]: # Helpers # --------------------------------------------------------------------------- + def _noop(run_id: str) -> None: pass @@ -63,6 +65,7 @@ def _run(fn, reporter=None, run_id="20250115", **kwargs): # Success path # --------------------------------------------------------------------------- + class TestSuccessPath: def test_posts_done_on_success(self): spy = SpyReporter() @@ -74,8 +77,8 @@ def 
test_done_event_has_correct_fields(self): _run(_noop, reporter=spy, run_id="20250115") call = spy.calls[0] assert call["job_name"] == "test_job" - assert call["run_id"] == "20250115" - assert call["status"] == Status.DONE + assert call["run_id"] == "20250115" + assert call["status"] == Status.DONE assert call["error_reason"] is None def test_metadata_fn_attached_to_done_event(self): @@ -85,6 +88,7 @@ def test_metadata_fn_attached_to_done_event(self): def test_fn_receives_run_id(self): received = [] + def capture(run_id): received.append(run_id) @@ -97,6 +101,7 @@ def capture(run_id): # Failure path # --------------------------------------------------------------------------- + class TestFailurePath: def test_posts_error_on_exception(self): spy = SpyReporter() @@ -129,15 +134,18 @@ def test_exits_with_nonzero_on_failure(self): # No reporter # --------------------------------------------------------------------------- + class TestNoReporter: def test_runs_successfully_without_reporter(self): """Job should run even if no reporter is configured.""" called = [] + def fn(run_id): called.append(run_id) # No reporter, no argv flags — should just run and log a warning import sys + orig_argv = sys.argv sys.argv = ["test"] # ensure no --drop-dir try: @@ -149,6 +157,7 @@ def fn(run_id): def test_failure_still_exits_without_reporter(self): import sys + orig_argv = sys.argv sys.argv = ["test"] try: @@ -162,6 +171,7 @@ def test_failure_still_exits_without_reporter(self): # Heartbeat thread # --------------------------------------------------------------------------- + class TestHeartbeatThread: def test_posts_running_events_at_interval(self): spy = SpyReporter() @@ -215,9 +225,11 @@ def slow_fail(run_id): # FilesystemReporter # --------------------------------------------------------------------------- + class TestFilesystemReporter: def test_writes_json_file_on_done(self, tmp_path): from dispatchio.worker.reporter.filesystem import FilesystemReporter + reporter = 
FilesystemReporter(tmp_path) reporter.report("myjob", "20250115", Status.DONE) files = list(tmp_path.glob("*.json")) @@ -227,20 +239,28 @@ def test_writes_json_file_on_done(self, tmp_path): def test_written_file_is_valid_completion_event(self, tmp_path): from dispatchio.worker.reporter.filesystem import FilesystemReporter from dispatchio.receiver.base import CompletionEvent + reporter = FilesystemReporter(tmp_path) reporter.report("myjob", "20250115", Status.ERROR, error_reason="oops") path = list(tmp_path.glob("*.json"))[0] event = CompletionEvent.model_validate_json(path.read_text()) assert event.job_name == "myjob" - assert event.run_id == "20250115" - assert event.status == Status.ERROR + assert event.run_id == "20250115" + assert event.status == Status.ERROR assert event.error_reason == "oops" def test_does_not_raise_on_bad_path(self): """Reporter must never raise — it logs and swallows errors.""" from dispatchio.worker.reporter.filesystem import FilesystemReporter + reporter = FilesystemReporter.__new__(FilesystemReporter) - reporter.drop_dir = type("P", (), {"mkdir": lambda *a, **kw: None, - "__truediv__": lambda s, o: reporter.drop_dir})() + reporter.drop_dir = type( + "P", + (), + { + "mkdir": lambda *a, **kw: None, + "__truediv__": lambda s, o: reporter.drop_dir, + }, + )() # Just verify the protocol: report() must not propagate exceptions # (this is hard to force without monkeypatching; the above tests cover the happy path) diff --git a/trivy.yaml b/trivy.yaml new file mode 100644 index 0000000..094bae6 --- /dev/null +++ b/trivy.yaml @@ -0,0 +1,4 @@ +scan: + skip-dirs: + - "**/.terraform" + - "modules/external" diff --git a/uv.lock b/uv.lock index ca40152..79269a9 100644 --- a/uv.lock +++ b/uv.lock @@ -183,6 +183,11 @@ dev = [ { name = "pytest-cov" }, ] +[package.dev-dependencies] +dev = [ + { name = "pytest" }, +] + [package.metadata] requires-dist = [ { name = "boto3", marker = "extra == 'aws'", specifier = ">=1.26" }, @@ -194,6 +199,9 @@ requires-dist 
= [ { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" }, ] +[package.metadata.requires-dev] +dev = [{ name = "pytest", specifier = ">=9.0.2" }] + [[package]] name = "freezegun" version = "1.5.5"