From abbc745d606be86254e7fc39e6d39144e1971b1c Mon Sep 17 00:00:00 2001 From: Vikram Koka Date: Wed, 27 May 2026 07:26:06 -0700 Subject: [PATCH 01/28] Add AIP progress tracker example DAG for common.ai provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Demonstrates LLMOperator with Dynamic Task Mapping, structured output, UsageLimits, and HITL approval — no framework dependency. The DAG gathers AIP specs from the Confluence wiki and searches GitHub for related PRs, then uses an LLM to assess each AIP's progress and synthesize a cross-AIP report for maintainer review. --- .../example_aip_progress_tracker.py | 475 ++++++++++++++++++ 1 file changed, 475 insertions(+) create mode 100644 providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py diff --git a/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py b/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py new file mode 100644 index 0000000000000..114b56302f67d --- /dev/null +++ b/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py @@ -0,0 +1,475 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +""" +AIP progress tracker -- multi-source data fusion with common.ai operators. + +Demonstrates Dynamic Task Mapping, structured LLM output, cost-controlled +synthesis, and HITL approval using only ``LLMOperator`` -- no LlamaIndex or +LangChain dependency required. + +For each active Airflow Improvement Proposal the Dag gathers evidence from +two sources (Confluence spec text, GitHub PRs and commits), asks an LLM to +assess spec-vs-implementation progress, then synthesizes a cross-AIP report +for maintainer review. + +``example_aip_progress_tracker`` (manual trigger): + +.. code-block:: text + + fetch_aip_list (@task) + → gather_aip_evidence (@task, mapped ×N AIPs) + → format_analysis_prompt (@task, mapped ×N) + → analyze_aip (LLMOperator, mapped ×N) + → collect_analyses (@task) + → synthesize_report (LLMOperator, with UsageLimits) + → review_report (ApprovalOperator) + +**What this makes visible that a notebook hides:** + +* Each AIP investigation is a named, logged task instance with its own + retry behaviour -- not a loop iteration buried inside one cell. +* If the GitHub API is rate-limited for one AIP, only that mapped + instance retries; the others preserve their XCom results. +* The synthesis step's inputs and token budget are fully auditable. +* A maintainer reviews the report before it goes to the dev list. + +Before running: + +1. Create an LLM connection named ``pydanticai_default`` (or the value of + ``LLM_CONN_ID``) for your chosen model provider. +2. Set ``USE_SAMPLE_DATA = False`` in the DAG file to fetch live data + from the Apache Confluence wiki and GitHub API. 
+""" + +from __future__ import annotations + +import json +import re +import urllib.parse +import urllib.request +from datetime import timedelta + +from pydantic import BaseModel +from pydantic_ai.usage import UsageLimits + +from airflow.providers.common.ai.operators.llm import LLMOperator +from airflow.providers.common.compat.sdk import dag, task +from airflow.providers.standard.operators.hitl import ApprovalOperator + +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +LLM_CONN_ID = "pydanticai_default" + +# Confluence wiki -- the AIP listing page is public, no auth required. +# https://cwiki.apache.org/confluence/display/AIRFLOW/Airflow+Improvement+Proposals +CONFLUENCE_BASE_URL = "https://cwiki.apache.org/confluence" +AIP_LISTING_PAGE_ID = "89066602" +GITHUB_REPO = "apache/airflow" + +# When True the Dag runs on built-in sample data (self-contained, no network). +# Set to False to fetch live data from Confluence and GitHub. 
+USE_SAMPLE_DATA = True + +# --------------------------------------------------------------------------- +# Structured output model -- enforces a schema on the per-AIP LLM response +# --------------------------------------------------------------------------- + +# [START aip_tracker_structured_output] + + +class AIPStatus(BaseModel): + """Per-AIP analysis produced by the LLM.""" + + aip_number: int + title: str + spec_summary: str + implementation_status: str + key_prs: list[str] + blockers: list[str] + next_steps: list[str] + completion_pct: int + + +# [END aip_tracker_structured_output] + +# --------------------------------------------------------------------------- +# Sample data -- replace with Confluence / GitHub API calls for production +# --------------------------------------------------------------------------- + +SAMPLE_AIPS: list[dict] = [ + {"aip_number": 76, "title": "Asset Partitions"}, + {"aip_number": 99, "title": "Common Data Access Pattern + AI"}, + {"aip_number": 103, "title": "Task State Management"}, + {"aip_number": 105, "title": "LLM Retry Policy"}, + {"aip_number": 108, "title": "Language Coordinator Layer"}, +] + +SAMPLE_EVIDENCE: dict[int, dict] = { + 76: { + "spec_text": ( + "AIP-76 adds partition awareness to Airflow assets. Instead of " + "triggering on any update to an asset, Dags can depend on specific " + "partitions (e.g. a date-based slice of a dataset). The scheduler " + "tracks which partitions have been produced and only triggers " + "downstream Dags when the required partitions are available." 
+ ), + "prs": [ + "#62400 -- Asset partition model and metadata schema", + "#63900 -- Partition-aware scheduling in the DagRun creator", + "#65100 -- UI: partition status badges on Asset views", + ], + "commits": [ + "Add AssetPartition model with composite key", + "Extend DagScheduleAssetReference for partition filters", + "Show partition status in React Asset detail view", + ], + }, + 99: { + "spec_text": ( + "AIP-99 adds first-class AI/ML operators to the common.ai provider. " + "LLMOperator wraps pydantic-ai for structured LLM calls with retries. " + "AgentOperator enables multi-turn ReAct agents with tool use. " + "LangChain and LlamaIndex hooks bridge framework models to Airflow " + "connections. DocumentLoaderOperator parses files for RAG pipelines." + ), + "prs": [ + "#61200 -- LLMOperator and PydanticAIHook", + "#62800 -- AgentOperator with tool calling and HITL", + "#64100 -- LangChain hook and integration", + "#65500 -- LlamaIndex embedding and retrieval operators", + "#66300 -- DocumentLoaderOperator for multi-format parsing", + ], + "commits": [ + "Add LLMOperator with structured output support", + "Add AgentOperator with ReAct loop and durable execution", + "Add LangChainHook bridging langchain models to connections", + "Add LlamaIndex embedding and retrieval operators", + "Add DocumentLoaderOperator with PDF and DOCX support", + ], + }, + 103: { + "spec_text": ( + "AIP-103 introduces task-level state persistence via " + "context['task_state']. Tasks can checkpoint intermediate results " + "that survive retries and restarts. The state backend stores " + "key-value pairs scoped to a task instance, enabling long-running " + "tasks to resume from the last checkpoint rather than starting " + "from scratch." 
+ ), + "prs": [ + "#65000 -- Task state storage backend and API", + "#65800 -- context['task_state'] integration in Task SDK", + "#66700 -- State-aware retry logic for LLM tasks", + ], + "commits": [ + "Add TaskState model with key-value storage", + "Expose task_state in TaskInstanceContext", + "Add state checkpoint and restore in retry path", + ], + }, + 105: { + "spec_text": ( + "AIP-105 introduces LLMRetryPolicy, an intelligent retry mechanism " + "that uses an LLM to classify task failures before deciding whether " + "to retry, fail fast, or back off. Instead of static exception-type " + "matching, the policy sends the error context to an LLM that " + "determines the appropriate action: rate-limit errors trigger " + "exponential backoff, auth errors fail immediately, transient " + "network errors retry with a short delay." + ), + "prs": [ + "#64800 -- RetryPolicy base class in Task SDK (AIP-105 prerequisite)", + "#65600 -- LLMRetryPolicy with pydantic-ai error classification", + "#66200 -- Integration tests for LLM-classified retry scenarios", + ], + "commits": [ + "Add RetryPolicy protocol and RetryRule dataclass", + "Implement LLMRetryPolicy with structured ErrorClassification output", + "Wire RetryPolicy into task runner retry loop", + ], + }, + 108: { + "spec_text": ( + "AIP-108 defines a language coordinator layer that enables tasks " + "written in Java, Go, and TypeScript to run alongside Python tasks. " + "A lightweight coordinator process manages the non-Python runtime " + "lifecycle, handles serialization between the task and the Execution " + "API, and provides the same guarantees (heartbeat, state, XCom) that " + "Python tasks get from the Task SDK." 
+ ), + "prs": [ + "#66100 -- Coordinator protocol specification and protobuf schema", + "#66800 -- Java Task SDK with coordinator bridge", + "#67200 -- Go Task SDK initial implementation", + ], + "commits": [ + "Define coordinator gRPC protocol for multi-language tasks", + "Add Java Task SDK with Maven build and coordinator client", + "Scaffold Go Task SDK with coordinator handshake", + ], + }, +} + +# --------------------------------------------------------------------------- +# HTTP helpers -- used when USE_SAMPLE_DATA is False +# --------------------------------------------------------------------------- + + +def _confluence_rest_get(path: str) -> dict: + """GET a Confluence REST API endpoint (public, no auth required).""" + url = f"{CONFLUENCE_BASE_URL}{path}" + req = urllib.request.Request(url, headers={"Accept": "application/json"}) + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read()) + + +def _github_api_get(path: str) -> dict: + """GET a GitHub REST API endpoint (public, rate-limited to 10 req/min).""" + url = f"https://api.github.com{path}" + req = urllib.request.Request(url, headers={"Accept": "application/vnd.github.v3+json"}) + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read()) + + +def _strip_html_tags(html: str) -> str: + """Remove HTML/Confluence markup, returning plain text.""" + text = re.sub(r"<[^>]+>", " ", html) + return re.sub(r"\s+", " ", text).strip() + + +def _parse_accepted_aips(listing_html: str) -> list[dict]: + """Extract accepted AIPs from the rendered AIP listing page.""" + match = re.search(r"Accepted AIPs.*?(?=]+>([^<]*AIP-(\d+)[^<]*)", section): + title = re.sub(r"\s+", " ", m.group(1)).strip() + aip_number = int(m.group(2)) + aips.append({"aip_number": aip_number, "title": title}) + return aips + + +# --------------------------------------------------------------------------- +# System prompts +# 
--------------------------------------------------------------------------- + +ANALYSIS_SYSTEM_PROMPT = """\ +You are an Airflow project analyst. Given an AIP specification and its \ +GitHub evidence (pull requests and commits), produce a structured status \ +assessment. + +Be specific about what has been implemented versus what remains. Rate \ +completion percentage based on the ratio of spec goals that have \ +corresponding PRs or commits.""" + +SYNTHESIS_SYSTEM_PROMPT = """\ +You are an Airflow release coordinator. Given individual AIP status \ +assessments, produce a concise cross-AIP progress report. + +Identify the top priorities, shared blockers across AIPs, and recommend \ +where maintainer attention is most needed. Keep the report actionable \ +and under 500 words.""" + + +# --------------------------------------------------------------------------- +# DAG +# --------------------------------------------------------------------------- + + +# [START example_aip_progress_tracker] +@dag(catchup=False, tags=["example", "aip_tracker", "common_ai"]) +def example_aip_progress_tracker(): + """ + Track AIP progress by analysing Confluence specs against GitHub evidence. + + Task graph:: + + fetch_aip_list (@task) + → gather_aip_evidence (@task ×N, via Dynamic Task Mapping) + → format_analysis_prompt (@task ×N) + → analyze_aip (LLMOperator ×N, structured output) + → collect_analyses (@task) + → synthesize_report (LLMOperator, with UsageLimits) + → review_report (ApprovalOperator) + """ + + # ------------------------------------------------------------------ + # Step 1: Fetch the list of active AIPs to investigate. + # The length of this list determines how many mapped instances are + # created in the downstream steps -- N is decided at runtime. + # ------------------------------------------------------------------ + @task + def fetch_aip_list() -> list[dict]: + if USE_SAMPLE_DATA: + return SAMPLE_AIPS + # Fetch the AIP listing page and extract the "Accepted" section. 
+ # https://cwiki.apache.org/confluence/display/AIRFLOW/Airflow+Improvement+Proposals + page = _confluence_rest_get(f"/rest/api/content/{AIP_LISTING_PAGE_ID}?expand=body.view") + accepted = _parse_accepted_aips(page["body"]["view"]["value"]) + return accepted or SAMPLE_AIPS + + aip_list = fetch_aip_list() + + # ------------------------------------------------------------------ + # Step 2: Gather evidence for each AIP from multiple sources. + # Each mapped instance fetches one AIP's spec text from the + # Confluence wiki (cwiki.apache.org) and searches GitHub for + # related PRs and commits. If the GitHub API is rate-limited + # for one AIP, only that instance retries. + # ------------------------------------------------------------------ + @task + def gather_aip_evidence(aip: dict) -> dict: + aip_number = aip["aip_number"] + if USE_SAMPLE_DATA: + evidence = SAMPLE_EVIDENCE[aip_number] + return { + "aip_number": aip_number, + "title": aip["title"], + "spec_text": evidence["spec_text"], + "prs": evidence["prs"], + "commits": evidence["commits"], + } + # Fetch spec text from the AIP's Confluence wiki page via CQL search. + # Example: https://cwiki.apache.org/confluence/display/AIRFLOW/AIP-103 + cql = urllib.parse.quote( + f'space="AIRFLOW" AND title~"AIP-{aip_number}" AND ancestor={AIP_LISTING_PAGE_ID}' + ) + results = _confluence_rest_get(f"/rest/api/content/search?cql={cql}&expand=body.view&limit=1") + spec_text = "" + if results.get("results"): + raw_html = results["results"][0]["body"]["view"]["value"] + spec_text = _strip_html_tags(raw_html)[:3000] + # Search GitHub for related PRs. + pr_query = urllib.parse.quote(f"AIP-{aip_number} repo:{GITHUB_REPO} is:pr") + pr_data = _github_api_get(f"/search/issues?q={pr_query}&per_page=10") + prs = [f"#{it['number']} -- {it['title']}" for it in pr_data.get("items", [])] + # Search GitHub for related commits. 
+ commit_query = urllib.parse.quote(f"AIP-{aip_number} repo:{GITHUB_REPO}") + commit_data = _github_api_get(f"/search/commits?q={commit_query}&per_page=10") + commits = [it["commit"]["message"].split("\n")[0] for it in commit_data.get("items", [])] + return { + "aip_number": aip_number, + "title": aip["title"], + "spec_text": spec_text, + "prs": prs, + "commits": commits, + } + + evidence = gather_aip_evidence.expand(aip=aip_list) + + # ------------------------------------------------------------------ + # Step 3: Format the gathered evidence into an LLM analysis prompt. + # Separating formatting from data gathering keeps each task focused + # and makes prompt iteration independent of API logic. + # ------------------------------------------------------------------ + @task + def format_analysis_prompt(evidence: dict) -> str: + prs_text = "\n".join(f" - {pr}" for pr in evidence["prs"]) + commits_text = "\n".join(f" - {c}" for c in evidence["commits"]) + return ( + f"Analyze AIP-{evidence['aip_number']}: {evidence['title']}\n\n" + f"Specification:\n{evidence['spec_text']}\n\n" + f"Pull Requests:\n{prs_text}\n\n" + f"Recent Commits:\n{commits_text}" + ) + + prompts = format_analysis_prompt.expand(evidence=evidence) + + # ------------------------------------------------------------------ + # Step 4: Analyze each AIP with a structured LLM call. + # Dynamic Task Mapping creates one LLMOperator instance per AIP. + # output_type=AIPStatus enforces the Pydantic schema on the response. + # ------------------------------------------------------------------ + # [START aip_tracker_dtm_analysis] + analyses = LLMOperator.partial( + task_id="analyze_aip", + llm_conn_id=LLM_CONN_ID, + system_prompt=ANALYSIS_SYSTEM_PROMPT, + output_type=AIPStatus, + ).expand(prompt=prompts) + # [END aip_tracker_dtm_analysis] + + # ------------------------------------------------------------------ + # Step 5: Collect all per-AIP analyses into a single context string + # for the synthesis step. 
+ # ------------------------------------------------------------------ + @task + def collect_analyses(analyses: list) -> str: + sections = [] + for raw in analyses: + a = json.loads(raw) if isinstance(raw, str) else raw + blockers = ", ".join(a["blockers"]) if a["blockers"] else "None identified" + next_steps = ", ".join(a["next_steps"]) if a["next_steps"] else "N/A" + sections.append( + f"## AIP-{a['aip_number']}: {a['title']}\n" + f"Status: {a['implementation_status']} " + f"({a['completion_pct']}% complete)\n" + f"Summary: {a['spec_summary']}\n" + f"Key PRs: {', '.join(a['key_prs'])}\n" + f"Blockers: {blockers}\n" + f"Next steps: {next_steps}" + ) + return "\n\n".join(sections) + + collected = collect_analyses(analyses.output) + + # ------------------------------------------------------------------ + # Step 6: Synthesize a cross-AIP progress report. + # UsageLimits caps the token spend so a runaway prompt cannot + # exhaust the API budget in a single Dag run. + # ------------------------------------------------------------------ + # [START aip_tracker_synthesis] + synthesize = LLMOperator( + task_id="synthesize_report", + llm_conn_id=LLM_CONN_ID, + system_prompt=SYNTHESIS_SYSTEM_PROMPT, + prompt="""\ +Create a cross-AIP progress report from these individual assessments. +Prioritize AIPs that are close to completion or have shared blockers. + +{{ ti.xcom_pull(task_ids='collect_analyses') }}""", + usage_limits=UsageLimits( + request_limit=5, + input_tokens_limit=20_000, + output_tokens_limit=4_000, + ), + ) + # [END aip_tracker_synthesis] + collected >> synthesize + + # ------------------------------------------------------------------ + # Step 7: A maintainer reviews the synthesized report before it is + # shared on the dev list. The Dag pauses here until the human + # approves, requests changes, or the timeout expires. 
+ # ------------------------------------------------------------------ + # [START aip_tracker_hitl] + ApprovalOperator( + task_id="review_report", + subject="Review AIP Progress Report before sharing", + body=synthesize.output, + response_timeout=timedelta(hours=24), + ) + # [END aip_tracker_hitl] + + +# [END example_aip_progress_tracker] + +example_aip_progress_tracker() From d9e5446aaac4bf1b39d28a3245a3d39bcfbfb113 Mon Sep 17 00:00:00 2001 From: "leon.jeon" <58650453+Leondon9@users.noreply.github.com> Date: Wed, 27 May 2026 23:39:51 +0900 Subject: [PATCH 02/28] Avoid logging Execution API bearer credentials (#67059) * Avoid logging Execution API bearer credentials * Add newsfragment for Execution API bearer logging * Avoid exposing JWT validation details --- .../api_fastapi/execution_api/security.py | 6 +-- .../execution_api/test_security.py | 42 +++++++++++++++++++ 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/airflow-core/src/airflow/api_fastapi/execution_api/security.py b/airflow-core/src/airflow/api_fastapi/execution_api/security.py index df3b7993a0857..98aee04cf334a 100644 --- a/airflow-core/src/airflow/api_fastapi/execution_api/security.py +++ b/airflow-core/src/airflow/api_fastapi/execution_api/security.py @@ -123,9 +123,9 @@ async def __call__( # type: ignore[override] try: claims = await validator.avalidated_claims(creds.credentials, dict(self.required_claims)) - except Exception as err: - log.warning("Failed to validate JWT", exc_info=True, token=creds.credentials) - raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"Invalid auth token: {err}") + except Exception: + log.warning("Failed to validate JWT", exc_info=True) + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid auth token") claims.setdefault("scope", "execution") diff --git a/airflow-core/tests/unit/api_fastapi/execution_api/test_security.py b/airflow-core/tests/unit/api_fastapi/execution_api/test_security.py index 
22b7f58a95993..e98d918385933 100644 --- a/airflow-core/tests/unit/api_fastapi/execution_api/test_security.py +++ b/airflow-core/tests/unit/api_fastapi/execution_api/test_security.py @@ -20,9 +20,12 @@ from uuid import UUID import pytest +import svcs from fastapi import APIRouter, FastAPI, Request, Security from fastapi.testclient import TestClient +from structlog.testing import capture_logs +from airflow.api_fastapi.auth.tokens import JWTValidator from airflow.api_fastapi.execution_api.datamodels.token import TIClaims, TIToken, TokenScope from airflow.api_fastapi.execution_api.security import ( ExecutionAPIRoute, @@ -158,6 +161,45 @@ def test_execution_token_accepted_on_both_routes(self, token_type_app): assert run.status_code == 200 +class TestJWTBearerLogging: + @pytest.fixture + def app(self): + app = FastAPI() + app.state.svcs_registry = svcs.Registry() + + @app.get("/protected") + def protected(token: TIToken = Security(require_auth)): + return {"id": str(token.id)} + + return app + + @pytest.mark.parametrize( + "bearer_credential", + [ + pytest.param("eyJ.invalid.jwt", id="jwt-looking-token"), + pytest.param("opaque-token-value", id="opaque-token"), + ], + ) + def test_validation_failure_does_not_log_supplied_credential(self, app, bearer_credential): + validator = MagicMock(spec=JWTValidator) + validator.avalidated_claims.side_effect = ValueError("invalid token") + app.state.svcs_registry.register_value(JWTValidator, validator) + client = TestClient(app) + + with capture_logs() as logs: + response = client.get( + "/protected", + headers={"Authorization": f"Bearer {bearer_credential}"}, + ) + + assert response.status_code == 403 + assert response.json() == {"detail": "Invalid auth token"} + validator.avalidated_claims.assert_awaited_once_with(bearer_credential, {}) + assert any(log["event"] == "Failed to validate JWT" for log in logs) + assert bearer_credential not in repr(logs) + assert "invalid token" not in response.text + + class 
TestTiSelfScopeEnforcement: """Routes with the ``ti:self`` scope reject mismatched JWT subjects.""" From 5151f6cd2033bfcc3ffdb356330333de8a6591d5 Mon Sep 17 00:00:00 2001 From: Henry Chen Date: Thu, 28 May 2026 01:08:33 +0800 Subject: [PATCH 03/28] Fix SecretsMasker merge round-trip for Kubernetes env vars (#67122) --- .../secrets_masker/secrets_masker.py | 8 +++++ .../secrets_masker/test_secrets_masker.py | 35 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/shared/secrets_masker/src/airflow_shared/secrets_masker/secrets_masker.py b/shared/secrets_masker/src/airflow_shared/secrets_masker/secrets_masker.py index 6ae5343ec00d5..03198bf48a8a9 100644 --- a/shared/secrets_masker/src/airflow_shared/secrets_masker/secrets_masker.py +++ b/shared/secrets_masker/src/airflow_shared/secrets_masker/secrets_masker.py @@ -445,12 +445,20 @@ def _merge( # Determine if we should treat this as sensitive is_sensitive = force_sensitive or (name is not None and self.should_hide_value_for_key(name)) + v1_env_var_name = None + if isinstance(new_item, dict) and _is_v1_env_var(old_item): + # redact(V1EnvVar) returns a dict, so merge against the old object's serialized shape. 
+ old_item = old_item.to_dict() + v1_env_var_name = old_item.get("name") + if isinstance(new_item, dict) and isinstance(old_item, dict): merged = {} for key in new_item.keys(): if key in old_item: # For dicts, pass the key as name unless we're in sensitive mode child_name = None if is_sensitive else key + if key == "value" and v1_env_var_name: + child_name = v1_env_var_name merged[key] = self._merge( new_item[key], old_item[key], diff --git a/shared/secrets_masker/tests/secrets_masker/test_secrets_masker.py b/shared/secrets_masker/tests/secrets_masker/test_secrets_masker.py index 1e8b50522e205..1f107fee07eab 100644 --- a/shared/secrets_masker/tests/secrets_masker/test_secrets_masker.py +++ b/shared/secrets_masker/tests/secrets_masker/test_secrets_masker.py @@ -1482,6 +1482,41 @@ def test_merge_enum_values(self): assert result == new_enum assert isinstance(result, MyEnum) + def test_merge_round_trip_kubernetes_env_var(self): + class MockV1EnvVar: + def __init__(self, name, value, value_from=None): + self.name = name + self.value = value + self.value_from = value_from + + def to_dict(self): + return {"name": self.name, "value": self.value, "value_from": self.value_from} + + original_env_var = MockV1EnvVar("password", "original_password", "original_source") + normal_env_var = MockV1EnvVar("app_name", "original_app") + + with patch( + "airflow_shared.secrets_masker.secrets_masker._is_v1_env_var", + side_effect=lambda item: isinstance(item, MockV1EnvVar), + ): + redacted_env_var = self.masker.redact(original_env_var) + redacted_env_var["value_from"] = "***" + + merged = self.masker.merge(redacted_env_var, original_env_var) + + updated_env_var = {**redacted_env_var, "value": "updated_password"} + merged_updated = self.masker.merge(updated_env_var, original_env_var) + + normal_merged = self.masker.merge({"name": "app_name", "value": "***"}, normal_env_var) + + assert merged == { + "name": "password", + "value": "original_password", + "value_from": "***", + } + assert 
merged_updated["value"] == "updated_password" + assert normal_merged["value"] == "***" + def test_merge_round_trip(self): # Original data with sensitive information original_config = { From f08e118476e7e042deed52d9faaea8e7fc1601b9 Mon Sep 17 00:00:00 2001 From: "Jason(Zhe-You) Liu" <68415893+jason810496@users.noreply.github.com> Date: Thu, 28 May 2026 01:12:41 +0800 Subject: [PATCH 04/28] Add Coordinator Layer and Java Coordinator (#65958) Co-authored-by: Tzu-ping Chung Co-authored-by: TP --- .../src/airflow/config_templates/config.yml | 37 + .../src/airflow/dag_processing/processor.py | 14 +- .../src/airflow/jobs/triggerer_job_runner.py | 12 +- .../unit/dag_processing/test_processor.py | 4 +- .../includes/sections-and-options.rst | 2 +- .../src/tests_common/pytest_plugin.py | 17 +- docs/spelling_wordlist.txt | 4 + task-sdk/.pre-commit-config.yaml | 4 +- .../airflow/sdk/coordinators/java/__init__.py | 27 + .../sdk/coordinators/java/coordinator.py | 424 ++++++++++ .../src/airflow/sdk/execution_time/comms.py | 7 +- .../airflow/sdk/execution_time/coordinator.py | 246 ++++++ .../sdk/execution_time/schema/schema.json | 231 ++++-- .../airflow/sdk/execution_time/supervisor.py | 179 ++++- .../tests/task_sdk/coordinators/__init__.py | 16 + .../task_sdk/coordinators/java/__init__.py | 16 + .../coordinators/java/test_coordinator.py | 759 ++++++++++++++++++ .../definitions/test_mappedoperator.py | 4 +- .../task_sdk/definitions/test_xcom_arg.py | 2 +- .../tests/task_sdk/docs/test_public_api.py | 21 +- .../task_sdk/execution_time/test_comms.py | 3 + .../execution_time/test_coordinator.py | 122 +++ .../execution_time/test_supervisor.py | 242 +++++- .../execution_time/test_task_runner.py | 89 +- 24 files changed, 2300 insertions(+), 182 deletions(-) create mode 100644 task-sdk/src/airflow/sdk/coordinators/java/__init__.py create mode 100644 task-sdk/src/airflow/sdk/coordinators/java/coordinator.py create mode 100644 task-sdk/src/airflow/sdk/execution_time/coordinator.py create 
mode 100644 task-sdk/tests/task_sdk/coordinators/__init__.py create mode 100644 task-sdk/tests/task_sdk/coordinators/java/__init__.py create mode 100644 task-sdk/tests/task_sdk/coordinators/java/test_coordinator.py create mode 100644 task-sdk/tests/task_sdk/execution_time/test_coordinator.py diff --git a/airflow-core/src/airflow/config_templates/config.yml b/airflow-core/src/airflow/config_templates/config.yml index a6f3de25e2196..502934dbc7749 100644 --- a/airflow-core/src/airflow/config_templates/config.yml +++ b/airflow-core/src/airflow/config_templates/config.yml @@ -2004,6 +2004,43 @@ workers: type: integer example: ~ default: "60" +sdk: + description: Settings for non-Python SDK runtime coordination + options: + coordinators: + description: | + JSON object mapping of coordinator keys to coordinator definitions. + + Each value is an object with ``classpath`` and optional ``kwargs``. + ``classpath`` is resolved via ``import_string`` and constructed with + ``kwargs`` on first use. Entries are + independent instances, so the same ``classpath`` can be configured + multiple times under different names with different ``kwargs`` (for + example, two ``JavaCoordinator`` instances pinned to different JDK + versions). + version_added: 3.3.0 + type: string + example: | + { + "jdk-17": { + "classpath": "airflow.sdk.coordinators.java.JavaCoordinator", + "kwargs": {"java_executable": "/usr/lib/jvm/java-17-openjdk/bin/java", "jvm_args": ["-Xmx1024m"]} + } + } + default: ~ + queue_to_coordinator: + description: | + JSON mapping of queue names to a coordinator key from + ``[sdk] coordinators``. + + This mapping is checked to route a task to a configured coordinator + instance based on its queue. An entry in this mapping is needed if + tasks of a queue should be handled by a custom coordinator instead of + the default Python task-running mechanism. 
+ version_added: 3.3.0 + type: string + example: '{"legacy-java": "jdk-11", "modern-java": "jdk-17"}' + default: ~ api_auth: description: Settings relating to authentication on the Airflow APIs options: diff --git a/airflow-core/src/airflow/dag_processing/processor.py b/airflow-core/src/airflow/dag_processing/processor.py index 303b62d141111..f7b3affd00830 100644 --- a/airflow-core/src/airflow/dag_processing/processor.py +++ b/airflow-core/src/airflow/dag_processing/processor.py @@ -609,9 +609,19 @@ def _get_target_loggers(self) -> tuple[FilteringBoundLogger, ...]: ) def _create_log_forwarder( - self, loggers: tuple[FilteringBoundLogger, ...], name: str, log_level: int = logging.INFO + self, + loggers: tuple[FilteringBoundLogger, ...], + name: str, + *, + data: bytes, + log_level: int = logging.INFO, ) -> Callable[[socket], bool]: - return super()._create_log_forwarder(loggers, name.replace("task.", "dag_processor.", 1), log_level) + return super()._create_log_forwarder( + loggers, + name.replace("task.", "dag_processor.", 1), + data=data, + log_level=log_level, + ) def _handle_request(self, msg: ToManager, log: FilteringBoundLogger, req_id: int) -> None: from airflow.sdk.api.datamodels._generated import ( diff --git a/airflow-core/src/airflow/jobs/triggerer_job_runner.py b/airflow-core/src/airflow/jobs/triggerer_job_runner.py index 36102aaa0d3ad..cb8e45b343d5f 100644 --- a/airflow-core/src/airflow/jobs/triggerer_job_runner.py +++ b/airflow-core/src/airflow/jobs/triggerer_job_runner.py @@ -873,8 +873,16 @@ def update_triggers(self, requested_trigger_ids: set[int]): # Enqueue orphaned triggers for cancellation self.cancelling_triggers.update(cancel_trigger_ids) - def _register_pipe_readers(self, stdout: socket, stderr: socket, requests: socket, logs: socket): - super()._register_pipe_readers(stdout, stderr, requests, logs) + def _register_pipe_readers( + self, + stdout: socket, + stderr: socket, + requests: socket, + logs: socket, + *, + data: dict[socket, bytes], + 
): + super()._register_pipe_readers(stdout, stderr, requests, logs, data=data) # We want to handle logging differently here, so un-register the one our parent class created self.selector.unregister(logs) diff --git a/airflow-core/tests/unit/dag_processing/test_processor.py b/airflow-core/tests/unit/dag_processing/test_processor.py index 7fe7204e68e2c..1ee02d0cbb813 100644 --- a/airflow-core/tests/unit/dag_processing/test_processor.py +++ b/airflow-core/tests/unit/dag_processing/test_processor.py @@ -2075,8 +2075,8 @@ def test_create_log_forwarder_rewrites_task_prefix_to_dag_processor(self, proc): from airflow.sdk.execution_time.supervisor import WatchedSubprocess with patch.object(WatchedSubprocess, "_create_log_forwarder") as mock_base: - proc._create_log_forwarder((), "task.stdout") - mock_base.assert_called_once_with((), "dag_processor.stdout", logging.INFO) + proc._create_log_forwarder((), "task.stdout", data=b"") + mock_base.assert_called_once_with((), "dag_processor.stdout", data=b"", log_level=logging.INFO) def test_handle_request_get_connection_masks_password_and_extra(self, proc): proc.client.connections.get.return_value = ConnectionResponse( diff --git a/devel-common/src/sphinx_exts/includes/sections-and-options.rst b/devel-common/src/sphinx_exts/includes/sections-and-options.rst index e04383c8c5582..edb674adb843f 100644 --- a/devel-common/src/sphinx_exts/includes/sections-and-options.rst +++ b/devel-common/src/sphinx_exts/includes/sections-and-options.rst @@ -85,7 +85,7 @@ {% if "\n" in example %} .. 
code-block:: - {{ example }} + {{ example | indent(width=8) }} {% else %} ``{{ example }}`` {% endif %} diff --git a/devel-common/src/tests_common/pytest_plugin.py b/devel-common/src/tests_common/pytest_plugin.py index 6f47d98fd649d..defee35f0e30c 100644 --- a/devel-common/src/tests_common/pytest_plugin.py +++ b/devel-common/src/tests_common/pytest_plugin.py @@ -2519,7 +2519,6 @@ def execute(self, context): from uuid6 import uuid7 from airflow.sdk import DAG - from airflow.sdk.api.datamodels._generated import TaskInstance from airflow.sdk.execution_time.comms import BundleInfo, StartupDetails from airflow.timetables.base import TimeRestriction @@ -2547,6 +2546,15 @@ def _create_task_instance( should_retry: bool | None = None, max_tries: int | None = None, ) -> RuntimeTaskInstance: + from tests_common.test_utils.version_compat import AIRFLOW_V_3_3_PLUS + + if AIRFLOW_V_3_3_PLUS: + from airflow.sdk.execution_time.workloads.task import TaskInstanceDTO + else: + from airflow.sdk.api.datamodels._generated import ( # type: ignore[no-redef,assignment] + TaskInstance as TaskInstanceDTO, + ) + from airflow.sdk.api.datamodels._generated import DagRun, DagRunState, TIRunContext from airflow.utils.types import DagRunType @@ -2624,14 +2632,17 @@ def _create_task_instance( } startup_details = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=ti_id, task_id=task.task_id, dag_id=dag_id, run_id=run_id, try_number=try_number, - map_index=map_index, + map_index=map_index, # type: ignore[arg-type] dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), dag_rel_path="", bundle_info=BundleInfo(name="anything", version="any"), diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 0c1b1fc373fca..869f06770b3cf 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -860,6 +860,7 @@ iTerm iterm itertools Jarek +JavaCoordinator javascript jaydebeapi Jdbc @@ -897,6 +898,7 @@ jsonl juli Jupyter jupyter +jvm jwks JWT 
jwt @@ -1131,6 +1133,7 @@ openai openapi openfaas OpenID +openjdk openlineage OpenSearch opensearch @@ -1863,6 +1866,7 @@ XComs Xiaodong xlarge xml +Xmx xpath XSS xyz diff --git a/task-sdk/.pre-commit-config.yaml b/task-sdk/.pre-commit-config.yaml index f9a224f60d483..74c9f8d401cef 100644 --- a/task-sdk/.pre-commit-config.yaml +++ b/task-sdk/.pre-commit-config.yaml @@ -43,10 +43,10 @@ repos: ^src/airflow/sdk/definitions/deadline\.py$| ^src/airflow/sdk/definitions/dag\.py$| ^src/airflow/sdk/definitions/_internal/types\.py$| - ^src/airflow/sdk/execution_time/schema/__init__\.py$| + ^src/airflow/sdk/execution_time/callback_supervisor\.py$| ^src/airflow/sdk/execution_time/execute_workload\.py$| ^src/airflow/sdk/execution_time/secrets_masker\.py$| - ^src/airflow/sdk/execution_time/callback_supervisor\.py$| + ^src/airflow/sdk/execution_time/schema/__init__\.py$| ^src/airflow/sdk/execution_time/supervisor\.py$| ^src/airflow/sdk/execution_time/task_runner\.py$| ^src/airflow/sdk/serde/serializers/kubernetes\.py$| diff --git a/task-sdk/src/airflow/sdk/coordinators/java/__init__.py b/task-sdk/src/airflow/sdk/coordinators/java/__init__.py new file mode 100644 index 0000000000000..b458478e328ed --- /dev/null +++ b/task-sdk/src/airflow/sdk/coordinators/java/__init__.py @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Java runtime coordinator for the Apache Airflow Task SDK. + +See :class:`JavaCoordinator` for details. +""" + +from __future__ import annotations + +from airflow.sdk.coordinators.java.coordinator import JavaCoordinator + +__all__ = ["JavaCoordinator"] diff --git a/task-sdk/src/airflow/sdk/coordinators/java/coordinator.py b/task-sdk/src/airflow/sdk/coordinators/java/coordinator.py new file mode 100644 index 0000000000000..138c91cecf1cd --- /dev/null +++ b/task-sdk/src/airflow/sdk/coordinators/java/coordinator.py @@ -0,0 +1,424 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Java runtime coordinator that launches a JVM subprocess for Dag file processing and task execution.""" + +from __future__ import annotations + +import email +import itertools +import os +import pathlib +import selectors +import signal +import socket +import subprocess +import time +import zipfile +from typing import TYPE_CHECKING, TypeVar, cast + +import attrs +import structlog + +from airflow.sdk.execution_time.coordinator import BaseCoordinator +from airflow.sdk.execution_time.schema import get_schema_version_migrator +from airflow.sdk.execution_time.supervisor import ActivitySubprocess, NeverRaised, ProcessTracker + +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator, Sequence + + from structlog.typing import FilteringBoundLogger + from typing_extensions import Self + + from airflow.sdk.api.client import Client + from airflow.sdk.api.datamodels._generated import BundleInfo + from airflow.sdk.execution_time.workloads.task import TaskInstanceDTO + + Tracked = TypeVar("Tracked", socket.socket, subprocess.Popen) + +log: FilteringBoundLogger = structlog.get_logger(logger_name="coordinators.java") + + +def _start_server() -> socket.socket: + server = socket.socket() + server.bind(("127.0.0.1", 0)) + server.setblocking(True) + server.listen(1) # Just need to listen to the child process. + return server + + +def _find_jars(items: Iterable[pathlib.Path]) -> Iterator[pathlib.Path]: + for item in items: + if item.is_dir(): + yield from _find_jars(item.iterdir()) + elif item.is_file() and item.suffix == ".jar": + yield item + + +def _calculate_classpath(jars_root: Sequence[pathlib.Path]) -> str: + jars = (p.as_posix() for p in _find_jars(jars_root)) + return os.pathsep.join(sorted(jars)) # Keep output deterministic. 
+ + +@attrs.define +class _JarMetadata: + main_class: str + schema_version: str + + @classmethod + def from_jar(cls, path: pathlib.Path) -> Self | None: + try: + with zipfile.ZipFile(path) as zf: + try: + manifest_info = zf.getinfo("META-INF/MANIFEST.MF") + except KeyError: + log.debug("JAR does not contain META-INF/MANIFEST.MF; ignored", path=path) + return None + with zf.open(manifest_info) as f: + manifest = email.message_from_binary_file(f) + return cls(manifest["Main-Class"], manifest["Airflow-Supervisor-Schema-Version"]) + except zipfile.BadZipFile: + log.exception("Cannot read JAR; ignored", path=path) + return None + + +def _validate_schema_version(instance, _, value) -> str: + return get_schema_version_migrator().resolve_version(str(value)) + + +@attrs.define +class _JarInfo: + main_class: str + schema_version: str = attrs.field(validator=_validate_schema_version) + + @attrs.define + class _Progress: + main_class: str | None = attrs.field(init=False, default=None) + schema_version: str | None = attrs.field(init=False, default=None) + + def collect(self) -> _JarInfo | None: + if self.main_class is None or self.schema_version is None: + return None + return _JarInfo(self.main_class, self.schema_version) + + @classmethod + def find(cls, roots: Sequence[pathlib.Path], main_class: str) -> _JarInfo: + log.debug("Finding JARs recursively", roots=roots) + progress = cls._Progress() + for p in _find_jars(roots): + if (metadata := _JarMetadata.from_jar(p)) is None: + continue + if metadata.main_class and ((main_class == metadata.main_class) or not main_class): + log.debug("JAR located with Main-Class metadata", path=p, main_class=metadata.main_class) + progress.main_class = metadata.main_class + if metadata.schema_version: + log.debug( + "JAR located with Airflow-Supervisor-Schema-Version metadata", + path=p, + schema_version=metadata.schema_version, + ) + progress.schema_version = metadata.schema_version + if (result := progress.collect()) is not None: + return 
result + if progress.main_class is not None: + tp = "cannot find a JAR with Airflow-Supervisor-Schema-Version metadata in {1}" + elif main_class: + tp = "cannot find a JAR with Main-Class matching {0!r} in {1}" + else: + tp = "cannot find a JAR with Main-Class metadata in {1}" + raise FileNotFoundError(tp.format(main_class, os.pathsep.join(os.fspath(p.resolve()) for p in roots))) + + +def _accept_connections( + servers: dict[str, socket.socket], + drains: dict[str, socket.socket], + proc: subprocess.Popen, + *, + max_wait: float = 10.0, + drain_size: int = 4096, +) -> tuple[dict[socket.socket, socket.socket], dict[socket.socket, bytes]]: + """Block until the Java process connects to servers.""" + accepted: dict[socket.socket, socket.socket] = {} + drained: dict[socket.socket, bytes] = {s: b"" for s in drains.values()} + with selectors.DefaultSelector() as sel: + for key, soc in itertools.chain(servers.items(), drains.items()): + sel.register(soc, selectors.EVENT_READ, data=key) + deadline = time.monotonic() + max_wait + while len(accepted) < len(servers): + remaining = deadline - time.monotonic() + if remaining <= 0: + for s in accepted.values(): + s.close() + raise TimeoutError("process did not connect within timeout") + if proc.poll() is not None: + for s in accepted.values(): + s.close() + raise RuntimeError(f"process exited with {proc.returncode} before connecting") + for event, _ in sel.select(timeout=min(remaining, 1.0)): + soc = cast("socket.socket", event.fileobj) + if soc in drained: + if incoming := soc.recv(drain_size): + log.debug("Draining child process stream", key=event.data) + drained[soc] += incoming + else: + log.warning("Child stream closed before ready!", key=event.data) + sel.unregister(soc) + else: + log.debug("Accepting child process connection", key=event.data) + conn, _ = soc.accept() + sel.unregister(soc) + accepted[soc] = conn + return accepted, drained + + +class PopenTracker(ProcessTracker): + """ + Process tracker backed by 
:class:`subprocess.Popen`. + + :meta private: + """ + + ProcessNotFound = NeverRaised + TimeoutExpired = subprocess.TimeoutExpired + + def __init__(self, impl: subprocess.Popen) -> None: + self._impl = impl + + @property + def pid(self) -> int: + return self._impl.pid + + def send_signal(self, s: signal.Signals) -> None: + self._impl.send_signal(s) + + def wait(self, timeout: float | None) -> int: + return self._impl.wait(timeout) + + +@attrs.define(kw_only=True) +class _ResourceTracker: + timeout: float + tracked: dict[int, socket.socket | subprocess.Popen] = attrs.field(init=False, factory=dict) + + def __enter__(self): + return self + + def __exit__(self, *exc_info): + for o in self.tracked.values(): + match o: + case socket.socket(): + o.close() + case subprocess.Popen(): + o.terminate() + try: + o.wait(self.timeout) + except subprocess.TimeoutExpired: + o.kill() + + def track(self, *objects: Tracked) -> tuple[Tracked, ...]: + self.tracked.update((id(o), o) for o in objects) + return objects + + def untrack(self, *objects: Tracked) -> tuple[Tracked, ...]: + for o in objects: + self.tracked.pop(id(o), None) + return objects + + +@attrs.define(kw_only=True) +class _JavaActivitySubprocess(ActivitySubprocess): + """Java task runner process.""" + + _comm_server: socket.socket + _logs_server: socket.socket + + @classmethod + def start( # type: ignore[override] + cls, + *, + what: TaskInstanceDTO, + dag_rel_path: str | os.PathLike[str], + bundle_info, + logger: FilteringBoundLogger | None = None, + sentry_integration: str = "", + java_executable: str, + jvm_args: list[str], + jars_root: Sequence[pathlib.Path], + main_class: str, + startup_timeout: float = 10.0, + **kwargs, + ) -> Self: + jar = _JarInfo.find(jars_root, main_class) + with _ResourceTracker(timeout=startup_timeout) as tracker: + comm_server, logs_server = tracker.track(_start_server(), _start_server()) + stdout_r, stdout_w = tracker.track(*socket.socketpair()) + stderr_r, stderr_w = 
tracker.track(*socket.socketpair()) + + proc = subprocess.Popen( + [ + java_executable, + "-classpath", + _calculate_classpath(jars_root), + *jvm_args, + jar.main_class, + # Arguments to MainClass... + "--comm={0[0]}:{0[1]}".format(comm_server.getsockname()), + "--logs={0[0]}:{0[1]}".format(logs_server.getsockname()), + ], + stdout=stdout_w.fileno(), + stderr=stderr_w.fileno(), + ) + tracker.track(proc) + for soc in tracker.untrack(stdout_w, stderr_w): + soc.close() + log.info("Starting subprocess", pid=proc.pid) + + socks, drained = _accept_connections( + {"comm": comm_server, "logs": logs_server}, + {"stdout": stdout_r, "stderr": stderr_r}, + proc, + max_wait=startup_timeout, + ) + tracker.track(*socks.values()) + + self = cls( + id=what.id, + pid=proc.pid, + process=PopenTracker(proc), + process_log=logger or structlog.get_logger(logger_name="task").bind(), + start_time=time.monotonic(), + stdin=socks[comm_server], + subprocess_schema_version=jar.schema_version, + comm_server=comm_server, + logs_server=logs_server, + **kwargs, + ) + self._register_pipe_readers( + *tracker.untrack(stdout_r, stderr_r, socks[comm_server], socks[logs_server]), + data=drained, + ) + self._on_child_started( + ti=what, + dag_rel_path=dag_rel_path, + bundle_info=bundle_info, + sentry_integration=sentry_integration, + ) + + # Untrack everything left. 'self' keeps track of these and closes the + # servers when the subprocess exits in 'wait'.
+ tracker.untrack(comm_server, logs_server, proc) + + return self + + def wait(self) -> int: + code = super().wait() + self._close_unused_sockets(self._comm_server, self._logs_server) + return code + + +def _convert_jars_root( + value: None | os.PathLike[str] | pathlib.Path | list[os.PathLike[str] | pathlib.Path], +) -> list[pathlib.Path]: + if value is None: + return [] + if isinstance(value, (str, os.PathLike, pathlib.Path)): + return [pathlib.Path(value).expanduser()] + return [pathlib.Path(v).expanduser() for v in value] + + +@attrs.define(kw_only=True) +class JavaCoordinator(BaseCoordinator): + """ + Coordinator that launches a JVM subprocess for DAG parsing and task execution. + + Configuration is taken from the ``[sdk] coordinators`` entry that constructs + this instance:: + + { + "name": "jdk-17", + "classpath": "airflow.sdk.coordinators.java.JavaCoordinator", + "kwargs": { + "java_executable": "/usr/lib/jvm/java-17-openjdk/bin/java", + "jvm_args": ["-Xmx1024m"], + "jars_root": ["~/airflow/jars"], + }, + } + + :param java_executable: Path to the ``java`` command (defaults to + ``"java"``, which relies on ``$PATH``). + :param jvm_args: Extra arguments passed to the JVM (e.g. ``["-Xmx512m"]``). + :param jars_root: A list of directories scanned for JAR bundles. + :param main_class: Explicit entry point to execute with *java_executable*. + :param task_startup_timeout: Maximum time the coordinator waits for a task + process to start, in seconds. The default is 10 seconds. + + If *main_class* is not explicitly set, JavaCoordinator scans *jars_root* to + find an executable JAR (one with Main-Class set in its metadata). If more + than one executable JAR is found, it may be nondeterministic which one ends + up being executed. + + A JAR containing metadata *Airflow-Supervisor-Schema-Version* should also be + available to specify the wire schema version. 
The JAR containing the Java + SDK automatically sets this, so you don't generally need to do anything if + dependency JARs are deployed as-is. If you repackage the dependencies, + however, you must also reproduce the metadata entry in one of the JARs. + + The default *task_startup_timeout* should be plenty long enough since a task- + containing JAR is not supposed to consume significant time to perform setup + (it should happen in individual tasks instead). However, if the launch time + has to be so slow, you can increase the timeout to give the JAR more time. + Note that decreasing the value is generally not meaningful since the + coordinator does not need to wait for the full period. + """ + + java_executable: str = "java" + jvm_args: list[str] = attrs.field(factory=list) + jars_root: list[pathlib.Path] = attrs.field( + converter=_convert_jars_root, + validator=attrs.validators.min_len(1), + ) + main_class: str = "" + task_startup_timeout: float = 10.0 + + def execute_task( + self, + *, + what: TaskInstanceDTO, + dag_rel_path: str | os.PathLike[str], + bundle_info: BundleInfo, + client: Client, + logger: FilteringBoundLogger | None = None, + sentry_integration: str = "", + subprocess_logs_to_stdout: bool, + **kwargs, + ) -> BaseCoordinator.ExecutionResult: + process = _JavaActivitySubprocess.start( + what=what, + dag_rel_path=dag_rel_path, + bundle_info=bundle_info, + client=client, + logger=logger, + subprocess_logs_to_stdout=subprocess_logs_to_stdout, + sentry_integration=sentry_integration, + java_executable=self.java_executable, + jvm_args=self.jvm_args, + jars_root=self.jars_root, + main_class=self.main_class, + startup_timeout=self.task_startup_timeout, + ) + exit_code = process.wait() + return self.ExecutionResult(exit_code, process.final_state) diff --git a/task-sdk/src/airflow/sdk/execution_time/comms.py b/task-sdk/src/airflow/sdk/execution_time/comms.py index 7b494cab83592..b9678c79e291b 100644 --- a/task-sdk/src/airflow/sdk/execution_time/comms.py +++
b/task-sdk/src/airflow/sdk/execution_time/comms.py @@ -80,7 +80,6 @@ PreviousTIResponse, PrevSuccessfulDagRunResponse, TaskBreadcrumbsResponse, - TaskInstance, TaskInstanceState, TaskStateResponse, TaskStatesResponse, @@ -98,6 +97,10 @@ XComSequenceSliceResponse, ) from airflow.sdk.exceptions import ErrorType +from airflow.sdk.execution_time.workloads.task import ( + # Pydantic needs this at runtime since we don't model_rebuild() StartupDetails. + TaskInstanceDTO, # noqa: TC001 +) try: from socket import recv_fds @@ -334,7 +337,7 @@ def _get_response(self) -> ReceiveMsgType | None: class StartupDetails(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) - ti: TaskInstance + ti: TaskInstanceDTO dag_rel_path: str bundle_info: BundleInfo start_date: datetime diff --git a/task-sdk/src/airflow/sdk/execution_time/coordinator.py b/task-sdk/src/airflow/sdk/execution_time/coordinator.py new file mode 100644 index 0000000000000..072e9c0b6c739 --- /dev/null +++ b/task-sdk/src/airflow/sdk/execution_time/coordinator.py @@ -0,0 +1,246 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Runtime coordinator for non-Python DAG file processing and task execution. 
+ +Provides :class:`BaseCoordinator`, the base class for +SDK-specific coordinators that bridge subprocess I/O between the +Airflow supervisor and an external-SDK runtime (Java, Go, Rust, etc.), +and :class:`CoordinatorManager`, the registry that loads coordinator +instances from the ``[sdk] coordinators`` configuration. + +The coordinator's :meth:`~BaseCoordinator.run_task_execution` handles the full +lifecycle: + +1. Creates TCP servers for comm and logs channels, and a socketpair for stderr. +2. Calls :meth:`~BaseCoordinator.task_execution_cmd` (provided by the subclass) + to obtain the subprocess command. +3. Spawns the subprocess and accepts TCP connections from it. +4. Runs a selector-based bridge that transparently forwards bytes + between fd 0 (supervisor) and the subprocess comm socket, and + re-emits the subprocess's log and stderr output through structlog. +""" + +from __future__ import annotations + +import contextlib +import functools +from typing import TYPE_CHECKING, Any + +import attrs +import pydantic +import structlog + +from airflow.sdk._shared.module_loading import import_string +from airflow.sdk.configuration import conf + +if TYPE_CHECKING: + from collections.abc import Mapping + from os import PathLike + + from structlog.typing import FilteringBoundLogger + from typing_extensions import Self + + from airflow.sdk.api.client import Client + from airflow.sdk.execution_time.workloads.task import TaskInstanceDTO + +__all__ = [ + "BaseCoordinator", + "CoordinatorManager", + "get_coordinator_manager", + "reset_coordinator_manager", +] + +log = structlog.get_logger(__name__) + + +class BaseCoordinator: + """ + Base coordinator for runtime-specific DAG file processing and task execution. + + Coordinators are instantiated from the ``[sdk] coordinators`` configuration + (see :class:`CoordinatorManager`) — each entry's ``classpath`` is resolved + via :func:`~airflow.sdk._shared.module_loading.import_string` and + constructed with the entry's ``kwargs``. 
+ """ + + @attrs.define(slots=True) + class ExecutionResult: + """Return value for :meth:`BaseCoordinator.execute_task`.""" + + exit_code: Any + final_state: str + + def execute_task( + self, + *, + what: TaskInstanceDTO, + dag_rel_path: str | PathLike[str], + bundle_info, + client: Client, + logger: FilteringBoundLogger | None = None, + sentry_integration: str = "", + subprocess_logs_to_stdout: bool, + **kwargs, + ) -> ExecutionResult: + """ + Start task execution. + + This should execute the task and return a result. + """ + raise NotImplementedError + + +class _CoordinatorSpec(pydantic.BaseModel): + classpath: str + kwargs: dict[str, Any] = pydantic.Field(default_factory=dict) + + +class _PythonCoordinator(BaseCoordinator): + """ + Coordinator implementation to execute Python tasks. + + This is not supposed to be specified by users directly, but the fallback + used by default when nothing is specified. + """ + + def execute_task( + self, + *, + what: TaskInstanceDTO, + dag_rel_path: str | PathLike[str], + bundle_info, + client: Client, + logger: FilteringBoundLogger | None = None, + sentry_integration: str = "", + subprocess_logs_to_stdout: bool, + **kwargs, + ) -> BaseCoordinator.ExecutionResult: + # TODO: Importing this at the top causes circular imports. + # ActivitySubprocess and WatchedSubprocess should be moved out of the + # supervisor, and maybe with additional refactoring to abstract out + # process handling. 
+ from airflow.sdk.execution_time.supervisor import ActivitySubprocess + + process = ActivitySubprocess.start( + dag_rel_path=dag_rel_path, + what=what, + client=client, + logger=logger, + bundle_info=bundle_info, + subprocess_logs_to_stdout=subprocess_logs_to_stdout, + sentry_integration=sentry_integration, + ) + exit_code = process.wait() + return self.ExecutionResult(exit_code, process.final_state) + + +@functools.cache +def _build_python_coordinator() -> _PythonCoordinator: + return _PythonCoordinator() + + +class InvalidCoordinatorError(ValueError): + """Raised for an invalid coordinator configuration.""" + + +@attrs.define(kw_only=True) +class CoordinatorManager: + """ + Registry of coordinator instances loaded from ``[sdk]`` configurations. + + The ``[sdk] coordinators`` value is a JSON object keyed by coordinator name:: + + { + "jdk-11": { + "classpath": "airflow.sdk.coordinators.java.JavaCoordinator", + "kwargs": {"java_executable": "/usr/lib/jvm/jdk-11/bin/java", ...}, + } + } + + The ``classpath`` is resolved via + :func:`~airflow.sdk._shared.module_loading.import_string` and constructed + with ``kwargs`` on first use. A coordinator entry that is never looked up + incurs no startup cost. + + The ``[sdk] queue_to_coordinator`` config maps queue names to a key in the + object, which lets users reuse existing queue assignments to route tasks to + a specific coordinator instance (for example, a ``"legacy-java"`` queue + routed to a JDK 11 coordinator, and a ``"modern-java"`` queue routed to a + JDK 17 coordinator). 
+ + :meta private: + """ + + _coordinator_specs: Mapping[str, _CoordinatorSpec] + _queue_to_coordinator: Mapping[str, str] + + _created_coordinators: dict[str, BaseCoordinator] = attrs.field(init=False, factory=dict) + + @classmethod + def from_config(cls) -> Self: + """Load coordinator specs from configuration without initialization.""" + coordinator_specs = { + k: _CoordinatorSpec.model_validate(v) + for k, v in conf.getjson("sdk", "coordinators", fallback={}).items() + } + queue_to_coordinator = conf.getjson("sdk", "queue_to_coordinator", fallback={}) + for key in queue_to_coordinator.values(): + if key not in coordinator_specs: + raise ValueError(f"[sdk] queue_to_coordinator references invalid coordinator key: {key!r}") + return cls(coordinator_specs=coordinator_specs, queue_to_coordinator=queue_to_coordinator) + + def _find_queue(self, key: str) -> BaseCoordinator: + with contextlib.suppress(KeyError): + return self._created_coordinators[key] + spec = self._coordinator_specs[key] + coordinator = self._created_coordinators[key] = import_string(spec.classpath)(**spec.kwargs) + return coordinator + + def for_queue(self, queue: str) -> BaseCoordinator: + """ + Find the coordinator for *queue*. + + If an entry is not registered, a Python coordinator is returned. 
+ """ + try: + key = self._queue_to_coordinator[queue] + except KeyError: + log.debug("Queue not configured to a coordinator; defaulting to Python", queue=queue) + return _build_python_coordinator() + try: + coordinator = self._find_queue(key) + except KeyError: + raise InvalidCoordinatorError(f"Queue {queue!r} configured to nonexistent coordinator") + except ImportError: + raise InvalidCoordinatorError(f"Cannot import coordinator {key!r}") + except TypeError: + raise InvalidCoordinatorError(f"Cannot instantiate coordinator {key!r}") + log.debug("Coordinator found for queue", coordinator=coordinator, queue=queue) + return coordinator + + +@functools.cache +def get_coordinator_manager() -> CoordinatorManager: + """Return the process-wide :class:`CoordinatorManager`, loaded from config on first use.""" + return CoordinatorManager.from_config() + + +def reset_coordinator_manager() -> None: + """Clear the cached :class:`CoordinatorManager` (test helper).""" + get_coordinator_manager.cache_clear() diff --git a/task-sdk/src/airflow/sdk/execution_time/schema/schema.json b/task-sdk/src/airflow/sdk/execution_time/schema/schema.json index ce6cbb6b73a3a..6f07e7e3cb18c 100644 --- a/task-sdk/src/airflow/sdk/execution_time/schema/schema.json +++ b/task-sdk/src/airflow/sdk/execution_time/schema/schema.json @@ -3582,7 +3582,7 @@ "StartupDetails": { "properties": { "ti": { - "$ref": "#/$defs/TaskInstance" + "$ref": "#/$defs/TaskInstanceDTO" }, "dag_rel_path": { "title": "Dag Rel Path", @@ -3817,6 +3817,164 @@ "title": "TaskCallbackRequest", "type": "object" }, + "TaskInstance": { + "description": "Schema for TaskInstance model with minimal required fields needed for Runtime.", + "properties": { + "id": { + "format": "uuid", + "title": "Id", + "type": "string" + }, + "task_id": { + "title": "Task Id", + "type": "string" + }, + "dag_id": { + "title": "Dag Id", + "type": "string" + }, + "run_id": { + "title": "Run Id", + "type": "string" + }, + "try_number": { + "title": "Try Number", 
+ "type": "integer" + }, + "dag_version_id": { + "format": "uuid", + "title": "Dag Version Id", + "type": "string" + }, + "map_index": { + "default": -1, + "title": "Map Index", + "type": "integer" + }, + "hostname": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Hostname" + }, + "context_carrier": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Context Carrier" + } + }, + "required": [ + "id", + "task_id", + "dag_id", + "run_id", + "try_number", + "dag_version_id" + ], + "title": "TaskInstance", + "type": "object" + }, + "TaskInstanceDTO": { + "description": "Task SDK TaskInstanceDTO.", + "properties": { + "id": { + "format": "uuid", + "title": "Id", + "type": "string" + }, + "dag_version_id": { + "format": "uuid", + "title": "Dag Version Id", + "type": "string" + }, + "task_id": { + "title": "Task Id", + "type": "string" + }, + "dag_id": { + "title": "Dag Id", + "type": "string" + }, + "run_id": { + "title": "Run Id", + "type": "string" + }, + "try_number": { + "title": "Try Number", + "type": "integer" + }, + "map_index": { + "default": -1, + "title": "Map Index", + "type": "integer" + }, + "pool_slots": { + "title": "Pool Slots", + "type": "integer" + }, + "queue": { + "title": "Queue", + "type": "string" + }, + "priority_weight": { + "title": "Priority Weight", + "type": "integer" + }, + "parent_context_carrier": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Parent Context Carrier" + }, + "context_carrier": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Context Carrier" + } + }, + "required": [ + "id", + "dag_version_id", + "task_id", + "dag_id", + "run_id", + "try_number", + "pool_slots", + "queue", + "priority_weight" + ], + "title": 
"TaskInstanceDTO", + "type": "object" + }, "TaskInstanceState": { "description": "All possible states that a Task Instance can be in.\n\nNote that None is also allowed, so always use this in a type hint with Optional.", "enum": [ @@ -4697,77 +4855,6 @@ "title": "TIRunContext", "type": "object" }, - "TaskInstance": { - "description": "Schema for TaskInstance model with minimal required fields needed for Runtime.", - "properties": { - "id": { - "format": "uuid", - "title": "Id", - "type": "string" - }, - "task_id": { - "title": "Task Id", - "type": "string" - }, - "dag_id": { - "title": "Dag Id", - "type": "string" - }, - "run_id": { - "title": "Run Id", - "type": "string" - }, - "try_number": { - "title": "Try Number", - "type": "integer" - }, - "dag_version_id": { - "format": "uuid", - "title": "Dag Version Id", - "type": "string" - }, - "map_index": { - "default": -1, - "title": "Map Index", - "type": "integer" - }, - "hostname": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Hostname" - }, - "context_carrier": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Context Carrier" - } - }, - "required": [ - "id", - "task_id", - "dag_id", - "run_id", - "try_number", - "dag_version_id" - ], - "title": "TaskInstance", - "type": "object" - }, "VariableResponse": { "additionalProperties": false, "description": "Variable schema for responses with fields that are needed for Runtime.", diff --git a/task-sdk/src/airflow/sdk/execution_time/supervisor.py b/task-sdk/src/airflow/sdk/execution_time/supervisor.py index ac479dcebe69d..781e98ae2f67f 100644 --- a/task-sdk/src/airflow/sdk/execution_time/supervisor.py +++ b/task-sdk/src/airflow/sdk/execution_time/supervisor.py @@ -130,6 +130,7 @@ _RequestFrame, _ResponseFrame, ) +from airflow.sdk.execution_time.coordinator import get_coordinator_manager from 
airflow.sdk.execution_time.request_handlers import ( handle_delete_variable, handle_delete_xcom, @@ -170,6 +171,7 @@ from airflow.executors.workloads import BundleInfo from airflow.sdk.bases.secrets_backend import BaseSecretsBackend from airflow.sdk.definitions.connection import Connection + from airflow.sdk.execution_time.workloads.task import TaskInstanceDTO from airflow.sdk.types import RuntimeTaskInstanceProtocol as RuntimeTI __all__ = ["ActivitySubprocess", "WatchedSubprocess", "supervise", "supervise_task"] @@ -533,6 +535,66 @@ def _child_exec_main(): _fork_main(child_requests, child_stdout, child_stderr, 0, _subprocess_main) +class NeverRaised(Exception): + """ + An exception type that's never raised. + + This is used in :class:`ProcessTracker` as a stand-in when an exception is + not supposed to be raised by an implementation. + + :meta private: + """ + + +class ProcessTracker: + """ + Common interface to track a process. + + This protocol works for both :class:`psutil.Process` (used by the forking + model) and :class:`subprocess.Popen` (used by the subprocess model). A + custom class may also be implemented if needed in the future. + + :meta private: + """ + + ProcessNotFound: type[Exception] + TimeoutExpired: type[Exception] + + @property + def pid(self) -> int: + raise NotImplementedError + + def send_signal(self, s: signal.Signals) -> None: + raise NotImplementedError + + def wait(self, timeout: float | None) -> int: + raise NotImplementedError + + +class PsutilTracker(ProcessTracker): + """ + Process tracker backed by :class:`psutil.Process`. 
+ + :meta private: + """ + + ProcessNotFound = psutil.NoSuchProcess + TimeoutExpired = psutil.TimeoutExpired + + def __init__(self, impl: psutil.Process) -> None: + self._impl = impl + + @property + def pid(self) -> int: + return self._impl.pid + + def send_signal(self, s: signal.Signals) -> None: + self._impl.send_signal(s) + + def wait(self, timeout: float | None) -> int: + return self._impl.wait(timeout) + + def _validate_schema_version(instance, _, value) -> str | None: if value is None: return None @@ -559,8 +621,8 @@ class WatchedSubprocess: decoder: ClassVar[TypeAdapter] """The decoder to use for incoming messages from the child process.""" - _process: psutil.Process = attrs.field(repr=False) - """File descriptor for request handling.""" + _process: ProcessTracker = attrs.field(repr=False) + """Handler to track the process.""" _subprocess_schema_version: str | None = attrs.field(default=None, validator=_validate_schema_version) """ @@ -678,22 +740,31 @@ def start( proc = cls( pid=pid, stdin=read_requests, - process=psutil.Process(pid), + process=PsutilTracker(psutil.Process(pid)), process_log=logger, start_time=time.monotonic(), **constructor_kwargs, ) proc._register_pipe_readers( - stdout=read_stdout, - stderr=read_stderr, - requests=read_requests, - logs=read_logs, + read_stdout, + read_stderr, + read_requests, + read_logs, + data={}, ) return proc - def _register_pipe_readers(self, stdout: socket, stderr: socket, requests: socket, logs: socket): + def _register_pipe_readers( + self, + stdout: socket, + stderr: socket, + requests: socket, + logs: socket, + *, + data: dict[socket, bytes], + ): """Register handlers for subprocess communication channels.""" # self.selector is a way of registering a handler/callback to be called when the given IO channel has # activity to read on (https://www.man7.org/linux/man-pages/man2/select.2.html etc, but better @@ -713,12 +784,19 @@ def _register_pipe_readers(self, stdout: socket, stderr: socket, requests: socke 
target_loggers = self._get_target_loggers() self.selector.register( - stdout, selectors.EVENT_READ, self._create_log_forwarder(target_loggers, "task.stdout") + stdout, + selectors.EVENT_READ, + self._create_log_forwarder(target_loggers, "task.stdout", data=data.get(stdout, b"")), ) self.selector.register( stderr, selectors.EVENT_READ, - self._create_log_forwarder(target_loggers, "task.stderr", log_level=logging.ERROR), + self._create_log_forwarder( + target_loggers, + "task.stderr", + data=data.get(stderr, b""), + log_level=logging.ERROR, + ), ) self.selector.register( logs, @@ -740,7 +818,14 @@ def _get_target_loggers(self) -> tuple[FilteringBoundLogger, ...]: target_loggers += (log,) return target_loggers - def _create_log_forwarder(self, loggers, name, log_level=logging.INFO) -> Callable[[socket], bool]: + def _create_log_forwarder( + self, + loggers: tuple[FilteringBoundLogger, ...], + name: str, + *, + data: bytes, + log_level: int = logging.INFO, + ) -> Callable[[socket], bool]: """Create a socket handler that forwards logs to a logger.""" loggers = tuple( reconfigure_logger( @@ -750,7 +835,9 @@ def _create_log_forwarder(self, loggers, name, log_level=logging.INFO) -> Callab for log in loggers ) return make_buffered_socket_reader( - forward_to_log(loggers, logger=name, level=log_level), on_close=self._on_socket_closed + forward_to_log(loggers, logger=name, level=log_level), + data=data, + on_close=self._on_socket_closed, ) def _on_socket_closed(self, sock: socket): @@ -942,7 +1029,7 @@ def kill( if sig != escalation_path[-1]: msg += "; escalating" log.warning(msg, pid=self.pid, signal=sig.name) - except psutil.NoSuchProcess: + except self._process.ProcessNotFound: log.debug("Process already terminated", pid=self.pid) self._exit_code = -1 return @@ -1020,7 +1107,7 @@ def _check_subprocess_exit( try: self._exit_code = self._process.wait(timeout=0) - except psutil.TimeoutExpired: + except self._process.TimeoutExpired: if raise_on_timeout: raise else: @@ -1228,7 
+1315,7 @@ class ActivitySubprocess(WatchedSubprocess): def start( # type: ignore[override] cls, *, - what: TaskInstance, + what: TaskInstanceDTO, dag_rel_path: str | os.PathLike[str], bundle_info, client: Client, @@ -1257,7 +1344,7 @@ def start( # type: ignore[override] def _on_child_started( self, *, - ti: TaskInstance, + ti: TaskInstanceDTO, dag_rel_path: str | os.PathLike[str], bundle_info, sentry_integration: str, @@ -2074,15 +2161,29 @@ def run_task_in_process(ti: TaskInstance, task) -> TaskRunResult: # to a (sync) generator def make_buffered_socket_reader( gen: Generator[None, bytes | bytearray, None], + *, + data: bytes = b"", on_close: Callable[[socket], None], buffer_size: int = 4096, ): - buffer = bytearray() # This will hold our accumulated binary data + buffer = bytearray(data) # This will hold our accumulated binary data read_buffer = bytearray(buffer_size) # Temporary buffer for each read # We need to start up the generator to get it to the point it's at waiting on the yield next(gen) + def process(buffer: bytearray) -> bytearray: + # We could have read multiple lines in one go, yield them all + while (newline_pos := buffer.find(b"\n")) != -1: + line = buffer[: newline_pos + 1] + gen.send(line) + buffer = buffer[newline_pos + 1 :] # Update the buffer with remaining data + return buffer + + # Flush any complete lines that arrived before the selector was running. 
+ with contextlib.suppress(StopIteration): + buffer = process(buffer) + def cb(sock: socket): nonlocal buffer, read_buffer # Read up to `buffer_size` bytes of data from the socket @@ -2096,15 +2197,10 @@ def cb(sock: socket): return False buffer.extend(read_buffer[:n_received]) - - # We could have read multiple lines in one go, yield them all - while (newline_pos := buffer.find(b"\n")) != -1: - line = buffer[: newline_pos + 1] - try: - gen.send(line) - except StopIteration: - return False - buffer = buffer[newline_pos + 1 :] # Update the buffer with remaining data + try: + buffer = process(buffer) + except StopIteration: + return False return True @@ -2285,7 +2381,7 @@ def _configure_logging(log_path: str, client: Client) -> tuple[FilteringBoundLog def supervise_task( *, - ti: TaskInstance, + ti: TaskInstanceDTO, bundle_info: BundleInfo, dag_rel_path: str | os.PathLike[str], token: str, @@ -2312,6 +2408,8 @@ def supervise_task( path to a callable to initialize it (empty means no integration). :return: Exit code of the process. :raises ValueError: If server URL is empty or invalid. + :raises InvalidCoordinatorError: If the coordinator for the task is not + configured correctly. 
""" _make_process_nondumpable() @@ -2352,6 +2450,17 @@ def supervise_task( if not dag_rel_path: raise ValueError("dag_path is required") + try: + coordinator = get_coordinator_manager().for_queue(ti.queue) + except: + log.exception( + "Failed to initialize coordinator for task", + dag_id=ti.dag_id, + task_id=ti.task_id, + queue=ti.queue, + ) + raise + with _ensure_client(server, token, client=client, dry_run=dry_run) as client: start = time.monotonic() @@ -2371,27 +2480,25 @@ def supervise_task( reset_secrets_masker() try: - process = ActivitySubprocess.start( - dag_rel_path=dag_rel_path, + result = coordinator.execute_task( what=ti, + dag_rel_path=dag_rel_path, + bundle_info=bundle_info, client=client, logger=logger, - bundle_info=bundle_info, - subprocess_logs_to_stdout=subprocess_logs_to_stdout, sentry_integration=sentry_integration, + subprocess_logs_to_stdout=subprocess_logs_to_stdout, ) - - exit_code = process.wait() end = time.monotonic() log.info( "Workload finished", workload_type="ExecuteTask", workload_id=str(ti.id), - exit_code=exit_code, + exit_code=result.exit_code, duration=end - start, - final_state=process.final_state, + final_state=result.final_state, ) - return exit_code + return result.exit_code finally: if log_path and log_file_descriptor: log_file_descriptor.close() diff --git a/task-sdk/tests/task_sdk/coordinators/__init__.py b/task-sdk/tests/task_sdk/coordinators/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/task-sdk/tests/task_sdk/coordinators/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/task-sdk/tests/task_sdk/coordinators/java/__init__.py b/task-sdk/tests/task_sdk/coordinators/java/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/task-sdk/tests/task_sdk/coordinators/java/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/task-sdk/tests/task_sdk/coordinators/java/test_coordinator.py b/task-sdk/tests/task_sdk/coordinators/java/test_coordinator.py new file mode 100644 index 0000000000000..58014d76e32a2 --- /dev/null +++ b/task-sdk/tests/task_sdk/coordinators/java/test_coordinator.py @@ -0,0 +1,759 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import contextlib +import os +import pathlib +import re +import socket +import subprocess +import threading +import time +import zipfile +from unittest.mock import ANY, MagicMock, call, patch + +import pytest +from uuid6 import uuid7 + +from airflow.sdk.coordinators.java.coordinator import ( + JavaCoordinator, + _accept_connections, + _calculate_classpath, + _JarInfo, + _JavaActivitySubprocess, + _ResourceTracker, + _start_server, +) +from airflow.sdk.execution_time.coordinator import BaseCoordinator +from airflow.sdk.execution_time.supervisor import ActivitySubprocess +from airflow.sdk.execution_time.workloads.task import TaskInstanceDTO + +from tests_common.test_utils.version_compat import AIRFLOW_V_3_3_PLUS + +if not AIRFLOW_V_3_3_PLUS: + pytest.skip("Coordinator is only compatible with Airflow >= 3.3.0", allow_module_level=True) + +METADATA_YAML_PATH = "META-INF/airflow-metadata.yaml" +DAG_CODE_PATH = "dag_source.py" +TEST_MAIN_CLASS = "com.example.MyBundle" + + +def _make_ti(dag_id: str = "test_dag", queue: str = "java") -> TaskInstanceDTO: + return TaskInstanceDTO( + id=uuid7(), + dag_version_id=uuid7(), + task_id="task_1", + dag_id=dag_id, + run_id="run_1", + try_number=1, + map_index=-1, + pool_slots=1, + queue=queue, + priority_weight=1, + ) + + +def _make_jar( + path: pathlib.Path, + *, + main_class: str | None = "com.example.Main", + 
schema_version: str | None = None, +) -> pathlib.Path: + """Write a minimal JAR with (optionally) a Main-Class manifest entry.""" + lines = ["Manifest-Version: 1.0"] + if main_class: + lines.append(f"Main-Class: {main_class}") + if schema_version: + lines.append(f"Airflow-Supervisor-Schema-Version: {schema_version}") + manifest = "\n".join(lines) + "\n\n" + with zipfile.ZipFile(path, "w") as zf: + zf.writestr("META-INF/MANIFEST.MF", manifest) + return path + + +class TestStartServer: + def test_returns_listening_socket(self): + server = _start_server() + try: + _, port = server.getsockname() + finally: + server.close() + assert port > 0 + + def test_two_calls_return_different_ports(self): + s1 = _start_server() + s2 = _start_server() + try: + _, port1 = s1.getsockname() + _, port2 = s2.getsockname() + finally: + s1.close() + s2.close() + assert port1 != port2 + + def test_accepts_connection(self): + conn = client = None + server = _start_server() + try: + _, port = server.getsockname() + client = socket.socket() + client.connect(("127.0.0.1", port)) + conn, _ = server.accept() + conn.sendall(b"ping") + received = client.recv(4) + finally: + if conn: + conn.close() + if client: + client.close() + server.close() + assert received == b"ping" + + +class TestCalculateClasspath: + def test_single_jar(self, tmp_path): + jar = tmp_path.joinpath("app.jar") + jar.write_bytes(b"") + result = _calculate_classpath([tmp_path]) + assert result == jar.as_posix() + + def test_multiple_jars_all_included(self, tmp_path): + tmp_path.joinpath("a.jar").write_bytes(b"") + tmp_path.joinpath("b.jar").write_bytes(b"") + tmp_path.joinpath("c.jar").write_bytes(b"") + result = _calculate_classpath([tmp_path]) + entries = set(result.split(os.pathsep)) + assert entries == { + tmp_path.joinpath("a.jar").as_posix(), + tmp_path.joinpath("b.jar").as_posix(), + tmp_path.joinpath("c.jar").as_posix(), + } + + def test_non_jar_files_excluded(self, tmp_path): + jar = tmp_path.joinpath("app.jar") + 
jar.write_bytes(b"") + tmp_path.joinpath("readme.txt").write_bytes(b"") + tmp_path.joinpath("config.yaml").write_bytes(b"") + result = _calculate_classpath([tmp_path]) + assert result == jar.as_posix() + + def test_empty_directory_returns_empty_string(self, tmp_path): + result = _calculate_classpath([tmp_path]) + assert result == "" + + +class TestMainJar: + def test_returns_main_class_from_jar(self, tmp_path): + _make_jar(tmp_path.joinpath("app.jar"), main_class="com.example.Main", schema_version="2026-06-16") + assert _JarInfo.find([tmp_path], "") == _JarInfo("com.example.Main", "2026-06-16") + + def test_no_jars_raises_file_not_found(self, tmp_path): + with pytest.raises(FileNotFoundError, match=re.escape(str(tmp_path.resolve()))): + _JarInfo.find([tmp_path], "") + + def test_jar_without_main_class_not_returned(self, tmp_path): + _make_jar(tmp_path.joinpath("app.jar"), main_class=None) + with pytest.raises(FileNotFoundError): + _JarInfo.find([tmp_path], "") + + def test_jar_with_main_class_but_no_schema_version_raises(self, tmp_path): + """A JAR with Main-Class but no Airflow-Supervisor-Schema-Version must raise FileNotFoundError.""" + _make_jar(tmp_path.joinpath("app.jar"), main_class="com.example.Main") + with pytest.raises(FileNotFoundError, match="Airflow-Supervisor-Schema-Version"): + _JarInfo.find([tmp_path], "") + + def test_non_jar_files_skipped(self, tmp_path): + tmp_path.joinpath("readme.txt").write_bytes(b"not a jar") + _make_jar(tmp_path.joinpath("app.jar"), main_class="com.example.Main", schema_version="2026-06-16") + assert _JarInfo.find([tmp_path], "") == _JarInfo("com.example.Main", "2026-06-16") + + def test_first_jar_missing_main_class_falls_through_to_second(self, tmp_path): + # Alphabetically: a.jar (no Main-Class), b.jar (has Main-Class). 
+ _make_jar(tmp_path.joinpath("a.jar"), main_class=None) + _make_jar(tmp_path.joinpath("b.jar"), main_class="com.example.Fallback", schema_version="2026-06-16") + assert _JarInfo.find([tmp_path], "") == _JarInfo("com.example.Fallback", "2026-06-16") + + def test_fully_qualified_class_name_preserved(self, tmp_path): + _make_jar( + tmp_path.joinpath("app.jar"), + main_class="org.apache.airflow.sdk.java.TaskRunner", + schema_version="2026-06-16", + ) + assert _JarInfo.find([tmp_path], "") == _JarInfo( + main_class="org.apache.airflow.sdk.java.TaskRunner", + schema_version="2026-06-16", + ) + + def test_find_by_explicit_main_class(self, tmp_path): + """When a main_class filter is given, only the matching JAR is returned.""" + _make_jar(tmp_path.joinpath("a.jar"), main_class="com.example.Alpha", schema_version="2026-06-16") + _make_jar(tmp_path.joinpath("b.jar"), main_class="com.example.Beta", schema_version="2026-06-16") + result = _JarInfo.find([tmp_path], "com.example.Beta") + assert result.main_class == "com.example.Beta" + + def test_find_by_explicit_main_class_not_present_raises(self, tmp_path): + """When no JAR matches the main_class filter, FileNotFoundError is raised.""" + _make_jar(tmp_path.joinpath("app.jar"), main_class="com.example.Main", schema_version="2026-06-16") + with pytest.raises(FileNotFoundError, match="com.example.Missing"): + _JarInfo.find([tmp_path], "com.example.Missing") + + +class TestAcceptConnections: + def _connect_after_delay(self, addr: tuple[str, int], delay: float = 0.0) -> None: + def _connect(): + time.sleep(delay) + c = socket.socket() + with contextlib.suppress(OSError): # Server may already be closed in teardown. 
+ c.connect(addr) + + threading.Thread(target=_connect, daemon=True).start() + + def test_accepts_single_server(self): + server = _start_server() + _, port = server.getsockname() + self._connect_after_delay(("127.0.0.1", port)) + + mock_proc = MagicMock(spec=subprocess.Popen) + mock_proc.poll.return_value = None + + try: + accepted, _ = _accept_connections({"comm": server}, {}, mock_proc) + assert server in accepted + accepted[server].close() + finally: + server.close() + + def test_accepts_multiple_servers(self): + comm_server = _start_server() + logs_server = _start_server() + _, comm_port = comm_server.getsockname() + _, logs_port = logs_server.getsockname() + + self._connect_after_delay(("127.0.0.1", comm_port)) + self._connect_after_delay(("127.0.0.1", logs_port)) + + mock_proc = MagicMock(spec=subprocess.Popen) + mock_proc.poll.return_value = None + + try: + accepted, _ = _accept_connections({"comm": comm_server, "logs": logs_server}, {}, mock_proc) + assert set(accepted) == {comm_server, logs_server} + for sock in accepted.values(): + sock.close() + finally: + comm_server.close() + logs_server.close() + + def test_raises_timeout_when_no_connection(self): + server = _start_server() + mock_proc = MagicMock(spec=subprocess.Popen) + mock_proc.poll.return_value = None + try: + with pytest.raises(TimeoutError, match="did not connect within timeout"): + _accept_connections({"comm": server}, {}, mock_proc, max_wait=0.05) + finally: + server.close() + + def test_raises_runtime_error_if_process_exits_before_connecting(self): + server = _start_server() + mock_proc = MagicMock(spec=subprocess.Popen) + # proc has already exited + mock_proc.poll.return_value = 1 + mock_proc.returncode = 1 + try: + with pytest.raises(RuntimeError, match="process exited with 1"): + _accept_connections({"comm": server}, {}, mock_proc) + finally: + server.close() + + def test_returned_sockets_are_connected(self): + """Accepted sockets should be real, usable connections.""" + server = 
_start_server() + _, port = server.getsockname() + + client = socket.socket() + client.connect(("127.0.0.1", port)) + + mock_proc = MagicMock(spec=subprocess.Popen) + mock_proc.poll.return_value = None + + try: + accepted, _ = _accept_connections({"comm": server}, {}, mock_proc) + accepted[server].sendall(b"hello") + assert client.recv(5) == b"hello" + accepted[server].close() + client.close() + finally: + server.close() + + def test_empty_drains_returns_empty_drained_dict(self): + """When drains={} the returned drained mapping must also be empty.""" + server = _start_server() + _, port = server.getsockname() + self._connect_after_delay(("127.0.0.1", port)) + mock_proc = MagicMock(spec=subprocess.Popen) + mock_proc.poll.return_value = None + try: + _, drained = _accept_connections({"comm": server}, {}, mock_proc) + assert drained == {} + finally: + server.close() + + def test_drain_socket_present_in_drained_dict(self): + """The drained dict must be keyed by the drain socket objects.""" + server = _start_server() + drain_r, drain_w = socket.socketpair() + _, port = server.getsockname() + self._connect_after_delay(("127.0.0.1", port)) + mock_proc = MagicMock(spec=subprocess.Popen) + mock_proc.poll.return_value = None + try: + _, drained = _accept_connections({"comm": server}, {"stdout": drain_r}, mock_proc) + assert drain_r in drained + finally: + server.close() + drain_r.close() + drain_w.close() + + def test_bytes_written_to_drain_socket_are_returned(self): + """Bytes written to a drain socket before the connection is accepted + must be captured and returned in the drained dict.""" + server = _start_server() + drain_r, drain_w = socket.socketpair() + _, port = server.getsockname() + + drain_w.sendall(b"early output\n") + self._connect_after_delay(("127.0.0.1", port), delay=0.05) + + mock_proc = MagicMock(spec=subprocess.Popen) + mock_proc.poll.return_value = None + try: + _, drained = _accept_connections({"comm": server}, {"stdout": drain_r}, mock_proc) + assert 
drained[drain_r] == b"early output\n" + finally: + server.close() + drain_r.close() + drain_w.close() + + def test_accepted_dict_keyed_by_server_socket_object(self): + """The returned accepted mapping must use server socket objects as keys, + not the string names passed in the servers dict.""" + server = _start_server() + _, port = server.getsockname() + self._connect_after_delay(("127.0.0.1", port)) + mock_proc = MagicMock(spec=subprocess.Popen) + mock_proc.poll.return_value = None + try: + accepted, _ = _accept_connections({"comm": server}, {}, mock_proc) + # Key must be the socket object itself, not the string "comm" + assert server in accepted + assert "comm" not in accepted + accepted[server].close() + finally: + server.close() + + +class TestResourceTracker: + """Unit tests for the _ResourceTracker context manager introduced in this PR. + + _ResourceTracker tracks sockets and Popen objects and ensures they are + closed/terminated on context-manager exit, unless explicitly untracked + beforehand. 
+ """ + + def test_track_returns_passed_objects_as_tuple(self): + tracker = _ResourceTracker(timeout=0.1) + sock = MagicMock(spec=socket.socket) + result = tracker.track(sock) + assert result == (sock,) + + def test_track_multiple_objects_returns_all(self): + tracker = _ResourceTracker(timeout=0.1) + sock1 = MagicMock(spec=socket.socket) + sock2 = MagicMock(spec=socket.socket) + result = tracker.track(sock1, sock2) + assert set(result) == {sock1, sock2} + + def test_untrack_returns_objects(self): + tracker = _ResourceTracker(timeout=0.1) + sock = MagicMock(spec=socket.socket) + tracker.track(sock) + result = tracker.untrack(sock) + assert result == (sock,) + + def test_context_manager_closes_tracked_socket_on_exit(self): + sock = MagicMock(spec=socket.socket) + with _ResourceTracker(timeout=0.1) as tracker: + tracker.track(sock) + sock.close.assert_called_once() + + def test_context_manager_terminates_tracked_popen_on_exit(self): + proc = MagicMock(spec=subprocess.Popen) + with _ResourceTracker(timeout=0.1) as tracker: + tracker.track(proc) + proc.terminate.assert_called_once() + + def test_untracked_socket_not_closed_on_exit(self): + sock = MagicMock(spec=socket.socket) + with _ResourceTracker(timeout=0.1) as tracker: + tracker.track(sock) + tracker.untrack(sock) + sock.close.assert_not_called() + + def test_only_remaining_tracked_objects_cleaned_up(self): + """After untracking one socket the other must still be closed.""" + sock_keep = MagicMock(spec=socket.socket) + sock_release = MagicMock(spec=socket.socket) + with _ResourceTracker(timeout=0.1) as tracker: + tracker.track(sock_keep, sock_release) + tracker.untrack(sock_release) + sock_keep.close.assert_called_once() + sock_release.close.assert_not_called() + + def test_untrack_unknown_object_does_not_raise(self): + sock = MagicMock(spec=socket.socket) + tracker = _ResourceTracker(timeout=0.1) + # Untracking something never tracked must be a no-op, not an error + tracker.untrack(sock) + + +class 
TestJavaCoordinatorAttributes: + def test_default_kwargs(self): + coordinator = JavaCoordinator(jars_root="/airflow/java-bundles") + assert coordinator.java_executable == "java" + assert coordinator.jvm_args == [] + assert coordinator.jars_root == [pathlib.Path("/airflow/java-bundles")] + + def test_custom_kwargs(self): + coordinator = JavaCoordinator( + java_executable="/opt/java/bin/java", + jvm_args=["-Xmx512m", "-Xms256m"], + jars_root=["/airflow/java-bundles"], + ) + assert coordinator.java_executable == "/opt/java/bin/java" + assert coordinator.jvm_args == ["-Xmx512m", "-Xms256m"] + assert coordinator.jars_root == [pathlib.Path("/airflow/java-bundles")] + + +@pytest.fixture +def jars_root(tmp_path): + _make_jar(tmp_path.joinpath("app.jar"), main_class="com.example.TaskRunner", schema_version="2026-06-16") + return tmp_path + + +@pytest.fixture +def mock_client(make_ti_context): + client = MagicMock() + client.task_instances.start.return_value = make_ti_context() + return client + + +class TestJavaCoordinatorExecuteTask: + def _captured_popen_cmd( + self, + jars_root: pathlib.Path, + mock_client, + *, + java_executable: str = "java", + jvm_args: list[str] | None = None, + ) -> list[str]: + """Run execute_task with mocked subprocess and return the command list.""" + ti = _make_ti() + coordinator = JavaCoordinator( + java_executable=java_executable, + jvm_args=jvm_args or [], + jars_root=jars_root, + ) + + mock_proc = MagicMock(spec=subprocess.Popen) + mock_proc.pid = 12345 + comm_sock = MagicMock(spec=socket.socket) + logs_sock = MagicMock(spec=socket.socket) + popen_calls: list = [] + + def capture_popen(cmd, **kwargs): + popen_calls.append(cmd) + return mock_proc + + with ( + patch( + "airflow.sdk.coordinators.java.coordinator.subprocess.Popen", + side_effect=capture_popen, + ), + patch( + "airflow.sdk.coordinators.java.coordinator._accept_connections", + side_effect=lambda servers, drains, proc, **kw: ( + {servers["comm"]: comm_sock, servers["logs"]: 
logs_sock}, + {soc: b"" for soc in drains.values()}, + ), + ), + patch.object(ActivitySubprocess, "_register_pipe_readers"), + patch.object(ActivitySubprocess, "_on_child_started"), + patch.object(ActivitySubprocess, "wait", return_value=0), + patch("psutil.Process"), + ): + coordinator.execute_task( + what=ti, + dag_rel_path="dags/test.jar", + bundle_info=MagicMock(), + client=mock_client, + subprocess_logs_to_stdout=False, + ) + + assert popen_calls, "subprocess.Popen was not called" + return popen_calls[0] + + def test_java_executable_is_first_arg(self, jars_root, mock_client): + cmd = self._captured_popen_cmd( + jars_root, mock_client, java_executable="/usr/lib/jvm/java-17/bin/java" + ) + assert cmd[0] == "/usr/lib/jvm/java-17/bin/java" + + def test_classpath_flag_and_value_present(self, jars_root, mock_client): + cmd = self._captured_popen_cmd(jars_root, mock_client) + assert "-classpath" in cmd + cp_idx = cmd.index("-classpath") + classpath = cmd[cp_idx + 1] + assert jars_root.joinpath("app.jar").as_posix() in classpath + + def test_main_class_present(self, jars_root, mock_client): + cmd = self._captured_popen_cmd(jars_root, mock_client) + assert "com.example.TaskRunner" in cmd + + def test_comm_and_logs_args_present(self, jars_root, mock_client): + cmd = self._captured_popen_cmd(jars_root, mock_client) + comm_args = [a for a in cmd if a.startswith("--comm=")] + logs_args = [a for a in cmd if a.startswith("--logs=")] + assert len(comm_args) == 1 + assert len(logs_args) == 1 + + def test_comm_and_logs_contain_port(self, jars_root, mock_client): + cmd = self._captured_popen_cmd(jars_root, mock_client) + comm_arg = next(a for a in cmd if a.startswith("--comm=")) + logs_arg = next(a for a in cmd if a.startswith("--logs=")) + # format is host:port + assert ":" in comm_arg.split("=", 1)[1] + assert ":" in logs_arg.split("=", 1)[1] + + def test_jvm_args_inserted_before_main_class(self, jars_root, mock_client): + cmd = self._captured_popen_cmd(jars_root, mock_client, 
jvm_args=["-Xmx512m", "-Dsome.prop=value"]) + main_idx = cmd.index("com.example.TaskRunner") + for jvm_arg in ["-Xmx512m", "-Dsome.prop=value"]: + assert jvm_arg in cmd + assert cmd.index(jvm_arg) < main_idx + + def test_comm_and_logs_after_main_class(self, jars_root, mock_client): + cmd = self._captured_popen_cmd(jars_root, mock_client) + main_idx = cmd.index("com.example.TaskRunner") + comm_idx = next(i for i, a in enumerate(cmd) if a.startswith("--comm=")) + logs_idx = next(i for i, a in enumerate(cmd) if a.startswith("--logs=")) + assert comm_idx > main_idx + assert logs_idx > main_idx + + def test_returns_execution_result(self, jars_root, mock_client): + ti = _make_ti() + coordinator = JavaCoordinator(jars_root=jars_root) + + mock_proc = MagicMock(spec=subprocess.Popen) + mock_proc.pid = 99999 + comm_sock = MagicMock(spec=socket.socket) + logs_sock = MagicMock(spec=socket.socket) + + with ( + patch("subprocess.Popen", return_value=mock_proc), + patch( + "airflow.sdk.coordinators.java.coordinator._accept_connections", + side_effect=lambda servers, drains, proc, **kw: ( + {servers["comm"]: comm_sock, servers["logs"]: logs_sock}, + {soc: b"" for soc in drains.values()}, + ), + ), + patch.object(ActivitySubprocess, "_register_pipe_readers"), + patch.object(ActivitySubprocess, "_on_child_started"), + patch.object(ActivitySubprocess, "wait", return_value=0), + patch("psutil.Process"), + ): + result = coordinator.execute_task( + what=ti, + dag_rel_path="dags/test.jar", + bundle_info=MagicMock(), + client=mock_client, + subprocess_logs_to_stdout=False, + ) + + assert isinstance(result, BaseCoordinator.ExecutionResult) + assert result.exit_code == 0 + + +class TestJavaActivitySubprocessStart: + """ + Unit tests for _JavaActivitySubprocess.start(). + + These tests mock subprocess.Popen and _accept_connections to verify that + start() wires up the right command and stores the right sockets, + without requiring a real Java runtime. 
+ """ + + def _start_with_mocks( + self, + jars_root: pathlib.Path, + mock_client, + *, + java_executable: str = "java", + jvm_args: list[str] | None = None, + ti: TaskInstanceDTO | None = None, + ): + """Call _JavaActivitySubprocess.start() with all subprocess machinery mocked out.""" + ti = ti or _make_ti() + + mock_proc = MagicMock(spec=subprocess.Popen) + mock_proc.pid = 12345 + comm_sock = MagicMock(spec=socket.socket) + logs_sock = MagicMock(spec=socket.socket) + + with ( + patch( + "airflow.sdk.coordinators.java.coordinator.subprocess.Popen", + return_value=mock_proc, + ) as popen_mock, + patch( + "airflow.sdk.coordinators.java.coordinator._accept_connections", + side_effect=lambda servers, drains, proc, **kw: ( + {servers["comm"]: comm_sock, servers["logs"]: logs_sock}, + {soc: b"" for soc in drains.values()}, + ), + ), + patch.object(ActivitySubprocess, "_register_pipe_readers"), + patch.object(ActivitySubprocess, "_on_child_started"), + patch("psutil.Process"), + ): + proc = _JavaActivitySubprocess.start( + what=ti, + dag_rel_path="dags/test.jar", + bundle_info=MagicMock(), + client=mock_client, + java_executable=java_executable, + jvm_args=jvm_args or [], + jars_root=[jars_root], + main_class="", + subprocess_logs_to_stdout=False, + ) + + return proc, popen_mock + + def test_stdin_is_comm_socket(self, jars_root, mock_client): + """stdin (used by send_msg) must be the accepted comm socket.""" + ti = _make_ti() + comm_sock = MagicMock(spec=socket.socket) + logs_sock = MagicMock(spec=socket.socket) + + with ( + patch("airflow.sdk.coordinators.java.coordinator.subprocess.Popen") as popen_mock, + patch( + "airflow.sdk.coordinators.java.coordinator._accept_connections", + side_effect=lambda servers, drains, proc, **kw: ( + {servers["comm"]: comm_sock, servers["logs"]: logs_sock}, + {soc: b"" for soc in drains.values()}, + ), + ), + patch.object(ActivitySubprocess, "_register_pipe_readers"), + patch.object(ActivitySubprocess, "_on_child_started"), + 
patch("psutil.Process"), + ): + popen_mock.return_value.pid = 12345 + proc = _JavaActivitySubprocess.start( + what=ti, + dag_rel_path="dags/test.jar", + bundle_info=MagicMock(), + client=MagicMock(), + java_executable="java", + jvm_args=[], + jars_root=[jars_root], + main_class="", + subprocess_logs_to_stdout=False, + ) + + assert proc.stdin is comm_sock + + def test_pid_taken_from_popen(self, jars_root, mock_client): + proc, _ = self._start_with_mocks(jars_root, mock_client) + assert proc.pid == 12345 + + def test_on_child_started_called(self, jars_root, mock_client): + ti = _make_ti() + with ( + patch("airflow.sdk.coordinators.java.coordinator.subprocess.Popen") as popen_mock, + patch( + "airflow.sdk.coordinators.java.coordinator._accept_connections", + side_effect=lambda servers, drains, proc, **kw: ( + {soc: MagicMock(spec=socket.socket) for soc in servers.values()}, + {soc: b"" for soc in drains.values()}, + ), + ), + patch.object(ActivitySubprocess, "_register_pipe_readers"), + patch.object(ActivitySubprocess, "_on_child_started") as mock_on_started, + patch("psutil.Process"), + ): + popen_mock.return_value.pid = 12345 + _JavaActivitySubprocess.start( + what=ti, + dag_rel_path="dags/test.jar", + bundle_info=MagicMock(), + client=mock_client, + java_executable="java", + jvm_args=[], + jars_root=[jars_root], + main_class="", + subprocess_logs_to_stdout=False, + ) + + mock_on_started.assert_called_once() + kwargs = mock_on_started.call_args.kwargs + assert kwargs["ti"] is ti + assert kwargs["dag_rel_path"] == "dags/test.jar" + + def test_register_pipe_readers_called_with_four_sockets(self, jars_root, mock_client): + """Both socketpair read-ends and both TCP sockets must be registered, with a data kwarg.""" + with ( + patch("airflow.sdk.coordinators.java.coordinator.subprocess.Popen") as popen_mock, + patch( + "airflow.sdk.coordinators.java.coordinator._accept_connections", + side_effect=lambda servers, drains, proc, **kw: ( + {soc: MagicMock(spec=socket.socket) 
for soc in servers.values()}, + {soc: b"" for soc in drains.values()}, + ), + ), + patch.object(ActivitySubprocess, "_register_pipe_readers") as mock_register, + patch.object(ActivitySubprocess, "_on_child_started"), + patch("psutil.Process"), + ): + popen_mock.return_value.pid = 12345 + _JavaActivitySubprocess.start( + what=_make_ti(), + dag_rel_path="dags/test.jar", + bundle_info=MagicMock(), + client=mock_client, + java_executable="java", + jvm_args=[], + jars_root=[jars_root], + main_class="", + subprocess_logs_to_stdout=False, + ) + assert mock_register.mock_calls == [call(ANY, ANY, ANY, ANY, data=ANY)] diff --git a/task-sdk/tests/task_sdk/definitions/test_mappedoperator.py b/task-sdk/tests/task_sdk/definitions/test_mappedoperator.py index 2b34fac6ea0f9..93c5cc19aed47 100644 --- a/task-sdk/tests/task_sdk/definitions/test_mappedoperator.py +++ b/task-sdk/tests/task_sdk/definitions/test_mappedoperator.py @@ -680,14 +680,14 @@ def mock_comms_response(msg): ("tg.t2", 0): ["a", "b"], ("tg.t2", 1): [4], ("tg.t2", 2): ["z"], - ("t3", None): [["a", "b"], [4], ["z"]], + ("t3", -1): [["a", "b"], [4], ["z"]], } # We hard-code the number of expansions here as the server is in charge of that. expansion_per_task_id = { "tg.t1": range(3), "tg.t2": range(3), - "t3": [None], + "t3": [-1], } for task in dag.tasks: for map_index in expansion_per_task_id[task.task_id]: diff --git a/task-sdk/tests/task_sdk/definitions/test_xcom_arg.py b/task-sdk/tests/task_sdk/definitions/test_xcom_arg.py index af487851b07cb..6014d26f2208b 100644 --- a/task-sdk/tests/task_sdk/definitions/test_xcom_arg.py +++ b/task-sdk/tests/task_sdk/definitions/test_xcom_arg.py @@ -344,7 +344,7 @@ def xcom_get(msg): mock_supervisor_comms.send.side_effect = xcom_get # Run "pull_one" and "pull_all". 
- assert run_ti(dag, "pull_all", None) == TaskInstanceState.SUCCESS + assert run_ti(dag, "pull_all", -1) == TaskInstanceState.SUCCESS assert all_results == ["a", "b", "c", 1, 2] states = [run_ti(dag, "pull_one", map_index) for map_index in range(5)] diff --git a/task-sdk/tests/task_sdk/docs/test_public_api.py b/task-sdk/tests/task_sdk/docs/test_public_api.py index a21424ea101f6..0961458131735 100644 --- a/task-sdk/tests/task_sdk/docs/test_public_api.py +++ b/task-sdk/tests/task_sdk/docs/test_public_api.py @@ -42,30 +42,31 @@ def test_airflow_sdk_no_unexpected_exports(): ignore = { "__getattr__", "__lazy_imports", - "SecretCache", "TYPE_CHECKING", + "SecretCache", "annotations", "api", "bases", + "configuration", + "coordinators", + "crypto", "definitions", + "exceptions", "execution_time", "io", + "lineage", + "listener", "log", - "exceptions", - "timezone", - "secrets_masker", - "configuration", "module_loading", - "yaml", - "serde", "observability", "plugins_manager", - "listener", - "crypto", "providers_manager_runtime", - "lineage", + "secrets_masker", + "serde", + "timezone", "types", "state", + "yaml", } unexpected = actual - public - ignore assert not unexpected, f"Unexpected exports in airflow.sdk: {sorted(unexpected)}" diff --git a/task-sdk/tests/task_sdk/execution_time/test_comms.py b/task-sdk/tests/task_sdk/execution_time/test_comms.py index 5c6d88439250c..37a91dd0ecc28 100644 --- a/task-sdk/tests/task_sdk/execution_time/test_comms.py +++ b/task-sdk/tests/task_sdk/execution_time/test_comms.py @@ -86,6 +86,9 @@ def test_recv_StartupDetails(self): "run_id": "b", "dag_id": "c", "dag_version_id": uuid.UUID("4d828a62-a417-4936-a7a6-2b3fabacecab"), + "pool_slots": 1, + "queue": "default", + "priority_weight": 1, }, "ti_context": { "dag_run": { diff --git a/task-sdk/tests/task_sdk/execution_time/test_coordinator.py b/task-sdk/tests/task_sdk/execution_time/test_coordinator.py new file mode 100644 index 0000000000000..8e3bcfa102172 --- /dev/null +++ 
b/task-sdk/tests/task_sdk/execution_time/test_coordinator.py @@ -0,0 +1,122 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import json + +import pytest + +from airflow.sdk.configuration import conf +from airflow.sdk.execution_time.coordinator import ( + BaseCoordinator, + CoordinatorManager, + _PythonCoordinator, + get_coordinator_manager, + reset_coordinator_manager, +) + + +class _CoordinatorA(BaseCoordinator): + def __init__(self, *, label: str = "a"): + self.label = label + + +class _CoordinatorB(BaseCoordinator): + pass + + +@pytest.fixture +def sdk_config(monkeypatch): + """Set the ``[sdk]`` env vars consumed by :meth:`CoordinatorManager.from_config`. + + :return: Callable ``apply(*, coordinators=None, queue_to_coordinator=None)`` -- + each argument is the raw JSON string for the matching env var, or ``None`` + to unset it. The conf cache is invalidated after each call (and again on + teardown) so ``from_config()`` re-reads the values just set. 
+ """ + from airflow.sdk.configuration import conf + + def _apply(*, coordinators: str | None = None, queue_to_coordinator: str | None = None) -> None: + if coordinators is None: + monkeypatch.delenv("AIRFLOW__SDK__COORDINATORS", raising=False) + else: + monkeypatch.setenv("AIRFLOW__SDK__COORDINATORS", coordinators) + if queue_to_coordinator is None: + monkeypatch.delenv("AIRFLOW__SDK__QUEUE_TO_COORDINATOR", raising=False) + else: + monkeypatch.setenv("AIRFLOW__SDK__QUEUE_TO_COORDINATOR", queue_to_coordinator) + conf.invalidate_cache() + + yield _apply + conf.invalidate_cache() + + +class TestCoordinatorManager: + @pytest.fixture(autouse=True) + def _reset_cache(self): + reset_coordinator_manager() + yield + reset_coordinator_manager() + + def test_from_config_loads_specs_and_resolves_instances(self, sdk_config): + sdk_config( + coordinators=json.dumps( + { + "alpha": { + "classpath": f"{_CoordinatorA.__module__}._CoordinatorA", + "kwargs": {"label": "alpha-label"}, + }, + "beta": {"classpath": f"{_CoordinatorB.__module__}._CoordinatorB", "kwargs": {}}, + } + ), + queue_to_coordinator=json.dumps({"queue-a": "alpha"}), + ) + manager = CoordinatorManager.from_config() + assert manager._queue_to_coordinator == {"queue-a": "alpha"} + assert manager._created_coordinators == {} + + coordinator_for_queue_a = manager.for_queue("queue-a") + assert isinstance(coordinator_for_queue_a, _CoordinatorA) + assert manager.for_queue("queue-a") is coordinator_for_queue_a, "instance should be cached" + assert manager._created_coordinators == {"alpha": coordinator_for_queue_a} + + coordinator_for_queue_missing = manager.for_queue("queue-1") + assert isinstance(coordinator_for_queue_missing, _PythonCoordinator) + assert manager.for_queue("queue-1") is coordinator_for_queue_missing + assert manager._created_coordinators == {"alpha": coordinator_for_queue_a} + + def test_from_config_empty(self, monkeypatch): + monkeypatch.delenv("AIRFLOW__SDK__COORDINATORS", raising=False) + 
monkeypatch.delenv("AIRFLOW__SDK__QUEUE_TO_COORDINATOR", raising=False) + conf.invalidate_cache() + + manager = CoordinatorManager.from_config() + assert manager._coordinator_specs == {} + assert manager._queue_to_coordinator == {} + + def test_get_coordinator_manager_is_cached(self, monkeypatch): + monkeypatch.delenv("AIRFLOW__SDK__COORDINATORS", raising=False) + + from airflow.sdk.configuration import conf + + conf.invalidate_cache() + + m1 = get_coordinator_manager() + m2 = get_coordinator_manager() + assert m1 is m2 diff --git a/task-sdk/tests/task_sdk/execution_time/test_supervisor.py b/task-sdk/tests/task_sdk/execution_time/test_supervisor.py index 11d22865c0dd0..798500d381c45 100644 --- a/task-sdk/tests/task_sdk/execution_time/test_supervisor.py +++ b/task-sdk/tests/task_sdk/execution_time/test_supervisor.py @@ -64,7 +64,6 @@ DagRunState, DagRunType, PreviousTIResponse, - TaskInstance, TaskInstanceState, ) from airflow.sdk.exceptions import AirflowRuntimeError, ErrorType, TaskAlreadyRunningError @@ -161,14 +160,17 @@ ActivitySubprocess, InProcessSupervisorComms, InProcessTestSupervisor, + ProcessTracker, _make_process_nondumpable, _remote_logging_conn, in_process_api_server, + make_buffered_socket_reader, process_log_messages_from_subprocess, set_supervisor_comms, supervise_task, ) from airflow.sdk.execution_time.task_runner import run +from airflow.sdk.execution_time.workloads.task import TaskInstanceDTO from tests_common.test_utils.config import conf_vars @@ -231,13 +233,16 @@ def test_supervise( """ Test that the supervisor validates server URL and dry_run parameter combinations correctly. 
""" - ti = TaskInstance( + ti = TaskInstanceDTO( id=uuid7(), task_id="async", dag_id="super_basic_deferred_run", run_id="d", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ) bundle_info = BundleInfo(name="my-bundle", version=None) @@ -324,13 +329,16 @@ def subprocess_main(): proc = ActivitySubprocess.start( dag_rel_path=os.devnull, bundle_info=FAKE_BUNDLE, - what=TaskInstance( + what=TaskInstanceDTO( id="4d828a62-a417-4936-a7a6-2b3fabacecab", task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), client=client_with_ti_start, target=subprocess_main, @@ -399,13 +407,16 @@ def subprocess_main(): proc = ActivitySubprocess.start( dag_rel_path=os.devnull, bundle_info=FAKE_BUNDLE, - what=TaskInstance( + what=TaskInstanceDTO( id="4d828a62-a417-4936-a7a6-2b3fabacecab", task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), client=client_with_ti_start, target=subprocess_main, @@ -496,13 +507,16 @@ def on_kill(self) -> None: proc = ActivitySubprocess.start( dag_rel_path=os.devnull, bundle_info=FAKE_BUNDLE, - what=TaskInstance( + what=TaskInstanceDTO( id=ti_id, task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), client=make_client(transport=httpx.MockTransport(handle_request)), target=subprocess_main, @@ -525,13 +539,16 @@ def subprocess_main(): proc = ActivitySubprocess.start( dag_rel_path=os.devnull, bundle_info=FAKE_BUNDLE, - what=TaskInstance( + what=TaskInstanceDTO( id="4d828a62-a417-4936-a7a6-2b3fabacecab", task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), client=client_with_ti_start, target=subprocess_main, @@ -560,8 +577,16 @@ def subprocess_main(): proc = ActivitySubprocess.start( 
dag_rel_path=os.devnull, bundle_info=FAKE_BUNDLE, - what=TaskInstance( - id=uuid7(), task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7() + what=TaskInstanceDTO( + id=uuid7(), + task_id="b", + dag_id="c", + run_id="d", + try_number=1, + dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), client=mock_client, target=subprocess_main, @@ -599,13 +624,16 @@ def test_resume_start_date_from_context(self, mocker, make_ti_context, start_dat proc = ActivitySubprocess.start( dag_rel_path=os.devnull, bundle_info=FAKE_BUNDLE, - what=TaskInstance( + what=TaskInstanceDTO( id=uuid7(), task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), client=mock_client, target=lambda: None, @@ -642,8 +670,16 @@ def subprocess_main(): proc = ActivitySubprocess.start( dag_rel_path=os.devnull, bundle_info=FAKE_BUNDLE, - what=TaskInstance( - id=ti_id, task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7() + what=TaskInstanceDTO( + id=ti_id, + task_id="b", + dag_id="c", + run_id="d", + try_number=1, + dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), client=sdk_client.Client(base_url="", dry_run=True, token=""), target=subprocess_main, @@ -679,8 +715,16 @@ def _on_child_started(self, *args, **kwargs): proc = ActivitySubprocess.start( dag_rel_path=os.devnull, bundle_info=FAKE_BUNDLE, - what=TaskInstance( - id=ti_id, task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7() + what=TaskInstanceDTO( + id=ti_id, + task_id="b", + dag_id="c", + run_id="d", + try_number=1, + dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), client=sdk_client.Client(base_url="", dry_run=True, token=""), target=subprocess_main, @@ -695,13 +739,16 @@ def test_run_simple_dag(self, test_dags_dir, captured_logs, time_machine, mocker time_machine.move_to(instant, tick=False) dagfile_path = 
test_dags_dir - ti = TaskInstance( + ti = TaskInstanceDTO( id=uuid7(), task_id="hello", dag_id="super_basic_run", run_id="c", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ) bundle_info = BundleInfo(name="my-bundle", version=None) @@ -736,13 +783,16 @@ def test_supervise_handles_deferred_task( """ instant = timezone.datetime(2024, 11, 7, 12, 34, 56, 0) - ti = TaskInstance( + ti = TaskInstanceDTO( id=uuid7(), task_id="async", dag_id="super_basic_deferred_run", run_id="d", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ) # Create a mock client to assert calls to the client @@ -863,8 +913,16 @@ def handle_request(request: httpx.Request) -> httpx.Response: proc = ActivitySubprocess.start( dag_rel_path=os.devnull, - what=TaskInstance( - id=ti_id, task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7() + what=TaskInstanceDTO( + id=ti_id, + task_id="b", + dag_id="c", + run_id="d", + try_number=1, + dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), client=make_client(transport=httpx.MockTransport(handle_request)), target=subprocess_main, @@ -941,8 +999,16 @@ def subprocess_main(): ActivitySubprocess.start( dag_rel_path=os.devnull, bundle_info=FAKE_BUNDLE, - what=TaskInstance( - id=ti_id, task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7() + what=TaskInstanceDTO( + id=ti_id, + task_id="b", + dag_id="c", + run_id="d", + try_number=1, + dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), client=make_client(transport=httpx.MockTransport(handle_request)), target=subprocess_main, @@ -1146,13 +1212,16 @@ def subprocess_main(): proc = ActivitySubprocess.start( dag_rel_path=os.devnull, bundle_info=FAKE_BUNDLE, - what=TaskInstance( + what=TaskInstanceDTO( id="4d828a62-a417-4936-a7a6-2b3fabacecab", task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7(), + pool_slots=1, 
+ queue="default", + priority_weight=1, ), client=client_with_ti_start, target=subprocess_main, @@ -1202,9 +1271,12 @@ def test_cleanup_sockets_after_delay(self, monkeypatch, mocker): class TestWatchedSubprocessKill: @pytest.fixture def mock_process(self, mocker): - process = mocker.Mock(spec=psutil.Process) - process.pid = 12345 - return process + return mocker.Mock( + spec=ProcessTracker, + pid=12345, + ProcessNotFound=psutil.NoSuchProcess, + TimeoutExpired=psutil.TimeoutExpired, + ) @pytest.fixture def watched_subprocess(self, mocker, mock_process): @@ -1303,8 +1375,16 @@ def _handler(sig, frame): proc = ActivitySubprocess.start( dag_rel_path=os.devnull, bundle_info=FAKE_BUNDLE, - what=TaskInstance( - id=ti_id, task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7() + what=TaskInstanceDTO( + id=ti_id, + task_id="b", + dag_id="c", + run_id="d", + try_number=1, + dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), client=client_with_ti_start, target=subprocess_main, @@ -3316,13 +3396,16 @@ def execute(self, context: Context): task.dag = DAG(dag_id="test_dag") # Create a simple TaskInstance datamodel to pass to the supervisor - ti = TaskInstance( + ti = TaskInstanceDTO( id=uuid7(), - task_id=task.task_id, + dag_version_id=uuid7(), dag_id="test_dag", + task_id=task.task_id, run_id="r", try_number=1, - dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ) # Patch the API client used by InProcessTestSupervisor to return a predictable TI context @@ -3739,13 +3822,16 @@ def subprocess_main(): proc = ActivitySubprocess.start( dag_rel_path=os.devnull, bundle_info=FAKE_BUNDLE, - what=TaskInstance( + what=TaskInstanceDTO( id="4d828a62-a417-4936-a7a6-2b3fabacecab", task_id="b", dag_id="c", run_id="d", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), client=client_with_ti_start, target=subprocess_main, @@ -3996,3 +4082,101 @@ def capture_on_get(key): 
assert captured == [expected_span_id] # Context is detached after dispatch — no leak. assert get_current_span().get_span_context().span_id != expected_span_id + + +class TestMakeBufferedSocketReader: + """Tests for the data= pre-seeding parameter added to make_buffered_socket_reader. + + The ``data`` kwarg lets callers inject bytes that arrived on the socket + before the selector loop started (e.g. bytes drained from a subprocess + stdout/stderr pipe while waiting for a TCP handshake). Complete lines + in ``data`` must be dispatched to the generator immediately on + construction; partial lines must be held in the buffer until the socket + delivers the remainder. + """ + + def _collecting_gen(self, received: list[bytes]): + """Generator that appends every line sent to it into *received*.""" + while True: + line = yield + received.append(bytes(line)) + + def test_empty_data_dispatches_nothing_before_socket_reads(self): + received: list[bytes] = [] + on_close = MagicMock() + make_buffered_socket_reader(self._collecting_gen(received), data=b"", on_close=on_close) + assert received == [] + + def test_complete_line_in_data_dispatched_immediately(self): + received: list[bytes] = [] + on_close = MagicMock() + make_buffered_socket_reader(self._collecting_gen(received), data=b"hello\n", on_close=on_close) + assert received == [b"hello\n"] + + def test_multiple_complete_lines_in_data_all_dispatched(self): + received: list[bytes] = [] + on_close = MagicMock() + make_buffered_socket_reader( + self._collecting_gen(received), data=b"line1\nline2\nline3\n", on_close=on_close + ) + assert received == [b"line1\n", b"line2\n", b"line3\n"] + + def test_partial_line_in_data_held_until_socket_completes_it(self): + """A line without a trailing newline in data must not be dispatched + until the socket delivers the rest of the line.""" + received: list[bytes] = [] + on_close = MagicMock() + r, w = socket.socketpair() + try: + cb, _ = make_buffered_socket_reader( + 
self._collecting_gen(received), data=b"partial", on_close=on_close + ) + assert received == [] # incomplete line — must not fire yet + w.sendall(b" rest\n") + cb(r) + assert received == [b"partial rest\n"] + finally: + r.close() + w.close() + + def test_complete_and_partial_lines_in_data_only_complete_dispatched(self): + """Only the complete line is flushed on construction; the trailing + fragment is retained and completed by subsequent socket reads.""" + received: list[bytes] = [] + on_close = MagicMock() + r, w = socket.socketpair() + try: + cb, _ = make_buffered_socket_reader( + self._collecting_gen(received), data=b"full\nincomplete", on_close=on_close + ) + assert received == [b"full\n"] + w.sendall(b" suffix\n") + cb(r) + assert received == [b"full\n", b"incomplete suffix\n"] + finally: + r.close() + w.close() + + def test_data_pre_seeding_does_not_trigger_on_close(self): + """Flushing pre-seeded lines must never invoke the on_close callback.""" + received: list[bytes] = [] + on_close = MagicMock() + make_buffered_socket_reader(self._collecting_gen(received), data=b"line\n", on_close=on_close) + on_close.assert_not_called() + + def test_socket_data_appended_after_pre_seeded_data(self): + """Bytes arriving on the socket are appended after any pre-seeded + partial content, preserving message order.""" + received: list[bytes] = [] + on_close = MagicMock() + r, w = socket.socketpair() + try: + cb, _ = make_buffered_socket_reader( + self._collecting_gen(received), data=b"seeded\n", on_close=on_close + ) + w.sendall(b"live\n") + cb(r) + assert received == [b"seeded\n", b"live\n"] + finally: + r.close() + w.close() diff --git a/task-sdk/tests/task_sdk/execution_time/test_task_runner.py b/task-sdk/tests/task_sdk/execution_time/test_task_runner.py index 4ba821e537e56..0fccb92573d28 100644 --- a/task-sdk/tests/task_sdk/execution_time/test_task_runner.py +++ b/task-sdk/tests/task_sdk/execution_time/test_task_runner.py @@ -170,6 +170,7 @@ run, startup, ) +from 
airflow.sdk.execution_time.workloads.task import TaskInstanceDTO from airflow.sdk.execution_time.xcom import XCom from airflow.sdk.serde import deserialize from airflow.triggers.base import BaseEventTrigger, BaseTrigger, TriggerEvent @@ -201,13 +202,16 @@ def execute(self, context): def test_parse(test_dags_dir: Path, make_ti_context): """Test that checks parsing of a basic dag with an un-mocked parse.""" what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="a", dag_id="super_basic", run_id="c", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), dag_rel_path="super_basic.py", bundle_info=BundleInfo(name="my-bundle", version=None), @@ -248,13 +252,16 @@ def test_parse_dag_bag(mock_dagbag, test_dags_dir: Path, make_ti_context): mock_dag.task_dict = {"a": mock_task} what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="a", dag_id="super_basic", run_id="c", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), dag_rel_path="super_basic.py", bundle_info=BundleInfo(name="my-bundle", version=None), @@ -308,13 +315,16 @@ def test_parse_dag_bag(mock_dagbag, test_dags_dir: Path, make_ti_context): def test_parse_not_found(test_dags_dir: Path, make_ti_context, dag_id, task_id, expected_error): """Check for nice error messages on dag not found.""" what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id=task_id, dag_id=dag_id, run_id="c", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), dag_rel_path="super_basic.py", bundle_info=BundleInfo(name="my-bundle", version=None), @@ -354,13 +364,16 @@ def test_parse_not_found_does_not_reschedule_when_max_attempts_reached(test_dags and should surface as a hard failure (SystemExit in the task runner process). 
""" what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="a", dag_id="madeup_dag_id", run_id="c", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), dag_rel_path="super_basic.py", bundle_info=BundleInfo(name="my-bundle", version=None), @@ -415,13 +428,16 @@ def test_main_sends_reschedule_task_when_startup_reschedules( mock_comms_instance.socket = None mock_comms_decoder_cls.__getitem__.return_value.return_value = mock_comms_instance what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="my_task", dag_id="test_dag", run_id="test_run", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, context_carrier={}, ), dag_rel_path="", @@ -588,13 +604,16 @@ def test_task_span_is_child_of_dag_run_span(make_ti_context): # Step 3: build StartupDetails with ti.context_carrier = ti_carrier. what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="my_task", dag_id="test_dag", run_id="test_run", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, context_carrier=ti_carrier, ), dag_rel_path="", @@ -656,13 +675,16 @@ def test_task_span_no_parent_when_no_context_carrier(make_ti_context): provider.add_span_processor(SimpleSpanProcessor(in_mem_exporter)) what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="standalone_task", dag_id="test_dag", run_id="test_run", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, context_carrier=None, ), dag_rel_path="", @@ -697,13 +719,16 @@ def test_parse_module_in_bundle_root(tmp_path: Path, make_ti_context): dag1_path.write_text(textwrap.dedent(dag1_code)) what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="a", dag_id="dag_name", run_id="c", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + 
priority_weight=1, ), dag_rel_path="path_test.py", bundle_info=BundleInfo(name="my-bundle", version=None), @@ -1144,13 +1169,16 @@ def test_basic_templated_dag(mocked_parse, make_ti_context, mock_supervisor_comm ) what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="templated_task", dag_id="basic_templated_dag", run_id="c", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), bundle_info=FAKE_BUNDLE, dag_rel_path="", @@ -1260,13 +1288,16 @@ def execute(self, context): instant = timezone.datetime(2024, 12, 3, 10, 0) what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="templated_task", dag_id="basic_dag", run_id="c", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), dag_rel_path="", bundle_info=FAKE_BUNDLE, @@ -1308,13 +1339,16 @@ def execute(self, context): instant = timezone.datetime(2024, 12, 3, 10, 0) what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="impersonation_task", dag_id="basic_dag", run_id="c", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), dag_rel_path="", bundle_info=FAKE_BUNDLE, @@ -1356,13 +1390,16 @@ def execute(self, context): instant = timezone.datetime(2024, 12, 3, 10, 0) what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="impersonation_task", dag_id="basic_dag", run_id="c", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), dag_rel_path="", bundle_info=FAKE_BUNDLE, @@ -1396,13 +1433,16 @@ def execute(self, context): instant = timezone.datetime(2024, 12, 3, 10, 0) what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="impersonation_task", dag_id="basic_dag", run_id="c", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), dag_rel_path="", 
bundle_info=FAKE_BUNDLE, @@ -1569,8 +1609,16 @@ def test_dag_parsing_context(make_ti_context, mock_supervisor_comms, monkeypatch task_id = "conditional_task" what = StartupDetails( - ti=TaskInstance( - id=uuid7(), task_id=task_id, dag_id=dag_id, run_id="c", try_number=1, dag_version_id=uuid7() + ti=TaskInstanceDTO( + id=uuid7(), + task_id=task_id, + dag_id=dag_id, + run_id="c", + try_number=1, + dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), dag_rel_path="dag_parsing_context.py", bundle_info=BundleInfo(name="my-bundle", version=None), @@ -2209,8 +2257,10 @@ def execute(self, context): test_task_id = "pull_task" task = CustomOperator(task_id=test_task_id) - # In case of the specific map_index or None we should check it is passed to TI - extra_for_ti = {"map_index": map_indexes} if map_indexes in (1, None) else {} + # In case of the specific map_index we should check it is passed to TI. + # ``None`` is not a valid TaskInstanceDTO.map_index value, but xcom_pull's + # behaviour with ``map_indexes=None`` is independent of the TI's own map_index. 
+ extra_for_ti = {"map_index": map_indexes} if isinstance(map_indexes, int) else {} runtime_ti = create_runtime_ti(task=task, **extra_for_ti) ser_value = BaseXCom.serialize_value(xcom_values) @@ -4105,13 +4155,16 @@ def execute(self, context): task_id="test_task_runner_calls_listeners", do_xcom_push=True, multiple_outputs=True ) what = StartupDetails( - ti=TaskInstance( + ti=TaskInstanceDTO( id=uuid7(), task_id="templated_task", dag_id="basic_dag", run_id="c", try_number=1, dag_version_id=uuid7(), + pool_slots=1, + queue="default", + priority_weight=1, ), dag_rel_path="", bundle_info=FAKE_BUNDLE, From 696862836907892c015891c331769d5cd9497114 Mon Sep 17 00:00:00 2001 From: Jens Scheffler <95105677+jscheffl@users.noreply.github.com> Date: Wed, 27 May 2026 20:17:28 +0200 Subject: [PATCH 05/28] Update providers metadata 2026-05-27 (#67611) --- generated/provider_metadata.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/generated/provider_metadata.json b/generated/provider_metadata.json index a9fbddcf47795..be6d798a022b0 100644 --- a/generated/provider_metadata.json +++ b/generated/provider_metadata.json @@ -159,6 +159,10 @@ "5.4.2": { "associated_airflow_version": "3.2.1", "date_released": "2026-05-11T15:47:10Z" + }, + "5.5.0": { + "associated_airflow_version": "3.2.1", + "date_released": "2026-05-27T12:45:31Z" } }, "akeyless": { @@ -5629,6 +5633,10 @@ "4.8.2": { "associated_airflow_version": "3.2.1", "date_released": "2026-05-11T15:47:10Z" + }, + "4.9.0": { + "associated_airflow_version": "3.2.1", + "date_released": "2026-05-27T12:45:31Z" } }, "dingding": { From 7bf71a3d444e2e1f5ef22b4c18f782c03281d001 Mon Sep 17 00:00:00 2001 From: Shahar Epstein <60007259+shahar1@users.noreply.github.com> Date: Wed, 27 May 2026 21:39:43 +0300 Subject: [PATCH 06/28] Speed up TaskGroup.topological_sort with int-indexed projected sweep (#67288) --- .../newsfragments/67288.improvement.rst | 1 + .../serialization/definitions/taskgroup.py | 97 ++++++++---- 
.../tests/unit/utils/test_task_group.py | 28 ++++ .../src/airflow/sdk/definitions/taskgroup.py | 125 +++++++++------ .../task_sdk/definitions/test_taskgroup.py | 144 ++++++++++++++++++ 5 files changed, 322 insertions(+), 73 deletions(-) create mode 100644 airflow-core/newsfragments/67288.improvement.rst diff --git a/airflow-core/newsfragments/67288.improvement.rst b/airflow-core/newsfragments/67288.improvement.rst new file mode 100644 index 0000000000000..03293e4ffa240 --- /dev/null +++ b/airflow-core/newsfragments/67288.improvement.rst @@ -0,0 +1 @@ +Speed up ``TaskGroup.topological_sort`` across Dag shapes (chain, diamond, layered, reverse-chain); benchmarks show roughly 2-8x faster on large groups. diff --git a/airflow-core/src/airflow/serialization/definitions/taskgroup.py b/airflow-core/src/airflow/serialization/definitions/taskgroup.py index d971c303c7c53..5db656019f1cb 100644 --- a/airflow-core/src/airflow/serialization/definitions/taskgroup.py +++ b/airflow-core/src/airflow/serialization/definitions/taskgroup.py @@ -18,7 +18,6 @@ from __future__ import annotations -import copy import functools import operator import weakref @@ -217,35 +216,79 @@ def iter_mapped_task_groups(self) -> Iterator[SerializedMappedTaskGroup]: def topological_sort(self) -> list[DAGNode]: """ - Sorts children in topographical order. + Sort children topologically — a task always comes after its upstream dependencies. - A task in the result would come after any of its upstream dependencies. + See ``TaskGroup.topological_sort`` in task-sdk for the algorithm. Cycles are + treated as corrupt input: ``DAG.check_cycle`` rejects cyclic Dags before + serialization, so a cycle reaching this code indicates malformed serialized data, + and we raise ``ValueError`` rather than silently looping forever. """ - # This uses a modified version of Kahn's Topological Sort algorithm to - # not have to pre-compute the "in-degree" of the nodes. 
- graph_unsorted = copy.copy(self.children) - graph_sorted: list[DAGNode] = [] - if not self.children: - return graph_sorted - while graph_unsorted: - for node in list(graph_unsorted.values()): - for edge in node.upstream_list: - if edge.node_id in graph_unsorted: + children = self.children + if not children: + return [] + nodes = list(children.values()) + id_to_idx = {nid: i for i, nid in enumerate(children)} + projected = [self._project_child_deps(i, c, id_to_idx) for i, c in enumerate(nodes)] + return self._sweep_projection(nodes, projected) + + def _project_child_deps( + self, child_idx: int, child: DAGNode, id_to_idx: dict[str, int] + ) -> tuple[int, ...]: + upstream_ids = child.upstream_task_ids + if not upstream_ids: + return () + sib_deps: set[int] = set() + for edge_id in upstream_ids: + j = id_to_idx.get(edge_id) + if j is not None: + sib_deps.add(j) + continue + tg = self.dag.get_task(edge_id).task_group + while tg is not None: + j = id_to_idx.get(tg.node_id) + if j is not None: + sib_deps.add(j) + break + tg = tg.parent_group + sib_deps.discard(child_idx) + return tuple(sib_deps) + + def _sweep_projection(self, nodes: list[DAGNode], projected: list[tuple[int, ...]]) -> list[DAGNode]: + n = len(nodes) + emitted = bytearray(n) + order: list[DAGNode] = [] + order_append = order.append + pending: list[int] = [] + pending_append = pending.append + for i in range(n): + blocked = False + for d in projected[i]: + if not emitted[d]: + blocked = True + break + if blocked: + pending_append(i) + continue + emitted[i] = 1 + order_append(nodes[i]) + while pending: + next_pending: list[int] = [] + next_pending_append = next_pending.append + for i in pending: + blocked = False + for d in projected[i]: + if not emitted[d]: + blocked = True break - # Check for task's group is a child (or grand child) of this TG, - tg = edge.task_group - while tg: - if tg.node_id in graph_unsorted: - break - tg = tg.parent_group - - if tg: - # We are already going to visit that TG - break 
- else: - del graph_unsorted[node.node_id] - graph_sorted.append(node) - return graph_sorted + if blocked: + next_pending_append(i) + continue + emitted[i] = 1 + order_append(nodes[i]) + if len(next_pending) == len(pending): + raise ValueError(f"A cyclic dependency occurred in dag: {self.dag_id}") + pending = next_pending + return order def add(self, node: DAGNode) -> DAGNode: # Set the TG first, as setting it might change the return value of node_id! diff --git a/airflow-core/tests/unit/utils/test_task_group.py b/airflow-core/tests/unit/utils/test_task_group.py index 3b62ad75a7290..ffc217fc0789d 100644 --- a/airflow-core/tests/unit/utils/test_task_group.py +++ b/airflow-core/tests/unit/utils/test_task_group.py @@ -1117,6 +1117,34 @@ def nested_topo(group): ] +def test_topological_sort_serialized_layered(): + """SerializedTaskGroup.topological_sort emits a valid order after DAG round-trip. + + Exercises the projected-sweep path on the serialization variant (which is otherwise + untested), using a layered shape that forces multi-pass behavior. 
+ """ + with DAG("test_topo_sort_serialized", schedule=None, start_date=DEFAULT_DATE) as dag: + layers: list[list[BaseOperator]] = [] + for layer_idx in range(4): + cur = [EmptyOperator(task_id=f"L{layer_idx}_t{i}") for i in range(3)] + if layers: + for upstream in layers[-1]: + upstream >> cur + layers.append(cur) + + serialized = create_scheduler_dag(dag) + order = [node.node_id for node in serialized.task_group.topological_sort()] + position = {nid: i for i, nid in enumerate(order)} + + assert set(position) == {t.task_id for layer in layers for t in layer} + for layer_idx in range(len(layers) - 1): + for upstream in layers[layer_idx]: + for downstream in layers[layer_idx + 1]: + assert position[upstream.task_id] < position[downstream.task_id], ( + f"{upstream.task_id!r} must precede {downstream.task_id!r}, got {order!r}" + ) + + def test_task_group_arrow_with_setup_group(): with DAG(dag_id="setup_group_teardown_group") as dag: with TaskGroup("group_1") as g1: diff --git a/task-sdk/src/airflow/sdk/definitions/taskgroup.py b/task-sdk/src/airflow/sdk/definitions/taskgroup.py index 50527f6b43bcd..67376cb817ae4 100644 --- a/task-sdk/src/airflow/sdk/definitions/taskgroup.py +++ b/task-sdk/src/airflow/sdk/definitions/taskgroup.py @@ -523,57 +523,90 @@ def hierarchical_alphabetical_sort(self): key=lambda node: (not isinstance(node, TaskGroup), node.node_id), ) - def topological_sort(self): + def topological_sort(self) -> list[DAGNode]: """ - Sorts children in topographical order, such that a task comes after any of its upstream dependencies. + Sort children topologically — a task always comes after its upstream dependencies. - :return: list of tasks in topological order + Projects each child's per-task upstream IDs onto sibling-level integer indices once, + then runs a greedy multi-pass sweep using a bytearray-backed emission flag. 
Equivalent + in emission order to the previous modified-Kahn implementation, but moves the per-edge + ``upstream_list`` materialization and ``parent_group`` walks out of the sweep's inner + loop so they happen once per call instead of once per outer-loop pass. """ - # This uses a modified version of Kahn's Topological Sort algorithm to - # not have to pre-compute the "in-degree" of the nodes. - graph_unsorted = copy.copy(self.children) - - graph_sorted: list[DAGNode] = [] - - # special case - if not self.children: - return graph_sorted - - # Run until the unsorted graph is empty. - while graph_unsorted: - # Go through each of the node/edges pairs in the unsorted graph. If a set of edges doesn't contain - # any nodes that haven't been resolved, that is, that are still in the unsorted graph, remove the - # pair from the unsorted graph, and append it to the sorted graph. Note here that by using - # the values() method for iterating, a copy of the unsorted graph is used, allowing us to modify - # the unsorted graph as we move through it. - # - # We also keep a flag for checking that graph is acyclic, which is true if any nodes are resolved - # during each pass through the graph. If not, we need to exit as the graph therefore can't be - # sorted. 
- acyclic = False - for node in list(graph_unsorted.values()): - for edge in node.upstream_list: - if edge.node_id in graph_unsorted: - break - # Check for task's group is a child (or grand child) of this TG, - tg = edge.task_group - while tg: - if tg.node_id in graph_unsorted: - break - tg = tg.parent_group - - if tg: - # We are already going to visit that TG + children = self.children + if not children: + return [] + nodes = list(children.values()) + id_to_idx = {nid: i for i, nid in enumerate(children)} + projected = [self._project_child_deps(i, c, id_to_idx) for i, c in enumerate(nodes)] + return self._sweep_projection(nodes, projected) + + def _project_child_deps( + self, child_idx: int, child: DAGNode, id_to_idx: dict[str, int] + ) -> tuple[int, ...]: + # Project one child's per-task upstream IDs onto sibling-level integer indices. + # Self-deps are filtered once at the end via ``discard`` so the inner loop stays tight. + upstream_ids = child.upstream_task_ids + if not upstream_ids: + return () + sib_deps: set[int] = set() + for edge_id in upstream_ids: + j = id_to_idx.get(edge_id) + if j is not None: + sib_deps.add(j) + continue + tg = self.dag.get_task(edge_id).task_group + while tg is not None: + j = id_to_idx.get(tg.node_id) + if j is not None: + sib_deps.add(j) + break + tg = tg.parent_group + sib_deps.discard(child_idx) + return tuple(sib_deps) + + def _sweep_projection(self, nodes: list[DAGNode], projected: list[tuple[int, ...]]) -> list[DAGNode]: + # Greedy multi-pass sweep. emitted[i] == 1 iff nodes[i] has been emitted. + # Pass 1 iterates range(n) directly; only blocked nodes are recorded into + # ``pending`` and re-checked in subsequent passes. Avoids paying for a + # ``list(range(n))`` allocation on single-pass shapes (the common case) while + # still skipping already-emitted nodes on multi-pass shapes (e.g. a diamond's + # single trailing sink). 
+ n = len(nodes) + emitted = bytearray(n) + order: list[DAGNode] = [] + order_append = order.append + pending: list[int] = [] + pending_append = pending.append + for i in range(n): + blocked = False + for d in projected[i]: + if not emitted[d]: + blocked = True + break + if blocked: + pending_append(i) + continue + emitted[i] = 1 + order_append(nodes[i]) + while pending: + next_pending: list[int] = [] + next_pending_append = next_pending.append + for i in pending: + blocked = False + for d in projected[i]: + if not emitted[d]: + blocked = True break - else: - acyclic = True - del graph_unsorted[node.node_id] - graph_sorted.append(node) - - if not acyclic: + if blocked: + next_pending_append(i) + continue + emitted[i] = 1 + order_append(nodes[i]) + if len(next_pending) == len(pending): raise AirflowDagCycleException(f"A cyclic dependency occurred in dag: {self.dag_id}") - - return graph_sorted + pending = next_pending + return order def iter_mapped_task_groups(self) -> Iterator[MappedTaskGroup]: """ diff --git a/task-sdk/tests/task_sdk/definitions/test_taskgroup.py b/task-sdk/tests/task_sdk/definitions/test_taskgroup.py index 18c7f65faf2e4..d1ba11e3056c9 100644 --- a/task-sdk/tests/task_sdk/definitions/test_taskgroup.py +++ b/task-sdk/tests/task_sdk/definitions/test_taskgroup.py @@ -957,3 +957,147 @@ def test_getitem_missing_is_key_error(self): with pytest.raises(KeyError): tg["nonexistent"] + + +# --- topological_sort: cross-shape correctness --- +# +# Mirrors the shapes covered by the benchmark gist referenced from PR #67288 +# (https://gist.github.com/shahar1/9c61dc9f34f7e77cd29cfb9d67af7ceb). +# Wall-clock timing is intentionally not asserted here — CI runners are too +# variable for ms thresholds to be meaningful. The gist above can be run +# manually to gauge performance. 
+ + +def _make_chain(n: int) -> DAG: + with DAG(f"chain_{n}", schedule=None, start_date=DEFAULT_DATE) as dag: + prev = None + for i in range(n): + t = EmptyOperator(task_id=f"t{i}") + if prev is not None: + prev >> t + prev = t + return dag + + +def _make_reverse_chain(n: int) -> DAG: + with DAG(f"reverse_chain_{n}", schedule=None, start_date=DEFAULT_DATE) as dag: + tasks = [EmptyOperator(task_id=f"t{n - 1 - i}") for i in range(n)] + by_id = {t.task_id: t for t in tasks} + for i in range(n - 1): + by_id[f"t{i}"] >> by_id[f"t{i + 1}"] + return dag + + +def _make_diamond(n: int) -> DAG: + with DAG(f"diamond_{n}", schedule=None, start_date=DEFAULT_DATE) as dag: + root = EmptyOperator(task_id="root") + sink = EmptyOperator(task_id="sink") + middles = [EmptyOperator(task_id=f"m{i}") for i in range(max(n - 2, 1))] + root >> middles >> sink + return dag + + +def _make_independent(n: int) -> DAG: + with DAG(f"independent_{n}", schedule=None, start_date=DEFAULT_DATE) as dag: + for i in range(n): + EmptyOperator(task_id=f"t{i}") + return dag + + +def _make_layered(n: int, layers: int = 4) -> DAG: + per_layer = max(n // layers, 1) + with DAG(f"layered_{n}", schedule=None, start_date=DEFAULT_DATE) as dag: + prev_layer: list[EmptyOperator] = [] + for layer in range(layers): + cur = [EmptyOperator(task_id=f"L{layer}_t{i}") for i in range(per_layer)] + if prev_layer: + for upstream in prev_layer: + upstream >> cur + prev_layer = cur + return dag + + +def _make_nested_groups(n: int, depth: int = 3) -> DAG: + per_group = max(n // (depth * depth), 1) + with DAG(f"nested_{n}", schedule=None, start_date=DEFAULT_DATE) as dag: + + def build_group(level: int, idx: int) -> TaskGroup: + with TaskGroup(group_id=f"g{level}_{idx}") as tg: + prev = None + for i in range(per_group): + t = EmptyOperator(task_id=f"l{level}_g{idx}_t{i}") + if prev is not None: + prev >> t + prev = t + if level + 1 < depth: + inner_prev = None + for j in range(depth): + inner = build_group(level + 1, j) + if 
inner_prev is not None: + inner_prev >> inner + inner_prev = inner + return tg + + top_prev = None + for j in range(depth): + top = build_group(0, j) + if top_prev is not None: + top_prev >> top + top_prev = top + return dag + + +def _project_sibling(group: TaskGroup, upstream_task_id: str, child_id: str) -> str | None: + """Mirror of TaskGroup._project_child_deps' projection, returning a string ID.""" + children = group.children + if upstream_task_id in children: + return upstream_task_id if upstream_task_id != child_id else None + upstream = group.dag.get_task(upstream_task_id) + tg = upstream.task_group + while tg is not None: + if tg.node_id in children: + return tg.node_id if tg.node_id != child_id else None + tg = tg.parent_group + return None + + +def _walk_groups(tg: TaskGroup): + yield tg + for child in tg.children.values(): + if isinstance(child, TaskGroup): + yield from _walk_groups(child) + + +def _assert_valid_topological_order(group: TaskGroup, order: list[str]) -> None: + position = {node_id: i for i, node_id in enumerate(order)} + assert set(position) == set(group.children), ( + f"topological_sort output {order!r} does not cover children of {group.node_id!r}" + ) + for child_id, child in group.children.items(): + for upstream_id in child.upstream_task_ids: + sib = _project_sibling(group, upstream_id, child_id) + if sib is None: + continue + assert position[sib] < position[child_id], ( + f"In group {group.node_id!r}: sibling {sib!r} must precede {child_id!r}, got order {order!r}" + ) + + +@pytest.mark.parametrize( + ("shape", "builder"), + [ + ("chain", _make_chain), + ("rev-chain", _make_reverse_chain), + ("diamond", _make_diamond), + ("independent", _make_independent), + ("layered", _make_layered), + ("nested", _make_nested_groups), + ], +) +@pytest.mark.parametrize("n", [20, 100]) +def test_topological_sort_shape_correctness(shape, builder, n): + """topological_sort emits a valid order for every nested group across DAG shapes.""" + dag = 
builder(n) + for group in _walk_groups(dag.task_group): + order = [node.node_id for node in group.topological_sort()] + _assert_valid_topological_order(group, order) From 03291c4e676595717cfb0e5a0b28020b87dcef42 Mon Sep 17 00:00:00 2001 From: stephen-bracken <18257727+stephen-bracken@users.noreply.github.com> Date: Wed, 27 May 2026 19:45:56 +0100 Subject: [PATCH 07/28] Add support for mTLS and private CAs to the api client / server (#67214) --- .../src/airflow/api_fastapi/gunicorn_app.py | 10 +++++ airflow-core/src/airflow/cli/cli_config.py | 13 +++++++ .../cli/commands/api_server_command.py | 30 ++++++++++++--- .../src/airflow/config_templates/config.yml | 23 +++++++++++ .../cli/commands/test_api_server_command.py | 38 +++++++++++++------ .../cli/commands/test_gunicorn_monitor.py | 4 ++ task-sdk/src/airflow/sdk/api/client.py | 18 +++++++-- task-sdk/tests/task_sdk/api/test_client.py | 19 ++++++++++ 8 files changed, 135 insertions(+), 20 deletions(-) diff --git a/airflow-core/src/airflow/api_fastapi/gunicorn_app.py b/airflow-core/src/airflow/api_fastapi/gunicorn_app.py index c01d3e8b2aa14..91330de9043b5 100644 --- a/airflow-core/src/airflow/api_fastapi/gunicorn_app.py +++ b/airflow-core/src/airflow/api_fastapi/gunicorn_app.py @@ -44,6 +44,8 @@ from airflow.configuration import conf if TYPE_CHECKING: + from ssl import VerifyMode + from fastapi import FastAPI from gunicorn.app.base import Application @@ -243,6 +245,8 @@ def create_gunicorn_app( worker_timeout: int, ssl_cert: str | None = None, ssl_key: str | None = None, + ssl_ca_file: str | None = None, + ssl_cert_reqs: VerifyMode | None = None, log_level: str = "info", proxy_headers: bool = False, ) -> AirflowGunicornApp: @@ -255,6 +259,8 @@ def create_gunicorn_app( :param worker_timeout: Worker timeout in seconds :param ssl_cert: Path to SSL certificate file :param ssl_key: Path to SSL key file + :param ssl_ca_file: Path to the SSL CA certs file + :param ssl_cert_reqs: SSL client certificate requirements :param 
log_level: Log level (debug, info, warning, error, critical) :param proxy_headers: Whether to trust proxy headers """ @@ -275,6 +281,10 @@ def create_gunicorn_app( if ssl_cert and ssl_key: options["certfile"] = ssl_cert options["keyfile"] = ssl_key + if ssl_ca_file: + options["ca_certs"] = ssl_ca_file + if ssl_cert_reqs is not None: + options["cert_reqs"] = ssl_cert_reqs if proxy_headers: options["forwarded_allow_ips"] = "*" diff --git a/airflow-core/src/airflow/cli/cli_config.py b/airflow-core/src/airflow/cli/cli_config.py index e9fd5e613a076..57b2aa9e41cbb 100644 --- a/airflow-core/src/airflow/cli/cli_config.py +++ b/airflow-core/src/airflow/cli/cli_config.py @@ -770,6 +770,17 @@ def string_lower_type(val): default=conf.get("api", "ssl_key"), help="Path to the key to use with the SSL certificate", ) +ARG_SSL_CA_FILE = Arg( + ("--ssl-ca-file",), + default=conf.get("api", "ssl_ca_file", fallback=None), + help="(Optional) Path to the SSL CA file", +) +ARG_SSL_CERT_REQS = Arg( + ("--ssl-cert-reqs",), + default=conf.get("api", "ssl_cert_reqs", fallback="none"), + help="(Optional) Set certificate verification options.", + choices=("none", "optional", "required"), +) ARG_DEV = Arg(("-d", "--dev"), help="Start in development mode with hot-reload enabled", action="store_true") # scheduler @@ -2173,6 +2184,8 @@ class GroupCommand(NamedTuple): ARG_LOG_FILE, ARG_SSL_CERT, ARG_SSL_KEY, + ARG_SSL_CA_FILE, + ARG_SSL_CERT_REQS, ARG_DEV, ARG_API_SERVER_ALLOW_PROXY_FORWARDING, ), diff --git a/airflow-core/src/airflow/cli/commands/api_server_command.py b/airflow-core/src/airflow/cli/commands/api_server_command.py index 11b57305b1a66..9bf3a0df1f04b 100644 --- a/airflow-core/src/airflow/cli/commands/api_server_command.py +++ b/airflow-core/src/airflow/cli/commands/api_server_command.py @@ -19,6 +19,7 @@ from __future__ import annotations import os +import ssl import sys import textwrap from collections.abc import Callable @@ -64,7 +65,7 @@ def _run_api_server_with_gunicorn( """ from 
airflow.api_fastapi.gunicorn_app import create_gunicorn_app - ssl_cert, ssl_key = _get_ssl_cert_and_key_filepaths(args) + ssl_cert, ssl_key, ssl_ca_file = _get_ssl_filepaths(args) log_level = conf.get("logging", "uvicorn_logging_level", fallback="info").lower() @@ -75,6 +76,8 @@ def _run_api_server_with_gunicorn( worker_timeout=worker_timeout, ssl_cert=ssl_cert, ssl_key=ssl_key, + ssl_ca_file=ssl_ca_file, + ssl_cert_reqs=_ssl_cert_reqs(args), log_level=log_level, proxy_headers=proxy_headers, ) @@ -96,7 +99,7 @@ def _run_api_server_with_uvicorn( This is the default mode. Note that uvicorn's multiprocess mode does not share memory between workers (each worker loads everything independently). """ - ssl_cert, ssl_key = _get_ssl_cert_and_key_filepaths(args) + ssl_cert, ssl_key, ssl_ca_file = _get_ssl_filepaths(args) # setproctitle causes issue on Mac OS: https://github.com/benoitc/gunicorn/issues/3021 os_type = sys.platform @@ -118,6 +121,8 @@ def _run_api_server_with_uvicorn( "timeout_worker_healthcheck": worker_timeout, "ssl_keyfile": ssl_key, "ssl_certfile": ssl_cert, + "ssl_ca_certs": ssl_ca_file, + "ssl_cert_reqs": _ssl_cert_reqs(args), # HttpAccessLogMiddleware handles access logging; disable uvicorn's built-in access log. 
"access_log": False, "log_level": uvicorn_log_level, @@ -254,21 +259,34 @@ def api_server(args: Namespace): ) -def _get_ssl_cert_and_key_filepaths(cli_arguments) -> tuple[str | None, str | None]: +def _get_ssl_filepaths(cli_arguments) -> tuple[str | None, str | None, str | None]: error_template_1 = "Need both, have provided {} but not {}" error_template_2 = "SSL related file does not exist {}" - ssl_cert, ssl_key = cli_arguments.ssl_cert, cli_arguments.ssl_key + ssl_cert, ssl_key, ssl_ca_file = cli_arguments.ssl_cert, cli_arguments.ssl_key, cli_arguments.ssl_ca_file if ssl_cert and ssl_key: if not os.path.isfile(ssl_cert): raise AirflowConfigException(error_template_2.format(ssl_cert)) if not os.path.isfile(ssl_key): raise AirflowConfigException(error_template_2.format(ssl_key)) + if ssl_ca_file is not None and not os.path.isfile(ssl_ca_file): + raise AirflowConfigException(error_template_2.format(ssl_ca_file)) - return (ssl_cert, ssl_key) + return (ssl_cert, ssl_key, ssl_ca_file) if ssl_cert: raise AirflowConfigException(error_template_1.format("SSL certificate", "SSL key")) if ssl_key: raise AirflowConfigException(error_template_1.format("SSL key", "SSL certificate")) - return (None, None) + return (None, None, None) + + +def _ssl_cert_reqs(cli_arguments): + cert_reqs = cli_arguments.ssl_cert_reqs + if cert_reqs is None or cert_reqs == "none": + return ssl.CERT_NONE + if cert_reqs == "required": + return ssl.CERT_REQUIRED + if cert_reqs == "optional": + return ssl.CERT_OPTIONAL + raise ValueError(f"Invalid ssl_cert_reqs option: {cert_reqs}") diff --git a/airflow-core/src/airflow/config_templates/config.yml b/airflow-core/src/airflow/config_templates/config.yml index 502934dbc7749..4c650bbeb64e9 100644 --- a/airflow-core/src/airflow/config_templates/config.yml +++ b/airflow-core/src/airflow/config_templates/config.yml @@ -1767,6 +1767,22 @@ api: type: string example: ~ default: "" + ssl_ca_file: + description: | + Path to the SSL CA file for the api server. 
Defaults to None. + version_added: ~ + type: string + example: ~ + default: ~ + ssl_cert_reqs: + description: | + Enable ssl certificate verification on the api server. + See https://docs.python.org/3/library/ssl.html#ssl.SSLContext.verify_mode + Valid options are 'none', 'optional' or 'required' + version_added: ~ + type: string + example: "required" + default: "none" maximum_page_limit: description: | Used to set the maximum page limit for API requests. If limit passed as param @@ -1885,6 +1901,13 @@ api: type: string example: "/etc/airflow/certs/client.key" default: ~ + client_use_public_certs: + description: | + Enable loading of public CA certificates from certifi into the client SSL context. + version_added: ~ + type: boolean + example: ~ + default: "True" workers: description: Configuration related to workers that run Airflow tasks. options: diff --git a/airflow-core/tests/unit/cli/commands/test_api_server_command.py b/airflow-core/tests/unit/cli/commands/test_api_server_command.py index 4d6e3f62d5b61..55c375721a107 100644 --- a/airflow-core/tests/unit/cli/commands/test_api_server_command.py +++ b/airflow-core/tests/unit/cli/commands/test_api_server_command.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import annotations +import ssl import sys from unittest import mock @@ -145,12 +146,18 @@ def test_api_apps_env(self, args, dev_mode, original_env): "ssl_cert_path_placeholder", "--ssl-key", "ssl_key_path_placeholder", + "--ssl-ca-file", + "ssl_ca_file_placeholder", + "--ssl-cert-reqs", + "required", "--apps", "core", ], { "ssl_keyfile": "ssl_key_path_placeholder", "ssl_certfile": "ssl_cert_path_placeholder", + "ssl_ca_certs": "ssl_ca_file_placeholder", + "ssl_cert_reqs": ssl.CERT_REQUIRED, }, id="api-server with SSL cert and key", ), @@ -163,20 +170,25 @@ def test_api_apps_env(self, args, dev_mode, original_env): { "ssl_keyfile": None, "ssl_certfile": None, + "ssl_ca_certs": None, + "ssl_cert_reqs": ssl.CERT_NONE, "log_config": "my_log_config.yaml", }, id="api-server with log config", ), ], ) - def test_args_to_uvicorn(self, ssl_cert_and_key, cli_args, expected_additional_kwargs): - cert_path, key_path = ssl_cert_and_key + def test_args_to_uvicorn(self, ssl_cert_key_and_ca, cli_args, expected_additional_kwargs): + cert_path, key_path, ca_path = ssl_cert_key_and_ca if "ssl_cert_path_placeholder" in cli_args: cli_args[cli_args.index("ssl_cert_path_placeholder")] = str(cert_path) expected_additional_kwargs["ssl_certfile"] = str(cert_path) if "ssl_key_path_placeholder" in cli_args: cli_args[cli_args.index("ssl_key_path_placeholder")] = str(key_path) expected_additional_kwargs["ssl_keyfile"] = str(key_path) + if "ssl_ca_file_placeholder" in cli_args: + cli_args[cli_args.index("ssl_ca_file_placeholder")] = str(ca_path) + expected_additional_kwargs["ssl_ca_certs"] = str(ca_path) with ( mock.patch("uvicorn.run") as mock_run, @@ -247,6 +259,8 @@ def test_run_command_daemon( timeout_worker_healthcheck=60, ssl_keyfile=None, ssl_certfile=None, + ssl_ca_certs=None, + ssl_cert_reqs=ssl.CERT_NONE, access_log=False, log_level="info", proxy_headers=False, @@ -314,22 +328,24 @@ def test_run_command_daemon( (["--ssl-key", "_.key"], "Need both.*key.*certificate"), ], 
) - def test_get_ssl_cert_and_key_filepaths_with_incorrect_usage(self, ssl_arguments, error_pattern): + def test_get_ssl_filepaths_with_incorrect_usage(self, ssl_arguments, error_pattern): args = self.parser.parse_args(["api-server"] + ssl_arguments) with pytest.raises(AirflowConfigException, match=error_pattern): - api_server_command._get_ssl_cert_and_key_filepaths(args) + api_server_command._get_ssl_filepaths(args) - def test_get_ssl_cert_and_key_filepaths_with_correct_usage(self, ssl_cert_and_key): - cert_path, key_path = ssl_cert_and_key + def test_get_ssl_filepaths_with_correct_usage(self, ssl_cert_key_and_ca): + cert_path, key_path, ca_path = ssl_cert_key_and_ca args = self.parser.parse_args( - ["api-server"] + ["--ssl-cert", str(cert_path), "--ssl-key", str(key_path)] + ["api-server"] + + ["--ssl-cert", str(cert_path), "--ssl-key", str(key_path), "--ssl-ca-file", str(ca_path)] ) - assert api_server_command._get_ssl_cert_and_key_filepaths(args) == (str(cert_path), str(key_path)) + assert api_server_command._get_ssl_filepaths(args) == (str(cert_path), str(key_path), str(ca_path)) @pytest.fixture - def ssl_cert_and_key(self, tmp_path): - cert_path, key_path = tmp_path / "_.crt", tmp_path / "_.key" + def ssl_cert_key_and_ca(self, tmp_path): + cert_path, key_path, ca_path = tmp_path / "_.crt", tmp_path / "_.key", tmp_path / "ca.crt" cert_path.touch() key_path.touch() - return cert_path, key_path + ca_path.touch() + return cert_path, key_path, ca_path diff --git a/airflow-core/tests/unit/cli/commands/test_gunicorn_monitor.py b/airflow-core/tests/unit/cli/commands/test_gunicorn_monitor.py index f80410c0a6c48..3cd9c3e74916c 100644 --- a/airflow-core/tests/unit/cli/commands/test_gunicorn_monitor.py +++ b/airflow-core/tests/unit/cli/commands/test_gunicorn_monitor.py @@ -430,12 +430,16 @@ def test_create_app_with_ssl(self): worker_timeout=120, ssl_cert="/path/to/cert.pem", ssl_key="/path/to/key.pem", + ssl_ca_file="/path/to/ca.crt", + ssl_cert_reqs=1, ) options = 
mock_app_class.call_args[0][0] assert options["certfile"] == "/path/to/cert.pem" assert options["keyfile"] == "/path/to/key.pem" + assert options["ca_certs"] == "/path/to/ca.crt" + assert options["cert_reqs"] == 1 def test_create_app_with_proxy_headers(self): """Test creating an app with proxy headers enabled.""" diff --git a/task-sdk/src/airflow/sdk/api/client.py b/task-sdk/src/airflow/sdk/api/client.py index 1da539f29a3dc..da0e0b56f4d6c 100644 --- a/task-sdk/src/airflow/sdk/api/client.py +++ b/task-sdk/src/airflow/sdk/api/client.py @@ -1089,9 +1089,11 @@ def noop_handler(request: httpx.Request) -> httpx.Response: API_RETRY_WAIT_MIN = conf.getfloat("workers", "execution_api_retry_wait_min") API_RETRY_WAIT_MAX = conf.getfloat("workers", "execution_api_retry_wait_max") API_SSL_CERT_PATH = conf.get("api", "ssl_cert") +API_SSL_CA_FILE_PATH = conf.get("api", "ssl_ca_file", fallback=None) API_TIMEOUT = conf.getfloat("workers", "execution_api_timeout") API_CLIENT_SSL_CERT = conf.get("api", "client_ssl_cert", fallback=None) API_CLIENT_SSL_KEY = conf.get("api", "client_ssl_key", fallback=None) +API_CLIENT_USE_PUBLIC_CERTS = conf.getboolean("api", "client_use_public_certs", fallback=True) def _should_retry_api_request(exception: BaseException) -> bool: @@ -1105,9 +1107,19 @@ def _should_retry_api_request(exception: BaseException) -> bool: class Client(httpx.Client): @lru_cache() @staticmethod - def _get_ssl_context_cached(ca_file: str, ca_path: str | None = None) -> ssl.SSLContext: - """Cache SSL context to prevent memory growth from repeated context creation.""" + def _get_ssl_context_cached(ca_file: str | None = None, ca_path: str | None = None) -> ssl.SSLContext: + """ + Cache SSL context to prevent memory growth from repeated context creation. + + If `client_use_public_certs` is enabled certifi.where() will be loaded into the context. + + :param ca_file: Certificate Authority, optional. + :param ca_path: Certificate File, optional. 
+ """ ctx = ssl.create_default_context(cafile=ca_file) + if API_CLIENT_USE_PUBLIC_CERTS: + log.info("Using Public CAs from certifi") + ctx.load_verify_locations(certifi.where()) if ca_path: ctx.load_verify_locations(ca_path) return ctx @@ -1125,7 +1137,7 @@ def __init__(self, *, base_url: str | None, dry_run: bool = False, token: str, * else: kwargs["base_url"] = base_url # Call via the class to avoid binding lru_cache wires to this instance. - kwargs["verify"] = type(self)._get_ssl_context_cached(certifi.where(), API_SSL_CERT_PATH) + kwargs["verify"] = type(self)._get_ssl_context_cached(API_SSL_CA_FILE_PATH, API_SSL_CERT_PATH) if API_CLIENT_SSL_CERT or API_CLIENT_SSL_KEY: if not (API_CLIENT_SSL_CERT and API_CLIENT_SSL_KEY): diff --git a/task-sdk/tests/task_sdk/api/test_client.py b/task-sdk/tests/task_sdk/api/test_client.py index 93e2041878e01..dfe9d9679359a 100644 --- a/task-sdk/tests/task_sdk/api/test_client.py +++ b/task-sdk/tests/task_sdk/api/test_client.py @@ -106,6 +106,25 @@ def handle_request(request: httpx.Request) -> httpx.Response: assert isinstance(err.value, FileNotFoundError) + @mock.patch("airflow.sdk.api.client.API_SSL_CA_FILE_PATH", "/capath/does/not/exist/") + def test_ssl_with_cafile(self): + def handle_request(request: httpx.Request) -> httpx.Response: + return httpx.Response(status_code=200) + + with pytest.raises(FileNotFoundError) as err: + make_client(httpx.MockTransport(handle_request)) + + assert isinstance(err.value, FileNotFoundError) + + @mock.patch("ssl.create_default_context") + @mock.patch("airflow.sdk.api.client.API_CLIENT_USE_PUBLIC_CERTS", True) + def test_use_public_certs(self, mock_default_context): + def handle_request(request: httpx.Request) -> httpx.Response: + return httpx.Response(status_code=200) + + make_client(httpx.MockTransport(handle_request)) + mock_default_context.return_value.load_verify_locations.assert_called_with(certifi.where()) + @mock.patch("airflow.sdk.api.client.API_TIMEOUT", 60.0) def 
test_timeout_configuration(self): def handle_request(request: httpx.Request) -> httpx.Response: From 2ac00bbc27d4ae34e27230db3bebed52567e8287 Mon Sep 17 00:00:00 2001 From: johanjk <45788075+johanjk@users.noreply.github.com> Date: Wed, 27 May 2026 21:56:27 +0200 Subject: [PATCH 08/28] Chart: add serviceAccountTokenVolume to cleanup cron (#67446) * [helm chart] add cleanup serviceAccountTokenVolume * [helm chart] update doc for cleanup --- chart/docs/production-guide.rst | 4 +- chart/templates/_helpers.yaml | 51 +++++++++++++++++++ chart/templates/cleanup/cleanup-cronjob.yaml | 3 ++ .../scheduler/scheduler-deployment.yaml | 30 +---------- chart/values.schema.json | 39 +++++++++++++- chart/values.yaml | 19 +++++++ 6 files changed, 115 insertions(+), 31 deletions(-) diff --git a/chart/docs/production-guide.rst b/chart/docs/production-guide.rst index 3e5ffc51da6f0..2ce003aa77b28 100644 --- a/chart/docs/production-guide.rst +++ b/chart/docs/production-guide.rst @@ -788,12 +788,12 @@ This container-specific approach ensures that: Configuration Options ^^^^^^^^^^^^^^^^^^^^^ -The service account token volume configuration is available for the scheduler component and includes the following options: +The service account token volume configuration is available for the scheduler and cleanup components and includes the following options: .. code-block:: yaml :caption: values.yaml - scheduler: + (scheduler|cleanup): serviceAccount: automountServiceAccountToken: false serviceAccountTokenVolume: diff --git a/chart/templates/_helpers.yaml b/chart/templates/_helpers.yaml index 709e9a22d1bf8..f04e55b18d363 100644 --- a/chart/templates/_helpers.yaml +++ b/chart/templates/_helpers.yaml @@ -1154,3 +1154,54 @@ Usage: {{- end -}} {{- toYaml $newValues -}} {{- end -}} + + +{{/* +serviceAccountTokenVolume mount + +Usage: + {{ include "serviceAccountTokenVolumeMount" (list . .Values.scheduler.serviceAccount) }} +*/}} +{{- define "serviceAccountTokenVolumeMount" -}} + {{- $root := index .
0 -}} + {{- $sa := index . 1 -}} + {{- if and (eq (include "airflow.podLaunchingExecutor" $root ) "true") (not $sa.automountServiceAccountToken) $sa.serviceAccountTokenVolume.enabled }} +- name: {{ $sa.serviceAccountTokenVolume.volumeName }} + mountPath: {{ $sa.serviceAccountTokenVolume.mountPath }} + readOnly: true + {{- end }} +{{- end -}} + +{{/* +serviceAccountTokenVolume + +Usage: + {{ include "serviceAccountTokenVolume" (list . .Values.scheduler.serviceAccount) }} +*/}} +{{- define "serviceAccountTokenVolume" -}} + {{- $root := index . 0 -}} + {{- $sa := index . 1 -}} + {{- if and (eq (include "airflow.podLaunchingExecutor" $root ) "true") (not $sa.automountServiceAccountToken) $sa.serviceAccountTokenVolume.enabled }} +- name: {{ $sa.serviceAccountTokenVolume.volumeName }} + projected: + defaultMode: 420 + sources: + - serviceAccountToken: + {{- if $sa.serviceAccountTokenVolume.audience }} + audience: {{ $sa.serviceAccountTokenVolume.audience }} + {{- end }} + expirationSeconds: {{ $sa.serviceAccountTokenVolume.expirationSeconds }} + path: token + - configMap: + items: + - key: ca.crt + path: ca.crt + name: kube-root-ca.crt + - downwardAPI: + items: + - fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + path: namespace + {{- end }} +{{- end -}} diff --git a/chart/templates/cleanup/cleanup-cronjob.yaml b/chart/templates/cleanup/cleanup-cronjob.yaml index 70cb53862d1cb..9314e933c29a6 100644 --- a/chart/templates/cleanup/cleanup-cronjob.yaml +++ b/chart/templates/cleanup/cleanup-cronjob.yaml @@ -110,6 +110,7 @@ spec: {{- if .Values.volumeMounts }} {{- toYaml .Values.volumeMounts | nindent 16 }} {{- end }} + {{- include "serviceAccountTokenVolumeMount" (list . .Values.cleanup.serviceAccount) | nindent 16 }} resources: {{- toYaml .Values.cleanup.resources | nindent 16 }} volumes: - name: config @@ -118,4 +119,6 @@ spec: {{- if .Values.volumes }} {{- toYaml .Values.volumes | nindent 12 }} {{- end }} + {{- include "serviceAccountTokenVolume" (list . 
.Values.cleanup.serviceAccount) | nindent 12 }} + {{- end }} diff --git a/chart/templates/scheduler/scheduler-deployment.yaml b/chart/templates/scheduler/scheduler-deployment.yaml index 3ba9d89dae495..294975fab0a76 100644 --- a/chart/templates/scheduler/scheduler-deployment.yaml +++ b/chart/templates/scheduler/scheduler-deployment.yaml @@ -246,11 +246,7 @@ spec: {{- if .Values.scheduler.extraVolumeMounts }} {{- tpl (toYaml .Values.scheduler.extraVolumeMounts) . | nindent 12 }} {{- end }} - {{- if and (eq (include "airflow.podLaunchingExecutor" .) "true") (not .Values.scheduler.serviceAccount.automountServiceAccountToken) .Values.scheduler.serviceAccount.serviceAccountTokenVolume.enabled }} - - name: {{ .Values.scheduler.serviceAccount.serviceAccountTokenVolume.volumeName }} - mountPath: {{ .Values.scheduler.serviceAccount.serviceAccountTokenVolume.mountPath }} - readOnly: true - {{- end }} + {{- include "serviceAccountTokenVolumeMount" (list . .Values.scheduler.serviceAccount) | nindent 12 }} {{- if and $localOrDagProcessorDisabled .Values.dags.gitSync.enabled }} {{- include "git_sync_container" . | indent 8 }} {{- end }} @@ -334,29 +330,7 @@ spec: {{- if .Values.scheduler.extraVolumes }} {{- tpl (toYaml .Values.scheduler.extraVolumes) . | nindent 8 }} {{- end }} - {{- if and (eq (include "airflow.podLaunchingExecutor" .) 
"true") (not .Values.scheduler.serviceAccount.automountServiceAccountToken) .Values.scheduler.serviceAccount.serviceAccountTokenVolume.enabled }} - - name: {{ .Values.scheduler.serviceAccount.serviceAccountTokenVolume.volumeName }} - projected: - defaultMode: 420 - sources: - - serviceAccountToken: - {{- if .Values.scheduler.serviceAccount.serviceAccountTokenVolume.audience }} - audience: {{ .Values.scheduler.serviceAccount.serviceAccountTokenVolume.audience }} - {{- end }} - expirationSeconds: {{ .Values.scheduler.serviceAccount.serviceAccountTokenVolume.expirationSeconds }} - path: token - - configMap: - items: - - key: ca.crt - path: ca.crt - name: kube-root-ca.crt - - downwardAPI: - items: - - fieldRef: - apiVersion: v1 - fieldPath: metadata.namespace - path: namespace - {{- end }} + {{- include "serviceAccountTokenVolume" (list . .Values.scheduler.serviceAccount) | nindent 8 }} {{- if .Values.logs.persistence.enabled }} - name: logs persistentVolumeClaim: diff --git a/chart/values.schema.json b/chart/values.schema.json index 953b07e02c669..040b716d6eaea 100644 --- a/chart/values.schema.json +++ b/chart/values.schema.json @@ -10300,10 +10300,47 @@ "additionalProperties": false, "properties": { "automountServiceAccountToken": { - "description": "Specifies if ServiceAccount's API credentials should be mounted onto Pods", + "description": "Specifies if ServiceAccount's API credentials should be mounted onto Pods. When false, you can use `serviceAccountTokenVolume` to manually configure service account token volume for pod-launching executors.", "type": "boolean", "default": true }, + "serviceAccountTokenVolume": { + "description": "Configuration for manual service account token volume. Only used when automountServiceAccountToken is false and for pod-launching executors. 
(CeleryExecutor, KubernetesExecutor)", + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { + "description": "Enable manual service account token volume configuration.", + "type": "boolean", + "default": false + }, + "mountPath": { + "description": "Path where the service account token volume will be mounted.", + "type": "string", + "default": "/var/run/secrets/kubernetes.io/serviceaccount" + }, + "volumeName": { + "description": "Name of the service account token volume.", + "type": "string", + "default": "kube-api-access" + }, + "expirationSeconds": { + "description": "Token expiration time in seconds.", + "type": "integer", + "minimum": 600, + "maximum": 7776000, + "default": 3600 + }, + "audience": { + "description": "Intended audience of the token. Optional - defaults to the identifier of the Kubernetes API server.", + "type": [ + "string", + "null" + ], + "default": null + } + } + }, "create": { "description": "Specifies whether a ServiceAccount should be created.", "type": "boolean", diff --git a/chart/values.yaml b/chart/values.yaml index 7698af004e598..ebb845fc94837 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -3755,6 +3755,25 @@ cleanup: # Annotations to add to cleanup CronJob Kubernetes Service Account. 
annotations: {} + # Service Account Token Volume configuration + # This is only used when `automountServiceAccountToken` is 'false' + # and allows manual configuration of the Service Account token volume + serviceAccountTokenVolume: + # Enable manual Service Account token volume configuration + enabled: false + + # Path where the Service Account token should be mounted + mountPath: /var/run/secrets/kubernetes.io/serviceaccount + + # Name of the volume + volumeName: kube-api-access + + # Token expiration in seconds + expirationSeconds: 3600 + + # Audience for the token + audience: ~ + # When not set, the values defined in the global `securityContext` will be used # (deprecated, use `cleanup.securityContexts` instead) securityContext: {} From 1e756428307b5a573cfeb9914f983129e61168c4 Mon Sep 17 00:00:00 2001 From: Jens Scheffler <95105677+jscheffl@users.noreply.github.com> Date: Wed, 27 May 2026 23:08:29 +0200 Subject: [PATCH 09/28] [main] CI: Upgrade important CI environment (#67613) --- .pre-commit-config.yaml | 2 +- pyproject.toml | 10 ---------- uv.lock | 5 ++--- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 26b69f63d814f..5f8351b30e790 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -410,7 +410,7 @@ repos: require_serial: true entry: zizmor - repo: https://github.com/lycheeverse/lychee - rev: 1662ec573c55eb9648db2797a66f74bcc6216d62 # frozen: nightly + rev: 2bba271688c1abb1503097a064e6c3bc1d1b6a9b # frozen: lychee-lib-v0.24.2 hooks: # Use the upstream `lychee-docker` variant rather than the script-based # `lychee` hook because the prebuilt lychee binaries that the script diff --git a/pyproject.toml b/pyproject.toml index a0202dca38377..1051632695c01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1532,14 +1532,6 @@ apache-aurflow-docker-stack = false # Manual overrides (kept outside the auto-generated block above so the # update_airflow_pyproject_toml.py script 
doesn't clobber them). -# Starlette 1.0.1 carries a Host-header parsing fix that prevents -# `request.url.path` from diverging from `scope["path"]` when the Host header -# contains characters invalid per RFC 9110 §7.2. The fix landed too recently -# to be admitted by the global 4-day cooldown; this override lets the floor -# bump in `airflow-core/pyproject.toml` resolve. -# REMOVE BY 2026-05-26 — once 1.0.1 is older than the global 4-day cooldown -# this override is redundant and should be deleted along with the line below. -starlette = "6 hours" [tool.uv.pip] # Synchroonize with scripts/ci/prek/upgrade_important_versions.py @@ -1682,8 +1674,6 @@ apache-aurflow-docker-stack = false # Manual overrides — see the matching block under # `[tool.uv.exclude-newer-package]` above for rationale. -# REMOVE BY 2026-05-26 along with the matching entry above. -starlette = "6 hours" [tool.uv.sources] diff --git a/uv.lock b/uv.lock index bca09add09083..83759330bb2f5 100644 --- a/uv.lock +++ b/uv.lock @@ -87,7 +87,6 @@ apache-airflow-shared-template-rendering = false apache-airflow-mypy = false apache-airflow-providers-http = false apache-airflow-providers-slack = false -starlette = { timestamp = "0001-01-01T00:00:00Z", span = "PT6H" } apache-airflow-providers-vespa = false apache-airflow-providers-databricks = false apache-airflow-shared-state = false @@ -21092,8 +21091,8 @@ name = "secretstorage" version = "3.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cryptography", marker = "python_full_version >= '3.14' or platform_machine != 'arm64' or sys_platform != 'darwin'" }, - { name = "jeepney", marker = "python_full_version >= '3.14' or platform_machine != 'arm64' or sys_platform != 'darwin'" }, + { name = "cryptography", marker = "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version < '3.15' and sys_platform == 'emscripten') or (python_full_version < '3.15' and sys_platform == 'win32') or (platform_machine != 
'arm64' and sys_platform == 'darwin') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'win32')" }, + { name = "jeepney", marker = "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version < '3.15' and sys_platform == 'emscripten') or (python_full_version < '3.15' and sys_platform == 'win32') or (platform_machine != 'arm64' and sys_platform == 'darwin') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'win32')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" } wheels = [ From 70257e6968603cdfcb5d34f03e01c5c001d2075f Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Wed, 27 May 2026 23:57:15 +0200 Subject: [PATCH 10/28] Reject wildcard origin in CORS config instead of toggling credentials (#67502) The Access-Control-Allow-Origin: * + Access-Control-Allow-Credentials: true combination is invalid per the CORS spec and browsers refuse to honour any response that does so. The previous fix (#66503) added an access_control_allow_credentials toggle, but allow_credentials=False would break Airflow's UI on any deployment where API and UI are on different origins, so that knob has no realistic use case. Drop the toggle, always send credentialed CORS, and fail loudly at startup with AirflowConfigException if access_control_allow_origins contains "*" so operators see the bad configuration immediately instead of debugging mysterious CORS errors in the browser. Closes #67193 (the revert is no longer needed once the underlying misconfiguration is rejected directly). 
--- airflow-core/docs/security/api.rst | 10 +++--- .../src/airflow/api_fastapi/core_api/app.py | 19 ++++++++-- .../src/airflow/config_templates/config.yml | 15 +++----- .../unit/api_fastapi/core_api/test_app.py | 35 ++++++++++++------- 4 files changed, 49 insertions(+), 30 deletions(-) diff --git a/airflow-core/docs/security/api.rst b/airflow-core/docs/security/api.rst index c02033d0b12bb..b45616ece8925 100644 --- a/airflow-core/docs/security/api.rst +++ b/airflow-core/docs/security/api.rst @@ -86,10 +86,12 @@ from scripts running in the browser. access_control_allow_methods = POST, GET, OPTIONS, DELETE access_control_allow_origins = https://exampleclientapp1.com https://exampleclientapp2.com -The ``Access-Control-Allow-Credentials`` header is included by default. Set -``access_control_allow_credentials = False`` if you have configured -``access_control_allow_origins`` and do not want browsers to send credentials -(cookies, ``Authorization`` header) with cross-origin requests. +Airflow's API always responds with ``Access-Control-Allow-Credentials: true`` so the UI and +clients can send cookies and ``Authorization`` headers across origins. Because of that, +``access_control_allow_origins`` must list the exact origins that need access — the wildcard +``*`` is rejected at startup. The CORS spec forbids combining +``Access-Control-Allow-Origin: *`` with credentialed responses, and browsers refuse any +response that does so, so a wildcard origin would simply break every cross-origin request. 
Page size limit --------------- diff --git a/airflow-core/src/airflow/api_fastapi/core_api/app.py b/airflow-core/src/airflow/api_fastapi/core_api/app.py index 20ecbedb23645..27213a4c0a2e8 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/app.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/app.py @@ -29,7 +29,7 @@ from fastapi.templating import Jinja2Templates from airflow.api_fastapi.auth.tokens import get_signing_key -from airflow.exceptions import AirflowException +from airflow.exceptions import AirflowConfigException, AirflowException log = logging.getLogger(__name__) @@ -143,13 +143,26 @@ def init_config(app: FastAPI) -> None: allow_origins = conf.getlist("api", "access_control_allow_origins") allow_methods = conf.getlist("api", "access_control_allow_methods") allow_headers = conf.getlist("api", "access_control_allow_headers") - allow_credentials = conf.getboolean("api", "access_control_allow_credentials", fallback=True) + + if "*" in allow_origins: + # The CORS spec forbids combining `Access-Control-Allow-Origin: *` with + # `Access-Control-Allow-Credentials: true`, and browsers reject any response that does so + # (see https://fetch.spec.whatwg.org/#cors-protocol-and-credentials). Airflow's API needs + # credentialed requests for cookie / Authorization-header auth, so a wildcard origin is + # never a valid configuration. Fail loudly at startup instead of silently shipping a + # response shape that no browser will accept. + raise AirflowConfigException( + "`[api] access_control_allow_origins` must not contain `*`: the wildcard origin is " + "incompatible with the credentialed CORS Airflow's API requires, and browsers will " + "reject every cross-origin response. List the exact origins that need access " + "(e.g. `https://airflow.mycompany.com`) instead." 
+ ) if allow_origins or allow_methods or allow_headers: app.add_middleware( CORSMiddleware, allow_origins=allow_origins, - allow_credentials=allow_credentials, + allow_credentials=True, allow_methods=allow_methods, allow_headers=allow_headers, ) diff --git a/airflow-core/src/airflow/config_templates/config.yml b/airflow-core/src/airflow/config_templates/config.yml index 4c650bbeb64e9..ae47f132b08ef 100644 --- a/airflow-core/src/airflow/config_templates/config.yml +++ b/airflow-core/src/airflow/config_templates/config.yml @@ -1821,21 +1821,14 @@ api: access_control_allow_origins: description: | Indicates whether the response can be shared with requesting code from the given origins. - Separate URLs with space. + Separate URLs with space. Wildcard (``*``) is not allowed: Airflow's API requires + credentialed CORS, which is incompatible with a wildcard origin per the CORS spec, and + browsers reject any response that combines the two. List exact origins instead + (for example ``https://airflow.mycompany.com``). type: string version_added: 2.2.0 example: ~ default: "" - access_control_allow_credentials: - description: | - Whether the FastAPI server includes the ``Access-Control-Allow-Credentials`` header on - CORS responses. Defaults to True to preserve existing behavior; set to False if you have - configured ``access_control_allow_origins`` and do not want browsers to send credentials - (cookies, Authorization header) with cross-origin requests. - type: boolean - version_added: 3.2.2 - example: ~ - default: "True" grid_view_sorting_order: description: | Sorting order in grid view. 
Valid values are: ``topological``, ``hierarchical_alphabetical`` diff --git a/airflow-core/tests/unit/api_fastapi/core_api/test_app.py b/airflow-core/tests/unit/api_fastapi/core_api/test_app.py index 7a999e0f3b335..9e3c7219f0339 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/test_app.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/test_app.py @@ -149,20 +149,31 @@ def test_ui_router_carries_get_user_dependency(self): ) -class TestCorsMiddlewareAllowCredentials: - @pytest.mark.parametrize( - ("config_value", "expected_allow_credentials"), - [(None, True), ("True", True), ("False", False)], - ) - def test_init_config_passes_allow_credentials(self, config_value, expected_allow_credentials): - config = {("api", "access_control_allow_origins"): "https://example.com"} - if config_value is not None: - config[("api", "access_control_allow_credentials")] = config_value - - with conf_vars(config): +class TestCorsMiddlewareConfig: + def test_init_config_enables_credentialed_cors_for_explicit_origins(self): + with conf_vars({("api", "access_control_allow_origins"): "https://example.com"}): app = FastAPI() init_config(app) cors_middlewares = [m for m in app.user_middleware if m.cls is CORSMiddleware] assert len(cors_middlewares) == 1 - assert cors_middlewares[0].kwargs["allow_credentials"] is expected_allow_credentials + assert cors_middlewares[0].kwargs["allow_credentials"] is True + assert cors_middlewares[0].kwargs["allow_origins"] == ["https://example.com"] + + @pytest.mark.parametrize( + "origins", + ["*", "https://example.com,*", "*,https://example.com"], + ) + def test_init_config_rejects_wildcard_origin(self, origins): + """Wildcard origin is incompatible with credentialed CORS; reject it at startup. + + Browsers refuse any response that combines ``Access-Control-Allow-Origin: *`` with + ``Access-Control-Allow-Credentials: true``, so silently accepting ``*`` would just ship + a configuration where every cross-origin request fails. Fail loudly instead. 
+ """ + from airflow.exceptions import AirflowConfigException + + with conf_vars({("api", "access_control_allow_origins"): origins}): + app = FastAPI() + with pytest.raises(AirflowConfigException, match=r"must not contain `\*`"): + init_config(app) From f0f978d2736891a2f9e9d2954e87fc358e1ef4e3 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Thu, 28 May 2026 01:47:09 +0200 Subject: [PATCH 11/28] Mask per-key secrets-backend-kwarg overrides on the Config API (#67622) Per-key environment-variable overrides like `AIRFLOW__SECRETS__BACKEND_KWARG__SECRET_ID` and `AIRFLOW__WORKERS__SECRETS_BACKEND_KWARG__SECRET_ID` are materialised by `conf.as_dict` as synthetic options under the `secrets` and `workers` sections (e.g. `backend_kwarg__secret_id`). These synthetic options carry the same Vault / role_id / secret_id material as the registered `backend_kwargs` option, but they are not present in `conf.sensitive_config_values`, so the Config API was returning their values unmasked. This change adds: - a constant `_PER_KEY_SENSITIVE_PREFIXES` that names the two synthetic-option prefixes, - a helper `_mask_per_key_sensitive_options` that the `GET /config` route calls when `display_sensitive=False`, - a helper `_is_per_key_sensitive_option` that extends the sensitivity check in the `GET /config/section/{section}/option/{option}` route. 
Reference: airflow-s/airflow-s#433 Generated-by: Claude Opus 4.7 (1M context) following the guidelines at https://github.com/apache/airflow/blob/main/contributing-docs/05_pull_requests.rst#gen-ai-assisted-contributions --- .../core_api/routes/public/config.py | 9 ++- .../core_api/services/public/config.py | 32 +++++++++ .../core_api/routes/public/test_config.py | 67 +++++++++++++++++++ 3 files changed, 107 insertions(+), 1 deletion(-) diff --git a/airflow-core/src/airflow/api_fastapi/core_api/routes/public/config.py b/airflow-core/src/airflow/api_fastapi/core_api/routes/public/config.py index 784d652c155f2..9f4b3359508a1 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/routes/public/config.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/routes/public/config.py @@ -32,6 +32,8 @@ from airflow.api_fastapi.core_api.security import requires_access_configuration from airflow.api_fastapi.core_api.services.public.config import ( _check_expose_config, + _is_per_key_sensitive_option, + _mask_per_key_sensitive_options, _response_based_on_accept, ) from airflow.configuration import conf @@ -101,6 +103,8 @@ def get_config( detail=f"Section {section} not found.", ) conf_dict = conf.as_dict(display_source=False, display_sensitive=display_sensitive) + if not display_sensitive: + _mask_per_key_sensitive_options(conf_dict) if section: conf_section_value = conf_dict[section] @@ -148,7 +152,10 @@ def get_config_value( detail=f"Option [{section}/{option}] not found.", ) - if (section.lower(), option.lower()) in conf.sensitive_config_values: + section_l, option_l = section.lower(), option.lower() + if (section_l, option_l) in conf.sensitive_config_values or _is_per_key_sensitive_option( + section_l, option_l + ): value = "< hidden >" else: value = conf.get(section, option) diff --git a/airflow-core/src/airflow/api_fastapi/core_api/services/public/config.py b/airflow-core/src/airflow/api_fastapi/core_api/services/public/config.py index 0b51a6c33aea1..d805827d8d61d 100644 
--- a/airflow-core/src/airflow/api_fastapi/core_api/services/public/config.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/services/public/config.py @@ -25,6 +25,38 @@ from airflow.api_fastapi.core_api.datamodels.config import Config from airflow.configuration import conf +# Per-key environment-variable overrides for secrets-backend kwargs are +# surfaced by ``conf.as_dict`` as synthetic options under the ``secrets`` +# and ``workers`` sections. They carry the same secrets-backend material +# (e.g. Vault role_id / secret_id) as the registered ``backend_kwargs`` +# option, so they need the same redaction treatment when +# ``display_sensitive=False``. +_PER_KEY_SENSITIVE_PREFIXES: dict[str, str] = { + "secrets": "backend_kwarg__", + "workers": "secrets_backend_kwarg__", +} + + +def _is_per_key_sensitive_option(section: str, option: str) -> bool: + """Return True for synthetic per-key secrets-backend-kwarg options.""" + prefix = _PER_KEY_SENSITIVE_PREFIXES.get(section) + return prefix is not None and option.startswith(prefix) + + +def _mask_per_key_sensitive_options(conf_dict: dict) -> None: + """Mask synthetic per-key secrets-backend-kwarg options in-place.""" + for section, prefix in _PER_KEY_SENSITIVE_PREFIXES.items(): + options = conf_dict.get(section) + if not options: + continue + for option in list(options): + if option.startswith(prefix): + current = options[option] + if isinstance(current, tuple): + options[option] = ("< hidden >", current[1]) + else: + options[option] = "< hidden >" + def _check_expose_config() -> bool: display_sensitive: bool | None = None diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_config.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_config.py index aa1a74890f78e..cebd1cf12118e 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_config.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_config.py @@ -492,3 +492,70 @@ def 
test_get_config_value_should_response_403(self, unauthorized_test_client): f"/config/section/{SECTION_DATABASE}/option/{OPTION_KEY_SQL_ALCHEMY_CONN}" ) assert response.status_code == 403 + + +SECTION_SECRETS = "secrets" +SECTION_WORKERS = "workers" +PER_KEY_OPTION_SECRETS = "backend_kwarg__secret_id" +PER_KEY_OPTION_WORKERS = "secrets_backend_kwarg__secret_id" +PER_KEY_VALUE = "vault-role-id-or-secret-id-material" + + +class TestPerKeyBackendKwargMasking(TestConfigEndpoint): + """Synthetic per-key secrets-backend-kwarg options (e.g. + ``AIRFLOW__SECRETS__BACKEND_KWARG__SECRET_ID``) materialised by + ``conf.as_dict`` carry the same Vault / role_id / secret_id material as + the registered ``backend_kwargs`` option. The Config API must redact them + on the way out when ``display_sensitive=False``.""" + + @pytest.fixture(autouse=True) + def setup_per_key(self) -> Generator[None, None, None]: + per_key_dict = { + SECTION_CORE: {OPTION_KEY_PARALLELISM: OPTION_VALUE_PARALLELISM}, + SECTION_SECRETS: {PER_KEY_OPTION_SECRETS: PER_KEY_VALUE}, + SECTION_WORKERS: {PER_KEY_OPTION_WORKERS: PER_KEY_VALUE}, + } + + def _mock_conf_as_dict(display_sensitive: bool, **_): + return {section: options.copy() for section, options in per_key_dict.items()} + + def _mock_has_option(section: str, option: str) -> bool: + return option in per_key_dict.get(section, {}) + + with ( + conf_vars(AIRFLOW_CONFIG_ENABLE_EXPOSE_CONFIG), + patch( + "airflow.api_fastapi.core_api.routes.public.config.conf.as_dict", + new=_mock_conf_as_dict, + ), + patch( + "airflow.api_fastapi.core_api.routes.public.config.conf.has_option", + new=_mock_has_option, + ), + ): + yield + + def test_get_config_masks_per_key_secrets_backend_kwargs(self, test_client): + """``GET /config`` must redact synthetic per-key options under both + the ``secrets`` and ``workers`` sections when + ``display_sensitive=False`` (the API-server default).""" + response = test_client.get("/config", headers=HEADERS_JSON) + assert 
response.status_code == 200 + + sections = { + s["name"]: {o["key"]: o["value"] for o in s["options"]} for s in response.json()["sections"] + } + assert sections[SECTION_SECRETS][PER_KEY_OPTION_SECRETS] == OPTION_VALUE_SENSITIVE_HIDDEN + assert sections[SECTION_WORKERS][PER_KEY_OPTION_WORKERS] == OPTION_VALUE_SENSITIVE_HIDDEN + # Non-sensitive option in the same response must remain untouched. + assert sections[SECTION_CORE][OPTION_KEY_PARALLELISM] == OPTION_VALUE_PARALLELISM + + def test_get_config_value_masks_per_key_secrets_backend_kwarg(self, test_client): + """``GET /config/section/{section}/option/{option}`` must redact a + per-key synthetic option the same way the section-dump does.""" + response = test_client.get( + f"/config/section/{SECTION_SECRETS}/option/{PER_KEY_OPTION_SECRETS}", + headers=HEADERS_JSON, + ) + assert response.status_code == 200 + assert response.json()["sections"][0]["options"][0]["value"] == OPTION_VALUE_SENSITIVE_HIDDEN From 1f5b99948a7afe8411bda8f201e725e515543269 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EB=B0=B1=ED=98=95=EC=A4=80?= <138356797+vividbaek@users.noreply.github.com> Date: Thu, 28 May 2026 09:55:34 +0900 Subject: [PATCH 12/28] Add dynamic task mapping no-op example (#67022) --- .../dynamic-task-mapping.rst | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/airflow-core/docs/authoring-and-scheduling/dynamic-task-mapping.rst b/airflow-core/docs/authoring-and-scheduling/dynamic-task-mapping.rst index 1a5366cc4c374..d9dfec21fe80e 100644 --- a/airflow-core/docs/authoring-and-scheduling/dynamic-task-mapping.rst +++ b/airflow-core/docs/authoring-and-scheduling/dynamic-task-mapping.rst @@ -676,3 +676,33 @@ Automatically skipping zero-length maps ======================================= If the input is empty (zero length), no new tasks will be created and the mapped task will be marked as ``SKIPPED``. + +This can be useful when a Dag discovers work to do at runtime, but sometimes there is no work for that run. 
+For example, a scan-and-repair Dag can return an empty list when it does not find anything to repair. +In that case, the mapped task is skipped, and a downstream summary task can still treat the run as a successful no-op if it uses a trigger rule that allows skipped upstream tasks. + +.. code-block:: python + + from airflow.sdk import TriggerRule, task + + + @task + def find_work_items(): + # Return an empty list when no files, records, or partitions need repair. + return [] + + + @task + def repair(item): ... + + + @task(trigger_rule=TriggerRule.NONE_FAILED) + def summarize(repaired_items): + if not repaired_items: + print("No work found; nothing to repair.") + return + print(f"Repaired {len(repaired_items)} item(s).") + + + repaired_items = repair.expand(item=find_work_items()) + summarize(repaired_items) From 3276dded17244c83c5b01439441874d890286dfc Mon Sep 17 00:00:00 2001 From: "Jason(Zhe-You) Liu" <68415893+jason810496@users.noreply.github.com> Date: Thu, 28 May 2026 09:32:57 +0800 Subject: [PATCH 13/28] Add prek hook to enforce keyword-only `session` on `@provide_session` (#67150) --- .pre-commit-config.yaml | 6 + .../ci/prek/check_provide_session_kwargs.py | 427 ++++++++++++++++ .../prek/known_provide_session_positional.txt | 89 ++++ .../prek/test_check_provide_session_kwargs.py | 482 ++++++++++++++++++ 4 files changed, 1004 insertions(+) create mode 100755 scripts/ci/prek/check_provide_session_kwargs.py create mode 100644 scripts/ci/prek/known_provide_session_positional.txt create mode 100644 scripts/tests/ci/prek/test_check_provide_session_kwargs.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5f8351b30e790..42ab2035ec07e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1071,6 +1071,12 @@ repos: language: python pass_filenames: true files: ^(airflow-core|airflow-ctl|task-sdk|providers|shared)/.*\.py$ + - id: check-no-new-provide-session-positional + name: Check that no new @provide_session functions declare 
`session` positionally + entry: ./scripts/ci/prek/check_provide_session_kwargs.py + language: python + pass_filenames: true + files: ^(airflow-core|providers)/.*\.py$|^scripts/ci/prek/known_provide_session_positional\.txt$|^scripts/ci/prek/check_provide_session_kwargs\.py$ - id: check-no-new-airflow-core-utils-modules name: Check that no new modules are added under airflow-core/src/airflow/utils entry: ./scripts/ci/prek/check_no_new_airflow_core_utils_modules.py diff --git a/scripts/ci/prek/check_provide_session_kwargs.py b/scripts/ci/prek/check_provide_session_kwargs.py new file mode 100755 index 0000000000000..29152e13a4d86 --- /dev/null +++ b/scripts/ci/prek/check_provide_session_kwargs.py @@ -0,0 +1,427 @@ +#!/usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "rich>=13.0.0", +# ] +# /// +"""Check that no new ``@provide_session`` functions declare ``session`` positionally. + +The project convention is that any function decorated with ``@provide_session`` +must declare ``session`` as keyword-only (after a bare ``*`` in the signature), +so callers cannot pass it positionally by accident. 
See +``contributing-docs/05_pull_requests.rst#database-session-handling``. + +All *existing* offenders are recorded in ``known_provide_session_positional.txt`` +next to this script as ``relative/path::N`` entries (one per file), where ``N`` +is the maximum number of ``@provide_session`` functions with a positional +``session`` argument allowed in that file. A file whose current count exceeds +the recorded limit is treated as a violation – move the ``session`` argument +behind a bare ``*`` instead. + +Modes +----- +Default (files passed by prek/pre-commit): + Check only the supplied files; fail if any file's count exceeds the limit. + When a file's count has *decreased*, the allowlist entry is tightened + automatically and the hook exits with a non-zero code so that pre-commit + reports the modified allowlist – just stage + ``scripts/ci/prek/known_provide_session_positional.txt`` and re-run. + +``--all-files``: + Walk every ``.py`` file under the project source roots + (``airflow-core``, ``providers``, ``shared``) — + the same scope the pre-commit hook applies to. + +``--cleanup``: + Remove entries for files that no longer exist. Safe to run at any time; + does not add new entries or raise limits. + +``--generate``: + Scan the same project source roots as ``--all-files`` and *rebuild* the + allowlist from scratch. Intended for the initial setup or after a + large-scale clean-up sprint. +""" + +from __future__ import annotations + +import argparse +import ast +import subprocess +import typing +from pathlib import Path + +from rich.console import Console +from rich.panel import Panel + +console = Console(color_system="standard", width=200) + +REPO_ROOT = Path(__file__).parents[3] + +_PROVIDE_SESSION_DECORATOR = "provide_session" + +# Top-level directories scanned by ``--all-files`` / ``--generate``. Keep in sync with the +# ``files:`` pattern for this hook in ``.pre-commit-config.yaml``. 
+_PROJECT_SOURCE_ROOTS = ("airflow-core", "providers", "shared") + + +def _has_provide_session_decorator(nodes: list[ast.expr]) -> bool: + """Whether one of ``nodes`` is a ``@provide_session`` decorator. + + Accepts both bare names (``@provide_session``) and attribute access + (``@something.provide_session``). + """ + for node in nodes: + if isinstance(node, ast.Name) and node.id == _PROVIDE_SESSION_DECORATOR: + return True + if isinstance(node, ast.Attribute) and node.attr == _PROVIDE_SESSION_DECORATOR: + return True + return False + + +def _session_is_positional(args: ast.arguments) -> ast.arg | None: + """Return the ``session`` arg if it is positional (not keyword-only). + + Covers both regular positional args and positional-only args (``def f(session, /, ...)``). + """ + for argument in (*args.posonlyargs, *args.args): + if argument.arg == "session": + return argument + return None + + +def _iter_positional_session_in_provide_session( + path: Path, +) -> typing.Iterator[tuple[ast.FunctionDef | ast.AsyncFunctionDef, ast.arg]]: + """Yield ``@provide_session`` functions in *path* whose ``session`` is positional.""" + try: + source = path.read_text(encoding="utf-8", errors="replace") + except OSError: + return + try: + tree = ast.parse(source, str(path)) + except SyntaxError: + return + for node in ast.walk(tree): + if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + continue + if not _has_provide_session_decorator(node.decorator_list): + continue + argument = _session_is_positional(node.args) + if argument is None: + continue + yield node, argument + + +def _count_violations(path: Path) -> int: + return sum(1 for _ in _iter_positional_session_in_provide_session(path)) + + +def _is_safe_relative(rel: str) -> bool: + """Whether ``rel`` is a plain relative path that stays inside ``REPO_ROOT``. + + Rejects absolute paths and any entry that resolves outside the repo root so + callers can ``relative_to(REPO_ROOT)`` without fear of a ``ValueError``. 
+ """ + candidate = Path(rel) + if candidate.is_absolute(): + return False + try: + (REPO_ROOT / candidate).resolve().relative_to(REPO_ROOT.resolve()) + except ValueError: + return False + return True + + +class AllowlistManager: + def __init__(self, allowlist_file: Path) -> None: + self.allowlist_file = allowlist_file + + @staticmethod + def parse(text: str) -> dict[str, int]: + """Parse allowlist *text* into a ``{rel_path: count}`` mapping. + + Same validation rules as :meth:`load` so we can reuse parsing for the + on-disk allowlist *and* for the git-tracked version fetched from + ``HEAD`` when guarding against entry-removal bypasses. + """ + result: dict[str, int] = {} + for raw_line in text.splitlines(): + if not (stripped := raw_line.strip()): + continue + + rel_str, _, count_str = stripped.rpartition("::") + if not rel_str or not count_str: + continue + + try: + count = int(count_str) + except ValueError: + continue + + if not _is_safe_relative(rel_str): + console.print( + f"[yellow]Ignoring unsafe allowlist entry (escapes repo root):[/yellow] {rel_str}" + ) + continue + + result[rel_str] = count + + return result + + def load(self) -> dict[str, int]: + if not self.allowlist_file.exists(): + return {} + return self.parse(self.allowlist_file.read_text()) + + def save(self, counts: dict[str, int]) -> None: + lines = [f"{rel}::{count}" for rel, count in sorted(counts.items())] + self.allowlist_file.write_text("\n".join(lines) + "\n") + + def generate(self) -> int: + roots = ", ".join(_PROJECT_SOURCE_ROOTS) + console.print( + f"Scanning project source roots ([cyan]{roots}[/cyan]) under [cyan]{REPO_ROOT}[/cyan] " + "for @provide_session functions with positional session …" + ) + counts: dict[str, int] = {} + for path in _iter_python_files(): + n = _count_violations(path) + if n > 0: + counts[str(path.relative_to(REPO_ROOT))] = n + + self.save(counts) + total = sum(counts.values()) + console.print( + f"[green]Generated[/green] 
[cyan]{self.allowlist_file.relative_to(REPO_ROOT)}[/cyan] " + f"with [bold]{len(counts)}[/bold] files / [bold]{total}[/bold] offenders." + ) + return 0 + + def cleanup(self) -> int: + allowlist = self.load() + if not allowlist: + console.print("[yellow]Allowlist is empty - nothing to clean up.[/yellow]") + return 0 + + stale: list[str] = [rel for rel in allowlist if not (REPO_ROOT / rel).exists()] + if stale: + console.print( + f"[yellow]Removing {len(stale)} stale entr{'y' if len(stale) == 1 else 'ies'}:[/yellow]" + ) + for s in sorted(stale): + console.print(f" [dim]-[/dim] {s}") + for s in stale: + del allowlist[s] + self.save(allowlist) + console.print( + f"\n[green]Updated[/green] [cyan]{self.allowlist_file.relative_to(REPO_ROOT)}[/cyan]" + ) + else: + console.print("[green]No stale entries found.[/green]") + return 0 + + +def _iter_python_files() -> list[Path]: + candidates: list[Path] = [] + for top in _PROJECT_SOURCE_ROOTS: + candidates.extend( + p.resolve() + for p in (REPO_ROOT / top).rglob("*.py") + if ".tox" not in p.parts and "__pycache__" not in p.parts + ) + return candidates + + +def _check_provide_session_kwargs( + files: list[Path], allowlist: dict[str, int], manager: AllowlistManager +) -> int: + allowlist_file = manager.allowlist_file.resolve() + if any(p.resolve() == allowlist_file for p in files) and not allowlist_file.exists(): + console.print( + Panel.fit( + f"Allowlist file [cyan]{allowlist_file}[/cyan] is missing.\n" + "It was passed to the hook but cannot be read, so the check cannot proceed.\n" + "Restore it from git or regenerate it with:\n\n" + " [cyan]uv run ./scripts/ci/prek/check_provide_session_kwargs.py --generate[/cyan]", + title="[red]Check failed[/red]", + border_style="red", + ) + ) + return 1 + + violations: list[tuple[Path, int, int]] = [] + tightened: list[tuple[str, int, int]] = [] + + for path in files: + if not path.exists() or path.suffix != ".py": + continue + actual = _count_violations(path) + rel = 
str(path.relative_to(REPO_ROOT)) + allowed = allowlist.get(rel, 0) + if actual > allowed: + violations.append((path, actual, allowed)) + elif actual < allowed: + if actual == 0: + del allowlist[rel] + else: + allowlist[rel] = actual + tightened.append((rel, allowed, actual)) + + if tightened: + manager.save(allowlist) + console.print( + f"[green]Tightened {len(tightened)} entr{'y' if len(tightened) == 1 else 'ies'} " + f"in [cyan]{manager.allowlist_file.relative_to(REPO_ROOT)}[/cyan][/green] " + "(stage the updated file):" + ) + for rel, old, new in tightened: + console.print(f" [cyan]{rel}[/cyan] {old} -> {new}") + + if violations: + console.print( + Panel.fit( + "New [bold]@provide_session[/bold] function with positional ``session`` detected.\n" + "Move ``session`` after a bare ``*`` in the signature so callers must pass it by keyword:\n\n" + " [cyan]@provide_session\n" + " def foo(arg, *, session: Session = NEW_SESSION) -> None: ...[/cyan]\n\n" + "If this usage is intentional and pre-existing, run:\n\n" + " [cyan]uv run ./scripts/ci/prek/check_provide_session_kwargs.py --generate[/cyan]\n\n" + "to regenerate the allowlist, then commit the updated\n" + "[cyan]scripts/ci/prek/known_provide_session_positional.txt[/cyan].", + title="[red]Check failed[/red]", + border_style="red", + ) + ) + for path, actual, allowed in violations: + console.print(f" [cyan]{path.relative_to(REPO_ROOT)}[/cyan] count={actual} (allowed={allowed})") + for func, argument in _iter_positional_session_in_provide_session(path): + console.print(f" [dim]L{argument.lineno}[/dim] def {func.name}(...)") + return 1 + + return 1 if tightened else 0 + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description="Prevent new @provide_session functions from declaring `session` positionally.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument("files", nargs="*", metavar="FILE", help="Files to check (provided by 
prek)") + parser.add_argument( + "--all-files", + action="store_true", + help=("Check every Python file under the project source roots (airflow-core, providers, shared)"), + ) + parser.add_argument( + "--cleanup", + action="store_true", + help="Remove stale entries from the allowlist and exit", + ) + parser.add_argument( + "--generate", + action="store_true", + help="Regenerate the allowlist from the current codebase and exit", + ) + args = parser.parse_args(argv) + + manager = AllowlistManager(Path(__file__).parent / "known_provide_session_positional.txt") + + if args.generate: + return manager.generate() + + if args.cleanup: + return manager.cleanup() + + allowlist = manager.load() + + if args.all_files: + return _check_provide_session_kwargs(_iter_python_files(), allowlist, manager) + + if not args.files: + console.print( + "[yellow]No files provided. Pass filenames or use --all-files to scan the whole repo.[/yellow]" + ) + return 0 + + paths = [Path(f).resolve() for f in args.files] + paths = _expand_for_allowlist_edits(paths, manager, allowlist) + return _check_provide_session_kwargs(paths, allowlist, manager) + + +def _parse_tracked_allowlist(manager: AllowlistManager) -> dict[str, int]: + """Return the allowlist as recorded at ``HEAD`` (the git-tracked version). + + Used by :func:`_expand_for_allowlist_edits` so that *removing* an entry + cannot silently drop coverage: the previously-listed file is still + re-validated against the new (post-edit) allowlist. Returns an empty mapping + when git is unavailable, the file does not yet exist at ``HEAD``, or the + allowlist sits outside ``REPO_ROOT``. 
+ """ + try: + rel = manager.allowlist_file.resolve().relative_to(REPO_ROOT.resolve()) + except ValueError: + return {} + try: + completed = subprocess.run( + ["git", "-C", str(REPO_ROOT), "show", f"HEAD:{rel.as_posix()}"], + capture_output=True, + text=True, + check=False, + ) + except (FileNotFoundError, OSError): + return {} + if completed.returncode != 0: + return {} + return AllowlistManager.parse(completed.stdout) + + +def _expand_for_allowlist_edits( + paths: list[Path], manager: AllowlistManager, allowlist: dict[str, int] +) -> list[Path]: + """Add allowlisted files when the allowlist itself is being changed. + + Without this, a contributor could raise counts in + ``known_provide_session_positional.txt`` and the hook would do no validation + (since only the ``.txt`` file is passed), letting the loosened allowlist + sail through. We also union the git-tracked allowlist (from ``HEAD``) so + that removing an entry cannot silently bypass the check for a file that + still has positional ``session`` arguments. + + Both sides of the allowlist-file comparison are resolved so the detection is + robust to symlinks and unresolved inputs (the hook can be invoked with either). 
+ """ + allowlist_file = manager.allowlist_file.resolve() + if not any(p.resolve() == allowlist_file for p in paths): + return paths + + expanded = list(paths) + seen = {p.resolve() for p in paths if p.suffix == ".py"} + tracked = _parse_tracked_allowlist(manager) + for rel in {*allowlist, *tracked}: + candidate = (REPO_ROOT / rel).resolve() + if candidate.exists() and candidate not in seen: + seen.add(candidate) + expanded.append(candidate) + return expanded + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/ci/prek/known_provide_session_positional.txt b/scripts/ci/prek/known_provide_session_positional.txt new file mode 100644 index 0000000000000..d0c84e2f6b48f --- /dev/null +++ b/scripts/ci/prek/known_provide_session_positional.txt @@ -0,0 +1,89 @@ +airflow-core/src/airflow/api/common/delete_dag.py::1 +airflow-core/src/airflow/api/common/mark_tasks.py::1 +airflow-core/src/airflow/callbacks/database_callback_sink.py::1 +airflow-core/src/airflow/cli/commands/dag_command.py::8 +airflow-core/src/airflow/cli/commands/jobs_command.py::1 +airflow-core/src/airflow/cli/commands/task_command.py::1 +airflow-core/src/airflow/cli/commands/team_command.py::4 +airflow-core/src/airflow/cli/commands/variable_command.py::1 +airflow-core/src/airflow/dag_processing/dagbag.py::1 +airflow-core/src/airflow/dag_processing/manager.py::4 +airflow-core/src/airflow/jobs/base_job_runner.py::2 +airflow-core/src/airflow/jobs/job.py::7 +airflow-core/src/airflow/jobs/scheduler_job_runner.py::11 +airflow-core/src/airflow/jobs/triggerer_job_runner.py::1 +airflow-core/src/airflow/models/connection.py::2 +airflow-core/src/airflow/models/dag.py::7 +airflow-core/src/airflow/models/dagcode.py::6 +airflow-core/src/airflow/models/dagrun.py::15 +airflow-core/src/airflow/models/dagwarning.py::1 +airflow-core/src/airflow/models/deadline.py::1 +airflow-core/src/airflow/models/deadline_alert.py::1 +airflow-core/src/airflow/models/pool.py::11 
+airflow-core/src/airflow/models/renderedtifields.py::4 +airflow-core/src/airflow/models/revoked_token.py::2 +airflow-core/src/airflow/models/serialized_dag.py::6 +airflow-core/src/airflow/models/taskinstance.py::21 +airflow-core/src/airflow/models/taskinstancehistory.py::2 +airflow-core/src/airflow/models/team.py::1 +airflow-core/src/airflow/models/trigger.py::7 +airflow-core/src/airflow/models/variable.py::2 +airflow-core/src/airflow/secrets/metastore.py::2 +airflow-core/src/airflow/serialization/definitions/dag.py::2 +airflow-core/src/airflow/ti_deps/deps/base_ti_dep.py::2 +airflow-core/src/airflow/ti_deps/deps/dag_ti_slots_available_dep.py::1 +airflow-core/src/airflow/ti_deps/deps/dag_unpaused_dep.py::1 +airflow-core/src/airflow/ti_deps/deps/dagrun_exists_dep.py::1 +airflow-core/src/airflow/ti_deps/deps/exec_date_after_start_date_dep.py::1 +airflow-core/src/airflow/ti_deps/deps/not_in_retry_period_dep.py::1 +airflow-core/src/airflow/ti_deps/deps/pool_slots_available_dep.py::1 +airflow-core/src/airflow/ti_deps/deps/prev_dagrun_dep.py::1 +airflow-core/src/airflow/ti_deps/deps/ready_to_reschedule.py::1 +airflow-core/src/airflow/ti_deps/deps/runnable_exec_date_dep.py::1 +airflow-core/src/airflow/ti_deps/deps/task_concurrency_dep.py::1 +airflow-core/src/airflow/ti_deps/deps/task_not_running_dep.py::1 +airflow-core/src/airflow/ti_deps/deps/valid_state_dep.py::1 +airflow-core/src/airflow/utils/cli_action_loggers.py::1 +airflow-core/src/airflow/utils/db.py::7 +airflow-core/src/airflow/utils/db_cleanup.py::2 +airflow-core/src/airflow/utils/log/file_task_handler.py::1 +airflow-core/tests/unit/api_fastapi/common/test_exceptions.py::4 +airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_assets.py::19 +airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_connections.py::2 +airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_run.py::1 +airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_tags.py::1 
+airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_warning.py::1 +airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_event_logs.py::1 +airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_import_error.py::8 +airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_job.py::1 +airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_monitor.py::2 +airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_pools.py::2 +airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_variables.py::2 +airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_xcom.py::3 +airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_calendar.py::2 +airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_dags.py::1 +airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_gantt.py::1 +airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_grid.py::1 +airflow-core/tests/unit/cli/commands/test_rotate_fernet_key_command.py::2 +airflow-core/tests/unit/jobs/test_scheduler_job.py::1 +airflow-core/tests/unit/listeners/test_listeners.py::7 +airflow-core/tests/unit/models/test_taskinstance.py::4 +airflow-core/tests/unit/models/test_timestamp.py::2 +providers/amazon/src/airflow/providers/amazon/aws/triggers/emr.py::1 +providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/executors/kubernetes_executor.py::1 +providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/template_rendering.py::1 +providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/triggers/pod.py::1 +providers/common/ai/tests/unit/common/ai/plugins/test_hitl_review.py::1 +providers/databricks/src/airflow/providers/databricks/plugins/databricks_workflow.py::2 +providers/edge3/src/airflow/providers/edge3/executors/edge_executor.py::3 +providers/edge3/src/airflow/providers/edge3/models/edge_worker.py::10 +providers/edge3/src/airflow/providers/edge3/plugins/edge_executor_plugin.py::1 
+providers/edge3/src/airflow/providers/edge3/worker_api/routes/logs.py::1 +providers/fab/src/airflow/providers/fab/auth_manager/cli_commands/permissions_command.py::1 +providers/google/src/airflow/providers/google/cloud/triggers/bigquery.py::1 +providers/google/src/airflow/providers/google/cloud/triggers/dataproc.py::3 +providers/openlineage/src/airflow/providers/openlineage/utils/utils.py::1 +providers/standard/src/airflow/providers/standard/operators/trigger_dagrun.py::1 +providers/standard/src/airflow/providers/standard/sensors/external_task.py::1 +providers/standard/src/airflow/providers/standard/utils/sensor_helper.py::1 +providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py::3 diff --git a/scripts/tests/ci/prek/test_check_provide_session_kwargs.py b/scripts/tests/ci/prek/test_check_provide_session_kwargs.py new file mode 100644 index 0000000000000..78b85cd270bbb --- /dev/null +++ b/scripts/tests/ci/prek/test_check_provide_session_kwargs.py @@ -0,0 +1,482 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import ast +import os +import subprocess +import textwrap +from pathlib import Path + +import pytest +from ci.prek import check_provide_session_kwargs as hook +from ci.prek.check_provide_session_kwargs import ( + AllowlistManager, + _check_provide_session_kwargs, + _count_violations, + _expand_for_allowlist_edits, + _has_provide_session_decorator, + _iter_positional_session_in_provide_session, + _parse_tracked_allowlist, + _session_is_positional, +) + + +@pytest.fixture +def find_violations(write_python_file): + """Factory fixture: write code to a temp file and return positional-session violations.""" + + def _check(code: str) -> list[tuple[ast.FunctionDef | ast.AsyncFunctionDef, ast.arg]]: + path = write_python_file(code) + return list(_iter_positional_session_in_provide_session(path)) + + return _check + + +@pytest.fixture +def create_fake_repo(tmp_path, monkeypatch): + """Create a fake repo layout and patch REPO_ROOT so paths resolve correctly.""" + monkeypatch.setattr(hook, "REPO_ROOT", tmp_path) + + def _write(rel: str, code: str) -> Path: + path = tmp_path / rel + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(textwrap.dedent(code)) + return path + + return _write + + +@pytest.fixture +def create_git_repo(create_fake_repo, tmp_path): + """Initialise ``tmp_path`` as a git repo so ``git show HEAD:`` works. + + Returns a helper that commits the current working-tree contents under a given + message, so tests can stage a "previous" allowlist at HEAD before mutating it. 
+ """ + env = { + **os.environ, + "GIT_AUTHOR_NAME": "t", + "GIT_AUTHOR_EMAIL": "t@t", + "GIT_COMMITTER_NAME": "t", + "GIT_COMMITTER_EMAIL": "t@t", + } + + def _run(*args: str) -> None: + subprocess.run(["git", "-C", str(tmp_path), *args], check=True, env=env, capture_output=True) + + _run("init", "-q", "-b", "main") + _run("config", "commit.gpgsign", "false") + + def _commit(message: str) -> None: + _run("add", "-A") + _run("commit", "-q", "--allow-empty", "-m", message) + + return _commit + + +class TestHasProvideSessionDecorator: + def test_provide_session_name(self): + func = ast.parse("@provide_session\ndef foo(): pass").body[0] + assert _has_provide_session_decorator(func.decorator_list) is True + + def test_provide_session_attribute(self): + func = ast.parse("@utils.provide_session\ndef foo(): pass").body[0] + assert _has_provide_session_decorator(func.decorator_list) is True + + def test_no_decorator(self): + func = ast.parse("def foo(): pass").body[0] + assert _has_provide_session_decorator(func.decorator_list) is False + + def test_unrelated_decorator(self): + func = ast.parse("@staticmethod\ndef foo(): pass").body[0] + assert _has_provide_session_decorator(func.decorator_list) is False + + def test_multiple_decorators_including_provide_session(self): + func = ast.parse("@staticmethod\n@provide_session\ndef foo(): pass").body[0] + assert _has_provide_session_decorator(func.decorator_list) is True + + +class TestSessionIsPositional: + def test_no_session_arg(self): + func = ast.parse("def foo(x, y): pass").body[0] + assert _session_is_positional(func.args) is None + + def test_session_positional(self): + func = ast.parse("def foo(session=NEW_SESSION): pass").body[0] + argument = _session_is_positional(func.args) + assert argument is not None + assert argument.arg == "session" + + def test_session_keyword_only(self): + func = ast.parse("def foo(*, session=NEW_SESSION): pass").body[0] + assert _session_is_positional(func.args) is None + + def 
test_session_positional_among_other_args(self): + func = ast.parse("def foo(x, y, session=NEW_SESSION): pass").body[0] + argument = _session_is_positional(func.args) + assert argument is not None + assert argument.arg == "session" + + def test_session_kwonly_after_other_positional(self): + func = ast.parse("def foo(x, y, *, session=NEW_SESSION): pass").body[0] + assert _session_is_positional(func.args) is None + + def test_session_positional_only(self): + func = ast.parse("def foo(session, /, x): pass").body[0] + argument = _session_is_positional(func.args) + assert argument is not None + assert argument.arg == "session" + + +class TestIterPositionalSessionInProvideSession: + def test_keyword_only_session_is_clean(self, find_violations): + code = """\ + @provide_session + def foo(*, session=NEW_SESSION): + pass + """ + assert find_violations(code) == [] + + def test_positional_session_is_flagged(self, find_violations): + code = """\ + @provide_session + def foo(session=NEW_SESSION): + pass + """ + violations = find_violations(code) + assert len(violations) == 1 + func, argument = violations[0] + assert func.name == "foo" + assert argument.arg == "session" + + def test_no_provide_session_decorator_is_ignored(self, find_violations): + code = """\ + def foo(session=NEW_SESSION): + pass + """ + assert find_violations(code) == [] + + def test_async_function_with_positional_session_is_flagged(self, find_violations): + code = """\ + @provide_session + async def foo(session=NEW_SESSION): + pass + """ + violations = find_violations(code) + assert len(violations) == 1 + + def test_method_with_positional_session_is_flagged(self, find_violations): + code = """\ + class C: + @provide_session + def foo(self, session=NEW_SESSION): + pass + """ + violations = find_violations(code) + assert len(violations) == 1 + assert violations[0][0].name == "foo" + + def test_attribute_decorator_is_recognised(self, find_violations): + code = """\ + @airflow.utils.session.provide_session + def 
foo(session=NEW_SESSION): + pass + """ + violations = find_violations(code) + assert len(violations) == 1 + + def test_count_violations_multiple_in_file(self, write_python_file): + code = """\ + @provide_session + def a(session=NEW_SESSION): + pass + + @provide_session + def b(x, session=NEW_SESSION): + pass + + @provide_session + def c(*, session=NEW_SESSION): + pass + """ + path = write_python_file(code) + assert _count_violations(path) == 2 + + def test_syntax_error_returns_no_violations(self, write_python_file): + path = write_python_file("def foo(:\n pass") + assert _count_violations(path) == 0 + + def test_invalid_utf8_does_not_crash(self, tmp_path): + path = tmp_path / "invalid_utf8.py" + path.write_bytes(b"# bad byte: \xff\n@provide_session\ndef foo(session=NEW_SESSION):\n pass\n") + + assert _count_violations(path) == 1 + + +class TestAllowlistManager: + def test_load_missing_file_returns_empty(self, tmp_path): + manager = AllowlistManager(tmp_path / "missing.txt") + assert manager.load() == {} + + def test_save_and_load_round_trip(self, tmp_path): + manager = AllowlistManager(tmp_path / "allowlist.txt") + manager.save({"b/file.py": 2, "a/file.py": 1}) + # Sorted by key in the file + text = (tmp_path / "allowlist.txt").read_text() + assert text.splitlines() == ["a/file.py::1", "b/file.py::2"] + assert manager.load() == {"a/file.py": 1, "b/file.py": 2} + + def test_load_skips_blank_and_malformed_lines(self, tmp_path): + path = tmp_path / "allowlist.txt" + path.write_text("\nvalid/file.py::3\nnocount\n::5\nbad::notanumber\n") + assert AllowlistManager(path).load() == {"valid/file.py": 3} + + @pytest.mark.usefixtures("create_fake_repo") + def test_load_skips_unsafe_entries(self, tmp_path): + """Entries that escape REPO_ROOT (absolute paths or `..` segments) are ignored.""" + path = tmp_path / "allowlist.txt" + path.write_text("airflow-core/src/airflow/safe.py::1\n../escape.py::1\n/etc/passwd::1\n") + # `create_fake_repo` patches REPO_ROOT to tmp_path so the 
safety check is meaningful. + assert AllowlistManager(path).load() == {"airflow-core/src/airflow/safe.py": 1} + + +class TestCheckProvideSessionKwargs: + def test_no_violations_in_clean_file(self, create_fake_repo, tmp_path): + path = create_fake_repo( + "airflow-core/src/airflow/clean.py", + """\ + @provide_session + def foo(*, session=NEW_SESSION): + pass + """, + ) + manager = AllowlistManager(tmp_path / "allowlist.txt") + assert _check_provide_session_kwargs([path], {}, manager) == 0 + + def test_new_violation_fails(self, create_fake_repo, tmp_path): + path = create_fake_repo( + "airflow-core/src/airflow/bad.py", + """\ + @provide_session + def foo(session=NEW_SESSION): + pass + """, + ) + manager = AllowlistManager(tmp_path / "allowlist.txt") + assert _check_provide_session_kwargs([path], {}, manager) == 1 + + def test_violation_within_allowlist_passes(self, create_fake_repo, tmp_path): + path = create_fake_repo( + "airflow-core/src/airflow/grandfathered.py", + """\ + @provide_session + def foo(session=NEW_SESSION): + pass + """, + ) + manager = AllowlistManager(tmp_path / "allowlist.txt") + allowlist = {"airflow-core/src/airflow/grandfathered.py": 1} + assert _check_provide_session_kwargs([path], allowlist, manager) == 0 + + def test_exceeding_allowlist_fails(self, create_fake_repo, tmp_path): + path = create_fake_repo( + "airflow-core/src/airflow/grew.py", + """\ + @provide_session + def a(session=NEW_SESSION): + pass + + @provide_session + def b(session=NEW_SESSION): + pass + """, + ) + manager = AllowlistManager(tmp_path / "allowlist.txt") + allowlist = {"airflow-core/src/airflow/grew.py": 1} + assert _check_provide_session_kwargs([path], allowlist, manager) == 1 + + def test_reducing_violations_tightens_allowlist(self, create_fake_repo, tmp_path): + path = create_fake_repo( + "airflow-core/src/airflow/improved.py", + """\ + @provide_session + def foo(session=NEW_SESSION): + pass + + @provide_session + def bar(*, session=NEW_SESSION): + pass + """, + ) + 
manager = AllowlistManager(tmp_path / "allowlist.txt") + allowlist = {"airflow-core/src/airflow/improved.py": 2} + # Exit non-zero so pre-commit reports the modified allowlist + assert _check_provide_session_kwargs([path], allowlist, manager) == 1 + assert manager.load() == {"airflow-core/src/airflow/improved.py": 1} + + def test_fixing_all_violations_removes_entry(self, create_fake_repo, tmp_path): + path = create_fake_repo( + "airflow-core/src/airflow/fixed.py", + """\ + @provide_session + def foo(*, session=NEW_SESSION): + pass + """, + ) + manager = AllowlistManager(tmp_path / "allowlist.txt") + allowlist = {"airflow-core/src/airflow/fixed.py": 1} + assert _check_provide_session_kwargs([path], allowlist, manager) == 1 + assert manager.load() == {} + + def test_non_python_file_is_skipped(self, create_fake_repo, tmp_path): + path = create_fake_repo( + "airflow-core/src/airflow/not_python.txt", "@provide_session\ndef foo(session=N): pass\n" + ) + manager = AllowlistManager(tmp_path / "allowlist.txt") + assert _check_provide_session_kwargs([path], {}, manager) == 0 + + @pytest.mark.usefixtures("create_fake_repo") + def test_missing_allowlist_file_fails_loudly(self, tmp_path): + """Passing the allowlist path when the file is missing must fail, not silently pass.""" + allowlist_path = tmp_path / "allowlist.txt" + manager = AllowlistManager(allowlist_path) + assert not allowlist_path.exists() + assert _check_provide_session_kwargs([allowlist_path.resolve()], {}, manager) == 1 + + +class TestExpandForAllowlistEdits: + def test_unchanged_when_allowlist_not_in_paths(self, create_fake_repo, tmp_path): + py = create_fake_repo("airflow-core/src/airflow/x.py", "pass") + manager = AllowlistManager(tmp_path / "allowlist.txt") + assert _expand_for_allowlist_edits([py], manager, {"airflow-core/src/airflow/x.py": 1}) == [py] + + def test_appends_allowlisted_files_when_allowlist_edited(self, create_fake_repo, tmp_path): + allowlist_path = tmp_path / "allowlist.txt" + manager = 
AllowlistManager(allowlist_path) + listed = create_fake_repo("airflow-core/src/airflow/listed.py", "pass") + # Pass a resolved path — matches production behavior (``main()`` resolves argv). + result = _expand_for_allowlist_edits( + [allowlist_path.resolve()], + manager, + {"airflow-core/src/airflow/listed.py": 1, "airflow-core/src/airflow/gone.py": 1}, + ) + assert allowlist_path.resolve() in result + assert listed in result + # File in allowlist that does not exist on disk should be ignored. + assert (tmp_path / "airflow-core/src/airflow/gone.py").resolve() not in result + + def test_detection_robust_to_symlinked_allowlist(self, create_fake_repo, tmp_path): + """A symlink pointing at the allowlist file must still trigger expansion.""" + allowlist_path = tmp_path / "allowlist.txt" + manager = AllowlistManager(allowlist_path) + listed = create_fake_repo("airflow-core/src/airflow/listed.py", "pass") + manager.save({"airflow-core/src/airflow/listed.py": 1}) + + symlink = tmp_path / "allowlist_link.txt" + symlink.symlink_to(allowlist_path) + + # Production resolves argv before calling the helper — a symlinked path resolves + # to the real allowlist file and must be recognised as an allowlist edit. + result = _expand_for_allowlist_edits([symlink.resolve()], manager, manager.load()) + + assert listed in result + + def test_includes_parse_tracked_allowlist_entries_when_removed( + self, create_fake_repo, create_git_repo, tmp_path + ): + """Removing an entry from the allowlist must still re-check the previously-listed file.""" + rel = "airflow-core/src/airflow/dropped.py" + create_fake_repo( + rel, + """\ + @provide_session + def foo(session=NEW_SESSION): + pass + """, + ) + allowlist_path = tmp_path / "allowlist.txt" + manager = AllowlistManager(allowlist_path) + manager.save({rel: 1}) + create_git_repo("seed allowlist at HEAD") + + # Working tree: remove the entry, but the offending file still exists. 
+ allowlist_path.write_text("") + current = manager.load() + assert current == {} + + expanded = _expand_for_allowlist_edits([allowlist_path.resolve()], manager, current) + # The previously-listed file must be re-validated. + assert (tmp_path / rel).resolve() in expanded + + # And the full check should fail because the file still has positional sessions. + assert _check_provide_session_kwargs(expanded, current, manager) == 1 + + @pytest.mark.usefixtures("create_fake_repo") + def test_parse_tracked_allowlist_empty_when_no_git_history(self, tmp_path): + """Without a git repo the git-tracked allowlist lookup returns empty and does not crash.""" + manager = AllowlistManager(tmp_path / "allowlist.txt") + assert _parse_tracked_allowlist(manager) == {} + + def test_re_validates_listed_files_so_loosening_cannot_bypass(self, create_fake_repo, tmp_path, capsys): + """Editing only the allowlist must still trigger validation of listed files.""" + rel = "airflow-core/src/airflow/loosened.py" + create_fake_repo( + rel, + """\ + @provide_session + def foo(session=NEW_SESSION): + pass + + @provide_session + def bar(session=NEW_SESSION): + pass + """, + ) + allowlist_path = tmp_path / "allowlist.txt" + manager = AllowlistManager(allowlist_path) + # Allowlist loosened to 5 although file only has 2 positional sessions. + allowlist = {rel: 5} + manager.save(allowlist) + + # Only the allowlist file is "changed"; without re-validation this would return 0. + # Resolve the path to mirror what ``main()`` does in production. + paths = _expand_for_allowlist_edits([allowlist_path.resolve()], manager, allowlist) + rc = _check_provide_session_kwargs(paths, allowlist, manager) + + # Tightened from 5 -> 2, so the hook exits non-zero to surface the modified allowlist. 
+ assert rc == 1 + assert manager.load() == {rel: 2} + + +class TestCleanup: + def test_cleanup_removes_stale_entries(self, create_fake_repo, tmp_path): + create_fake_repo("airflow-core/src/airflow/keeper.py", "pass") + allowlist_path = tmp_path / "allowlist.txt" + manager = AllowlistManager(allowlist_path) + manager.save( + { + "airflow-core/src/airflow/keeper.py": 1, + "airflow-core/src/airflow/gone.py": 1, + } + ) + assert manager.cleanup() == 0 + assert manager.load() == {"airflow-core/src/airflow/keeper.py": 1} + + def test_cleanup_empty_allowlist(self, tmp_path): + manager = AllowlistManager(tmp_path / "allowlist.txt") + assert manager.cleanup() == 0 From fd9241cdf0bb64d5b3c4619be83619db62671824 Mon Sep 17 00:00:00 2001 From: Amogh Desai Date: Thu, 28 May 2026 10:05:26 +0530 Subject: [PATCH 14/28] Add `ResumableJobMixin` with `SparkSubmitOperator` as a case study for surviving worker failures (standalone) (#67118) --- providers/apache/spark/docs/index.rst | 2 + providers/apache/spark/docs/operators.rst | 21 ++ providers/apache/spark/provider.yaml | 16 ++ providers/apache/spark/pyproject.toml | 2 + .../apache/spark/get_provider_info.py | 10 + .../apache/spark/hooks/spark_submit.py | 52 ++-- .../apache/spark/operators/spark_submit.py | 147 +++++++++- .../apache/spark/hooks/test_spark_submit.py | 30 ++ .../spark/operators/test_spark_submit.py | 262 +++++++++++++++++- scripts/ci/prek/known_airflow_exceptions.txt | 2 +- task-sdk/docs/api.rst | 2 + task-sdk/src/airflow/sdk/__init__.py | 3 + .../src/airflow/sdk/bases/resumablemixin.py | 167 +++++++++++ .../task_sdk/bases/test_resumablemixin.py | 177 ++++++++++++ uv.lock | 4 + 15 files changed, 870 insertions(+), 27 deletions(-) create mode 100644 task-sdk/src/airflow/sdk/bases/resumablemixin.py create mode 100644 task-sdk/tests/task_sdk/bases/test_resumablemixin.py diff --git a/providers/apache/spark/docs/index.rst b/providers/apache/spark/docs/index.rst index eb3cadfb81b85..5138bf8952d32 100644 --- 
a/providers/apache/spark/docs/index.rst +++ b/providers/apache/spark/docs/index.rst @@ -104,6 +104,8 @@ PIP package Version required ``apache-airflow-providers-common-compat`` ``>=1.12.0`` ``pyspark-client`` ``>=4.0.0`` ``grpcio-status`` ``>=1.67.0`` +``requests`` ``>=2.32.0`` +``tenacity`` ``>=8.3.0`` ========================================== ================== Cross provider package dependencies diff --git a/providers/apache/spark/docs/operators.rst b/providers/apache/spark/docs/operators.rst index 125039ebdf31a..0a64554232386 100644 --- a/providers/apache/spark/docs/operators.rst +++ b/providers/apache/spark/docs/operators.rst @@ -181,3 +181,24 @@ Reference """"""""" For further information, look at `Apache Spark submitting applications `_. + +Cluster mode crash recovery (Spark standalone) +""""""""""""""""""""""""""""""""""""""""""""""" + +When running in Spark standalone cluster mode (``--deploy-mode cluster``), the Spark driver runs +independently on the cluster. If the Airflow worker dies while the Spark job is running, the driver keeps running but +Airflow loses track of it. A retry that submits a brand new job would waste +the compute already done, or even cause conflicts if the Spark job itself is not designed to be idempotent. + +The ``SparkSubmitOperator`` solves this by persisting the driver ID to ``task_state`` immediately after +submission. On retry, it reads the ID back and reconnects to the already-running driver instead of +resubmitting. + +This is the **synchronous path** — the worker holds a slot for the duration of polling. This is +a crash-safety net for teams running sync operators for log observability, org constraints, or +because a Triggerer is not available. Teams with a Triggerer available may also consider +deferrable operators, which free the worker slot but may come with added complexity. + +.. note:: + Crash recovery in cluster mode requires Airflow 3.3+ (``task_state`` support). 
On earlier + versions the operator falls back to the previous behavior of always submitting fresh. diff --git a/providers/apache/spark/provider.yaml b/providers/apache/spark/provider.yaml index 2fd094e6d752c..bb91a8e312796 100644 --- a/providers/apache/spark/provider.yaml +++ b/providers/apache/spark/provider.yaml @@ -210,6 +210,22 @@ connection-types: - string - 'null' format: password + rest-scheme: + label: REST scheme + description: Scheme for the Spark standalone REST API (http or https). Default is http. + schema: + type: + - string + - 'null' + default: http + rest-port: + label: REST port + description: Port for the Spark standalone REST API (spark.master.rest.port). Default is 6066. + schema: + type: + - string + - 'null' + default: '6066' task-decorators: - class-name: airflow.providers.apache.spark.decorators.pyspark.pyspark_task diff --git a/providers/apache/spark/pyproject.toml b/providers/apache/spark/pyproject.toml index 216c5c003daa6..e7f4fa480d3c1 100644 --- a/providers/apache/spark/pyproject.toml +++ b/providers/apache/spark/pyproject.toml @@ -63,6 +63,8 @@ dependencies = [ "apache-airflow-providers-common-compat>=1.12.0", "pyspark-client>=4.0.0", "grpcio-status>=1.67.0", + "requests>=2.32.0", + "tenacity>=8.3.0", ] # The optional dependencies should be modified in place in the generated file diff --git a/providers/apache/spark/src/airflow/providers/apache/spark/get_provider_info.py b/providers/apache/spark/src/airflow/providers/apache/spark/get_provider_info.py index b987115625719..ef09d0a6ae99a 100644 --- a/providers/apache/spark/src/airflow/providers/apache/spark/get_provider_info.py +++ b/providers/apache/spark/src/airflow/providers/apache/spark/get_provider_info.py @@ -126,6 +126,16 @@ def get_provider_info(): "description": "Run the command `base64 ` and use its output.", "schema": {"type": ["string", "null"], "format": "password"}, }, + "rest-scheme": { + "label": "REST scheme", + "description": "Scheme for the Spark standalone REST API 
(http or https). Default is http.", + "schema": {"type": ["string", "null"], "default": "http"}, + }, + "rest-port": { + "label": "REST port", + "description": "Port for the Spark standalone REST API (spark.master.rest.port). Default is 6066.", + "schema": {"type": ["string", "null"], "default": "6066"}, + }, }, }, ], diff --git a/providers/apache/spark/src/airflow/providers/apache/spark/hooks/spark_submit.py b/providers/apache/spark/src/airflow/providers/apache/spark/hooks/spark_submit.py index 62d18aac0491c..9aa3ddc885efb 100644 --- a/providers/apache/spark/src/airflow/providers/apache/spark/hooks/spark_submit.py +++ b/providers/apache/spark/src/airflow/providers/apache/spark/hooks/spark_submit.py @@ -160,6 +160,18 @@ def get_connection_form_widgets(cls) -> dict[str, Any]: description="Run the command `base64 ` and use its output.", validators=[Optional()], ), + "rest-scheme": StringField( + lazy_gettext("REST scheme"), + widget=BS3TextFieldWidget(), + description="Scheme for the Spark standalone REST API (http or https). Default: http.", + validators=[Optional()], + ), + "rest-port": StringField( + lazy_gettext("REST port"), + widget=BS3TextFieldWidget(), + description="Port for the Spark standalone REST API (spark.master.rest.port). 
Default: 6066.", + validators=[Optional()], + ), } def __init__( @@ -258,7 +270,7 @@ def _resolve_should_track_driver_status(self) -> bool: def _resolve_connection(self) -> dict[str, Any]: # Build from connection master or default to yarn if not available - conn_data = { + conn_data: dict[str, Any] = { "master": "yarn", "queue": None, # yarn queue "deploy_mode": None, @@ -266,6 +278,9 @@ def _resolve_connection(self) -> dict[str, Any]: "namespace": None, "principal": self._principal, "keytab": self._keytab, + # fallback if connection lookup fails; overridden by rest-scheme/rest-port extras below + "rest_scheme": "http", + "rest_port": 6066, } try: @@ -308,6 +323,8 @@ def _resolve_connection(self) -> dict[str, Any]: ) conn_data["spark_binary"] = self.spark_binary conn_data["namespace"] = extra.get("namespace") + conn_data["rest_scheme"] = extra.get("rest-scheme", "http") + conn_data["rest_port"] = int(extra.get("rest-port", 6066)) if conn_data["principal"] is None: conn_data["principal"] = extra.get("principal") if conn_data["keytab"] is None: @@ -587,7 +604,7 @@ def _run_post_submit_commands(self) -> None: except Exception as exc: self.log.warning("Post-submit command raised an exception: %s. Error: %s", cmd, exc) - def submit(self, application: str = "", **kwargs: Any) -> None: + def submit(self, application: str = "", **kwargs: Any) -> str | None: """ Remote Popen to execute the spark-submit job. @@ -626,27 +643,18 @@ def submit(self, application: str = "", **kwargs: Any) -> None: f"Cannot execute: {self._mask_cmd(spark_submit_cmd)}. Error code is: {returncode}." 
) - self.log.debug("Should track driver: %s", self._should_track_driver_status) - - # We want the Airflow job to wait until the Spark driver is finished - if self._should_track_driver_status: - if self._driver_id is None: - raise AirflowException( - "No driver id is known: something went wrong when executing the spark submit command" - ) - - # We start with the SUBMITTED status as initial status - self._driver_status = "SUBMITTED" - - # Start tracking the driver status (blocking function) - self._start_driver_status_tracking() - - if self._driver_status != "FINISHED": - raise AirflowException( - f"ERROR : Driver {self._driver_id} badly exited with status {self._driver_status}" - ) + if self._should_track_driver_status and self._driver_id is None: + raise AirflowException( + "No driver id is known: something went wrong when executing the spark submit command" + ) finally: - self._run_post_submit_commands() + # In cluster mode with driver tracking, the operator calls poll_until_complete + # after submit() returns, so post_submit_commands are deferred there to preserve + # the "runs after job finishes" contract. In all other modes, run them here. 
+ if not self._should_track_driver_status: + self._run_post_submit_commands() + + return self._driver_id def _process_spark_submit_log(self, itr: Iterator[Any]) -> None: """ diff --git a/providers/apache/spark/src/airflow/providers/apache/spark/operators/spark_submit.py b/providers/apache/spark/src/airflow/providers/apache/spark/operators/spark_submit.py index 0e67fa5b50d21..76b010107da1d 100644 --- a/providers/apache/spark/src/airflow/providers/apache/spark/operators/spark_submit.py +++ b/providers/apache/spark/src/airflow/providers/apache/spark/operators/spark_submit.py @@ -18,7 +18,10 @@ from __future__ import annotations from collections.abc import Sequence -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast + +import requests +from tenacity import retry, stop_after_attempt, wait_fixed from airflow.providers.apache.spark.hooks.spark_submit import SparkSubmitHook from airflow.providers.common.compat.openlineage.utils.spark import ( @@ -27,11 +30,30 @@ ) from airflow.providers.common.compat.sdk import BaseOperator, conf +try: + from airflow.sdk.bases.resumablemixin import ResumableJobMixin +except ImportError: + # Airflow 2 compat. + # ResumableJobMixin does not exist in Airflow 2, so we need to add a stub to make it + # behave as before + class ResumableJobMixin: # type: ignore[no-redef] + """Airflow 2 stub — no task_state, always submits fresh.""" + + external_id_key: str = "remote_job_id" + + def execute_resumable(self, context): + external_id = self.submit_job(context) + self.poll_until_complete(external_id, context) + return self.get_job_result(external_id, context) + + if TYPE_CHECKING: + from pydantic import JsonValue + from airflow.providers.common.compat.sdk import Context -class SparkSubmitOperator(BaseOperator): +class SparkSubmitOperator(ResumableJobMixin, BaseOperator): """ Wrap the spark-submit binary to kick off a spark-submit job; requires "spark-submit" binary in the PATH. 
@@ -88,6 +110,10 @@ class SparkSubmitOperator(BaseOperator): Useful for cleaning up sidecars such as Istio. Failures produce a warning but do not fail the task. """ + # Generic key used across all Spark deployment modes (standalone driver ID, + # YARN application ID, K8s driver pod name). + external_id_key = "spark_job_id" + template_fields: Sequence[str] = ( "application", "conf", @@ -141,6 +167,7 @@ def __init__( deploy_mode: str | None = None, use_krb5ccache: bool = False, post_submit_commands: list[str] | None = None, + reconnect_on_retry: bool = True, openlineage_inject_parent_job_info: bool = conf.getboolean( "openlineage", "spark_inject_parent_job_info", fallback=False ), @@ -184,6 +211,7 @@ def __init__( self._conn_id = conn_id self._use_krb5ccache = use_krb5ccache + self.reconnect_on_retry = reconnect_on_retry self._openlineage_inject_parent_job_info = openlineage_inject_parent_job_info self._openlineage_inject_transport_info = openlineage_inject_transport_info @@ -198,7 +226,120 @@ def execute(self, context: Context) -> None: self.conf = inject_transport_information_into_spark_properties(self.conf, context) if self._hook is None: self._hook = self._get_hook() - self._hook.submit(self.application) + hook = self._hook + if hook._should_track_driver_status: + if self.reconnect_on_retry: + return self.execute_resumable(context) + # reconnect_on_retry=False: still submit-and-poll, just skip task_state persistence. 
+ driver_id = self.submit_job(context) + self.poll_until_complete(driver_id, context) + return self.get_job_result(driver_id, context) + hook.submit(self.application) + + def submit_job(self, context: Context) -> str: + if self._hook is None: + self._hook = self._get_hook() + driver_id = self._hook.submit(self.application) + if not driver_id: + raise RuntimeError("spark-submit did not return a driver ID") + self.log.info("Spark driver submitted: %s", driver_id) + return driver_id + + def get_job_status(self, external_id: JsonValue) -> str: + # called from submit_job which always returns a str (Spark driver IDs are strings) + external_id = cast("str", external_id) + if self._hook is None: + self._hook = self._get_hook() + # The YARN and K8s branches below (and in is_job_active, is_job_succeeded, poll_until_complete) + # are currently unreachable: execute_resumable is only called when _should_track_driver_status + # is True, which requires spark:// + cluster mode. They are scaffolding for a follow-up PR + # that extends ResumableJobMixin support to YARN and Kubernetes. + if self._hook._is_yarn: + # TODO: call YARN ResourceManager REST API + # GET http://rm:8088/ws/v1/cluster/apps/{external_id} + raise NotImplementedError("YARN job status not yet implemented") + if self._hook._is_kubernetes: + # TODO: call K8s pod status API + raise NotImplementedError("K8s job status not yet implemented") + scheme = self._hook._connection.get("rest_scheme", "http") + rest_port = self._hook._connection.get("rest_port", 6066) + # HA master URLs can look like spark://m1:7077,m2:7077 — try each host in order. + # The master URL port (e.g. 7077) is the RPC port — not the REST API port. + # Use rest-port connection extra to override spark.master.rest.port (default 6066). 
+ master_urls = self._hook._connection["master"].replace("spark://", "").split(",") + last_exc: Exception = RuntimeError("No Spark masters to query") + for m in master_urls: + host = m.strip().split(":")[0] + url = f"{scheme}://{host}:{rest_port}/v1/submissions/status/{external_id}" + try: + status = self._fetch_driver_status(url, external_id) + return status + except Exception as e: + self.log.warning("Could not reach Spark master %s: %s", host, e) + last_exc = e + raise last_exc + + @retry(stop=stop_after_attempt(3), wait=wait_fixed(1), reraise=True) + def _fetch_driver_status(self, url: str, external_id: str) -> str: + response = requests.get(url, timeout=30) + response.raise_for_status() + # "success:false" means the master does not recognise the driver ID or is in recovery. + # https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/deploy/master/DriverState.scala + data = response.json() + if not data.get("success"): + raise RuntimeError( + f"Spark REST API returned failure for {external_id}: {data.get('message', 'unknown error')}" + ) + status = data["driverState"] + self.log.info("Driver %s status: %s", external_id, status) + return status + + def is_job_active(self, status: str) -> bool: + if self._hook is None: + self._hook = self._get_hook() + status = status.upper() + if self._hook._is_yarn: + # https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html + return status in ("NEW", "NEW_SAVING", "SUBMITTED", "ACCEPTED", "RUNNING") + if self._hook._is_kubernetes: + return status in ("PENDING", "RUNNING") + # RELAUNCHING: driver is being restarted after a failure, still alive. + # UNKNOWN: master is in failure recovery, state is temporarily unavailable. 
+ # https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/deploy/master/DriverState.scala + return status in ("SUBMITTED", "RUNNING", "RELAUNCHING", "UNKNOWN") + + def is_job_succeeded(self, status: str) -> bool: + if self._hook is None: + self._hook = self._get_hook() + status = status.upper() + if self._hook._is_kubernetes: + return status == "SUCCEEDED" + # standalone and YARN both use FINISHED + return status == "FINISHED" + + def poll_until_complete(self, external_id: JsonValue, context: Context) -> None: + # called from submit_job which always returns a str (Spark driver IDs are strings) + external_id = cast("str", external_id) + if self._hook is None: + self._hook = self._get_hook() + if self._hook._is_yarn: + # TODO: poll YARN ResourceManager until app reaches terminal state + raise NotImplementedError("YARN poll not yet implemented") + if self._hook._is_kubernetes: + # TODO: poll K8s pod phase until terminal + raise NotImplementedError("K8s poll not yet implemented") + self.log.info("Polling driver %s until completion", external_id) + self._hook._driver_id = external_id + try: + self._hook._start_driver_status_tracking() + if self._hook._driver_status != "FINISHED": + raise RuntimeError(f"Driver {external_id} exited with status {self._hook._driver_status}") + finally: + # post-submit commands must fire whether the job succeeded or failed. 
+ self._hook._run_post_submit_commands() + + def get_job_result(self, external_id: JsonValue, context: Context) -> None: + return None def on_kill(self) -> None: if self._hook is None: diff --git a/providers/apache/spark/tests/unit/apache/spark/hooks/test_spark_submit.py b/providers/apache/spark/tests/unit/apache/spark/hooks/test_spark_submit.py index 1b2feaa33e9aa..c909e9f12ab55 100644 --- a/providers/apache/spark/tests/unit/apache/spark/hooks/test_spark_submit.py +++ b/providers/apache/spark/tests/unit/apache/spark/hooks/test_spark_submit.py @@ -397,6 +397,8 @@ def test_resolve_connection_yarn_default(self, sdk_connection_not_found): "namespace": None, "principal": None, "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert dict_cmd["--master"] == "yarn" @@ -420,6 +422,8 @@ def test_resolve_connection_yarn_default_connection(self): "namespace": None, "principal": None, "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert dict_cmd["--master"] == "yarn" @@ -443,6 +447,8 @@ def test_resolve_connection_mesos_default_connection(self): "namespace": None, "principal": None, "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert dict_cmd["--master"] == "mesos://host:5050" @@ -465,6 +471,8 @@ def test_resolve_connection_spark_yarn_cluster_connection(self): "namespace": None, "principal": None, "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert dict_cmd["--master"] == "yarn://yarn-master" @@ -489,6 +497,8 @@ def test_resolve_connection_spark_k8s_cluster_connection(self): "namespace": "mynamespace", "principal": None, "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert dict_cmd["--master"] == "k8s://https://k8s-master" @@ -515,6 +525,8 @@ def 
test_resolve_connection_spark_k8s_cluster_ns_conf(self): "namespace": "airflow", "principal": None, "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert dict_cmd["--master"] == "k8s://https://k8s-master" @@ -538,6 +550,8 @@ def test_resolve_connection_spark_binary_set_connection(self): "namespace": None, "principal": None, "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert cmd[0] == "spark2-submit" @@ -559,6 +573,8 @@ def test_resolve_connection_spark_binary_spark3_submit_set_connection(self): "namespace": None, "principal": None, "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert cmd[0] == "spark3-submit" @@ -619,6 +635,8 @@ def test_resolve_connection_spark_binary_default_value_override(self): "namespace": None, "principal": None, "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert cmd[0] == "spark3-submit" @@ -641,6 +659,8 @@ def test_resolve_connection_spark_binary_default_value(self): "namespace": None, "principal": None, "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert cmd[0] == "spark-submit" @@ -662,6 +682,8 @@ def test_resolve_connection_spark_standalone_cluster_connection(self): "namespace": None, "principal": None, "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert cmd[0] == "spark-submit" @@ -684,6 +706,8 @@ def test_resolve_connection_principal_set_connection(self): "namespace": None, "principal": "user/spark@airflow.org", "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert dict_cmd["--principal"] == "user/spark@airflow.org" @@ -706,6 +730,8 @@ def 
test_resolve_connection_principal_value_override(self): "namespace": None, "principal": "will-override", "keytab": None, + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert dict_cmd["--principal"] == "will-override" @@ -732,6 +758,8 @@ def test_resolve_connection_keytab_set_connection(self, mock_create_keytab_path_ "namespace": None, "principal": None, "keytab": "privileged_user.keytab", + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert dict_cmd["--keytab"] == "privileged_user.keytab" @@ -757,6 +785,8 @@ def test_resolve_connection_keytab_value_override(self, mock_create_keytab_path_ "namespace": None, "principal": None, "keytab": "will-override", + "rest_scheme": "http", + "rest_port": 6066, } assert connection == expected_spark_connection assert dict_cmd["--keytab"] == "will-override" diff --git a/providers/apache/spark/tests/unit/apache/spark/operators/test_spark_submit.py b/providers/apache/spark/tests/unit/apache/spark/operators/test_spark_submit.py index 6a85ce1b92026..65af1116861bf 100644 --- a/providers/apache/spark/tests/unit/apache/spark/operators/test_spark_submit.py +++ b/providers/apache/spark/tests/unit/apache/spark/operators/test_spark_submit.py @@ -32,7 +32,7 @@ from tests_common.test_utils.dag import sync_dag_to_db from tests_common.test_utils.taskinstance import create_task_instance, render_template_fields -from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS +from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_3_PLUS DEFAULT_DATE = timezone.datetime(2017, 1, 1) @@ -321,6 +321,7 @@ def test_inject_simple_openlineage_config_to_spark(self, mock_get_openlineage_li openlineage_inject_transport_info=True, **self._config, ) + mock_get_hook.return_value._should_track_driver_status = False operator.execute(MagicMock()) assert operator.conf == { @@ -387,6 +388,7 @@ def 
test_inject_composite_openlineage_config_to_spark(self, mock_get_openlineage openlineage_inject_transport_info=True, **self._config, ) + mock_get_hook.return_value._should_track_driver_status = False operator.execute({"ti": mock_ti}) assert operator.conf == { @@ -425,6 +427,7 @@ def test_inject_openlineage_composite_config_wrong_transport_to_spark( CompositeConfig.from_dict({"transports": {"test1": {"type": "console"}}}) ) + mock_get_hook.return_value._should_track_driver_status = False with caplog.at_level(logging.INFO): operator = SparkSubmitOperator( task_id="spark_submit_job", @@ -456,6 +459,7 @@ def test_inject_openlineage_simple_config_wrong_transport_to_spark( config=ConsoleConfig() ) + mock_get_hook.return_value._should_track_driver_status = False with caplog.at_level(logging.INFO): operator = SparkSubmitOperator( task_id="spark_submit_job", @@ -474,3 +478,259 @@ def test_inject_openlineage_simple_config_wrong_transport_to_spark( assert operator.conf == { "parquet.compression": "SNAPPY", } + + +class FakeTaskState: + """In-memory task state for tests.""" + + def __init__(self, stored: dict[str, str] | None = None): + self._store: dict[str, str] = dict(stored or {}) + + def get(self, key: str) -> str | None: + return self._store.get(key) + + def set(self, key: str, value: str) -> None: + self._store[key] = value + + +@pytest.mark.skipif( + not AIRFLOW_V_3_3_PLUS, + reason="ResumableJobMixin reconnect requires task_state, available in Airflow 3.3+", +) +class TestSparkSubmitOperatorResumable: + def setup_method(self): + args = {"owner": "airflow", "start_date": DEFAULT_DATE} + self.dag = DAG("test_resumable_dag", schedule=None, default_args=args) + + def _make_operator(self, **kwargs): + return SparkSubmitOperator(task_id="test", dag=self.dag, application="test.jar", **kwargs) + + def _make_hook(self, should_track=False, is_yarn=False, is_kubernetes=False): + hook = MagicMock() + hook._should_track_driver_status = should_track + hook._is_yarn = is_yarn + 
hook._is_kubernetes = is_kubernetes + hook._connection = {"master": "spark://localhost:7077"} + return hook + + def test_non_cluster_mode_calls_hook_submit_directly(self): + operator = self._make_operator() + operator._hook = self._make_hook(should_track=False) + + operator.execute(context={}) + + operator._hook.submit.assert_called_once_with("test.jar") + + def test_cluster_mode_first_run_persists_id_before_polling(self): + operator = self._make_operator() + operator._hook = self._make_hook(should_track=True) + operator._hook.submit.return_value = "driver-001" + + task_state = FakeTaskState() + persisted_before_poll = [] + + def track_poll(external_id, context): + persisted_before_poll.append(task_state.get("spark_job_id")) + + operator.poll_until_complete = track_poll + + operator.execute(context={"task_state": task_state}) + + operator._hook.submit.assert_called_once_with("test.jar") + assert persisted_before_poll == ["driver-001"] + + @pytest.mark.parametrize( + ("prior_status", "expect_submit", "expect_poll_id"), + [ + ("RUNNING", False, "driver-001"), + ("SUBMITTED", False, "driver-001"), + ("FINISHED", False, None), + ("FAILED", True, "driver-new"), + ("KILLED", True, "driver-new"), + ], + ) + def test_retry_behaviour_based_on_prior_driver_status(self, prior_status, expect_submit, expect_poll_id): + operator = self._make_operator() + operator._hook = self._make_hook(should_track=True) + operator._hook.submit.return_value = "driver-new" + task_state = FakeTaskState({"spark_job_id": "driver-001"}) + + operator.get_job_status = lambda external_id: prior_status + polled = [] + operator.poll_until_complete = lambda external_id, context: polled.append(external_id) + + operator.execute(context={"task_state": task_state}) + + if expect_submit: + operator._hook.submit.assert_called_once_with("test.jar") + else: + operator._hook.submit.assert_not_called() + + if expect_poll_id: + assert polled == [expect_poll_id] + else: + assert polled == [] + + def 
test_submits_fresh_when_task_state_unavailable(self): + operator = self._make_operator() + operator._hook = self._make_hook(should_track=True) + operator._hook.submit.return_value = "driver-001" + polled = [] + operator.poll_until_complete = lambda external_id, context: polled.append(external_id) + + # no task_state key in context + operator.execute(context={}) + + operator._hook.submit.assert_called_once_with("test.jar") + assert polled == ["driver-001"] + + def test_reconnect_on_retry_false_submits_fresh_and_polls(self): + operator = self._make_operator(reconnect_on_retry=False) + operator._hook = self._make_hook(should_track=True) + operator._hook.submit.return_value = "driver-new" + task_state = FakeTaskState({"spark_job_id": "driver-old"}) + polled = [] + operator.poll_until_complete = lambda external_id, context: polled.append(external_id) + + operator.execute(context={"task_state": task_state}) + # reconnect_on_retry=False: ignores prior driver ID, submits fresh, but still polls + operator._hook.submit.assert_called_once_with("test.jar") + assert polled == ["driver-new"] + + @pytest.mark.parametrize( + ("is_yarn", "is_kubernetes", "status", "expected_active", "expected_succeeded"), + [ + (False, False, "RUNNING", True, False), + (False, False, "SUBMITTED", True, False), + (False, False, "FINISHED", False, True), + (False, False, "FAILED", False, False), + (True, False, "RUNNING", True, False), + (True, False, "ACCEPTED", True, False), + (True, False, "NEW", True, False), + (True, False, "FINISHED", False, True), + (True, False, "FAILED", False, False), + (False, True, "Running", True, False), + (False, True, "Pending", True, False), + (False, True, "Succeeded", False, True), + (False, True, "Failed", False, False), + ], + ) + def test_job_status_mappings(self, is_yarn, is_kubernetes, status, expected_active, expected_succeeded): + operator = self._make_operator() + operator._hook = self._make_hook(is_yarn=is_yarn, is_kubernetes=is_kubernetes) + + assert 
operator.is_job_active(status) == expected_active + assert operator.is_job_succeeded(status) == expected_succeeded + + @pytest.mark.parametrize( + ("response_json", "expected_status", "expected_error"), + [ + ({"success": True, "driverState": "RUNNING"}, "RUNNING", None), + ({"success": False, "message": "driver not found"}, None, "driver not found"), + ({"driverState": "RUNNING"}, None, "unknown error"), + ], + ) + def test_get_job_status(self, response_json, expected_status, expected_error): + operator = self._make_operator() + operator._hook = self._make_hook(should_track=True) + mock_response = MagicMock() + mock_response.json.return_value = response_json + + with mock.patch("requests.get", return_value=mock_response): + if expected_error: + with pytest.raises(RuntimeError, match=expected_error): + operator.get_job_status("driver-001") + else: + assert operator.get_job_status("driver-001") == expected_status + + def test_get_job_status_ha_tries_next_master(self): + operator = self._make_operator() + hook = self._make_hook(should_track=True) + # Master URL port (7077) is RPC — REST API must use 6066, not 7077 + hook._connection = {"master": "spark://m1:7077,m2:7077"} + operator._hook = hook + + good_response = MagicMock() + good_response.json.return_value = {"success": True, "driverState": "RUNNING"} + captured_urls = [] + + def side_effect(url, timeout): + captured_urls.append(url) + if "m1" in url: + raise ConnectionError("m1 unreachable") + return good_response + + with mock.patch("requests.get", side_effect=side_effect): + assert operator.get_job_status("driver-001") == "RUNNING" + + assert all(":6066/" in url for url in captured_urls), "REST API must use port 6066, not the RPC port" + + def test_get_job_status_ha_tries_next_master_on_success_false(self): + """success:false from m1 (e.g. 
HA recovery in progress) should fall through to m2.""" + operator = self._make_operator() + hook = self._make_hook(should_track=True) + hook._connection = {"master": "spark://m1:7077,m2:7077"} + operator._hook = hook + + bad_response = MagicMock() + bad_response.json.return_value = {"success": False, "message": "Driver not found"} + good_response = MagicMock() + good_response.json.return_value = {"success": True, "driverState": "RUNNING"} + + def side_effect(url, timeout): + if "m1" in url: + return bad_response + return good_response + + with mock.patch("requests.get", side_effect=side_effect): + assert operator.get_job_status("driver-001") == "RUNNING" + + def test_get_job_status_ha_raises_when_all_masters_unreachable(self): + operator = self._make_operator() + hook = self._make_hook(should_track=True) + hook._connection = {"master": "spark://m1:7077,m2:7077"} + operator._hook = hook + + with mock.patch("requests.get", side_effect=ConnectionError("unreachable")): + with pytest.raises(ConnectionError): + operator.get_job_status("driver-001") + + def test_get_job_status_uses_rest_scheme_from_connection(self): + operator = self._make_operator() + hook = self._make_hook(should_track=True) + hook._connection = {"master": "spark://myhost:6066", "rest_scheme": "https"} + operator._hook = hook + + mock_response = MagicMock() + mock_response.json.return_value = {"success": True, "driverState": "RUNNING"} + captured_urls = [] + + def capture(url, timeout): + captured_urls.append(url) + return mock_response + + with mock.patch("requests.get", side_effect=capture): + operator.get_job_status("driver-001") + + assert len(captured_urls) == 1 + assert captured_urls[0].startswith("https://") + + def test_poll_until_complete_runs_post_submit_on_failure(self): + """post_submit_commands must run even when the driver exits with a failure status.""" + operator = self._make_operator() + hook = self._make_hook(should_track=True) + hook._connection = {"master": "spark://myhost:7077"} + 
hook._driver_status = "FAILED" + + def simulate_failed_tracking(): + hook._driver_status = "FAILED" + + hook._start_driver_status_tracking = mock.MagicMock(side_effect=simulate_failed_tracking) + post_submit_called = [] + hook._run_post_submit_commands = mock.MagicMock(side_effect=lambda: post_submit_called.append(True)) + operator._hook = hook + + with pytest.raises(RuntimeError, match="FAILED"): + operator.poll_until_complete("driver-001", {}) + + assert post_submit_called, "_run_post_submit_commands must be called even on driver failure" diff --git a/scripts/ci/prek/known_airflow_exceptions.txt b/scripts/ci/prek/known_airflow_exceptions.txt index f1ddfbd1efc27..bd4570fc55f25 100644 --- a/scripts/ci/prek/known_airflow_exceptions.txt +++ b/scripts/ci/prek/known_airflow_exceptions.txt @@ -145,7 +145,7 @@ providers/apache/livy/src/airflow/providers/apache/livy/operators/livy.py::3 providers/apache/pig/src/airflow/providers/apache/pig/hooks/pig.py::1 providers/apache/pinot/src/airflow/providers/apache/pinot/hooks/pinot.py::1 providers/apache/spark/src/airflow/providers/apache/spark/hooks/spark_sql.py::2 -providers/apache/spark/src/airflow/providers/apache/spark/hooks/spark_submit.py::11 +providers/apache/spark/src/airflow/providers/apache/spark/hooks/spark_submit.py::10 providers/arangodb/src/airflow/providers/arangodb/hooks/arangodb.py::9 providers/arangodb/src/airflow/providers/arangodb/operators/arangodb.py::1 providers/atlassian/jira/src/airflow/providers/atlassian/jira/hooks/jira.py::1 diff --git a/task-sdk/docs/api.rst b/task-sdk/docs/api.rst index 8f3f3de953921..c2bb0a19fbcef 100644 --- a/task-sdk/docs/api.rst +++ b/task-sdk/docs/api.rst @@ -99,6 +99,8 @@ Bases .. autoapiclass:: airflow.sdk.SkipMixin +.. autoclass:: airflow.sdk.ResumableJobMixin + .. 
autoapiclass:: airflow.sdk.BaseHook Callbacks diff --git a/task-sdk/src/airflow/sdk/__init__.py b/task-sdk/src/airflow/sdk/__init__.py index eeae86f1eb3d0..ab834658c0f69 100644 --- a/task-sdk/src/airflow/sdk/__init__.py +++ b/task-sdk/src/airflow/sdk/__init__.py @@ -66,6 +66,7 @@ "PartitionMapper", "PokeReturnValue", "ProductMapper", + "ResumableJobMixin", "RetryAction", "RetryDecision", "RetryPolicy", @@ -117,6 +118,7 @@ cross_downstream, ) from airflow.sdk.bases.operatorlink import BaseOperatorLink + from airflow.sdk.bases.resumablemixin import ResumableJobMixin from airflow.sdk.bases.sensor import BaseSensorOperator, PokeReturnValue from airflow.sdk.bases.skipmixin import SkipMixin from airflow.sdk.bases.xcom import BaseXCom @@ -233,6 +235,7 @@ "PartitionMapper": ".definitions.partition_mappers.base", "PokeReturnValue": ".bases.sensor", "ProductMapper": ".definitions.partition_mappers.product", + "ResumableJobMixin": ".bases.resumablemixin", "RetryAction": ".definitions.retry_policy", "RetryDecision": ".definitions.retry_policy", "RetryPolicy": ".definitions.retry_policy", diff --git a/task-sdk/src/airflow/sdk/bases/resumablemixin.py b/task-sdk/src/airflow/sdk/bases/resumablemixin.py new file mode 100644 index 0000000000000..4e252d5474315 --- /dev/null +++ b/task-sdk/src/airflow/sdk/bases/resumablemixin.py @@ -0,0 +1,167 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from pydantic import JsonValue

    from airflow.sdk.definitions.context import Context
    from airflow.sdk.types import Logger


class ResumableJobMixin:
    """
    Mixin for operators that submit one long-running job to an external system and poll for completion.

    **Purpose:** This mixin makes the synchronous operator path crash-safe. It is not a replacement
    for deferrable operators — deferrable remains the recommended approach for long-running tasks when
    a Triggerer is available and the async model fits the team. This mixin is for teams already running
    synchronous operators who want worker crashes to reconnect to the existing job rather than
    resubmitting a duplicate.

    **How it works:** On the first run, after submitting the job, the external ID (driver ID, YARN
    application ID, etc.) is persisted to ``task_state`` before polling starts. On retry, the mixin
    reads that ID back and reconnects to the already-running job instead of starting a new one.

    **What it does not do:** It does not free the worker slot during polling (use deferrable for that),
    and it does not stream logs from the remote system (the operator controls that separately).

    Usage: call ``execute_resumable(context)`` from the operator's ``execute()`` when reconnection
    is supported.

    Subclasses must implement the methods specific to their external system. The mixin owns
    only ``execute_resumable()`` and the task_state read/write logic.

    Example::

        class MyOperator(ResumableJobMixin, BaseOperator):
            external_id_key = "my_job_id"

            def execute(self, context):
                return self.execute_resumable(context)

            def submit_job(self, context) -> JsonValue:
                return self.hook.submit(...)

            def get_job_status(self, external_id: JsonValue) -> str:
                return self.hook.get_status(external_id)

            def is_job_active(self, status: str) -> bool:
                return status in ("RUNNING", "PENDING")

            def is_job_succeeded(self, status: str) -> bool:
                return status == "SUCCEEDED"

            def poll_until_complete(self, external_id: JsonValue, context: Context) -> None:
                self.hook.poll(external_id)

            def get_job_result(self, external_id: JsonValue, context: Context) -> Any:
                return None
    """

    if TYPE_CHECKING:
        # log comes from BaseOperator (via LoggingMixin) at runtime, but mypy cannot see
        # that because ResumableJobMixin does not inherit from it directly.
        log: Logger

    # Key used to store and retrieve the external job ID from task_state across retries.
    # Renaming this on a deployed operator breaks in-flight retries — the old key is already stored.
    external_id_key: str = "remote_job_id"

    def execute_resumable(self, context: Context) -> Any:
        """
        Core of the resumable execution logic. Call this from execute() when reconnection is supported.

        On initial run: submits the job, persists the external ID to task_state, then polls and
        returns ``get_job_result()``.

        Behaviour on retry:
        - On retry with active job: skips submission, reconnects to the running job, polls it to
          completion and returns ``get_job_result()`` -- the same value a first run would return.
        - On retry with succeeded job: skips submission and polling, returns result immediately.
        - On retry with failed job: falls through and resubmits fresh.

        When the context carries no ``task_state`` entry, nothing is read or persisted and the job
        is always submitted fresh.

        Known limitation: there is a small window between ``submit_job`` returning and
        ``task_state.set`` completing. If the worker dies in that gap, the next retry still
        holds the previous (terminal) ID and will resubmit a fresh job rather than reconnecting.
        Closing this window would require atomic "submit + persist", which is not possible across
        an external system boundary.

        :param context: Task execution context; ``task_state`` is looked up with ``context.get``.
        :return: Whatever ``get_job_result()`` returns for the job that ended up being tracked.
        """
        task_state = context.get("task_state")

        if task_state is not None:
            external_id = task_state.get(self.external_id_key)
            if external_id:
                status = self.get_job_status(external_id)
                if self.is_job_active(status):
                    self.log.info(
                        "Reconnecting to existing job identified by: %s (status: %s)", external_id, status
                    )
                    # poll_until_complete() is declared to return None, so returning its value
                    # would drop the job result on a reconnect. Fetch the result explicitly so a
                    # reconnected run returns the same thing as an uninterrupted one.
                    self.poll_until_complete(external_id, context)
                    return self.get_job_result(external_id, context)
                if self.is_job_succeeded(status):
                    self.log.info(
                        "Job with identifier: %s already completed successfully, skipping resubmission",
                        external_id,
                    )
                    return self.get_job_result(external_id, context)
                self.log.info(
                    "Prior job with identifier: %s in terminal state %s, resubmitting fresh",
                    external_id,
                    status,
                )

        external_id = self.submit_job(context)

        # Persist before polling: if the worker dies while polling, the retry can find the job.
        if task_state is not None:
            task_state.set(self.external_id_key, external_id)

        self.poll_until_complete(external_id, context)
        return self.get_job_result(external_id, context)

    def submit_job(self, context: Context) -> JsonValue:
        """Submit the job to the external system. Return its external ID."""
        raise NotImplementedError

    def get_job_status(self, external_id: JsonValue) -> str:
        """Query the external system for the current job status."""
        raise NotImplementedError

    def is_job_active(self, status: str) -> bool:
        """
        Return True if the job is still running and can be reconnected to.

        ``status`` is a raw string returned by the external system — not an Airflow enum.
        Its values are backend-specific (e.g. ``"RUNNING"``, ``"Pending"``, ``"ContainerCreating"``).
        """
        raise NotImplementedError

    def is_job_succeeded(self, status: str) -> bool:
        """
        Return True if the job completed successfully.

        ``status`` is a raw string returned by the external system — not an Airflow enum.
        Its values are backend-specific (e.g. ``"FINISHED"``, ``"Succeeded"``).
        """
        raise NotImplementedError

    def poll_until_complete(self, external_id: JsonValue, context: Context) -> None:
        """Block until the job reaches a terminal state. Raise on failure."""
        raise NotImplementedError

    def get_job_result(self, external_id: JsonValue, context: Context) -> Any:
        """Return the job result after completion. Return None if not applicable."""
        raise NotImplementedError


# diff --git a/task-sdk/tests/task_sdk/bases/test_resumablemixin.py b/task-sdk/tests/task_sdk/bases/test_resumablemixin.py
# new file mode 100644
# index 0000000000000..8e95e132f3afc
# --- /dev/null
# +++ b/task-sdk/tests/task_sdk/bases/test_resumablemixin.py
# @@ -0,0 +1,177 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
+from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +from airflow.sdk.bases.operator import BaseOperator +from airflow.sdk.bases.resumablemixin import ResumableJobMixin + +if TYPE_CHECKING: + from pydantic import JsonValue + + +class ConcreteResumableOperator(ResumableJobMixin, BaseOperator): + """Minimal concrete implementation for testing the mixin.""" + + external_id_key = "test_job_id" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.submitted_ids: list[str] = [] + self.polled_ids: list[str] = [] + self._next_id = "job-001" + self._status_map: dict[str, str] = {} + self._active_statuses = {"RUNNING", "PENDING"} + self._succeeded_statuses = {"SUCCEEDED"} + + def submit_job(self, context) -> JsonValue: + self.submitted_ids.append(self._next_id) + return self._next_id + + def get_job_status(self, external_id: JsonValue) -> str: + return self._status_map.get(str(external_id), "UNKNOWN") + + def is_job_active(self, status: str) -> bool: + return status in self._active_statuses + + def is_job_succeeded(self, status: str) -> bool: + return status in self._succeeded_statuses + + def poll_until_complete(self, external_id: JsonValue, context) -> None: + self.polled_ids.append(str(external_id)) + + def get_job_result(self, external_id: JsonValue, context) -> str: + return f"result-of-{external_id}" + + +class FakeTaskState: + def __init__(self, stored: dict[str, str] | None = None): + self._store: dict[str, str] = stored or {} + + def get(self, key: str) -> str | None: + return self._store.get(key) + + def set(self, key: str, value: str) -> None: + self._store[key] = value + + +def make_context(task_state: FakeTaskState | None = None) -> dict: + ctx: dict = {} + if task_state is not None: + ctx["task_state"] = task_state + return ctx + + +class TestFirstSubmission: + def test_submits_and_polls_when_no_prior_state(self): + op = ConcreteResumableOperator(task_id="test_task") + task_state = FakeTaskState() + ctx = 
make_context(task_state) + + op.execute_resumable(ctx) + + assert op.submitted_ids == ["job-001"] + assert op.polled_ids == ["job-001"] + + def test_persists_external_id_before_polling(self): + """The ID must be in task_state before poll_until_complete is called.""" + op = ConcreteResumableOperator(task_id="test_task") + task_state = FakeTaskState() + persisted_at_poll: list[str | None] = [] + + original_set = task_state.set + + def set_and_track(key, value): + original_set(key, value) + + def poll_side_effect(external_id, context): + persisted_at_poll.append(task_state.get("test_job_id")) + + task_state.set = set_and_track + op.poll_until_complete = poll_side_effect + + op.execute_resumable(make_context(task_state)) + + assert persisted_at_poll == ["job-001"], "ID must be persisted before polling starts" + + def test_returns_job_result(self): + op = ConcreteResumableOperator(task_id="test_task") + result = op.execute_resumable(make_context(FakeTaskState())) + + assert result == "result-of-job-001" + + +class TestRetryWithDifferentJobStatuses: + def test_skips_submission_when_job_active(self): + op = ConcreteResumableOperator(task_id="test_task") + op._status_map["job-001"] = "RUNNING" + task_state = FakeTaskState({"test_job_id": "job-001"}) + ctx = make_context(task_state) + + op.execute_resumable(ctx) + + assert op.submitted_ids == [], "should not resubmit when job is active" + assert op.polled_ids == ["job-001"] + + def test_pending_status_also_skips_submission(self): + op = ConcreteResumableOperator(task_id="test_task") + op._status_map["job-001"] = "PENDING" + task_state = FakeTaskState({"test_job_id": "job-001"}) + + op.execute_resumable(make_context(task_state)) + + assert op.submitted_ids == [] + assert op.polled_ids == ["job-001"] + + def test_returns_result_immediately_without_polling(self): + op = ConcreteResumableOperator(task_id="test_task") + op._status_map["job-001"] = "SUCCEEDED" + task_state = FakeTaskState({"test_job_id": "job-001"}) + + result = 
op.execute_resumable(make_context(task_state)) + + assert op.submitted_ids == [], "should not resubmit" + assert op.polled_ids == [], "should not poll again" + assert result == "result-of-job-001" + + @pytest.mark.parametrize("status", ["FAILED", "KILLED", "ERROR", "UNKNOWN"]) + def test_resubmits_when_prior_job_in_terminal_failure(self, status): + op = ConcreteResumableOperator(task_id="test_task") + op._status_map["job-001"] = status + op._next_id = "job-002" + task_state = FakeTaskState({"test_job_id": "job-001"}) + + op.execute_resumable(make_context(task_state)) + + assert op.submitted_ids == ["job-002"], "should resubmit fresh" + assert op.polled_ids == ["job-002"] + + +class TestExternalIdKey: + def test_custom_key_used_for_storage_and_retrieval(self): + class CustomKeyOp(ConcreteResumableOperator): + external_id_key = "my_custom_key" + + op = CustomKeyOp(task_id="test_task") + task_state = FakeTaskState() + + op.execute_resumable(make_context(task_state)) + + assert task_state.get("my_custom_key") == "job-001" diff --git a/uv.lock b/uv.lock index 83759330bb2f5..a02c1fca513f4 100644 --- a/uv.lock +++ b/uv.lock @@ -3813,6 +3813,8 @@ dependencies = [ { name = "apache-airflow-providers-common-compat" }, { name = "grpcio-status" }, { name = "pyspark-client" }, + { name = "requests" }, + { name = "tenacity" }, ] [package.optional-dependencies] @@ -3849,6 +3851,8 @@ requires-dist = [ { name = "grpcio-status", specifier = ">=1.67.0" }, { name = "pyspark", marker = "extra == 'pyspark'", specifier = ">=4.0.0" }, { name = "pyspark-client", specifier = ">=4.0.0" }, + { name = "requests", specifier = ">=2.32.0" }, + { name = "tenacity", specifier = ">=8.3.0" }, ] provides-extras = ["cncf-kubernetes", "openlineage", "pyspark"] From b66f4433e004fad81b66023bd8caccee55e6e4f7 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Thu, 28 May 2026 08:55:01 +0200 Subject: [PATCH 15/28] Guard finally-block logger.info in HTTP access log middleware (#67501) MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ``finally`` block in ``HttpAccessLogMiddleware`` called ``logger.info()`` without exception protection. If ``logger.info()`` raised — broken handler, OOM in the formatter, downstream forwarder unavailable — and the original ``try`` block was already propagating an application exception, Python's ``finally``-replacement semantics would discard the original exception in favour of the logger's, so uvicorn never saw the real failure. Wrap the emit in ``contextlib.suppress(Exception)`` so logging failures never disrupt the application or mask the original exception. The HTTP response has already been sent to the client by the time we reach the log emit, so swallowing the logger failure costs nothing beyond a missing log line for that one request. --- .../api_fastapi/common/http_access_log.py | 24 +++++---- .../common/test_http_access_log.py | 52 +++++++++++++++++++ 2 files changed, 67 insertions(+), 9 deletions(-) diff --git a/airflow-core/src/airflow/api_fastapi/common/http_access_log.py b/airflow-core/src/airflow/api_fastapi/common/http_access_log.py index efaf570c58050..ad37b094a9f59 100644 --- a/airflow-core/src/airflow/api_fastapi/common/http_access_log.py +++ b/airflow-core/src/airflow/api_fastapi/common/http_access_log.py @@ -121,12 +121,18 @@ async def capture_send(message: Message) -> None: client = scope.get("client") client_addr = f"{client[0]}:{client[1]}" if client else None - logger.info( - "request finished", - method=method, - path=path, - query=query, - status_code=status, - duration_us=duration_us, - client_addr=client_addr, - ) + # Guard the log emit: if it raised inside a ``finally`` while the + # original ``try`` block was already propagating an app exception, + # Python's exception-replacement semantics would discard the + # original. Swallow logging failures so the application exception + # always reaches uvicorn intact. 
+ with contextlib.suppress(Exception): + logger.info( + "request finished", + method=method, + path=path, + query=query, + status_code=status, + duration_us=duration_us, + client_addr=client_addr, + ) diff --git a/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py b/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py index 243049aedbe57..882f93a3151d0 100644 --- a/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py +++ b/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py @@ -184,3 +184,55 @@ def test_logs_redact_sensitive_query_param(_password_sensitive_field): query = logs[0]["query"] assert "topsecret" not in query assert "keep=ok" in query + + +def test_logger_failure_does_not_mask_app_exception(monkeypatch): + """ + If ``logger.info`` raises while the app already raised, the original app exception must + still propagate (rather than being replaced by the logger's exception). + """ + import airflow.api_fastapi.common.http_access_log as mod + + def broken_info(*_args, **_kwargs): + raise RuntimeError("logger broken") + + monkeypatch.setattr(mod.logger, "info", broken_info) + + import asyncio + + async def raising_app(scope, receive, send): + # Send response.start so the middleware's response variable is populated, then raise. 
+ await send({"type": "http.response.start", "status": 503, "headers": []}) + raise RuntimeError("app exception") + + middleware = HttpAccessLogMiddleware(raising_app) + scope = { + "type": "http", + "method": "GET", + "path": "/boom", + "query_string": b"", + "headers": [], + "client": ("test", 1), + } + + async def receive(): + return {"type": "http.request", "body": b""} + + async def send(_message): + return None + + with pytest.raises(RuntimeError, match="app exception"): + asyncio.run(middleware(scope, receive, send)) + + +def test_logger_failure_swallowed_on_clean_request(monkeypatch): + """No app exception + a broken logger must not break the request.""" + import airflow.api_fastapi.common.http_access_log as mod + + monkeypatch.setattr( + mod.logger, "info", lambda *_a, **_kw: (_ for _ in ()).throw(RuntimeError("logger broken")) + ) + + client = TestClient(_make_app(), raise_server_exceptions=False) + response = client.get("/") + assert response.status_code == 200 From 7f54b63eb9d3211dee72443c204e1975e7060635 Mon Sep 17 00:00:00 2001 From: Pierre Jeambrun Date: Thu, 28 May 2026 13:53:27 +0200 Subject: [PATCH 16/28] UI: Add bulk Clear on the Dag Runs list page (#67564) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-introduces collective Clear on the Dag Runs list page — the Airflow 2.x ``DagRunModelView`` action that the Airflow 3 UI did not yet replicate (#63854). The button sits next to the bulk Delete shipped in #67095 and opens a dialog mirroring the existing single-run Clear: a segmented control (``Clear existing tasks`` / ``Clear only failed tasks`` / ``Queue up new tasks``), an affected-tasks preview grouped by run, and an optional note. 
No backend change is required — the dialog fans out the existing ``POST /dags/{dag_id}/dagRuns/{dag_run_id}/clear`` endpoint over the selected runs with ``Promise.allSettled``, then patches the note via ``PATCH /dags/{dag_id}/dagRuns/{dag_run_id}`` on the runs that succeeded. Per-run outcomes are surfaced via the partial-failure UX landed in #67284: successful rows are deselected, failures stay in the selection and appear as inline errors so the user can retry just the remaining set. Bulk Mark as success / failed on Dag Runs (the other half of #63854) is intentionally out of scope here. --- .../pages/DagRuns/BulkClearDagRunsButton.tsx | 120 ++++++++++++ .../airflow/ui/src/pages/DagRuns/DagRuns.tsx | 2 + .../ui/src/queries/useBulkClearDagRuns.ts | 181 ++++++++++++++++++ .../src/queries/useBulkClearDagRunsDryRun.ts | 82 ++++++++ 4 files changed, 385 insertions(+) create mode 100644 airflow-core/src/airflow/ui/src/pages/DagRuns/BulkClearDagRunsButton.tsx create mode 100644 airflow-core/src/airflow/ui/src/queries/useBulkClearDagRuns.ts create mode 100644 airflow-core/src/airflow/ui/src/queries/useBulkClearDagRunsDryRun.ts diff --git a/airflow-core/src/airflow/ui/src/pages/DagRuns/BulkClearDagRunsButton.tsx b/airflow-core/src/airflow/ui/src/pages/DagRuns/BulkClearDagRunsButton.tsx new file mode 100644 index 0000000000000..084a739e48328 --- /dev/null +++ b/airflow-core/src/airflow/ui/src/pages/DagRuns/BulkClearDagRunsButton.tsx @@ -0,0 +1,120 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { Button, Flex, Heading, VStack, useDisclosure } from "@chakra-ui/react"; +import { useState } from "react"; +import { useTranslation } from "react-i18next"; +import { CgRedo } from "react-icons/cg"; + +import type { DAGRunResponse } from "openapi/requests/types.gen"; +import { ActionAccordion } from "src/components/ActionAccordion"; +import { ActionErrors } from "src/components/ActionErrors"; +import { Dialog } from "src/components/ui"; +import SegmentedControl from "src/components/ui/SegmentedControl"; +import { useBulkClearDagRuns } from "src/queries/useBulkClearDagRuns"; +import { useBulkClearDagRunsDryRun } from "src/queries/useBulkClearDagRunsDryRun"; + +type Props = { + readonly deselectKeys: (keys: Array) => void; + readonly selectedDagRuns: Array; +}; + +const BulkClearDagRunsButton = ({ deselectKeys, selectedDagRuns }: Props) => { + const { t: translate } = useTranslation(["common", "dags"]); + const { onClose, onOpen, open } = useDisclosure(); + const [selectedOptions, setSelectedOptions] = useState>(["existingTasks"]); + const [note, setNote] = useState(null); + const { bulkClear, data, isPending } = useBulkClearDagRuns({ + deselectKeys, + onSuccessConfirm: onClose, + }); + + const handleClose = () => { + setNote(null); + onClose(); + }; + + const onlyFailed = selectedOptions.includes("onlyFailed"); + const onlyNew = selectedOptions.includes("newTasks"); + + const { data: affectedTasks, isFetching } = useBulkClearDagRunsDryRun(open, selectedDagRuns, { + onlyFailed, + onlyNew, + }); + + return ( + <> + + + + + + + + 
{translate("dags:runAndTaskActions.clear.title", { type: translate("dagRun_other") })} + + + + + + + + + + + + + + + + + + + ); +}; + +export default BulkClearDagRunsButton; diff --git a/airflow-core/src/airflow/ui/src/pages/DagRuns/DagRuns.tsx b/airflow-core/src/airflow/ui/src/pages/DagRuns/DagRuns.tsx index 5552fcb8ada34..797d91d2afeba 100644 --- a/airflow-core/src/airflow/ui/src/pages/DagRuns/DagRuns.tsx +++ b/airflow-core/src/airflow/ui/src/pages/DagRuns/DagRuns.tsx @@ -44,6 +44,7 @@ import { SearchParamsKeys, type SearchParamsKeysType } from "src/constants/searc import { useAdvancedSearchArg } from "src/hooks/useAdvancedSearch"; import { renderDuration, useAutoRefresh, isStatePending } from "src/utils"; +import BulkClearDagRunsButton from "./BulkClearDagRunsButton"; import BulkDeleteDagRunsButton from "./BulkDeleteDagRunsButton"; import { DagRunsFilters } from "./DagRunsFilters"; import DeleteRunButton from "./DeleteRunButton"; @@ -373,6 +374,7 @@ export const DagRuns = () => { {selectedRows.size} {translate("selected")} + diff --git a/airflow-core/src/airflow/ui/src/queries/useBulkClearDagRuns.ts b/airflow-core/src/airflow/ui/src/queries/useBulkClearDagRuns.ts new file mode 100644 index 0000000000000..33288985fd9f1 --- /dev/null +++ b/airflow-core/src/airflow/ui/src/queries/useBulkClearDagRuns.ts @@ -0,0 +1,181 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { useQueryClient } from "@tanstack/react-query"; +import { useState } from "react"; +import { useTranslation } from "react-i18next"; + +import { + UseDagRunServiceGetDagRunKeyFn, + useDagRunServiceGetDagRunsKey, + UseGanttServiceGetGanttDataKeyFn, + useTaskInstanceServiceGetMappedTaskInstanceKey, + useTaskInstanceServiceGetTaskInstanceKey, + useTaskInstanceServiceGetTaskInstancesKey, +} from "openapi/queries"; +import { DagRunService } from "openapi/requests/services.gen"; +import type { BulkActionResponse, DAGRunResponse } from "openapi/requests/types.gen"; +import { toaster } from "src/components/ui"; + +import { gridQueryKeys, tiPerAttemptQueryKeys } from "./gridViewQueryKeys"; +import { useBulkClearDagRunsDryRunKey } from "./useBulkClearDagRunsDryRun"; +import { useClearDagRunDryRunKey } from "./useClearDagRunDryRun"; + +type Props = { + readonly deselectKeys: (keys: Array) => void; + readonly onSuccessConfirm: VoidFunction; +}; + +export type BulkClearDagRunsOptions = { + note: string | null; + onlyFailed: boolean; + onlyNew: boolean; +}; + +// Mirrors the bulk-endpoint success key (``{dag_id}.{run_id}``) so callers can pass +// the result straight into ``deselectKeys`` without an extra mapping. 
+const getRowKey = (dagRun: DAGRunResponse) => `${dagRun.dag_id}.${dagRun.dag_run_id}`; + +const formatError = (reason: unknown): string => { + if (reason instanceof Error) { + return reason.message; + } + if (typeof reason === "object" && reason !== null && "body" in reason) { + const { body } = reason as { body?: { detail?: unknown } }; + + if (body?.detail !== undefined) { + return typeof body.detail === "string" ? body.detail : JSON.stringify(body.detail); + } + } + + return String(reason); +}; + +export const useBulkClearDagRuns = ({ deselectKeys, onSuccessConfirm }: Props) => { + const queryClient = useQueryClient(); + const [data, setData] = useState<{ clear: BulkActionResponse } | undefined>(undefined); + const [isPending, setIsPending] = useState(false); + const { t: translate } = useTranslation(["common", "dags"]); + + const reset = () => { + setData(undefined); + }; + + const invalidateQueries = async (dagRuns: ReadonlyArray) => { + const dagIds = new Set(dagRuns.map((dagRun) => dagRun.dag_id)); + const keys = [ + [useDagRunServiceGetDagRunsKey], + [useTaskInstanceServiceGetTaskInstancesKey], + [useTaskInstanceServiceGetTaskInstanceKey], + [useTaskInstanceServiceGetMappedTaskInstanceKey], + [useBulkClearDagRunsDryRunKey], + ...tiPerAttemptQueryKeys, + ...[...dagIds].flatMap((dagId) => [...gridQueryKeys(dagId), [useClearDagRunDryRunKey, dagId]]), + ...dagRuns.flatMap((dagRun) => [ + UseDagRunServiceGetDagRunKeyFn({ dagId: dagRun.dag_id, dagRunId: dagRun.dag_run_id }), + UseGanttServiceGetGanttDataKeyFn({ dagId: dagRun.dag_id, runId: dagRun.dag_run_id }), + ]), + ]; + + await Promise.all(keys.map((queryKey) => queryClient.invalidateQueries({ queryKey }))); + }; + + const bulkClear = async (dagRuns: Array, options: BulkClearDagRunsOptions) => { + reset(); + setIsPending(true); + + const settled = await Promise.allSettled( + dagRuns.map((dagRun) => + DagRunService.clearDagRun({ + dagId: dagRun.dag_id, + dagRunId: dagRun.dag_run_id, + requestBody: { + 
dry_run: false, + only_failed: options.onlyFailed, + only_new: options.onlyNew, + }, + }).then(() => dagRun), + ), + ); + + const succeeded: Array = []; + const errors: Array> = []; + + settled.forEach((outcome, index) => { + if (outcome.status === "fulfilled") { + succeeded.push(outcome.value); + } else { + const dagRun = dagRuns[index]; + + errors.push({ + error: dagRun + ? `${getRowKey(dagRun)}: ${formatError(outcome.reason)}` + : formatError(outcome.reason), + }); + } + }); + + if (succeeded.length > 0 && options.note !== null) { + const noteSettled = await Promise.allSettled( + succeeded + .filter((dagRun) => dagRun.note !== options.note) + .map((dagRun) => + DagRunService.patchDagRun({ + dagId: dagRun.dag_id, + dagRunId: dagRun.dag_run_id, + requestBody: { note: options.note }, + }).then(() => dagRun), + ), + ); + + noteSettled.forEach((outcome) => { + if (outcome.status === "rejected") { + errors.push({ error: `note: ${formatError(outcome.reason)}` }); + } + }); + } + + await invalidateQueries(dagRuns); + + if (succeeded.length > 0) { + toaster.create({ + description: translate("toaster.bulkClear.success.description", { + count: succeeded.length, + keys: succeeded.map((dagRun) => dagRun.dag_run_id).join(", "), + resourceName: translate("dagRun_other"), + }), + title: translate("toaster.bulkClear.success.title", { + resourceName: translate("dagRun_other"), + }), + type: "success", + }); + deselectKeys(succeeded.map(getRowKey)); + } + + setData({ clear: { errors, success: succeeded.map(getRowKey) } }); + setIsPending(false); + + // Per-run failures keep the dialog open so the user can see what failed; + // the consumer renders ``data.clear.errors``. 
+ if (errors.length === 0) { + onSuccessConfirm(); + } + }; + + return { bulkClear, data, isPending, reset }; +}; diff --git a/airflow-core/src/airflow/ui/src/queries/useBulkClearDagRunsDryRun.ts b/airflow-core/src/airflow/ui/src/queries/useBulkClearDagRunsDryRun.ts new file mode 100644 index 0000000000000..fbdf858bcc57e --- /dev/null +++ b/airflow-core/src/airflow/ui/src/queries/useBulkClearDagRunsDryRun.ts @@ -0,0 +1,82 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +import { useQueries } from "@tanstack/react-query"; + +import { DagRunService } from "openapi/requests/services.gen"; +import type { + ClearTaskInstanceCollectionResponse, + DAGRunResponse, + TaskInstanceCollectionResponse, + TaskInstanceResponse, +} from "openapi/requests/types.gen"; + +type Options = { + onlyFailed: boolean; + onlyNew: boolean; +}; + +const EMPTY: TaskInstanceCollectionResponse = { task_instances: [], total_entries: 0 }; + +export const useBulkClearDagRunsDryRunKey = "bulkClearDagRunsDryRun"; + +export const useBulkClearDagRunsDryRun = ( + enabled: boolean, + selectedDagRuns: Array, + options: Options, +) => { + const results = useQueries({ + queries: selectedDagRuns.map((dagRun) => ({ + enabled, + queryFn: () => + DagRunService.clearDagRun({ + dagId: dagRun.dag_id, + dagRunId: dagRun.dag_run_id, + requestBody: { + dry_run: true, + only_failed: options.onlyFailed, + only_new: options.onlyNew, + }, + }) as Promise, + queryKey: [ + useBulkClearDagRunsDryRunKey, + dagRun.dag_id, + dagRun.dag_run_id, + { only_failed: options.onlyFailed, only_new: options.onlyNew }, + ], + refetchOnMount: "always" as const, + })), + }); + + const isFetching = results.some((result) => result.isFetching); + // Each per-run call is scoped to a distinct ``(dag_id, dag_run_id)`` so the + // concatenated array can't contain duplicates; the response is also + // homogeneous (``only_new=true`` yields ``NewTaskResponse`` placeholders, + // ``false`` yields real ``TaskInstanceResponse``), so the cast is safe even + // though the OpenAPI type widens to a union. + const taskInstances = results.flatMap((result) => result.data?.task_instances ?? []); + const data: TaskInstanceCollectionResponse = + taskInstances.length === 0 + ? 
EMPTY + : { + task_instances: taskInstances as Array, + total_entries: taskInstances.length, + }; + + return { data, isFetching }; +}; From f02d91e98a845154f9b493efdf6831240bdf8495 Mon Sep 17 00:00:00 2001 From: Amogh Desai Date: Thu, 28 May 2026 20:22:34 +0530 Subject: [PATCH 17/28] Fix compat test flakiness in SnowflakeSqlApiHook timeout test (#67641) --- .../unit/snowflake/hooks/test_snowflake_sql_api.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/providers/snowflake/tests/unit/snowflake/hooks/test_snowflake_sql_api.py b/providers/snowflake/tests/unit/snowflake/hooks/test_snowflake_sql_api.py index fcbbb2287a886..8da08dae79ea6 100644 --- a/providers/snowflake/tests/unit/snowflake/hooks/test_snowflake_sql_api.py +++ b/providers/snowflake/tests/unit/snowflake/hooks/test_snowflake_sql_api.py @@ -1245,20 +1245,20 @@ def test_wait_for_query_handles_unknown_status(self, sleep_mock): assert result == {"status": "queued", "info": ["a", "b"]} sleep_mock.assert_not_called() - @mock.patch(f"{MODULE_PATH}.time.time") @mock.patch(f"{MODULE_PATH}.time.sleep") - def test_wait_for_query_timeout_error(self, sleep_mock, time_mock): + def test_wait_for_query_timeout_error(self, sleep_mock, time_machine): hook = SnowflakeSqlApiHook(snowflake_conn_id="test_conn") # Simulate a query that keeps running and never finishes hook.get_sql_api_query_status = mock.MagicMock(return_value={"status": "running"}) - # More side effects to ensure we hit the timeout and avoid StopIteration error - time_mock.side_effect = list(range(10)) - qid = "qid-789" timeout = 3 + # Freeze the clock. Each sleep advances it explicitly so logger time.time() calls do not skew the timeout. 
+ time_machine.move_to(0, tick=False) + sleep_mock.side_effect = lambda seconds: time_machine.shift(seconds + 0.1) + with pytest.raises(TimeoutError): hook.wait_for_query(query_id=qid, timeout=timeout, poll_interval=1) @@ -1267,7 +1267,6 @@ def test_wait_for_query_timeout_error(self, sleep_mock, time_mock): sleep_mock.assert_has_calls([mock.call(1)] * 3) assert hook.get_sql_api_query_status.call_count == 4 hook.get_sql_api_query_status.assert_has_calls([mock.call(query_id=qid)] * 4) - assert time_mock.call_count >= 3 @mock.patch(f"{HOOK_PATH}._make_api_call_with_retries") @mock.patch(f"{HOOK_PATH}._process_response") From a8423447983688fe2f298b7e29eb5c42845e25cc Mon Sep 17 00:00:00 2001 From: Pierre Jeambrun Date: Thu, 28 May 2026 17:20:48 +0200 Subject: [PATCH 18/28] UI: Revert @chakra-ui/react bump that broke modal dismissal (#67646) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #66225 bumped ``@chakra-ui/react`` from 3.34 to 3.35, which pulled in ``@ark-ui/react`` 5.34.1 → 5.36.2. The Ark 5.36 release notes flag: > Dialog / Drawer: Avoid setting inline ``pointer-events`` when modal, > letting the dismissable layer manage it. Before 5.36 the lock was an inline style on the dialog DOM, so it disappeared on unmount. After 5.36 the dismissable layer owns it and only releases it on close-transition completion. Several Clear / Mark-as buttons in this codebase mount their dialog conditionally (``{open ? : undefined}``), which yanks the component out before the transition fires — the ``pointer-events: none`` overlay is left on ``document`` and the page refuses every click (scroll still works) until refresh. Revert the bump on its own; add a section to ``CONTRIBUTING.md`` so the next contributor trying to bump knows to rewrite the conditional-mount sites first. 
--- airflow-core/src/airflow/ui/CONTRIBUTING.md | 21 + airflow-core/src/airflow/ui/package.json | 2 +- airflow-core/src/airflow/ui/pnpm-lock.yaml | 1374 +++++++++---------- 3 files changed, 700 insertions(+), 697 deletions(-) diff --git a/airflow-core/src/airflow/ui/CONTRIBUTING.md b/airflow-core/src/airflow/ui/CONTRIBUTING.md index 3dc9073b5cd08..e904d83a724e3 100644 --- a/airflow-core/src/airflow/ui/CONTRIBUTING.md +++ b/airflow-core/src/airflow/ui/CONTRIBUTING.md @@ -40,6 +40,27 @@ Manually: - Run `pnpm install && pnpm dev` - Note: Make sure to access the UI via the Airflow localhost port (8080 or 28080) and not the vite port (5173) +## Dependency upgrade caveats + +### `@chakra-ui/react` — held at `~3.34.0` + +Do not relax this pin or bump `@chakra-ui/react` above `3.34.x` without +re-checking every dialog in this codebase that is mounted conditionally +(`{open ? : undefined}`), e.g. `ClearRunButton`, +`MarkRunAsButton`, `ClearTaskInstanceButton`, `MarkTaskInstanceAsButton`. + +`@chakra-ui/react@3.35.0` pulls in `@ark-ui/react@>=5.36.0`, where dialog +`pointer-events` cleanup moved from an inline style on the dialog DOM +to the dismissable layer's close-transition completion. Conditionally +mounted dialogs unmount before that transition runs, leaving the +`pointer-events: none` lock stuck on `document` — the page then refuses +every click (scroll still works) until a full refresh. See PR #67646 +for the original revert and the timeline. + +To bump safely, first rewrite the conditional-mount sites to always +render the dialog (and gate any expensive dry-run queries with +`enabled: open`) so the dialog can drive its own close transition. 
+ ## More See [node environment setup docs](/contributing-docs/15_node_environment_setup.rst) diff --git a/airflow-core/src/airflow/ui/package.json b/airflow-core/src/airflow/ui/package.json index 10425b00919c5..bc5c71b8e72e6 100644 --- a/airflow-core/src/airflow/ui/package.json +++ b/airflow-core/src/airflow/ui/package.json @@ -26,7 +26,7 @@ }, "dependencies": { "@chakra-ui/anatomy": "^2.3.4", - "@chakra-ui/react": "^3.35.0", + "@chakra-ui/react": "~3.34.0", "@emotion/react": "^11.14.0", "@guanmingchiu/sqlparser-ts": "^0.61.1", "@lezer/highlight": "^1.2.3", diff --git a/airflow-core/src/airflow/ui/pnpm-lock.yaml b/airflow-core/src/airflow/ui/pnpm-lock.yaml index 7296c1c1f7383..1176b45fc6417 100644 --- a/airflow-core/src/airflow/ui/pnpm-lock.yaml +++ b/airflow-core/src/airflow/ui/pnpm-lock.yaml @@ -34,8 +34,8 @@ importers: specifier: ^2.3.4 version: 2.3.4 '@chakra-ui/react': - specifier: ^3.35.0 - version: 3.35.0(@emotion/react@11.14.0(@types/react@19.2.14)(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5) + specifier: ~3.34.0 + version: 3.34.0(@emotion/react@11.14.0(@types/react@19.2.14)(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5) '@emotion/react': specifier: ^11.14.0 version: 11.14.0(@types/react@19.2.14)(react@19.2.5) @@ -74,7 +74,7 @@ importers: version: 1.16.1 chakra-react-select: specifier: ^6.1.1 - version: 6.1.1(@chakra-ui/react@3.35.0(@emotion/react@11.14.0(@types/react@19.2.14)(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5))(@types/react@19.2.14)(next-themes@0.4.6(react-dom@19.2.5(react@19.2.5))(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5) + version: 6.1.1(@chakra-ui/react@3.34.0(@emotion/react@11.14.0(@types/react@19.2.14)(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5))(@types/react@19.2.14)(next-themes@0.4.6(react-dom@19.2.5(react@19.2.5))(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5) chart.js: specifier: ^4.5.1 version: 4.5.1 @@ -222,7 +222,7 @@ importers: version: 
6.0.1(@rolldown/plugin-babel@0.2.2(@babel/core@7.29.0)(@babel/runtime@7.29.2)(rolldown@1.0.0-rc.15)(vite@8.0.8(@types/node@24.10.3)(esbuild@0.27.7)(jiti@1.21.7)(yaml@2.8.3)))(babel-plugin-react-compiler@1.0.0)(vite@8.0.8(@types/node@24.10.3)(esbuild@0.27.7)(jiti@1.21.7)(yaml@2.8.3)) '@vitejs/plugin-react-swc': specifier: ^4.2.3 - version: 4.2.3(@swc/helpers@0.5.21)(vite@8.0.8(@types/node@24.10.3)(esbuild@0.27.7)(jiti@1.21.7)(yaml@2.8.3)) + version: 4.2.3(@swc/helpers@0.5.19)(vite@8.0.8(@types/node@24.10.3)(esbuild@0.27.7)(jiti@1.21.7)(yaml@2.8.3)) '@vitest/coverage-v8': specifier: ^4.1.4 version: 4.1.4(vitest@4.1.4) @@ -321,8 +321,8 @@ packages: resolution: {integrity: sha512-9K6xOqeevacvweLGik6LnZCb1fBtCOSIWQs8d096XGeqoLKC33UVMGz9+77Gw44KvbH4pKcQPWo4ZpxkXYj05w==} engines: {node: '>= 16'} - '@ark-ui/react@5.36.2': - resolution: {integrity: sha512-2lrZ7+Qtlj7hGx4qU2jZkE892JNrkULg/fUxqUuqmQfv9UGAXhdcw1Hr3N+zBgMDVz3aqip0Qa4v0Mox09MMvg==} + '@ark-ui/react@5.34.1': + resolution: {integrity: sha512-RJlXCvsHzbK9LVxUVtaSD5pyF1PL8IUR1rHHkf0H0Sa397l6kOFE4EH7MCSj3pDumj2NsmKDVeVgfkfG0KCuEw==} peerDependencies: react: '>=18.0.0' react-dom: '>=18.0.0' @@ -471,8 +471,8 @@ packages: '@chakra-ui/anatomy@2.3.4': resolution: {integrity: sha512-fFIYN7L276gw0Q7/ikMMlZxP7mvnjRaWJ7f3Jsf9VtDOi6eAYIBRrhQe6+SZ0PGmoOkRaBc7gSE5oeIbgFFyrw==} - '@chakra-ui/react@3.35.0': - resolution: {integrity: sha512-qzfRNLwxKjxx2IXjBj6uz1nYI+pKsq6uwHxO619+hx1OzNNuwLIjEHJxnDfBzoynO7sPCBlubMwFWb1e1PrXzw==} + '@chakra-ui/react@3.34.0': + resolution: {integrity: sha512-VLhpVwv5IVxhwajO10KnS1VQT4hDqQMQP/A796Ya+uVu8AdoSX+5HHyTLTkYIeXIDMe0xLqJfov04OBKbBchJA==} peerDependencies: '@emotion/react': '>=11' react: '>=18' @@ -823,8 +823,8 @@ packages: '@types/node': optional: true - '@internationalized/date@3.12.0': - resolution: {integrity: sha512-/PyIMzK29jtXaGU23qTvNZxvBXRtKbNnGDFD+PY6CZw/Y8Ex8pFUzkuCJCG9aOqmShjqhS9mPqP6Dk5onQY8rQ==} + '@internationalized/date@3.11.0': + resolution: {integrity: 
sha512-BOx5huLAWhicM9/ZFs84CzP+V3gBW6vlpM02yzsdYC7TGlZJX1OJiEEHcSayF00Z+3jLlm4w79amvSt6RqKN3Q==} '@internationalized/number@3.6.5': resolution: {integrity: sha512-6hY4Kl4HPBvtfS62asS/R22JzNNy8vi/Ssev7x6EobfCp+9QIB2hKvI2EtbdJ0VSQacxVNtqhE/NmF/NZ0gm6g==} @@ -915,8 +915,8 @@ packages: '@oxc-project/types@0.124.0': resolution: {integrity: sha512-VBFWMTBvHxS11Z5Lvlr3IWgrwhMTXV+Md+EQF0Xf60+wAdsGFTBx7X7K/hP4pi8N7dcm1RvcHwDxZ16Qx8keUg==} - '@pandacss/is-valid-prop@1.11.0': - resolution: {integrity: sha512-KVR+mv3rhlY4meObtp7SZh7EGMaNsuVh/a5lk0UbxRWJrjPIRdkIgJAXxRt+Rlv883RFgnVxnn2Nv2nVtKVdDA==} + '@pandacss/is-valid-prop@1.9.0': + resolution: {integrity: sha512-AZvpXWGyjbHc8TC+YVloQ31Z2c4j2xMvYj6UfVxuZdB5w4c9+4N8wy5R7I/XswNh8e4cfUlkvsEGDXjhJRgypw==} '@pkgr/core@0.2.9': resolution: {integrity: sha512-QNqXyfVS2wm9hweSYD2O7F0G06uurj9kZ96TRQE5Y9hU7+tgdZwIkbAKc5Ocy1HxEY2kuDQa6cQ1WRs/O5LFKA==} @@ -1133,8 +1133,8 @@ packages: '@swc/counter@0.1.3': resolution: {integrity: sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==} - '@swc/helpers@0.5.21': - resolution: {integrity: sha512-jI/VAmtdjB/RnI8GTnokyX7Ug8c+g+ffD6QRLa6XQewtnGyukKkKSk3wLTM3b5cjt1jNh9x0jfVlagdN2gDKQg==} + '@swc/helpers@0.5.19': + resolution: {integrity: sha512-QamiFeIK3txNjgUTNppE6MiG3p7TdninpZu0E0PbqVh1a9FNLT2FRhisaa4NcaX52XVhA5l7Pk58Ft7Sqi/2sA==} '@swc/types@0.1.25': resolution: {integrity: sha512-iAoY/qRhNH8a/hBvm3zKj9qQ4oc2+3w1unPJa2XvTK3XjeLXtzcCingVPw/9e5mn1+0yPqxcBGp9Jf0pkfMb1g==} @@ -1495,239 +1495,234 @@ packages: '@xyflow/system@0.0.75': resolution: {integrity: sha512-iXs+AGFLi8w/VlAoc/iSxk+CxfT6o64Uw/k0CKASOPqjqz6E0rb5jFZgJtXGZCpfQI6OQpu5EnumP5fGxQheaQ==} - '@zag-js/accordion@1.40.0': - resolution: {integrity: sha512-YDdyvZJ6fr92RZazyXQq+juT3ZA0ubjDISptb5YPgMoTPdnjKNiICPpMeCeVj1ncYRDkHXrOdChS/5CtuX/K6g==} + '@zag-js/accordion@1.35.3': + resolution: {integrity: sha512-wmw6yo5Zr6ShiKGTc5ICEOJCurWAOSGubIpGISiHi3cZ4tlxKF/vpATIUT3eq8xzdB56YK57yKCujs/WmwqqoA==} - 
'@zag-js/anatomy@1.40.0': - resolution: {integrity: sha512-oiB4uAaV//L38JluLVPtOHO3xvqambrfrXVOoq4kmNrBv1LLlCmFvrXA2HOR9lakn4ExK27XSUrKhUN7YlKjfQ==} + '@zag-js/anatomy@1.35.3': + resolution: {integrity: sha512-oqU9iLNNylrtJMBX5Xu4DsxnPNvtZLiobryv2oNtsDI1mi1Fca/XHghQC9K5aYT0qNsmHj1M3W5WAWTaOtPLkQ==} - '@zag-js/angle-slider@1.40.0': - resolution: {integrity: sha512-6X6bOBoCyYG0/lFY0Y+AXJZZG6CeYQiWkcMXvegxCC2zxthodqOVzkVOASW+6rzLjn2bru+V5O9RMjNgmCumKg==} + '@zag-js/angle-slider@1.35.3': + resolution: {integrity: sha512-HXRlmsbNEJSBT53fq9XQKL/vwZWwJC3nprskI7s4f/jy8a4uXPTlv7N7zuBYjew+ScTMzZah6fLWzUztBehmSg==} - '@zag-js/aria-hidden@1.40.0': - resolution: {integrity: sha512-lNWujEIlfGKwMQIcgfXuOZSsJD2avrgPsQHrXNVF9mkXygjLFcIRKz2pEexTSCqFh/HuUZJ6rG4pM/hJ/BiVCw==} + '@zag-js/aria-hidden@1.35.3': + resolution: {integrity: sha512-dk5POebn10WneQfLrEgbTzwolaXWpCSHL6F3jCTinW9IbOx7BXghzJD21iU5Iun+y9CorqJPW3p7LplYNUMO5Q==} - '@zag-js/async-list@1.40.0': - resolution: {integrity: sha512-hLGUTtwRFl6FIdYxSIYSeLQjJeG4isKpdmGCUvtWNnKr7ayf1yAkkSwX10SdBMWOCldbtvKCZXumKvP6dDwNvw==} + '@zag-js/async-list@1.35.3': + resolution: {integrity: sha512-SXX3wGzLK/maKS1PJ3XfLIGWbu0022f/OhcFsT1PbiHnoFZTH7h2fBhirrCBfy2TYFQ6r5uxgjkhPUNkuaeYnA==} - '@zag-js/auto-resize@1.40.0': - resolution: {integrity: sha512-eZC+AGKUip7UMu41/ApeT1wCIgn2fmo63FJeGAdMMD8E9M8M7QLsfISMIoieNNGBAYWhSyqELQ3jPgkUf6xReA==} + '@zag-js/auto-resize@1.35.3': + resolution: {integrity: sha512-ufG8HSqzLd9h5rnos8aumj8iORlRskeR/gbpJu1NHrnHBWIrpuXm6KJJR2oZhTFY1BUMMk8eYIBA2QkVuiJzWA==} - '@zag-js/avatar@1.40.0': - resolution: {integrity: sha512-DayZDsNXbipT+1GUkX29tVhO4hZonDnidwE3SjEQv9Ic9vCdnwP95+B0FPEuaca03F5ZXFqVXjnPmRVbRMyDYQ==} + '@zag-js/avatar@1.35.3': + resolution: {integrity: sha512-lbQ2Q4Va8AAScKULOHw2tCQez+0JRYGHSMFq6i+dJmeT3dlSgRanm69ra6K2po6hM9E4v6pRe+xOVE+9QMDnuA==} - '@zag-js/carousel@1.40.0': - resolution: {integrity: sha512-9svWc2jjvUP8iQ0afuu/ZAI75PuPLm4qB7h+10rmDrAgUPn7fwUBVzyATKubJPdtmaYQQvTTIiZU2B8mV88oGg==} + 
'@zag-js/carousel@1.35.3': + resolution: {integrity: sha512-F+b8HzUeZfB+xUkAkLG4r0Ubui8pj7pSgZhi26ZiWgsM7tsd7cD+xRMXkvPEITN5Fd5QCe3KlVBuE00w5byjmg==} - '@zag-js/cascade-select@1.40.0': - resolution: {integrity: sha512-0fkE0Fd2VQ4QsaWXHdgQxHWiaef3UWW0l6Jd47frtMNnrvg5t5Xfqowa7c2S23hcduOUfz2WC0xEuGXnO4UVDQ==} + '@zag-js/cascade-select@1.35.3': + resolution: {integrity: sha512-Nifdx77hEuAdXqr1wpZSPjLXqygRhq/WvnPjGhCeSqFPpy62uT4JZ3avyjUZ4I0UhvIpkleUcXtFwQ3cSMh4ww==} - '@zag-js/checkbox@1.40.0': - resolution: {integrity: sha512-oFCgnkOjrUDejB1wEp5s3cyJ+uFe/GoI3+wqNyckqOtcdKL1MBxy193GYVdj0LDfuCNrk8V0aIJGTdusCD2b4A==} + '@zag-js/checkbox@1.35.3': + resolution: {integrity: sha512-8XBt/Wg2zSQWqV2ZFqZBQUjYRkOYHA2O3IEi0VVYtds3S1n7Pu/HqkZT5qDw+E/SY2+X9Uyx4hO7h2XrlsiZQQ==} - '@zag-js/clipboard@1.40.0': - resolution: {integrity: sha512-QbFhJMwwUxTKcbWyb9ZrKgAp13U4+IzfHSLhPxbDVSQ15mIrjIkjW68gS6ElzhRDwGr1qawkZVApsqcToUqSaQ==} + '@zag-js/clipboard@1.35.3': + resolution: {integrity: sha512-obTwynBpp6c17fLHe5tg//FQ497QsyCEry+K3bTdlrivWW200wvfHxZ6RKVbKwDAwhH+ye0bI1xkYAId8j7sdA==} - '@zag-js/collapsible@1.40.0': - resolution: {integrity: sha512-xDLY4j9D3gdoTirkwzMaCtelfCjnMhBzPyY6c/mh4oPvD3RB6dr3V3kI80i3yxHaUUeDCIUm/XAxK0InPsRBug==} + '@zag-js/collapsible@1.35.3': + resolution: {integrity: sha512-IweG8JOBCerJwLO6QzTZGEMlsYUmQfQSeD0jniFguMM8vcunvGVSrM+AaL8pDbmXd+snXokaGyJpGO3vzMW6Fw==} - '@zag-js/collection@1.40.0': - resolution: {integrity: sha512-+3o1nvbcA9Kz2hDDFf8Kngpd+of33S4TS5Tb9KvrHlU5ieQdvEUtc7/pWG2aCTkGpmgda+j91akB6ZB8+oVkvA==} + '@zag-js/collection@1.35.3': + resolution: {integrity: sha512-BYoWJ4b7ma2PgiuQbRSnP603f2DlK6se5JtViUHTamZScLLLWnWHuQ6zFa1KS5kiIkbb7CFM6/bJ3WNYLch8Ig==} - '@zag-js/color-picker@1.40.0': - resolution: {integrity: sha512-lT93xd1BlNBbitl2RxST8ARYE6q/HZD5a0QhMIT1RbndB8F4e9j/NxkStgE9f0QqgpC/rO+nKHLoR+H1xs/EkA==} + '@zag-js/color-picker@1.35.3': + resolution: {integrity: 
sha512-i9roSgtqeA1b4Q+jWqnxjXB//BQXMP5m1FQ4YcZVq/0yT14A53JIknchuqrh3wC3yPsJMXFqCoKg+NET2+OVig==} - '@zag-js/color-utils@1.40.0': - resolution: {integrity: sha512-PZihcGheb5bn0/cEUwozjJjPoKkEwlJNpTA5mUxj/+sOElLaZM+zY2AnGYeMl6w5zIyZZUDoJMIT5rcb5sN87g==} + '@zag-js/color-utils@1.35.3': + resolution: {integrity: sha512-vxkEVgz4YdSbdaPvjiRI1VsJAdwzu/dUNvzqOaiVcPDrHr/FFgmUbv0SOFjnfSb2QWGI8EDEMn02RW9ym+BzGw==} - '@zag-js/combobox@1.40.0': - resolution: {integrity: sha512-5IVCDrB8m7XrKBu28j7bIRE5KiyKJLPDZB3AJ+PLJyL69D+9z1anhLDmkUYcPseyCasszLKzIejby+kYQJgHlA==} + '@zag-js/combobox@1.35.3': + resolution: {integrity: sha512-s1qmttTGJTMjlDakL+uvWSEggpafKr1vhOeZCh8j+N4eFt9bLAwaffjuh/1JzWBvzovw7WoMVkizdTXPlN8oYg==} - '@zag-js/core@1.40.0': - resolution: {integrity: sha512-0YcqCh7TmhSonkbKM/7NWolxlaQgvvXgqedocW9oeRYiDJIpBZyRqnHPoGAS2XwbBPkCnrqSosxSF5yBjhZpgw==} + '@zag-js/core@1.35.3': + resolution: {integrity: sha512-fGAHyqOYSEFmo52t7wI4dvbFfLyJmUlyf7wknsiUlzUHlrn3yv5PAZYZ2TibpOD1hwXIp4AoCjbiIPPZBxirZw==} - '@zag-js/date-input@1.40.0': - resolution: {integrity: sha512-/VU8g3dugggC5xW2OJW1KONWzPkEbK/yLA0lPxymW/Uo0ixh2mKJUVTOTqDFWf1b0vzLX2XlYoLL+I2ryUyPvA==} + '@zag-js/date-picker@1.35.3': + resolution: {integrity: sha512-4G10h6pzzLbd84SE2CKtqi6Z9wEBhSyx4GRSxxy3tsf5wAxnz4anRFat9CGwn2YVUYcUJpD+umYgBMPt6zGDnA==} peerDependencies: '@internationalized/date': '>=3.0.0' - '@zag-js/date-picker@1.40.0': - resolution: {integrity: sha512-Nm3aSKn/5tGOZk8rIddLyBk+oeE0zr/ZsJuuTc3rysd04owVy1UhmUh6X9CqfTJtwTDpUZe+orHaIvKlE3Rd0w==} + '@zag-js/date-utils@1.35.3': + resolution: {integrity: sha512-1co0FPpZ6nO5dN8sZtECkMYaf+3E5zu0KSIJZpZiXb4TgsZMDyHu7K7IsiKFHk9qmhuF6AdPpNxBju91pSXMFg==} peerDependencies: '@internationalized/date': '>=3.0.0' - '@zag-js/date-utils@1.40.0': - resolution: {integrity: sha512-nuB1QM3X7yY0k2JiZbHHm6wigY+Cl1QK6sRlh+C7mOyzEKnNEqNSVIqgSionCtWO6zAZh1R8Znp5ZeCdbbc27w==} - peerDependencies: - '@internationalized/date': '>=3.0.0' + '@zag-js/dialog@1.35.3': + resolution: 
{integrity: sha512-byosV+aBHH5LoFKnjEgC7WdqJid7bP9UhgWLSC7+IXbxrif9Czg1YVp6ZlQM6Nx6uD1vnty4touI3P7D7CTKcw==} - '@zag-js/dialog@1.40.0': - resolution: {integrity: sha512-1FHxR7/Kuu+9K2dxH7dKlSckCZ26n5ec79qWr0aMSSs2DF+ypQf5GUlaS6z2UqroZvIoJCvABVMm9OMko/qxlA==} + '@zag-js/dismissable@1.35.3': + resolution: {integrity: sha512-XPk+lqmsZp2Z1yMb5K1yj/e7Sobv4D7zK66B1GS97lk9Xzz8vuSgsimcLy0p7RXQl3KL6H5L69inSuQa2exybQ==} - '@zag-js/dismissable@1.40.0': - resolution: {integrity: sha512-bBkFvPg/zbYn31ZgEfx8not6s2Ekx7zU2sO8tGXb8rYPnHBfGDYEzVQansUStJn0Atzw+y7XR7B3G3u5AFQJKw==} + '@zag-js/dom-query@1.35.3': + resolution: {integrity: sha512-1RbFZoT4CjlHN9TUNse1++ZVOyKo45ktucTIT349o6HMsoWWKmTJDPvFkMBbmu/qY6XXn4dT+LJEp4bL3DR+Qw==} - '@zag-js/dom-query@1.40.0': - resolution: {integrity: sha512-4J3EO2gHpZ1VZiGLuMlH6G1Tsp4gKB8PPt2yKeNQWYGEXyrHUXrvMhRUzv7Z4/2I1s1tnxlFG4F8ovB3kTpz/Q==} + '@zag-js/drawer@1.35.3': + resolution: {integrity: sha512-DN5bwa7bDCDaUSbNzFxMc2U/WmbLcXvPSQjyOpKI6CC3VbW2kKaOnjJ5qQG+W5YBO0FpmJBtaxRV7lke4sZH2w==} - '@zag-js/drawer@1.40.0': - resolution: {integrity: sha512-N2OR5ZYuTsWkYYmwsNgmL+wfuM3qUxB8GAfo53AWvOh07QUVz1Dvh1WP4km5L6Tkz4UBQZACu8T/ZLyeZ+PdWg==} + '@zag-js/editable@1.35.3': + resolution: {integrity: sha512-HcjeacS61vQXfNT9IalZj/+oS45yW5bIDO2NjJWV7zNe5AG29NCceUnvBhy+hrUKPnKcjfDocdW5rCL+Lvs/CQ==} - '@zag-js/editable@1.40.0': - resolution: {integrity: sha512-X23wOg42BPvFWfJQi3yd8HiL8xtisrpL5ouFEzba56SQIxWZHDRpeWoqXqyLODq2/z2+SsZ0wV3laRD3ZH0C2g==} + '@zag-js/file-upload@1.35.3': + resolution: {integrity: sha512-oIYwnDct4ERo2mfmcxsBIJnlmpzjrzYx82SQsXWD3NGKx3cgdh2lwBX+ebItaLH1jkgzBa3z0TWxc6rfvcUXbw==} - '@zag-js/file-upload@1.40.0': - resolution: {integrity: sha512-hUZlJYjSGk7SAflTmQIjZv6M+icujaHS6I+dik2LM48rLWwNa/GYTNx+uY4zJLd9oW1eEj+6NcCYZpPWzKku4Q==} + '@zag-js/file-utils@1.35.3': + resolution: {integrity: sha512-Tb05RCzx4swc156hd4jLiO7z+Gxg/HQ+JCds03jgTbrFJAz2D56YaMeI7gSDc1m4Xre3nyqQpSo9AeX5nzbE/w==} - '@zag-js/file-utils@1.40.0': - resolution: {integrity: 
sha512-BGny4rafiBQ5TPCBXfzbH7lSyFdnoix7brq/+FllKpDqpWPQz0tIsgSZueF/Z8GPTrAkwMKOFI99P7OVhAhRig==} + '@zag-js/floating-panel@1.35.3': + resolution: {integrity: sha512-nTZypcS0X46Oo1kpCQTnP5UlzjhypOAj3B4dq2z/3bAOC0TntYTnFkj8PbEJtExk7364xfMyxfgZOiv7Aqq01w==} - '@zag-js/floating-panel@1.40.0': - resolution: {integrity: sha512-e2QXwapCbjLJnU+MAz06CoByj4XJ3sdSBgWF+PSe2X2T8dd/FkZUnaDPaX0yyfyTWKzBbyRRNyon2LMAs8ndHw==} + '@zag-js/focus-trap@1.35.3': + resolution: {integrity: sha512-evErLlGFdDVCI8xipNS5k0rAvO+KFRA9g273bbfWAL1+mT54mcB/XHa85nC3QpPgMNrSh+6LUNq9fapyOGoyYg==} - '@zag-js/focus-trap@1.40.0': - resolution: {integrity: sha512-Q6W+DU7pix5rtRwoDnYzTYMkUV2kMWrFV0/EdNN3spFSvnUSkDWRmcNpzf+56AuCNeqsAZxaLJpsHLZkcT2xrw==} + '@zag-js/focus-visible@1.35.3': + resolution: {integrity: sha512-g4F8PRGIoFoKBrHiQ1HQh5AjCS7brFRXHvpbDNb9+T11FGlF5Turb+6OVRoNV8MmiuqMltO2I28l36YsGc//uQ==} - '@zag-js/focus-visible@1.40.0': - resolution: {integrity: sha512-63byl/kLVzDYlnHFma4HKEKrqB1Vx2zg0sBmUSENPyh+Ia1xhEVVC5vu6GX7nu4t/8QRy3Jn0q7T5og81FGb1A==} + '@zag-js/highlight-word@1.35.3': + resolution: {integrity: sha512-K+mvEBbf3SUFjQeMeJQYb3cjri3x6sPaPhcKWayalelSLB/StWEGqcpmz+a6uUYrCUAK5kEi3Hn0YLGfn0GOig==} - '@zag-js/highlight-word@1.40.0': - resolution: {integrity: sha512-+aeVn3S5NPG6Tk4Sanl0VZk/0atjnF7Xy7POPs1HD5SBui29/6i3vn3bUBNXJXrnhUoNrUhuySVYVhgkffcQ7w==} + '@zag-js/hover-card@1.35.3': + resolution: {integrity: sha512-xVoKOtvrnzhYzciZ1csgiV76IQ4DRtx1lsJeFSrfg5MH0kYWeC/pcmm3yCd2+Qh/45J7DbSXeZneqxpyiF5Vvw==} - '@zag-js/hover-card@1.40.0': - resolution: {integrity: sha512-lkuLaikPLBIOnR0X75kSXdDYgv3ritAsn4TF1eGs12iYnZVX4PTL3J39tVNm9QrEXZ+iKcA1D2cUXNhEteCTyA==} + '@zag-js/i18n-utils@1.35.3': + resolution: {integrity: sha512-k7UcNxbnC2jvGwCoHYAkFD3ZaRSMQNVHfuy8TujZQ+ci3IJovwgWLveZoRfFbXHkTLfhmbpE2tFXBdpwOVZutg==} - '@zag-js/i18n-utils@1.40.0': - resolution: {integrity: sha512-8D3ki9V81gMKZvtRfNVoHCBDVYjr+WJLBvdfSv3cdOsVM2/E8//xAfYbYzl5Fdmeny3H71fxBNqOX05GN4K6OA==} + 
'@zag-js/image-cropper@1.35.3': + resolution: {integrity: sha512-1PH6bg8JAQESHzNqjka2TJ0QGNBGBAO6rb7AZ+9CaCCLw0pIzbUJhqPMkwd9GhdWGKGP+e7wFitnjcT4W5Js8g==} - '@zag-js/image-cropper@1.40.0': - resolution: {integrity: sha512-bpTCaiUXM0Mh6ddoJ1fA1B/YXp5Fc8LA0hg8CuEByDwGRVKPJ0KotL6QXMF6cEJZ1fcHF3Lcmpbj5Xotfkr4mA==} + '@zag-js/interact-outside@1.35.3': + resolution: {integrity: sha512-tOcuo/IztzpU7UKXtjVrLZtXzzcbhP4n2WynKwDRkTkq3mRCp61xXJp1csIBycI3JHm/CMeAEcPdRIioxIT/Zw==} - '@zag-js/interact-outside@1.40.0': - resolution: {integrity: sha512-Fws+O4uD9vS0I5KVcf3U2tNjLKvqlv+RExFbTywckDLOCJ145M/pMQWTr1FHil04jk5PFyM1iGfsbom8tozHpQ==} + '@zag-js/json-tree-utils@1.35.3': + resolution: {integrity: sha512-nOv2dPJf+1mxsobYiSlYt96hR1MK7iHKG1iDLoO5wLggS6GQA3ix1BerHJK0zdehoEZ71R45el5ghCG1HB9VzQ==} - '@zag-js/json-tree-utils@1.40.0': - resolution: {integrity: sha512-7zEzU59Gz76nV7n3l70uMB5yAOOQMmt1PTAni6S97uw7/6KzPktsEWBcw7ocC4IIA42PKdT7akpq721H0vthbA==} + '@zag-js/listbox@1.35.3': + resolution: {integrity: sha512-FE6FOuBr6aWtOb8U8oDvAvcUzD6JKLXAe8WngiLFG+b2yyW4nlaz2AcKRG1bjjB066UMxMo9/+2p4D0Kf5Id1Q==} - '@zag-js/listbox@1.40.0': - resolution: {integrity: sha512-zB33y+dk6/e0ZTs3wun2KsuPaH/wygOuD8scnH2a2Y/W9a2P1rq503Kgm5d5kVXBKQLxOBwievWJ8Blajv8LnA==} + '@zag-js/live-region@1.35.3': + resolution: {integrity: sha512-64rWcfggYpyr2Fn4pdrB/lljMgm3quwn9is+vdDN85Vv3WShKWoz08T4njidm0hwcIbzas0bRqQYWDLLsAoSJQ==} - '@zag-js/live-region@1.40.0': - resolution: {integrity: sha512-i1Dx02KGcQOAZGNhkFe8kz26gYJcn7KsT/M1UovjS9RTbl9diY8ShiyfIAhqruoaHQyqsHMRh/f7Idu45HdiDA==} + '@zag-js/marquee@1.35.3': + resolution: {integrity: sha512-bKZVpmAJWPDORP7WOWnS+65W5ZQBQmRs8zvV33ZfCpFbkXjhRiqKSzIj223/VOc2NEDjyWagz2vioAxrFYVzww==} - '@zag-js/marquee@1.40.0': - resolution: {integrity: sha512-XfvAwSNYXV3fEIRc44a9sAsoJoLKt+CWbpSPgQBpiFPpWh0rZ8frUZCslevTzBB3ifIWoSg+svDHQOGsDa8wGA==} + '@zag-js/menu@1.35.3': + resolution: {integrity: 
sha512-KyY0EZXkIU57Mjt+Lg+pupiePk3LcnQcB3Gl05Vva61bNjBjdKV71qwCQru/OxPZEwYgPo46L7TDIb56kfK/VQ==} - '@zag-js/menu@1.40.0': - resolution: {integrity: sha512-FRBqwsOjxBi0eSwqwrOw2td1rd0Xxl0f41J2lGc8E7z+2PabbBcJ/poqSiEn8YoaCT4mAWNjt4QQU/Pe1bRJ/g==} + '@zag-js/navigation-menu@1.35.3': + resolution: {integrity: sha512-8cCHx0X/KjEpr2BaMOxJS5LiA6fs/CNqVTF/sTTgZAv7Dm+MH0yNuKm4kpPvcLaVeBpVE09bnyCHrNKzZes+Fw==} - '@zag-js/navigation-menu@1.40.0': - resolution: {integrity: sha512-aJkEGYH8P9NfsQOjxMzxuF4YrrV2N1GQj6Y5Ow19MKuLh42o35bUhwoGsYjFbxgEcImabINtZJqtAPAkOdJXmQ==} + '@zag-js/number-input@1.35.3': + resolution: {integrity: sha512-uqawVybAcLcefVEHMVONuAA5kDSDPP5TsROr5PnAyFlhM1iD85+r3KAfCueoDX5w2X4ibbu9o2tdV6zTFKD/nQ==} - '@zag-js/number-input@1.40.0': - resolution: {integrity: sha512-WffdeqSOpsKmgPzBkNZl9nAolQPlyl9dIabaPguGgXdYtZW/OGCGj8jCYqyEu4VL3kDPPVVQRWEqC/XzwzVCRg==} + '@zag-js/pagination@1.35.3': + resolution: {integrity: sha512-fKm4s5KAd12RiCI/EDmmGKjPQ+i2qS/UsJPdMe65yb/4mY5OibwV2zyHcVeFsOD4gBZpnU6kYlDAGSttmLWLlQ==} - '@zag-js/pagination@1.40.0': - resolution: {integrity: sha512-Ykotky0A/7rswb6BfOD9aXL1EssKwUYfBRbdWGe52uhVc7dGagMSTUDRVeNhVsP/MEdtwqys7urvDbAlEqq+GA==} + '@zag-js/password-input@1.35.3': + resolution: {integrity: sha512-etd0gm6ELAm3y+cFhPU+TYm8khm9cL5Mg5m2DcZxu1Mqpj7JY0LsXZ8SFOdCZgTIHuMEhKBiYfnuyMAd4CJztA==} - '@zag-js/password-input@1.40.0': - resolution: {integrity: sha512-mD4tbA4m82oV+0NbJ+P00Q4Gwz+zf1kZEZ3Z48ohICfK/WO1KhCgviY7vu/7bCMnRiD3dbi+nEeym8Kb29wRHw==} + '@zag-js/pin-input@1.35.3': + resolution: {integrity: sha512-ZFt+WIHMdVlSg29BrQLFq5ijabiUO3tXMhoKhjjzTSe/tLqfNeu3UxFB6y/FYpn8+Cvn6xwvhu3lgnORYmI0zQ==} - '@zag-js/pin-input@1.40.0': - resolution: {integrity: sha512-iJIXDJC+9DUx+A3sRdTmHV7vPZXCw9O6le3R0lKf/8kQOgj7FKjbVw2SkUMAoOZ0u5J7Zwg2oZc7ddt1pwUk9w==} + '@zag-js/popover@1.35.3': + resolution: {integrity: sha512-+MIEENPsbKPxzoNuDI/C5d5ZN9uxnfZ+MBDc5C5XSgjjg9FcvMXClNq7IFM1aZi24peRXg9cMNf//lApVRT37w==} - '@zag-js/popover@1.40.0': - resolution: 
{integrity: sha512-bjvOep1YNlsvIYGh/rPsFCHjH2cCt2aKsVLyRvzTT1jhGZJvBdQKQBJjSuG5Nh4y1PUqtrrz69ZMWRrJGQ3rNg==} + '@zag-js/popper@1.35.3': + resolution: {integrity: sha512-gpB7Xn9WtlfrUsIVbSgNQGDwgNOL/cSGt0Id3wEQKArmqVC704EWtPvXzOMMybBEdm8YW2hQrXuo+o66abI1Sg==} - '@zag-js/popper@1.40.0': - resolution: {integrity: sha512-rCkgqgwlpgMwcnuSVrZK2xXl1Mvptpuw3cZy6rC2C5F3yE1GmWohdts5VkeQNro+sd/xHTdVovOqY6cU9Htj1w==} + '@zag-js/presence@1.35.3': + resolution: {integrity: sha512-ev5E7+U9IZAGvEaflpdVLHaZl8ZaQMhGB3ypd0yKhPwXeM51obV8w3+5HjzTqHPl8TKuoHWL31YaiUBd5EuS6w==} - '@zag-js/presence@1.40.0': - resolution: {integrity: sha512-P0bAuzEIDuMglE1xfmW5xTuSBlWjNZ8nOGXoIksKOKb+b+jy2Vys6WjZjKipV/jop4u85wfzKchcPc3C+cXuog==} + '@zag-js/progress@1.35.3': + resolution: {integrity: sha512-u0GxQN1AfXMAgzYOUMxKQA12DyuAP0svh2S//KvOorTSv7d5hAa8nZXi2cEv5abYsyfKJ6/bc1Z56byzW1jVZw==} - '@zag-js/progress@1.40.0': - resolution: {integrity: sha512-V61a5CHEs8suevQVS+/1ENj1RDVYNOUUTawK6uriCA6Ol59xe30DmF+eV6Y9miM7L/pN3YjZRq9uEDJMXXK32g==} + '@zag-js/qr-code@1.35.3': + resolution: {integrity: sha512-t0Ehwogr49vTNtWyNdQU2tYex7uJyfAn7N/5LgD7FXw8aa+RBMWZWlqjCUvHqJ929tVMrn+LIrQnZCcwNunalA==} - '@zag-js/qr-code@1.40.0': - resolution: {integrity: sha512-xD37tVrQ46CeqVLqkSm61kURoJ4Z/uOFcB8z7Hu3UX+1OFTfkhgrns6iLUneoRjO3hsqQaTaVkxVOQeLYWb+wA==} + '@zag-js/radio-group@1.35.3': + resolution: {integrity: sha512-kOzocjqWk3dXuRfyfsHwfw63Z99NHbc7rvVUutSsfXANXi+DFYZHuqdPUwMt+29LfaL15XTOfuGV+yUXDCgQHQ==} - '@zag-js/radio-group@1.40.0': - resolution: {integrity: sha512-sFJCdyOKzQC9hylSP19R71yv44by/C78D9EHfsxQJtvOgDv9E+h13NNX4n9wWyubC20xftlxkja8sNT5NfJKUw==} + '@zag-js/rating-group@1.35.3': + resolution: {integrity: sha512-BmhJZdbaTnd3nFWMY+nR+HF952UhWXfaXXxiBWptSLMBfAYImQTWBMrLgTHCSnVfmFATj4Gb7xQe79FQU8T5fA==} - '@zag-js/rating-group@1.40.0': - resolution: {integrity: sha512-UMBI3xAMcm7otpAczMGPEA7jC1hvV8NhnZ4mN3oftJB0bc1winoXxJdCkrXN58TTNWrGNSRzjtm048G+HPCdpw==} - - '@zag-js/react@1.40.0': - resolution: {integrity: 
sha512-2TFS1HYABYGc0lurC+4WEXvKkpxsVv6vKm+t8QAL7wfoeZnw6HDQWLc91kINp89vln+A2kwCfYqIq8HSm+9EeA==} + '@zag-js/react@1.35.3': + resolution: {integrity: sha512-x2PxYUCQ6OgOpUdmSkG5tbL9JWVqYRh42r4V2UeAdMh0MRwjAJtxjvAy50DZ8Sfia5o4UGdZMXJyDY2O7Pdhyw==} peerDependencies: react: '>=18.0.0' react-dom: '>=18.0.0' - '@zag-js/rect-utils@1.40.0': - resolution: {integrity: sha512-ikgLuE4rLlACm4mGLp6Ga8sJA44uFwohA1nVmb95sQ+VIyx2naf91CEF7SMrZVEwFKHaHpxdKVQSZLRjJqO/dw==} + '@zag-js/rect-utils@1.35.3': + resolution: {integrity: sha512-mt/oD3RXdyaX6ZPSd8BO13vvPBJ7QpVWieubE3O0WM3OPhU7ykDMRp/tR7cYMQrzUm04GlY9pbkmSSw2uABxlA==} - '@zag-js/remove-scroll@1.40.0': - resolution: {integrity: sha512-f6EgODnJMRtkbgdJCgyllND8jui+RtPrCZy6JYhhOg7KQ+bFfV36KzWQMty38ZdOyrh23UUO7MJ3WGcFXPvk3g==} + '@zag-js/remove-scroll@1.35.3': + resolution: {integrity: sha512-e59z9SbEpPiw0qwNQa2cB5/h30ZCLREaHsCw1TKTANFhwg7v85k9Lq1H/G/49li1CAjmiaOU9BNGlDvbzpNETQ==} - '@zag-js/scroll-area@1.40.0': - resolution: {integrity: sha512-7EtWETRIn8dY7xqAeMOlnEuzhOrtc65mN/0YvT3XYcBz/CzmHzyZTmos3UXBJGnKHSGj61aEpP9g3RK+x/w63A==} + '@zag-js/scroll-area@1.35.3': + resolution: {integrity: sha512-IQwdUws/AckRIHK1z/wHdHurnOeGd8h8Dmspfh3VT7NkwTnxeJ4SW9di9smuD+d25eXkJRuX5zGEDHAyx2IaPQ==} - '@zag-js/scroll-snap@1.40.0': - resolution: {integrity: sha512-XtjeOd+pwGX0+K7NvsQncrKwV8CTSzHfVVJrdQ+MweiWBpGNeAh43ySN4L+KSTgtnUiZbuwBIxlKK0tX+WupgQ==} + '@zag-js/scroll-snap@1.35.3': + resolution: {integrity: sha512-NVa2yRm2DQnF6hTV9k7Xz7l8YCZBagZTiqSwNvWKUulKD1csjt2fpBxvUt2cK+1iQnLOey2ydhs7MMsAnXPbJA==} - '@zag-js/select@1.40.0': - resolution: {integrity: sha512-auMI9SvocVvKHNWF2DobyQN6+1k3OO6UsQTdkofvbHxX7maosy8ZXA6k1r9Ndt4qLUu7CbdAAQ+qJ4VkgJyvxA==} + '@zag-js/select@1.35.3': + resolution: {integrity: sha512-ztszGHWvlbBDE0YT5LYPH+sMd6VH1ct5pH/M9VSzIUO6C5PARkW0NwSVQ1rCQJMj4sfvSE1gC1/r7urRzqEcUQ==} - '@zag-js/signature-pad@1.40.0': - resolution: {integrity: 
sha512-L0LTxcpdckaGdDDXcQCr4AG+J9xUHH+lsenH7NG4ZI7rSr4nRmHMdDH0GR7nBa6MMdPIIimjWIE/TwZ1OuHzCQ==} + '@zag-js/signature-pad@1.35.3': + resolution: {integrity: sha512-jvtxxzAQ8fre11zWUh6HflG4Ycr5z83Wba4pONRJbUE/vNgkJQ7yJgfyUl1QTlkn8Arfg2Zwoxu9GIq80HLZWg==} - '@zag-js/slider@1.40.0': - resolution: {integrity: sha512-xZGycm+ghGFG3kTYq8g0t1Av1moxg45WiFz5E3bRgP7YU9beSTaFZI8h6f65NiC5P3YuwA0RoYxA46GH22qoZg==} + '@zag-js/slider@1.35.3': + resolution: {integrity: sha512-Th142JO4Fqla5AWhGrTW6CQicwvTw87PdVpur/WotQ7brlZIww5HipzEMh5eQJSWfwpKD4PI2bYK9V/ZE/mpXA==} - '@zag-js/splitter@1.40.0': - resolution: {integrity: sha512-64KNKwlIjyUIjp7i/whDCpREiSFrNI/cF7MpBJvBGRPUWq8NpNxMGKWD+vBCV+JC61QF9xg/NgNoigFycS9sYw==} + '@zag-js/splitter@1.35.3': + resolution: {integrity: sha512-IsIbRwzjr5amGANEDsZDSToaSn8wHUWvS2l0XHmf3BiiguVApaZgQTlfqthVQC9hBHMOaGIXIW1CFUOrQYkvUQ==} - '@zag-js/steps@1.40.0': - resolution: {integrity: sha512-5sVFzcIYubCn1nJSQIx9WWNlJuFoOJMpkD/ZMwNp0LzpnmnspsCOmdnQUWEftMQ1KdwZ+qNgfo/+kHclb9cBjg==} + '@zag-js/steps@1.35.3': + resolution: {integrity: sha512-TYIrqV+v9/ULhvrTRBtQFFvJQPPTWOmjFXxlIxDwozek5R4dCIyeUYt1/ChJEc2mNETocbfDVSTxRO1dwCFpwQ==} - '@zag-js/store@1.40.0': - resolution: {integrity: sha512-EmgYIdbNZ4TN4Qht/jugY4UVkaWx69l8P1qiX23U4YwqNLq10tyOJmcXWbvsrprU1dGb24B+xq0WBm/RIjw4WA==} + '@zag-js/store@1.35.3': + resolution: {integrity: sha512-7kEV4T/20DU36UIfVMzuDlLhWSSEy/vabmpiB700tcdD9BBBODTiSg3ZeljW17dQbvE545vZOFEjVf/cQ5LVGA==} - '@zag-js/switch@1.40.0': - resolution: {integrity: sha512-hUH3AF79ndSFZxt7Plw7mVZV0QlM0kFqKwrAGBEOE77P3rKpOsMJ3wWgMb3w6nwlxGQsbwmMgAFvYUslLpM4Lg==} + '@zag-js/switch@1.35.3': + resolution: {integrity: sha512-EP/2cJ46sd+6C5x5+89jn/9NOpM05CRESYB4RMhOnTe/WFtcS4IpiYtVHFhikdXkvJoibm67O2EHep2Pm/Xj4w==} - '@zag-js/tabs@1.40.0': - resolution: {integrity: sha512-xqfPC2nQ6Bn4nqy1L+1CVcQcg/Z7K2q753OvsX2C8Wtu+7tF//HyMbOpF6fGikqlLkUzCkvjkqDjdOXcfWN9ZQ==} + '@zag-js/tabs@1.35.3': + resolution: {integrity: 
sha512-lZKlDmxE25miCikj9QZCCnL02SVV2K14KZy5bn7+XDgrWlfSNTpNTj8r5E3zGlSgio5pkTGou57ASqS7WaPDWg==} - '@zag-js/tags-input@1.40.0': - resolution: {integrity: sha512-3cB7nPlUvzZNZwQw5AaTuxwcRn1n2qkDCjLEb2NEwtmI+YxHbK3k1MtXjTccjcYjU8cAkv+jaeyZPs6KFKQcHA==} + '@zag-js/tags-input@1.35.3': + resolution: {integrity: sha512-HqyoQ3DZFhByOGnDShFfxi6u0bIf7aSVTlwmAvcL+b2ZhyU6/wIMGc4WJE7BMx1NYWM/jNLHedvGExAI8R0kXQ==} - '@zag-js/timer@1.40.0': - resolution: {integrity: sha512-Rvet226fhUtZnItjHpUYV7MH0uEFZfXT9PSRrX5jdiU4/P0eWKbirwi//AVeqcWFexXvw6ajYSfQN7EVyr2x4w==} + '@zag-js/timer@1.35.3': + resolution: {integrity: sha512-edmgitbRgsq+msxvVB4wc17Q5d5k63zMWaLJnWjUdDGAgEtM6/HNxwGb3riv46S2U3RgYxaaHTNZ/M7EE5mvYw==} - '@zag-js/toast@1.40.0': - resolution: {integrity: sha512-EDH43zdiH4Bz30cE6YI9g//qXGOOfWObM3dFLG8I0q/cJRf7/6jO82rwZAHPwfOSfKhUDxStirD8F6eoY6BWXA==} + '@zag-js/toast@1.35.3': + resolution: {integrity: sha512-whlR791GHdnMD21nNPsl2Dbql8+qu1wBZl75QzwYrjR8FlKjp8bhr3gXKzQEddcBXe9GPEFGvUs4iCyXsuTbpg==} - '@zag-js/toggle-group@1.40.0': - resolution: {integrity: sha512-+JKcnfEbdQnr5p7uRvYLdivhUsM6iio71UC10tK74nXYRnYm0/Uvxg3oQzvbNTq9WdcU/DIh3gZVZ2Vex9nBnQ==} + '@zag-js/toggle-group@1.35.3': + resolution: {integrity: sha512-Gn6JHzkQ4tlttjZcE0ZjIdxYkFeVp9VHrcMVizjJTkGZRmQ+kPZ5G/wOsZhIrvLX3Dw6Y0NkuBcP+jDHz/o3TA==} - '@zag-js/toggle@1.40.0': - resolution: {integrity: sha512-DW7682lzTP2eDlMvrS7tUX3zAm7ufrrKr7VDiX8BB6oXBRETXrVIxCYNuoIdqjwXebdjAoxaCiUZEreRVucYQg==} + '@zag-js/toggle@1.35.3': + resolution: {integrity: sha512-aFfHKuR4sKzglhkmWLA+0RTNPs9dfeqwtc96qljawGYfAYWJXkEPYK9dFfVa+arZ7L84xBi24QSLiTg7LGSFLw==} - '@zag-js/tooltip@1.40.0': - resolution: {integrity: sha512-pyrvit+nB8dIwVNTGBRlHPsh7yMJGAxxM1zfY7HOTJqF+n6+6xYTQ4gQ/Ocy1Q7I5kO88+m16naEh0qLFiTZww==} + '@zag-js/tooltip@1.35.3': + resolution: {integrity: sha512-/pImDGYl79MfLdvEphj3rSvNdj2tLW4GwGEncgdLM/GKwQiEUjfi/9EJOfLYP23M4lOOnoW7orehJ9xeaXOAkA==} - '@zag-js/tour@1.40.0': - resolution: {integrity: 
sha512-VczYGFQM9xsSbfy5N0NP91GdKxbYvfPCDAguD+WQSs1umEIgAAozSKPUdV3NNCX5Pq6B1F3dBxi6gYPdNqrAHg==} + '@zag-js/tour@1.35.3': + resolution: {integrity: sha512-DI2aCXmZaE9KcPZDs9itc2BO7ixLApJ/yVRfM69pXwVOrucdSeDDNPFkfbhj5XwB+9VjjZEkqWFHKntRIyPl5g==} - '@zag-js/tree-view@1.40.0': - resolution: {integrity: sha512-v/20ekjbM+HXDEkpHAz6k8WpoZRmZmdCApDIkIgXVHPRQk+kwAiiIPY20ZDG+DjRu7Lh0MUdQavdZtGj6Ihwkw==} + '@zag-js/tree-view@1.35.3': + resolution: {integrity: sha512-DbHaLxSNa1goE3o3IsXxEdzp8P5dvmkk1rVWgNUUIhpA+44idEjSSNXJkHPl18Mk5blqSMVjK1EX91oqai01Vw==} - '@zag-js/types@1.40.0': - resolution: {integrity: sha512-LVvxEyqFv/u9SEe5xdivvG2vYb9cCmbkD+5r6s+IGljpDLaRgv4BYyxEh40ri1ai070tL08ZKmoLfx2/xfvY/A==} + '@zag-js/types@1.35.3': + resolution: {integrity: sha512-Fnm3AMs1lfb55hlkip/eJeWHOjFB3gSi1JkZlkkdltG2l7y/zsHkumPSe6jIKy+DRRIFKRCyXVTatbPN27bO3w==} - '@zag-js/utils@1.40.0': - resolution: {integrity: sha512-XUpqDtXfHe7CySjOhLPLj9H8rxbiFUJAGgmBzNdpsGPP4wx12cpOXrpSjRXZ2kMwooMPz/P7RPDBteto8sqhAQ==} + '@zag-js/utils@1.35.3': + resolution: {integrity: sha512-LHcC+9y6TFhDsIz9I3koYxONl2JFfx5yQDzc6ZEQO2cqzXedRcN0R9IPqNGCX7JuhGt14ctDkVCm1JWGP2J6Wg==} acorn-jsx@5.3.2: resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==} @@ -4386,75 +4381,73 @@ snapshots: '@types/json-schema': 7.0.15 js-yaml: 4.1.1 - '@ark-ui/react@5.36.2(react-dom@19.2.5(react@19.2.5))(react@19.2.5)': - dependencies: - '@internationalized/date': 3.12.0 - '@zag-js/accordion': 1.40.0 - '@zag-js/anatomy': 1.40.0 - '@zag-js/angle-slider': 1.40.0 - '@zag-js/async-list': 1.40.0 - '@zag-js/auto-resize': 1.40.0 - '@zag-js/avatar': 1.40.0 - '@zag-js/carousel': 1.40.0 - '@zag-js/cascade-select': 1.40.0 - '@zag-js/checkbox': 1.40.0 - '@zag-js/clipboard': 1.40.0 - '@zag-js/collapsible': 1.40.0 - '@zag-js/collection': 1.40.0 - '@zag-js/color-picker': 1.40.0 - '@zag-js/color-utils': 1.40.0 - '@zag-js/combobox': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/date-input': 
1.40.0(@internationalized/date@3.12.0) - '@zag-js/date-picker': 1.40.0(@internationalized/date@3.12.0) - '@zag-js/date-utils': 1.40.0(@internationalized/date@3.12.0) - '@zag-js/dialog': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/drawer': 1.40.0 - '@zag-js/editable': 1.40.0 - '@zag-js/file-upload': 1.40.0 - '@zag-js/file-utils': 1.40.0 - '@zag-js/floating-panel': 1.40.0 - '@zag-js/focus-trap': 1.40.0 - '@zag-js/focus-visible': 1.40.0 - '@zag-js/highlight-word': 1.40.0 - '@zag-js/hover-card': 1.40.0 - '@zag-js/i18n-utils': 1.40.0 - '@zag-js/image-cropper': 1.40.0 - '@zag-js/json-tree-utils': 1.40.0 - '@zag-js/listbox': 1.40.0 - '@zag-js/marquee': 1.40.0 - '@zag-js/menu': 1.40.0 - '@zag-js/navigation-menu': 1.40.0 - '@zag-js/number-input': 1.40.0 - '@zag-js/pagination': 1.40.0 - '@zag-js/password-input': 1.40.0 - '@zag-js/pin-input': 1.40.0 - '@zag-js/popover': 1.40.0 - '@zag-js/presence': 1.40.0 - '@zag-js/progress': 1.40.0 - '@zag-js/qr-code': 1.40.0 - '@zag-js/radio-group': 1.40.0 - '@zag-js/rating-group': 1.40.0 - '@zag-js/react': 1.40.0(react-dom@19.2.5(react@19.2.5))(react@19.2.5) - '@zag-js/scroll-area': 1.40.0 - '@zag-js/select': 1.40.0 - '@zag-js/signature-pad': 1.40.0 - '@zag-js/slider': 1.40.0 - '@zag-js/splitter': 1.40.0 - '@zag-js/steps': 1.40.0 - '@zag-js/switch': 1.40.0 - '@zag-js/tabs': 1.40.0 - '@zag-js/tags-input': 1.40.0 - '@zag-js/timer': 1.40.0 - '@zag-js/toast': 1.40.0 - '@zag-js/toggle': 1.40.0 - '@zag-js/toggle-group': 1.40.0 - '@zag-js/tooltip': 1.40.0 - '@zag-js/tour': 1.40.0 - '@zag-js/tree-view': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@ark-ui/react@5.34.1(react-dom@19.2.5(react@19.2.5))(react@19.2.5)': + dependencies: + '@internationalized/date': 3.11.0 + '@zag-js/accordion': 1.35.3 + '@zag-js/anatomy': 1.35.3 + '@zag-js/angle-slider': 1.35.3 + '@zag-js/async-list': 1.35.3 + '@zag-js/auto-resize': 1.35.3 + '@zag-js/avatar': 1.35.3 + '@zag-js/carousel': 1.35.3 + '@zag-js/cascade-select': 1.35.3 + 
'@zag-js/checkbox': 1.35.3 + '@zag-js/clipboard': 1.35.3 + '@zag-js/collapsible': 1.35.3 + '@zag-js/collection': 1.35.3 + '@zag-js/color-picker': 1.35.3 + '@zag-js/color-utils': 1.35.3 + '@zag-js/combobox': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/date-picker': 1.35.3(@internationalized/date@3.11.0) + '@zag-js/date-utils': 1.35.3(@internationalized/date@3.11.0) + '@zag-js/dialog': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/drawer': 1.35.3 + '@zag-js/editable': 1.35.3 + '@zag-js/file-upload': 1.35.3 + '@zag-js/file-utils': 1.35.3 + '@zag-js/floating-panel': 1.35.3 + '@zag-js/focus-trap': 1.35.3 + '@zag-js/highlight-word': 1.35.3 + '@zag-js/hover-card': 1.35.3 + '@zag-js/i18n-utils': 1.35.3 + '@zag-js/image-cropper': 1.35.3 + '@zag-js/json-tree-utils': 1.35.3 + '@zag-js/listbox': 1.35.3 + '@zag-js/marquee': 1.35.3 + '@zag-js/menu': 1.35.3 + '@zag-js/navigation-menu': 1.35.3 + '@zag-js/number-input': 1.35.3 + '@zag-js/pagination': 1.35.3 + '@zag-js/password-input': 1.35.3 + '@zag-js/pin-input': 1.35.3 + '@zag-js/popover': 1.35.3 + '@zag-js/presence': 1.35.3 + '@zag-js/progress': 1.35.3 + '@zag-js/qr-code': 1.35.3 + '@zag-js/radio-group': 1.35.3 + '@zag-js/rating-group': 1.35.3 + '@zag-js/react': 1.35.3(react-dom@19.2.5(react@19.2.5))(react@19.2.5) + '@zag-js/scroll-area': 1.35.3 + '@zag-js/select': 1.35.3 + '@zag-js/signature-pad': 1.35.3 + '@zag-js/slider': 1.35.3 + '@zag-js/splitter': 1.35.3 + '@zag-js/steps': 1.35.3 + '@zag-js/switch': 1.35.3 + '@zag-js/tabs': 1.35.3 + '@zag-js/tags-input': 1.35.3 + '@zag-js/timer': 1.35.3 + '@zag-js/toast': 1.35.3 + '@zag-js/toggle': 1.35.3 + '@zag-js/toggle-group': 1.35.3 + '@zag-js/tooltip': 1.35.3 + '@zag-js/tour': 1.35.3 + '@zag-js/tree-view': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 react: 19.2.5 react-dom: 19.2.5(react@19.2.5) @@ -4650,15 +4643,15 @@ snapshots: '@chakra-ui/anatomy@2.3.4': {} - 
'@chakra-ui/react@3.35.0(@emotion/react@11.14.0(@types/react@19.2.14)(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5)': + '@chakra-ui/react@3.34.0(@emotion/react@11.14.0(@types/react@19.2.14)(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5)': dependencies: - '@ark-ui/react': 5.36.2(react-dom@19.2.5(react@19.2.5))(react@19.2.5) + '@ark-ui/react': 5.34.1(react-dom@19.2.5(react@19.2.5))(react@19.2.5) '@emotion/is-prop-valid': 1.4.0 '@emotion/react': 11.14.0(@types/react@19.2.14)(react@19.2.5) '@emotion/serialize': 1.3.3 '@emotion/use-insertion-effect-with-fallbacks': 1.2.0(react@19.2.5) '@emotion/utils': 1.4.2 - '@pandacss/is-valid-prop': 1.11.0 + '@pandacss/is-valid-prop': 1.9.0 csstype: 3.2.3 react: 19.2.5 react-dom: 19.2.5(react@19.2.5) @@ -4949,13 +4942,13 @@ snapshots: optionalDependencies: '@types/node': 24.10.3 - '@internationalized/date@3.12.0': + '@internationalized/date@3.11.0': dependencies: - '@swc/helpers': 0.5.21 + '@swc/helpers': 0.5.19 '@internationalized/number@3.6.5': dependencies: - '@swc/helpers': 0.5.21 + '@swc/helpers': 0.5.19 '@isaacs/cliui@9.0.0': {} @@ -5047,7 +5040,7 @@ snapshots: '@oxc-project/types@0.124.0': {} - '@pandacss/is-valid-prop@1.11.0': {} + '@pandacss/is-valid-prop@1.9.0': {} '@pkgr/core@0.2.9': {} @@ -5163,7 +5156,7 @@ snapshots: '@swc/core-win32-x64-msvc@1.15.18': optional: true - '@swc/core@1.15.18(@swc/helpers@0.5.21)': + '@swc/core@1.15.18(@swc/helpers@0.5.19)': dependencies: '@swc/counter': 0.1.3 '@swc/types': 0.1.25 @@ -5178,11 +5171,11 @@ snapshots: '@swc/core-win32-arm64-msvc': 1.15.18 '@swc/core-win32-ia32-msvc': 1.15.18 '@swc/core-win32-x64-msvc': 1.15.18 - '@swc/helpers': 0.5.21 + '@swc/helpers': 0.5.19 '@swc/counter@0.1.3': {} - '@swc/helpers@0.5.21': + '@swc/helpers@0.5.19': dependencies: tslib: 2.8.1 @@ -5549,10 +5542,10 @@ snapshots: d3-time-format: 4.1.0 internmap: 2.0.3 - 
'@vitejs/plugin-react-swc@4.2.3(@swc/helpers@0.5.21)(vite@8.0.8(@types/node@24.10.3)(esbuild@0.27.7)(jiti@1.21.7)(yaml@2.8.3))': + '@vitejs/plugin-react-swc@4.2.3(@swc/helpers@0.5.19)(vite@8.0.8(@types/node@24.10.3)(esbuild@0.27.7)(jiti@1.21.7)(yaml@2.8.3))': dependencies: '@rolldown/pluginutils': 1.0.0-rc.2 - '@swc/core': 1.15.18(@swc/helpers@0.5.21) + '@swc/core': 1.15.18(@swc/helpers@0.5.19) vite: 8.0.8(@types/node@24.10.3)(esbuild@0.27.7)(jiti@1.21.7)(yaml@2.8.3) transitivePeerDependencies: - '@swc/helpers' @@ -5644,572 +5637,561 @@ snapshots: d3-selection: 3.0.0 d3-zoom: 3.0.0 - '@zag-js/accordion@1.40.0': + '@zag-js/accordion@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/anatomy@1.40.0': {} + '@zag-js/anatomy@1.35.3': {} - '@zag-js/angle-slider@1.40.0': + '@zag-js/angle-slider@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/rect-utils': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/rect-utils': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/aria-hidden@1.40.0': + '@zag-js/aria-hidden@1.35.3': dependencies: - '@zag-js/dom-query': 1.40.0 + '@zag-js/dom-query': 1.35.3 - '@zag-js/async-list@1.40.0': + '@zag-js/async-list@1.35.3': dependencies: - '@zag-js/core': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/core': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/auto-resize@1.40.0': + '@zag-js/auto-resize@1.35.3': dependencies: - '@zag-js/dom-query': 1.40.0 + '@zag-js/dom-query': 1.35.3 - '@zag-js/avatar@1.40.0': + '@zag-js/avatar@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - 
'@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/carousel@1.40.0': + '@zag-js/carousel@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/scroll-snap': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/scroll-snap': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/cascade-select@1.40.0': + '@zag-js/cascade-select@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/collection': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-visible': 1.40.0 - '@zag-js/popper': 1.40.0 - '@zag-js/rect-utils': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/collection': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-visible': 1.35.3 + '@zag-js/popper': 1.35.3 + '@zag-js/rect-utils': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/checkbox@1.40.0': + '@zag-js/checkbox@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-visible': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-visible': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/clipboard@1.40.0': + '@zag-js/clipboard@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + 
'@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/collapsible@1.40.0': + '@zag-js/collapsible@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/collection@1.40.0': + '@zag-js/collection@1.35.3': dependencies: - '@zag-js/utils': 1.40.0 + '@zag-js/utils': 1.35.3 - '@zag-js/color-picker@1.40.0': + '@zag-js/color-picker@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/color-utils': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/popper': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/color-utils': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/popper': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/color-utils@1.40.0': - dependencies: - '@zag-js/utils': 1.40.0 + '@zag-js/color-utils@1.35.3': + dependencies: + '@zag-js/utils': 1.35.3 - '@zag-js/combobox@1.40.0': + '@zag-js/combobox@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/collection': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-visible': 1.40.0 - '@zag-js/live-region': 1.40.0 - '@zag-js/popper': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/aria-hidden': 1.35.3 + '@zag-js/collection': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-visible': 1.35.3 + '@zag-js/popper': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/core@1.40.0': + '@zag-js/core@1.35.3': dependencies: - '@zag-js/dom-query': 1.40.0 - '@zag-js/utils': 1.40.0 + 
'@zag-js/dom-query': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/date-input@1.40.0(@internationalized/date@3.12.0)': + '@zag-js/date-picker@1.35.3(@internationalized/date@3.11.0)': dependencies: - '@internationalized/date': 3.12.0 - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/date-utils': 1.40.0(@internationalized/date@3.12.0) - '@zag-js/dom-query': 1.40.0 - '@zag-js/live-region': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/date-picker@1.40.0(@internationalized/date@3.12.0)': - dependencies: - '@internationalized/date': 3.12.0 - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/date-utils': 1.40.0(@internationalized/date@3.12.0) - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/live-region': 1.40.0 - '@zag-js/popper': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@internationalized/date': 3.11.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/date-utils': 1.35.3(@internationalized/date@3.11.0) + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/live-region': 1.35.3 + '@zag-js/popper': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/date-utils@1.40.0(@internationalized/date@3.12.0)': + '@zag-js/date-utils@1.35.3(@internationalized/date@3.11.0)': dependencies: - '@internationalized/date': 3.12.0 + '@internationalized/date': 3.11.0 - '@zag-js/dialog@1.40.0': + '@zag-js/dialog@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/aria-hidden': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-trap': 1.40.0 - '@zag-js/remove-scroll': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/aria-hidden': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-trap': 1.35.3 + '@zag-js/remove-scroll': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 
1.35.3 - '@zag-js/dismissable@1.40.0': + '@zag-js/dismissable@1.35.3': dependencies: - '@zag-js/dom-query': 1.40.0 - '@zag-js/interact-outside': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/dom-query': 1.35.3 + '@zag-js/interact-outside': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/dom-query@1.40.0': + '@zag-js/dom-query@1.35.3': dependencies: - '@zag-js/types': 1.40.0 + '@zag-js/types': 1.35.3 - '@zag-js/drawer@1.40.0': + '@zag-js/drawer@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/aria-hidden': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-trap': 1.40.0 - '@zag-js/remove-scroll': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/aria-hidden': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-trap': 1.35.3 + '@zag-js/remove-scroll': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/editable@1.40.0': + '@zag-js/editable@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/interact-outside': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/interact-outside': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/file-upload@1.40.0': + '@zag-js/file-upload@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/file-utils': 1.40.0 - '@zag-js/i18n-utils': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/file-utils': 1.35.3 + '@zag-js/i18n-utils': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/file-utils@1.40.0': + '@zag-js/file-utils@1.35.3': dependencies: - '@zag-js/i18n-utils': 1.40.0 + 
'@zag-js/i18n-utils': 1.35.3 - '@zag-js/floating-panel@1.40.0': + '@zag-js/floating-panel@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/popper': 1.40.0 - '@zag-js/rect-utils': 1.40.0 - '@zag-js/store': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/popper': 1.35.3 + '@zag-js/rect-utils': 1.35.3 + '@zag-js/store': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/focus-trap@1.40.0': + '@zag-js/focus-trap@1.35.3': dependencies: - '@zag-js/dom-query': 1.40.0 + '@zag-js/dom-query': 1.35.3 - '@zag-js/focus-visible@1.40.0': + '@zag-js/focus-visible@1.35.3': dependencies: - '@zag-js/dom-query': 1.40.0 + '@zag-js/dom-query': 1.35.3 - '@zag-js/highlight-word@1.40.0': {} + '@zag-js/highlight-word@1.35.3': {} - '@zag-js/hover-card@1.40.0': + '@zag-js/hover-card@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/popper': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/popper': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/i18n-utils@1.40.0': + '@zag-js/i18n-utils@1.35.3': dependencies: - '@zag-js/dom-query': 1.40.0 + '@zag-js/dom-query': 1.35.3 - '@zag-js/image-cropper@1.40.0': + '@zag-js/image-cropper@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/interact-outside@1.40.0': + '@zag-js/interact-outside@1.35.3': dependencies: - '@zag-js/dom-query': 1.40.0 - '@zag-js/utils': 
1.40.0 + '@zag-js/dom-query': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/json-tree-utils@1.40.0': {} + '@zag-js/json-tree-utils@1.35.3': {} - '@zag-js/listbox@1.40.0': + '@zag-js/listbox@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/collection': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-visible': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/collection': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-visible': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/live-region@1.40.0': {} + '@zag-js/live-region@1.35.3': {} - '@zag-js/marquee@1.40.0': + '@zag-js/marquee@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/menu@1.40.0': + '@zag-js/menu@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-visible': 1.40.0 - '@zag-js/popper': 1.40.0 - '@zag-js/rect-utils': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-visible': 1.35.3 + '@zag-js/popper': 1.35.3 + '@zag-js/rect-utils': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/navigation-menu@1.40.0': + '@zag-js/navigation-menu@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + 
'@zag-js/utils': 1.35.3 - '@zag-js/number-input@1.40.0': + '@zag-js/number-input@1.35.3': dependencies: '@internationalized/number': 3.6.5 - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/pagination@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/password-input@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/pin-input@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/popover@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/aria-hidden': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-trap': 1.40.0 - '@zag-js/popper': 1.40.0 - '@zag-js/remove-scroll': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/popper@1.40.0': + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/pagination@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/password-input@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/pin-input@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/popover@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/aria-hidden': 1.35.3 + 
'@zag-js/core': 1.35.3 + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-trap': 1.35.3 + '@zag-js/popper': 1.35.3 + '@zag-js/remove-scroll': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/popper@1.35.3': dependencies: '@floating-ui/dom': 1.7.6 - '@zag-js/dom-query': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/dom-query': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/presence@1.40.0': + '@zag-js/presence@1.35.3': dependencies: - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 - '@zag-js/progress@1.40.0': + '@zag-js/progress@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/qr-code@1.40.0': + '@zag-js/qr-code@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 proxy-memoize: 3.0.1 uqr: 0.1.2 - '@zag-js/radio-group@1.40.0': + '@zag-js/radio-group@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-visible': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-visible': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/rating-group@1.40.0': + '@zag-js/rating-group@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + 
'@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/react@1.40.0(react-dom@19.2.5(react@19.2.5))(react@19.2.5)': + '@zag-js/react@1.35.3(react-dom@19.2.5(react@19.2.5))(react@19.2.5)': dependencies: - '@zag-js/core': 1.40.0 - '@zag-js/store': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/core': 1.35.3 + '@zag-js/store': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 react: 19.2.5 react-dom: 19.2.5(react@19.2.5) - '@zag-js/rect-utils@1.40.0': {} + '@zag-js/rect-utils@1.35.3': {} - '@zag-js/remove-scroll@1.40.0': + '@zag-js/remove-scroll@1.35.3': dependencies: - '@zag-js/dom-query': 1.40.0 + '@zag-js/dom-query': 1.35.3 - '@zag-js/scroll-area@1.40.0': + '@zag-js/scroll-area@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/scroll-snap@1.40.0': + '@zag-js/scroll-snap@1.35.3': dependencies: - '@zag-js/dom-query': 1.40.0 + '@zag-js/dom-query': 1.35.3 - '@zag-js/select@1.40.0': + '@zag-js/select@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/collection': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-visible': 1.40.0 - '@zag-js/popper': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/collection': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-visible': 1.35.3 + '@zag-js/popper': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/signature-pad@1.40.0': + '@zag-js/signature-pad@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - 
'@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 perfect-freehand: 1.2.3 - '@zag-js/slider@1.40.0': + '@zag-js/slider@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/splitter@1.40.0': + '@zag-js/splitter@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/steps@1.40.0': + '@zag-js/steps@1.35.3': dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 - '@zag-js/store@1.40.0': + '@zag-js/store@1.35.3': dependencies: proxy-compare: 3.0.1 - '@zag-js/switch@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-visible': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/tabs@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/tags-input@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/auto-resize': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/interact-outside': 1.40.0 - '@zag-js/live-region': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - 
'@zag-js/timer@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/toast@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/toggle-group@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/toggle@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/tooltip@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-visible': 1.40.0 - '@zag-js/popper': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/tour@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dismissable': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/focus-trap': 1.40.0 - '@zag-js/interact-outside': 1.40.0 - '@zag-js/popper': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/tree-view@1.40.0': - dependencies: - '@zag-js/anatomy': 1.40.0 - '@zag-js/collection': 1.40.0 - '@zag-js/core': 1.40.0 - '@zag-js/dom-query': 1.40.0 - '@zag-js/types': 1.40.0 - '@zag-js/utils': 1.40.0 - - '@zag-js/types@1.40.0': + '@zag-js/switch@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-visible': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/tabs@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/tags-input@1.35.3': + dependencies: + 
'@zag-js/anatomy': 1.35.3 + '@zag-js/auto-resize': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/interact-outside': 1.35.3 + '@zag-js/live-region': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/timer@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/toast@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/toggle-group@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/toggle@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/tooltip@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-visible': 1.35.3 + '@zag-js/popper': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/tour@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dismissable': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/focus-trap': 1.35.3 + '@zag-js/interact-outside': 1.35.3 + '@zag-js/popper': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/tree-view@1.35.3': + dependencies: + '@zag-js/anatomy': 1.35.3 + '@zag-js/collection': 1.35.3 + '@zag-js/core': 1.35.3 + '@zag-js/dom-query': 1.35.3 + '@zag-js/types': 1.35.3 + '@zag-js/utils': 1.35.3 + + '@zag-js/types@1.35.3': dependencies: csstype: 3.2.3 - '@zag-js/utils@1.40.0': {} + '@zag-js/utils@1.35.3': {} acorn-jsx@5.3.2(acorn@8.14.1): dependencies: @@ -6453,9 +6435,9 @@ snapshots: chai@6.2.2: {} - 
chakra-react-select@6.1.1(@chakra-ui/react@3.35.0(@emotion/react@11.14.0(@types/react@19.2.14)(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5))(@types/react@19.2.14)(next-themes@0.4.6(react-dom@19.2.5(react@19.2.5))(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5): + chakra-react-select@6.1.1(@chakra-ui/react@3.34.0(@emotion/react@11.14.0(@types/react@19.2.14)(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5))(@types/react@19.2.14)(next-themes@0.4.6(react-dom@19.2.5(react@19.2.5))(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5): dependencies: - '@chakra-ui/react': 3.35.0(@emotion/react@11.14.0(@types/react@19.2.14)(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5) + '@chakra-ui/react': 3.34.0(@emotion/react@11.14.0(@types/react@19.2.14)(react@19.2.5))(react-dom@19.2.5(react@19.2.5))(react@19.2.5) next-themes: 0.4.6(react-dom@19.2.5(react@19.2.5))(react@19.2.5) react: 19.2.5 react-select: 5.10.1(@types/react@19.2.14)(react-dom@19.2.5(react@19.2.5))(react@19.2.5) From ba36e029c584de6e20640a0b640aa8f0242443c5 Mon Sep 17 00:00:00 2001 From: Vincent <97131062+vincbeck@users.noreply.github.com> Date: Thu, 28 May 2026 09:42:33 -0700 Subject: [PATCH 19/28] Add `consumer_teams` to `AssetAccessControl` in task-sdk (#67625) --- .../sdk/definitions/asset/access_control.py | 10 +++-- .../definitions/test_asset_access_control.py | 37 ++++++++++++++++++- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/task-sdk/src/airflow/sdk/definitions/asset/access_control.py b/task-sdk/src/airflow/sdk/definitions/asset/access_control.py index 04c218bfdb63c..e642c14f0c54f 100644 --- a/task-sdk/src/airflow/sdk/definitions/asset/access_control.py +++ b/task-sdk/src/airflow/sdk/definitions/asset/access_control.py @@ -19,10 +19,10 @@ import attrs -def _validate_producer_teams(instance, attribute, value): +def _validate_teams(instance, attribute, value): for entry in value: if not isinstance(entry, str) or not entry or 
entry.isspace(): - raise ValueError("Each entry in producer_teams must be a non-empty string") + raise ValueError(f"Each entry in {attribute.name} must be a non-empty string") return value @@ -32,6 +32,10 @@ class AssetAccessControl: producer_teams: list[str] = attrs.field( factory=list, - validator=[_validate_producer_teams], + validator=[_validate_teams], + ) + consumer_teams: list[str] = attrs.field( + factory=list, + validator=[_validate_teams], ) allow_global: bool = attrs.field(default=True, validator=[attrs.validators.instance_of(bool)]) diff --git a/task-sdk/tests/task_sdk/definitions/test_asset_access_control.py b/task-sdk/tests/task_sdk/definitions/test_asset_access_control.py index 8524d8b0ccb93..86e4f916aa69b 100644 --- a/task-sdk/tests/task_sdk/definitions/test_asset_access_control.py +++ b/task-sdk/tests/task_sdk/definitions/test_asset_access_control.py @@ -25,11 +25,17 @@ class TestAssetAccessControl: def test_defaults(self): ac = AssetAccessControl() assert ac.producer_teams == [] + assert ac.consumer_teams == [] assert ac.allow_global is True def test_explicit_values(self): - ac = AssetAccessControl(producer_teams=["team_a", "team_b"], allow_global=False) + ac = AssetAccessControl( + producer_teams=["team_a", "team_b"], + consumer_teams=["team_c"], + allow_global=False, + ) assert ac.producer_teams == ["team_a", "team_b"] + assert ac.consumer_teams == ["team_c"] assert ac.allow_global is False @pytest.mark.parametrize( @@ -47,6 +53,21 @@ def test_rejects_invalid_producer_teams(self, teams): with pytest.raises(ValueError, match="producer_teams"): AssetAccessControl(producer_teams=teams) + @pytest.mark.parametrize( + "teams", + [ + [""], + [123], + [None], + [True], + [{}], + ["team_a", " ", "team_b"], + ], + ) + def test_rejects_invalid_consumer_teams(self, teams): + with pytest.raises(ValueError, match="consumer_teams"): + AssetAccessControl(consumer_teams=teams) + @pytest.mark.parametrize( "teams", [ @@ -61,6 +82,20 @@ def 
test_accepts_valid_producer_teams(self, teams): ac = AssetAccessControl(producer_teams=teams) assert ac.producer_teams == teams + @pytest.mark.parametrize( + "teams", + [ + [], + ["team_a"], + ["team_a", "team_b"], + ["team-with-dashes"], + ["team_with_underscores"], + ], + ) + def test_accepts_valid_consumer_teams(self, teams): + ac = AssetAccessControl(consumer_teams=teams) + assert ac.consumer_teams == teams + def test_allow_global_must_be_bool(self): with pytest.raises(TypeError): AssetAccessControl(allow_global="yes") From fc845cf64c0ab20e08890b3e0470d6c50368a260 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Thu, 28 May 2026 18:54:11 +0200 Subject: [PATCH 20/28] Write Cloud SQL keyfile_dict credentials with 0600 permissions (#67507) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the Google connection supplies credentials via ``keyfile_dict``, ``CloudSqlProxyRunner._get_credential_parameters`` wrote the credentials file with ``open(path, "w")``. That inherits the process umask (typically ``0o644`` on most distributions), leaving the service-account private key world-readable on shared worker hosts — including any other process on the same machine that can read the worker's temp directory. Use ``os.open(..., O_WRONLY | O_CREAT | O_TRUNC, 0o600)`` followed by ``os.fdopen`` so the file is created with restrictive permissions atomically. Matches the explicit-mode handling already used for the SSL temp files in the same module. 
--- .../providers/google/cloud/hooks/cloud_sql.py | 6 +++- .../unit/google/cloud/hooks/test_cloud_sql.py | 30 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/providers/google/src/airflow/providers/google/cloud/hooks/cloud_sql.py b/providers/google/src/airflow/providers/google/cloud/hooks/cloud_sql.py index 518686102234e..d4d5f18134fc8 100644 --- a/providers/google/src/airflow/providers/google/cloud/hooks/cloud_sql.py +++ b/providers/google/src/airflow/providers/google/cloud/hooks/cloud_sql.py @@ -626,7 +626,11 @@ def _get_credential_parameters(self) -> list[str]: elif keyfile_dict: keyfile_content = keyfile_dict if isinstance(keyfile_dict, dict) else json.loads(keyfile_dict) self.log.info("Saving credentials to %s", self.credentials_path) - with open(self.credentials_path, "w") as file: + # Explicit 0o600 — the file holds a service-account private key. The plain + # ``open()`` form inherits the process umask (typically 0o644), which leaves the + # key world-readable on shared worker hosts. 
+ fd = os.open(self.credentials_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600) + with os.fdopen(fd, "w") as file: json.dump(keyfile_content, file) credential_params = ["-credential_file", self.credentials_path] else: diff --git a/providers/google/tests/unit/google/cloud/hooks/test_cloud_sql.py b/providers/google/tests/unit/google/cloud/hooks/test_cloud_sql.py index b2a82c2a781ab..710f4793fa81f 100644 --- a/providers/google/tests/unit/google/cloud/hooks/test_cloud_sql.py +++ b/providers/google/tests/unit/google/cloud/hooks/test_cloud_sql.py @@ -21,7 +21,9 @@ import json import os import platform +import stat import tempfile +from pathlib import Path from unittest import mock from unittest.mock import PropertyMock, call, mock_open from urllib.parse import parse_qsl, unquote, urlsplit @@ -1934,6 +1936,34 @@ def test_cloud_sql_proxy_runner_keeps_key_path_credentials_with_iam_login(self, assert runner._get_credential_parameters() == ["-credential_file", "/tmp/key.json"] assert "-enable_iam_login" in runner.command_line_parameters + @mock.patch("airflow.providers.google.cloud.hooks.cloud_sql.GoogleBaseHook.get_connection") + def test_credentials_file_from_keyfile_dict_is_chmod_0600(self, get_connection, tmp_path): + """The keyfile_dict credentials file must be written with explicit 0600 permissions. + + Plain ``open(...)`` inherits the process umask (typically 0644), leaving the + service-account private key world-readable on shared worker hosts. 
+ """ + keyfile_dict = {"type": "service_account", "private_key": "PRIVATE"} + connection = Connection(conn_id="google_conn", conn_type="google_cloud_platform") + extra = json.dumps({"keyfile_dict": json.dumps(keyfile_dict)}) + if AIRFLOW_V_3_1_PLUS: + connection.extra = extra + else: + connection.set_extra(extra) + get_connection.return_value = connection + + runner = CloudSqlProxyRunner( + path_prefix=str(tmp_path / "creds"), + instance_specification="project:us-east-1:instance", + gcp_conn_id="google_conn", + ) + runner._get_credential_parameters() + + creds_path = Path(runner.credentials_path) + assert creds_path.exists() + # Mask off the file-type bits, keep only the permission bits. + assert stat.S_IMODE(creds_path.stat().st_mode) == 0o600 + class TestCloudSQLAsyncHook: @pytest.mark.asyncio From 23b334f29eba4f3e3ebfed46540f2ea2d59d1e68 Mon Sep 17 00:00:00 2001 From: David Blain Date: Thu, 28 May 2026 19:01:15 +0200 Subject: [PATCH 21/28] Add IBM MQ provider (#62790) Added IBM MQ provider --- .github/CODEOWNERS | 1 + .../ISSUE_TEMPLATE/1-airflow_bug_report.yml | 1 + .github/boring-cyborg.yml | 3 + Dockerfile.ci | 2 +- airflow-core/docs/extra-packages-ref.rst | 2 + .../tests/unit/always/test_example_dags.py | 1 + .../12_provider_distributions.rst | 13 + dev/breeze/doc/images/output_build-docs.svg | 6 +- dev/breeze/doc/images/output_build-docs.txt | 2 +- ...release-management_add-back-references.svg | 6 +- ...release-management_add-back-references.txt | 2 +- ...output_release-management_publish-docs.svg | 6 +- ...output_release-management_publish-docs.txt | 2 +- ...t_sbom_generate-providers-requirements.svg | 2 +- ...t_sbom_generate-providers-requirements.txt | 2 +- .../output_workflow-run_publish-docs.svg | 6 +- .../output_workflow-run_publish-docs.txt | 2 +- devel-common/src/docs/provider_conf.py | 2 +- docs/spelling_wordlist.txt | 6 + providers/ibm/mq/.gitignore | 1 + providers/ibm/mq/LICENSE | 201 +++ providers/ibm/mq/NOTICE | 5 + providers/ibm/mq/README.rst 
| 92 ++ .../ibm/mq/docs/.latest-doc-only-change.txt | 1 + providers/ibm/mq/docs/changelog.rst | 32 + providers/ibm/mq/docs/commits.rst | 35 + providers/ibm/mq/docs/conf.py | 27 + providers/ibm/mq/docs/connections/ibmmq.rst | 30 + .../docs/connections/images/mq_connection.png | Bin 0 -> 31251 bytes providers/ibm/mq/docs/index.rst | 136 ++ .../installing-providers-from-sources.rst | 18 + .../ibm/mq/docs/integration-logos/ibm-mq.png | Bin 0 -> 67759 bytes providers/ibm/mq/docs/message-queues.rst | 100 ++ providers/ibm/mq/docs/redirects.txt | 1 + providers/ibm/mq/docs/security.rst | 18 + providers/ibm/mq/pre_extras_install.yaml | 33 + providers/ibm/mq/provider.yaml | 67 + providers/ibm/mq/pyproject.toml | 139 ++ providers/ibm/mq/src/airflow/__init__.py | 17 + .../ibm/mq/src/airflow/providers/__init__.py | 17 + .../mq/src/airflow/providers/ibm/__init__.py | 17 + .../src/airflow/providers/ibm/mq/__init__.py | 39 + .../providers/ibm/mq/get_provider_info.py | 57 + .../providers/ibm/mq/hooks/__init__.py | 16 + .../src/airflow/providers/ibm/mq/hooks/mq.py | 708 ++++++++++ .../providers/ibm/mq/queues/__init__.py | 16 + .../src/airflow/providers/ibm/mq/queues/mq.py | 137 ++ .../providers/ibm/mq/triggers/__init__.py | 16 + .../airflow/providers/ibm/mq/triggers/mq.py | 80 ++ .../providers/ibm/mq/version_compat.py | 39 + providers/ibm/mq/tests/conftest.py | 120 ++ providers/ibm/mq/tests/system/__init__.py | 17 + providers/ibm/mq/tests/system/ibm/__init__.py | 17 + .../ibm/mq/tests/system/ibm/mq/__init__.py | 16 + .../mq/example_dag_message_queue_trigger.py | 48 + providers/ibm/mq/tests/unit/__init__.py | 17 + providers/ibm/mq/tests/unit/ibm/__init__.py | 17 + .../ibm/mq/tests/unit/ibm/mq/__init__.py | 17 + .../mq/tests/unit/ibm/mq/hooks/__init__.py | 16 + .../ibm/mq/tests/unit/ibm/mq/hooks/test_mq.py | 1150 +++++++++++++++++ .../mq/tests/unit/ibm/mq/queues/__init__.py | 16 + .../mq/tests/unit/ibm/mq/queues/test_mq.py | 249 ++++ .../mq/tests/unit/ibm/mq/triggers/__init__.py | 16 + 
.../mq/tests/unit/ibm/mq/triggers/test_mq.py | 76 ++ pyproject.toml | 10 + scripts/ci/docker-compose/remove-sources.yml | 1 + scripts/ci/docker-compose/tests-sources.yml | 1 + scripts/docker/entrypoint_ci.sh | 2 +- .../in_container/run_pre_extras_install.py | 94 +- uv.lock | 67 +- 70 files changed, 4102 insertions(+), 27 deletions(-) create mode 100644 providers/ibm/mq/.gitignore create mode 100644 providers/ibm/mq/LICENSE create mode 100644 providers/ibm/mq/NOTICE create mode 100644 providers/ibm/mq/README.rst create mode 100644 providers/ibm/mq/docs/.latest-doc-only-change.txt create mode 100644 providers/ibm/mq/docs/changelog.rst create mode 100644 providers/ibm/mq/docs/commits.rst create mode 100644 providers/ibm/mq/docs/conf.py create mode 100644 providers/ibm/mq/docs/connections/ibmmq.rst create mode 100644 providers/ibm/mq/docs/connections/images/mq_connection.png create mode 100644 providers/ibm/mq/docs/index.rst create mode 100644 providers/ibm/mq/docs/installing-providers-from-sources.rst create mode 100644 providers/ibm/mq/docs/integration-logos/ibm-mq.png create mode 100644 providers/ibm/mq/docs/message-queues.rst create mode 100644 providers/ibm/mq/docs/redirects.txt create mode 100644 providers/ibm/mq/docs/security.rst create mode 100644 providers/ibm/mq/pre_extras_install.yaml create mode 100644 providers/ibm/mq/provider.yaml create mode 100644 providers/ibm/mq/pyproject.toml create mode 100644 providers/ibm/mq/src/airflow/__init__.py create mode 100644 providers/ibm/mq/src/airflow/providers/__init__.py create mode 100644 providers/ibm/mq/src/airflow/providers/ibm/__init__.py create mode 100644 providers/ibm/mq/src/airflow/providers/ibm/mq/__init__.py create mode 100644 providers/ibm/mq/src/airflow/providers/ibm/mq/get_provider_info.py create mode 100644 providers/ibm/mq/src/airflow/providers/ibm/mq/hooks/__init__.py create mode 100644 providers/ibm/mq/src/airflow/providers/ibm/mq/hooks/mq.py create mode 100644 
providers/ibm/mq/src/airflow/providers/ibm/mq/queues/__init__.py create mode 100644 providers/ibm/mq/src/airflow/providers/ibm/mq/queues/mq.py create mode 100644 providers/ibm/mq/src/airflow/providers/ibm/mq/triggers/__init__.py create mode 100644 providers/ibm/mq/src/airflow/providers/ibm/mq/triggers/mq.py create mode 100644 providers/ibm/mq/src/airflow/providers/ibm/mq/version_compat.py create mode 100644 providers/ibm/mq/tests/conftest.py create mode 100644 providers/ibm/mq/tests/system/__init__.py create mode 100644 providers/ibm/mq/tests/system/ibm/__init__.py create mode 100644 providers/ibm/mq/tests/system/ibm/mq/__init__.py create mode 100644 providers/ibm/mq/tests/system/ibm/mq/example_dag_message_queue_trigger.py create mode 100644 providers/ibm/mq/tests/unit/__init__.py create mode 100644 providers/ibm/mq/tests/unit/ibm/__init__.py create mode 100644 providers/ibm/mq/tests/unit/ibm/mq/__init__.py create mode 100644 providers/ibm/mq/tests/unit/ibm/mq/hooks/__init__.py create mode 100644 providers/ibm/mq/tests/unit/ibm/mq/hooks/test_mq.py create mode 100644 providers/ibm/mq/tests/unit/ibm/mq/queues/__init__.py create mode 100644 providers/ibm/mq/tests/unit/ibm/mq/queues/test_mq.py create mode 100644 providers/ibm/mq/tests/unit/ibm/mq/triggers/__init__.py create mode 100644 providers/ibm/mq/tests/unit/ibm/mq/triggers/test_mq.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 3591ad93e4e9b..68536fe6e7418 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -98,6 +98,7 @@ airflow-core/src/airflow/ui/public/i18n/locales/zh-TW/ @Lee-W @jason810496 @guan /providers/fab/ @vincbeck /providers/google/ @shahar1 /providers/hashicorp/ @hussein-awala +/providers/ibm/mq/ @dabla /providers/informatica/ @RNHTTR # + @cetingokhan @sertaykabuk @umutozel /providers/keycloak/ @vincbeck @bugraoz93 /providers/microsoft/azure/docs/**/msgraph.rst @dabla diff --git a/.github/ISSUE_TEMPLATE/1-airflow_bug_report.yml b/.github/ISSUE_TEMPLATE/1-airflow_bug_report.yml 
index a9c8978740344..d688a40be2262 100644 --- a/.github/ISSUE_TEMPLATE/1-airflow_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/1-airflow_bug_report.yml @@ -162,6 +162,7 @@ body: - grpc - hashicorp - http + - ibm-mq - imap - influxdb - informatica diff --git a/.github/boring-cyborg.yml b/.github/boring-cyborg.yml index 21b44a9fb840f..e65d79a757b74 100644 --- a/.github/boring-cyborg.yml +++ b/.github/boring-cyborg.yml @@ -171,6 +171,9 @@ labelPRBasedOnFilePath: provider:http: - providers/http/** + provider:ibm-mq: + - providers/ibm/mq/** + provider:imap: - providers/imap/** diff --git a/Dockerfile.ci b/Dockerfile.ci index a9d8b2e7f1dff..933eb5c3b1733 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -1478,7 +1478,7 @@ function reinstall_shared_distributions() { uv pip install --no-deps $(ls -d /opt/airflow/shared/*/) } -PROVIDERS_NEEDING_PRE_EXTRAS_INSTALL=() +PROVIDERS_NEEDING_PRE_EXTRAS_INSTALL=("ibm.mq") function run_pre_extras_install_if_registered() { local provider_id="${1}" diff --git a/airflow-core/docs/extra-packages-ref.rst b/airflow-core/docs/extra-packages-ref.rst index 2646b0a7c3079..e10ca5ef72989 100644 --- a/airflow-core/docs/extra-packages-ref.rst +++ b/airflow-core/docs/extra-packages-ref.rst @@ -343,6 +343,8 @@ Some of those enable Airflow to use executors to run tasks with them - other tha +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ | influxdb | ``pip install 'apache-airflow[influxdb]'`` | Influxdb operators and hook | | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ +| ibm-mq | ``pip install 'apache-airflow[ibm-mq]'`` | IBM MQ hook and trigger | | 
++---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ | jenkins | ``pip install 'apache-airflow[jenkins]'`` | Jenkins hooks and operators | | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ | mongo | ``pip install 'apache-airflow[mongo]'`` | Mongo hooks and operators | | diff --git a/airflow-core/tests/unit/always/test_example_dags.py b/airflow-core/tests/unit/always/test_example_dags.py index 0b84f6ae26ce5..bd9c5478bd76d 100644 --- a/airflow-core/tests/unit/always/test_example_dags.py +++ b/airflow-core/tests/unit/always/test_example_dags.py @@ -60,6 +60,7 @@ "providers/google/tests/system/google/cloud/kubernetes_engine/example_kubernetes_engine_job.py", "providers/google/tests/system/google/cloud/kubernetes_engine/example_kubernetes_engine_kueue.py", "providers/google/tests/system/google/cloud/kubernetes_engine/example_kubernetes_engine_resource.py", + "providers/ibm/mq/tests/system/ibm/mq/example_dag_message_queue_trigger.py", # Deprecated Operators/Hooks, which replaced by common.sql Operators/Hooks ) diff --git a/contributing-docs/12_provider_distributions.rst b/contributing-docs/12_provider_distributions.rst index 8ae79f1074b11..639d0c014cd8e 100644 --- a/contributing-docs/12_provider_distributions.rst +++ b/contributing-docs/12_provider_distributions.rst @@ -189,6 +189,8 @@ must succeed first. Use the per-provider pre-extras-install manifest: - url: https://public.dhe.ibm.com/.../9.4.0.0-IBM-MQC-Redist-LinuxX64.tar.gz sha256: <64 lowercase hex chars> extract_to: /opt/mqm + fallback_ips: # optional, see schema below + - 170.225.126.18 env: MQ_FILE_PATH: /opt/mqm @@ -199,6 +201,17 @@ must succeed first. 
Use the per-provider pre-extras-install manifest: ``/tmp/`` and may not contain ``..``). Supported archive formats are ``.tar``, ``.tar.gz``/``.tgz`` and ``.zip``; the extractor refuses any entry whose resolved path escapes ``extract_to``. + + Each ``downloads`` entry may also include ``fallback_ips`` (optional list of IPv4 or + IPv6 address strings). The interpreter tries the URL with normal DNS resolution first; + only on connection or resolution failure does it retry the same URL with each listed + IP, in order, by temporarily overriding ``socket.getaddrinfo`` for the hostname. The + TLS SNI and certificate verification stay bound to the URL hostname, and the + ``sha256`` check still runs end-to-end on whichever attempt succeeds, so a fallback + entry only changes *which IP is dialled*, not what is trusted. Use this when the + upstream publishes anycast IPs (e.g. IBM's notice at + https://www.ibm.com/support/pages/node/6826677) and the CI test container has been + observed to fail DNS resolution for that host. - ``env`` (optional mapping): each name must match ``^[A-Z][A-Z0-9_]*$``, each value must be a string. The interpreter writes ``export NAME='value'`` lines that the entrypoint hook sources into the shell that subsequently runs ``uv sync --all-extras``. 
diff --git a/dev/breeze/doc/images/output_build-docs.svg b/dev/breeze/doc/images/output_build-docs.svg index 1858bbb097e91..c77fb1fcfacbd 100644 --- a/dev/breeze/doc/images/output_build-docs.svg +++ b/dev/breeze/doc/images/output_build-docs.svg @@ -237,9 +237,9 @@ apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes | cohere |      common.ai | common.compat | common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding |  discord | docker | docker-stack | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google | grpc -hashicorp | helm-chart | http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure |        -microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage |  -opensearch | opsgenie | oracle | pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis |    +hashicorp | helm-chart | http | ibm.mq | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure +microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage +opensearch | opsgenie | oracle | pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis |  salesforce | samba | segment | sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | standard |    tableau | task-sdk | telegram | teradata | trino | vertica | vespa | weaviate | yandex | ydb | zendesk]...             
diff --git a/dev/breeze/doc/images/output_build-docs.txt b/dev/breeze/doc/images/output_build-docs.txt index 54d8d4e3f39bb..058fe216adc62 100644 --- a/dev/breeze/doc/images/output_build-docs.txt +++ b/dev/breeze/doc/images/output_build-docs.txt @@ -1 +1 @@ -c5f2067ec852773089ed0ca7b8d1d533 +d5e90318932654ced452b0c815d51b31 diff --git a/dev/breeze/doc/images/output_release-management_add-back-references.svg b/dev/breeze/doc/images/output_release-management_add-back-references.svg index f17f7f47ed43b..0c095f46b131f 100644 --- a/dev/breeze/doc/images/output_release-management_add-back-references.svg +++ b/dev/breeze/doc/images/output_release-management_add-back-references.svg @@ -152,9 +152,9 @@ apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes | cohere |      common.ai | common.compat | common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding |  discord | docker | docker-stack | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google | grpc -hashicorp | helm-chart | http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure |        -microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage |  -opensearch | opsgenie | oracle | pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis |    +hashicorp | helm-chart | http | ibm.mq | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure +microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage +opensearch | opsgenie | oracle | pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis |  salesforce | samba | segment | sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | standard |    tableau | task-sdk | telegram | teradata | trino | vertica | vespa | weaviate | yandex | ydb | zendesk]...       
      diff --git a/dev/breeze/doc/images/output_release-management_add-back-references.txt b/dev/breeze/doc/images/output_release-management_add-back-references.txt index ffc7eeea6018b..c045d840ae6f0 100644 --- a/dev/breeze/doc/images/output_release-management_add-back-references.txt +++ b/dev/breeze/doc/images/output_release-management_add-back-references.txt @@ -1 +1 @@ -3df401aef0085547b08fe896a9a65381 +f3b260fa0f604d3bc810d5ae7201feaf diff --git a/dev/breeze/doc/images/output_release-management_publish-docs.svg b/dev/breeze/doc/images/output_release-management_publish-docs.svg index d119da2013d75..a384fd4aa264b 100644 --- a/dev/breeze/doc/images/output_release-management_publish-docs.svg +++ b/dev/breeze/doc/images/output_release-management_publish-docs.svg @@ -191,9 +191,9 @@ apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes | cohere |      common.ai | common.compat | common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding |  discord | docker | docker-stack | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google | grpc -hashicorp | helm-chart | http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure |        -microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage |  -opensearch | opsgenie | oracle | pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis |    +hashicorp | helm-chart | http | ibm.mq | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure +microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage +opensearch | opsgenie | oracle | pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis |  salesforce | samba | segment | sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | standard |    tableau | task-sdk | 
telegram | teradata | trino | vertica | vespa | weaviate | yandex | ydb | zendesk]...             diff --git a/dev/breeze/doc/images/output_release-management_publish-docs.txt b/dev/breeze/doc/images/output_release-management_publish-docs.txt index c73c7846664c8..652fc017f7dd6 100644 --- a/dev/breeze/doc/images/output_release-management_publish-docs.txt +++ b/dev/breeze/doc/images/output_release-management_publish-docs.txt @@ -1 +1 @@ -4521ec02334b8909f66e82c460a69446 +8b9fcd4862fc5c2024201bb0e6ce8ed7 diff --git a/dev/breeze/doc/images/output_sbom_generate-providers-requirements.svg b/dev/breeze/doc/images/output_sbom_generate-providers-requirements.svg index fd62a65b513d4..c3dbec94ecdc1 100644 --- a/dev/breeze/doc/images/output_sbom_generate-providers-requirements.svg +++ b/dev/breeze/doc/images/output_sbom_generate-providers-requirements.svg @@ -186,7 +186,7 @@ atlassian.jira | celery | cloudant | cncf.kubernetes | cohere | common.ai | common.compat |  common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding |  discord | docker | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google -| grpc | hashicorp | http | imap | influxdb | informatica | jdbc | jenkins | keycloak |  +| grpc | hashicorp | http | ibm.mq | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure | microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j |  odbc | openai | openfaas | openlineage | opensearch | opsgenie | oracle | pagerduty | papermill  | pgvector | pinecone | postgres | presto | qdrant | redis | salesforce | samba | segment |  diff --git a/dev/breeze/doc/images/output_sbom_generate-providers-requirements.txt b/dev/breeze/doc/images/output_sbom_generate-providers-requirements.txt index a7761ea29d68a..be48d6b3fad8b 100644 --- a/dev/breeze/doc/images/output_sbom_generate-providers-requirements.txt +++ b/dev/breeze/doc/images/output_sbom_generate-providers-requirements.txt @@ -1 +1 @@ 
-fa98bbcd73f9160c29eff1b6779a23bc +e489e3174e2ddaa097d5c58d224494c1 diff --git a/dev/breeze/doc/images/output_workflow-run_publish-docs.svg b/dev/breeze/doc/images/output_workflow-run_publish-docs.svg index 511790e79d721..ec5a35026f591 100644 --- a/dev/breeze/doc/images/output_workflow-run_publish-docs.svg +++ b/dev/breeze/doc/images/output_workflow-run_publish-docs.svg @@ -197,9 +197,9 @@ apache.tinkerpop | apprise | arangodb | asana | atlassian.jira | celery | cloudant | cncf.kubernetes | cohere |      common.ai | common.compat | common.io | common.messaging | common.sql | databricks | datadog | dbt.cloud | dingding |  discord | docker | docker-stack | edge3 | elasticsearch | exasol | fab | facebook | ftp | git | github | google | grpc -hashicorp | helm-chart | http | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure |        -microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage |  -opensearch | opsgenie | oracle | pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis |    +hashicorp | helm-chart | http | ibm.mq | imap | influxdb | informatica | jdbc | jenkins | keycloak | microsoft.azure +microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openai | openfaas | openlineage +opensearch | opsgenie | oracle | pagerduty | papermill | pgvector | pinecone | postgres | presto | qdrant | redis |  salesforce | samba | segment | sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | standard |    tableau | task-sdk | telegram | teradata | trino | vertica | vespa | weaviate | yandex | ydb | zendesk]...             
diff --git a/dev/breeze/doc/images/output_workflow-run_publish-docs.txt b/dev/breeze/doc/images/output_workflow-run_publish-docs.txt index 6a433f7935a96..54502c66975e7 100644 --- a/dev/breeze/doc/images/output_workflow-run_publish-docs.txt +++ b/dev/breeze/doc/images/output_workflow-run_publish-docs.txt @@ -1 +1 @@ -6ff7091e58988c6273e51f372bb8a1a6 +a20de48a75b32bcbc950236feb3b8108 diff --git a/devel-common/src/docs/provider_conf.py b/devel-common/src/docs/provider_conf.py index 6bc9da15f5f61..bc18265346828 100644 --- a/devel-common/src/docs/provider_conf.py +++ b/devel-common/src/docs/provider_conf.py @@ -151,7 +151,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -empty_subpackages = ["apache", "atlassian", "common", "cncf", "dbt", "microsoft"] +empty_subpackages = ["apache", "atlassian", "common", "cncf", "dbt", "ibm", "microsoft"] exclude_patterns = [ "operators/_partials", "_api/airflow/index.rst", diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 869f06770b3cf..961c824750780 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -184,6 +184,7 @@ bigquery BigQueryHook Bigtable bigtable +bitmask Bitnami bitshift bitwise @@ -784,7 +785,10 @@ hyperparameter hyperparameters IaC iam +IBM +ibm ibmcloudant +ibmmq IdC ideation idempotence @@ -1049,6 +1053,8 @@ mongo mongodb monospace moto +MQ +mq msfabric msg msgraph diff --git a/providers/ibm/mq/.gitignore b/providers/ibm/mq/.gitignore new file mode 100644 index 0000000000000..bff2d7629604d --- /dev/null +++ b/providers/ibm/mq/.gitignore @@ -0,0 +1 @@ +*.iml diff --git a/providers/ibm/mq/LICENSE b/providers/ibm/mq/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/ibm/mq/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. 
Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/providers/ibm/mq/NOTICE b/providers/ibm/mq/NOTICE new file mode 100644 index 0000000000000..a51bd9390d030 --- /dev/null +++ b/providers/ibm/mq/NOTICE @@ -0,0 +1,5 @@ +Apache Airflow +Copyright 2016-2026 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). diff --git a/providers/ibm/mq/README.rst b/providers/ibm/mq/README.rst new file mode 100644 index 0000000000000..f08c8d1bc0d10 --- /dev/null +++ b/providers/ibm/mq/README.rst @@ -0,0 +1,92 @@ + +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + +.. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + ``PROVIDER_README_TEMPLATE.rst.jinja2`` IN the ``dev/breeze/src/airflow_breeze/templates`` DIRECTORY + +Package ``apache-airflow-providers-ibm-mq`` + +Release: ``0.1.0`` + + +`IBM MQ `__ + + +Provider package +---------------- + +This is a provider package for ``ibm.mq`` provider. All classes for this provider package +are in ``airflow.providers.ibm.mq`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation +(see ``Requirements`` below for the minimum Airflow version supported) via + +``pip install apache-airflow-providers-ibm-mq`` + +This installs only the provider package. To use the IBM MQ operators at +runtime you also need the ``ibmmq`` dependency, which can be installed via +the provider extra: + +``pip install apache-airflow-providers-ibm-mq[ibmmq]`` + +The ``ibmmq`` extra installs the Python wrapper for the IBM MQ client that is required by the provider hooks and operators. + +Note that the `ibmmq `_ Python package requires the native `IBM MQ Redistributable Client `_ libraries to be installed on the system. 
+ +Refer to the IBM MQ documentation for installation instructions for your platform. + +The package supports the following python versions: 3.10,3.11,3.12,3.13 + +Requirements +------------ + +============================================= ===================================== +PIP package Version required +============================================= ===================================== +``apache-airflow`` ``>=2.11.0`` +``apache-airflow-providers-common-messaging`` ``>=2.0.0`` +``importlib-resources`` ``>=1.3`` +============================================= ===================================== + + +======================================================================================================================== ==================== +Dependent package Extra +======================================================================================================================== ==================== +`apache-airflow-providers-common-compat `_ ``common.compat`` +`apache-airflow-providers-common-messaging `_ ``common.messaging`` +======================================================================================================================== ==================== + +Optional dependencies +---------------------- + +========== ================ +Extra Dependencies +========== ================ +``ibmmq`` ``ibmmq>=2.0.4`` +========== ================ + +The changelog for the provider package can be found in the +`changelog `_. diff --git a/providers/ibm/mq/docs/.latest-doc-only-change.txt b/providers/ibm/mq/docs/.latest-doc-only-change.txt new file mode 100644 index 0000000000000..f41e3226a6f43 --- /dev/null +++ b/providers/ibm/mq/docs/.latest-doc-only-change.txt @@ -0,0 +1 @@ +7b2ec33c7ad4998d9c9735b79593fcdcd3b9dd1f diff --git a/providers/ibm/mq/docs/changelog.rst b/providers/ibm/mq/docs/changelog.rst new file mode 100644 index 0000000000000..a91d8126f60ca --- /dev/null +++ b/providers/ibm/mq/docs/changelog.rst @@ -0,0 +1,32 @@ + .. 
Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +.. NOTE TO CONTRIBUTORS: + Please, only add notes to the Changelog just below the "Changelog" header when there are some breaking changes + and you want to add an explanation to the users on how they are supposed to deal with them. + The changelog is updated and maintained semi-automatically by release manager. + +``apache-airflow-providers-ibm-mq`` + +Changelog +--------- + +0.1.0 +..... + +Initial version of the provider. diff --git a/providers/ibm/mq/docs/commits.rst b/providers/ibm/mq/docs/commits.rst new file mode 100644 index 0000000000000..1b08b59cb402c --- /dev/null +++ b/providers/ibm/mq/docs/commits.rst @@ -0,0 +1,35 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. 
Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_COMMITS_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + .. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN! + +Package apache-airflow-providers-ibm-mq +------------------------------------------------------ + +`IBM MQ `__ + + +This is detailed commit list of changes for versions provider package: ``mq``. +For high-level changelog, see :doc:`package information including changelog `. + +.. airflow-providers-commits:: diff --git a/providers/ibm/mq/docs/conf.py b/providers/ibm/mq/docs/conf.py new file mode 100644 index 0000000000000..425d9e512683d --- /dev/null +++ b/providers/ibm/mq/docs/conf.py @@ -0,0 +1,27 @@ +# Disable Flake8 because of all the sphinx imports +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +"""Configuration of Providers docs building.""" + +from __future__ import annotations + +import os + +os.environ["AIRFLOW_PACKAGE_NAME"] = "apache-airflow-providers-ibm-mq" + +from docs.provider_conf import * # noqa: F403 diff --git a/providers/ibm/mq/docs/connections/ibmmq.rst b/providers/ibm/mq/docs/connections/ibmmq.rst new file mode 100644 index 0000000000000..f6f714c34f09a --- /dev/null +++ b/providers/ibm/mq/docs/connections/ibmmq.rst @@ -0,0 +1,30 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. _howto/connection:ibmmq: + +IBM MQ connection +================= + +The IBM MQ connection type enables connecting to an IBM MQ queue manager. + +.. raw:: html + +
+ IBM MQ connection form +
diff --git a/providers/ibm/mq/docs/connections/images/mq_connection.png b/providers/ibm/mq/docs/connections/images/mq_connection.png new file mode 100644 index 0000000000000000000000000000000000000000..37a2a6a55443c60b7149066f458523f4d1150726 GIT binary patch literal 31251 zcmd432UJr{*e=SaSg-(!ilP*iB3){vgQ)ZlB3*h35HZvQh>C(plioq8QX{^-ZQgjX3sp&`#ukmhWZ-k&v2Y! zU|=|}rTM^^f#LWo28Lr{r%wWB3~tpv13vzN8f)BVs2sSl0(@a~Rnt>rV5o{adtiS8 z_|Ei9(*nxCz}!Lq|4%36lM@3&&?l`2YNmm<>$6N*dQ04$o0#Y-A{9lfCblFzurRrp zP}t6QNl;LT@8*NI@uE7$@tRJe#uEvA$ud;b^^+Zk7>pDyS^>=|b=4ZG3_+5bU4sad z1W6n|XaIV=NH_h@MHX|U?d4SXfDBp&919T>WdvHn&{z)3=GgILv(69x{D^Yx zXJWs=h_q?pH~;pH7jbmV_=9R__j~2T^Kbs4-@tORVo?FMmgYHKZ&MG!@Nb5_;=Ox! zNJL7_Q2nXl!LD(Qi(Yq@V4?;6VurWZnK_k3dQ(IbKMR}dUOhL)pJU0Ud757Ji$<-Q z92e#O(2ReM*U1`YhFv^HuYiXz(pNlbAoW4vbn^_`Uim|MF)tdmYjRz_sIGHLJBFW| z@T8wpwJroMd-_3>p)Kqf{hH&iUE3MX10wNr=MiV`by@|X8rr9xVvhG$I1oi zXH{o#121N7pQax(tN;6R#BVNOj2IZ6z2AT{{(aQg`!PK9Gxtu*{4I4}mGR#zuD<&B z{5dsN`tLWwdH$m$m)@|pO>{+cW-Z}9G? ze?Dsvj}nx?{9ygH+o$QR{h~BcJIpq6SXd8wt){@pypFLFN*4C&j|th#DK|$#nyQ^E zwCFW_X)`Pd4jdZ2`)P+$@y3N?GI!=g@d#Qr$KUof+S#tph~>F#*uGZ1{%;cr`JNV1 zR*tu*EH|ou62$&(|BOHmg>k>xb;JASztxA#UprPO_sLaixP|j1)8A))7Pmq?z27V_ zBz11C0&~Ii-yUe}BnL3&uU`Gh92P(dU_Acl)TtJEzvZWTfBXDTwS;4*a-+mW$7 z^@(1c745j*`25cDCZm?|t+(l65wTqw+;J&&c+eBzHDF-aZJ}Od6t`*S6}1dViI#h( zGoe`M)Gtpvf8i7hS4sf&eEh{FE*WZ>dVOnQp?AlXYL9;6V?i9p#_;KD@`_KIeet55 zvALhI^x8LEDSW31N08idD_Ze2ma{EFn0^}`vD98&YE+pyvb!DqNcz^X#T+BpPqo_1FAz0QvLvi?1q302v8|>Za@bIjTs_F|x>$d6w?c|Y+ zv-APTolWIw44gb82i?g^61UR6$ftd${h;|#Z0%BcLWd6R| zSvzg->=jX|{s(RD>ocu;It4wq&-Wb$r9+dYW&Cd5WG}xMKkE?$**_3(&k#*TTF1XS zvDIS*Y4!9Fb#$2C$sVcJV~4VQY+K(qXnRWFBYW6wq(M>Ql081!aB5Ol zDIn&Ae6lH_ct@o2VZR)Hs;%LYIQ7$wd(Sk*Y|kZ68OTaG`=YjHK!eb7DzdYp6GAMj zG;heCCOHKx=7;&qliW9O#vj}+ld{f?=OpEtAGjpbR|MAU1b5c4XCJ<5aF;z66DTn$ zPU_@_{~D`4Y;{OpJA%ZSNoJ*t8V*%1lvO+85DC0m4qCZl6-@z1DxTPLIp7J&w`?gy z&TlpP;5Tz=saC!KpUsrCi=sS-GFWlq!4+{E`=T1BZr;s5Co-Or#%k7`wrU}L!H|we z!Ti~_gr&KF_}$I0#qW_OBMUJUr{RR9$B}q{!qn}UnU^}56RS-oQ7K}n-pDpK!Po05 
z&kS=s4_bAz6xEG+4fy>$RBmZq6p+Pz?58znT3Qt1z~69okJcLI*wf`qPS$wF!}o(V z-@+A2cpbyZE;-tnvUPH@+1!_@er{yl>(B#^0>8HA&{_R;*kHFoFXUXn{yuqmFDLl! zvqoP1EX`xbk1w?=QFS-AQJ@PzUwv6`DONL7u(qgmdczbNr)1focttG9ZfmKZuh<|r z{<1Lo317!yi&t^LpW1`Pq^4uj=VabCSk9%g2^jPYJ9hRr60CHW z{p}ECxlM>ljfPm4mg|qjCFA5PND*-dx;q4gr%DUWP@KBc_!hB0)O+txpv15;NszW( z-UM1hPVlZO1vSsLAMIMa(qNXvZJ^D-&b<=|y=&{gmim>CJD~|mBZo^n)uH3}(-n`{ zNKdMQ-JvR2Pz?&?jG#FtlGj1J4($Q7nny?0E#J%A;b>Yq&(4toviHxP6fbz9k3Var zEdk~!uOcM;xH9D^Oj%-(d&kJGM%ji~FDkrL^2i`9<42tGk@T(iQ{+G?Crtqat!ZW^ zVh?Ws<#Ubo#DGp}oS^HAf0gI5x^y|245r z&y73-X`>l|O(4k=Cqt>T!E9pJ$I3+Ya=ou^%JUke=-RFvwc!X3b6#Y)*>qPa?ThP%@VZk9(}v-o zJblr238=zz^8Mf0`(yvtG((R=8M&bElT8Yo-7n|x}QD|W?_yJMvmHh)goAXXCjn-r)a4HNWDWPV_#hR2QO|6vYP#2MihaXTk$!e8*v zJ<_q$AZ@cRT`HqfFO!D4`F?$_4Lued)I7e0a!Uh`eTcnAbKgTshK~4-&TP5>@1Pe~ zh?hArWCj(IdTlU4FqlRtI=3>KVOjT@i7kVA6wVw>3I7P~oJhD8bVe(YzqNz?JZ6q^ zdrmBbQ_6yKV3~yg&uz)?)bEP5p`~YhOJd&Rd;~P`3uY$qZv#E13Q(R;YJU0N`y;1& z`skN5+xVO^2*O0XS?#bV=`y!v_qaO%L%VuG$e-3=e)9a`STr1Ht&#i822;u7!dKUb zpr>iA8-Ik0CRdU_->uPKC%cTLX-WF+>zZJHb0q6&2yxu#)KBw(b((UV>1+xs*a^v!TzSoTOzv1xrDr^0!Rq?fSDEJw6GUi+~B-W3d{xE1UI~glzi{ifjrww)I@4 zI8chS9($jbLqhX4&b>Vyc9xwcB7;}FJ-L-6t7o|lqDk}Q)DwJ!ZEHWb4(Hcgqdgls z-EHxqJgM}F)s~W}RyZ?@?DXLWh=B)c ze%B%DxZzJM>jHmfi66$O4IXmIKF>-=Ch+MA$ z$c0b#hlSwcdKc@kRF^J^+b1)E=Dh0;`rQoj?ewmLJ%s$)>$J-B6q<+URJQ|8CZDug z7bUkq_|r4QDl8NNH$GcTtfhxnI~}31s8;wx#4Cs4R`DeX%H9~4LS5&`ZH+jc$q_!M zgS4B*wVF3g^cCJ-8zPGCd6J)!7WT|5U#A}qq}>ejB@9^t$i=J2{psj@k#o{f^utn9 zBM_c!i}dHor@mLTFox!YW>Mc<3b_-^O&`J;_&@n!VK&+FzMuK&Xo4=myfRT|=^Us4#KC`WYgu80*WzdM|YYHhmr|0K}Sp%1Uxoag#mhkFi zXdaujO8}nRr(Y&jXCUHrPq);~1i28Q_OoFejP4v~=5vNKRL{Q@OyJ{ZJjU?io(>X9 zhG`(-Ymg@iMB+P(E<|VLT2$GvS?WhlU{_=_ciLNFe2cTww zdla*Akum2aV*Y~+JAIG{%>9XxC_1P zdCYX z-=dId8LG+B?ibUQgK7_IpA|WEXASqYa06tzfUqFbALI?Jm^ZY425r?w=aLH-`{MN;ou_RS z)j0v-fkZjd742jVjtA22!FfhH7A>J+X7}m9)aB^&h1~RK={2fyQnM>dw$FEFf=nJ| zs-#qs8>PLc9s^ic%nU3m?M(LRwWFm;)_Y9{Xm=D_Wi9~(B56mwO-v(>vmqs%IJzcx 
zwoeJ+f6WV4Qs?obS6S#3^Nj?|V`Qr08{v#~P6hvOimN+Hnf9yxmbC=W93+0YD1%Lz zHZqn{YBhBiqT0Ea#E_2G}BA()m`rP zbFbi(bx?<6+~d@>$?7LiNCAb_GlU_M0RT zBU2ZBnr;RN-XrZbhRx@ArJTEOR`A8NP5Z<(kjvXzkIiWJ{j_^NxAve5SDJiR63p)N z`jsY0uGH9%6phzxZGJ!*H#t~L*1?fC&2u@1arydVvh3|r@!os0EzWo7)D zXY&fVCHn9B?K+JG*2GG=4)jbQ>bsYO%@>=BVtC&uY|Jvl6$-VlBm>aG68dnYSVy>U z<|E~McVc`--kfKyV_5i>Fu_qf{#KJrXT97vvCZF|W93$-RE z-ca!w$$-;rTKx6CRoW&?7Dn~1UM4w>?ia>~bBqgR*s3LmPd2K8O%awyWlhA*VU5y% z!vu<5Krz^^95apyg!R#Ey~$iC@wQyRTtNTR*fk4Yv(lvMHC%E$A!549vBCX{#iysr zS_!;WGL(Tr?ezO+TF^~E1mgMX*tPDon;``^?Z3S?&UUz3?-;wL>- zkk_77y%)y0|1w3^pcg~)1j&R_{AB60Udzvg*w0S(Fv$Z-p#r_i32Mox`k{l)Z zOzK*#dg`%a1)K&sS+m2i8&9ur;8Z~^3JnSH6m8i&lR0>0FIDoVg zQTAvPxjAWbqOUICIjIYt}+W7?9pe<)iH?o zj#k)MnG!W)z{!_$Wwc0wsu;X@kE9Ti({G-fva<1qbP>y(u;cHmWB`IdVp|qymgyX$ zzl!u!{j6X)b*ZBxm$?%OghMd^xt-(QLb?0X1HJQKaj9yC?iIgkwekyS@HSo@tZwyI zLtPPD%4K15oul9&U@F)0FK!3bWCrgZWj;UHWn!~b-Vdxby$wfxIKC808Vss|Nvs!#bmx6FWza_dh+@MW3BBK>UR zN6|Sv*JZuq1JyvNbC$;`>*j0)sxK8?6q>mX&wpe3%B=$gek=+dQe}_Ze14WLHz?&w z<4KotO#)C{PgZ&^2F=|}pJ=Qt$NpQW?#pZqoF>toE!rjnP%~D_-oo}E0GtjR9{r7K zJ`4DN50st_0w|p5SGeu}Kw+=w5-|d@ezL z*LXS%2o*DrU_V&0eY$zq{uK|nl6FR+-m<}(mtVq>&`9(DqQ0t90ToabLg z#Ac^R3npq;7ew+79r7^&!-lqoHZ2Bgexmnb`e^Z1%iT|E?~6V%5Uy1)jj^WPvT(T1 z1B^utSzZGf@b7>a@#xhs%n1MYK<-4ib)#F%u`jcQw9N;4z)jE2v=KJZ?;5^9afdaf z7lGCRZD!;*NEzdxb9sh)YEMUaBP)#P$1m6zS0UE)BZjQ7+_=975&zE-_m|_20LmC? 
zbhlz+YI<7K;!hO42E7z_Rofh99eYE`B}LGNpfs3RX^>kShkDRpjV{BZ*8{6@aKdz# z1Y1QRP#=XGm}kc;1~X?DyXN#xcko_2#Y=>!(?u&8!Q z5O)c_enDAj1;%pY9uO~>ZVW0j{j^h1>%Q&zJ#>8^UME6zcP?NB!1Xf~0Qr8uww+rp zN}c`Mr{L*+nv_Gqm-^8GVbCW{HL(7eEU+y4uk8!EtfjQq2pshsI17I7Q~U$jc8Z&kiM_ae@lI9_I^+Z)?P+KTQ{L zHwNoFogm@1EgXS~NQi5e>CwU$gE6cp*I9p+jn)`fYubXnXYR8yZm5!4H9o7pY*i+9 z2phG&Eh(wV4DeXDT?qH%a}4(dDzMZC(;k;wdljuu{SZp-N!I`Z`STtN%&&6nhol%i z45P~cw)Q8wz9K{G-KB0wW3D(5)#TrGXBdN1YyFY1@pls=X+c$9XQ2iklULuhJXA;~s~L8f!4 zq(^JXtS}G(sN;31ZB>T+S-i%tx!=rk*Dl8zv|ebcP_gN)4{QhrVVozkS&v;-V^z)i z_>XQ_+DS=>rSPXZpV9UVFAs`$M0n%x+npBu+M`xb6IJ+|YG$7#Vi~6v+yD&T`y=XO zk;O`v{b2cx zd?*Rev;4eiG46b32eG2tNCur=@Uw+0(tZ z&=sX5fEKJ=-VA+yco1bBQXSbpmz9v+415^iy00kjwoyHCJXDwQN-vG1sL#^lB z1C9x}wU~9|9oztwkTZ*MM{mvAA1Il=qK7lKftPRBBtqM1Ipj%2nK;cVzqsGRSEX5$pp9cHJ z&#_(n`QeDbQP#liRhe`t$Vbzl0<{OEp8_0pN%Zs=--9cn7T#Ev>znD(K*DHI#Be}2 z0M?Y6r4%riG;CPVf9pEPOE@Z3EVWeTdWPhF;}Q~~du>`aKyz51+cyYn%~yD+gX}em zvlN~cIQ;G2h8q+c1vu}X{nv|vwmU|0&*tB+s2Rd9j{QrmV4&Wy^;);tNku!fN#0Dp zxh8a!72+b~_|#EvQ$sB^Iguf6JBO2rdS{kQ%(LBhv<)lywLbBjQ_XjpaQCLKDwbq>A8NFhu^!aKPt#3pN;tFUvL z#G7MJi+|tGlwziSLXu$>Q9uG&$FiPq>-y1*L-ILx#n94niv~3yUS)FTN|!Ju6lC|a z85d+p>*QZiaeG}0p0(zX`E;9}^Fm`**)Lr6%iiW_NEp&dbtVfOU(9Z3?!6Uo8b4Ce z;J#Y&yTkzVa-<4>Jwe*t;o7bmgtB9sy1Ticb2Fgpn*8$Y21c$oQ%Cu5Q9Y;)E}-@P z?n{dX0k{Hq!ux#=66F*tYSUc1HKB0aX|1sY`^#&1om4SISdY3Q$_qvx3Z7!-k|dGS zu83Ijr%O5CMMiq9lGC>q@^TmRSuiQCIi=$5EtGscs`vAQ?^-pA%}~Fv-qfY3q?L1j{Rp zFN0g&C6}yB)xqNgp+~`Q%Z8=AtWUf%^YfZUjruKVrc+Zc+!x=!lfonn^(0@F~GOQ^!2F0H{ex6Ext!@6ys9OtW?>oqy-x9RXnFrVbt zltWo+FMl`Ow6(HomWtn6+VG*^Uo|?TQTY>B0YD+OyRaSSqS{`5K{x5nM>?)-a`ULI z#o~v4=Wl&qkm2)NH3c@*hhXplKi?Q5R}S1tESLL#?N* z$-g$)2zxSpc!x6MrmUcfqyeO5Q)@}d%s!?5YPN9hu~|s>?q)|PTR8GMF#M`(TfID= zC?`FZhg~B4rPLA6)~95cT3}{9Nn&DU=6#7k2JI1thiRD`?1Y)WDvZ?~I@3&eR?)9c zS(}9%`xrg~ByOMZ1uOV$B?XP4#rSb_uJYyZcdFW5jh*t5&qoVe#QN;59XNM)6A1-A z33VF0RbpJ2_EnjQ1;Q9WJj|HR1TRK$#uWU^1KmAZ^|RxnuZ?JMT;$hv3zfD6 zch*8Lx0s|_70N{Jk(N^Qtd3HE5RV1)M_d2s7DZ)RF9tKa_Rx%C7xp21q-BrBqidZad9ZL#$f8k)_6{c-YLQ6 
zShD+QS_qf&^DM@=%!?&^r6ZD@w!yaFW`6eoGaa{rUGMh^IIt5o!TW=fMU2Lj6nSrV z>Rv+EfsG4|<-py3q$?uvVzYAzNSA@+E8@P)D%hzwO(qJ-;d$k};(kyoc6b;_ovKIt#1d*uZJpFp4yNhJ zg&w^24Tz|SQ|1!A+>@F>M5R0R{P+t94|~!ivd`U+)V5vwrH;yM85&a-^13L$Ga6R_ zpv-Cr=ARma4viRSDhu+&XolqZCVFc7LcTy(c#IXV?NE%k`gZB%rD-I;=F4M2eE<{w z`0=CU&0R)pk;SqRsC zxLtg4X9oV|bGKiOrO@nJA~KpCrR|IL6>JmO$YEb{aZ&^{&&e)8=vBHWe21N&KSGb9p2FXc~Nn z{%xM{KOOo0D^a2|rbe8Ybt|vUeyEU44!r*yG49}-LGOoBr$J7GW0TET$G=REF0bb$jKqaY4;i$o5Kuc7axv-u)@e8Zkm_lz6peW_DL> z$Zs0ayXN)diW1=y3am5;_pBbQ3sE!*QPI$#?8U_Kr?pcy?0_r^CF#f~uvtgNegA=< z#nBS0l;YmApj^Z<;S@3IO+a(~`^Pb03zGnSbV$wFRJf>3Mv|nvA&X!ZC_+X6)jE^7{??RIjTPR>Lxt%Y0CcjUL)w+1+`2z20%u5f1<5 zG}%V{JW1>5H0VyS18K&b`O*K}1~I6%Q218iQaiPKOcOtPC~S1k;DH=m<%)=%K%A+F zPsbiv*$$@o@a$p&txd-IkzTeim6=m&x{lHoy!ndYdSd2TJ!E17Gm;|qJ zM5D1w6phS1fx)cA6(~Y-u!dqj*qw_YCZyWAITb0nSNxWs9P#NTbe-FgcRgw}zSw9t zolkJ)i&~5cWcf9{sVLApi$}jN85~DbN8K@*&ZyXN>h{+5ToWs-I}0sT$Sp|}rz+yf zv2CaN8&O3IE+TiQC!rh5dUnD}M_(6Q9<|zSZ!gbKOa#*eGUWzyrW@>}W|7JSic!7p zN?#S+06w)lC`Q>bq>vy7p1bm{Fz;>X{9X=O5tW7_)*hsiLSwg}D@|TXmFHHB(nI3vIvEI)gLlqQZ%-)fF6LszEbsFuab4Ss_ku_Uit zZB?1xan%{@T!f@P2p*`7 zK5|V`)){d_%iMSCfz7Izy`Z# z3CjV8AOledUz(x{d=6(FN5#ox-kJa#;Bjh{AJi;Cn|6w;TD$Oeiw)Wl5wN^7n00n@ zFUKUO6L(0tRl3lzZdC^p1$3E&Y)691E0`J)@1V89-e&v#m!5GrYTQbBRrDzfH5Hyow!0y59&7f=D60%w3o3??dC3VeL8X-^vYW^LMh5VYA?rI`rkDYO7oRkchW-dgA4Zr`x^2?eG`f1bzVl^|4Y$sqXn z&+cVPbK3U$9O9k>#tSx3o0WoAqoC1>5F^eOR1PVnA~H~EJFYFIZe?d%<)kfq(;X1+ z4k{Xn=4qud)7DjP87NxMS#s#xsld7GN>+j?Ki&#Zw~t^>5U740%r5XXCo5tDVjZi1A~Bg(_YK$DiI5WC4vAK~B-*(4-%=RwSKP^$oU zPwwcNoh~LIC)mphT~XJ%L%pvOFm+2+#p<%oWm)HKYy4AUZvCx|gH~!Ddn;_XWXX>c z;e~OWa~y7C@tVt}eUH5OU~evV1viAIL@R@ui5F)z4r4!CX)h};*OT{*IEzuC`|_*7 zgE&|y3nOBqdNKudvVj_MeUwy^2P3+7RjF9PtYN`M!+?OP7}L4*j{R;Y{&3dAiiA_a zDtLEg(xL)YR`!*wvX-Iw`<{gcnIqBy>qocYYP7C>Z-=~ock-*AquY6i z*6q9};<08`ubP%tOxDoBie)HWeMLXEdLiWgJv9`w1ZTM)^hjGLar=aI<22QjTSXsZ! 
z6^%r(4z90OE&qsho!@z=9zSwcih5r%o7%RhCH`oQnVCQMFrO)^K2nN3!=0KZP)d1o z5@aAD=MRAm$jp?awLXTe!I;-5@rNp#&2g~GHMGGB)eLi+T|LB(QR4^NReew1b&jaT z);x=xu4pAH?oHLBXr{gt&K1cPlal57Vt@grjBYjOHn%rp<}ku@#E#6!)i%yV(#Hi9 z%9Y$S9Fn@M3>$%$Zr_+&y9agdceB-KbK6ESQ8F5C{<5fm5j*R{-#bDM{}D%EW!5Nam;gdsSP>}UbMlwqpES0 zeH8Wwu=Y$ujec6Dox{^uH8kS%z>{P|?6!4~-4pPKppc=!AmlU&8|>B}10qKG+7ZtZ zp$LIa@2GBYA-f{1a)(RNqiCHc2sFEv<<2^;%5kB&;%vUqit;_oW41NKqK+n<@tQy}J0 zKZGHtP-($CNjOUWA!>rCQgH!2rJ9W-A_{Mcmcm=N>mGAl9q(604^^}Z_gp3@;{v8|+Vsq-gcMh+wyae0{i z!sLU;cBhoYGJ!j2=in!6KP|(b_SVyM^>Qdb(rgl4?ViAhj?=~5pt=jNfAXf^t%h8< zJm`q_T9)`Fa}oCS6Eas^BBX+R_=#Pegs{Cn%&1Q>YFL;&&+w%H!wJ<05S>=o_CjK#N=t#O#P8D>ckU8U@Z?cMKX z>(aSRKnkuq@R6F6yt0@8n1m}xYkVCKF2#)>8C(cyUq-z8PF9?5RT978(7L(bKzweK47b>oX#ok-&-q+qjQ(H1C#^(wprrzuU%`rYj-FF?^^Bl zGHW6MPbMu-ii{Nz+0J(~Ojq+Z52hR{*B-A@_K2{TU|I#S&Eg4oC~n0Cz>0NaYVGZ< z54R_ym=3*1+HLk{Qs+1?S9uW^sxfi0UO$l(uhoE0N5z_^x%glAlxLb>-0XPN2=m2z z08R~p4E5Luigj2e55^KaSDO6e5!>jgm#!pX>tY#hOKMr2(TmZ0D+;U`GZ z29Q29KA>rsXMUN|3%W#G;7*@l^f0;ORbSel%OJ4f`I95H{rCmwL0hAW+h0!=1M~e+ zLpqy&#`B7Y{(0}e_t~&Dh7g{DFVTOSXM8zY7%4*!doetF^#yx=>q$QSm_gvQu4~NS zqpX`7jWTrl&hRY!%L+8W;y*7a4+%{9uRFEZ?HV4@GsX=1Us!sM1E2pB^0ogW@&7;E z&h}q{2`!q>%JZ}=W%gU_>d3dfOVxjkF)t8jPoIAOJ@SuE>7o|je`L(%g9{kR*YABF z$Hj0>h|oUV+!(4FP2hJn)=H6%XKXg$rFV$p>B=VS=gPdM`Z-MTeJdt=Hdhax4jD4( zW(uXttfFUbmuQ&jpC|@%<&GSO1x3W|&tiOn+1pqw+WA z$!N?*&%(^B6{Y{Qqbhtv|2yRWLu=LlDi!8_eB~^hE*^9S63!D@Eel=4j>gR+3ob*O z3O+!RW^w~}MK0i|RYt%?)_YE<=}b!MPrYev&8`#RN=Sq86`;+kQx)gEd zjZoU>LiKcQKPSNG@|Y>G=6G5NO7YbBiq)uVH3j-%*byvZmM$W6;iC5s4f~d7x8oI` zam|2Kb>PYwscb-yqy@M$)&PeGJ%QYJ7Vs=VQ8XhiBqdA4Vkas+0x#*J2^q*#qN|wt z29qShcs1Xv0=g(KF8YMsZNJC;*BzAqN}wBXGmYTu#HtsNX4UrFw=@tYmy#h+_ z-r0}UflDVzSF^L>EZwClNA-XVe&E#CD0cB4Ku*bLl=pf|Gp6UF_}sR-m~rC^X%E9r zx&(w_f+r=;w~8)kSRHxoNt$x z8piScWRh1)26g^XY`R;~_R%-yZ1|%YyEHj-EqB@o{yF)VPmT48{2Cm01bCQA(H@>f zXD?MacyQQ=rJ6~$h9yrzuV?~NPgru*nJ2Y$CsUtNV6$G!8X&Ih1u~CiW83;JwQC#k z3%V9?OdVS)pwQC-d}VsqW|93?4n%~KV%+rTfDOFhA%5TjwSwZfDuI`n?fa^((brU$ 
zO95;=i}PPQCPwhJGi4BR%ssJ?8xpWrb@bIpKN+OqB4~RjmFcC?8^FNFualrDlq}}i z@0S7$Q6j(It?a3Gp3Q|*-Rx4R~By> z0@O%Wd=Hc}@0!W`hp01i%kd`*rzA&|nWy{{rqR=SBC6uzZ@{E>lelx;A)`(fGVvrs zwnQ9V*;GJ0rS9BJebO4Df`|u#%R+hXWjmiIE7e)9fi}-ZvWw?Pms1)G{J0%E#o89% zD`?J&PDQ!d1O(dD_=hRZ?rOOoc{=(u(N-i3sLzYcRv&Q?GV0wR1753Rg8iGTdw^}r zpnwz%q{Q5Jf;IO3=uq$$)oDR`J;{QCEi=E_fd0E+cOXd=Q>k~H(OZ@kO^Aw{bQIGGj%-ywho z`%{W*!J8vXE;&<8;J7mDm^>=}Ky8Xjt$0sBwhIGU zL7t_uiDdPNxC~|&G3#jS@q^{tW`%)Ow_Fr?cvMRti$Q)4<|w&xH4uM!H zGt@keA)&ggrW``6W1$a1Ev0qn5^Cv>2fk~(qcv`(zUfCt^HH4euBq+ox`Fs}YgE;P*G^@|`F9KrDM{!r(jvaz9q2l9QUTa(&>2N9fqHK_3|DH84JO9-O(NM$p zV(D*F7(p884}`8dE<8$%(*o4CoZsK0vowYxd{S+{aP>0paD)Q~oJO<~60vSP026aC71YW|^t8%JA;jv*69SUZ{Lsu_} zw-!NW_hTAnC$)b@uk<$q32g3{n(Nx;u2VJ`*A$=W(2{xJkIUJ%=e zxqyb(L`T)`;hX|kg-r9BFA18dYnzpezIkV!76&K2qB z3OZ|FN&NCfb#JF-59`WT8~JJH7_5a$$l5SN*8S=I<;0lW)W_?#u~!oW+UI6Wi7d55 znX$v+TvxX(PX!A=*O|`FrpKY=o?%dFON}rsKPs@$s1V4JX)A`!LD@6O)+oSeB4-vS@7K+HKaI9Un%Ndr<>TuH8?U3$2;r5pa&k_Er6z^;K80H zR%XPn!E@Fk6mUph5_xa%+^N^$uY-{_`HPx36@?bA9HI`(qjp;_0nShTYPbJWly2a~gAB@PyJIKgHJm;~%SPIty~@TOM` zn0qIm%s_>}(wL3NIX~V+A!IqrqYufqORTG+&4jE0#oW?L@oM^EwjA{o+c_#_-B~my z$DdUy=-@O|S3o~Y65o}nP&}D(XmTx}BhimL8B&Dcf5`9{|D5ddo1!cub)Th zKJbpcSY~uXfBfq!I_w}EcJc<)lPCVN4#l(siD|%IGm)PC1DF;JFB1P|5&u6|oBlUf z&~V>quMA(pMClDpLjZc%a@xtSBG*POP3W!Mcq$*b_!`JUXN1$S+5d8W^HM-R z_dht4PqxdKFW;Hi0GTVjrbd0K|FIr-{x|{H!vFdWVA~l0?oJW05tG|WNg^QIy!sG_jFdY=L%U1q#Kh7>L{Kf3Dv$+GYjP?F_w&~_Bkc@$gm zyr1(7G18SkSc8OrVd*E@nlqMIHa;@k87-I4Fnf1;x4xXmVzrH)*e`;+472D z70Aa+Vvj8zBll)Sz|%*Uq3t?fXk z;2jEo?JxC055L9b;e`ZIt1By;+5$dJ){^ppc})I*okC(E@*-c-TwAkFj$+edI{$fI zy;V~eoAJ`eN!b3Jg!^x{GToSe>?Hz+Bu`Fg~^K=z;UM0{4-+>CESgT@ocv zK1K)4&Bv)EpCIty-2ptf6|HasACds9U=H>1vmwkZHwsEQfu}qAs=%mhUG-Ygx`few z=+D}KL$y;%;N~|2wL@53Z=!*siw8MH%<85}pB?XY&>^3Az)>#UTf-$}Y&b`p);V*j z3*6J-zMXY$jTWnL+DvA2HILd~A3_zg*zEvS2jFtCsJ4Rcgs$&{kGc+;q;&V})%UxH z;^sh0byp0C3ef{E;*=vVJ+0d@>L?YDn#CV%l}&E_1lu@cqa$51**at1v*#ct7#shh 
z{oB(G#23-I_xgSBllFGwx3{9e7QrqH9gN9VSfg9ad2?;y@`Zr_QI6aQ6%LYV-Xv%!(2*Xvg)3renY67=T!*&5%V!9IUl-63Px|`_y z_inqMCMd3=aCj8Qc+wVGGCGBtA5>1QaUJZQe;MNZlYL=v+&jC#da_H#Ge8~Nm%bR; z95;J+u{U*E==bzPq@^||Iw*&H53jP!gCe2x~_sq8XAVNGq@+DAdG2x37{Kq(@cR7J+c6OKVd;?E3K((tnQJDz2)R0mVD=}Wf_A^H$lu^$K~Jxw8gY}9BY9t z=ddW*66EhEeeRmuT|=zGJZ_>{X9n{l{Avaq&by~j_&5*A{6W3({e@|FL{N6WSkw=F zE*}&Y!Jb!mwoaxOZ2@vYyeFe!X_p!E*TAfIiwqb>K)eop6`D0K z6~W4`?kGAhW$JR+1Yn9LRmZ`~mQZ{9MVQ$-PA%O6e{ue{N#>x0Cy_0cKIb7}1Mw!K^cF+A&P4E-N>=Px5rkH&I z1qKzWuBS0~e-h2RE8_WEWA4;m?K0!PZrPxG%0I|Qe=E%Ift^*!b77mW!;uw9x z37D`#S7n!TM!;O(eN>QHRKTo)2t))KLD*7O1%FFE z;~dU@n}9xqN3sySw<_5W+#JvO4+>jX+y@4sP_@whXu8Cf4CINZYPnsB`b}f)(b*d6 z2S>apsz&C)*4W77Ot~_6EMmL*qO|W%M8$*FpPB|G27&~zQbB+dZRm3bvxtauwk^(< zc$URq^J&D9*n{-OBAA0vDL8+0MzGzt)$33K=2!|-1lUxA956S$jizL)VMEK3icr5@*|x z@vBvoK$;$#bD>P%y3Cl7IOEQVNfY~olb9(9(LT(isDM=o$H+zFY-`h+WQF#bqPfbT z%WVBJpW4S7>l@?mb$Dcw7;kPSbk|8O7D3Y}pFUz%9*Bqu6L| z+Q1*`g?23*LF6FYnCCK9c!$%dvd|6-gXLi-MtP@C-C0&$VL305&{8cmsLH*;;85Fz z$hV!@V?|<9(T|pH4nL*@Ncxgi%ia?U`W}{O3k9Bt9{*cA8$OltgKONA8}>GUC6|#l z*+pm9*w(*<>tk09D+?mi1JksWb~~FDRx-yJef9K}pv(9A@3eOPe$i#yze%_B0Eth( zwo|p#PdsFwGTW%ul6LcRpgn8QKwwX> z@)!>hw9O{H$9M5)6sBqB%7EwJO+AVJF^fTQY*t`-=lI7wknOd6 z`n;Xy(O6>gBEGM7_q~q$;TjXc#OAskmzL9B?9ruzQ^O~Dp4iP*v5&PT6qz(7_7(f( z1d2}w_*JgM=WgQEDC^Ap`YboB3-#8lh@|1a16IU8mH{+=uEe zpRU@$&-wYy_e-VSeIiD&=TlV^hDYVN)r@=WC>-kS3t?!^iJQHFD_1W zE)w3>6B!&O?e;LuRsRdk0W2-28HYP_sU$gDvXEF5}D@_UyO5b3Ft9*TeGuJAL zpK8mcEnFNAlIria{~8vg^Z{4mm+vq$5y7oF(qO-V)*3n)_JZ+BA&R4mA!8VA5ix-3 zdQDGWjh(mG1G@Px3+}t)AVkopztWyaroos^b!(<#H$FYFIvRbPNhl2I;xklYRqfqF zw1HJxkf_?;D%r5fxGubJZ?k!aqxIi$2tzwhlUfxX`MV6!ACLXc3CX@;mIHz)2Gg$_ zm*Tm#?ZN(B)>ess@??@KY_?~~CRqGeiRWaeR`VHNTXJ~vDP8&1#f9NidrKJFB-@DB zZnbw%*FzI+_31F(BbH$IvF1C?JBuXy<;*p9=`#h+>Feb%^r%Y?P7@)K#ewF*1Z-Cb zlqAc$X$5a?F!9>ty{lT>Y(aU@_1mC7SDs~|T9v6LPCMd?3*KGbiM%Kq*kH~VC1J+s zF^I#}mL{NzOIB-lVTg#{xkj05M1!iID&sXql_u@cPvxBtnq#yM!P~qx2fEBZH+9*^ zi|Ml9OJl)>kfuDLe(t1q$Ido?UpiZ>S$Q)5d#g*W6l>H(aR!L_H9tSz&=GGRm=AQE 
z^MLo~0sA{Pmfz=O*1D=QB^2%=kG;-!Xqk~&9!rl4xIv@cZw>5%lpIhjaP*cq0G#Qm zLgTZIvhI!_Tun6d7<=Mv)hg?0#VS{8f3UlsVIWZA#nuk_M#9QQoLJB|fyo|n=y@pz z%w$1U!LN*XGRNT5@}tS>ZWM&yzZUuQ>o{7rnGzoNuODuDf0>gLk(f<(JIPQw5yG-E zO-2eewYEoO9%KOun}!NBCGPy9E)A zX3ZKK1weHZ_|l7E{gj9Seheek;X!IER~b?-bnCJ~;i2sTH1m|Jo(kzxYE2LVd?L;f zYs2k+rP&WAiQzZY=8twGUo9Bl1Gn=K%|FmR`z1E*Z_^)!AN*jUL2LRJk@!)GzC6RK2>lWbXH1`NPM*bWj+OO&d(C=b9Ztip zx>>*MJ^Pe2v{AoC&**$kF68rFU_ie+$4fIOiRL9%E)yysthYC=J)Jv8vIY@ z1aB-pDnn|{$KA(`goxSo0+dm$z2Ghnry?uQ#?p9fBV1{}ChA(tKyzw(CAi@is7@e# zlqA;mrxYIh+YB=qKIZm7y*A+mmzQe&)xG&sG(#bGixd<=VQzM0A2E6@=}+e9KZV=B z-#mOswd}#U_crTS6A(>HATAt|qC}xgLe_;S@Q*o#e~Jycpom*-DYcg_GhNSO1_9do zJL`Z6$n*WTHnyu%Er6|uzarZ&mjLY5p;#sps4tGZW>@--2q0~(k~sG%NUm`Ua*x3Q zsHkfHOG50|x+%DNr*-qM#vy-4%e14cC{SoB-!0JP~rurp#B9a(yQaGM-~ZCi3@e{aa5d*G~t*b&6uOBpUk9 ze!ySy7yO9la${9y07(Xghyb1}*V+lpbL;2^FK~oll|c1E4eX zqKmX|tVbwcA0G0xwI>)WETYg6Y~6sLU2b`p&6?~`KOv@ju@i+IbhQx2wIN>7L6okB{ATd~r z5_69SX?-1}=}m=656|tCzUBLFw@a@tTzp_s=~7Dd>2PvM1aC#qI!4e2&0e)`)~FEC zj~DfG=^Kd6U!5*5B=wQ4v&>nG@>4wJD&_~44*-uHNUwa~>u$}QCQu&wt?CV_y|p%S z*lcc4Feg2b&~_a&rd8#RctnajsrmA>si0_I zXCrS{4R9UunOwcMW*oOG;!xokJVuB7phw+ZGF!>9W`T^B_Y>1ZaB03??+>s5HLW+r zg8lg!iI|uK@hdm(t2atL*lZEN7eXcP3n7EWuHG$Y>*@7mwrEU7-SU0NY2->v#45;ffq`T-bX5$>A$m#%k zBExv_txpX!f>uaj{t&BsvC|@p%fjgL8oTe2dYh|3T{N;bFV6~c^jCOC&dPBJ;~!?+ zS#GMp;;tMy#?YF$wtV&l^lN&io(g!tJ3_{E#+Xady%O0pxC(|m@bzs=gcLeE_ zP=Dk}7qZ4hK&VeCC^Ii~@Bp7|TZ#gYZb~)}NEgpeZh+Nfz)h7Q>Gy^w3|Cm4{6gRG;AVO`%RhF*y6Hge-ST`LB>rlrbSs)FlFBqY{m zr8H>inR(IXitv*4KQbys_z5baMtv73k`3In7}tekC8``h6~TUYJWN$?VX`94UywwM z$vP4yoa(AnQlxAy>xWu`_+01H@|gEpe`20Rt%JnRjgOdDAk2-?ifK0U$m=4V5i%0# zT#E^QmSANjy;YA+sZ+$RfD@f9od}s6P4Lg-?_=>&d$VQH6g2t z(VB%`zQNW_>w&p#gj}(=XkV8zJTnOrkP>E9?Swvb&4^M#bV|P7WHr%<}f~hNu%)Mw1l+stFl?CMs?L7GUL;xC~YK z1vW26lhzgIY)($~3dx!Mw;kE8l_P5~D$(cZHiNJg`XR^)?aqahD$#zQh;x3#0B3m_!i zHfEI__H)$F19^Meq`e6wE0ip!$Q4o zZ}J%?G(!_b>$i*zGb7N zBrKIHA2r}-LM$+P04ZrFL^XRELzbTDy+Kt2bL<$9a52dtVQVsO&L6_vWHf%05Y(Q; 
zIx?dycYCqMbPfce*XFO}i}w2@&-dGkR|N1vjHcFR#y_QavPxC5>({o-1W>gnD>-0S zXR#^y@sZh$VC9>81BP~a9Lo)bWII$`5Ba!LBnDgZOc0xeZ`d@>QLs=s-B=j*F^gRA z%hu%e)T$m5wb@#iu)Xzte&g);@=u%gI7`0|R!j*B4mGX6{YyM&wh;HX0}_poJrriq zb5huOmo>SR_oR}UPTpT>5#$nsuENM2)yLc7NA*{~Gh4lX$s1HNP@7rJF#UTdLS4-~ zQ9f^to_o?B!g*?9|wmwu%-cet&u8nB@YBa(n z3b!YKG+~KY1Wia$w3*w9P1A`n=q-NmK=6%~|D&v0d^&4Wu64Dwwco1X~&SH1CX*JB?J9yxM^A+E)a0>mN) z{8I#{5r3)1vdp)MqMs~VUoltB*Bp_3e2J>|tvu|mOu)|LFv}z96jIiI2>z_jj=yH$*#HgH6B)ewfX-P zpY`_`^smMX|MvJ(qT7GjUpQLjJ6X7&i9DZ{pr%FX#@wa&Y z?{fNoB%_K$ZOKZ)fXKoDV4y|g4Xq!!Te$CqkL+Po^}35+$M%pY-@k|XEug85umxeKbxpMeevS{5X}qfpd_4lr0t!8g``TXUN0PvJCVeNG#sRB1tN zG2)e6&)IE8z8;UdNGQ^JOmmbAI+tWJdAOb6K}rXtmQHg2qou)=AWCu4Bgv_s{X+j| z;qKS`bpCY62hP_AYx}$r9?_D{PN&a7E-G&QG%UP3=9J@UD-@?qOMl^)xBx{^qc@Uy z>1Ua;72%V=e%gmUCe;Ltjz}`Bja3370eoJ+xnZl^;uJqC$Lwl@rxucn-s zSdX3QE3l^E0b_y}0Cp^0fV^Sal)#yoYpw{DMB6xpdotfv(9?Q67B1QG%l1}=0CHT4!c%m*hdVlM(H##iF7Xh;oFY_`R0 zyhaO*N`r!WIMSUy!%kD;j)*+X&b?;YQ_4Sd;Sm%KFHr>rF)U#}Cm_09%sT=Q;#d8S znZ5%!SgDp2m%bjNA!;4zH?WRsv6&@EjRcF&HrNepagQt3w3ch#Aht`67d5$a3$*4= z>;0bku7;NbIHPpca&@ejN4`~DN{8f&?x17^2jzxkPbCa(x9J$6WL7R62)o~gH{rK^ zMD;J(uq~H)5`-z;KkSK#xS{Q0`7I2ctseqdJ|wZ{bKjo7Uer1fxvw zNLW>$%X7RN{D~n@k$;cX)VIu_6NW{Sg?%ZtEiy z20N;ic+gQ&oAt#)&kIbduk^cg(RbNz)0&%UU@r0txJNI-lA^7K7ODMQ>D;dgS)A``3!#ZlUJf^J}ZD4LVP@@Kw_BMEkN z(|A$rG0Y8U;-$9jJ8!qj-ClWylvlswGwTkEpdSn|Ja+boOL!*6K;>`^{NU@^k*_xi zB|&i%kHbYmWfQS94shubWZl995_V&FmE3QIY}ihaQB}B+jbr=+iswb-4IcLNHfA~% zofF(KtNFcT;Wzyz&aSPs9V?lkcahiG#?k&Dap`PMZq!@Y_fCeK;WWstR` zLEYu|n)VuWv7hWybTSysL+`FlLsr$`bT-oiO*ESy>KmJZaKfCTBVma9eJnJrZW;*A zs^wlK$iWLK5t6|2i6cQu=Q*gK1@fJ3iHns)BN9+gMbi%yXbewd!dTpkO_Lve@YeFfT|>4?xFF-JsoUF!VEIR9xVp=+`{#?g&w9)q>AlT*!6~$O?PI! 
z*{X5+Ku%JE%RcWwMmzS**>&Y)>PBF_qQm`*{iZQFQc4+z`tR(F#q}b3hX4+*X{;rV z!l0viu3c4s>GFc?+^LoK2b2>-oGx{uJ_vg`3i3wr6vf_v*6Sn~d<6&Z<<-O>-i&V3 zKWo}pzKmi{r%kaK?uS!osba%F;`XG~{oj5MeULig#TM{WVBNdt7?fp+@k-SO)X6bJ zDD~-j+`du=(6AE8SA$l&Cv`)Uss(LUGB|02fAp=qMnK^98)J9~%2dBU0~jz8$mHO` zPXxXo&i-eh%Ya7PjG3BUJKq5`=tMGfDbw zmfHoX#a9{_{f-V2%`GLb_r)bHMhmJQ^L?K_&o|{V|4Jw22y23YMKk(9V_Oqit6c|F zf^+W0W?Gh=FOM~7^h)7T&~w(R^xKA_CLPg;9^2JkrO`1{vTEOfmKV_(M3Nu!sJAAO zB*VwjoMR*a|AsNuua^3>a~gwP8HL%9tMK~s5f|r!v7ShNQ$7c$d#N7}>|#@dwQpAW ziv9l8;XbZ2%8mv*@YjeR?kp5!r>DgRT%vi(dBnUJH3<^b#NISbBwd_08Yzy^H$(;9 zb1JAJKwP9!*LRcUnFm;%mu!bIhAbxMgf6AFb;x_5{*Ma5G^6EXu|tuK@LBwznFe3r!4-1^rGGrv@86e-t_l9-x%vfzLv+ zZwm;cDFz>24baY9C=eoGGP(WvJ)E-5k?TK$*Of@xYdBjI;58p zu>=jU+Rn{(BcD40f)QM^ySQokc)Jv2x(m+&E|psx?KSi%RiJMBOaK)h``0P;a`W_( zEj0)k^>cz&LZDAXRJL(*;-yUN8TCL}>yrE1_;(%S@iK+ap2eB^S8>kT_*((}HVhTi z!ICxd^<})9V7}7ok5Phm;q75D@%aai-S6{`Y6EKNTfPVh336tipjB2$pyC)m@`H&2 zz#B?9ch-6OF5CVhVXOVtBIW&j8H%tf`)~+RP(b_Vg)QN zt7-K_a&YG)BAtJbcV#gyXFDce4V3;YRy#~-BH^d26`RjJmMwoLTzS4U*SM&HHv{8a z|E$Wv={Te;Cda_^*tr}hTynmK-A3S)sXZA$2(JQdhD3WgGn&vnUdwWkQkUm(Gh*mQ zy8Z~)l|Nj~Jgn%iD)-QA zXG>14D!rL#y*AsW(pw6LIc=U3+A-VdD{3c3+$z7zoMR-7#&+p>uk2I+ELOsJij^ED z2KsbOoYZD<`?kD{+sL~OtJ)e}7dN7yU z&iO=kd%KRkCeg|Ne2<$|MxLBig~WKxcMN|>WYkyoLM?SD9$hP#nJ)-NVd%`?EBgPU z^N!sY>ej*bcT4`KdPaN`4!sTVzkukw-=@xYVb4by0kGaz01_B!_z8$kKLDet}n4)w1V-hbpA|07cL|8mCjzj^e3=+X8zTQa1Br>gF6IUp}e z`J$s4s5Y6e@?H-m&kDlgH zsmsDB$*pRLz9)?N|C)j@)WP1gHH@J3K&f};CKF7rCXvE1N^ zN7=I(l)xN4sDOa2z&?9C&D_rbqmp?UC*VJ$hy1Fc{qjC!pSt$k*uVC+QT@j8xVft* z&Iv~vBYFgRoLNjIb%kskZ)r9&wb%)&>Fhf-8nS_rS*uIdF6d;h*=wKkuVIJ9z8<%Y zj+$FiR(@1C|3lO8xZfu6&>H>jP zJLZIdi8Mg&zWcPkvHgaO?^5ss&q8|*?#V8$Xd2~3osCFf$m1SvmVal+CS-c8`7G8B zk#w#(SyRa7X!OByvF3X*$F(cEn{4qrzfkwn8U0$TjG9ijJW9<-H=o=xsJsQWuZmSVZ1bU8M z)~WCmd7j#xg8HyBU>t8znY)>xds9W&>TUNNDc$Fpa?E9CcOQ)^r)Fcm1a(l@aQHGd z^`??|U^!|O`}VVh+MApayg20DaKmTSEOV@@Wo(wp> zxjiu^LO?_O4;(;U5!P)^i>lw=Y5_znKvTq&K;x!=tNARnL|u+)c*W#6p5Yy*n;`Hu 
zNyewc(>zsqe(TB2qSQNXzYm8y4{bzhk1JlM%j`|_{JQCUs{UKZx!Z7CE7b4PIL9~4Kg3@Eb}j|=;Cs=80z zf1aQiIFjpEgJW~@=m2&Vq8+2=2l)g}1*N?GXW#dd<1CW4T%ORWsp#X_wYaUW*M!im z9Id5p9BY_lIK9MIMtv779;ndsP>|%mh0jH1t_7?d$!^E9sLX1h+_S0-3U7M9U^;(^ z={o1S7E2rr#nS6F^#kDUz$N%|b`amZx8Rk-1i>nO?|c6-t6Yu*Je{@ z>$F_Va}(Wf$lQez0aJw)%F_~=+TPX@tVlFdI?>g>(d4gCAB0uT1+i>a?%4&xFuUv8mDhV3~iHVW-K|J)S; zeB33W3!w4AA1!Jn{&!ctD@UxW{d;u4Sy54q2+5Iw5|XBQm{Yx#a=9tJeP<~8f=ky% z2c7%S1j^t3Xzg1Evj3*q-5pK7Q~R9o4ez*~RX72;&*=En*m_JG^ajx82j=<5- zYJuX}?Z@vECm4oyn9iLWXJaZKudq+KFT8AvqRu_~%7>Y9+-oz(eq7j+he<3WGP>5} o!Dpz4K7LG8>rZGGI3Br6jkJiX59`7-P+qI4rl*>B{r2Pk16tg_#sB~S literal 0 HcmV?d00001 diff --git a/providers/ibm/mq/docs/index.rst b/providers/ibm/mq/docs/index.rst new file mode 100644 index 0000000000000..2a2e68c1066ab --- /dev/null +++ b/providers/ibm/mq/docs/index.rst @@ -0,0 +1,136 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +``apache-airflow-providers-ibm-mq`` +=================================== + + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Basics + + Home + Changelog + Security + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: References + + Connections + Python API <_api/airflow/providers/ibm/mq/index> + +.. 
toctree:: + :hidden: + :maxdepth: 1 + :caption: System tests + + System Tests <_api/tests/system/ibm/mq/index> + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Resources + + PyPI Repository + Example Dags + Installing from sources + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Guides + + Connection types + Message Queues + +.. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME! + + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Commits + + Detailed list of commits + + +apache-airflow-providers-ibm-mq package +------------------------------------------------------ + +`IBM MQ `__ + + +Release: 0.1.0 + +Provider package +---------------- + +This package is for the ``ibm.mq`` provider. +All classes for this package are included in the ``airflow.providers.ibm.mq`` python package. + +Installation +------------ + +You can install this package on top of an existing Airflow installation via +``pip install apache-airflow-providers-ibm-mq``. +For the minimum Airflow version supported, see ``Requirements`` below. + +Requirements +------------ + +The minimum Apache Airflow version supported by this provider distribution is ``2.11.0``. + +========================================== ====================================== +PIP package Version required +========================================== ====================================== +``apache-airflow`` ``>=2.11.0`` +``apache-airflow-providers-common-compat`` ``>=1.12.0`` +``asgiref`` ``>=2.3.0; python_version < "3.14"`` +``asgiref`` ``>=3.11.1; python_version >= "3.14"`` +========================================== ====================================== + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified provider distributions in order to use them. 
+ +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. code-block:: bash + + pip install apache-airflow-providers-ibm-mq[common.compat] + + +======================================================================================================================== ==================== +Dependent package Extra +======================================================================================================================== ==================== +`apache-airflow-providers-common-compat `_ ``common.compat`` +`apache-airflow-providers-common-messaging `_ ``common.messaging`` +======================================================================================================================== ==================== + +Downloading official packages +----------------------------- + +You can download officially released packages and verify their checksums and signatures from the +`Official Apache Download site `_ + +* `The apache-airflow-providers-ibm-mq 0.1.0 sdist package `_ (`asc `__, `sha512 `__) +* `The apache-airflow-providers-ibm-mq 0.1.0 wheel package `_ (`asc `__, `sha512 `__) diff --git a/providers/ibm/mq/docs/installing-providers-from-sources.rst b/providers/ibm/mq/docs/installing-providers-from-sources.rst new file mode 100644 index 0000000000000..fdbb17d017579 --- /dev/null +++ b/providers/ibm/mq/docs/installing-providers-from-sources.rst @@ -0,0 +1,18 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. 
Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: /../../../../devel-common/src/sphinx_exts/includes/installing-providers-from-sources.rst diff --git a/providers/ibm/mq/docs/integration-logos/ibm-mq.png b/providers/ibm/mq/docs/integration-logos/ibm-mq.png new file mode 100644 index 0000000000000000000000000000000000000000..e5c1a0820df2dd91886bb497b65caeec891e1829 GIT binary patch literal 67759 zcmV*OKw-a$P)004R>004l5008;`004mK004C`008P>0026e000+ooVrmw0004i zP)t-s0002__w>%$>Bi36*W>c!= zrn1rQ@%Z}s`m?;vp|#bdwb!My)t|D}pR&`Uv(})m(W|=CrnS-X_WY)~)t$B0qO;kb zv)HA$+nci1o3+!Kt<$Q$=KcHr^Y!)k_xk+*|C_SWsJGew{r~^}{H?#+`Tqa>{r~^}|MvR&`~CU-{rvp@{Qv#@{`>lopt=73`1||z z_WJd})9}B~<+8)#kEF$ho4p;m37UU#En ze6w10t6X`kT6VHrda`7Lw_SX)f0MUddbV7Awp)9&S$VQnaia10`dWImS$wlte70J8 zxLL^@6h z#QFdL00(qQO+^Rl0|N>>6Ptx>N&o0}ETWFBt?y0WEZ@w({@rsE5UpKoZPkt^ts)|X+#lzjbMINc z`|eIlpXrmuY&PTXmVV2vNdIlhqeqW!d+6@9mX};~DyU)^ZC2bUzfsd9!IFP_i8e|ZTkN^9d5@vv*;Gn;vP}Goqxe#~5w>^0G zSL_awqz4Aru5csV2>X%33;I`deC(gx3vpg011vf^w+DuRKF4j3-u;z+iX0da7(w8# zCFt6tucCg-`JqQMpDEWnx9f}W0qJ>6?;E15{kJ`Q_f;(BQ9=N%#q;m_oL?a$SQw_~ zJ06IUuOv=GFERYb9anr3uKD=OpE~_XTn_hb58yo&Gx3*FE7Y*>bcDu z8j&SNiHEh6PY{cFXhigSCcI&>-ew<;fTg3jLKZQJXm~F_qb54x&UXRD4Ur?+I zJ}kEl(%0qG$>94u`c>j#p2apN%h5DNSZ;e5@0(0nU9^+s`3a=#JYA0vU-!4>mCN$s zx#)`?AJn;@tDmp<`}0qn;p;egj-e^aI(^#%dA~$5Q#8Z!zA<)!H?Ai5s^25QI}km` zW%&x0{6|~-%%_}<@c!enXVc@v_jvSPFDEji4+`&^@`#{aG&|#EeJnQC2M2}0?=g1Y z!_D^`y}lhjz6ed{$L0I4&z*3+W3ZRE^VK8clL=Rc)w%PDvqKNA#kFJ-)ExtA4w$<< z&%Ad%pV8;p&SNk8moRtV1^KwWhff#eig`KC$>A)*zW>92|6YtgnY)Xh0K7%ZvjRgA4+`%irJu5)X%+~;mk^ew zJe@HZe2crQcTIJWaD2!$<#QF{7nXPO2`8?H-gNI`k;Ze4I;L)WK<|l|2>LO>UBw-b ze6~yAfeN^I40gO&JtXS7+}&~T-r|kLo|1mv7r1kJK;t2W_pnX-QPOX<(VTCbhoyL! 
zX?ym{t|>Z4?1kg42YC(g=yGjLKXtq0X7^R_nO}qxor-`o9$~XS1Vnv=*v$lTDmYAy z@Oj+vAH#(AqP%k_&hr5OBw(nq@_S z^BETqT~v(R3<8ApqHmusI$vGTy7waQ7rTK*env+w{J`_w-@YUN0b}sd*%HP>Igb12 zzhHD@pvxV4Nw`b&h@NpLC&G2>b;q2vjd)Oa9~9Y2j}G_*j=yBiu=+o|Fan3ZdJi9P z=M^SgXOPC;j_d3wr_LkaweR*8O&u}zMJ|3ne*~+~=Wp#Z81p<}an)`|SI%wBOJ--+ z{2XDxjC1OFJOj+Yz`zKH3|Vp3ZHguH)1Br`cQB#InTd7TJG z&L;>y<}poBHVTHHx+LUV_ZTo@935}TRrh_2SRK}jVm4dx@sVOO;eY0J+8N}Hivtv4d6;S6Cl+Fe9Kx$m*gj^Q$l^ltEX`2F zs0l9g@NmGm;N1hua9}uykFLO7Ai>Mxc84xY7>V*>neREe62Wr*bJURVCDHFyRs79( z?D_rCw;l6Ik{%M75w2$?N@SfQ zvot|k1Bwz-6u8tMz(XI81tfkX3ztSNdSUeR@q_P|M?I<eiwdREC!E#`*9y5y(f>p;P;?d2x64xe0Ynj$1C&tMD8(3m>+g6-Yq8dku?_G zjYUv_<;MkamLzO|ohmKP_hO^ILx2mreG*IblEm#G@gA`!uq4qT`D2#_2ti+x7?sY? zNc<8UiR3~QA6^`U~OZa#)xnzg$iC&2(5^^G* zL~lY>A6ZG-H7ibdIjB!%Q6i5!h%6wGMPv$z~Ns=05fmvh7LcyH+#8~F9@c=IPjk@ppm^Gv3Ka83yP~p zxyc`p?m5TwqgV9v_xB$Dizn@uS61P)V+&Dtzl|7WkpGV?6HZ`_RT+K#n4u~2gKP0F zF(b>sIngoIO0D=@Q7JYVKK!-8QC-h?> ziH`-aAVAY0!RNmuuf|uAl8pM@$Vwp)cd$!w9=C(nyak4$thYV9cUAhW#A7f>@cz#E z`YrnO5;l`MycmncCHq=uv|^GzV&~(AC(C4KVD-U;-|G_7|3XdbcXy@x`y~}rb>$N+t1gD{XKrWp7M|jd#is4Bks~1_7(`aUw z%mn2USbck^!=1o-yj~AaQ_thbopT|G9b<>~rU5bN9(GCDeHb6pa=7edlk7O!3YY5BwQZ28%HWc$MLuCwTz$i=(Sk-K;pvYovo-Cv~A&(}keU0#)=YRF8`A}5MC8Vi`C zaBiJs<>KK0>cQiI5JSHfU8lI9@SLPo@tbhZ$==tnU=lbW3 z?>ikS>}Gef1@wOxZWYc2J*bEtZ%F5_^Y4^w=Rx`-ua;5=f6PK5TgZMuEPJrK%sAR;^ai&$UZ}ey724Ny-Qjj7}L2B$uAHa9PUl7vs}-3YqPcNeRbRS7j9x-Uu_ogTjM#P<__|nRgQt9^ZiNA>{qCSO|1|W)OEX-*?I` zc)+#b7?;hYD0VcBtnDky>7CtOG{)#lcjR>jc6U1iUFjSKCtWdqU)QCA{^P$d*5Q}M zI{vPI#nKt*YC}KjP5r1gTlhF=>qqBxYlr%w3%;$NpMD2DWvLxqlof@IhIg04ezv{Q}QSOW){)Wc2 z`}j+LT`-QfrHKy%u*1&1S`*GpYwDuGfu(y}|K4m~`uoG9-s9*99@ECpr2k#>3h7XH zi2k*UzwTtxbHVUpGM<#8ArncAGR(LUOYQ7(=j8<2^{`EQQB1_B$a`R8ksTO-tO~E! 
z=bRl8krPG+RQE!1O%AJ}jqSq83k@!Lv37(ZL4t{x_js3J zWrw~4icRBt=l0d$={M``kxd!k=}f-OdaQQ>-pE<9pqBx=+CvALra%I-S-9c?&7@?(ktq=e)Y}f4%3g;PMUJlA*kw2@fZ_Y~}}__FPPaOGG|bCw^x@-`jCAL*$Sn zph~es!X9-8H!_9O!{&<@=xWznVEO|Wt$M9vM|Gri;ixt*9Ps;k@9*^y?Qn(%7JlE6 zddI#icX=vbINqjBpVvFm%ZyH*est#DuKlQguXZTw@_*5aK&OwjCd}37Y|%BS)emYr zxvd`~3i^B|7Sv%gfo2Z@>ZW*l!Si~YB0WTz9+JyC^2kgAci@8QTRZqF`8aWQh_FbD zi!pmF<_?Xfv*jaDcLzw@VVBeJ8gybh4x(YlS1aHuoVy8C{YUic&hHI7zgpCfV&|*= z+jGJARp7v{S2zB)!$rq`G44MX{rRzC?z#7}IzD$)%f)lQ;XPtJLsx7z>P!Y=?rSIh*(aA+oQI-MhWpsX(pm7*+zQ>|UvH)IgI<6|a&QgLrwpqk3twee3s^q~}*<P(F3pIMh zk;fGI&_K$Nek(=O#%zG88yp-PL;8*xC8>VZF5BaaN+`&@dc9LD9W{`$1B2|X@whQ* zTsW$&YfAO<`pvr(-1*emdt5`hD~m`K7qh8NOGJ(J`PU?M*t$$T=Epn}zKL;B&|_%P zRD$#J2FM2sPZzt1pj@2T;f`h5!S81#CMG=M5NTGY{dRjyRU#&&?q0vFl}?K~b=q}C z=vvral1ujDM{xIkd5gMhpWj99&@-S{EoQgUwn!-UXwsjMLMD=;S)X@o+&jS<+(BKp z0SJ(yDf2@>6bQm@i01Sh6hXJ{kh=4FIfiibhojSzv3VsDc$Uume0*~H<+=OP^10l- z^-NXp!HeZ=>bY5oPRIO`q`)?J@ z>zO4W~_MUNv+!nk9d6s!agxpc&(Mn;(Vk1Yz z@d@7;J2PN#*Xb`XLXr<5??_Da2qOv{U^gfXFwB6Hh4=|x7$vR4Qq(?amyo)9jOyBQ&vSdGjTgY!KH>$A5`t|17 z@-^jabJyjs@blJkU|5YVEQD1hK$4_Ogk+1si`?a=I+7InG~h$@5iFkNaG>b!o`Tb z@tMKFL0lfHTe)IZq)Bt)wg>)}#YBn>3$tw3z8e@|5qCU8IZTq>?q6C%Ud6LiwzhlF z(rS%z?e^!{yTx7A*sDD`$Y)cVfk-$Si!DmZzLg}07@iY!?gX~Wos)4<|Mo&X{pKO3 z2zhtsITiE(D|+}5nlOi?*z}S-{~K~DwvSH^VKcl~zVrUX*W&Pee-2~!;zjFtFPkPp zVO5pli;Jqsv1}7r4$UI$U?)QlF0pT%A&`al?_NtW)8O6r7HvmYM|65dhHQ>NcyUd( zC*~E)=61evf`c%QiVxgf=B|t*Q(hb#yewr>TP8)_U!0d?(_u43x;R*bXZ3b5I(KL; z5k=R(+_kV+T`rgb*dFj6`&f<^!cZR$3*8_d` z;>CXscJqa7`dL6x)nt4v>F;--iB{k*b|Ck9JtEHyQ2%PzBH^$heRqjFc3dEaY!n%e zs>!M3q8hQJw(_T~_F?z};6}A-mgR z1RChzwN73f)Z07d%=1W4QP<=(e|Q+;QfQq!r?V6B?&LX66h~;&zjmgbHoGXA!<#oe z)6>arz(o?KY3WfcKCgx%>znD!PVxA(*#;Z>?%Q`?Pdb5BI(P>M|2Zht&a!DT68bGE z_r=0t3qcKB<_?$TW|)y7()@4jO=~7?1c|%~r=A9Y15uE^^Rc5O<(~FGO2i&1O7NHM zy?p+(j^n+xufW~gLGX8T9i_uvOP|^GIOImVu$>A-)U`f&Y0@3G5-xgBcVqR~0H?<( zI-w;E=OS$XQib=(;d<;EpgkhG7P!zNkBwlKrCo$c^-uc~sv3z{Q~8~Isdj?%ThJwb z;O;VaE#R(Lg~<3rtyajSiC|<=URkv-g!@T~f^?Nm?Ev7;iHoVCum94xlqhcY$?2US 
zaGgMCW;rt^Jf=yTTblOA*47lovhj1iTqtR$O@q4!`|fQqc!wCQaxt4t{TPYN$5&Tl zZq-bX;JBd+%<5{-<1{jAowM}6)3q=W^c*9O3r62SC&xgE)Ug?s7$q#yg57VoFDfA- zRVbHt>$SrN?%pjudbV6F;gEMFz9P$Zbr^XTuomCltAL}n3aIW_Vk$Nq2`;BHg`IM_ zuC*Vydyn*RSKcjUbI&cxe!Oqh9}X)vnnBnNh@CDfoyrxw$k4Wb->yYJ;?Bu?IDv(3 zb$A2E*e82TniW;@$L;${z?9Br(IsuvtDRJxhjo;zq=&m|>9mkbA=g4(eX=wiQ%%I^ z05K={*lzxcv5tp)IcoG@R|C{S(yXXAFYI(lghWO`U;rdKNiTP&2c^Q^=DMQVm-?3c(~6aX)noQNkD$ZdIE@tEzpUu7$wCU;1bXwh-5bM0 zXRYxG-}tydlNPrckIO5`{mAI6>@g0f)wH?+-UD}6NiTQx1Fcv}Z$v^-P$scz5&@=VM%i`CsK)$Y#PN@Ot+g zBtrjezLU48$4M}Zjc^2{Br@*i)lhIFmCe6AKEip*5Bl!Ca=yA$E+00v_Gzt@er^hd zlWQw0suDDlBn^9G9;g@4?Mur$J?Q$={~m?cZz61ueLQwwyWH`}+;G`Qv%1h1Pp&DE zh&7#lecEWCf7%0gACL>&wO<~#Ps^ECkAopeezTfXBojf}Mnot)E1dVGWqmvcnYA|S zzho^%GLtqMntAkchkOcxv5}79sQgA&7u8T8wUw#uYDZ06nycE$(>|}#@F3^X%U!K~ zQa{)&=3l@1F%nYc)%dU^nF(@8zs}uE7M>LaPT*N)$o4N;OX(*l7sEp#R9?V!c=T;D zC*dMVOLSp%C84THgxJdN9%zk5`yyx$++8IXxYM*+<)E^=m-!_aQCC*vgai$wbne*m zQuzQKZd6PBJDPD%n@D;;Llr;)l_P?eA=wY7WjpVpyXFtbw8=UF{Z z`9UsAFL&q))u2MuZYljZ5|UTsCHq3ujFTGq^9~KrDO9hRGJ<@$e?!q@6WF-pdQm{9 z9_@*^L$@g#Qg^W_`AsYw3N5F$^9TPuFnrm1MW1IMxVu_bW*$M{L z)ulvYK^jI151V)AX?w`e5Cx?FIEE(v1&hbiR+{5{!Z?tIcgPrFke!+EI%f&O91BMm z(;S$@C=K|e-SRR-uA zWW_o0-;hmf8e+I{VPY${t^c-OXkRsO{F@NO9gcc`(JtlE%fV1M7F$auLjiJRM0ASd&{Uc? 
z`bXgkv`$fcZ2Q*ZQW6$|5k*!QJLisrc~)Q<(h*W6yB!xZN~bc}O6%Z7tMS0yb*0&W zY>Q^A{l8Kv{rqVt98M(Hl3~R>D*Cu_Pq$+p=sOm-MdFCjZ$0GQL{gjxYe?QMcjLYp zc7}aSQvG3RN|x6;G+upU9HvKfjF&(GM;0`If!2y~uD|#=>V8r}9oynG=g-u<1h(Ecm z=oxU$7Y8qnPxIOB5#(CPZ)AB&G68nJUgjX&k%g@yhPGKJZySy;C}wI%gnUK3WNGAT zkBtc|q>u$vyDYCHBqg|+N@dzOfvl$8gSr%VPB3>ZMBZtww7q2ssqwx(8QIMaa+X8j z4PK~R;l=$MMri6AUQ6jWQyj-|>;>*7#%E|3NtvbS)XLgw(jBs<(%HgkyLs3=)asSb zABKOQKx#mhR1q*2tS$438JKR?ViIWj+-|*tG!$xyX-pO`j z{q=e%gEU20hoyx+IX)#Rmd#WqQ$)};A?~j75dZWE0^BuETJ2h~lufNG`*Pos-#_Ix zlRwQN>lzjlp!OSUu!q58zTLGHlXY~~cWz*G=2>B2fTT=OcTARLNe$8I%yyxS)ZJmD zQP(OD3hxsH*Tg%~O68++Hnjl_^ZfS7X|$`gOfxAx;u<&X%tzAAUl%avt{(|2>>6cWq?Wb8N zA0V(j-0AZzC_MXZ6Y@pGW_F1p=XG|qozC6N07WF}(!cy#E*+h;ahKKl z19zVym^)3Y)ym~UHoFywxFtWdpZSda#DwQiV+Kof4eF$BpA2i;%ZsDLsNBhMgp5mxLT@_0U3E`CxGgfC0w%vLa>%+D{XQaIqCe?p&r zQEzC0`_u@7z}+}8J4AeIYbgl_>4Lty{Dc>FWns;HW{^YvJVz7epgQ@aZ^u8oO z*Y!M}hxOu5l*`<;58J1iv@4?grb_cu=pv62Bb>-N&$%1K+s zHa`47-oSu9&-%t0`tdi`K%y6;|yE%iZTg|L|L;hM3s=0un{Z- z0*wvgPJ~9b&$!5PjEnw;Tnku30ql(W*+!-|(swq(W>OXseR5(k5}8TmO8H9V!LPVc zfed6#D`r#A0?4(n`zKYU-$BqU)RG?Kd$?m~I941HzoBhf2`abq-iZk>%QGWJol`F- ziZ+Tg2i>vwn-w`6F>URYcS^gJYV~2By+LUl!4h7vkWE>G5mok2#)cKkvW*eO5O-tW z4|Y2xu|7@^IcAnLOScWo4N3np#c+tau?d0I+jr;@5H5;zSd?%wF5B%&C-s_zT>@ei`{qB$quh6SJb| zo#@nm0=FHK>3lYlw9KpXzs=A8rYNK7Y~khc@&6vUyJ5ji^BS!ewY}GABA|pNc{Q%8 z{Wg+;b~Qb*P@QBeW`z1C3iViRBR~BvB84}GxdZH+Xqt73Ho`F+UQEu|)hdgv5gIAkU%nqhm&Lv@8CW+`j*wyF7SF`0+NS#j1- zcd&GfTqocTO|uro9b1LM?ZKarx;v=U4x5+jjy=eyNjIXRSS;X1@LIK)O>YL3VS8c< zDuLKY+hb9Dn>&^jM`-fiE4)cF%`oCD4sY)+jw35VU}(x5RActll{F<|N@og12;Aw$ zpFGU7pC>)s6|ur=9-QVgDKe-m#HJ-xoi`DrixGvlvEy0aENQy$3eRk#8K=m5I@t02 zjQ(C}!WsxgV{haYSqX0B&I;P8b^xu&^y~J(-RH?UcTjNwT#L%_X(^SmM5Jgex~PUi z4uY5+5z#EvlT8bG+^mn8wJqOsg*Rm;8J6LkPTtv3b`X_1SwP~Uyl%DAtBcQ5LHJX&vej70~ z>zv`odMiX>?gR$87WY}Zq{eB|W=8g{ycYEjhpo@kuV2FcSOcm*J=7ih zGSWcYX|;S|C!4kf6=h9cNld9Git4q4@LrO@3VK)MxqF*j1{t%Wz+U2RhUI9|+OH(n zR^oR5WZ;){wsg`2?lcV+n{IoAzBQ?1zXEZ0c+@VI^V#j^mXNx(BFoX}u$3TP!XW!L 
zcN4sG=)Q)$KQfcdoR9BN$LJ1|ATqPG&7`XFwbhmIf_pTzmp^!MfVrzyY7g9f3DGxc z8;v@eQnK62!F~0OyefsG7825}&btd3S&c~Fc{zfZLII= z59=sjLW+jMtJf=)%FgyCQh000xD-|c4gz{23mos!y8(B)B6}yC98KIug=ZzXS>GQX z&-pwnLVp-1Ns(r^G`+I&CK(BtQmK5o);z#%W~&cX$G(sh4e*-i3LpIZ`jsQ3sM2C= zS_+#f%0)A?ybnSmy-wUaN-v4Wi=(!CsPOth-H9IL7#X5RxI1Yy%l1cOP(u5+NMJdg z!JF3Y_EEWvtKB`!vtLBcx%+P=n|dAy?n`SiX}Qv|aem|;X46ht z$vKf2_xSiOcOt`~sTNsxW+iNwmsTYu^fZ+#7aH2(i3Z#~=({f`<+7%43DxVzdzmy5 z35Jovb4yVRg?tNIBKVvcUrkxthEc7^p+{&mKGP0YY{dRq>ZLITy2&Y z7^k3@DD;S)5eSjIU)!|(W}0y!1?S|Li}Nf)P$nfTtuD!aw?brdy0v>yK@NUdtG1Bq zb=%|gt;u=)0O)K{EFT?}^4avWpfap3#-jFczk{S`7blF38F>hLo`q)|nMgEA-7D9^ zZk;1&WrhTVZq2gJUz;1EirHFvEM$Gz)7F_p9i!X?d1oSb^?z{?oZb z;EAFu5Rue1d1X2rdX~z*JpLba3pi+1&=dgs*Eh;tx%$xFtlB8++|?l_54da8ilt0? zGoU__l54SOIOtfWM(G*QckBRztMo9u)8pYiBMjxD?or)}gmp;ZSOL8Sr1H+)3Wjkl zlP2WBCs$YGaOlT$rf_upvY}PATJ_`m^5~yk_~e3)eA^SzD-L!NTJB%}TD`8buA4gV zp+~xpYJN(lF@DC{?oK|FCKct8obZREQ4>Xs5`$x2k4In!XnmfAvRzKj#Zc5eD%4}9 z#|bZ#t4G|i9ep<DV0?*Rf(-N|EA^7PUrN{mq+!=^~zP?HBkQ8@M zkBh&)&TSzL5VucHB86urh{xYgfI}+`3=DR-0`xiCB+#CXb0}P{Q@{2 z17&z3VKtUql|m87n5}7tZLMCdVU>4n(aF0c=>CfFTb;c7#@)xrB?zx2wPH=vPLWT$ zj129#e=@2n7K)$*T+|(n*g1C`!ft@3?oIgsGewKzKE1jcUJ}yZi8wMum;*{Qx)`&E zgTbxLURgUmY~!u5Pm42t7kI`mb?$DPk7c@VIyWNA*uiEji*{v;WNRk%(vElH( znWP5<4-&SsSBs}vCo?dBoDS=41NCt-Y^4TRVFFqa>D*zz+$qiukv2;>9JAYz6BXG= z9n^fe^73loa;;Ff^_cH4C$S8Kjr<^58Ab6yH?DkH-f=% zEIJ(Vn7l~*)?E_#SE--m44eq^L_K6VCi$MS+x=h!ljKV5Nj9)17T19e{sa!v4i?A~8Dq~q^?>Rel?z;!#U z<HL5`eIfTHQ@QimOqMVGLdCG|{+dXhC>{eoq^No*<4dU)=PF(o~Z~c*! 
zS&`OOCjpuuH@Vs}X0)jmFHwwJqRNAq2fR*$cDFS)B4UVBUHNha>hvpRQL zqyCB9?H<$`NZ(aUdzsX-GOS8(BvlQWNXj+q5qr2}oH)-}nDvcNj^W$J>VsmYW@kA8 zy0;JNE&p)8QQZuB4`kN7m68Egh^G`R^`=%J3^&%r8?Gj z*VjjgG-yk!TRvzAuI3IW6=>z+{nK~A6Y{ibT3Od`y*?7UuUysL_HwaStf9Bq*8l2S zd3d^_tQEG>n~_LJl_Ygh>Zb^Cmf>0N<&r5rju~*>`5YfBHAKup-?eU^Np^-qW-3jQ z*5SxvLXIb+A*}AIn7ixG6S(>@UNy#Fbfc$wZvPnOI8qzsPqMpjTM%^GQM+8O6^k|a zUnmqVEEiz_^Z7iwRYUKtj&)uA8r*fZKuWdzR(d_CD2r-LT9`JI#Hc8abGoX4{2nmO zv5hq1BJTJ$t%VvL5joB$u;)ShJjaXlD7hR|-BLWBR6~(vr0%M^b@vI&y0tp`u;?dtZVW*Klw>y_IR7;_0w{eSOzPA9QrtvE)HI<6)=*qzv$vopQU}Xy3ne+YRR-}Yg>oZs7g76~e9qEDV)Fj3?X z!$K|A3*3!O@E$1CbC;`aTZmagHI|Y3>b~<_ictnaIUpOt_NABv{VZB`|pS^xv%DjG^dADq5 zvZd1N)6??xuAn{x0C>jjQmHMaXyJG2=Kb|4^ z#gNyR1jeM4K|^weW-G06eXws;U{$HwKCPAi_!G+yfBw)F_zC{$^4DMR)9dRR68g>| z=I)?%kl#x!hk}ZIwNGA5V`{h1NCxXuz>N ziu8mqP75OAgzD$~pfQ>PLz(ZYg=eMaeB1=o6Y_d>({zv@gZ`2fVO7IYOj5(DV%f_6 zT-w<=KB>1_%}t+|d^pZ@+g(sz5;@iQ*+PFi?& z2jwEhyUw{9T`2>BZa|d}A zl}f(!b0$rO-1aw1{^_u4BIgEw8s|k$L}ng#Fm&$FfV#-hI~f6LBB=qv>mA3sPS8nCOK8rRmj3)dSWPLn z?_C{7s8d%oxH}i?@0~Y%&fV-B(s$YGMtkWyhd81DO$L>6F`G@TD^pz>Z}N?7SRdGfDO(iDspZePvv!o;-0!_0f>KBN z?)6y#l43uJJJ@lo)XIfSYB}Og$V+y=6t++V``9N!45(2VxHFCnL*K=Yd1i`6+(E@Y z9Xl3V256RjOqhdVd*8~6ya*e!NZGa9rwF;49(8wJ`wRKAh`XoVkAHWj?bzGcAex)| z%a_g9+PnA8yo3FUTJbV>$Y!-ZwLa|?;120KFeW}Ni3EqBRVtcR%%#@dYC=v#!&1nE z!zU4T*rr26+TqU0*lc$%n|9bi&oLsdcEB5-5}iASq%4wqQtp$dl*n>A51mo;5Mzj; zxW3t6&SwAl`)_pJk$uyhSV6FT2dYokgMax7g`M_2EYl5j0L%HG-{P)UbVq9Z2h80k zMpff0Q?J(#k9YS{o1yTUoCq&O!yyMr%)%O8hr5oM4mFPn;*PDQM9n0_h;XvVf7Q7Y z#(5vha|~e)Nt1S2UX3d2>20Vw*2J|vKQUIMc!u=dKjU-yy!tmr5F__kUuDpZp}F7x z_GJsTcJ1EnlVR?9_1*8Iqu6Qs+mNop`x};(U!Q$u%uN&7JK1d75mFbUF=tyCbSM5kl+sbm0IOe0VZ&*&E1;ZF464(WHwwV1M!qUiDZ z#(BWbsdo#QVP_eFrc7bT9kEMF==WF1`8xdcjAJb5D*y8j?25q<{CU1t_BZIw)+OSd;B}MwU<5H z+dDhM|F+N0c6QEow$VMO?bFYVFKo6Cn@44&@Js{Zp%V>Y-deF5u`0 zhdV(CS~n)LR@>bvWV4y^&i)zlEB@Hd7q!Ft&)x68|L{X5dmi`M2|g~A3p*!T?Q{15 zn=hJ&)$&;;wGmW?7iHNVR{KecBp4pM66a;)AiPZwS(o+h>{^&98_jzWcOuJ*9qw2k 
zG6x8=TYVE#->8w$`sUC1e7$`s2kQEEf1`a=tkpVUw+41diiKPj8G28<-0kJ7)x&$& zzPoXmyHVgSU&zB+XHhQ+qX+WiD3lj3KKBJ*v=7^jLN=4K232)UUW%!169J22z})~v zb-{y9n#klM&F@>jReIvhU4&l#B6ry%)ICE6}pUUD>tpJD^@9KY=Si z4d{&*$Hvf%OWO!jSelmO^Pujwws($OHKU%r*2#tc1bqkXKWg1{dYn>v?yt@|${gaZP=xfmre0P-zjEmP)~of;^3kx=o5&i`PV%`F8BrIe z{QjxMxP>H#5O=-vETk{+ywk(Fh&xjB*i2INtT>LiV_~IH~Wmh-7Cb5NT0+x?4S)j^WtEH8?);s|5(Q(xQ1MU z2c}j53Y9`S*~G;6EKO1lMNwl(dsGRTo~IFayV^~1XMDfJ-Q2gr-Jee{a#y{~-Obb; z(z$Ck8)uoVl-WH!9kZ{+_Ej@6D~b&I@9E(V@4SnGi@XDs$%f4~nsbWdKAk%j={tcP z8=Dba6d6$6v6wwMtOl1;nH}U+95io!^gC5i4DLFM4}TimmG3Qgm-Jor(n`Qh)w?Pd z%eboBX+D+S7>JLG-ZY8w$}k+z#*g+cuj!9Mo+#>W|&u(+jYdh*B1M#S9#(s z;vS)Bp8d~3Wha-~2u9TT)fNA6G~gg;b^_aXEac(q+fa~+FY=6NzRj5ydD~zB^wx0^ zgc;;q5Dsf}I@%YDEhs@#DwEB(S}!2%uHO2x-2LzkargJv;SP3sVWI`@b~D+uHKP7z zk0&PmQ8Ph4_UMTVxQDRe(6Sz{$H`FCEvh>QJ=-xnu@Z(I3*=+aHp2EO8kOZ_R8fM@ zQrY~=V}04P{`vcfZ;w;2efJG<_fh)p^Dk$IJEZXHrF?dK!xB=YwRj?m6dp+%l{0Z& z{?1AY^h9Qu+lxz)9FPKXUX_B!l}YD(1EUmaR>SdCSyh#YHMO1HJvjKEKFvOG_u=>R z_4OS#@QzQk67*BCfom}(+a=XRkh6M`8>CP=`i`A}Jqw1R%(qqa*hIQS-#BYjVb=Td zuq;Op*eF{t8tY5WFRDslE0--)cK>_OY}6Xn%2(&EcyGD;O7mJk zL8SAXgv~sC3)uCW2yqsAROs9}A+G|I-+;?TSX4DGuO{b%q2;Yy0g9pOZiNPIP;Y#k z-pyUTevjwb59qs_TgSs!2w%cl^`NqYL*CuVxU4EFtfdGXCx|eo;e;)uY6fQpzZX2v zUf>q8X(!Eu$jq{OuK@IEa6;q)nrCO`NW!e_t9JXm8jP6I*}@mDZG9zob6xK4g&9RHUF_y_>Cuq7C@15pJ8U5c4hOk+`l$%QAUilH@E&niq;J8sK(j0}3tdqR z?pP=9S8jPM80fbGnM<&wfx`1T@eC~?+JH}o?Bm)yNl1*XdJ)HWK))q zB=yOQVRhJQqY+`^xYrnGrc-c9ob}Cq{RmLIl^Pl0M1#A|!Vcn&CMmPJpNuEhep4b( zUu8ZwulS?J?18(_dF_io6^qEFC4z1>DcdE<9k7u!C(iP)d^`x;u~1+R!UK45#P-!) 
z3llNq#MpJ)RG<$8hg=<+bSTPNa%D~04+gfjvW4OUcOUg0dT!k&cgSwcW>Z`1iaTac z#Gqcv+$bT6BCf%x?^@^zZ`Q|&6!n!OKp$Bt22Jr!e#FRI(v3?VxVu>?w+laK(i@6<3V|1u+#!N=ji7lJQWtdY^op9ihY^S9ue@tv zCK%D__3$h+(8-tePN1I{7nI?N%4;j};mA7jWlKj7+ znoa8Md+ zu#~XNQ_`Y|7)6c+|2;PA@P4hp4&qGn$EztS%Op;^PhO*s-yRF=cDtOM3R{UG4wCXg$Kf(_=iSQ$ zhBPI<5_Z#$(OGe#mpeSqvM!1wO;Jhq+vk-?#Fow!PnySvdduD~SI_@}yBn1p~LN2u)RHo&=)ub9WQv}VhxOo(BD4K5g z+hfA~bvowtTSpmD)C({7a!1>i9sOaXn;f6G_fOvqil*ie4m$MaCUe9ScVjvYZG* zv}ZP3-fi!`X!XvpUt;g#fx8<$1Hf(r5;1ua|Kd6?edQhxEP^s(IG zK`o!xkGdu9n(yMSTCNu1xNYvx8R1Os{~aF{UZi3 z<$FxHY~+{SrtPOjDJbd5^SC=Hgv3DkHXX;cUbiz?hcP%maQ0SP3W32?Kb|j5o3fp+1=p z+4dCiMQ7S+tBr6uM;Oj&SeAomn&pLoA)2PvmDu!?$=Kq);z*}6I|tRf!Cj@=wdz`} zE4h2u6MiA+uHgQTaR>9P)@&Uf*Yew2bVyxHtjcz`Y9fbbMX2e{>;F2{j|FkoM_3(S zvdX)e7$rt{oJ+|#*-pHtK+&YjraX%JpG;4y5zA)!^{@E@7~n4(@%eGw>GLbjh{sa- zfPDEgu2e6)T+gRdeti z=i*zrRqo0LcUryKuGWe>+2xSBzZPFjL{$q#xb$jdFwb_l6FD^fIz`tPtnzNAXmQ+$ zbu4FWRCxJ`ahh~c){r}9f8)1@6-z3e`L$F(FucewPToi9JL556#-n-neFw7aD_`_L zp982V+&#+r#Ab9~Z#0T`fIFm+wYqlLXls>B+N=!E%WveU8ZuJ@KJa7(Cxiz;<|<E?EEJ2vVsv7H)?W(e#%DyfgHCwmL9bPxCVZQ=hy>oS^ zeb8+vmy2KYxau__Xs295H`u0q&69 zgg&>`x`sZxH3VU;4!d?=)?dB8pt)buG>8K!WA^R@ch#b$Lw-^UI~oa z=mAy`dE5zJ=gx>u8KfV7nOzIJnV?zkxKZy`UpoX#Z)TPxP0C_2YEMk55eRO7skKiI zTCM6=a1B3(yShGoLgj!;1^E@)#Y1aeJifG`GlipB?Ht&nEf#l*wR*MIY}fAqcSwr* zDqf>}2HCV~II$wH{ic{~H0=_RSLwmr=>eE9`5~j4ojsWO;nq=kZ~Lc!TTQO6%_~ZPbh!v-MD&RI#0RN9x~1-uS}|MfaoI>VK5ZV4=d}h zN~gz7Xv))uU{5&hFOS}PxNB(D%5F*9DQ*842}bU^1UJ%|mu+ZVTD%L~X}}%w{@d-- zootF!)HNsp8dc1r6gkT{F?YCjMTa{d2SG_UJJTX{M|JZ#pe_qiubh+R7>0BNqEaFe zPpZM-^Hky2V_1zsS4T76!|iaV`xWJCt*Y&|3z@Vf5)3J-8dcS>a?28l^xlpr=*JMc z`CBl!byhkB?&`&2{^#?)$G0%gR?mYenuo`yr?R&Uu0Cy*+ z#n*_th?JBTCMOpp^(%{{UUt_xn{ zuwC5Cq?dyc<+rt@T~#b3IVuvcpoY2YW|wiSPvl%TTA2*bvKP4fevB3QS%$DV+~Ea# z!X67NrqtGU>G-7{$XLDcda|Fy9XboGUEW6ADf6pKe*ffBLP}l~=?hEDI1JHW((h0B zWqd0Zi~0QvvE*-&$kz7W!NH4WN8dfY8{D;#t-hB}rPo8sZ^^ZU>JFI+(uKI2xWpZ# zJ+K2e8JE&;rD)pOU76Oo<9*`n2;s0u!~VonGOP?kkJ+-;{F+ziL)?LtdDt#M{mO7s zmY0^6R#y^O(&TgkDl7yyvpWX||Eb@NdG?&U_RDG> 
zDZGt9L{X=b_HZa*Lu#l~sN|eGpTM!Ac#}CkCW0J-P*11cyG7@2!h>FfGKVH7?Xf9o zSg}5T^?LW$(wFH*{^{J++uEBB z|Kw!f(i3}3QkBs9FS)`&>zungqVG<$`fj_J&!y(jOgk_4N#T%*u)(IRv6kDdyot9F zT$I^ARYyX)Uc-$Zc9e>io)TT$TDAgo_ru6BAyqzOH%)|D4U|%Z0`r(|4M-d(e8(DCbf{BqY6& zm;8&Wg(O^%QHC^LH?PcN)Qw=M8*(j7%hY2ACoV!~6qaFx0Wj~p9+9vHqKUpGIgXr) z&Fw;|b#egNvS7@2{EFM>?)dbioZU`st_Kt)q$bzqt+<*BcuN<=FZ6rA}5H0gNSyHpe*5-eMy$56xf(;SN_vFIcXPRipBHn*V@N^ z_xc^;Znt$%IVhF4x6{u|!AKxDKcAeBugWVjGOmy#3EW{eZl1SZ?0Rmi^D#+YT3LCs z5|@)|2>BC1GL_2g?dErD<-J0=P%c(?5qGz_Pj-0p1;1FGGKGu#IA*zaJhWE$E(L%@jS!vB0D(4iZp3< zJ$Bu#bJwWVw0h_zSXyk@gqNTt_9RP?52-v3R&B%qfb|UL3TG zyIQq~=h?K~$x3%(7xz?!NJ?@&#W^pf*+8lO=V{&pa ztOgvUi}sD<_7RuF$I;f$S9lKMDdC$FdCqBMtvTVgAPkVCDWpm|BQ9_Co>w`6vrKKn5y67SvpDvwe>2ha0bN9*p zO9*&)y}yw4v?fw^LLw9E~T{u%enM3i-60 z_br77O_6Pm&vh+Krcq*+@o_$mH&ZRfkBcXsI#aGcxx3mOq?TwpuZ;`0er~ed}5pz%c zlTlR-S~t_VLc6$=$)-}NA9nKXlh#4~j?J^3rg_!lU$fiKEfKe>E~<(WG!x_~vcL3t z!#&j)J+r>Y)beL4yr`Lm#wd_4#B{h5J>w8+MVb_KYJOosjYNo4wyd>Zyl6J+$U3R$ ze#Kqj4o&#Am(}WN`PnafxxHMexVKZ z59qsM^|*GHwOIa0ZDn?T%^&aNbK4oh0#PBz&`YX|*suL^f@fA;V9gNeJ9{jqK0GMFyL0^V1!M`_301 zn7$2+xkEP+$><|AO;OLEr*day?JS2JZ5l@hLKUqO?XUss#+UmZ-=e-VN>&}59G@04 zd%uK&A!$*Ks!G3u7;zfv?wmU>Bsg-e&ntRtr8v%Mgc=yZ?TpCqUJpT8(JX6^sr!-0 zkLe8J?ti%E)>o-7@-f`~-id>%HjZ{)zcNLTK3U&LZ)ZxS+G#11PHoNwf~vZ2UznVX z`4`X>s7DEOV9|g2)QR;x>cH=x#QVVUI6PT=bybc=qe^i7St@mQmMQG*yiTW=ElMa9 zQ6kTCJH=KTO&Z0@t#j8SO}##(wyV{U|C5_j-23WUY#|ypkp!bBQtK()0^YL)GmPO} zr1{h9R`gqG!Q=H_=#wD~VD7B_>Y8j{h`OQbST1{9J-B1s{bg|1JS-J6uPn;GguF*& z$7j!KrPDniZ+ShaxTCRHA}UELY?*x-?u@5eO(u~oiS%AH9I_Cpbmpv7+RMGpyxK5_ z6t`Pd_9L6QLhVKS>OSwc$ekg(_0!|>>G9t4fU>Wu@o760>Vb(6XLp@*=Y&fC4D&gy z8XY!TfM?=+@^gj9G|P#!NtuevvLuBfkJFi4UOPPoJK#31$9uRdV(!{I*|a61uBmE7 z2|i9`bGhu<*B*YzZ&zdK|-R;k_Zwo97v`@>~G!=^Mt5U-6RxKn=4+w*x^Um!% z&gu0yMaD&1-+Ur=W-~P)8dbA<{1sx{MOXuhB=^ZvA;t0>0s@NlLoh$TM*XL2aEJ9> z>#&whnIaM_J4Hf4%kpMA_qwptE@Y9;BP|g$=<`V#2SiQ!F)=s5-9-Tla%mbplcYcd zKstpCll)Ew8Ft9Q2`S;IG(CmnN&DYj)pxaH?Pa@kTG~pPLaO>kj!j8HD?u{? 
z&bK+|&Iu*noy-8~aC}-e?UdE_*f$__x$9;wx(Lb?4#(cC%I;8Pd8@EjLf%BJu4npw zb@D!>?^>-A6meTaR#p;Cs7l1Nfo8eV5&Gh$=0|6SxXVMP^xfi4ug-^zsCMHB z4Q~NC-;t0izp=;M;eLvw2WT__vbff@VU2ll&2@V86I}}vF-r@NSqQL#;(B@o@3BGO zW0EijrD)>KN;0IFwsQG`wp(en>*YJg9ri0)FEljIDr>7NF!?P+6(wTbNbhCJ?X$Do z>s0FbdL;B{US547L%cb%{QgK4qEkUQ7>$8hqwcM zS8diR*uFE?Nn$ai@&ahGJIf*hkgb=3b!5ejjJ$+>0sR`sAb0(X&)hqc5q7ftW+@hX zgnZWZ&GhzOvDDt%&TOTAHwD9z8jHu0D=WY})^~ahraPwZ%D8TTcGzwlHX7}Gc6%$R zOs&XoViIiA(lp)EIf~IMNkFlGhO&K%!ZQ)foX488gOL^S(Z9?cX6K8 zxjV$mc-m3>EVJ!W9<9hLt80>KCMnxvMr1r)?gX&#kkaFjf^uAc@z`V*M?4}wIHQ-} z(YX^iVZcQ=g6i7RlfIY~3_ee1j+*U5*pKQ2F5DsRiuJly*J^pB??Os<+W`5L;b0If ztyBitrfrB9*dhYR&V#0Y2{a$C$WNAFg%>iuK4mRMf|r@Z=^HrQX;D!^0UBAA$gR1;`#U@BRP;>@j=sKyXSdP}EI!Z7=;<0ffX6Y+0CJE`in=DR_@|Tzk;?3V z$*E^k+&S(V=iEhf?sOY&L6Q`=68Ui>oq7H9&S|-vFJv~?5u575JD-emX$ z?qFipR{&R60O2=DH53Ux-FlVD7V^1YGpY0zg+rg`lX3roe?eZ+GcJ_-!CmvTT+F;$ zR*<8%8kg4+78^;@v!V#~Qk=cu`NjptcI}XN6G1znxABZ#Y)lWJcRIxp3d-LXkmNQpd4{ami$6!$K7cUIrcCym6#HGP$M$sP+22ZPpB3RzX9U1%_! zUN(ioZp1#&rN|KdXWX>=*kK7V3=q9 z9er24``lHVhbM*Cspk>pQ9K@BTZr~M2+B39*GoC)4!N`wA}iA7>&>)gigxi%&jeI2 zhGZU=ALD$^5fO1`c86njD2EbRN4~7qu2(e}-LF<3;M&~n>fFhOy%mpx&9%CMz>`9u zV1U?6XEWK;5_T|cw`9F8V?HZZOy`py{AW>BI+QIz1^bMykmqEkpQb!qBQ$x1bb0QFlL=Z9hcCCE<{R(|QHl9Q_WCY^aH{jU|J3INzHd3fc z2zEzgdm@3T!-v6Lavt>E9klN*b9ZvsF752(HUbfKURq2bg=ZyfE>SQ3$vT-%6v>3w zgFFk0x`x7&%p~c8jW<{Tgd{S4W@60eaS|g#geeNkpz{205omRK zf7iG>%;ykyQa90NVIkp%^^HEeoLGpdQ6;brY-IE0v$IkryQI!cGM*b}scQl`j|anN050^Yvg*jZRPF^xb5l zvozhy-JREWz+J1|{9o%}XM4+}1l@>*s5ERw7T(MS?r<3k9_cL6_VHETr_2OBEAWHg zL%A`CF=mbR7#m6TPc9@^<)owphF)cMc8}}jYP|;W0C#krt#tef^_-C2IN(@pA-3R_ zR13M4%I3U?^f zKUqnK6kS8tHlc=0Te+R%lUlo0(|0MpT0NAH2&sWyHYL} zz*EKn0Fz5kKq(bJVg)9bT}Kk$rKERBvnEWnVg(7xI={Wa)<9tV(wy)Ne>RQx*rKX-FTJB z6?RI+Qa-gM1|mTS;ISu`WMo+OaEGfy-81eqbCHm=W|yN%#6%*OmJ@yB ztZc!UA=AT|K-KJwiB!qhY)JMndXtV>_EiuYd5LgL}taZv_d! 
zPO@Xo6_vtDFfh8go!i;l)oR7;UTR}K5(+IQ^}k8zt8zMHMm8+mPC_k3oUoHXE^u<{ zkpjh?a;4Lwc77+DL-;BogzvrtYbJ?aV(+KE(+*m-me#8N{CfMjMOEfk+ zY+6pGGVPb`Mmd{J|MFuH5}l(6Ff^khCpy{T?kfAPd=I#*A863VS1avgx0aE@TUlLQ zRB>JBf&R^v$}Y}rcZ%e8j%r2V3G z54r1E0)YrSIlcsIZpd{AOK8rp{PJr1b!q1mHZangBm}A;m%tBo^ClM%Wlzwv7zJcy z`3aQlgwT^!ScO)C!63Pj&SlO{U$)D8xvkMaC@d``WqaR}&f2-YySLm`v?^{Poky;P zNm;bZt5cF1Fq6bAhvwOt%iMWHanxo_d;q(sgJ494^FR$fwiCJRh01p}(h@~R%BsAk zgnmlrN@ZM_;-IBL*SUK&&tBjz26h?JRZB}t@{&IqQ_-ww*?5)9X0m(PYz{dE)=(s< zNQ*H!A$Q!1PVp1WogZ_zwk9coh_b$!-pgkSXJ^NSOzIgd6x*lz5_>M*hW%ljGg()J8~&%PW0Q-r7czvz+)gcV1rnh#H_~68RBEkf)*V1SbRp3=!rq zB>g4V6vdRz<+Z~@y>oi?R-)TKkGu1=Uc8=yw;0eo3N~6|N`grE=V-3X zR#)S_>vcW!U*K-%K5$p5;Gn41e~w>fGihrCo5zb%)I<`avp)9id3IbBUBm~rX&+gs zx47duafBJNIr=3uVW7+%N28V`ppP4~&G$*4OVpq{7Dr>k5m z|C-rKF9$-fsM2R&da{&QiYKHP?hTMk$V;-{Zns0nxKJc8fc_*~EapoH*5{)^CA28N zSwYSQvJ3qN~!u1$9?RGNZBa28jY>RrlgSN`BrvEJJG9VG`|(@Vi-F~N=*Cx)6+|n?r6w_JcRtt z&Q3m`+e)R^k@cn|?RawxmS51}$MGcb`QIXupyij;b|!-!40G&;Nl_jlU!!kzMMhIH z;%-6etk=DVyL+*Y(uu`8`0sx!#cU=O2%yiSyqX9PTS?mG5-xM+;hj7&FiJY!jotL} z+^o@+pp%8m^J4;sd|9GD9FyX)N0K`P8KAZ1i$fUe>iy@gBYHEPL<06;m!A0T3t=U= zuIG4X3)x&I4Z9AC5{*Wo1prusb~%=eMO9UapeYt|cD4(J9Yo!x6>_oVrPY-sWca~z zBaIl7q;G>eeV#)G){BFaQX!lAMN#(GR##T0R0~PEXv4lcUpZzNjuRQz`yxOsF5sp@6s(3Moc+G=b)xJ}5lq z@2y%Pe=FPpcn7;$@hp?FM#9p{%4!_8X^B}vc#Av6$vGLCG)=z?JIC)(K-oA-lg$_N z`8?F}LFB0mQUbUBk>iQzq7qb08?Q32_wqZ%()O#&pb7aCZYhRM!8oEg(Ocx_o8b<9 z*IKQ^M!5{zw9+avD8fp>L5vc;`c4-S(*7Cxot?|f@G^I>IKnt*3CGpVxS{(UH|~Q4V`f0FhXSFW<2+4LL3IIrz}FPT zvH{7(HLZSY>%^Dk)3}RWEpqHU^rR1Y81}_QWWD`}Jk?x&?^h^~oZ7Sokv>cG$JeC& z{Yb#JnM!AlcXx}K?ewN207we8re-usPnhvQEWI`(*cO^&O>p-ABI z7EGt5d@-B<8JRSZAaW-xzH~aXvs>EPLsRg^x}v)6i9S8J>K)PX-xzmD%EdzNxhb?S z$5&-Z4MGIyfWV0!?>ToJ|AM}%6l0&mN)mGdH15VRcZ{BVJ2OMf5dkF(OQw<%*+^vz z#d1-DHe`21ZkoGT4|mAP=v!T}FGAhA02Jwj;JjUMFz7%qFrIq#I=_4Hau-d%8x-_x zm*k}-&~-8YIeFhWchy=co8Aa2Q}Os}GAacqVt{5LBI+`CoJiZuZ@U&|8_7I|qM_Xa z0Z>oME6_A)4odUT$1of+rM7pF6VcYRyK@D;0C)H+{I|zpJO2q%d8!(Tn0|piV)>oj 
zJj4t5Uhfr-^QE2qaW<{jPgEuUYTpytsN3F|vcGliDwRSeZHu_=^6JteLS*?VL5z$1 zTio#+gTTAIO*>_E(F`YYGdg!T2*oRUSYd#0SX2p?K!>3tbOv!(YuCSJ?u>`T^h)ed zs`H6mmgn~)c%3ejE0uOmOQrH&Az$7>Gi)BRCnCyks#z}(sWQ5=^Xhdk^^#QHqFoh_ALp2Ff0nsi_1QomROk%&5<#4{^Y z4UKj5wQ>fz7Gb-*(wC6j7J?ilW}#2xrFqulb3P_6g}nEZc$I}^X(Q+k z>iKy2agHDzZfP2FP4^YcW-fcuY(w$AJMjK{ufrGM4uca<$gu^yg@G(RyQJ(Z`+>pD zbgoo5D<47ayj&_pT7rr)m0Veo^&Ic(F!yb6S8tzfZ&^aIH%syxyBdOeDFaUNt$B9b z=bLrW7Zl#KnWpi^tk8Ls*X!}{;_L`P^@qa{rLBe{#H-hZQ)8Rt_TPW+b+~!%^hb1R zk;xEdXOAwxUThF|0Vw5<3%N}C)iQc!MVViPswS57SlRR5~n z*C$1gYk?+D$lMkf=Q(#19^Q9(o7P0QeEfy}W{5kHV_f8@H40TJ{RuT<+RB}k+y4^o zVwbq{W9}BB3N$J~<9FC{Epzl26(2$~cEoI6~y2Q7+Lk}Vx{i-zSk47bE2bDQ%w+|1STC;w8 z=@eI9hc7YDcDOTgb`ti<$pr~1ynQti3Rr*HO1;{01XV>*Rm5WgbGL$H3%+rEcS*`e z?Z#25klEg_C{e#1lFRxXgl*_NDu8jadcEy&4rxGFPj6Z?MKQwL+=(`dFe}oO-9L?N zYEwF0C^m5WyMGmT9sDK}&`nBJ7Z+8e^a7*LMj?A599>BCaQ6v4Z14FxO8Mv*Qs)Yp zSL-2YiX5NcS4;#&8Q=j=tl_Bg9zk$YwytaO$U#xPaZ`GIT3)0_De_TReiNGxM?=BX zR`#sjJZUuRKwYsr>bL#Izc6>*{n|tVO4_MvEQa6Pr>fK zb60I315c}!bK9GNpd`JK&)behV+gkBLJUzixsU|IPVcq&QR~Ft z7vg;TP29ox9JUXeEp4}yPj8t*3c4JtvGA}NTX?`7Yv?=ZYk|F6ah5PcHtn#L9Hj?X zp^Kft-7HB_{YtEF<&C`P4sE1zg<7rELOz97DHgl--R)n8cX0RDO>qbAY%h0lr1+p% zyS$Q+V@CaLm|k`6o_v-+d;hp=BJi|UT|3w<=hM$Z%3^$VWhyFJ5IkM(SQfYwSVnY? 
zFc64G^y68Up7ROgJ}>lx0vP}o!^^k`(yT^fcJz5xfye(9-~0Qx z(>{y4s}Qhlp!X>@ z-M7LW09P(I^;Yml`P}n>vKX@`q(!A4`Kk=Z`+T@TV>g>yV1x;e6Z)x`NeVn&AJ^fI z>2haQp)N=wrUaj+GX-=_%20Ffw%_YzxdYr8&-rOjdY`$|^_@NlM~(99v@H}~u*-=> zHDn6frQ%!fvd<>s^xtOdx3uxz(6|GUiqSsT54J zNpUH2_;2i>l5TeZf^{8q}Mh9_lvJgT^DHc_1AXV{DV+IgPyF{7}A zXQD`ARP=Gs$dP5c+)YAyw-ZrY~ZqF3kz{smec@Ai6g%Y!l1WX34rH4-WiT0yV;B;$|iEK z&H}j=26r@4cuLeRFRj|ABJ1gVsnR+)Mc(p#;;wPexVwjb1+G-0Rd!nk2dAZ6YCRYV z$6~4)4w!5N$9dQpq2pJG99~diD6`bzZqiK7ikuTKEE}E&;*OyDr7%<$TvLL}nV&0_ zTB&i=e&FuQad!{(9XjK~x>l+G7ipzzdN~paNei(>MYWLB2ySS>I}vw-*r{;x93#4V zGb}8`?*zj;A(MfH_i%Tw^c|cDaJReL zXtxichrL5_OJQk#9||xs;>@^!G@etBIvyMIdVRB^&33uk>?8At=wrDVAM9A*zMmd` z1k!pGs3F)Vg##Pg`P1D}tQXgAA#Lq8v^j+hAaaYzF zjl&l$EtB4WZf*b?@N{WQz7Gc2gPkrBOrv4`hpMdIzoJtoqHl%0s#qU?f4WI#lRvk>1j2>Y(MAi&tz7|){l^Xrl}a6>S1jHQ z^4>GgV(xZ#v%sAyCEu}&FMZYH3Iuhvt16p2*>a_I*x~L@_IZc;PawdZem*Zw4%(HS z?1n`dmLz*rRgqIMG$QJY!MKYoqw70{wp!lC?z<%mL9==pANT|0S8%fggE9>p!fxGQozN#JGaedd} zu5&^MCoQdgw)s;e6kd!jMBTrcC`!cK;qJwTTLEz>6!qOz((j-JaAozdWR?TfM-7mU zU_@F=$Zw=bVB=LLUxzM`I(HzqeHGr%UA=}rC0aQP+(i>BE6`B!D|7ai_&a8uQBUOw za5w2khS6p&n<;AdL*KPpT8F!X-F9w!Gl(2eyM19H3~MMN=P~;3bgHwD@q+DLEqg44 zOYp&(aVPCj5NMjB%nHOV+ZWZ~)2;1%P3Nw1AGm7(cPF(%2HSTDY)U0>`FJZS2u|ur zLYDS1cQxRye)shq?~?!xjj0y{vFfsh1E(W9Z@a(X9^yWRqKWe9!0?W^!!zXG_^8b@cZk-iJ5 zNj;-qzHO1Z3`*$0Ekal1`22k2c{*FDwY%Kiaedc0q1M4}KD`lC_ZJpoc1g0>2wG%( z2%n`gLlX6 zyL0Z^M|+!~@09s?912a`GI+_$^DZtzkW}aQgPYr#v(uBiuJ5Xyc~;XagS~(yV~esD zTbN#mMTgAi9;4jHT;bI4vJ-_vrt-|B*>0FTqzURaP?RIHz@Ac-LmyOrCY^i*R=2@+NhxOgL zw$Z9>tybSDZEuY#Q!A_TbX0{3WaB$N%cWNuQm(L%(Xf^M`Rj7V&9i`R!PiwbSDs1O+Irx7UWjnW>N@a69g>s|W zZZ@^r9k%bf+|_C)HBD<5vgysp@Zzf67Ym01qsyb*3~(oOxO1|c&oxB7rwb?+6G=UG ziXw7pjk@lVqS@)ociZxTb>r55|BI~A-Q9fgu1LW+ z45`39#cVcPEY_fZdGl`DcjeAmXzkLkwbILMCS{7arM^`e!Ld%*Xm+eqQ5zQI1eOz= z3{8CiyYD8=w95sX8zOIjCvXf+I{GF5q(3?>se$EmE>}81*XIz&PTV$j@82gwa)8J~ z{`YRBa=y~uBi(wqYCZRlCFXUA1!-NZTRL@+|Yp7Et!> zvOS@w78@}zI5P&y$|-iZ6UEsf@~Vy5ORj~PW=1%tQ_#7C+9~WPNti>^cK>8dUBs2} 
z@+bdk>)_pfqq6t-6~J9(w^&7Hi1wA~`Z#GCf>+|vC!_`3HCuPtzN_k|U9GmOr={{< zX*&gb-_!o-1jM1xgJ90_5CDa|3&h4c~H0_B^ z`D4(ji%e&BN-qu!g?CrDt6t>p2kg(H0j(Fyu+n&$yK?uqP3#(7?pl==vIlB79T4&Y zyZY{Fhr7-n-Tw84)2&wP+G(+neVw*O+~L@ye<7-vC_-Qdks~4Cz&oRlEY!O*4S$%s zeu$p(K}^aZ3r``=3O)|av~G1_DjAd9k-+oRcJcUsc!D~r-THMf$TPTmYH;`WUw`bC zj$mlZxQIe$*tGI(%T+3`)oMbkL3Bx*jas{0-9_B}bzz<@lLX^^ z?;yv=#)#th*!MH=w=9RS;{}?a0_sA{9*YgDfsL))uLmvNBCp-{h0v>DRWEY)!{2D0 z&6mrEdO8RMNjLG;YTdQ;-)gf*Iv?fAW$wy%iMuK|Yt1I&E}MQ93`N5WiG&ogAa%#G z9M3bL?go(vI0BC#%^#)iaQ7ZY@Q#fMcxQJ+WXI8TOA;n`RPx*7su~?>cw2I&u+_&yBdN>%OBtq1~=jE*x3|A5A!_je4V8L)i6l_dDXQaL2gQn#f|= z%ceI?iV|J0FT|96D@i^Ukc!f^-5?I)WCv#$`Xkldci$x(E)Fu!coynfI2o3k;W-yc z4i8HU{^^)o2|Y_^i?xFT>{;CQg}8z{MBd+j`w(}#?GsI_)oS&6?S4qTSTEN!#N9UH z&R7we`{AEE#vS?;HygXfay9@RLE^qeM3h(@&9hMmZl?!eQx>>`h)R=duh+{Ckfb&2MvjXtsUg!=X0OtE@#01EZd{7DxcmL@AO6@`AHnqi z0Du5VL_t)}7k5gh#ZtQrN~V34cgj1(!uB73_i&flDc%+Cs+hYP zQg=I<)Mg;4+GV>G4F?<)&db5v4FGonIJ2xkSwC`?)z?x+ePg(HiXNwo>z3I_OE~JE zeA6c>3X#g@TmN~1ERv(!zZAyKO^3UuSl|8d({`?~liw>8&Omm~3U^=5b_zRZ*|Yr4 z&i3|C-H*`R-~VKG%6Eo45Of~J+H(?+{iEM_xlODLLLU6I!m#Y(x* zEDNzuEUq`fPk1LdsHJ$J49|yU%Icz#Q|!dak2-fOO_7f9u;1UeI*$O~$nNcGt;5UL z;?;p_r`zWyQVt{A8Biw;EQOExHNn5KG^SRWr5)R9&@@iD+CtU+j zYYaA5ot=K3ypwrMny0SC9W*WDMntD5vK{U?k+ChC!r=v3o?qOLtZ!w1E|w144edhZ z+1ud`zv>0<=H}*};(zy6I`8AI=(}oF=dRUkAJ(*;OnNggEKMg? 
z=6?SRafh^pzGG{({(e_v?!s}!ub=k*beFgT^lDnOaoR2xvguK`G@OXp{fl7}iJe(k zI$^QC^I-0LwrkF^*tIZIE|EjefLnBfW5EgZC}9mrlA4@PDk11DP(E$n9qx8J+*z@V zKE>l|HAdPBU+3mNl8vv--6cJTzjQw+h`Um@SiJ+>X;|g8Pur!l%=Shgycp|?PKJ?F zL5$Lm1s1}EJ&=ot_1!p{A3qfBd6k$U4btb;x$}6u6XPOnBM3`Km6Gweq$sv@Hdi`A z8t?Y~ii_)aRv10&9Ne6<8cO5$yFlDE&@|3_4(Vy&u5kO@A@dHtYKMo7_GvzwS`SLA ztMWoL95z$b0P;g|MRuG6>(r-N8+n~Il&->a5VKD2_yiO?b~=&98~2GWg0Kcvbuqp& zg`AFl^-_n09_Q%KoQuEpooh&7cvE1foO z-LJS5kD^brXw0nmhtAH~Z4>lCv7VoJkHC@;)^~c?!0mAd3n^N?-Z*TZmS1nDEXr_l zMV6=4pm~`@*4<-P7-1L=a|ebQKPwJ>in{ymyC|AzS#ARFc;LyF7dcj>Z2gKlm5j&K zaBw-b{ra?a*gn*1T3v%0eD$v%f2;=+p&PsT%nv{O-D4}=uer4&=V!00?WfqUC>AS5 z>))?^eIX10vSur-I=p)8MYD{Y3M!(!NyhC+-I-jr0a_U61zg&Maf$-(aUxV^iI3ly zy`=E0H0PWV2J}1&q(UL0k6tz@%AzFO?NY>?%I=k*-lJBn)bxbjuP=HGn{MCz`NQ8o zoDbl;JjOdF7yXR4mQ&__z}!I~{Oxg9KIg9Wzfv~65r`PK%6yRi> z0?W@p4LgGT1gAn5E5-PDojc?si?~D-O_CNxRo9Z}MFT%=?VW)~+tjMJUIDbYn=kzN z&!0d1@XisvAIHV@JBYsf^M}`kLbu%I*XOQkL_gtIZyo=yluKD7`)YD6DY+FhNr0n$8t!)vBK2 z0~NP#jk{{SRy_XWkN^9>|MPuYg5J>|rEDlSxVs(a6M0c|T`LadvOKbqvkXL40e6FX4Cw$#Iv%MDemN0SLY9qf^iim3 zP3_jXYiql;TKR16=e_$TKVLX5%J%l&PNA?wg zL8kWSU>!Nq@NrTA%~k4plTy{Dl_BArM0>zC*@UnUJ04D_6qrW{lMT3FEV}Y>w@jx z#^K=$qvtr1^TfgOT|X`gu9(tubW3a87JCMF&6D(HzZIkM2f$LsX)yjLJxHo`F+jV9!^#js-fWjkBk)ozVD5EzrT z)_QSxa{50=&AyFu_^gvZm$!cX@_#40HDpBG)=b-~fLGCKy*Ox9DurxnIiwD&^LD=! 
zwKyoz$94P(oL7OkL%u9=-DuCN#6;3GC+fL9pzp?g9BrcrQ#hQE`;uxfLZ!0VS_`Lf z>y9B<) zMwnTlmpfjV5s*`1?YjZ)zO#@vlIDa7?--5|5IK5agdQSIs=An1iA#~ldODlm)g9Vq z7=S|ZX~a!rn^!w$kV!iX2y$< z;^#XRy4cO+sEZaR#`MY+4C5L=FET_0RJ(n}9#U-+kG4$s<6}mj4 zDBFNAgL^sZufQX)U%UET0(s~5QY>g7DIEte_0 z%$@FT)NgOy7#4Ss1=arbb#{9r7*XSKc|NN2I|y-B-<>e}E&_sRLY)1yXwR#}MA%$2 zGvgC8EbGF-cur_dYeO@wU6$kfN`y#fvs&%&;NZormtlO68xu&U!2J(v~5Osp}O8_>jUw1|B~##?L_Q3s_TeG8ADmxfiAq`>R~?u>OiuXkc%oS|(u=~U=qrRc}p4C1ZB zork4I2?`WmpB!IQf^+FyKEM0G-F=r1ca0wIYPEdo`HvALIwe1es{2-ge#`;`PUksy z<35@qZ+MnP^2kiM9{VQ75qC~p0Apf;V<^%w9F0xOcr9f!`*Xh7Zff;f9WVSm$i0-_ zm~p_a-DvOD%G)U_GOXI=C4W@05EStk%E2P;oXh~;vhaF%(MDKTZk)UC%p}PO6QJ*m zN>jdZV#r3A+){K6GAV=BRCcGVLC7yuQNQiW_aL7rm$^GUYM+)f+s`c_b-qvbN8N4* z3ENk=76s_L0Z?}yFGo|R8{4Vqktq{FvAlOe-v{=3JQE&~86v4bSW3i`NmUJOKo@x6 zu3D?!;(p?T+?eQoMX8X@rshJ*{FLms``spjbaDKIUIRt%8i26lna4L0{`9u=6E2tF zF}MTp_&HJhoiMvqH5QN0t07Y=n?J2TXuEGmu)Zr5_A=>>U`Sbu%hBm@5Mf6PuuAOg zV5iqzw|=nM=ckEeCK=X?U1mlvisxaNS&|4SZYjAMUsFQM$fvF7+&$QL_gT(6rZyW# zrL(=Q=j#z=KABvU!ePfKDKfmrr(?&n&!6Yb(*D(4v}b>`G}keefO3!+riw_IE<3*0+oh(Ze)qi-UY z=UrkZMiJs3sD~jSeFvQigb`+F-7WP&*@EH7(^PIpEAKWM?K(dFo_xXwxgVm{aUVgg z-hzdRVz~fLMMO=m$g7KLzzQ`HIZ+T9y{fF&>-2IHJi_ivtf6$|k=aJjv!Z8wMr0YM zo35Kf=<$Fx2|ZOH5&`L4yQsOG+rToZ8OjPc)>Yi^ZZ={rY)65Au1^Y9V#eC^rtfn&rDcTS&|qrbq$>_r(i)3ZJw2eEiwWZwnNW`5WK(#w; z-8prAC(xAj#^X>fiwXHNxV0$e4!TJBm=S_9MHl5aaY=FqN7I?T-Gf#O*#?Na+o>P$ zAfGOH#DKf@VZB()|Ga_Jo#eMK*<+6!Bqib+O<-ji-0?HKzz|d$Kz5|QiB`mT=4*EF_+85`U7|OP4qReMgvW=wVhHvZH+{f*p%Nt88s1< z3u-JvODN}vuJMqKKR7^{6JMM=r0|HxoXE}~N5+YpB99~R#38dfpG-=zm=ak}XY!Tf z|2}YcKSjdWA`I^hzPwYR%OeLQi+8q$IiGi2^a(D)8VpHx`|6q! z38qqoa=F^nY7Yy?_f7Ox6^Ofo-FzlZMfQ_&UvwdwG!eD|-?-Q3VY=Kgh&+xaP0_E! 
z-FGI^<{IHW9vp!+>*L1-AN|-ynnKc4LbfB9_Gv0tKxco@x|Is=5AqoT-e3h1@GJT4 z^hO}0uJ*;IqiTpGZ8Xb!d|a11A2TZwBd*VD7;r6Fav+Dg!yV`3`N2V&MhY+BmSXmV zJ*otry~-3pRYPsM+rE+yavgyVEp@=VRdrbK5a1k1?_g9C%K?=fKxNss(XOLir)p2`-r)_)EfN9_mh?w1s+h&!!P-N~kx zgCR98%hM8M%u)mVgr_^pf;%hvT(h+GM(a|1NX!(|9B?o&2ZOIQ>?*`$pczhoxApKnN6+;W0&2w~Xf5aL`N=1A_vTW9lkA5zVq3{RN{v zuaZaPC?k4!C(Vq&cFEx2_umWS5SOAZ*yZGYL@_=8xpe%$!*;s{9i-|;WXgHH@|&-U zUw)6g>#=ku|@d*|cdI-i@qZhE9=6*pgKpxd?Ti)N#cO_Pz3a_FHd?i5RszM$!IJbau9fD3O8=* zUN<78?OrGMx%Xlb^oZZ(*@*a zxO@NO&wqXKPVI_kd*AC{pX{5uL>=zn)tgmq_W&u2LMAmA*;k{Jen|?N2$B(TBB3zI z>hVuV-T6jtz6{StC_ED}!r_K0@F4*1c+N$VrjWWQ#i6am=+<`W=;YwQ;O>em|5YyP zSK#jI-|BT@?mTzr^DIWNQLP=cYFhCuyBUe>FUDih#r=LX%Zeg0?s!4x4p$nU6^Fh; z_|w}$S_yi55bBIV*F1O*;0~7= zg>sKjiV97%30xF%Ii0B;{HIgNztdCa%Ur$lc2MtoOydjepIoHn)8F4^U#Mf5TNFaMhs{!9`?)2uFU9QB)3Jb+LJCIUJhD|b2^=( z^RbI?M3jBM-|k;i`%~HS$%`f=CLyQxOIt2?jXS+P-^bS{dN+57yzcSkx%=PoNjv*$ zJs47B3)2h!u*tSejrztRNgs5b&}qNOP^PaGhw_%fBdBr29d7@Qqn>^II7@(Q5%b%Z zIW31t-R9aCtwd$Q~ zi_2(eTBrTe2f5H6SB0uC(7dGn_V0Dd&_Dh4&;QPNjZ2S*^M-F#xr!BTccjCq62-02ODQp*Wc(3MP491*M!bDjiW}XcDB6{P!{7!e=NE%>>!~I&p7B6 z9NEq=9;eg8qgnQAuc370k;6uh`}DS>=#zj>YDN$SmK{M=joP2AJW@lcY@yt2w$b~k zeZmUZMXA-G>Ixn*NGR6(*VNCidoOqOYIjiUm$@^BuC9;YhpUt74>X!0zK>7i40;}g z0Ds5R8z)pZ-VwS{yR7fdU$<=7DOd}3KQG3u8!F6zK98N=Mn|PwYCW_Vm*Z2CG;AW! 
zG>hglyg3EjL5wlaaLf(#nEg~SQH0)CfMs#77MAzTuoPhqMkW913Q~C1t#qk|W(Tc) z(ypWHS1f-!z1=0}N z?d6CY`UON))#4zi0h%6UjSMm`*zpKK(e~AsjL#+Npa(=9I$Q>gLrw-iw@if;ccSl& zystzy)46i(s8(&aPtb)%*Y(r6!<>Pl!-}F-t952x7{uDS7$bwn^(uvF(7Swlknu_XPFRQ4Mc7Y%6fvcq7k+)+{N3aF>80m z>GnxC=GmHVJ88XMApG7KH9T*>d)&_TPWKg{rLxw(LVxMp=!z75H|E23=Y`(lUN_CK zLPmdZRO{7pJ)FWPOsntKDiy6(E9$3TuUBg4FLfi#RjRO40q>)QE=ajp$fW6rJN9N( zhGyy{Wut`wX#c8jk9obKlbIFi-)|E;SW6i(s-WpQ4@xaF#Av^&#uI)?4F$ zlUnWM^mO+feG$)XFRf#TouWs>-(JBfR+5g_Pue&-%cM6gkwv+0HK8g2D=|vbkTEL^==-vWJJBf)Q8(Qm<(e|=n1y6E zu<&3tMbLvm33EV_?9pjSjRc;jGQSpH?-UOz6+I#NQ@PVEqjp;}kgMwIMC;729esy< z7(CIzBhl=4VsNK*xWh1@CpyLkRQ(;pcg`JpJbtOt0e-=EFjq$A#L-a=d7X`75ueX- z3FCTlSt-?bccC498^{MiH_-%JJG%U|=+r>PYX=AD(p8Y&L%Oblet`R`h8^({ZqbLi z(|2Z(1<}&ljom^vHx~)V`sAfGNikEDi>3!yTIhrsLy1L>7#X@wK|-L$I~mjSD{M zDOSq0wgWcm}Fy(58&35ys zp=p&;Ia@f(6bfZDHb~0lY*xROZNkMy(d(UL`Q0-@}Atz9f0p#em%UoIC4 zrP59w5YZVvUqh;)U+f1$X@{VUY@kTvw)uAU3SC>e_{oX$o;;28ymBi6rPEpoealysT0}RS&=e_Am4W(Hmt0tQrUcI zx85|kt9}}HEzlAt`TWif0%E7A9Ue8BZKPBR*=#P0hs2Pa@xu=KO`(|2?^X^@G&G8J ztnJXP!^%z}cUE{=DDRz}wZTs<0+SVN2$Xg1TCEBw0obBCE@cb(LMgwU%~wv#ofuZF z`%dbWs_urO*UA^lh0b|wXEHna+QG?X?i%I7b~d+-POg9-i>PX63TQgj&}RqP_BANQ z1htqNMfd<^)7#sr)Yfw(TmRqQn>eJ6b`8950UK}uMG%NumyVwrg+hoB5E!>{S{Fhn zdrFA11sz*jyUcv|z2E)U?>SErMbv8hE^Vit%v3}mA^GJj&vTx04%1md$QJV-s%`09 zYN+z-_pdQnLtpW$xr38ZMeg3H?RE)*icOB=nVqe`a@=AY8nqL*{W81V>zSvYkN>Bu zj@?HFha3|=+U+6v0dR~fF5koiHIA)H*JH70G_sk-22%AFakPx@{UtM28h53N1k8yq zk8+5C+UYbKYQB(vmxJp9R|ft|r-3>AkpBu*aFvf1f;aQ!n-p(?S}qQ9+SOr4}*( zTg<uv*9G42VyPawsg^L10E)HirWJ8w0&HZ%x zod~fA>B%gW!MbJsClrZVVz>)Br=>TipvXaY z?mr8{uF1FI!Cyv+;=6*w`$DdgZ~b`&1{sbLyh`O?n1dwTHPzRzrJ`^U;-^7hq}Ia& zgI22pojcNUSf>tT!an{TSlNGI(PJBpwP(FvOgqMYX&&FG%{Crn))U+QD8~j5()n^# z=kAO1gb=$LyjfDJdEuXUt%6yxT57l6w`DQ?I~Qj7Ft4Eo*X7~?e|*e_!_ioDVEjPH zyep8n7CsD&boPI=3mytmLZyt>Uk`(|#77WaLE!!q;o~GJdUWmUQZRc zOaB(Z{jOZVW zBP{zdl}a(1fY*{t@@vO%n+4tbV8&11D3v!SDsp%7FLGCFzE;mvxW__KXFR?J0U@e8 z_ri$R$hyE0znVL6 zp|BpCRLZ~n9c_*UTzmzBx^`7e53>Hn#b_8TkfGY?Wmqm`6|#8=CoV8@ 
zmBj3`NaW+kPY`XJ1X1bduBK;5!<8(97AdBmgd$7ZkGA-bDJ_CGZ;V`AatAsJw4y1( zuO$Lo0pKK+NUW_fY;Z4qR5>vK*7PXoyWD<|WdhrQBogIMt*!Hjs8}j%am=5sQn`}N z9!{A+)$waPiNsbQz(k|5SS%R;Nvts+SvDA?vM(!Amn8Z$j5L<7p^mKN0j+SRp5Pb? zzZ`!QU*dSn5M>+oF1Rowz~><&WLlr~XSVObyF*zvTj|wV&xpp|(>b@tOFJO67~mM^ zQhX^o7TU{->SulP~`b!+V-a#yYr z@LH#p?va>1oD9L8w7!$rCZCCnrr^a}Tie@-0HDgV!O65xDZV+yysBD5{(A13gu9m7 zE#?FZyPn*Dn1TtJDf^^r!8O0)ne%zHC5#^I)8qSY)ZIV+VWCDRXs^q)vNAJ+efrQ5 zov>QXxV9|tDYed7giJw^RL~Z!FP`10UT~*?zT?)`IOMJ@%ZkPw&qN0HaF}5q1b-Nh zU##vDpX)Iw!`*{4?7RtYsY-;q#MbtsjVL!Lh$XpJYL_qC(-2jr^<7oUz}?Fip~$*_ z&11$C;*C2$k5Dt{AmX*7S42bzBM`%!;w zZ6#8KyF8BE)nxD{@}jW68$xv**a7A?fYC$(^=KlAJ8*v532g0rJcc)Aag<1@lHRN6 zJLCBEm!97aMEdP&{Y+IVf}MRF-+Gjc@vO;arJlIGb8tP4L{AqeJ)he?{$Hr=c~422 zMyZKadWDb&su-)!0(iCoejOY6FyYW9m_~Hem1gIQZ%8CUDmID|`gb~a00eL+7lFHv zYmwlAkjs&QJO}}e0lfT?Af)%OO6oDtHx4#ZmCxGkHsKD{-Ev@=vm9je@|j#!$iQpW zAc}COC>5prGV^fsJYs)4(57qHs{?2Z}dr|qr4#&Rsqqyj0hw~16Lwz%kD^ann{U)DZ` zcH!thwA+!8^Clj>66tt)vrP$HYjmLP871 zzw8WbZ*PCv`N&0f_g{j>Q`)6!U52RXtGH9-T175_w#)86iHyaQ0cVuqOq6wqR0zOz z44N#sy>8k;e?L|h#XMxUW4b%8x4^6v99_iKwA^$wu$^4uLpHeVc}0>N>X)7u!W}Vv zW$^F9$X)shcgJiHo={p?LtbtWpA2<}Ds|7r%pp!U9s^*zZ~#jC1I|O`{^Izp#N1pKN_cRDw*^y$H$#f zewt&=)=4{QK8MkS5y-mT_9@%owv3c6Dwk(&X66@{cgjkcx$sh8d7WcJ zaAphL)2`C^Lhi_9d8<{`xPw!}YjH)jS^`%MxC3(;Jj@l#+S&#T%>cYAc<04@v79gE zvxmQhj`=W4Wh-Y&wE!14_g6Hrl^OsGddx+JgiIDk7u!af9*JtPXyk_~dRig=IJ~uK z=GWFDH|smXofcOR?zG`H;BJTELUuvK;TRb!pUU{b=ebRY7&N^YBY*;T+BizJOt=%C zLjaxHL2VZZ@Zbvscd5O@!vpYE5B6*p(0d=(QVD0&?~ku@5%XsHrC2OfasH--Q_Ww@ zozjpoDJ%bWD#Yz1;#gzEGK9@OeI%Gb^M%;W&C-r3+c(cc>6hfV%?B#k?ef}(U7n{i zzx=ZB7Tf=?8~@oX!R$XguCCbaTg`r!PTvk^`%IK=BMF| zL%9s>WT_z7r|D;hMF$Q_PJpgZH#cj7+&1}?}jIZ&>Sc(xG_~w_|SxHY1scHHSJRibcHDCQ86YiKO z+F41dlQA;PI?=)d1+Jo0s7m?lK`ts*u&Yi&?ZE{`BL+>wwy{L z0Vobtx7zu9df&u_`PkrF6Z?^acow3^TeuTZ-z63`?jQmP0C$OXE_jg6RZ3O0AU8@X z&Xwd2Ef5|Dww+1f4q{p0ZbRd)ECY99QE)`~06ukJZniGN#&9R zIA;IiVyS3w@2v&rb$11@(sJvyhMn;d)z7EzQv%(=5#E(0HWNa~(VxlajT@;Iz 
zZmosKbaz;xWf_*qBh>9(St&^9P=uxfiAQlbo8y!XuB2yf*5h%F;NlA6;<@cM^S4jC zyDkIfi3!R#ujQut=AO>Ytc*OdQsdLnXpBiEKC)oZD&6*L901cZkM7T14adMr7y7QK zaYx3Dqf8k6JM!Wd!AT-z?_7~n2yNrS@1`H4&JB#pa{?%$C>Hjs=@GYNod$$!k6K#{ep zU_%=xC+fL$E(qo6N+5S$2T^#QDe614vM6S1)xF@F9~su>kX^2Yl_y$Ti)hN3T4N(qSy6(x ztJadSA+koE9z))jyWhF%+{FlYWnyLPxVWKlAu`UH=Riv`3~Lhek_6!_80WumKmttQ zfrSM_H3Vi`*3}(w*CN>{aD@4SuovW+Y5qCz$A^JCbQC0wyISoUcM+Yt1n4`kvs=F| zg8>cg=gK&E18!{>xXWcMaHFIWi6rBX$APeizG-O5Djy?hnN_6g8Hs@Kkl>z0q_XokVfrHf>ZJ$ z#Bxa|#LpbIgBo?XXGtL*3@dOonG2iWeGl25H^_i_5|C zZ1|%X(dNpfY96>#!O#}PR}tPBNI^gmk1fTRK*E2?-5V8j*{cY5%sUr19tC2L;e-j9 zmz7diRZnp#n~W=w%EgSB-U}Y{$prX^fn=DC>}4y(%DuU}%myrzS_EyiTND6ymPr71 z3_EDGPSK84GWLJT9qq7Ae(SY7w~2Xh$m?4e8F38bk_#HAk$vt7$}|mzCAGwJk!RUV zu`0I-c|_OW?<8HdtOHsjGa_)Kn4DtNVqeq&(>-;aTjnVACF;b zihpsL;X@{H`azRn@_I)-&kZAYTZsUmX+NbR1|5|^PF9y$s_I1Rim(uW&5<)nZ0_P~eb6oI+ zP$`ggWw;>lYq&GwS)Dr^H(tlZlDVR8S&Kz75JKklPZV78kUdO8T*0!ha;4KVMdyxN zi-5jEJ;w6fZdO!JPIT-v?rP;?P86QA)A49%4_>#+aiH(W_+8^R?lk`{8CVVsm>GTz zB8DiKN-(tfQXY=z&(lBET-p+0oCyqhxux;P>+$79zke}~<99{*Pq`=_20`yIIKAr( zvzC4FsMxKkCAd#j%;F;4f&CZLui$0K-Bz68BbLM5`RSYca;FDeS}_AWyWPAtWgKwE zqWsvn&4#_lNQp&*yA_wiKJ=Yrd)_1#^RRc;gQYYkCL9x5`izgZnnS#EE8vWBoJq(O zYBHIVQjE&F|AK!?TCsfoODjF#zxUE1T(JB%{>T zj@-d7ScfC)o|ay|OPfGH;b4nD1`2yGTU47GcX#JPp`9UhDYX1+sq*rj@Qe)yHXiwx z7~X8NftTcVc>z4v2(hznFYTU!VD?+n^%(&VZ=S3JnV6t`6R4}_Tocrggd zl`i#NCJ%uFw-aXVN0sx2qz#NT$hxxyG_k6y(f2?UPtcAWKw22^?2)(KgJJ?TkY8F zkko%5+<6w}UG}LV>o+XJbGHnbX*90b9}-twVm^bHUfmq$!b=c*tw&Jn!*Dwr5m@sD% z#L9UQJi@9BQ9~0Gh}&>ECL?1Ufh~Iob~eUq^Rstw7hj66j~!UoSSA*arxHABBQamC z;Yx@+*!2{0x4p)(dk0%lCxjLLBt!ZeHQu)iu%8`t3>O#S$>71uJOt#Jp>5Pm#gU>; zP01H>`@t}S=_)aXKaLCvnUfkRy?5X2Mhu@Il#(UDP82LGzrDS^u^7b_vu@hup?##{ z4Vj>Yn8H3~`!DxL`BE7iveUEk3$%LzTSPiMBQxHGRf@7qGs!?=jSsPZ2^pyf0et=I zxVu5$F+5A=E66-9nW~Vr0)kzsa{l&cZb6_?qyNTZiaxn(2FeQ zjlhTE=$za*X|_vMF>S%*`9a~``8n{w7~C0a$!@W;ale0U;9xJv^PunoffO6DV|pf7 z{x_<)$4-Oy{%C6(oU(&`F6vMEK`SyOEk=>$;(>+R(z(MKuv}Sbbgpt&tEq3~N+FX4 
zM-jO5FER--aa{hQ`p1}G+-kq>5bn~O5jFreJFvu9AeJSW06wkI63H{Br|d)jwH~uy zDR9F~_!ef}aNx+DcjT89kIP}TjWN+cV2wZKLi;!^g)uFLpZ^Yh$Ae12rL&?~6nj^1 zXT)?`IPh9^zfReYQ(}g@J_!B8><-*BCayyXVB9m|TcHm}= zGsqpzlY#xr?X%G>G_#9I2t0;_45&$cT)k&~Wv4C|GMFKO;Vf{+z%7)M&?&8viU@az zzMcZpZ#tkk#q6Oa#DJ#=*o_TRQ+DhOG3&!hJYIM_hhEdvx2x@WUoo3kDYtvU#lwA!=rFBTIaY+Q$z#%oIGjG&l+Qdf}@8V9^cZ<>R z(!Q_>sxnRnOxTZmhuQY~c2gB08eo`sGO(S%@*R6)z+DXWT@<-fozOA7~K!km>vO?obBI6x=fNqz*0tZ^peaL0iq!JfkfvH65li1+^+`+}$H>=a%^j zojX3ck>HP6)>t*oU9Q{?AM?!mWAk1*eJJKDU{+2VlG3gk$)fvM@FJgukRihT2Cq_W!^KZbLaBTdnTxH(=oHQ#xUz#XjsUJrD{p%?k@1|(s%swhSSOSte@5K<5+;4 zUmGxQ0(XL#KWR2ku%7|pE~;~fYxg!oEEfsA67wILD)v$9sBbDKNAGa?VkGpNfXjaJ z!f*a%KZD%KI(O*l-=Ob+yXZ1}^r1k7xOJyKSGJi`+V87CFLw+#9|Oi0;vCR-%%cFP zH(Z>Lc~Iq|V2gzPIB$#kE(qLxXw~X^28FE8=EE1SDFx7-ESCxdKzS}knFy85e}TS( z&!|koSCMCg?RQrlxD z>QKm@sw60cj}p`4Bx&y;SCqlSNrMj<;f4N};6yV_NKhY@Ui8G@%nG_*Jc;Ey@^$Yi$IZk2QUA&v>dt(RB^gb%^$^8R190?8DZ zV&qOL3eULk;$j^9Lc-n4e5r+%Uu$}OZjHP4nXHsES>P@Zz@IxS0tD?#shvb>eVsE?wkPz$tcxa^&gIZG-m3K*R&M_iu~612EF%P=5Y}^9 zfv@1EDXW=-yPbj9B4PtWn}^%BMi#Odcj0F;qO|oD60Wb%=V$X5aRt8|i1TB(O1ob$ z$%$jxFdybkn}<1c@YFYTk}S%{l04CO^?F&bM?m2P4_+23)l#{uN+)U^qKe&*5Pe&+ zVp*w{^WfjbVj6cBH;Qq@4UN03mTw$@SOPv8R4l0G=|FPP@5klCVi|54aNq+=vG{Tz zHf>Aikh>`6i6-Jv%)iT*MPZN&;|wpEM*}A@U#Nq2B2|f+O;zqFDygomcIA9-AM9+> zAK%*YFY^(bkgwHiQls9wGtUA}x=hwd$&Et(@GXT^Y_>t=g1WO!Iu_=Qz9<-2xNsbH zk{aCj)=1q;mP}Tgo7TCTKy8hc7;WPN5iaU?CYexZ^KI^3soH{z4&>c~JFQSwKX)ZP z?=!w+*&~a!wHk58ql3N8pswC0lVKPW9BdDFd_>oGn_Lt;#6U91XmLee>5#ID zIERmAOR;%ar_*Uvj?%9p9K$4&TU*XWKC&s~6&#Sdm%fwanpSTnlTDAa)6wKkfB}<0 zQBxCM?2o-hNvvu%Qd-KZ%FFD z${j`p2wkyQEaO6+GO46l6!UpfbyL{eMOTjnog)_76-6zR#XvfD3b;zbW{5kEg!Vw= zbWe*X4YdlF)q*<$8oF{BmuURP>D*QC)OXt{!X4lo;?@FA=SKXPg(J>npGUY)fkkJE zVR&m=)VT{>a97S3vwK`P?#BT<;EsJEfXj)?hB5z1lGWE|s@!O{+h-6D$uK`X zXd8lH)-|V(qTsmQ3O229+kcFpLK8E53m;K|i$=O1DyNE~p8nZZ&(E<7JH+Z($|4!X zA)zu^H+<82mD{*GdiQoS#Q2vt5=ovnP1@`}@QhsEUiWU;F>D)L{?;&kfsC1l=n3B( zXuMxY12f;8&%I!?j!#FI7n6Y{K4M89=F4a8)8>hKpA?GEaktK4{+)QyO-^6V)4&R> 
zl5!$Rf20pg%wjAW#0<Okum^*yJ^6S9WqZAhm9-uW}4+I{?0dcF4Im+wYB?C#_zpGXY z`xf50^{dXENywDiZ%!L}fxhl(>CLx^~L2t(#emFPd<0d&dqw<9z7cX{X2(a>f5x`{(3Q+(a-{SGb6)# zr33FIWi^KvqZ^6TaVRnqt0tw)ac-kLe!E`g&Sq5W*Wh}>~be-C#b+S&9jr*jubv9SM2tD|;w z?oJE&bK&{_zAaY%z@$RBGm@krUMd{f6*Bo!8x-C>xocp#dWfFcvFPGrAhE=mhU`A~ z+`^p4rLEJ}7cSZ%82m0u8Q)*VZ4;P^=bHHixqCV{`_!{KMU4-@IenB!jp4j_QBrEn zu3D=bm8$R0UGb7TJ_?%b5UWgAwRL<3a82XExtKi+hQn-R_iaWh$yy#1;qF{*)Lx$g zn46K0Yh2`2Hs3yd(>Z;awy>b@euvnnRF-gB3gQa%?{Iy^=k%S^zZl=rxyu)_;5UZj zJ77qccy2uX7zeQfnB=*e&O-E>1@019H!8_+7{OG|+J&r@Tf-z1OrRp8BN)SyJnFJi zEr{Z)U}#e~dMAnk6%1o@6D+eg1~1I1XJ}{TTh@j4_Awv@;1!5WFy}SkTznK~EaO&c zb&f1t#MufRJFguq)Q=NXXks>5=~Xuk`i?Z?2JYtR37geC9gRPND-#0e3A9gF?Q}?Y z!Y|}5cbz+u+Czro8%Bp}-QJ!QGlFHB=R!fO{@A|CUA@(maZ-$3I_5^RN0n}+d-`&Z z#X#f5%SyLYDIj-gjl0k3yR6P#GQbn=a9(5*B8S9wAmQY9OccwlGpVGL;n?Ttd?BB| z;4YzYC!Mv+5R33@TggQyM8(WBxPgV!KRTV)%~nm$7cyAdDiooxM!m4h^3ILGuN#YT z{3e-~r``K<*XXE91H0etM#fSQZ^xG~x&8S(U^k;@p=fD$!$Z~|Vq*5Hn1&!ga(fo$ zb?)Y7>4{Nl$P$e*&eRTBES1TF_tLmeJIg-qa=qNeNWL)yvrR0%ZM>t^Y&CT})bqTs zixX*x!x?awdv@X9DZt%Hr6^2ASZzV0qAECTz;dkR;AN#^=}Jmx217+%Q(uVwe56NsVs`=7azH$lyli119fMk z9Nsx&yw%_itn9by zvGy$77D^GhlZc(Y%fGwAog^!jqeBxLjs+fp=MKQ}48NAbC&*MX|iVhM(l4 zNZs}sDCPk!mGUq9oq<@GeI|fqS6{zwx6jTq!V60T^kbC24*6RT7el#nKf)(-HvtHLP zzhE~NQppMw-lU0n%&e{L@Zr!j2@jA^A=yI?>Jl^3EVX_?yjbLBNJV<2;7B1 z;QUrAAhNJOB zGMS7!<0;ZI02KX;bg?FP^ZncvyKS)6dl4|lJN{*`e_<}TCuBsW-F~l}R%P+;Tw2#SZwb@;?(Jz2-H|KHC zleY06W~QP?9-An~s)wEj>_#wkdUj#fO;e-eV`0n$O7ak|9p)spd7t`FH|o3hdR=WC zM=`joUG^!c>rALBct(R!eA&KOhKp0oU5=yF>(vs3w5sqt!iB?Q`(ieOiN+iRA%I;~ zR*R%%BI-L*SNs0lwOUOny&npPlL2QOhvbq@G_$y0xZTvQh-(@lHt5lJ>RCCr4+QgSVNY)Q0G@nEQVsA(n|R`nOYjbc_!(u|=@60pNJ|Fjp?AC$)O*9_frV z2&Dd~pRr5U&P&6UQC<9;bA>tk~{3)T9fpqU5(~RUOXJ& zcc!`BY#zr}fjfQt?p%_qAFAyZxY}=@f$s!?aW*52V(%vO)xn$B$|dKo>bvT7?zFxs zPJaxzOKRMKk7r2AVhH{n4AC;2^DKmRFcct#> z(INP={7&2nR(4>VvOn?BZrb3^Mas(h9HT!(w&zVUFm8WBdt4(cWQh_H+A&3$Lwq=u zz=#S(|n4H{3Vn@aa7CczxXJb%sN-GVK=Vim$Li8a3aZV3h6@&_mRVl 
zwsxy=(m26ODGA0r8Cl18b-^H-ca1yn0db-}$%Qn1r{>ZJA*_%c*ud$3EY%Ugq7r%e zbxV?vyETT4{F3^2#fI7h!gKZz_c5`ZTwHz}SU={bO$TY=@ZCEx`|j<&IkcX@1&3G& zY9sK-35fITb0JfZD_YY*t=eg?*V5GDdOWiynAfT{P74LheEyi)N<`u9W;^AY_bxbS zH|d-1@sW##)%bs`nTlR9+ol{|>;UibI(!7X1@F`lHNZ|M;e022i^%UCY}>yhQ?c21N&b)nlFpdqTK-WP`0bw#ukp<15?vASMv!+(K=~w~oXdg{V+5mU170)c~TeXh=NE5SHWyx%PLSs#EuM0K!?A!wF z8MbW>f=PqZ=AUMRhY%vjXNubR40mN+-{Dy2?@{DVEFgCu4DK*bPArkSC}3%@p)5*C zAq8mi#cY}i@}Iy=zB>XtE7u5j$xH6AQd9R-cs?EGKZf=MQ}~!?cX3W0)m@{mRIBLU z#r!9HYL}cUY2+a3?p_Gi+7?1`VhBv1-^(iseEO&0>L=cA0R89ac8t zJl2CMs^EGS^FMN#nsibe78kW}rAM4n{}r^;)!2L2tdaRA<+TgM3TKuq`VUU#udb}+&*i~fzpq%*M1bC`)I zN;o8U!QBSoE{mS7e1a{V-wsC}rve+x8~zPu9bH0PEfNZ1=i@{)ycmtgmYspXr|rZ# z=25@RNPnEZX=`5>Cuy2DV26HhU2b-;%S$FJOtSoX0uvZG(~uSCF}*IgN7r|rxfzew zP5)r|0DUqxXnW!!+-c)OU~|3ns?`?6r2a?oX)N+ukW}SNR-4*&yqPZ-pQw)z&tk+-CsRR1Qrit|EcVqDMKLYxn|OWWR#13bf-4zgld4~MHn-$j$4??m9PUhk#uk~@qml2~Cv=MJoQ=d6-T??%RA z%fJ5WkHzAtHE`r`DKD6IndIL&{lDViZxr-hu?&6#(a^%-Aa}g9xVZ7_c7kD!nH0ZH zmdU|C%gFU&G?v1KKWoQ)h!PIqZaRJcURI2Qxi)Z&tzrmwEv&&+%pUFs(adfKm~dzu z)Ey}{h7}$(nVs>_!}cF2+w&%|P*V=ti;b!6GR?K9u>-*+0HxTB>z?VF# zlv?UpqnLhn$(>dOxh?~D*b!5Mell^SY#e~4qI7oDHB zwwy~b&~2#{u*cwXHNSs3>W{~;J!1mGk&igeQtZBIw>78Y2JW=t*=4P+A9l|=6n+b_ zOk#UG5arD_YM9i10_tXocZU_cyxu22a8N-cX|_6Ohuh(EyRhZ~Rz3jdVaRG>oc>47 z=rk93kmW-X1o@LB@wEE&yJTGc$s`ve2{_T|f$M@R6F0CFU}giTCm zh4iaQ6Su^((_^8*bhe->*aic*;}Q_h9u`U}INCA>dS#`E8Y==o1|DOcx|pvwvF8DR zSFT|lr!~xT7jNWF?*q!Ewzq&geH^8E+G&CUdkM-Vy0x{ju^fK{@vP}k8{)2M)dPS# zaPLXI&!W`OxvQMN-5(6{159G;QGzt~@h9V?^K4*y3*w2S-?3CJcOWcwhJ#iccK4c_yGZtghb!~m;YsuN(qr~( z<*|9lG2y0ZFOBz-%k5g3Uvb$fb9i~#A8>}JaXpQgm+JM;_RAxk-Zg!P{TM|N&FP7( z_W|XmtrY$IifqD0{bdSbW2KLgF)Ztp&Qxk=XAO-zpjVqGLw%>Jpw!B^WcHYMZh^Wp z3xZh2J}eE;tBN9JkUO4mciH#2SKrn1I(J(eMm*bW{@H{Zqaf_E$5?G?E13Z9CIzt7 zjZU*xAo+J>SVWI!(d%n;qe9xx7z9Xo)lw`R!iG!xBl2x7R5|C z9FBaJ97C`s~-RnVT2M6b9wL-WvuI&{+;7@aNYK~dLofX8k@kE-yK>a9D3`R zAX+YK{dpacd|snwI=AhQd!IjR_u@QXnQ-(TTtE2 z;QR!(mi6d)C@y-+`W^L{{aRTv)AR&AF{`g@^mtZW-eDUxJ{?|+#g>^cT>osYSUEe@ 
zW-M+_J|^7BO6BeEKoe`pi7IkeuZr10HV)@-ImR4w*95DvkVb~P2kCdBSSWN=rK@(N z>@zlki!!q36}2Rl8>B5%>ntxkiGtmjS=YQT$?;UM!B zG52a0zUsO^8t0dzh#s~9#l!;mAeLjo({ZOiu*Y8iy zAecP}PKWtVi9kFY9;YT<-lvATa}oSpF6@u;gZ7BlH*)6-1+(N;1Xwroz)^9xUV*71Kh( zc$VD5YPl%F6Mc12E~=wMLcySEd=uAn=Zbk5TS^xsusirBxYU-+`FTa_M~qjySvo(; z{01b1g0FHV?9WIHYO8hvyUG2AbuTQLVi_!R4|g@4yE>*0gwWIzYj1gn%hJ_uSuKF$ z_aX?);s=K!`w)qfsb*Z^`zmN0Y$)`98Nywo20Lz@DXQF%D(6C)GO-*N4o4aIk0?7H zsOEyZn?ib=3y;N;+ku^RE@XOlRKe9c#HRLhXJkioWQC+Gv&_=tL|~bjrY1*e&-|rt zr*St-4UYfd!K^MOGxZnk#_0`T3ybvRHzt73U7g9A(5z5 zt+r%Y!7*O!vLdASDN_)uyr4{DvyI>`wNJ`QJ_lvUK-3FC?ekor((N>G8OHTGM|yfr zvvXQFIy~HsOvmEu0p>9?I8IZm9=E<;c4Wjgg4{VA*6+G<`xk*{qb6uvP3y&O2)JSi zxf=&Bo=YXx7%pgi3pXs82x#5H9o7j{PtMPevVtJIEOas0RNJj;F`s#<3yz?We_smP z6WMH`kk9AAAv>uw6f%0Hf;k6OlFOzcVl8yfRD~?dK!?7e!iVRwIS4><1-bRQ-Kvo$ zUa~5;&k9HIhV0AISxdggT}{uxA_pKG2|_xTD|bt6NhxC`t8%xH%aT9A^TlpU#qUQV zoYHwN`%=LFvSN2@{0oz}zjm`;#B}yI)Be3^;^7r3N7SryaZFBDcj^qe?_M|t1 zks!x_Rj1NorK`eiONQ!h;jU5XRx)p`pzflv#5%)Uuu(O=>hl`hX)B$9yTR|aKgt)% zfO%+@rnS{dWH!dTGJ+o7#|ZjlDL#ZX()_UEbxwc{tYMtbSK3 UHq> z3k6xNR3s^1sbngZd_I%GG2==_Rpq>jibXzCYn^8Eb-UeC6jf<;8s`crgj_glw@Y2Q ziaRASM4=*z88LTMC`xkcJ>5f?})WEBOpW3`Ja$ zPFhxx^40A!?k`u&oJ+Faq8u|S5n3&mpfgT7p`T-MAYiVB`V5r@A4+_!Jv z5_EGp*o)dYmvU(e3^mI|sB~5=RAe;yq*h*EUcd!+e^fG9)DPmh_)?f1w*hstZjTpN zRCv7vI=kKB_&bcD7-F8JR^23Af^Rv2eSBOvmV$ky*HYU{T<8g=)yi7$iEH{!YaG~A zF?rxb!t2z9>)&j>evkbq&7b2G` z;@fAT2oL&>=Xi^iqA@|p>vH(et;3?SF1vkd%Kn{KZvPTl9<;8)H(bF=DL!m#F*k>U zc!NQX-$^_QU~P49!N8?aYo#_~&~x5&{|?tBo^-n>Cnsg4+dX^TR@?IHvy!6C-s`z2 z7lOZauyB%*2z0>--u@>ArC8`HBzvGP6Rm)s(Wo^PrBuMtF7TDW&Q{5u+wzC^XI;4V zO5yAb^jJ-=F{>S=K0O5oDU-NcQQ#02oLnQVPPM)UYDJacd8MP};*&mVvQm>kDc2PE z62!y66**e7Z!KRHI~u^B6(A3|)M_p&iqbm6+YOX=@jNH&SwhdUpomY)Z%(!K@0Zgl zvOca|CiiOt{7GRSYhN;P>`*>V(LTVArcvLyh?D20U5+QzcikW5OJ#5p(`{C89?+q2 zH#g(8Q6>&Y&UaEDwR+}i2i$O_kGqCeW(Q6kV0j_nccV%G7rbQT{Tk$HXTM&@mo*zk zRZi_w(AxnnBrwetyrwedXy{tu1TaNq2zYm32OaHQ<=R=Pg{HiMwp%50>sPodOM1U6 zne3rV4zi|{WdpmmOwc07L+%l6XhTw^#tHnPMn3j}7o%OXB%QzsQQ^JyD&`R?^{U>R 
zqm2SSgM9uj^T(@&Y!!Xz7O~(O$2bG=X@)ghtqxFk*liK* z>5sV82HC|t8%gFDwSH6G;o)b>t_j|5+2i;`5AN>aJjxS2cD zYN+y1J!qVeOMId4bYHM6>jKIV6`tI`ZU`a+)N5Vxo_bR@_Jb`1lrwl2?x$lM1DLIL zTdtM5@CPjQQ@`Nul9L{;!8@oxaEDQWOw?CRuIbzXQg|pe<8X*iNZOR*Cu+mG#(i^zJJK)_fz#-#)rma@7Z>#A zN%!PTmJ5Y4xTLDoRpmD-HUsSawz9sT6BVdDtL8I;DH3Lu{LDHtJxJL|7cYz|2zMhh zGjmUU6L#z03%h?XCM)U4F^>g@H0~CrCO5||TzDygMFxYzZ{Ky5)3e@8D2<3qJ=Gu% z8(HM7U~~&V6z%F=UN@`=rAeo8ojT(aF%Q00MvXw`tc&qnTrS91VOB0Ro7cI+6LNC;rgT~=RfMU~F_Zk{j~(+C8)b8Qdb#@rxC0-^W*+<2 zr2L!Z@u1D_aj%fjN$Xwjb=a&{2rZc8T9n~KmII-ZKU1ZC{nS%;TT=|AvK>(!HC;a; z4u*eZP-y6_8@bc+;L*k@#N259w=?DN+T(kZL~r$`oNz z8&1EI9~-pV9lklNcIO@Tk>)l4-W=_wf7m5tw~NVU_02C}0Z^SgH*L3BZRQBS?u=s} zpGiT?pVj(7xxCKWS?_e@n#O>}Lj79)ilOfml{6NnzGqjr__$Uw7YRee8o#o$UI z`6{&91p0t6r|)!q3B?we`9#8x;?gxe_;7%>IK77cF!e1=UCmsd-;jqbSUG+W1GCW={w$0SJ zTX2v5t%3?omduo6mG{C{192b$solEPuj=qq`p5~wBt|=Q=MAJ{TTjeJ2 zPSq49y8PWwZtkbM>8*Ovl;M@}X< zPK`Qny8P*i_rkyX#XIkHn11wZ&zr<%|L+LR)H(l$0H+j9!^jqfMzg|C<-dFc=C;exRyj1CO>6*gx(tTO_M!YszhQ_P@_v|EurC-Ho68?~tFuq%NV$FB4hK|l>6O*=gOxJAx1hpXHNbPwv>S^>4co>e zTr|18y@MU|aw1uUBKP=rwR?3MxPIR6`88v$Ln;G3rxrltn}o>mscWF2)}iVw5- zM^=VyW}bI$ZEvqdLce2M3tVI@U;1}HCU>>}9CyuH{%_05KFmJZO zC2%_&E}v0H+r8kK_c=y?>;eIwmqqgs<-Wkq=cPTanU$#_Dj1DAw*qTiB(yK&ixMui z>E-VFxvc79{BF!GHEw(@xmZmjKj8D6U7uI`8t(Ktv+`GQC-p~CebT?bn&I=^{QYG~ zcoB*)>(0P7!;E1*zy$4f_&geSBM`}Y=vlXY(D|+D_z%i}nYuKzn1173@jk(}7Sr*K z#LikI@*A!o#1q(SXMZa08h3HmxPiOc4cyhWamR8|Jlu`&WAWrtU^#5I()28rexd=q z*;(Muwcz&9Zu;*zhH@c;wxQ99i3x2fElI!exb36XfpGj&V2Ni!k-c=WR8q;(hI08o z8d3bP(zxn*^xS?meX2~ZR_kS@a)M@~n8SL_d?FcG^79sIXjB`vpVg|}&CJZKcs=wg zZT5d_H2$M9V1XdsL3^~Gd4xO1#Hw{1TLdQ(Jjd=HW(rbQR%GSA`r>DoUcIm9%Hwk~ zCMYOa{c7J5;nxG(PXAbJoEn-K`9 zI!alXs56m>^L&WP=JTX_k>;6yiI@L#O0VZ&tkJ;yP+6(Nqe3P<6k)kzY>F{vvRduK z$eq4!k#Gm`>?-v)oS3~T17ht6u2j7 z#}H+TGS2NlbUMmSrp0r$^Sb%K-B$|^Md6yFv&vCMFonWkOPulPu!*ugS(zCbG1e?< z+^yKB{zgm4ZWYW+anrQVXK?58(e4RMd3Oc^@mO>^_)5q~f2wU#b>aTQZV#dht`nBZ zQnmf2T`gwbJ|7E5qk#lOZ38yzuxkd}*z|BW=b`O(^N(A~`2J#|?5i~G*7Y4Z1Hj$H 
zU^o@nUXI7GtL5Q&Lw*0i-TkC*{1_L`m5UNC0dD8BnY4wEIHQEloeTta*0CKF zHNSw|VLf9nR$ujaX6I@y>5vLQZ^O7R>9 z6^>PUcGl<7y9HoA3hi^yLx0n3&zr<#221O}^7S6Fn%gt?biqAInZtZ6^=Qk99ofaA z)M=`%WSHj?z^ zIq9@ouUqPQMxfZ~#Ycf1p5-i7>l5mUmv++=I6+3Zo26Ij(Z8(UnuY(S42;`Gfj6(# zX%|fHFJ2c-4GnT!GzGyI6EdZ9#YXdWvsr$ap}j-;xhqO)tJRjv8R5Xf#TPdcsdX+m zNI4eV_F*h>t|fcV%`MEWPK;XrhAU=2CuW;{!hxLye8?R}_QMW()N0|vd|*33no>&* zFtnX+{##$y?_Dl)MK$iE^A6VSkut($go{7=HNk{AGxf}Fe_|i;PE3#tG~jObe-`M4 z$)TUg%KigW-bW`$KP;@%gK5VjD{ePs9cOuddnbT3)%SC1ff&}#a%K$+1+l>oDa*0 zV;-Js&a+AnQ6`RCPo?6~X?6%ZHMUN>s@^V-+{-n+5azeO!rznVX;2bbaE?V1DyrIQ zSH$#gWGNo=FY^qC*`D(x6~Id@t=BU%1 zJ-2{`$-EfZ&eJq)vu%cWKI&ig$HJlg!+b$4$$CL1qWB)TyDkQIGAg@HM^>91RXy5& z&Q3?;OL6~l7~4Tn|FfWD=W%H1?H=#&&u3*(9*2z!ypbR3%`GXq0b zyTiNUagi((955vfw!Ons<3B^T=S^Z6w0RdU?d-f4!J{ZsIJ)fL2&5wHq>#&3t2oQ8 z`E(E5-6T494Y`Tnbxu#DY}&+falbRhM8jsv{%m!@i=946zkoS|JKImH?*4(jX{Vle zNP>iqL>DfX&*PwMK_=?l+6k;h*nQzuw=5~L+|t1-|4UcsZ!IUJTBOoADdy8ubWY3{Ri)Fyx!MQrJ|_*e(Sa~bI;k9{gWThke>oQCF}a;~yHSW(c+PGK{k>l8-m%o1$zMChXkaO0Bb@=YlBa%c|PZSLl`RTkYoO5u?HdFsDl0 zic-v_pGA%Xj}~LmC~u~0ZW>}&*F5d;k<>FVv~I86{&S9@7-GRhJTID8t$P`cAJePU zAj^*hwi7!%zneZ3i}|kH1oCPZWk4Rd>k)&y64rH>lp^++jjSh<&M41@#y2N{JM0_c zz2wd_VYiunYVQ7F9-@Z5KDXP44WzWZu!U!!@VGD&|CHic*7o)=^T(-jCX*7H<+4%v zNIT~b%YH6v11S~hjr8u1OeQ@T;TdOO!^v}In~g^9JYG1x6F!pSJ@28Zi9zaHU-@q< z%U}rXv;%_+hYu5vG3|fYIzGmPW6ng1iJJC>%)2+T+-|juUULuJU6Bj!Bvq2Sg^aKn ziiDGxO~wUn)+aRP`;a=u6FwhsH{)5LS8YF8`qOp6K0Xeb_RJdGJ)K{8f*te1vA8q2 z&T!*zGnFH?cGiC2?q<2*PLWQfKPpGqW0rSrY;EylW;^AW$AWKHxpV&n^-%5-r`bOB zWWqN)>jB4?tgf4#o3P>0kr*@0ym5N}C za9rH)ckZlPhA8k|Ft6R~!aA3PI}hfY4JNX?lClkiqp4&(u@nw%rqiOLsvYF6 zVT|BCaCe2fTC=HkRVkBB?;>{0QalVng>@MF@@U)vaWhZJfE6`4{;jY1_aI{?o87&z zGKVWICNRRF>4_oAIygPe>^PkXf*u{BOkpOz>`X54 z5vw4Is@!Zo=(~Qo;I7qby{;5Q0oy^X$9G~(m^E9eN!s8JM@C&wpU%QBt3UUU@ue&e z+9({(bfFJSyTQ|Q(~eQA4HRB98E{UAEPKLHk)&9im(JJ~9`#J~Qc@}&>Q-Ofbi^c4i_sjry!wxO%LXTwBX5yyf&W)`&&yF8v z&Xq<7xsx8ayFrkrob37gzRGy6*3p0W3tw#(O+)N93se_jGTVLVtP+GE#*qy%22!kxYww(w#Io>>E 
zo$_LffLW4uhowN~=bp~a(o+vs_Cf~Cw0GX4afb_wePA1Wqm-F@9C)<79iQe*;NeM% z+J|LGG?|9yH|ibuTj6lR4%a`PK z>-FnzeVxBMk>PGs+Ica%$#Trj)}zH3W3t(7B)Lc9jy#-S@pva}4?)Eh8KCS_!z0?d zVm%|z=de;+6ti+y3P+s5{C{xHHzpyD;0+F=Ts~spyl(gG2U(+4)&oQ+ODHIqabD zj%nmpojaGqW*r*f!%QHs6Xk=OLav~8aMFU*J$~R$ z=k7#R)N(N+?6NEmCn_9`nkeg(-EH(+BUHkE5qR}~7kE!lUjeOvQqDH@Gi1}jNZp`-*QLH6Rp?l)&5?|%2jZ* z-((qkx8jTImvZH*%bMUD>Nlc?xgl4!n!d@G();==t&^2br*Vr&7tNh<>q+-Kk)(`3 zMY#2yL?9jyTWnSv))v%zE;@!CUe~OL_CBM2igr+6E@KqE>ZM)SN(TEix?I1k4BJ8B zjq%KF)wO@?H@&y6y-$**{-c-W>%5Vro2339ZjEx|$~#oVE*)Yqm`%i@)8kfZa%y3A zLB|fudtx`ZVf#^mq8+n3U@3MkIlwm(-a!vaXgQ#8|h5AqvLmS?fAyd}t&|e;w{s|Ip zlBBH!)~M<|{5}w+a#^~vk?#VZV7$IiDT;@akuiR4C&5Q~6J@hcIk3E^&K(WDi_hh- zTOX|KE%JEKjwQB#85zO6Z1>8@{EBPdwrL9S$9|`OnF&p%_m47oA*zLvRMoehD?~O) zFE4%h?2S^sc6ocRd-*C0H;MH1QZ5&Z#fx9^Anqd0h%QTrnHs z`P8Q!h94WJriSdadw%Xh-;s5U!w-2Vw~N_24Ehea196SG+zZ}0m(4oBM&pZ1{wT|C zzTMA=+UbT1q-~dyC);geT(=vZy7Ij7d;SKw>GgMs@#4GQmqXEi+$7l>guF#~dteH& z)2Z#mI>Va5xtqd8vL2(ZtebZDXouVWU}bNW>2W(~Ih{MSxj33)rOa&Dzqqjx<)=*t zZ-s1DxaksP1zob~+eE*9`fvC#=^I{bT+r)#fDcHE;^hnV?W^SQnjGxkEPMNVd#`Sm z7et@ z^#A3vFE%6+iLjv%%Q9;_TY)7$G&p3n<4EzG+hf!-hP&6dYJXTl_BmNLk9vVSvJwR> zglF#Q%#6!!9SlzUog0rnMW@%+KSnfRBR_)N=P0>aN?-k>j z+b&T!eEG&}Z|)O(`4?UP+dk!nXBp<&Gi1Az*p*F_X!NE%M7i+#`ufiHHvG4>!-dAJ z6ONHzFeAWFci5(4!aX(qFjMhaahmNDF0$6xN2beGp3bbep5Pjv#pSJl)5&w|j9-(O zE>2zIed1^IEv|3cqg$o_z3_wSebU$;ql@u8K2Cvo#H{-cp!8sA4I@p$LDhsb7UO(-6-k``Xw)@E?8_C5U zPIQ89{uE6``Fr&p>OlM%(B%@|t0WoPv0slG5=9+r=v(HBuG4;^bC*bgp&q0r7v|=! 
zaW^&f({ytG7i7siI^m|Z0Vs#dvoiB^Zei-#<~Yado>3~94BRBiEwUw(sn2cwSN)n^ z@*DQ=ntjBr7bsnDkG;Q>zuzu~lXp#S^EHjXYaZU^>kW@jdk~Fs78~VoFFc+3MenXk z@+;;%^rUtCLEYUgCd#_HFguGqq$oXDdTU$2-9ZB{$nsK4}cXB&UW_PmEU z&D4~`{gm_}pCygU7HBZE<2Yo%a{QOYnYYKay09SAz1yu4`mBtNT^pnu3*Ju#w9Rsd zTpu+wo6Nm3X|q3Bd9pGxqOVfXxpO>>q1;af%+#uPVRqJ7=I-`*pV%quuGxIk*}k_7 z-YDaP<9Ev>+3>;S&@~}XT^W%cCEF1EZ?oPg*T%HrDJyk{T>lu@3y8iC*q7ZgLDDKN zxWm)u`ME2%e~Fl>3CBDxO7W7lP{5sQ*lyR)Jayy6yN75@TwXPU61&|&HjS212Rw3I zY#O`gWK2F6G+ayxO1L?ukAjl@c$WrTfHqJ02r^9lLdO z%I%;XdQ%3hrR}0U?m3!zu(J1*j!E!s000IDNkle~F51<*{5e4=otT=Kx><&GF&gAAh zHT7c7o6j{%IwW|!y_@#D=Ni9wf8jj@KgfrI%*?nhWu;F>Mt%W7tLg>YI^xn=L3v#I z$HI=W+M>h!Fl2mh88A;#F0GB0&*7$BZrba{(>jbs%GDI<&^`c0#KC z-qNo$N#6}`4|Z4hyUHSN+EHKr?j6xyp=(`K?C_fo7m&4b`S3Oy?qc=i+%xKj;!2#3+EBia9-84O9eaQCw z5?LCYvZKKuUbt2k+HpacSKq*1IlR}Qn&`XihBH6m)3!TffLr9!vfqbhfNfa&m&oNK z?Ux?8;uKzSA$y!jy+w2#K1)tL%>Cd{fe|;|P8PAnZxE>;0T6FTBUK8*AcS!&Dx_qR`?B~U2 z++4jZBX@IN$CT}13E3Bli5i9r?Zf&3;E8)KdAN2R=K7?co131~c=C8|eqICW0;>_S zU5f*ZaNsjY4ZH5~eE;sR+3OxLMpt@G{SB&?^q-ldr>3kAV<=xJ17^G34a#b^M*|sT z&Gv0~b9WniAQ%v$nevX@CBKkO6FrF3#M83@%_CPgSKr6DwC9mH^7>o+)!Y1s35V{y z-s)Il)v>2fpDqkfQG*-b`a1q_VzSvMNUeoAW1zzGS$;g9=kB(5dG5rm54>LtBYQ#J z&3f=sp8;LQPuPqBl#6fW9lo-8hlj7mOB4F$xbpKeekG>Mf*0tu+Xf%jRD7{G&7;`3 zZ1xHya2r2e>5J?7{q8&T+cnXLEw1T1gBy)D94VW@ZRQsHsM+%?`_~`1wC$#s_wss) z_sR+x`6XJ>ytj}W{1>0_T?QQFSN(~L=UvwX-|ccy&&D6dP`*Y6ZKL%3 z!kiXR!U@-6%DK52ZM(|Bz2%x6BtfC)=D99IF@235)iJpqpXwhor+<3i8whlW9`e__ zTYTly>xXmk{(d)cl6n|J`6?M0wAmL{o@y3Nvmp-ejo0=<2@r-Z6v5SvjG}n2Z5|Sv zz^54VrI-3eABVX%a$yj4Rc-}OKd?TKXKzDze*XWPhnLSAVubBR{mv^N(+Av&HrWAU z2k-7-4CU*@Y@1qmHotPioWxCHj0^kppZ(*#<&WQC*&U7v29 z7}e#D%Y8AYYIx%}c<7GH+1l%QMt1vU#>&O>H~91yWZp1aeC6*~-{YQ`(l$T_HUmln7i}wAlOdw+?)x~dp{eD<6QGYouuCv{)<;~%b-g$as{`4{v zbLw+qYqarNAHcygl5eZvH&{+wFM<)_cm8N(a}a=g;I{)5D6{ zua+hAq!q6&p4U;mti(meuW|ZsyH5QHy0;|l{@dg}nZnl&0W(prZWsB{7s>3#gL{jX zlKrhO-j6Jg2hHPycZlgWvFMxj&*igyk1cw2lL5;>|9@X3W4Fo@k>%xOoD=;uRFfeMSo{|6n002ovPDHLkV1lJgX8-^I literal 0 HcmV?d00001 diff --git 
a/providers/ibm/mq/docs/message-queues.rst b/providers/ibm/mq/docs/message-queues.rst new file mode 100644 index 0000000000000..076c8d92bb19a --- /dev/null +++ b/providers/ibm/mq/docs/message-queues.rst @@ -0,0 +1,100 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. NOTE TO CONTRIBUTORS: + Please, only add notes to the Changelog just below the "Changelog" header when there are some breaking changes + and you want to add an explanation to the users on how they are supposed to deal with them. + The changelog is updated and maintained semi-automatically by release manager. + + +IBM MQ Message Queue +==================== + +.. contents:: + :local: + :depth: 2 + + +IBM MQ Queue Provider +--------------------- + +Implemented by :class:`~airflow.providers.ibm.mq.queues.mq.IBMMQMessageQueueProvider` + +The IBM MQ Queue Provider is a +:class:`~airflow.providers.common.messaging.providers.base_provider.BaseMessageQueueProvider` +that uses IBM MQ as the underlying message queue system. + +It allows you to send and receive messages using IBM MQ queues in your Airflow workflows +via the common message queue interface +:class:`~airflow.providers.common.messaging.triggers.msg_queue.MessageQueueTrigger`. + + +.. 
include:: /../src/airflow/providers/ibm/mq/queues/mq.py + :start-after: [START ibmmq_message_queue_provider_description] + :end-before: [END ibmmq_message_queue_provider_description] + + +.. _howto/triggers:IBMMQMessageQueueTrigger: + + +IBM MQ Message Queue Trigger +---------------------------- + +Implemented by :class:`~airflow.providers.ibm.mq.triggers.mq.AwaitMessageTrigger` + +Inherited from +:class:`~airflow.providers.common.messaging.triggers.msg_queue.MessageQueueTrigger` + +Wait for a message in a queue +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + ``AwaitMessageTrigger`` consumes IBM MQ messages with + ``MQGMO_NO_SYNCPOINT``. This gives the trigger **at-most-once** delivery + semantics: once IBM MQ returns a message, it is removed from the queue + before Airflow emits the corresponding ``TriggerEvent``. If the trigger is + canceled in that window, the message can be lost. + +Below is an example of how you can configure an Airflow DAG to be triggered +by a message arriving in an IBM MQ queue. + +.. exampleinclude:: /../tests/system/ibm/mq/example_dag_message_queue_trigger.py + :language: python + :start-after: [START howto_trigger_message_queue] + :end-before: [END howto_trigger_message_queue] + + +How it works +------------ + +1. **IBM MQ Message Queue Trigger** + The ``AwaitMessageTrigger`` listens for messages from an IBM MQ queue. It + uses a non-transactional MQ get, so the integration is at-most-once rather + than at-least-once. + +2. **Asset and Watcher** + The ``Asset`` abstracts the external entity, the IBM MQ queue in this example. + The ``AssetWatcher`` associates a trigger with a name. This name helps you + identify which trigger is associated with which asset. + +3. **Event-Driven DAG** + Instead of running on a fixed schedule, the DAG executes when the asset receives + an update (for example, when a new message arrives in the queue). + +For how to use the trigger, refer to the documentation of the +:ref:`Messaging Trigger <howto/trigger:MessageQueueTrigger>`.
diff --git a/providers/ibm/mq/docs/redirects.txt b/providers/ibm/mq/docs/redirects.txt new file mode 100644 index 0000000000000..7cf120f898bd8 --- /dev/null +++ b/providers/ibm/mq/docs/redirects.txt @@ -0,0 +1 @@ +connections/index.rst connections/ibmmq.rst diff --git a/providers/ibm/mq/docs/security.rst b/providers/ibm/mq/docs/security.rst new file mode 100644 index 0000000000000..351ff007ebf2f --- /dev/null +++ b/providers/ibm/mq/docs/security.rst @@ -0,0 +1,18 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: /../../../../devel-common/src/sphinx_exts/includes/security.rst diff --git a/providers/ibm/mq/pre_extras_install.yaml b/providers/ibm/mq/pre_extras_install.yaml new file mode 100644 index 0000000000000..054d2453b986e --- /dev/null +++ b/providers/ibm/mq/pre_extras_install.yaml @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +--- +downloads: + - url: https://public.dhe.ibm.com/ibmdl/export/pub/software/websphere/messaging/mqdev/redist/9.4.0.0-IBM-MQC-Redist-LinuxX64.tar.gz # yamllint disable-line rule:line-length + sha256: a839db8f14cef59319639336e951a52dc320fd6a42ab5bee9bc925e43985f457 + extract_to: /opt/mqm + # Anycast IPs published by IBM for public.dhe.ibm.com (see + # https://www.ibm.com/support/pages/node/6826677). Used only when DNS + # resolution of the hostname fails inside the test container; TLS SNI + # and certificate verification remain bound to public.dhe.ibm.com. + fallback_ips: + - 170.225.126.18 + - 129.35.224.1 + - 129.124.168.5 + - 9.133.44.11 +env: + MQ_FILE_PATH: /opt/mqm diff --git a/providers/ibm/mq/provider.yaml b/providers/ibm/mq/provider.yaml new file mode 100644 index 0000000000000..d1c56376f2f75 --- /dev/null +++ b/providers/ibm/mq/provider.yaml @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +--- +package-name: apache-airflow-providers-ibm-mq +name: IBM MQ + +state: not-ready +lifecycle: incubation +source-date-epoch: 1758787200 +description: | + `IBM MQ `__ +# Note that those versions are maintained by release manager - do not update them manually +# with the exception of case where other provider in sources has >= new provider version. +# In such case adding >= NEW_VERSION and bumping to NEW_VERSION in a provider have +# to be done in the same PR +versions: + - 0.1.0 + +integrations: + - integration-name: IBM MQ + external-doc-url: https://www.ibm.com/products/mq + logo: /docs/integration-logos/ibm-mq.png + tags: [software] + +hooks: + - integration-name: IBM MQ + python-modules: + - airflow.providers.ibm.mq.hooks.mq + +triggers: + - integration-name: IBM MQ + python-modules: + - airflow.providers.ibm.mq.triggers.mq + +connection-types: + - hook-class-name: airflow.providers.ibm.mq.hooks.mq.IBMMQHook + connection-type: ibmmq + ui-field-behaviour: + hidden-fields: ["schema"] + placeholders: + host: mq.example.com + port: "1414" + login: app_user + extra: | + { + "queue_manager": "QM1", + "channel": "DEV.APP.SVRCONN", + "open_options": "MQOO_INPUT_SHARED" + } + +queues: + - airflow.providers.ibm.mq.queues.mq.IBMMQMessageQueueProvider diff --git a/providers/ibm/mq/pyproject.toml b/providers/ibm/mq/pyproject.toml new file mode 100644 index 0000000000000..b748d28e37f71 --- /dev/null +++ b/providers/ibm/mq/pyproject.toml @@ -0,0 +1,139 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.12.0"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-ibm-mq" +version = "0.1.0" +description = "Provider package apache-airflow-providers-ibm-mq for Apache Airflow" +readme = "README.rst" +license = "Apache-2.0" +license-files = ['LICENSE', 'NOTICE'] +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "ibm.mq", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Topic :: System :: Monitoring", +] +requires-python = ">=3.10" + +# The dependencies should be modified in place in the generated file. 
+# Any change in the dependencies is preserved when the file is regenerated +# Make sure to run ``prek update-providers-dependencies --all-files`` +# After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build`` +dependencies = [ + "apache-airflow>=2.11.0", + "apache-airflow-providers-common-compat>=1.12.0", + "asgiref>=2.3.0; python_version < '3.14'", + "asgiref>=3.11.1; python_version >= '3.14'", +] + +# The optional dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +[project.optional-dependencies] +"ibmmq" = [ + # Required at Runtime + "ibmmq>=2.0.6", +] +"common.messaging" = [ + "apache-airflow-providers-common-messaging>=2.0.0" +] + +[dependency-groups] +dev = [ + "apache-airflow", + "apache-airflow-task-sdk", + "apache-airflow-devel-common", + "apache-airflow-providers-common-compat", + "apache-airflow-providers-common-messaging", + # Additional devel dependencies (do not remove this line and add extra development dependencies) +] + +# To build docs: +# +# uv run --group docs build-docs +# +# To enable auto-refreshing build with server: +# +# uv run --group docs build-docs --autobuild +# +# To see more options: +# +# uv run --group docs build-docs --help +# +docs = [ + "apache-airflow-devel-common[docs]" +] + +[tool.uv.sources] +# These names must match the names as defined in the pyproject.toml of the workspace items, +# *not* the workspace folder paths +apache-airflow = {workspace = true} +apache-airflow-devel-common = {workspace = true} +apache-airflow-task-sdk = {workspace = true} +apache-airflow-providers-common-sql = {workspace = true} +apache-airflow-providers-standard = {workspace = true} + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-ibm-mq/0.1.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-ibm-mq/0.1.0/changelog.html" +"Bug Tracker" = 
"https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Mastodon" = "https://fosstodon.org/@airflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.ibm.mq.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.ibm.mq" + +# Explicit sdist contents so the build does not rely on VCS information +# (flit 4.0 makes --no-use-vcs the default — see https://github.com/pypa/flit/pull/782). +[tool.flit.sdist] +include = [ + "docs/", + "provider.yaml", + "src/airflow/__init__.py", + "src/airflow/providers/__init__.py", + "src/airflow/providers/ibm/__init__.py", + "tests/", +] diff --git a/providers/ibm/mq/src/airflow/__init__.py b/providers/ibm/mq/src/airflow/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/ibm/mq/src/airflow/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/ibm/mq/src/airflow/providers/__init__.py b/providers/ibm/mq/src/airflow/providers/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/ibm/mq/src/airflow/providers/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/ibm/mq/src/airflow/providers/ibm/__init__.py b/providers/ibm/mq/src/airflow/providers/ibm/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/ibm/mq/src/airflow/providers/ibm/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/ibm/mq/src/airflow/providers/ibm/mq/__init__.py b/providers/ibm/mq/src/airflow/providers/ibm/mq/__init__.py new file mode 100644 index 0000000000000..f809790763f0c --- /dev/null +++ b/providers/ibm/mq/src/airflow/providers/ibm/mq/__init__.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE +# OVERWRITTEN WHEN PREPARING DOCUMENTATION FOR THE PACKAGES. 
#
# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE
# `PROVIDER__INIT__PY_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY
#
from __future__ import annotations

import packaging.version

from airflow import __version__ as airflow_version

__all__ = ["__version__"]

__version__ = "0.1.0"

# Import-time gate: refuse to load the provider on an Airflow older than 2.11.0.
# The installed version is reduced to its ``base_version`` before comparing, so
# the check is made against the plain release number of the running Airflow.
_airflow_base_version = packaging.version.parse(packaging.version.parse(airflow_version).base_version)
if _airflow_base_version < packaging.version.parse("2.11.0"):
    raise RuntimeError(
        f"The package `apache-airflow-providers-ibm-mq:{__version__}` needs Apache Airflow 2.11.0+"
    )
#
# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE
# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY


def get_provider_info():
    """
    Return the static metadata dictionary describing the IBM MQ provider.

    The dictionary lists the package name, the integration entry, the hook and
    trigger modules, the ``ibmmq`` connection type (with its UI field
    behaviour) and the message-queue provider class.

    :return: Provider metadata as a plain ``dict``.
    """
    return {
        "package-name": "apache-airflow-providers-ibm-mq",
        "name": "IBM MQ",
        # RST hyperlink: the ``<url>`` target is required, otherwise the
        # description renders as a dangling reference.
        "description": "`IBM MQ <https://www.ibm.com/products/mq>`__\n",
        "integrations": [
            {
                "integration-name": "IBM MQ",
                "external-doc-url": "https://www.ibm.com/products/mq",
                "logo": "/docs/integration-logos/ibm-mq.png",
                "tags": ["software"],
            }
        ],
        "hooks": [{"integration-name": "IBM MQ", "python-modules": ["airflow.providers.ibm.mq.hooks.mq"]}],
        "triggers": [
            {"integration-name": "IBM MQ", "python-modules": ["airflow.providers.ibm.mq.triggers.mq"]}
        ],
        "connection-types": [
            {
                "hook-class-name": "airflow.providers.ibm.mq.hooks.mq.IBMMQHook",
                "connection-type": "ibmmq",
                "ui-field-behaviour": {
                    "hidden-fields": ["schema"],
                    "placeholders": {
                        "host": "mq.example.com",
                        "port": "1414",
                        "login": "app_user",
                        "extra": '{\n "queue_manager": "QM1",\n "channel": "DEV.APP.SVRCONN",\n "open_options": "MQOO_INPUT_SHARED"\n}\n',
                    },
                },
            }
        ],
        "queues": ["airflow.providers.ibm.mq.queues.mq.IBMMQMessageQueueProvider"],
    }
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/ibm/mq/src/airflow/providers/ibm/mq/hooks/mq.py b/providers/ibm/mq/src/airflow/providers/ibm/mq/hooks/mq.py new file mode 100644 index 0000000000000..831b85eea4b59 --- /dev/null +++ b/providers/ibm/mq/src/airflow/providers/ibm/mq/hooks/mq.py @@ -0,0 +1,708 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import asyncio +import json +import logging +import re +import threading +from contextlib import contextmanager, suppress +from functools import wraps +from typing import TYPE_CHECKING, Any + +from asgiref.sync import sync_to_async + +from airflow.exceptions import AirflowOptionalProviderFeatureException +from airflow.providers.common.compat.connection import get_async_connection +from airflow.providers.common.compat.sdk import BaseHook +from airflow.utils.log.logging_mixin import LoggingMixin + +if TYPE_CHECKING: + from airflow.providers.common.compat.sdk import Connection + + +# Guarded, module-level import for the optional heavy `ibmmq` C extension. +# Providers should avoid importing optional dependencies at module import time +# to keep Airflow lightweight for users who don't need the provider. Importing +# inline in every function is repetitive; perform a single guarded import and +# raise a clear error on first use. +try: + import ibmmq # type: ignore +except (ImportError, ModuleNotFoundError): # missing optional dependency + ibmmq = None # type: ignore + + +# Backoff parameters for transient consume failures +_BACKOFF_BASE: float = 1.0 +_BACKOFF_MAX: float = 60.0 +_BACKOFF_FACTOR: float = 2.0 +_TRANSIENT_REASON_NAMES = frozenset( + { + "MQRC_CONNECTION_BROKEN", + "MQRC_Q_MGR_QUIESCING", + "MQRC_Q_MGR_NOT_AVAILABLE", + "MQRC_HOST_NOT_AVAILABLE", + "MQRC_CONNECTION_QUIESCING", + } +) +# Sentinel values used when the wrapped exception is not an MQMIError and +# therefore doesn't expose MQ completion/reason codes. Using -1 (instead of 0) +# avoids colliding with a legitimate MQ reason/completion code of 0 and +# allows downstream consumers (logs, Sentry tags) to distinguish non-MQ errors. +_NON_MQ_SENTINEL: int = -1 + + +def requires_ibmmq(func): + """ + Ensure the optional ``ibmmq`` module is available. 
+ + Use this decorator on functions or methods that call into the native ``ibmmq`` extension so callers + receive a clear :class:`AirflowOptionalProviderFeatureException` when the dependency is not installed. + """ + + @wraps(func) + def _wrapped(*args, **kwargs): + if ibmmq is None: + raise AirflowOptionalProviderFeatureException( + "The 'ibmmq' package is required to use the IBM MQ provider. " + "Install the provider extra (e.g. 'apache-airflow-providers-ibm[ibm-mq]') " + "or add the 'ibmmq' dependency to your environment." + ) + return func(*args, **kwargs) + + return _wrapped + + +class IBMMQError(Exception): + """ + Lightweight wrapper for IBM MQ errors raised by the consumer thread. + + This allows the async event-loop code in :meth:`IBMMQHook.aconsume` to + handle MQ errors **without importing the heavy ibmmq C extension** on the + event-loop thread. + + :param reason: The integer MQ reason code (e.g. ``MQRC_CONNECTION_BROKEN``). + :param comp: The integer MQ completion code. + :note: When ``reason``/``comp`` equal ``_NON_MQ_SENTINEL`` (``-1``), the + error was not produced by an ``ibmmq.MQMIError`` and no MQ codes are + available (for example ``ibmmq.PYIFError`` or a wrapped + ``ConnectionError``). + :param transient: Whether this error is considered transient (eligible for retry). + :param message: Human-readable description of the error. + """ + + def __init__(self, reason: int, comp: int, transient: bool, message: str): + super().__init__(message) + self.reason = reason + self.comp = comp + self.transient = transient + + +class IBMMQConsumer(threading.Thread, LoggingMixin): + """ + Thread worker that consumes one message from an IBM MQ queue. + + The consumer is used by :meth:`IBMMQHook.aconsume` to execute blocking MQ + calls in a dedicated thread because the IBM MQ C client requires handles to + be used from the thread that created them. The result (or exception) is + forwarded to the asyncio event loop through ``future``. 
+ + :param hook: Hook instance that provides connection and queue helpers. + :param connection: Airflow connection object resolved from ``hook.conn_id``. + :param queue_name: Queue to consume from. + :param poll_interval: Maximum wait time (seconds) for each ``q.get`` call. + :param loop: Event loop that owns ``future``. + :param future: Future completed with the decoded message or an exception. + :param stop_event: Signal used to stop polling after cancellation. + """ + + def __init__( + self, + hook: IBMMQHook, + connection: Connection, + queue_name: str, + poll_interval: float, + loop: asyncio.AbstractEventLoop, + future: asyncio.Future, + stop_event: threading.Event, + ): + super().__init__(daemon=True) + self.hook = hook + self.connection = connection + self.queue_name = queue_name + self.poll_interval = poll_interval + self.loop = loop + self.future = future + self.stop_event = stop_event + + @requires_ibmmq + def _process_message(self, message: bytes) -> str: + """ + Process a raw MQ message. + + If the message contains an RFH2 header, the header is unpacked and the + payload following the header is returned. If unpacking fails, the raw + message is returned decoded as UTF-8. + + Because the message is consumed with ``MQGMO_NO_SYNCPOINT``, it has already + been removed from the queue. Any exception raised here would lose the message, + so we catch all errors and fall back to returning the raw message. + + :param message: Raw message received from IBM MQ. + :return: Decoded message payload. 
+ """ + try: + rfh2 = ibmmq.RFH2() + rfh2.unpack(message) + + payload_offset = rfh2.get_length() + # Defensive guard: if offset is out of bounds, fall back to raw message + if payload_offset >= len(message): + self.log.warning( + "RFH2 offset %d exceeds message length %d; returning raw message", + payload_offset, + len(message), + ) + return message.decode("utf-8", errors="ignore") + + payload = message[payload_offset:] + decoded = payload.decode("utf-8", errors="ignore") + if self.log.isEnabledFor(logging.DEBUG): + truncated_decoded = decoded[:200] + ("..." if len(decoded) > 200 else "") + self.log.debug("Message received from MQ (RFH2 decoded): %s", truncated_decoded) + return decoded + except Exception as error: + # Catch all exceptions (PYIFError, struct errors, etc.) to avoid losing messages. + # Since message is already removed from queue (MQGMO_NO_SYNCPOINT), log and return raw. + self.log.warning( + "Failed to process RFH2 header (%s: %s) for message (size: %d bytes); returning raw message", + type(error).__name__, + error, + len(message), + ) + if self.log.isEnabledFor(logging.DEBUG): + truncated_message = message.decode("utf-8", errors="ignore")[:200] + truncated_message_display = truncated_message + ("..." if len(message) > 200 else "") + self.log.debug("Raw message payload (truncated): %s", truncated_message_display) + return message.decode("utf-8", errors="ignore") + + @requires_ibmmq + def consume( + self, + queue_name: str, + poll_interval: float, + stop_event: threading.Event, + ) -> str | None: + """ + Blocking implementation that consumes a single message from the given IBM MQ queue. + + All IBM MQ handles (queue manager connection, queue) are created **and used** within + this method, satisfying the thread-affinity requirement of the IBM MQ C client library. + The 'stop_event' is checked between 'q.get' calls so the thread terminates promptly + after the coroutine side is canceled. 
Reads are performed with + ``MQGMO_NO_SYNCPOINT``, so this method provides at-most-once delivery: + once ``q.get`` returns successfully, IBM MQ has already committed the + message removal from the queue. + + MQ-specific exceptions are caught and re-raised as :class:`IBMMQError` so that + the async caller never needs to import the heavy ``ibmmq`` C extension. + + For an asynchronous interface see :meth:`IBMMQHook.aconsume`. + """ + transient_reasons = frozenset( + getattr(ibmmq.CMQC, name) for name in _TRANSIENT_REASON_NAMES if hasattr(ibmmq.CMQC, name) + ) + + od = ibmmq.OD() + od.ObjectName = queue_name + + md = ibmmq.MD() + md.Format = ibmmq.CMQC.MQFMT_STRING + md.CodedCharSetId = 1208 + md.Encoding = ibmmq.CMQC.MQENC_NATIVE + + gmo = ibmmq.GMO() + gmo.Options = ibmmq.CMQC.MQGMO_WAIT | ibmmq.CMQC.MQGMO_NO_SYNCPOINT | ibmmq.CMQC.MQGMO_CONVERT + gmo.WaitInterval = int(poll_interval * 1000) + + try: + with self.hook.get_conn(connection=self.connection) as conn: + q = ibmmq.Queue(conn, od, self.hook.get_open_options(queue_name=queue_name)) + try: + # WaitInterval already blocks for poll_interval seconds when no message is + # available, so no additional sleep is needed between iterations. 
+ while not stop_event.is_set(): + try: + message = q.get(None, md, gmo) + if message: + return self._process_message(message) + except ibmmq.MQMIError as e: + if e.reason == ibmmq.CMQC.MQRC_NO_MSG_AVAILABLE: + self.log.info( + "No message available on queue '%s' (reason=%s)", + queue_name, + e.reason, + ) + continue + self.log.error( + "IBM MQ error on queue '%s': completion_code=%s reason_code=%s (%s)", + queue_name, + e.comp, + e.reason, + e, + ) + raise + finally: + with suppress(Exception): + q.close() + except ibmmq.MQMIError as e: + raise IBMMQError( + reason=e.reason, + comp=e.comp, + transient=e.reason in transient_reasons, + message=str(e), + ) from e + except ibmmq.PYIFError as e: + raise IBMMQError( + reason=_NON_MQ_SENTINEL, + comp=_NON_MQ_SENTINEL, + transient=False, + message=str(e), + ) from e + except ConnectionError as e: + # _connect() wraps ibmmq.MQMIError as ConnectionError; treat as transient + # so aconsume retries with backoff instead of killing the trigger. + raise IBMMQError( + reason=_NON_MQ_SENTINEL, + comp=_NON_MQ_SENTINEL, + transient=True, + message=str(e), + ) from e + return None + + def run(self): + try: + result = self.consume( + queue_name=self.queue_name, + poll_interval=self.poll_interval, + stop_event=self.stop_event, + ) + + if not self.future.cancelled(): + self.loop.call_soon_threadsafe(self.future.set_result, result) + except Exception as e: + if not self.future.cancelled(): + self.loop.call_soon_threadsafe(self.future.set_exception, e) + + +class IBMMQHook(BaseHook): + """ + Interact with IBM MQ queue managers to consume and produce messages. + + This hook wraps the ``ibmmq`` C client and manages connection + lifecycle, queue open/close, and message serialization. Both synchronous + (context-manager) and asynchronous (``consume`` / ``produce``) interfaces + are provided. + + The asynchronous consume path intentionally uses ``MQGMO_NO_SYNCPOINT``. 
+ That keeps reads non-transactional and therefore provides **at-most-once** + delivery semantics for trigger-based consumption: once ``q.get`` returns, + IBM MQ has already removed the message from the queue. If the coroutine is + canceled after that point but before Airflow yields a ``TriggerEvent``, the + message can be lost. The stop-event machinery only prevents additional + ``q.get`` calls after cancellation; it cannot make the non-transactional + get atomic with TriggerEvent emission. + + Connection parameters (host, port, login, password) are read from the + Airflow connection identified by *conn_id*. ``queue_manager``, ``channel``, + and ``open_options`` can be supplied either as constructor arguments or via + the connection's *extra* JSON — constructor arguments take precedence. + + :param conn_id: Airflow connection ID for the IBM MQ instance. + Defaults to ``"mq_default"``. + :param queue_manager: Name of the IBM MQ queue manager to connect to. + If not provided, the value is read from the ``queue_manager`` key in + the connection's *extra* JSON. + :param channel: MQ channel name used for the connection. + If not provided, the value is read from the ``channel`` key in the + connection's *extra* JSON. + :param open_options: Integer bitmask of ``MQOO_*`` open options passed + when opening a queue (e.g., + ``ibmmq.CMQC.MQOO_INPUT_SHARED | ibmmq.CMQC.MQOO_FAIL_IF_QUIESCING``). + If not provided, the value is resolved from the ``open_options`` key + in the connection's *extra* JSON, falling back to + ``MQOO_INPUT_SHARED``. 
+ """ + + conn_name_attr = "conn_id" + default_conn_name = "mq_default" + conn_type = "ibmmq" + hook_name = "IBM MQ" + default_open_options = "MQOO_INPUT_SHARED" + + def __init__( + self, + conn_id: str = default_conn_name, + queue_manager: str | None = None, + channel: str | None = None, + open_options: int | None = None, + ): + super().__init__() + self.conn_id = conn_id + self.queue_manager = queue_manager + self.channel = channel + self.open_options = open_options + + @classmethod + @requires_ibmmq + def parse_open_options(cls, value) -> int: + """ + Parse MQ open-options from allowed formats into an integer bitmask. + + Accepts: + - int (returned as-is) + - numeric string (decimal or hex, e.g., "2" or "0x10") + - single symbol name from ``ibmmq.CMQC`` (e.g., "MQOO_INPUT_SHARED") + - pipe- or comma-separated symbols (e.g. "MQOO_INPUT_SHARED | MQOO_FAIL_IF_QUIESCING") + + Raises ValueError on unknown symbol tokens and TypeError for unsupported types. + """ + if value is None: + return ibmmq.CMQC.MQOO_INPUT_SHARED + + if isinstance(value, int): + return value + + if isinstance(value, str): + s = value.strip() + # Try numeric literal first (decimal or hex) + with suppress(ValueError): + if s.startswith(("0x", "0X")): + return int(s, 16) + return int(s) + + tokens = [t.strip() for t in re.split(r"\s*(?:\||,)\s*", s) if t.strip()] + if not tokens: + raise ValueError("Empty open_options string") + + result = 0 + for token in tokens: + if hasattr(ibmmq.CMQC, token): + result |= getattr(ibmmq.CMQC, token) + else: + raise ValueError(f"Unknown MQ open option token: {token}") + return result + + raise TypeError("open_options must be an int or string of MQOO_* tokens") + + @classmethod + def get_ui_field_behaviour(cls) -> dict[str, Any]: + """Return custom UI field behaviour for IBM MQ Connection.""" + return { + "hidden_fields": ["schema"], + "placeholders": { + "host": "mq.example.com", + "port": "1414", + "login": "app_user", + "extra": json.dumps( + { + 
"queue_manager": "QM1", + "channel": "DEV.APP.SVRCONN", + "open_options": cls.default_open_options, + }, + indent=2, + ), + }, + } + + @classmethod + @requires_ibmmq + def get_open_options_flags(cls, open_options: int) -> list[str]: + """ + Return the symbolic MQ open option flags set in a given bitmask. + + Each flag corresponds to a constant in ``ibmmq.CMQC`` that starts with ``MQOO_``. + + :param open_options: The integer bitmask used when opening an MQ queue + (e.g., ``MQOO_INPUT_EXCLUSIVE | MQOO_FAIL_IF_QUIESCING``). + + :return: A list of the names of the MQ open flags that are set in the bitmask. + For example, ``['MQOO_INPUT_EXCLUSIVE', 'MQOO_FAIL_IF_QUIESCING']``. + + Example: + >>> open_options = ibmmq.CMQC.MQOO_INPUT_SHARED | ibmmq.CMQC.MQOO_FAIL_IF_QUIESCING + >>> cls.get_open_options_flags(open_options) + ['MQOO_INPUT_SHARED', 'MQOO_FAIL_IF_QUIESCING'] + """ + return [ + name + for name, value in vars(ibmmq.CMQC).items() + if name.startswith("MQOO_") and (open_options & value) + ] + + def get_open_options(self, queue_name: str) -> int | None: + # Prefer a resolved value set by ``get_conn`` during the connection + # context; fall back to the instance attribute if present. + open_options_val = getattr(self, "_resolved_open_options", None) + if open_options_val is None: + open_options_val = self.open_options + + if open_options_val is not None: + flag_names = self.get_open_options_flags(open_options_val) + self.log.info( + "Opening MQ queue '%s' with open_options=%s (%s)", + queue_name, + open_options_val, + ", ".join(flag_names), + ) + return open_options_val + + @staticmethod + @requires_ibmmq + def _connect(queue_manager: str, channel: str, conn_info: str, csp): + """ + Connect to the IBM MQ queue manager. + + Connection errors from the C client are caught and re-raised as a + :class:`ConnectionError` with a human-readable message. 
+ + :return: IBM MQ connection object + """ + try: + return ibmmq.connect(queue_manager, channel, conn_info, csp=csp) + except (ibmmq.MQMIError, ibmmq.PYIFError) as e: + raise ConnectionError( + f"Failed to connect to IBM MQ queue manager '{queue_manager}' " + f"at {conn_info} on channel '{channel}': {e}" + ) from e + + @contextmanager + @requires_ibmmq + def get_conn(self, connection: Connection | None = None): + """ + Sync context manager for IBM MQ connection lifecycle. + + Must be called from the executor thread (not the event loop thread). + Retrieves the Airflow connection (or uses the explicitly supplied one), + extracts MQ parameters, and manages the IBM MQ connection lifecycle. + + :param connection: Optional Airflow connection object. When omitted, + the connection is resolved from ``self.conn_id``. + :yield: IBM MQ connection object + """ + connection = connection or BaseHook.get_connection(self.conn_id) + config = connection.extra_dejson + queue_manager = self.queue_manager or config.get("queue_manager") + channel = self.channel or config.get("channel") + + if not queue_manager: + raise ValueError("queue_manager must be set in Connection extra config or hook init") + if not channel: + raise ValueError("channel must be set in Connection extra config or hook init") + + # Resolve open_options without mutating the hook instance so the + # connection remains idempotent across calls. The temporary resolved + # value is stored on the instance for the duration of the context + # manager and removed afterward. + if self.open_options is None: + config_value = config.get("open_options", self.default_open_options) + # Use the class-level parser so callers (and tests) can reuse the + # logic without importing a module-private helper. + resolved_open_options = self.parse_open_options(config_value) + else: + resolved_open_options = self.open_options + + # Store the resolved value temporarily for get_open_options to pick up + # while inside the connection context. 
+ self._resolved_open_options = resolved_open_options + + csp = ibmmq.CSP() + csp.CSPUserId = connection.login + csp.CSPPassword = connection.password + + conn_info = f"{connection.host}({connection.port})" + conn = self._connect(queue_manager, channel, conn_info, csp) + try: + yield conn + finally: + # Remove the temporary resolved value so the hook instance is + # unchanged after the context exits. + with suppress(Exception): + delattr(self, "_resolved_open_options") + with suppress(Exception): + conn.disconnect() + + async def aconsume(self, queue_name: str, poll_interval: float = 5) -> str | None: + """ + Asynchronous version of :meth:`consume`. + + Wait for a single message from the specified IBM MQ queue and return its decoded payload. + + The method retries with exponential back-off whenever the underlying + :meth:`consume` returns ``None`` (connection broken, timeout) or raises + an unexpected exception, so that an AssetWatcher is never silently killed + by a transient failure. + + All blocking IBM MQ operations ('connect', 'open', 'get', 'close', 'disconnect') run in a + separate thread via 'sync_to_async' to satisfy the IBM MQ C client's thread-affinity + requirement — every operation on a connection must happen from the thread that created it. + + A :class:`threading.Event` stop signal is passed to the worker so that, when this + coroutine is canceled (e.g. because the Airflow triggerer reassigns the watcher to + another pod), the background thread exits cleanly after the current 'q.get' call + times out (at most 'poll_interval' seconds). This prevents orphaned + threads from continuing to poll after cancellation, but it does **not** + change the delivery guarantee: :meth:`consume` uses + ``MQGMO_NO_SYNCPOINT``, so cancellation after ``q.get`` returns but + before the trigger yields an event can still lose that message. + + :param queue_name: Name of the IBM MQ queue to consume messages from. 
+ :param poll_interval: Interval in seconds used to wait for messages and to control + how long the underlying MQ 'get' operation blocks before checking again. + :return: The decoded message payload. + """ + connection = await get_async_connection(self.conn_id) + backoff = _BACKOFF_BASE + while True: + loop = asyncio.get_running_loop() + future = loop.create_future() + stop_event = threading.Event() + thread = IBMMQConsumer( + hook=self, + connection=connection, + queue_name=queue_name, + poll_interval=poll_interval, + loop=loop, + future=future, + stop_event=stop_event, + ) + thread.start() + + try: + result = await future + + if result is not None: + return result + except asyncio.CancelledError: + stop_event.set() + raise + except IBMMQError as e: + if e.transient: + self.log.warning( + "Transient MQ error on queue '%s': completion_code=%s reason_code=%s (%s); retrying in %.1fs", + queue_name, + e.comp, + e.reason, + e, + backoff, + ) + await asyncio.sleep(backoff) + backoff = min(backoff * _BACKOFF_FACTOR, _BACKOFF_MAX) + continue + self.log.error( + "Permanent MQ error on queue '%s': completion_code=%s reason_code=%s (%s) -- not retrying", + queue_name, + e.comp, + e.reason, + e, + ) + raise + except Exception: + # Programming errors should not be retried + self.log.exception( + "Unexpected error in IBM MQ consume for queue '%s' -- not retrying", + queue_name, + ) + raise + finally: + stop_event.set() + thread.join(timeout=poll_interval + 1) + + self.log.info( + "IBM MQ consume returned no event for queue '%s'; queue may be quiet. Retrying in %.1fs", + queue_name, + backoff, + ) + await asyncio.sleep(backoff) + backoff = min(backoff * _BACKOFF_FACTOR, _BACKOFF_MAX) + + async def aproduce(self, queue_name: str, payload: str, open_options: int | None = None) -> None: + """ + Asynchronous version of :meth:`produce`. + + Put a message onto the specified IBM MQ queue. 
+ + All blocking IBM MQ operations run in a separate thread via 'sync_to_async' for the same + thread-safety reasons as :meth:`aconsume`. + + :param queue_name: Name of the IBM MQ queue to which the message should be sent. + :param payload: Message payload to send. The payload will be encoded as UTF-8 + before being placed on the queue. + :param open_options: Integer bitmask of ``MQOO_*`` open options for the queue. + If not provided, defaults to ``MQOO_OUTPUT``. + :return: None + """ + connection = await get_async_connection(self.conn_id) + await sync_to_async(self.produce, thread_sensitive=False)( + connection, queue_name, payload, open_options + ) + + @requires_ibmmq + def produce( + self, + connection: Connection, + queue_name: str, + payload: str, + open_options: int | None = None, + ) -> None: + """ + Blocking implementation of :meth:`aproduce`. + + :param connection: Airflow connection object. + :param queue_name: Name of the IBM MQ queue to which the message should be sent. + :param payload: Message payload to send. The payload will be encoded as UTF-8 + before being placed on the queue. + :param open_options: Integer bitmask of ``MQOO_*`` open options for the queue. + If not provided, defaults to ``MQOO_OUTPUT``. 
+ """ + od = ibmmq.OD() + od.ObjectName = queue_name + + md = ibmmq.MD() + md.Format = ibmmq.CMQC.MQFMT_STRING + md.CodedCharSetId = 1208 + md.Encoding = ibmmq.CMQC.MQENC_NATIVE + + if open_options is None: + open_options = ibmmq.CMQC.MQOO_OUTPUT + + try: + with self.get_conn(connection=connection) as conn: + q = ibmmq.Queue(conn, od, open_options) + try: + q.put(payload.encode("utf-8"), md) + finally: + with suppress(Exception): + q.close() + except Exception: + self.log.exception( + "MQ produce failed on queue '%s'", + queue_name, + ) + raise diff --git a/providers/ibm/mq/src/airflow/providers/ibm/mq/queues/__init__.py b/providers/ibm/mq/src/airflow/providers/ibm/mq/queues/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/ibm/mq/src/airflow/providers/ibm/mq/queues/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
diff --git a/providers/ibm/mq/src/airflow/providers/ibm/mq/queues/mq.py b/providers/ibm/mq/src/airflow/providers/ibm/mq/queues/mq.py
new file mode 100644
index 0000000000000..3bb6382d495e6
--- /dev/null
+++ b/providers/ibm/mq/src/airflow/providers/ibm/mq/queues/mq.py
@@ -0,0 +1,160 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING, Any
+from urllib.parse import parse_qs, urlparse
+
+from airflow.providers.common.messaging.providers.base_provider import BaseMessageQueueProvider
+from airflow.providers.ibm.mq.triggers.mq import AwaitMessageTrigger
+
+if TYPE_CHECKING:
+    from airflow.triggers.base import BaseEventTrigger
+
+# [START queue_regexp]
+# Matches ibmmq://<conn_id>/<queue_name> (conn_id required, queue_name required)
+QUEUE_REGEXP = r"^ibmmq://[^/]+/.+"
+# [END queue_regexp]
+
+
+class IBMMQMessageQueueProvider(BaseMessageQueueProvider):
+    """
+    Configuration for IBM MQ integration with common-messaging.
+
+    [START ibmmq_message_queue_provider_description]
+
+    * It uses ``ibmmq`` as scheme for identifying IBM MQ queues.
+    * For parameter definitions take a look at
+      :class:`~airflow.providers.ibm.mq.triggers.mq.AwaitMessageTrigger`.
+
+    .. code-block:: python
+
+        from airflow.providers.common.messaging.triggers.msg_queue import MessageQueueTrigger
+        from airflow.sdk import Asset, AssetWatcher
+
+        trigger = MessageQueueTrigger(
+            queue="ibmmq://mq_default/MY.QUEUE.NAME",
+        )
+
+        asset = Asset("mq_topic_asset", watchers=[AssetWatcher(name="mq_watcher", trigger=trigger)])
+
+    [END ibmmq_message_queue_provider_description]
+    """
+
+    scheme = "ibmmq"
+
+    def queue_matches(self, queue: str) -> bool:
+        """Return ``True`` when ``queue`` matches ``QUEUE_REGEXP`` (an ``ibmmq://`` URI)."""
+        return bool(re.match(QUEUE_REGEXP, queue))
+
+    def trigger_class(self) -> type[BaseEventTrigger]:
+        """Return :class:`AwaitMessageTrigger`, the trigger class used for ``ibmmq`` queues."""
+        return AwaitMessageTrigger  # type: ignore[return-value]
+
+    @staticmethod
+    def _resolve_open_option(token: str, cmqc: Any) -> int:
+        """
+        Translate one ``open_options`` token into its integer value.
+
+        :param token: A number accepted by ``int(token, 0)`` or an ``MQOO_*`` constant name.
+        :param cmqc: Namespace holding the ``MQOO_*`` constants (``ibmmq.CMQC``).
+        :raises ValueError: If the token is neither numeric nor an integer ``MQOO_*`` constant.
+        """
+        try:
+            return int(token, 0)
+        except ValueError:
+            # Not numeric: only MQOO_* names are looked up, so other CMQC attributes are rejected.
+            option_value = getattr(cmqc, token, None) if token.startswith("MQOO_") else None
+            if not isinstance(option_value, int):
+                raise ValueError(
+                    f"Invalid MQ URI open_options value '{token}'. "
+                    "Use numeric values or MQOO_* CMQC constant names "
+                    "(for example MQOO_INPUT_SHARED)."
+                ) from None
+            return option_value
+
+    def trigger_kwargs(self, queue: str, **kwargs) -> dict[str, Any]:
+        """
+        Build ``AwaitMessageTrigger`` keyword arguments from an ``ibmmq://`` queue URI.
+
+        The URI host part is used as the connection id and the path as the queue name. Every
+        ``open_options`` query value may hold several tokens separated by ``|`` or ``,``; all
+        tokens are OR-ed into one bitmask.
+
+        :param queue: Queue URI in the form ``ibmmq://<conn_id>/<queue_name>``.
+        :param kwargs: Raw keyword arguments given by the caller; ``poll_interval`` and
+            ``open_options`` are not emitted again when they are present here.
+        :return: Keyword arguments for the trigger.
+        :raises ValueError: If the connection id or queue name is missing, or an
+            ``open_options`` value is empty or invalid.
+        """
+        # Parse the queue URI
+        parsed = urlparse(queue)
+        query_params = parse_qs(parsed.query, keep_blank_values=True)
+
+        if not parsed.netloc:
+            raise ValueError(
+                "MQ URI must contain connection id. Expected format: ibmmq://<conn_id>/<queue_name>"
+            )
+
+        conn_id = parsed.netloc
+
+        queue_name = parsed.path.lstrip("/")
+        if not queue_name:
+            raise ValueError(
+                "MQ URI must contain queue name. Expected format: ibmmq://<conn_id>/<queue_name>"
+            )
+
+        open_options: int | None = None
+        raw_open_options = query_params.get("open_options", [])
+        if raw_open_options:
+            import ibmmq
+
+            open_options = 0
+            for value in raw_open_options:
+                if not value:
+                    raise ValueError(
+                        "MQ URI open_options query parameter values must be numeric or CMQC constant names"
+                    )
+                # Accept both "A|B" and "A,B"; blank fragments (e.g. "A,,B") are ignored.
+                parts = value.replace("|", ",").split(",")
+                tokens = [part.strip() for part in parts if part.strip()]
+                if not tokens:
+                    raise ValueError(
+                        "MQ URI open_options query parameter values must be numeric "
+                        "or MQOO_* CMQC constant names"
+                    )
+                for token in tokens:
+                    open_options |= self._resolve_open_option(token, ibmmq.CMQC)
+
+        if open_options == 0:
+            # A zero bitmask carries no flags, so it is treated like "not given" and not emitted below.
+            open_options = None
+        trigger_kwargs: dict[str, Any] = {
+            "mq_conn_id": conn_id,
+            "queue_name": queue_name,
+        }
+        if "poll_interval" not in kwargs:
+            trigger_kwargs["poll_interval"] = 5
+
+        # MessageQueueTrigger(queue=..., **kwargs) passes both provider kwargs and raw kwargs.
+        # Avoid duplicate keyword errors by not re-emitting keys already present in raw kwargs.
+        if open_options is not None and "open_options" not in kwargs:
+            trigger_kwargs["open_options"] = open_options
+        return trigger_kwargs
diff --git a/providers/ibm/mq/src/airflow/providers/ibm/mq/triggers/__init__.py b/providers/ibm/mq/src/airflow/providers/ibm/mq/triggers/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/providers/ibm/mq/src/airflow/providers/ibm/mq/triggers/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/ibm/mq/src/airflow/providers/ibm/mq/triggers/mq.py b/providers/ibm/mq/src/airflow/providers/ibm/mq/triggers/mq.py new file mode 100644 index 0000000000000..fc5422c73cec9 --- /dev/null +++ b/providers/ibm/mq/src/airflow/providers/ibm/mq/triggers/mq.py @@ -0,0 +1,80 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations
+
+from typing import Any
+
+from airflow.providers.ibm.mq.hooks.mq import IBMMQHook
+from airflow.providers.ibm.mq.version_compat import AIRFLOW_V_3_0_PLUS
+from airflow.triggers.base import TriggerEvent
+
+if AIRFLOW_V_3_0_PLUS:
+    from airflow.triggers.base import BaseEventTrigger
+else:
+    from airflow.triggers.base import BaseTrigger as BaseEventTrigger  # type: ignore
+
+
+class AwaitMessageTrigger(BaseEventTrigger):
+    """
+    Wait for a message on an IBM MQ queue and emit it as a ``TriggerEvent``.
+
+    Consumption is delegated to :meth:`IBMMQHook.aconsume`, which retries with
+    exponential back-off on transient failures, so that a broken connection or a
+    swallowed exception never silently kills an AssetWatcher.
+
+    Messages are consumed with ``MQGMO_NO_SYNCPOINT``, which gives **at-most-once**
+    delivery semantics: a message that IBM MQ has already returned may be lost if
+    the trigger is canceled before Airflow yields the corresponding ``TriggerEvent``.
+
+    :param mq_conn_id: Airflow connection id for the IBM MQ broker.
+    :param queue_name: Name of the IBM MQ queue to consume from.
+    :param poll_interval: Seconds between MQ get attempts inside the hook.
+    :param open_options: Optional integer bitmask of MQOO_* flags for queue open.
+    """
+
+    def __init__(
+        self,
+        mq_conn_id: str,
+        queue_name: str,
+        poll_interval: float = 5,
+        open_options: int | None = None,
+    ) -> None:
+        super().__init__()
+        # Kept as plain attributes; ``serialize`` emits exactly these four values.
+        self.mq_conn_id = mq_conn_id
+        self.queue_name = queue_name
+        self.poll_interval = poll_interval
+        self.open_options = open_options
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        """Return the trigger's import path and the keyword arguments needed to rebuild it."""
+        cls = self.__class__
+        classpath = f"{cls.__module__}.{cls.__name__}"
+        init_kwargs: dict[str, Any] = {
+            "mq_conn_id": self.mq_conn_id,
+            "queue_name": self.queue_name,
+            "poll_interval": self.poll_interval,
+            "open_options": self.open_options,
+        }
+        return classpath, init_kwargs
+
+    async def run(self):
+        """Await one result from ``IBMMQHook.aconsume`` and yield it wrapped in a ``TriggerEvent``."""
+        hook = IBMMQHook(self.mq_conn_id, open_options=self.open_options)
+        message = await hook.aconsume(
+            queue_name=self.queue_name,
+            poll_interval=self.poll_interval,
+        )
+        yield TriggerEvent(message)
diff --git a/providers/ibm/mq/src/airflow/providers/ibm/mq/version_compat.py b/providers/ibm/mq/src/airflow/providers/ibm/mq/version_compat.py
new file mode 100644
index 0000000000000..0956edd21112f
--- /dev/null
+++ b/providers/ibm/mq/src/airflow/providers/ibm/mq/version_compat.py
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# NOTE!
+# THIS FILE IS COPIED MANUALLY IN OTHER PROVIDERS DELIBERATELY TO AVOID ADDING UNNECESSARY
+# DEPENDENCIES BETWEEN PROVIDERS. IF YOU WANT TO ADD CONDITIONAL CODE IN YOUR PROVIDER THAT DEPENDS
+# ON AIRFLOW VERSION, PLEASE COPY THIS FILE TO THE ROOT PACKAGE OF YOUR PROVIDER AND IMPORT
+# THOSE CONSTANTS FROM IT RATHER THAN IMPORTING THEM FROM ANOTHER PROVIDER OR TEST CODE
+#
+from __future__ import annotations
+
+
+def get_base_airflow_version_tuple() -> tuple[int, int, int]:
+    """
+    Return the installed Airflow version as a ``(major, minor, micro)`` tuple.
+
+    Only the three release numbers of ``airflow.__version__`` are returned; any other
+    part of the version string is not included in the result.
+    """
+    from packaging.version import Version
+
+    from airflow import __version__
+
+    airflow_version = Version(__version__)
+    return airflow_version.major, airflow_version.minor, airflow_version.micro
+
+
+# True when the installed Airflow is 3.0.0 or newer (evaluated once, at import time).
+AIRFLOW_V_3_0_PLUS = get_base_airflow_version_tuple() >= (3, 0, 0)
+
+__all__ = [
+    "AIRFLOW_V_3_0_PLUS",
+]
diff --git a/providers/ibm/mq/tests/conftest.py b/providers/ibm/mq/tests/conftest.py
new file mode 100644
index 0000000000000..e21ed098eb7a2
--- /dev/null
+++ b/providers/ibm/mq/tests/conftest.py
@@ -0,0 +1,120 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import importlib.util
+import sys
+from types import ModuleType, SimpleNamespace
+from unittest.mock import MagicMock
+
+import pytest
+
+pytest_plugins = "tests_common.pytest_plugin"
+
+
+@pytest.fixture(autouse=True)
+def _ensure_fake_ibmmq_if_missing():
+    """
+    Provide a fake ``ibmmq`` module while a test runs if the real package is missing.
+
+    When ``ibmmq`` cannot be found, a minimal stand-in is registered in ``sys.modules``
+    and bound to the already-imported hook module for the duration of the test, then
+    removed again. This avoids global import-time mutations while preserving test
+    behavior. When the real package is available the fixture does nothing.
+    """
+    # ``importlib.util`` is imported explicitly above: a bare ``import importlib`` does not
+    # guarantee that the ``util`` submodule is loaded.
+    if importlib.util.find_spec("ibmmq") is not None:
+        # Real package available, nothing to do.
+        yield
+        return
+
+    from airflow.providers.ibm.mq.hooks import mq
+
+    # Create a minimal fake module with the attributes our tests expect.
+    fake = ModuleType("ibmmq")
+    # https://www.ibm.com/docs/en/ibm-mq/9.4.x?topic=20-cmqc
+    fake.CMQC = SimpleNamespace(
+        MQCC_OK=0,
+        MQCC_WARNING=1,
+        MQCC_FAILED=2,
+        MQRC_NONE=0,
+        MQRC_CONNECTION_BROKEN=2009,
+        MQRC_Q_MGR_NOT_AVAILABLE=2059,
+        MQRC_NO_MSG_AVAILABLE=2033,
+        MQRC_NOT_AUTHORIZED=2035,
+        MQRC_UNKNOWN_OBJECT_NAME=2085,
+        MQRC_Q_MGR_NAME_ERROR=2058,
+        MQRC_Q_MGR_QUIESCING=2161,
+        MQRC_CONNECTION_QUIESCING=2202,
+        MQRC_HOST_NOT_AVAILABLE=2538,
+        MQGMO_NO=0,
+        MQGMO_NO_WAIT=0,
+        MQGMO_WAIT=1,
+        MQGMO_NO_SYNCPOINT=2,
+        MQGMO_CONVERT=4,
+        MQOO_INPUT_EXCLUSIVE=4,
+        MQOO_INPUT_SHARED=2,
+        MQOO_FAIL_IF_QUIESCING=8192,
+        MQOO_OUTPUT=0x00000010,
+        # NOTE(review): IBM MQ format names are presumably 8 characters, blank padded -- confirm padding.
+        MQFMT_STRING=b"MQSTR ",
+        MQENC_NATIVE=0x00000111,
+    )
+
+    class MQMIError(Exception):
+        """Stand-in for ``ibmmq.MQMIError`` carrying a completion code and a reason code."""
+
+        def __init__(self, comp: int | None = None, reason: int | None = None):
+            # Missing (or zero) codes fall back to the "OK" / "no reason" constants.
+            self.comp = comp or fake.CMQC.MQCC_OK
+            self.reason = reason or fake.CMQC.MQRC_NONE
+
+        def __str__(self) -> str:
+            return f"MQI Error. Comp {self.comp}, Reason {self.reason}: {self.error_as_string()}"
+
+        def error_as_string(self) -> str:
+            """Return the exception object MQI warning/failed reason as its mnemonic string."""
+            if self.comp == fake.CMQC.MQCC_OK:
+                return "OK"
+
+            if self.comp == fake.CMQC.MQCC_WARNING:
+                pfx = "WARNING"
+            else:
+                pfx = "FAILED"
+
+            return f"{pfx}: Error code {self.reason} not defined"
+
+    class PYIFError(Exception):
+        """Stand-in for ``ibmmq.PYIFError``; keeps the given value on ``error``."""
+
+        def __init__(self, e=""):
+            self.error = e
+
+    fake.MQMIError = MQMIError
+    fake.PYIFError = PYIFError
+    fake.OD = MagicMock()
+    fake.MD = MagicMock()
+    fake.GMO = MagicMock()
+    fake.CSP = MagicMock()
+    fake.Queue = MagicMock()
+    fake.connect = MagicMock()
+    fake.RFH2 = MagicMock()
+
+    # Sentinel that tells "attribute was absent" apart from "attribute was None".
+    missing = object()
+
+    sys.modules["ibmmq"] = fake
+    original_mq = getattr(mq, "ibmmq", missing)
+    # Bind the fake into the already-imported hook module (if present) so that
+    # module-level references to `ibmmq` are satisfied even if the hook was
+    # imported before this fixture ran during collection.
+    setattr(mq, "ibmmq", fake)
+
+    try:
+        yield
+    finally:
+        # Remove the injected fake to avoid leaking into other tests
+        sys.modules.pop("ibmmq", None)
+        if original_mq is missing:
+            # The hook module had no ``ibmmq`` attribute before; do not leave a stray one behind.
+            if hasattr(mq, "ibmmq"):
+                delattr(mq, "ibmmq")
+        else:
+            setattr(mq, "ibmmq", original_mq)
diff --git a/providers/ibm/mq/tests/system/__init__.py b/providers/ibm/mq/tests/system/__init__.py
new file mode 100644
index 0000000000000..e8fd22856438c
--- /dev/null
+++ b/providers/ibm/mq/tests/system/__init__.py
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/ibm/mq/tests/system/ibm/__init__.py b/providers/ibm/mq/tests/system/ibm/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/ibm/mq/tests/system/ibm/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/ibm/mq/tests/system/ibm/mq/__init__.py b/providers/ibm/mq/tests/system/ibm/mq/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/ibm/mq/tests/system/ibm/mq/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/ibm/mq/tests/system/ibm/mq/example_dag_message_queue_trigger.py b/providers/ibm/mq/tests/system/ibm/mq/example_dag_message_queue_trigger.py new file mode 100644 index 0000000000000..7304b2cb15334 --- /dev/null +++ b/providers/ibm/mq/tests/system/ibm/mq/example_dag_message_queue_trigger.py @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations
+
+# [START howto_trigger_message_queue]
+from airflow.providers.common.messaging.triggers.msg_queue import MessageQueueTrigger
+from airflow.sdk import DAG, Asset, AssetWatcher, task
+
+# Define a trigger that listens to an external message queue (IBM MQ in this case)
+trigger = MessageQueueTrigger(
+    queue="ibmmq://mq_default/MY.QUEUE.NAME",
+)
+
+mq_topic_asset = Asset(
+    "mq_topic_asset",
+    watchers=[AssetWatcher(name="mq_watcher", trigger=trigger)],
+)
+
+with DAG(dag_id="example_ibm_mq_watcher", schedule=[mq_topic_asset]) as dag:
+
+    @task
+    def process_message(**context):
+        """Print every asset event that triggered this run, together with its message payload."""
+        for event in context["triggering_asset_events"][mq_topic_asset]:
+            # Get the message from the TriggerEvent payload
+            print("Processing event: ", event)
+            payload = event.extra["payload"]
+            print("Actual payload: ", payload)
+
+    # Calling the decorated function is what adds the task to the DAG; without this call
+    # the DAG would contain no tasks at all.
+    process_message()
+# [END howto_trigger_message_queue]
+
+
+from tests_common.test_utils.system_tests import get_test_run  # noqa: E402
+
+# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
+test_run = get_test_run(dag)
diff --git a/providers/ibm/mq/tests/unit/__init__.py b/providers/ibm/mq/tests/unit/__init__.py
new file mode 100644
index 0000000000000..e8fd22856438c
--- /dev/null
+++ b/providers/ibm/mq/tests/unit/__init__.py
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/ibm/mq/tests/unit/ibm/__init__.py b/providers/ibm/mq/tests/unit/ibm/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/ibm/mq/tests/unit/ibm/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/ibm/mq/tests/unit/ibm/mq/__init__.py b/providers/ibm/mq/tests/unit/ibm/mq/__init__.py new file mode 100644 index 0000000000000..5966d6b1d5261 --- /dev/null +++ b/providers/ibm/mq/tests/unit/ibm/mq/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/providers/ibm/mq/tests/unit/ibm/mq/hooks/__init__.py b/providers/ibm/mq/tests/unit/ibm/mq/hooks/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/ibm/mq/tests/unit/ibm/mq/hooks/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
diff --git a/providers/ibm/mq/tests/unit/ibm/mq/hooks/test_mq.py b/providers/ibm/mq/tests/unit/ibm/mq/hooks/test_mq.py new file mode 100644 index 0000000000000..1288322a0cd03 --- /dev/null +++ b/providers/ibm/mq/tests/unit/ibm/mq/hooks/test_mq.py @@ -0,0 +1,1150 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +import asyncio +import logging +import operator +import struct +import threading +from functools import reduce +from itertools import count +from typing import Any +from unittest import mock +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from airflow.models import Connection +from airflow.providers.ibm.mq.hooks.mq import ( + _BACKOFF_BASE, + _BACKOFF_FACTOR, + _BACKOFF_MAX, + _NON_MQ_SENTINEL, + IBMMQConsumer, + IBMMQError, + IBMMQHook, +) + +MQ_PAYLOAD = """RFH x"MQSTR jms_map topic://localhost/topic17721219474762414D5143514D49413030542020202069774D7092F81057Llocal26.01.00 4topic {}""" + + +def mq_connection(open_options: Any = None): + """Create a test MQ connection object.""" + import json + + extra: dict[str, Any] = {"queue_manager": "QM1", "channel": "DEV.APP.SVRCONN"} + + if open_options is not None: + extra["open_options"] = open_options + + return Connection( + conn_id="mq_conn", + conn_type="ibmmq", + host="mq.example.com", + login="user", + password="pass", + port=1414, + extra=json.dumps(extra), + ) + + +@pytest.fixture +def mock_get_connection(): + """Fixture that mocks get_async_connection to return a test connection.""" + with patch( + "airflow.providers.ibm.mq.hooks.mq.get_async_connection", + new_callable=AsyncMock, + ) as mock_conn: + mock_conn.return_value = mq_connection() + yield mock_conn + + +@pytest.fixture +def mock_base_get_connection(): + """Fixture that mocks BaseHook.get_connection to return a test connection.""" + with patch("airflow.providers.ibm.mq.hooks.mq.BaseHook.get_connection") as mock_conn: + mock_conn.return_value = mq_connection() + yield mock_conn + + +@pytest.fixture +def patch_sync_to_async(): + """Patch sync_to_async to call the wrapped function directly for testing.""" + + def sync_to_async(func, **kwargs): + """Wrap a sync function so it can be awaited directly.""" + + async def wrapper(*args, **kwargs): + return func(*args, **kwargs) + + return wrapper + 
+ with patch("airflow.providers.ibm.mq.hooks.mq.sync_to_async", side_effect=sync_to_async): + yield + + +def fake_get(*args, **kwargs): + import ibmmq + + raise ibmmq.MQMIError(comp=ibmmq.CMQC.MQCC_FAILED, reason=ibmmq.CMQC.MQRC_CONNECTION_BROKEN) + + +@pytest.mark.asyncio +class TestIBMMQHook: + @pytest.fixture(autouse=True) + def setup_connections(self, create_connection_without_db): + create_connection_without_db( + Connection( + conn_id="mq_conn", + conn_type="mq", + host="mq.example.com", + login="user", + password="pass", + port=1414, + extra='{"queue_manager": "QM1", "channel": "DEV.APP.SVRCONN"}', + ) + ) + self.hook = IBMMQHook("mq_conn") + + @pytest.mark.parametrize( + ("open_options_attr", "use_explicit_connection"), + [ + pytest.param("MQGMO_NO_WAIT", True, id="explicit_connection_no_wait"), + pytest.param("MQGMO_NO_WAIT", False, id="hook_connection_no_wait"), + pytest.param("MQOO_INPUT_EXCLUSIVE", True, id="explicit_connection_input_exclusive"), + pytest.param("MQOO_INPUT_EXCLUSIVE", False, id="hook_connection_input_exclusive"), + pytest.param("MQOO_INPUT_SHARED", True, id="explicit_connection_input_shared"), + pytest.param("MQOO_INPUT_SHARED", False, id="hook_connection_input_shared"), + ], + ) + async def test_get_conn_with_open_options( + self, + open_options_attr, + use_explicit_connection, + mock_base_get_connection, + ): + import ibmmq + + open_options = getattr(ibmmq.CMQC, open_options_attr) + hook = IBMMQHook("mq_conn") + mock_conn = MagicMock() + + assert not hook.open_options + + hook.open_options = open_options + with patch.object(hook, "_connect", return_value=mock_conn) as mock_connect: + if use_explicit_connection: + with hook.get_conn(connection=mq_connection()): + pass + else: + with hook.get_conn(): + pass + + assert hook.open_options == open_options + + mock_connect.assert_called_once() + mock_conn.disconnect.assert_called_once() + if use_explicit_connection: + mock_base_get_connection.assert_not_called() + else: + 
mock_base_get_connection.assert_called_once_with("mq_conn") + + @pytest.mark.parametrize( + ("open_options", "expect_exception", "expected_resolved"), + [ + (3, False, 3), + ("MQOO_INPUT_EXCLSUVE", True, None), + ], + ) + def test_get_conn_resolves_or_errors_based_on_connection_extra( + self, open_options, expect_exception, expected_resolved + ): + hook = IBMMQHook(open_options=None) + mock_conn = MagicMock() + + with patch.object(hook, "_connect", return_value=mock_conn) as mock_connect: + if expect_exception: + with pytest.raises(ValueError, match="Unknown MQ open option token"): + with hook.get_conn(connection=mq_connection(open_options=open_options)): + pass + assert hook.open_options is None + + mock_connect.assert_not_called() + mock_conn.disconnect.assert_not_called() + else: + with hook.get_conn(connection=mq_connection(open_options=open_options)): + assert hasattr(hook, "_resolved_open_options") + assert getattr(hook, "_resolved_open_options") == expected_resolved + + assert not hasattr(hook, "_resolved_open_options") + assert hook.open_options is None + + mock_connect.assert_called_once() + mock_conn.disconnect.assert_called_once() + + @pytest.mark.parametrize( + ("open_options_attr", "expected_flags"), + [ + ("MQOO_INPUT_EXCLUSIVE", ["MQOO_INPUT_EXCLUSIVE"]), + ("MQOO_INPUT_SHARED", ["MQOO_INPUT_SHARED"]), + ( + "MQOO_INPUT_SHARED | MQOO_FAIL_IF_QUIESCING", + ["MQOO_INPUT_SHARED", "MQOO_FAIL_IF_QUIESCING"], + ), + ], + ) + async def test_get_open_options_flags(self, open_options_attr, expected_flags): + import ibmmq + + open_options = [ + getattr(ibmmq.CMQC, open_option.strip()) for open_option in open_options_attr.split("|") + ] + combined_options = reduce(operator.or_, open_options) + flags = IBMMQHook.get_open_options_flags(combined_options) + + assert flags == expected_flags + + @pytest.mark.parametrize( + ("value", "expected"), + [ + (None, 2), + (3, 3), + ("0x10", 0x10), + ("42", 42), + ("MQOO_INPUT_SHARED", 2), + ("MQOO_INPUT_SHARED | 
MQOO_FAIL_IF_QUIESCING", 2 | 8192), + ("MQOO_INPUT_SHARED, MQOO_FAIL_IF_QUIESCING", 2 | 8192), + (" MQOO_INPUT_SHARED | MQOO_FAIL_IF_QUIESCING ", 2 | 8192), + ], + ) + def test_parse_open_options_accepts_expected_formats(self, value, expected): + result = IBMMQHook.parse_open_options(value) + assert result == expected + + @pytest.mark.parametrize("bad_value", ["", "UNKNOWN_FLAG", [1, 2]]) + def test_parse_open_options_rejects_invalid_inputs(self, bad_value): + if isinstance(bad_value, list): + with pytest.raises(TypeError): + IBMMQHook.parse_open_options(bad_value) + else: + with pytest.raises(ValueError, match=r"Unknown MQ open option token|Empty open_options string"): + IBMMQHook.parse_open_options(bad_value) + + @patch("ibmmq.connect") + @patch("ibmmq.Queue") + async def test_aconsume_message( + self, mock_queue_class, mock_connect, mock_get_connection, patch_sync_to_async + ): + mock_qmgr = MagicMock() + mock_connect.return_value = mock_qmgr + + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + mock_queue.get.return_value = MQ_PAYLOAD.format("test message").encode() + + result = await self.hook.aconsume(queue_name="QUEUE1", poll_interval=0.1) + assert isinstance(result, str) + assert "test message" in result + + mock_connect.assert_called_once() + mock_queue_class.assert_called_once_with( + mock_qmgr, + mock.ANY, + mock.ANY, + ) + + @patch("ibmmq.connect") + @patch("ibmmq.Queue") + async def test_aproduce_message( + self, mock_queue_class, mock_connect, mock_get_connection, patch_sync_to_async + ): + mock_qmgr = MagicMock() + mock_connect.return_value = mock_qmgr + + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + + await self.hook.aproduce(queue_name="QUEUE1", payload="payload") + + mock_connect.assert_called_once() + mock_queue_class.assert_called_once_with( + mock_qmgr, + mock.ANY, + mock.ANY, + ) + mock_queue.put.assert_called_once() + + @pytest.mark.parametrize( + ("open_options_attr", 
"use_explicit_open_options"), + [ + pytest.param("MQOO_OUTPUT", False, id="default_output"), + pytest.param( + "MQOO_OUTPUT | MQOO_FAIL_IF_QUIESCING", True, id="custom_output_with_fail_if_quiescing" + ), + ], + ) + @patch("ibmmq.connect") + @patch("ibmmq.Queue") + async def test_aproduce_with_custom_open_options( + self, + mock_queue_class, + mock_connect, + mock_get_connection, + patch_sync_to_async, + open_options_attr, + use_explicit_open_options, + ): + import ibmmq + + mock_qmgr = MagicMock() + mock_connect.return_value = mock_qmgr + + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + + # Calculate the expected open_options value + open_options_values = [getattr(ibmmq.CMQC, opt.strip()) for opt in open_options_attr.split("|")] + expected_open_options = reduce(operator.or_, open_options_values) + + if use_explicit_open_options: + await self.hook.aproduce( + queue_name="QUEUE1", payload="payload", open_options=expected_open_options + ) + else: + await self.hook.aproduce(queue_name="QUEUE1", payload="payload") + # When not specified, should default to MQOO_OUTPUT + expected_open_options = ibmmq.CMQC.MQOO_OUTPUT + + mock_connect.assert_called_once() + # Verify Queue was called with the expected open_options + call_args = mock_queue_class.call_args + actual_open_options = call_args[0][2] # Third positional argument is open_options + assert actual_open_options == expected_open_options + mock_queue.put.assert_called_once() + + @patch("ibmmq.connect") + @patch("ibmmq.Queue") + async def test_aconsume_connection_broken( + self, mock_queue_class, mock_connect, mock_get_connection, patch_sync_to_async, caplog + ): + mock_qmgr = MagicMock() + mock_connect.return_value = mock_qmgr + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + mock_queue.get.side_effect = fake_get + + # aconsume() retries on None, so we need to cancel after the first attempt + with patch("airflow.providers.ibm.mq.hooks.mq.asyncio.sleep", 
new_callable=AsyncMock) as mock_sleep: + mock_sleep.side_effect = asyncio.CancelledError + with pytest.raises(asyncio.CancelledError): + await self.hook.aconsume(queue_name="QUEUE1", poll_interval=0.1) + + assert "Opening MQ queue 'QUEUE1' with open_options=2 (MQOO_INPUT_SHARED)" in caplog.text + + @patch("ibmmq.connect") + @patch("ibmmq.Queue") + def test_consume_uses_no_syncpoint(self, mock_queue_class, mock_connect, mock_get_connection): + import ibmmq + + mock_qmgr = MagicMock() + mock_connect.return_value = mock_qmgr + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + + stop_event = threading.Event() + captured_options: dict[str, int] = {} + + def get_message(_buffer, _md, gmo): + captured_options["value"] = gmo.Options + stop_event.set() + raise ibmmq.MQMIError(comp=ibmmq.CMQC.MQCC_WARNING, reason=ibmmq.CMQC.MQRC_NO_MSG_AVAILABLE) + + mock_queue.get.side_effect = get_message + + consumer = IBMMQConsumer( + hook=self.hook, + connection=mq_connection(), + queue_name="QUEUE1", + poll_interval=0.1, + loop=MagicMock(), + future=MagicMock(), + stop_event=stop_event, + ) + result = consumer.consume("QUEUE1", 0.1, stop_event) + + assert result is None + assert captured_options["value"] == ( + ibmmq.CMQC.MQGMO_WAIT | ibmmq.CMQC.MQGMO_NO_SYNCPOINT | ibmmq.CMQC.MQGMO_CONVERT + ) + + @patch("airflow.providers.ibm.mq.hooks.mq.asyncio.sleep", new_callable=AsyncMock) + async def test_aconsume_retries_on_none_then_succeeds(self, mock_sleep, patch_sync_to_async): + counter = count() + + def consume( + queue_name: str, + poll_interval: float, + stop_event: threading.Event, + ) -> str | None: + assert queue_name == "QUEUE1" + assert poll_interval == 0.1 + + if stop_event.is_set(): + raise RuntimeError("Should not occur in this test!") + + if next(counter) < 2: + return None + return "payload after retries" + + with patch("airflow.providers.ibm.mq.hooks.mq.IBMMQConsumer.consume", side_effect=consume): + result = await self.hook.aconsume(queue_name="QUEUE1", 
poll_interval=0.1) + + assert result == "payload after retries" + assert mock_sleep.call_count == 2 + mock_sleep.assert_any_call(_BACKOFF_BASE) + mock_sleep.assert_any_call(_BACKOFF_BASE * _BACKOFF_FACTOR) + + @pytest.mark.parametrize( + "transient_reason_name", + [ + "MQRC_CONNECTION_BROKEN", + "MQRC_Q_MGR_QUIESCING", + "MQRC_Q_MGR_NOT_AVAILABLE", + "MQRC_HOST_NOT_AVAILABLE", + "MQRC_CONNECTION_QUIESCING", + ], + ) + @patch("airflow.providers.ibm.mq.hooks.mq.asyncio.sleep", new_callable=AsyncMock) + async def test_aconsume_retries_on_transient_exception_then_succeeds( + self, mock_sleep, patch_sync_to_async, transient_reason_name + ): + import ibmmq + + transient_reason = getattr(ibmmq.CMQC, transient_reason_name) + transient_error = IBMMQError( + reason=transient_reason, + comp=ibmmq.CMQC.MQCC_FAILED, + transient=True, + message=f"Transient error: {transient_reason_name}", + ) + with patch( + "airflow.providers.ibm.mq.hooks.mq.IBMMQConsumer.consume", + side_effect=[transient_error, "recovered"], + ): + result = await self.hook.aconsume(queue_name="QUEUE1", poll_interval=0.1) + + assert result == "recovered" + mock_sleep.assert_called_once_with(_BACKOFF_BASE) + + async def test_aconsume_does_not_retry_on_non_transient_mq_exception(self, patch_sync_to_async): + import ibmmq + + non_transient_error = IBMMQError( + reason=ibmmq.CMQC.MQRC_NOT_AUTHORIZED, + comp=ibmmq.CMQC.MQCC_FAILED, + transient=False, + message="Not authorized", + ) + with patch( + "airflow.providers.ibm.mq.hooks.mq.IBMMQConsumer.consume", + side_effect=non_transient_error, + ): + with pytest.raises(IBMMQError): + await self.hook.aconsume(queue_name="QUEUE1", poll_interval=0.1) + + @patch("airflow.providers.ibm.mq.hooks.mq.asyncio.sleep", new_callable=AsyncMock) + async def test_aconsume_backoff_caps_at_max(self, mock_sleep, patch_sync_to_async): + failures_needed = 0 + backoff = _BACKOFF_BASE + while backoff < _BACKOFF_MAX: + backoff *= _BACKOFF_FACTOR + failures_needed += 1 + failures_needed += 3 
+ + with patch( + "airflow.providers.ibm.mq.hooks.mq.IBMMQConsumer.consume", + side_effect=[None] * failures_needed + ["finally"], + ): + result = await self.hook.aconsume(queue_name="QUEUE1", poll_interval=0.1) + + assert result == "finally" + capped_calls = [c for c in mock_sleep.call_args_list if c.args[0] == _BACKOFF_MAX] + assert len(capped_calls) >= 3 + + @patch("airflow.providers.ibm.mq.hooks.mq.asyncio.sleep", new_callable=AsyncMock) + async def test_aconsume_logs_debug_on_none(self, mock_sleep, patch_sync_to_async, caplog): + with patch("airflow.providers.ibm.mq.hooks.mq.IBMMQConsumer.consume", side_effect=[None, "message"]): + with caplog.at_level("DEBUG"): + await self.hook.aconsume(queue_name="QUEUE1", poll_interval=0.1) + + assert ( + "IBM MQ consume returned no event for queue 'QUEUE1'; queue may be quiet. Retrying in 1.0s" + in caplog.text + ) + + @patch("airflow.providers.ibm.mq.hooks.mq.asyncio.sleep", new_callable=AsyncMock) + async def test_aconsume_logs_warning_on_exception(self, mock_sleep, patch_sync_to_async, caplog): + import ibmmq + + transient_error = IBMMQError( + reason=ibmmq.CMQC.MQRC_CONNECTION_BROKEN, + comp=ibmmq.CMQC.MQCC_FAILED, + transient=True, + message="Connection broken", + ) + with patch( + "airflow.providers.ibm.mq.hooks.mq.IBMMQConsumer.consume", + side_effect=[transient_error, "ok"], + ): + with caplog.at_level("WARNING"): + await self.hook.aconsume(queue_name="QUEUE1", poll_interval=0.1) + + assert "Transient MQ error on queue 'QUEUE1': completion_code=2 reason_code=2009" in caplog.text + + @patch("airflow.providers.ibm.mq.hooks.mq.asyncio.sleep", new_callable=AsyncMock) + async def test_aconsume_cancelled_error_propagates(self, mock_sleep, patch_sync_to_async): + mock_sleep.side_effect = asyncio.CancelledError + + with patch("airflow.providers.ibm.mq.hooks.mq.IBMMQConsumer.consume", return_value=None): + with pytest.raises(asyncio.CancelledError): + await self.hook.aconsume(queue_name="QUEUE1", poll_interval=0.1) + + 
async def test_aconsume_propagates_non_mq_exceptions(self, mock_get_connection): + with patch( + "airflow.providers.ibm.mq.hooks.mq.IBMMQConsumer.consume", + side_effect=TypeError("Unexpected programming bug"), + ): + with pytest.raises(TypeError, match="Unexpected programming bug"): + await self.hook.aconsume(queue_name="QUEUE1", poll_interval=0.1) + + @patch("airflow.providers.ibm.mq.hooks.mq.sync_to_async") + async def test_aconsume_does_not_call_sync_to_async(self, mock_sync_to_async, mock_get_connection): + def fake_start(thread_self): + thread_self.future.set_result("test message") + + with patch("airflow.providers.ibm.mq.hooks.mq.IBMMQConsumer.start", new=fake_start): + with patch("airflow.providers.ibm.mq.hooks.mq.IBMMQConsumer.join", return_value=None): + result = await self.hook.aconsume(queue_name="QUEUE1", poll_interval=0.1) + + assert result == "test message" + mock_sync_to_async.assert_not_called() + + @patch("airflow.providers.ibm.mq.hooks.mq.sync_to_async") + async def test_aproduce_calls_sync_to_async_with_thread_sensitive_false(self, mock_sync_to_async): + mock_wrapper = AsyncMock() + mock_sync_to_async.return_value = mock_wrapper + + await self.hook.aproduce(queue_name="QUEUE1", payload="test payload") + + mock_sync_to_async.assert_called_once() + call_args = mock_sync_to_async.call_args + assert call_args[1].get("thread_sensitive") is False + assert call_args[0][0] == self.hook.produce + + @pytest.mark.parametrize( + ("open_options_attr", "use_explicit_open_options"), + [ + pytest.param("MQOO_OUTPUT", False, id="default_output"), + pytest.param( + "MQOO_OUTPUT | MQOO_FAIL_IF_QUIESCING", True, id="custom_output_with_fail_if_quiescing" + ), + ], + ) + @patch("ibmmq.connect") + @patch("ibmmq.Queue") + def test_produce_with_custom_open_options( + self, + mock_queue_class, + mock_connect, + mock_base_get_connection, + open_options_attr, + use_explicit_open_options, + ): + import ibmmq + + mock_qmgr = MagicMock() + mock_connect.return_value = 
mock_qmgr + + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + + # Calculate the expected open_options value + open_options_values = [getattr(ibmmq.CMQC, opt.strip()) for opt in open_options_attr.split("|")] + expected_open_options = reduce(operator.or_, open_options_values) + + conn = mq_connection() + if use_explicit_open_options: + self.hook.produce( + connection=conn, + queue_name="QUEUE1", + payload="test payload", + open_options=expected_open_options, + ) + else: + self.hook.produce(connection=conn, queue_name="QUEUE1", payload="test payload") + # When not specified, should default to MQOO_OUTPUT + expected_open_options = ibmmq.CMQC.MQOO_OUTPUT + + mock_connect.assert_called_once() + # Verify Queue was called with the expected open_options + call_args = mock_queue_class.call_args + actual_open_options = call_args[0][2] # Third positional argument is open_options + assert actual_open_options == expected_open_options + mock_queue.put.assert_called_once() + + +class TestIBMMQConsumer: + @pytest.fixture + def event_loop(self): + loop = asyncio.new_event_loop() + yield loop + loop.close() + + @pytest.fixture + def mock_hook(self): + hook = MagicMock(spec=IBMMQHook) + hook.open_options = None + return hook + + @pytest.fixture + def stop_event(self): + return threading.Event() + + @pytest.fixture + def consumer(self, mock_hook, event_loop, stop_event): + future = event_loop.create_future() + return IBMMQConsumer( + hook=mock_hook, + connection=mq_connection(), + queue_name="QUEUE1", + poll_interval=0.1, + loop=event_loop, + future=future, + stop_event=stop_event, + ) + + @patch("ibmmq.RFH2") + def test_process_message_decodes_payload_after_rfh2_header(self, mock_rfh2_class, consumer): + mock_rfh2 = MagicMock() + mock_rfh2_class.return_value = mock_rfh2 + mock_rfh2.get_length.return_value = 10 + + message = b"0123456789hello world" + result = consumer._process_message(message) + + mock_rfh2.unpack.assert_called_once_with(message) + assert 
result == "hello world" + + @patch("ibmmq.RFH2") + def test_process_message_falls_back_to_raw_on_pyif_error(self, mock_rfh2_class, consumer): + import ibmmq + + # Replace PYIFError with a plain Exception subclass so we can raise it in side_effect + # while the except clause in _process_message still catches it. + class FakePYIFError(Exception): + pass + + mock_rfh2 = MagicMock() + mock_rfh2_class.return_value = mock_rfh2 + mock_rfh2.unpack.side_effect = FakePYIFError("no rfh2 header") + + message = b"plain text message" + with patch.object(ibmmq, "PYIFError", FakePYIFError): + result = consumer._process_message(message) + + assert result == "plain text message" + + @pytest.mark.parametrize( + ("exception_to_raise", "exception_name", "message_bytes", "expected_result", "log_contains"), + [ + pytest.param( + ValueError("invalid offset"), + "ValueError", + b"short message", + "short message", + "Failed to process RFH2 header (ValueError:", + id="exception_from_get_length", + ), + pytest.param( + Exception("generic error"), + "Exception", + b"malformed rfh2 data", + "malformed rfh2 data", + "Failed to process RFH2 header (Exception:", + id="generic_exception_during_processing", + ), + ], + ) + @patch("ibmmq.RFH2") + def test_process_message_exception_fallback( + self, + mock_rfh2_class, + consumer, + caplog, + exception_to_raise, + exception_name, + message_bytes, + expected_result, + log_contains, + ): + mock_rfh2 = MagicMock() + mock_rfh2_class.return_value = mock_rfh2 + mock_rfh2.get_length.side_effect = exception_to_raise + + with caplog.at_level(logging.WARNING): + result = consumer._process_message(message_bytes) + + assert result == expected_result + assert log_contains in caplog.text + assert "returning raw message" in caplog.text + + @patch("ibmmq.RFH2") + def test_process_message_out_of_bounds_offset(self, mock_rfh2_class, consumer, caplog): + mock_rfh2 = MagicMock() + mock_rfh2_class.return_value = mock_rfh2 + mock_rfh2.get_length.return_value = 100 # Offset 
larger than message + + message = b"short message" + with caplog.at_level(logging.WARNING): + result = consumer._process_message(message) + + assert result == "short message" + assert "RFH2 offset 100 exceeds message length 13; returning raw message" in caplog.text + + @patch("ibmmq.RFH2") + def test_process_message_struct_error_falls_back(self, mock_rfh2_class, consumer, caplog): + mock_rfh2 = MagicMock() + mock_rfh2_class.return_value = mock_rfh2 + mock_rfh2.unpack.side_effect = struct.error("unpack requires a buffer of N bytes") + + message = b"malformed rfh2 data" + with caplog.at_level(logging.WARNING): + result = consumer._process_message(message) + + assert result == "malformed rfh2 data" + assert "Failed to process RFH2 header (error:" in caplog.text + assert "returning raw message" in caplog.text + + @pytest.mark.parametrize( + ("log_level", "expect_debug_log"), + [ + pytest.param(logging.DEBUG, True, id="debug_level_enabled"), + pytest.param(logging.INFO, False, id="info_level_no_debug"), + ], + ) + @patch("ibmmq.RFH2") + def test_process_message_debug_logging_payload( + self, mock_rfh2_class, consumer, caplog, log_level, expect_debug_log + ): + mock_rfh2 = MagicMock() + mock_rfh2_class.return_value = mock_rfh2 + mock_rfh2.get_length.return_value = 10 + + message = b"0123456789hello world" + with caplog.at_level(log_level): + result = consumer._process_message(message) + + assert result == "hello world" + if expect_debug_log: + assert "Message received from MQ (RFH2 decoded):" in caplog.text + else: + assert "Message received from MQ (RFH2 decoded):" not in caplog.text + + @patch("ibmmq.RFH2") + def test_process_message_payload_truncation(self, mock_rfh2_class, consumer, caplog): + mock_rfh2 = MagicMock() + mock_rfh2_class.return_value = mock_rfh2 + mock_rfh2.get_length.return_value = 10 + + # Create a long payload (>200 chars) + long_payload = "x" * 300 + message = b"0123456789" + long_payload.encode() + with caplog.at_level(logging.DEBUG): + result = 
consumer._process_message(message) + + assert result == long_payload + # Check truncation marker + debug_logs = [r.message for r in caplog.records if "Message received from MQ" in r.message] + assert len(debug_logs) > 0 + assert "..." in debug_logs[0] # Should have truncation marker + + @pytest.mark.parametrize( + ("log_level", "expect_raw_payload_log"), + [ + pytest.param(logging.DEBUG, True, id="fallback_debug_level_enabled"), + pytest.param(logging.INFO, False, id="fallback_info_level_no_debug"), + ], + ) + @patch("ibmmq.RFH2") + def test_process_message_fallback_debug_logging( + self, mock_rfh2_class, consumer, caplog, log_level, expect_raw_payload_log + ): + mock_rfh2 = MagicMock() + mock_rfh2_class.return_value = mock_rfh2 + mock_rfh2.unpack.side_effect = struct.error("unpack failed") + + message = b"error case message" + with caplog.at_level(log_level): + result = consumer._process_message(message) + + assert result == "error case message" + # Warning should always appear + assert "Failed to process RFH2 header" in caplog.text + # Debug log should only appear at DEBUG level + if expect_raw_payload_log: + assert "Raw message payload (truncated):" in caplog.text + else: + assert "Raw message payload (truncated):" not in caplog.text + + @patch("ibmmq.Queue") + @patch("ibmmq.GMO") + @patch("ibmmq.MD") + @patch("ibmmq.OD") + def test_consume_returns_decoded_message_on_success( + self, mock_od, mock_md, mock_gmo, mock_queue_class, consumer, mock_hook + ): + import ibmmq + + mock_conn = MagicMock() + mock_hook.get_conn.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_hook.get_conn.return_value.__exit__ = MagicMock(return_value=False) + mock_hook.get_open_options.return_value = ibmmq.CMQC.MQOO_INPUT_SHARED + + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + mock_queue.get.return_value = b"hello from mq" + + with patch.object(consumer, "_process_message", return_value="hello from mq"): + result = consumer.consume("QUEUE1", 
0.1, threading.Event()) + + assert result == "hello from mq" + mock_queue.close.assert_called_once() + + @patch("ibmmq.Queue") + @patch("ibmmq.GMO") + @patch("ibmmq.MD") + @patch("ibmmq.OD") + def test_consume_returns_none_when_stop_event_set_before_loop( + self, mock_od, mock_md, mock_gmo, mock_queue_class, consumer, mock_hook + ): + import ibmmq + + mock_conn = MagicMock() + mock_hook.get_conn.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_hook.get_conn.return_value.__exit__ = MagicMock(return_value=False) + mock_hook.get_open_options.return_value = ibmmq.CMQC.MQOO_INPUT_SHARED + + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + + stop_event = threading.Event() + stop_event.set() + + result = consumer.consume("QUEUE1", 0.1, stop_event) + + assert result is None + mock_queue.get.assert_not_called() + mock_queue.close.assert_called_once() + + @patch("ibmmq.Queue") + @patch("ibmmq.GMO") + @patch("ibmmq.MD") + @patch("ibmmq.OD") + def test_consume_continues_on_no_msg_available_then_returns_none( + self, mock_od, mock_md, mock_gmo, mock_queue_class, consumer, mock_hook + ): + import ibmmq + + mock_conn = MagicMock() + mock_hook.get_conn.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_hook.get_conn.return_value.__exit__ = MagicMock(return_value=False) + mock_hook.get_open_options.return_value = ibmmq.CMQC.MQOO_INPUT_SHARED + + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + + stop_event = threading.Event() + + def get_side_effect(*args, **kwargs): + stop_event.set() + raise ibmmq.MQMIError(comp=ibmmq.CMQC.MQCC_WARNING, reason=ibmmq.CMQC.MQRC_NO_MSG_AVAILABLE) + + mock_queue.get.side_effect = get_side_effect + + result = consumer.consume("QUEUE1", 0.1, stop_event) + + assert result is None + mock_queue.get.assert_called_once() + + @patch("ibmmq.Queue") + @patch("ibmmq.GMO") + @patch("ibmmq.MD") + @patch("ibmmq.OD") + def test_consume_raises_on_unexpected_mq_error( + self, mock_od, 
mock_md, mock_gmo, mock_queue_class, consumer, mock_hook + ): + import ibmmq + + mock_conn = MagicMock() + mock_hook.get_conn.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_hook.get_conn.return_value.__exit__ = MagicMock(return_value=False) + mock_hook.get_open_options.return_value = ibmmq.CMQC.MQOO_INPUT_SHARED + + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + mock_queue.get.side_effect = ibmmq.MQMIError( + comp=ibmmq.CMQC.MQCC_FAILED, reason=ibmmq.CMQC.MQRC_NOT_AUTHORIZED + ) + + with pytest.raises(IBMMQError) as exc_info: + consumer.consume("QUEUE1", 0.1, threading.Event()) + + assert exc_info.value.reason == ibmmq.CMQC.MQRC_NOT_AUTHORIZED + assert exc_info.value.transient is False + mock_queue.close.assert_called_once() + + @patch("ibmmq.Queue") + @patch("ibmmq.GMO") + @patch("ibmmq.MD") + @patch("ibmmq.OD") + def test_consume_closes_queue_even_when_exception_raised( + self, mock_od, mock_md, mock_gmo, mock_queue_class, consumer, mock_hook + ): + import ibmmq + + mock_conn = MagicMock() + mock_hook.get_conn.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_hook.get_conn.return_value.__exit__ = MagicMock(return_value=False) + mock_hook.get_open_options.return_value = ibmmq.CMQC.MQOO_INPUT_SHARED + + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + mock_queue.get.side_effect = ibmmq.MQMIError( + comp=ibmmq.CMQC.MQCC_FAILED, reason=ibmmq.CMQC.MQRC_CONNECTION_BROKEN + ) + + with pytest.raises(IBMMQError) as exc_info: + consumer.consume("QUEUE1", 0.1, threading.Event()) + + assert exc_info.value.reason == ibmmq.CMQC.MQRC_CONNECTION_BROKEN + assert exc_info.value.transient is True + mock_queue.close.assert_called_once() + + @patch("ibmmq.GMO") + @patch("ibmmq.MD") + @patch("ibmmq.OD") + @pytest.mark.parametrize( + ("exc_type", "expected_transient"), + [ + pytest.param("connection", True, id="connection_error_is_transient"), + pytest.param("pyif", False, id="pyif_error_not_transient"), 
+ ], + ) + def test_consume_wraps_non_mq_exceptions_as_ibmmq_error( + self, mock_od, mock_md, mock_gmo, consumer, mock_hook, exc_type, expected_transient + ): + import ibmmq + + if exc_type == "connection": + side_exc = ConnectionError("host unavailable") + else: + # ibmmq.PYIFError is a non-MQ exception from the C extension + side_exc = ibmmq.PYIFError("pyif failure") + + mock_hook.get_conn.side_effect = side_exc + + with pytest.raises(IBMMQError) as exc_info: + consumer.consume("QUEUE1", 0.1, threading.Event()) + + assert exc_info.value.reason == _NON_MQ_SENTINEL + assert exc_info.value.comp == _NON_MQ_SENTINEL + assert exc_info.value.transient is expected_transient + assert str(exc_info.value) == str(side_exc) + + @patch("ibmmq.Queue") + @patch("ibmmq.GMO") + @patch("ibmmq.MD") + @patch("ibmmq.OD") + def test_consume_close_suppresses_its_own_exception( + self, mock_od, mock_md, mock_gmo, mock_queue_class, consumer, mock_hook + ): + import ibmmq + + mock_conn = MagicMock() + mock_hook.get_conn.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_hook.get_conn.return_value.__exit__ = MagicMock(return_value=False) + mock_hook.get_open_options.return_value = ibmmq.CMQC.MQOO_INPUT_SHARED + + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + mock_queue.get.return_value = b"msg" + mock_queue.close.side_effect = RuntimeError("close failed") + + with patch.object(consumer, "_process_message", return_value="msg"): + # Should not raise even though close() fails + result = consumer.consume("QUEUE1", 0.1, threading.Event()) + + assert result == "msg" + + @patch("ibmmq.Queue") + @patch("ibmmq.GMO") + @patch("ibmmq.MD") + @patch("ibmmq.OD") + def test_consume_sets_gmo_wait_interval_from_poll_interval( + self, mock_od_class, mock_md_class, mock_gmo_class, mock_queue_class, consumer, mock_hook + ): + import ibmmq + + mock_conn = MagicMock() + mock_hook.get_conn.return_value.__enter__ = MagicMock(return_value=mock_conn) + 
mock_hook.get_conn.return_value.__exit__ = MagicMock(return_value=False) + mock_hook.get_open_options.return_value = ibmmq.CMQC.MQOO_INPUT_SHARED + + mock_gmo = MagicMock() + mock_gmo_class.return_value = mock_gmo + + mock_queue = MagicMock() + mock_queue_class.return_value = mock_queue + + stop_event = threading.Event() + stop_event.set() # exit immediately + + consumer.consume("QUEUE1", 2.5, stop_event) + + assert mock_gmo.WaitInterval == 2500 # 2.5 s → 2500 ms + + @pytest.mark.parametrize( + ("consume_outcome", "expected_result", "match"), + [ + pytest.param("done", "done", None, id="success_result"), + pytest.param(None, None, None, id="none_result"), + pytest.param("mq_error", None, None, id="mq_error_exception"), + pytest.param( + TypeError("unexpected bug"), None, "unexpected bug", id="programming_error_exception" + ), + ], + ) + def test_run(self, consumer, event_loop, consume_outcome, expected_result, match): + if consume_outcome == "mq_error": + import ibmmq + + consume_patch_kwargs = { + "side_effect": IBMMQError( + reason=ibmmq.CMQC.MQRC_CONNECTION_BROKEN, + comp=ibmmq.CMQC.MQCC_FAILED, + transient=False, + message="Connection failed", + ) + } + resolved_exception = IBMMQError + elif isinstance(consume_outcome, Exception): + consume_patch_kwargs = {"side_effect": consume_outcome} + resolved_exception = type(consume_outcome) + else: + consume_patch_kwargs = {"return_value": consume_outcome} + resolved_exception = None + + with patch.object(consumer, "consume", **consume_patch_kwargs): + consumer.run() + event_loop.run_until_complete(asyncio.sleep(0)) + + if resolved_exception is None: + assert consumer.future.result() == expected_result + elif match is None: + with pytest.raises(resolved_exception): + consumer.future.result() + else: + with pytest.raises(resolved_exception, match=match): + consumer.future.result() + + @pytest.mark.parametrize( + ("consume_outcome", "exception_msg"), + [ + pytest.param("done", None, id="cancelled_after_success"), + 
pytest.param(RuntimeError("test error"), "test error", id="cancelled_after_exception"), + ], + ) + def test_run_does_not_call_loop_when_future_cancelled( + self, consumer, event_loop, consume_outcome, exception_msg + ): + consumer.future.cancel() + + if exception_msg: + consume_kwargs = {"side_effect": consume_outcome} + else: + consume_kwargs = {"return_value": consume_outcome} + + with patch.object(consumer, "consume", **consume_kwargs): + with patch.object(consumer.loop, "call_soon_threadsafe") as mock_call: + consumer.run() + event_loop.run_until_complete(asyncio.sleep(0)) + + mock_call.assert_not_called() + + @pytest.mark.parametrize( + ("consume_outcome", "expected_result", "expected_error"), + [ + pytest.param("success", "success", None, id="sets_result_when_not_cancelled"), + pytest.param( + ValueError("test error"), None, "test error", id="sets_exception_when_not_cancelled" + ), + ], + ) + def test_run_calls_loop_when_future_not_cancelled( + self, consumer, event_loop, consume_outcome, expected_result, expected_error + ): + if isinstance(consume_outcome, Exception): + consume_kwargs = {"side_effect": consume_outcome} + else: + consume_kwargs = {"return_value": consume_outcome} + + with patch.object(consumer, "consume", **consume_kwargs): + with patch.object( + consumer.loop, "call_soon_threadsafe", wraps=consumer.loop.call_soon_threadsafe + ) as mock_call: + consumer.run() + event_loop.run_until_complete(asyncio.sleep(0)) + + assert mock_call.called + + if expected_error is None: + assert consumer.future.result() == expected_result + else: + with pytest.raises(ValueError, match=expected_error): + consumer.future.result() + + def test_run_as_thread_completes_and_sets_result(self, event_loop): + future = event_loop.create_future() + stop_event = threading.Event() + mock_hook = MagicMock(spec=IBMMQHook) + + consumer = IBMMQConsumer( + hook=mock_hook, + connection=mq_connection(), + queue_name="QUEUE1", + poll_interval=0.1, + loop=event_loop, + future=future, 
+ stop_event=stop_event, + ) + with patch.object(consumer, "consume", return_value="threaded result"): + consumer.start() + consumer.join(timeout=5) + + event_loop.run_until_complete(asyncio.sleep(0)) + assert future.result() == "threaded result" diff --git a/providers/ibm/mq/tests/unit/ibm/mq/queues/__init__.py b/providers/ibm/mq/tests/unit/ibm/mq/queues/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/ibm/mq/tests/unit/ibm/mq/queues/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/ibm/mq/tests/unit/ibm/mq/queues/test_mq.py b/providers/ibm/mq/tests/unit/ibm/mq/queues/test_mq.py new file mode 100644 index 0000000000000..34082ead06d24 --- /dev/null +++ b/providers/ibm/mq/tests/unit/ibm/mq/queues/test_mq.py @@ -0,0 +1,249 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pytest + +from airflow.providers.ibm.mq.triggers.mq import AwaitMessageTrigger + +pytest.importorskip("airflow.providers.common.messaging.providers.base_provider") + + +class TestIBMMQMessageQueueProvider: + """Tests for IBMMQMessageQueueProvider.""" + + def setup_method(self): + """Set up the test environment.""" + from airflow.providers.ibm.mq.queues.mq import IBMMQMessageQueueProvider + + self.provider = IBMMQMessageQueueProvider() + + def test_queue_create(self): + """Test the creation of the provider.""" + from airflow.providers.common.messaging.providers.base_provider import BaseMessageQueueProvider + + assert isinstance(self.provider, BaseMessageQueueProvider) + + @pytest.mark.parametrize( + ("queue_uri", "expected_result"), + [ + pytest.param("ibmmq://mq_default/MY.QUEUE.NAME", True, id="valid_mq_uri"), + pytest.param("ibmmq://mq_default", False, id="invalid_mq_url"), + pytest.param("http://example.com", False, id="http_url"), + pytest.param("not-a-url", False, id="invalid_url"), + ], + ) + def test_queue_matches(self, queue_uri, expected_result): + """Test the queue_matches method with various URLs.""" + assert self.provider.queue_matches(queue_uri) == expected_result + + @pytest.mark.parametrize( + ("scheme", "expected_result"), + [ + pytest.param("kafka", False, id="kafka_scheme"), + pytest.param("ibmmq", True, id="mq_scheme"), + pytest.param("redis+pubsub", False, id="redis_scheme"), + pytest.param("sqs", False, id="sqs_scheme"), + pytest.param("unknown", False, id="unknown_scheme"), + ], + ) + def 
test_scheme_matches(self, scheme, expected_result): + """Test the scheme_matches method with various schemes.""" + assert self.provider.scheme_matches(scheme) == expected_result + + def test_trigger_class(self): + """Test the trigger_class method.""" + assert self.provider.trigger_class() == AwaitMessageTrigger + + @pytest.mark.parametrize( + ("queue_uri", "extra_kwargs", "expected_result"), + [ + pytest.param( + "ibmmq://my_conn/QUEUE1", + {}, + { + "mq_conn_id": "my_conn", + "queue_name": "QUEUE1", + "poll_interval": 5, + }, + id="default_poll_interval", + ), + pytest.param( + "ibmmq://my_conn/QUEUE1", + {"poll_interval": 60}, + { + "mq_conn_id": "my_conn", + "queue_name": "QUEUE1", + }, + id="override_poll_interval", + ), + pytest.param( + "ibmmq://my_conn/QUEUE1?open_options=8192", + {}, + { + "mq_conn_id": "my_conn", + "queue_name": "QUEUE1", + "poll_interval": 5, + "open_options": 8192, + }, + id="open_options_from_uri_query", + ), + pytest.param( + "ibmmq://my_conn/QUEUE1?open_options=MQOO_INPUT_SHARED", + {}, + { + "mq_conn_id": "my_conn", + "queue_name": "QUEUE1", + "poll_interval": 5, + "open_options": 2, + }, + id="open_options_symbolic_name_from_uri_query", + ), + pytest.param( + "ibmmq://my_conn/QUEUE1", + {"open_options": 32}, + { + "mq_conn_id": "my_conn", + "queue_name": "QUEUE1", + "poll_interval": 5, + }, + id="open_options_in_kwargs_is_not_re_emitted", + ), + pytest.param( + "ibmmq://my_conn/QUEUE1?open_options=8192", + {"open_options": 32}, + { + "mq_conn_id": "my_conn", + "queue_name": "QUEUE1", + "poll_interval": 5, + }, + id="uri_open_options_not_re_emitted_when_kwargs_has_open_options", + ), + ], + ) + def test_trigger_kwargs_valid_cases(self, queue_uri, extra_kwargs, expected_result): + """Test the trigger_kwargs method with valid parameters.""" + kwargs = self.provider.trigger_kwargs(queue_uri, **extra_kwargs) + assert kwargs == expected_result + + @pytest.mark.parametrize( + ("queue_uri", "expected_error", "error_match"), + [ + 
pytest.param( + "ibmmq:///QUEUE1", + ValueError, + "MQ URI must contain connection id", + id="missing_conn_id", + ), + pytest.param( + "ibmmq://my_conn/", + ValueError, + "MQ URI must contain queue name", + id="missing_queue_name", + ), + pytest.param( + "ibmmq://my_conn/QUEUE1?open_options=not_a_number", + ValueError, + "Invalid MQ URI open_options value", + id="invalid_open_options_value", + ), + pytest.param( + "ibmmq://my_conn/QUEUE1?open_options=", + ValueError, + "MQ URI open_options query parameter values must be numeric or CMQC constant names", + id="empty_open_options_value", + ), + pytest.param( + "ibmmq://my_conn/QUEUE1?open_options=MQGMO_NO_WAIT", + ValueError, + "Invalid MQ URI open_options value", + id="non_mqoo_symbolic_open_options_value", + ), + ], + ) + def test_trigger_kwargs_error_cases(self, queue_uri, expected_error, error_match): + """Test that trigger_kwargs raises appropriate errors with invalid parameters.""" + with pytest.raises(expected_error, match=error_match): + self.provider.trigger_kwargs(queue_uri) + + @pytest.mark.parametrize( + ("queue_uri", "expected_open_options"), + [ + pytest.param( + "ibmmq://my_conn/QUEUE1?open_options=MQOO_INPUT_SHARED", + "MQOO_INPUT_SHARED", + id="symbolic_name", + ), + pytest.param( + "ibmmq://my_conn/QUEUE1?open_options=MQOO_INPUT_SHARED&open_options=MQOO_FAIL_IF_QUIESCING", + "MQOO_INPUT_SHARED|MQOO_FAIL_IF_QUIESCING", + id="multiple_query_values", + ), + pytest.param( + "ibmmq://my_conn/QUEUE1?open_options=MQOO_INPUT_SHARED|MQOO_FAIL_IF_QUIESCING", + "MQOO_INPUT_SHARED|MQOO_FAIL_IF_QUIESCING", + id="pipe_delimited", + ), + pytest.param( + "ibmmq://my_conn/QUEUE1?open_options=MQOO_INPUT_SHARED,MQOO_FAIL_IF_QUIESCING", + "MQOO_INPUT_SHARED|MQOO_FAIL_IF_QUIESCING", + id="comma_delimited", + ), + pytest.param( + "ibmmq://my_conn/QUEUE1?open_options=2|MQOO_FAIL_IF_QUIESCING", + "MQOO_INPUT_SHARED|MQOO_FAIL_IF_QUIESCING", + id="mixed_numeric_and_symbolic", + ), + ], + ) + def 
test_trigger_kwargs_parses_open_options_formats(self, queue_uri, expected_open_options): + import ibmmq + + expected = 0 + for flag in expected_open_options.split("|"): + expected |= getattr(ibmmq.CMQC, flag) + + kwargs = self.provider.trigger_kwargs(queue_uri) + assert kwargs["open_options"] == expected + + def test_message_queue_trigger_with_scheme(self): + from airflow.providers.common.messaging.triggers.msg_queue import MessageQueueTrigger + + trigger = MessageQueueTrigger( + scheme="ibmmq", + mq_conn_id="mq_default", + queue_name="MY.QUEUE.NAME", + ) + assert trigger.queue is None + assert trigger.scheme == "ibmmq" + assert isinstance(trigger.trigger, AwaitMessageTrigger) + assert trigger.trigger.mq_conn_id == "mq_default" + assert trigger.trigger.queue_name == "MY.QUEUE.NAME" + assert trigger.trigger.poll_interval == 5 + + @pytest.mark.filterwarnings("ignore::airflow.exceptions.AirflowProviderDeprecationWarning") + def test_message_queue_trigger_with_deprecated_queue(self): + from airflow.providers.common.messaging.triggers.msg_queue import MessageQueueTrigger + + trigger = MessageQueueTrigger(queue="ibmmq://mq_default/MY.QUEUE.NAME", open_options=32) + assert trigger.scheme is None + assert trigger.queue == "ibmmq://mq_default/MY.QUEUE.NAME" + assert isinstance(trigger.trigger, AwaitMessageTrigger) + assert trigger.trigger.mq_conn_id == "mq_default" + assert trigger.trigger.queue_name == "MY.QUEUE.NAME" + assert trigger.trigger.poll_interval == 5 + assert trigger.trigger.open_options == 32 diff --git a/providers/ibm/mq/tests/unit/ibm/mq/triggers/__init__.py b/providers/ibm/mq/tests/unit/ibm/mq/triggers/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/providers/ibm/mq/tests/unit/ibm/mq/triggers/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/providers/ibm/mq/tests/unit/ibm/mq/triggers/test_mq.py b/providers/ibm/mq/tests/unit/ibm/mq/triggers/test_mq.py new file mode 100644 index 0000000000000..c5aec69b9c241 --- /dev/null +++ b/providers/ibm/mq/tests/unit/ibm/mq/triggers/test_mq.py @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from unittest.mock import AsyncMock, patch + +import pytest + +from airflow.providers.ibm.mq.triggers.mq import AwaitMessageTrigger +from airflow.triggers.base import TriggerEvent + + +class TestMQTrigger: + @pytest.mark.asyncio + async def test_trigger_serialization(self): + trigger = AwaitMessageTrigger( + mq_conn_id="mq_default", + queue_name="QUEUE1", + poll_interval=2, + ) + assert isinstance(trigger, AwaitMessageTrigger) + + classpath, kwargs = trigger.serialize() + assert classpath == "airflow.providers.ibm.mq.triggers.mq.AwaitMessageTrigger" + assert kwargs == { + "mq_conn_id": "mq_default", + "queue_name": "QUEUE1", + "poll_interval": 2, + "open_options": None, + } + + @pytest.mark.asyncio + async def test_trigger_serialization_with_open_options(self): + trigger = AwaitMessageTrigger( + mq_conn_id="mq_default", + queue_name="QUEUE1", + poll_interval=2, + open_options=8192, + ) + + _, kwargs = trigger.serialize() + assert kwargs["open_options"] == 8192 + + @pytest.mark.asyncio + @patch("airflow.providers.ibm.mq.triggers.mq.IBMMQHook") + async def test_trigger_run_yields_event(self, mock_hook_class): + """run() delegates to consume() and yields the result as a TriggerEvent.""" + mock_hook = mock_hook_class.return_value + mock_hook.aconsume = AsyncMock(return_value="test message") + + trigger = AwaitMessageTrigger( + mq_conn_id="mq_default", + queue_name="QUEUE1", + poll_interval=0.1, + open_options=8192, + ) + + event = await anext(trigger.run()) + assert isinstance(event, TriggerEvent) + assert event.payload == "test message" + mock_hook_class.assert_called_once_with("mq_default", open_options=8192) + mock_hook.aconsume.assert_called_once_with(queue_name="QUEUE1", poll_interval=0.1) diff --git a/pyproject.toml b/pyproject.toml index 1051632695c01..960bd58618c1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -248,6 +248,9 @@ apache-airflow = "airflow.__main__:main" "http" = [ "apache-airflow-providers-http>=4.13.2" 
] +"ibm.mq" = [ + "apache-airflow-providers-ibm-mq>=0.1.0" +] "imap" = [ "apache-airflow-providers-imap>=3.8.0" ] @@ -451,6 +454,7 @@ apache-airflow = "airflow.__main__:main" "apache-airflow-providers-grpc>=3.7.0", "apache-airflow-providers-hashicorp>=4.0.0", "apache-airflow-providers-http>=4.13.2", + "apache-airflow-providers-ibm-mq>=0.1.0", "apache-airflow-providers-imap>=3.8.0", "apache-airflow-providers-influxdb>=2.8.0", "apache-airflow-providers-informatica>=0.1.1", @@ -1174,6 +1178,8 @@ mypy_path = [ "$MYPY_CONFIG_FILE_DIR/providers/hashicorp/tests", "$MYPY_CONFIG_FILE_DIR/providers/http/src", "$MYPY_CONFIG_FILE_DIR/providers/http/tests", + "$MYPY_CONFIG_FILE_DIR/providers/ibm/mq/src", + "$MYPY_CONFIG_FILE_DIR/providers/ibm/mq/tests", "$MYPY_CONFIG_FILE_DIR/providers/imap/src", "$MYPY_CONFIG_FILE_DIR/providers/imap/tests", "$MYPY_CONFIG_FILE_DIR/providers/influxdb/src", @@ -1460,6 +1466,7 @@ apache-airflow-providers-google = false apache-airflow-providers-grpc = false apache-airflow-providers-hashicorp = false apache-airflow-providers-http = false +apache-airflow-providers-ibm-mq = false apache-airflow-providers-imap = false apache-airflow-providers-influxdb = false apache-airflow-providers-informatica = false @@ -1602,6 +1609,7 @@ apache-airflow-providers-google = false apache-airflow-providers-grpc = false apache-airflow-providers-hashicorp = false apache-airflow-providers-http = false +apache-airflow-providers-ibm-mq = false apache-airflow-providers-imap = false apache-airflow-providers-influxdb = false apache-airflow-providers-informatica = false @@ -1761,6 +1769,7 @@ apache-airflow-providers-google = { workspace = true } apache-airflow-providers-grpc = { workspace = true } apache-airflow-providers-hashicorp = { workspace = true } apache-airflow-providers-http = { workspace = true } +apache-airflow-providers-ibm-mq = { workspace = true } apache-airflow-providers-imap = { workspace = true } apache-airflow-providers-influxdb = { workspace = true } 
apache-airflow-providers-informatica = { workspace = true } @@ -1898,6 +1907,7 @@ members = [ "providers/grpc", "providers/hashicorp", "providers/http", + "providers/ibm/mq", "providers/imap", "providers/influxdb", "providers/informatica", diff --git a/scripts/ci/docker-compose/remove-sources.yml b/scripts/ci/docker-compose/remove-sources.yml index a2f7d3a035766..8bd4764c7bdb5 100644 --- a/scripts/ci/docker-compose/remove-sources.yml +++ b/scripts/ci/docker-compose/remove-sources.yml @@ -77,6 +77,7 @@ services: - ../../../empty:/opt/airflow/providers/grpc/src - ../../../empty:/opt/airflow/providers/hashicorp/src - ../../../empty:/opt/airflow/providers/http/src + - ../../../empty:/opt/airflow/providers/ibm/mq/src - ../../../empty:/opt/airflow/providers/imap/src - ../../../empty:/opt/airflow/providers/influxdb/src - ../../../empty:/opt/airflow/providers/informatica/src diff --git a/scripts/ci/docker-compose/tests-sources.yml b/scripts/ci/docker-compose/tests-sources.yml index 9c02d1c271412..974e9871b5b43 100644 --- a/scripts/ci/docker-compose/tests-sources.yml +++ b/scripts/ci/docker-compose/tests-sources.yml @@ -90,6 +90,7 @@ services: - ../../../providers/grpc/tests:/opt/airflow/providers/grpc/tests - ../../../providers/hashicorp/tests:/opt/airflow/providers/hashicorp/tests - ../../../providers/http/tests:/opt/airflow/providers/http/tests + - ../../../providers/ibm/mq/tests:/opt/airflow/providers/ibm/mq/tests - ../../../providers/imap/tests:/opt/airflow/providers/imap/tests - ../../../providers/influxdb/tests:/opt/airflow/providers/influxdb/tests - ../../../providers/informatica/tests:/opt/airflow/providers/informatica/tests diff --git a/scripts/docker/entrypoint_ci.sh b/scripts/docker/entrypoint_ci.sh index aa5edbd909508..18f2efbe67392 100755 --- a/scripts/docker/entrypoint_ci.sh +++ b/scripts/docker/entrypoint_ci.sh @@ -415,7 +415,7 @@ function reinstall_shared_distributions() { # export. Providers cannot run arbitrary code through this hook. 
Maintainers should # review every addition to this list as a privileged change. See # contributing-docs/12_provider_distributions.rst. -PROVIDERS_NEEDING_PRE_EXTRAS_INSTALL=() +PROVIDERS_NEEDING_PRE_EXTRAS_INSTALL=("ibm.mq") function run_pre_extras_install_if_registered() { local provider_id="${1}" diff --git a/scripts/in_container/run_pre_extras_install.py b/scripts/in_container/run_pre_extras_install.py index 3bb6ad28aa5e2..cfd8c3db383ef 100644 --- a/scripts/in_container/run_pre_extras_install.py +++ b/scripts/in_container/run_pre_extras_install.py @@ -30,16 +30,22 @@ from __future__ import annotations import argparse +import contextlib import hashlib +import ipaddress import re import shlex +import socket import sys import tarfile import tempfile +import urllib.error import urllib.request import zipfile +from collections.abc import Iterator from pathlib import Path from typing import NoReturn +from urllib.parse import urlparse import yaml @@ -48,7 +54,9 @@ ENV_NAME_RE = re.compile(r"^[A-Z][A-Z0-9_]*$") SHA256_RE = re.compile(r"^[0-9a-f]{64}$") ALLOWED_TOP_LEVEL_KEYS = {"downloads", "env"} -ALLOWED_DOWNLOAD_KEYS = {"url", "sha256", "extract_to"} +REQUIRED_DOWNLOAD_KEYS = {"url", "sha256", "extract_to"} +OPTIONAL_DOWNLOAD_KEYS = {"fallback_ips"} +ALLOWED_DOWNLOAD_KEYS = REQUIRED_DOWNLOAD_KEYS | OPTIONAL_DOWNLOAD_KEYS def fail(msg: str) -> NoReturn: @@ -71,7 +79,7 @@ def validate_manifest(manifest: object, provider_id: str) -> dict: unknown = set(entry) - ALLOWED_DOWNLOAD_KEYS if unknown: fail(f"downloads[{i}] has unknown keys: {sorted(unknown)}") - missing = ALLOWED_DOWNLOAD_KEYS - set(entry) + missing = REQUIRED_DOWNLOAD_KEYS - set(entry) if missing: fail(f"downloads[{i}] is missing required keys: {sorted(missing)}") url = entry["url"] @@ -90,6 +98,16 @@ def validate_manifest(manifest: object, provider_id: str) -> dict: ) if ".." 
in Path(extract_to).parts: fail(f"downloads[{i}].extract_to cannot contain '..'") + fallback_ips = entry.get("fallback_ips", []) + if not isinstance(fallback_ips, list): + fail(f"downloads[{i}].fallback_ips must be a list of IP address strings") + for j, ip in enumerate(fallback_ips): + if not isinstance(ip, str): + fail(f"downloads[{i}].fallback_ips[{j}] must be a string") + try: + ipaddress.ip_address(ip) + except ValueError: + fail(f"downloads[{i}].fallback_ips[{j}] is not a valid IP address: {ip!r}") env = manifest.get("env", {}) if not isinstance(env, dict): fail("'env' must be a mapping") @@ -101,8 +119,31 @@ def validate_manifest(manifest: object, provider_id: str) -> dict: return manifest -def download_with_checksum(url: str, expected_sha256: str, dest: Path) -> None: - print(f"Downloading {url}") +@contextlib.contextmanager +def override_dns(hostname: str, ip: str) -> Iterator[None]: + """Temporarily resolve `hostname` to `ip` for the duration of the block. + + Only `socket.getaddrinfo` is patched, so urllib still uses `hostname` for + the HTTPS SNI and certificate verification — we only change which IP the + TCP connection dials. 
+ """ + family = socket.AF_INET6 if ipaddress.ip_address(ip).version == 6 else socket.AF_INET + original = socket.getaddrinfo + + def patched(host, port, *args, **kwargs): + if host == hostname: + sockaddr = (ip, port, 0, 0) if family == socket.AF_INET6 else (ip, port) + return [(family, socket.SOCK_STREAM, socket.IPPROTO_TCP, "", sockaddr)] + return original(host, port, *args, **kwargs) + + socket.getaddrinfo = patched + try: + yield + finally: + socket.getaddrinfo = original + + +def _attempt_download(url: str, expected_sha256: str, dest: Path) -> None: digest = hashlib.sha256() with urllib.request.urlopen(url) as response, dest.open("wb") as out: while True: @@ -116,6 +157,44 @@ def download_with_checksum(url: str, expected_sha256: str, dest: Path) -> None: fail(f"sha256 mismatch for {url}: expected {expected_sha256}, got {got}") +def download_with_checksum( + url: str, + expected_sha256: str, + dest: Path, + fallback_ips: list[str] | None = None, +) -> None: + print(f"Downloading {url}") + try: + _attempt_download(url, expected_sha256, dest) + return + except (urllib.error.URLError, OSError) as primary_err: + if not fallback_ips: + raise + print( + f"Primary download failed ({type(primary_err).__name__}: {primary_err}); " + f"trying {len(fallback_ips)} fallback IP(s)" + ) + + hostname = urlparse(url).hostname + if not hostname: + fail(f"cannot extract hostname from url {url!r} for fallback resolution") + + last_err: BaseException | None = None + for ip in fallback_ips: + print(f" Retrying with {hostname} -> {ip}") + try: + with override_dns(hostname, ip): + _attempt_download(url, expected_sha256, dest) + print(f" Success via {ip}") + return + except (urllib.error.URLError, OSError) as e: + print(f" {ip} failed: {type(e).__name__}: {e}") + last_err = e + continue + + fail(f"all download attempts failed for {url}; last error: {last_err}") + + def safe_extract(archive: Path, target: Path) -> None: target = target.resolve() target.mkdir(parents=True, exist_ok=True) 
@@ -173,7 +252,12 @@ def main() -> None: for index, entry in enumerate(manifest.get("downloads", [])): archive_name = Path(entry["url"]).name or f"download_{index}" archive = tmp / f"{index}_{archive_name}" - download_with_checksum(entry["url"], entry["sha256"], archive) + download_with_checksum( + entry["url"], + entry["sha256"], + archive, + fallback_ips=entry.get("fallback_ips") or None, + ) safe_extract(archive, Path(entry["extract_to"])) archive.unlink(missing_ok=True) diff --git a/uv.lock b/uv.lock index a02c1fca513f4..6fefbf9fc232b 100644 --- a/uv.lock +++ b/uv.lock @@ -91,6 +91,7 @@ apache-airflow-providers-vespa = false apache-airflow-providers-databricks = false apache-airflow-shared-state = false apache-airflow-providers-sqlite = false +apache-airflow-providers-ibm-mq = false apache-airflow-shared-module-loading = false apache-airflow-providers-yandex = false apache-airflow-shared-serialization = false @@ -218,6 +219,7 @@ members = [ "apache-airflow-providers-grpc", "apache-airflow-providers-hashicorp", "apache-airflow-providers-http", + "apache-airflow-providers-ibm-mq", "apache-airflow-providers-imap", "apache-airflow-providers-influxdb", "apache-airflow-providers-informatica", @@ -1033,6 +1035,7 @@ all = [ { name = "apache-airflow-providers-grpc" }, { name = "apache-airflow-providers-hashicorp" }, { name = "apache-airflow-providers-http" }, + { name = "apache-airflow-providers-ibm-mq" }, { name = "apache-airflow-providers-imap" }, { name = "apache-airflow-providers-influxdb" }, { name = "apache-airflow-providers-informatica" }, @@ -1268,6 +1271,9 @@ hashicorp = [ http = [ { name = "apache-airflow-providers-http" }, ] +ibm-mq = [ + { name = "apache-airflow-providers-ibm-mq" }, +] imap = [ { name = "apache-airflow-providers-imap" }, ] @@ -1642,6 +1648,8 @@ requires-dist = [ { name = "apache-airflow-providers-hashicorp", marker = "extra == 'hashicorp'", editable = "providers/hashicorp" }, { name = "apache-airflow-providers-http", marker = "extra == 
'all'", editable = "providers/http" }, { name = "apache-airflow-providers-http", marker = "extra == 'http'", editable = "providers/http" }, + { name = "apache-airflow-providers-ibm-mq", marker = "extra == 'all'", editable = "providers/ibm/mq" }, + { name = "apache-airflow-providers-ibm-mq", marker = "extra == 'ibm-mq'", editable = "providers/ibm/mq" }, { name = "apache-airflow-providers-imap", marker = "extra == 'all'", editable = "providers/imap" }, { name = "apache-airflow-providers-imap", marker = "extra == 'imap'", editable = "providers/imap" }, { name = "apache-airflow-providers-influxdb", marker = "extra == 'all'", editable = "providers/influxdb" }, @@ -1752,7 +1760,7 @@ requires-dist = [ { name = "sentry-sdk", marker = "extra == 'sentry'", specifier = ">=2.30.0" }, { name = "uv", marker = "extra == 'uv'", specifier = ">=0.11.16" }, ] -provides-extras = ["all-core", "async", "graphviz", "gunicorn", "kerberos", "memray", "otel", "statsd", "all-task-sdk", "airbyte", "akeyless", "alibaba", "amazon", "apache-cassandra", "apache-drill", "apache-druid", "apache-flink", "apache-hdfs", "apache-hive", "apache-iceberg", "apache-impala", "apache-kafka", "apache-kylin", "apache-livy", "apache-pig", "apache-pinot", "apache-spark", "apache-tinkerpop", "apprise", "arangodb", "asana", "atlassian-jira", "celery", "cloudant", "cncf-kubernetes", "cohere", "common-ai", "common-compat", "common-io", "common-messaging", "common-sql", "databricks", "datadog", "dbt-cloud", "dingding", "discord", "docker", "edge3", "elasticsearch", "exasol", "fab", "facebook", "ftp", "git", "github", "google", "grpc", "hashicorp", "http", "imap", "influxdb", "informatica", "jdbc", "jenkins", "keycloak", "microsoft-azure", "microsoft-mssql", "microsoft-psrp", "microsoft-winrm", "mongo", "mysql", "neo4j", "odbc", "openai", "openfaas", "openlineage", "opensearch", "opsgenie", "oracle", "pagerduty", "papermill", "pgvector", "pinecone", "postgres", "presto", "qdrant", "redis", "salesforce", "samba", 
"segment", "sendgrid", "sftp", "singularity", "slack", "smtp", "snowflake", "sqlite", "ssh", "standard", "tableau", "telegram", "teradata", "trino", "vertica", "vespa", "weaviate", "yandex", "ydb", "zendesk", "all", "aiobotocore", "apache-atlas", "apache-webhdfs", "amazon-aws-auth", "cloudpickle", "github-enterprise", "google-auth", "ldap", "pandas", "polars", "rabbitmq", "sentry", "s3fs", "uv"] +provides-extras = ["all-core", "async", "graphviz", "gunicorn", "kerberos", "memray", "otel", "statsd", "all-task-sdk", "airbyte", "akeyless", "alibaba", "amazon", "apache-cassandra", "apache-drill", "apache-druid", "apache-flink", "apache-hdfs", "apache-hive", "apache-iceberg", "apache-impala", "apache-kafka", "apache-kylin", "apache-livy", "apache-pig", "apache-pinot", "apache-spark", "apache-tinkerpop", "apprise", "arangodb", "asana", "atlassian-jira", "celery", "cloudant", "cncf-kubernetes", "cohere", "common-ai", "common-compat", "common-io", "common-messaging", "common-sql", "databricks", "datadog", "dbt-cloud", "dingding", "discord", "docker", "edge3", "elasticsearch", "exasol", "fab", "facebook", "ftp", "git", "github", "google", "grpc", "hashicorp", "http", "ibm-mq", "imap", "influxdb", "informatica", "jdbc", "jenkins", "keycloak", "microsoft-azure", "microsoft-mssql", "microsoft-psrp", "microsoft-winrm", "mongo", "mysql", "neo4j", "odbc", "openai", "openfaas", "openlineage", "opensearch", "opsgenie", "oracle", "pagerduty", "papermill", "pgvector", "pinecone", "postgres", "presto", "qdrant", "redis", "salesforce", "samba", "segment", "sendgrid", "sftp", "singularity", "slack", "smtp", "snowflake", "sqlite", "ssh", "standard", "tableau", "telegram", "teradata", "trino", "vertica", "vespa", "weaviate", "yandex", "ydb", "zendesk", "all", "aiobotocore", "apache-atlas", "apache-webhdfs", "amazon-aws-auth", "cloudpickle", "github-enterprise", "google-auth", "ldap", "pandas", "polars", "rabbitmq", "sentry", "s3fs", "uv"] [package.metadata.requires-dev] ci-image = [ @@ 
-5714,6 +5722,57 @@ dev = [ ] docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "devel-common" }] +[[package]] +name = "apache-airflow-providers-ibm-mq" +version = "0.1.0" +source = { editable = "providers/ibm/mq" } +dependencies = [ + { name = "apache-airflow" }, + { name = "apache-airflow-providers-common-compat" }, + { name = "asgiref" }, +] + +[package.optional-dependencies] +common-messaging = [ + { name = "apache-airflow-providers-common-messaging" }, +] +ibmmq = [ + { name = "ibmmq" }, +] + +[package.dev-dependencies] +dev = [ + { name = "apache-airflow" }, + { name = "apache-airflow-devel-common" }, + { name = "apache-airflow-providers-common-compat" }, + { name = "apache-airflow-providers-common-messaging" }, + { name = "apache-airflow-task-sdk" }, +] +docs = [ + { name = "apache-airflow-devel-common", extra = ["docs"] }, +] + +[package.metadata] +requires-dist = [ + { name = "apache-airflow", editable = "." }, + { name = "apache-airflow-providers-common-compat", editable = "providers/common/compat" }, + { name = "apache-airflow-providers-common-messaging", marker = "extra == 'common-messaging'", editable = "providers/common/messaging" }, + { name = "asgiref", marker = "python_full_version < '3.14'", specifier = ">=2.3.0" }, + { name = "asgiref", marker = "python_full_version >= '3.14'", specifier = ">=3.11.1" }, + { name = "ibmmq", marker = "extra == 'ibmmq'", specifier = ">=2.0.6" }, +] +provides-extras = ["ibmmq", "common-messaging"] + +[package.metadata.requires-dev] +dev = [ + { name = "apache-airflow", editable = "." 
}, + { name = "apache-airflow-devel-common", editable = "devel-common" }, + { name = "apache-airflow-providers-common-compat", editable = "providers/common/compat" }, + { name = "apache-airflow-providers-common-messaging", editable = "providers/common/messaging" }, + { name = "apache-airflow-task-sdk", editable = "task-sdk" }, +] +docs = [{ name = "apache-airflow-devel-common", extras = ["docs"], editable = "devel-common" }] + [[package]] name = "apache-airflow-providers-imap" version = "3.11.3" @@ -13864,6 +13923,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/57/6c/0546c39956dbc8ef8db8b93d2c83c5a674e250c73b00ebf84a758f568770/ibmcloudant-0.11.7-py3-none-any.whl", hash = "sha256:e3b56cf6561beb66fcfa777723891b44b78788847943712adae389d24783eb27", size = 120764, upload-time = "2026-05-22T13:17:58.958Z" }, ] +[[package]] +name = "ibmmq" +version = "2.0.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/28/c5/a838a9a6571ba9ce46bba07f1c0cf4c6d29ff7a1e9f39a21b0f533f180be/ibmmq-2.0.6.tar.gz", hash = "sha256:80a21fb89c7c1a5d205f1fbba813d4260dc46f8504b4f0a906a83c3f02d17d16", size = 463436, upload-time = "2026-04-20T11:22:27.427Z" } + [[package]] name = "icalendar" version = "7.1.2" From 64dd5d6b429f62492aa115cba81ce799d833b9db Mon Sep 17 00:00:00 2001 From: Yuseok Jo Date: Fri, 29 May 2026 03:11:49 +0900 Subject: [PATCH 22/28] Fix inconsistency in S3 transfer operators (#67378) --- .../amazon/aws/transfers/ftp_to_s3.py | 41 +++-- .../amazon/aws/transfers/s3_to_ftp.py | 80 ++++++++-- .../amazon/aws/transfers/s3_to_sftp.py | 92 +++++++++-- .../amazon/aws/transfers/sftp_to_s3.py | 148 +++++++++++++++--- .../amazon/aws/transfers/test_ftp_to_s3.py | 35 +++++ .../amazon/aws/transfers/test_s3_to_ftp.py | 61 +++++++- .../amazon/aws/transfers/test_s3_to_sftp.py | 62 ++++++++ .../amazon/aws/transfers/test_sftp_to_s3.py | 104 +++++++++++- 8 files changed, 557 insertions(+), 66 deletions(-) diff --git 
a/providers/amazon/src/airflow/providers/amazon/aws/transfers/ftp_to_s3.py b/providers/amazon/src/airflow/providers/amazon/aws/transfers/ftp_to_s3.py index 251c16a5e26b6..5dd933a197bc7 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/transfers/ftp_to_s3.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/transfers/ftp_to_s3.py @@ -17,6 +17,7 @@ # under the License. from __future__ import annotations +import ftplib from collections.abc import Sequence from tempfile import NamedTemporaryFile from typing import TYPE_CHECKING @@ -60,6 +61,9 @@ class FTPToS3Operator(BaseOperator): :param gzip: If True, the file will be compressed locally :param acl_policy: String specifying the canned ACL policy for the file being uploaded to the S3 bucket. + :param fail_on_file_not_exist: If True, operator fails when a source file does not + exist on the FTP server. If False, the operator logs a warning and skips the + transfer. Default is True. """ template_fields: Sequence[str] = ("ftp_path", "s3_bucket", "s3_key", "ftp_filenames", "s3_filenames") @@ -78,6 +82,7 @@ def __init__( encrypt: bool = False, gzip: bool = False, acl_policy: str | None = None, + fail_on_file_not_exist: bool = True, **kwargs, ): super().__init__(**kwargs) @@ -92,25 +97,31 @@ def __init__( self.encrypt = encrypt self.gzip = gzip self.acl_policy = acl_policy + self.fail_on_file_not_exist = fail_on_file_not_exist self.s3_hook: S3Hook | None = None self.ftp_hook: FTPHook | None = None def __upload_to_s3_from_ftp(self, remote_filename, s3_file_key): - with NamedTemporaryFile() as local_tmp_file: - self.ftp_hook.retrieve_file( - remote_full_path=remote_filename, local_full_path_or_buffer=local_tmp_file.name - ) - - self.s3_hook.load_file( - filename=local_tmp_file.name, - key=s3_file_key, - bucket_name=self.s3_bucket, - replace=self.replace, - encrypt=self.encrypt, - gzip=self.gzip, - acl_policy=self.acl_policy, - ) - self.log.info("File upload to %s", s3_file_key) + try: + with 
NamedTemporaryFile() as local_tmp_file: + self.ftp_hook.retrieve_file( + remote_full_path=remote_filename, local_full_path_or_buffer=local_tmp_file.name + ) + self.s3_hook.load_file( + filename=local_tmp_file.name, + key=s3_file_key, + bucket_name=self.s3_bucket, + replace=self.replace, + encrypt=self.encrypt, + gzip=self.gzip, + acl_policy=self.acl_policy, + ) + self.log.info("File upload to %s", s3_file_key) + except ftplib.error_perm as e: + if "550" in str(e) and not self.fail_on_file_not_exist: + self.log.info("File %s not found on FTP server. Skipping transfer.", remote_filename) + return + raise def execute(self, context: Context): self.ftp_hook = FTPHook(ftp_conn_id=self.ftp_conn_id) diff --git a/providers/amazon/src/airflow/providers/amazon/aws/transfers/s3_to_ftp.py b/providers/amazon/src/airflow/providers/amazon/aws/transfers/s3_to_ftp.py index 2a0a4fb91e8e4..ad532e9ff1d66 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/transfers/s3_to_ftp.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/transfers/s3_to_ftp.py @@ -39,16 +39,27 @@ class S3ToFTPOperator(BaseOperator): :param s3_bucket: The targeted s3 bucket. This is the S3 bucket from where the file is downloaded. - :param s3_key: The targeted s3 key. This is the specified file path for - downloading the file from S3. - :param ftp_path: The ftp remote path. This is the specified file path for - uploading file to the FTP server. + :param s3_key: The targeted s3 key. For a single file it must include the file + path. For multiple files it is the key prefix (directory) and must end with + ``"/"``. + :param s3_filenames: Only used if you want to move multiple files. You can pass + a list with exact key suffixes present under the s3_key prefix, or a string + prefix that all filenames must match. Use ``"*"`` to move all objects under + the s3_key prefix. + :param ftp_path: The ftp remote path. For a single file it must include the file + path. 
For multiple files it is the destination directory path and must end + with ``"/"``. + :param ftp_filenames: Only used if you want to move multiple files and name them + differently at the destination. It can be a list of filenames or a string + prefix that replaces the s3 prefix. :param aws_conn_id: reference to a specific AWS connection :param ftp_conn_id: The ftp connection id. The name or identifier for establishing a connection to the FTP server. + :param fail_on_file_not_exist: If True, operator fails when a source S3 key does not + exist. If False, the operator logs a warning and skips the transfer. Default is True. """ - template_fields: Sequence[str] = ("s3_bucket", "s3_key", "ftp_path") + template_fields: Sequence[str] = ("s3_bucket", "s3_key", "ftp_path", "s3_filenames", "ftp_filenames") def __init__( self, @@ -56,26 +67,71 @@ def __init__( s3_bucket, s3_key, ftp_path, + s3_filenames: str | list[str] | None = None, + ftp_filenames: str | list[str] | None = None, aws_conn_id="aws_default", ftp_conn_id="ftp_default", + fail_on_file_not_exist: bool = True, **kwargs, ) -> None: super().__init__(**kwargs) self.s3_bucket = s3_bucket self.s3_key = s3_key self.ftp_path = ftp_path + self.s3_filenames = s3_filenames + self.ftp_filenames = ftp_filenames self.aws_conn_id = aws_conn_id self.ftp_conn_id = ftp_conn_id + self.fail_on_file_not_exist = fail_on_file_not_exist + + def _download_from_s3(self, s3_hook: S3Hook, ftp_hook: FTPHook, s3_key: str, ftp_path: str) -> None: + if not s3_hook.check_for_key(s3_key, self.s3_bucket): + if self.fail_on_file_not_exist: + raise FileNotFoundError(f"Key {s3_key!r} not found in S3 bucket {self.s3_bucket!r}") + self.log.info("Key %s not found in S3. 
Skipping transfer.", s3_key) + return + s3_obj = s3_hook.get_key(s3_key, self.s3_bucket) + with NamedTemporaryFile() as local_tmp_file: + self.log.info("Downloading file from %s", s3_key) + s3_obj.download_fileobj(local_tmp_file) + local_tmp_file.seek(0) + ftp_hook.store_file(ftp_path, local_tmp_file.name) + self.log.info("File stored in %s", ftp_path) def execute(self, context: Context): s3_hook = S3Hook(self.aws_conn_id) ftp_hook = FTPHook(ftp_conn_id=self.ftp_conn_id) - s3_obj = s3_hook.get_key(self.s3_key, self.s3_bucket) + if self.s3_filenames: + if isinstance(self.s3_filenames, str): + self.log.info("Getting files in s3://%s/%s", self.s3_bucket, self.s3_key) + all_keys = s3_hook.list_keys(bucket_name=self.s3_bucket, prefix=self.s3_key) or [] + filenames = [k[len(self.s3_key) :] for k in all_keys] + if self.s3_filenames == "*": + files = filenames + else: + s3_prefix: str = self.s3_filenames + files = [f for f in filenames if s3_prefix in f] - with NamedTemporaryFile() as local_tmp_file: - self.log.info("Downloading file from %s", self.s3_key) - s3_obj.download_fileobj(local_tmp_file) - local_tmp_file.seek(0) - ftp_hook.store_file(self.ftp_path, local_tmp_file.name) - self.log.info("File stored in %s", {self.ftp_path}) + for file in files: + self.log.info("Moving file %s", file) + if self.ftp_filenames and isinstance(self.ftp_filenames, str): + ftp_filename = file.replace(self.s3_filenames, self.ftp_filenames) + else: + ftp_filename = file + self._download_from_s3( + s3_hook, ftp_hook, self.s3_key + file, self.ftp_path + ftp_filename + ) + else: + if self.ftp_filenames: + for s3_file, ftp_file in zip(self.s3_filenames, self.ftp_filenames): + self._download_from_s3( + s3_hook, ftp_hook, self.s3_key + s3_file, self.ftp_path + ftp_file + ) + else: + for s3_file in self.s3_filenames: + self._download_from_s3( + s3_hook, ftp_hook, self.s3_key + s3_file, self.ftp_path + s3_file + ) + else: + self._download_from_s3(s3_hook, ftp_hook, self.s3_key, self.ftp_path) diff 
--git a/providers/amazon/src/airflow/providers/amazon/aws/transfers/s3_to_sftp.py b/providers/amazon/src/airflow/providers/amazon/aws/transfers/s3_to_sftp.py index 87d8454af963b..b44d6fd82b9b1 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/transfers/s3_to_sftp.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/transfers/s3_to_sftp.py @@ -27,6 +27,8 @@ from airflow.providers.ssh.hooks.ssh import SSHHook if TYPE_CHECKING: + import paramiko + from airflow.sdk import Context @@ -40,8 +42,9 @@ class S3ToSFTPOperator(BaseOperator): :param sftp_conn_id: The sftp connection id. The name or identifier for establishing a connection to the SFTP server. - :param sftp_path: The sftp remote path. This is the specified file path for - uploading file to the SFTP server. + :param sftp_path: The sftp remote path. For a single file it must include the + file path. For multiple files it is the destination directory path and must + end with ``"/"``. :param sftp_remote_host: The remote host of the SFTP server. Overrides host in Connection. :param aws_conn_id: The Airflow connection used for AWS credentials. @@ -51,14 +54,24 @@ class S3ToSFTPOperator(BaseOperator): maintained on each worker node). :param s3_bucket: The targeted s3 bucket. This is the S3 bucket from where the file is downloaded. - :param s3_key: The targeted s3 key. This is the specified file path for - downloading the file from S3. + :param s3_key: The targeted s3 key. For a single file it must include the file + path. For multiple files it is the key prefix (directory) and must end with + ``"/"``. + :param s3_filenames: Only used if you want to move multiple files. You can pass + a list with exact key suffixes present under the s3_key prefix, or a string + prefix that all filenames must match. Use ``"*"`` to move all objects under + the s3_key prefix. + :param sftp_filenames: Only used if you want to move multiple files and name them + differently at the destination. 
It can be a list of filenames or a string + prefix that replaces the s3 prefix. :param confirm: specify if the SFTP operation should be confirmed, defaults to True. When True, a stat will be performed on the remote file after upload to verify the file size matches and confirm successful transfer. + :param fail_on_file_not_exist: If True, operator fails when a source S3 key does not + exist. If False, the operator logs a warning and skips the transfer. Default is True. """ - template_fields: Sequence[str] = ("s3_key", "sftp_path", "s3_bucket") + template_fields: Sequence[str] = ("s3_key", "sftp_path", "s3_bucket", "s3_filenames", "sftp_filenames") def __init__( self, @@ -69,7 +82,10 @@ def __init__( sftp_conn_id: str = "ssh_default", sftp_remote_host: str = "", aws_conn_id: str | None = "aws_default", + s3_filenames: str | list[str] | None = None, + sftp_filenames: str | list[str] | None = None, confirm: bool = True, + fail_on_file_not_exist: bool = True, **kwargs, ) -> None: super().__init__(**kwargs) @@ -79,7 +95,10 @@ def __init__( self.s3_key = s3_key self.sftp_remote_host = sftp_remote_host self.aws_conn_id = aws_conn_id + self.s3_filenames = s3_filenames + self.sftp_filenames = sftp_filenames self.confirm = confirm + self.fail_on_file_not_exist = fail_on_file_not_exist @staticmethod def get_s3_key(s3_key: str) -> str: @@ -87,16 +106,69 @@ def get_s3_key(s3_key: str) -> str: parsed_s3_key = urlsplit(s3_key) return parsed_s3_key.path.lstrip("/") + def _download_from_s3( + self, + sftp_client: paramiko.SFTPClient, + s3_hook: S3Hook, + s3_key: str, + sftp_path: str, + ) -> None: + if not s3_hook.check_for_key(s3_key, self.s3_bucket): + if self.fail_on_file_not_exist: + raise FileNotFoundError(f"Key {s3_key!r} not found in S3 bucket {self.s3_bucket!r}") + self.log.info("Key %s not found in S3. 
Skipping transfer.", s3_key) + return + with NamedTemporaryFile("w") as f: + s3_hook.get_conn().download_file(self.s3_bucket, s3_key, f.name) + sftp_client.put(f.name, sftp_path, confirm=self.confirm) + def execute(self, context: Context) -> None: self.s3_key = self.get_s3_key(self.s3_key) # SSHHook will handle a None/"" sftp_remote_host ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id, remote_host=self.sftp_remote_host) s3_hook = S3Hook(self.aws_conn_id) - - s3_client = s3_hook.get_conn() sftp_client = ssh_hook.get_conn().open_sftp() - with NamedTemporaryFile("w") as f: - s3_client.download_file(self.s3_bucket, self.s3_key, f.name) - sftp_client.put(f.name, self.sftp_path, confirm=self.confirm) + if self.s3_filenames: + if isinstance(self.s3_filenames, str): + self.log.info("Getting files in s3://%s/%s", self.s3_bucket, self.s3_key) + all_keys = s3_hook.list_keys(bucket_name=self.s3_bucket, prefix=self.s3_key) or [] + filenames = [k[len(self.s3_key) :] for k in all_keys] + if self.s3_filenames == "*": + files = filenames + else: + s3_prefix: str = self.s3_filenames + files = [f for f in filenames if s3_prefix in f] + + for file in files: + self.log.info("Moving file %s", file) + if self.sftp_filenames and isinstance(self.sftp_filenames, str): + sftp_filename = file.replace(self.s3_filenames, self.sftp_filenames) + else: + sftp_filename = file + self._download_from_s3( + sftp_client, + s3_hook, + self.s3_key + file, + self.sftp_path + sftp_filename, + ) + else: + if self.sftp_filenames: + for s3_file, sftp_file in zip(self.s3_filenames, self.sftp_filenames): + self._download_from_s3( + sftp_client, + s3_hook, + self.s3_key + s3_file, + self.sftp_path + sftp_file, + ) + else: + for s3_file in self.s3_filenames: + self._download_from_s3( + sftp_client, + s3_hook, + self.s3_key + s3_file, + self.sftp_path + s3_file, + ) + else: + self._download_from_s3(sftp_client, s3_hook, self.s3_key, self.sftp_path) diff --git 
a/providers/amazon/src/airflow/providers/amazon/aws/transfers/sftp_to_s3.py b/providers/amazon/src/airflow/providers/amazon/aws/transfers/sftp_to_s3.py index 4897ccca25c91..1ee19aed36a81 100644 --- a/providers/amazon/src/airflow/providers/amazon/aws/transfers/sftp_to_s3.py +++ b/providers/amazon/src/airflow/providers/amazon/aws/transfers/sftp_to_s3.py @@ -17,16 +17,20 @@ # under the License. from __future__ import annotations +import warnings from collections.abc import Sequence from tempfile import NamedTemporaryFile from typing import TYPE_CHECKING from urllib.parse import urlsplit +from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.providers.amazon.aws.hooks.s3 import S3Hook from airflow.providers.common.compat.sdk import BaseOperator from airflow.providers.ssh.hooks.ssh import SSHHook if TYPE_CHECKING: + import paramiko + from airflow.sdk import Context @@ -42,21 +46,35 @@ class SFTPToS3Operator(BaseOperator): establishing a connection to the SFTP server. :param sftp_remote_host: The remote host of the SFTP server. Overrides host in Connection. - :param sftp_path: The sftp remote path. This is the specified file path - for downloading the file from the SFTP server. - :param s3_conn_id: The s3 connection id. The name or identifier for - establishing a connection to S3 + :param sftp_path: The sftp remote path. For a single file it must include the + file path. For multiple files it is the directory path where the files are + located. + :param sftp_filenames: Only used if you want to move multiple files. You can pass + a list with exact filenames present in the sftp path, or a prefix that all + files must match. Use ``"*"`` to move all files within the sftp path. + :param aws_conn_id: The Airflow connection used for AWS credentials. + If this is None or empty then the default boto3 behaviour is used. 
If + running Airflow in a distributed manner and aws_conn_id is None or + empty, then default boto3 configuration would be used (and must be + maintained on each worker node). :param s3_bucket: The targeted s3 bucket. This is the S3 bucket to where the file is uploaded. - :param s3_key: The targeted s3 key. This is the specified path for - uploading the file to S3. + :param s3_key: The targeted s3 key. For a single file it must include the file + path. For multiple files it must end with ``"/"``. + :param s3_filenames: Only used if you want to move multiple files and name them + differently from the originals on the SFTP server. It can be a list of + filenames or a string prefix that replaces the sftp prefix. :param use_temp_file: If True, copies file first to local, if False streams file from SFTP to S3. :param fail_on_file_not_exist: If True, operator fails when file does not exist, if False, operator will not fail and skips transfer. Default is True. + :param replace: If True, overwrite the S3 key if it already exists. + :param encrypt: If True, the file will be encrypted on the server-side by S3. + :param gzip: If True, the file will be compressed locally before upload. + :param acl_policy: Canned ACL policy for the file being uploaded to S3. 
""" - template_fields: Sequence[str] = ("s3_key", "sftp_path", "s3_bucket") + template_fields: Sequence[str] = ("s3_key", "sftp_path", "s3_bucket", "sftp_filenames", "s3_filenames") def __init__( self, @@ -66,20 +84,40 @@ def __init__( sftp_path: str, sftp_conn_id: str = "ssh_default", sftp_remote_host: str = "", - s3_conn_id: str = "aws_default", + sftp_filenames: str | list[str] | None = None, + s3_filenames: str | list[str] | None = None, use_temp_file: bool = True, fail_on_file_not_exist: bool = True, + replace: bool = False, + encrypt: bool = False, + gzip: bool = False, + acl_policy: str | None = None, + aws_conn_id: str = "aws_default", + s3_conn_id: str | None = None, **kwargs, ) -> None: super().__init__(**kwargs) + if s3_conn_id is not None: + warnings.warn( + "The s3_conn_id parameter is deprecated. Use aws_conn_id instead.", + AirflowProviderDeprecationWarning, + stacklevel=2, + ) + aws_conn_id = s3_conn_id self.sftp_conn_id = sftp_conn_id self.sftp_path = sftp_path self.sftp_remote_host = sftp_remote_host self.s3_bucket = s3_bucket self.s3_key = s3_key - self.s3_conn_id = s3_conn_id + self.aws_conn_id = aws_conn_id + self.sftp_filenames = sftp_filenames + self.s3_filenames = s3_filenames self.use_temp_file = use_temp_file self.fail_on_file_not_exist = fail_on_file_not_exist + self.replace = replace + self.encrypt = encrypt + self.gzip = gzip + self.acl_policy = acl_policy @staticmethod def get_s3_key(s3_key: str) -> str: @@ -87,28 +125,90 @@ def get_s3_key(s3_key: str) -> str: parsed_s3_key = urlsplit(s3_key) return parsed_s3_key.path.lstrip("/") - def execute(self, context: Context) -> None: - self.s3_key = self.get_s3_key(self.s3_key) - - # SSHHook will handle a None/"" sftp_remote_host - ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id, remote_host=self.sftp_remote_host) - s3_hook = S3Hook(self.s3_conn_id) - - sftp_client = ssh_hook.get_conn().open_sftp() - + def _upload_to_s3( + self, + sftp_client: paramiko.SFTPClient, + s3_hook: S3Hook, + 
sftp_path: str, + s3_key: str, + ) -> None: try: - sftp_client.stat(self.sftp_path) + sftp_client.stat(sftp_path) except FileNotFoundError: if self.fail_on_file_not_exist: raise - self.log.info("File %s not found on SFTP server. Skipping transfer.", self.sftp_path) + self.log.info("File %s not found on SFTP server. Skipping transfer.", sftp_path) return if self.use_temp_file: with NamedTemporaryFile("w") as f: - sftp_client.get(self.sftp_path, f.name) + sftp_client.get(sftp_path, f.name) + s3_hook.load_file( + filename=f.name, + key=s3_key, + bucket_name=self.s3_bucket, + replace=self.replace, + encrypt=self.encrypt, + gzip=self.gzip, + acl_policy=self.acl_policy, + ) + else: + extra_args: dict = {} + if self.encrypt: + extra_args["ServerSideEncryption"] = "AES256" + if self.acl_policy: + extra_args["ACL"] = self.acl_policy + with sftp_client.file(sftp_path, mode="rb") as data: + s3_hook.get_conn().upload_fileobj( + data, self.s3_bucket, s3_key, ExtraArgs=extra_args or None, Callback=self.log.info + ) + + def execute(self, context: Context) -> None: + self.s3_key = self.get_s3_key(self.s3_key) + + # SSHHook will handle a None/"" sftp_remote_host + ssh_hook = SSHHook(ssh_conn_id=self.sftp_conn_id, remote_host=self.sftp_remote_host) + s3_hook = S3Hook(self.aws_conn_id) + sftp_client = ssh_hook.get_conn().open_sftp() - s3_hook.load_file(filename=f.name, key=self.s3_key, bucket_name=self.s3_bucket, replace=True) + if self.sftp_filenames: + if isinstance(self.sftp_filenames, str): + self.log.info("Getting files in %s", self.sftp_path) + list_dir = sftp_client.listdir(self.sftp_path) + if self.sftp_filenames == "*": + files = list_dir + else: + sftp_prefix: str = self.sftp_filenames + files = [f for f in list_dir if sftp_prefix in f] + + for file in files: + self.log.info("Moving file %s", file) + if self.s3_filenames and isinstance(self.s3_filenames, str): + s3_filename = file.replace(self.sftp_filenames, self.s3_filenames) + else: + s3_filename = file + 
self._upload_to_s3( + sftp_client, + s3_hook, + f"{self.sftp_path}/{file}", + f"{self.s3_key}{s3_filename}", + ) + else: + if self.s3_filenames: + for sftp_file, s3_file in zip(self.sftp_filenames, self.s3_filenames): + self._upload_to_s3( + sftp_client, + s3_hook, + self.sftp_path + sftp_file, + self.s3_key + s3_file, + ) + else: + for sftp_file in self.sftp_filenames: + self._upload_to_s3( + sftp_client, + s3_hook, + self.sftp_path + sftp_file, + self.s3_key + sftp_file, + ) else: - with sftp_client.file(self.sftp_path, mode="rb") as data: - s3_hook.get_conn().upload_fileobj(data, self.s3_bucket, self.s3_key, Callback=self.log.info) + self._upload_to_s3(sftp_client, s3_hook, self.sftp_path, self.s3_key) diff --git a/providers/amazon/tests/unit/amazon/aws/transfers/test_ftp_to_s3.py b/providers/amazon/tests/unit/amazon/aws/transfers/test_ftp_to_s3.py index 757a396464178..102969b33c8f9 100644 --- a/providers/amazon/tests/unit/amazon/aws/transfers/test_ftp_to_s3.py +++ b/providers/amazon/tests/unit/amazon/aws/transfers/test_ftp_to_s3.py @@ -17,7 +17,11 @@ # under the License. 
from __future__ import annotations +import ftplib from unittest import mock +from unittest.mock import MagicMock, patch + +import pytest from airflow.providers.amazon.aws.transfers.ftp_to_s3 import FTPToS3Operator @@ -128,3 +132,34 @@ def test_execute_multiple_files_prefix( operator.execute(None) mock_ftp_hook_list_directory.assert_called_once_with(path=FTP_PATH_MULTIPLE) + + +class TestFTPToS3OperatorInit: + """Unit tests for FTPToS3Operator.__init__ that do not require an FTP server.""" + + def test_fail_on_file_not_exist_default(self): + """fail_on_file_not_exist defaults to True.""" + op = FTPToS3Operator(task_id="test_fail_default", s3_bucket=BUCKET, s3_key=S3_KEY, ftp_path=FTP_PATH) + assert op.fail_on_file_not_exist is True + + @pytest.mark.parametrize("fail_on_file_not_exist", [True, False]) + def test_fail_on_file_not_exist_skip(self, fail_on_file_not_exist): + """When FTP file is missing (error_perm 550): raise if True, skip if False.""" + op = FTPToS3Operator( + task_id="test_skip", + s3_bucket=BUCKET, + s3_key=S3_KEY, + ftp_path=FTP_PATH, + fail_on_file_not_exist=fail_on_file_not_exist, + ) + op.ftp_hook = MagicMock() + op.s3_hook = MagicMock() + op.ftp_hook.retrieve_file.side_effect = ftplib.error_perm("550 No such file or directory") + + if fail_on_file_not_exist: + with pytest.raises(ftplib.error_perm): + op._FTPToS3Operator__upload_to_s3_from_ftp(FTP_PATH, S3_KEY) + else: + with patch.object(op.log, "info") as mock_log: + op._FTPToS3Operator__upload_to_s3_from_ftp(FTP_PATH, S3_KEY) + mock_log.assert_called_once() diff --git a/providers/amazon/tests/unit/amazon/aws/transfers/test_s3_to_ftp.py b/providers/amazon/tests/unit/amazon/aws/transfers/test_s3_to_ftp.py index 6308d34ac020a..899708c533f17 100644 --- a/providers/amazon/tests/unit/amazon/aws/transfers/test_s3_to_ftp.py +++ b/providers/amazon/tests/unit/amazon/aws/transfers/test_s3_to_ftp.py @@ -19,6 +19,8 @@ from unittest import mock +import pytest + from 
airflow.providers.amazon.aws.transfers.s3_to_ftp import S3ToFTPOperator TASK_ID = "test_s3_to_ftp" @@ -32,8 +34,11 @@ class TestS3ToFTPOperator: @mock.patch("airflow.providers.ftp.hooks.ftp.FTPHook.store_file") @mock.patch("airflow.providers.amazon.aws.hooks.s3.S3Hook.get_key") + @mock.patch("airflow.providers.amazon.aws.hooks.s3.S3Hook.check_for_key", return_value=True) @mock.patch("airflow.providers.amazon.aws.transfers.s3_to_ftp.NamedTemporaryFile") - def test_execute(self, mock_local_tmp_file, mock_s3_hook_get_key, mock_ftp_hook_store_file): + def test_execute( + self, mock_local_tmp_file, mock_check_for_key, mock_s3_hook_get_key, mock_ftp_hook_store_file + ): operator = S3ToFTPOperator(task_id=TASK_ID, s3_bucket=BUCKET, s3_key=S3_KEY, ftp_path=FTP_PATH) operator.execute(None) @@ -42,3 +47,57 @@ def test_execute(self, mock_local_tmp_file, mock_s3_hook_get_key, mock_ftp_hook_ mock_local_tmp_file_value = mock_local_tmp_file.return_value.__enter__.return_value mock_s3_hook_get_key.return_value.download_fileobj.assert_called_once_with(mock_local_tmp_file_value) mock_ftp_hook_store_file.assert_called_once_with(operator.ftp_path, mock_local_tmp_file_value.name) + + +class TestS3ToFTPOperatorInit: + """Unit tests for S3ToFTPOperator.__init__ that do not require an FTP server.""" + + @pytest.mark.parametrize( + ("s3_filenames", "ftp_filenames"), + [ + (None, None), + ("*", None), + ("prefix_", "renamed_"), + (["a.csv", "b.csv"], ["x.csv", "y.csv"]), + ], + ) + def test_multi_file_params(self, s3_filenames, ftp_filenames): + """s3_filenames and ftp_filenames are stored correctly.""" + op = S3ToFTPOperator( + task_id="test_multi", + s3_bucket=BUCKET, + s3_key=S3_KEY, + ftp_path=FTP_PATH, + s3_filenames=s3_filenames, + ftp_filenames=ftp_filenames, + ) + assert op.s3_filenames == s3_filenames + assert op.ftp_filenames == ftp_filenames + + def test_fail_on_file_not_exist_default(self): + """fail_on_file_not_exist defaults to True.""" + op = 
S3ToFTPOperator(task_id="test_fail_default", s3_bucket=BUCKET, s3_key=S3_KEY, ftp_path=FTP_PATH) + assert op.fail_on_file_not_exist is True + + @pytest.mark.parametrize("fail_on_file_not_exist", [True, False]) + def test_fail_on_file_not_exist_skip(self, fail_on_file_not_exist): + """When key is missing: raise FileNotFoundError if True, skip if False.""" + from unittest.mock import MagicMock, patch + + op = S3ToFTPOperator( + task_id="test_skip", + s3_bucket=BUCKET, + s3_key=S3_KEY, + ftp_path=FTP_PATH, + fail_on_file_not_exist=fail_on_file_not_exist, + ) + mock_s3_hook = MagicMock() + mock_s3_hook.check_for_key.return_value = False + + if fail_on_file_not_exist: + with pytest.raises(FileNotFoundError): + op._download_from_s3(mock_s3_hook, MagicMock(), S3_KEY, FTP_PATH) + else: + with patch.object(op.log, "info") as mock_log: + op._download_from_s3(mock_s3_hook, MagicMock(), S3_KEY, FTP_PATH) + mock_log.assert_called_once() diff --git a/providers/amazon/tests/unit/amazon/aws/transfers/test_s3_to_sftp.py b/providers/amazon/tests/unit/amazon/aws/transfers/test_s3_to_sftp.py index 257b898922ccd..867c7336ae920 100644 --- a/providers/amazon/tests/unit/amazon/aws/transfers/test_s3_to_sftp.py +++ b/providers/amazon/tests/unit/amazon/aws/transfers/test_s3_to_sftp.py @@ -313,3 +313,65 @@ def test_s3_to_sftp_operator_sftp_remote_host(self): def teardown_method(self): self.delete_remote_resource() + + +class TestS3ToSFTPOperatorInit: + """Unit tests for S3ToSFTPOperator.__init__ that do not require an SSH server.""" + + @pytest.mark.parametrize( + ("s3_filenames", "sftp_filenames"), + [ + (None, None), + ("*", None), + ("prefix_", "renamed_"), + (["a.csv", "b.csv"], ["x.csv", "y.csv"]), + ], + ) + def test_multi_file_params(self, s3_filenames, sftp_filenames): + """s3_filenames and sftp_filenames are stored correctly.""" + op = S3ToSFTPOperator( + task_id="test_multi", + s3_bucket=BUCKET, + s3_key=S3_KEY, + sftp_path=SFTP_PATH, + sftp_conn_id=SFTP_CONN_ID, + 
s3_filenames=s3_filenames, + sftp_filenames=sftp_filenames, + ) + assert op.s3_filenames == s3_filenames + assert op.sftp_filenames == sftp_filenames + + def test_fail_on_file_not_exist_default(self): + """fail_on_file_not_exist defaults to True.""" + op = S3ToSFTPOperator( + task_id="test_fail_default", + s3_bucket=BUCKET, + s3_key=S3_KEY, + sftp_path=SFTP_PATH, + sftp_conn_id=SFTP_CONN_ID, + ) + assert op.fail_on_file_not_exist is True + + @pytest.mark.parametrize("fail_on_file_not_exist", [True, False]) + def test_fail_on_file_not_exist_skip(self, fail_on_file_not_exist): + """When key is missing: raise FileNotFoundError if True, skip if False.""" + from unittest.mock import MagicMock, patch + + op = S3ToSFTPOperator( + task_id="test_skip", + s3_bucket=BUCKET, + s3_key=S3_KEY, + sftp_path=SFTP_PATH, + sftp_conn_id=SFTP_CONN_ID, + fail_on_file_not_exist=fail_on_file_not_exist, + ) + mock_s3_hook = MagicMock() + mock_s3_hook.check_for_key.return_value = False + + if fail_on_file_not_exist: + with pytest.raises(FileNotFoundError): + op._download_from_s3(MagicMock(), mock_s3_hook, S3_KEY, SFTP_PATH) + else: + with patch.object(op.log, "info") as mock_log: + op._download_from_s3(MagicMock(), mock_s3_hook, S3_KEY, SFTP_PATH) + mock_log.assert_called_once() diff --git a/providers/amazon/tests/unit/amazon/aws/transfers/test_sftp_to_s3.py b/providers/amazon/tests/unit/amazon/aws/transfers/test_sftp_to_s3.py index feb85e33a3c17..73ad1bae0c37e 100644 --- a/providers/amazon/tests/unit/amazon/aws/transfers/test_sftp_to_s3.py +++ b/providers/amazon/tests/unit/amazon/aws/transfers/test_sftp_to_s3.py @@ -17,10 +17,13 @@ # under the License. 
from __future__ import annotations +import warnings + import boto3 import pytest from moto import mock_aws +from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.models import DAG from airflow.providers.amazon.aws.hooks.s3 import S3Hook from airflow.providers.amazon.aws.transfers.sftp_to_s3 import SFTPToS3Operator @@ -99,7 +102,7 @@ def test_sftp_to_s3_operation(self, use_temp_file): s3_key=S3_KEY, sftp_path=SFTP_PATH, sftp_conn_id=SFTP_CONN_ID, - s3_conn_id=S3_CONN_ID, + aws_conn_id=S3_CONN_ID, use_temp_file=use_temp_file, task_id="test_sftp_to_s3", dag=self.dag, @@ -137,7 +140,7 @@ def test_sftp_to_s3_fail_on_file_not_exist(self, fail_on_file_not_exist): s3_key=self.s3_key, sftp_path="/tmp/wrong_path.txt", sftp_conn_id=SFTP_CONN_ID, - s3_conn_id=S3_CONN_ID, + aws_conn_id=S3_CONN_ID, fail_on_file_not_exist=fail_on_file_not_exist, task_id="test_sftp_to_s3", dag=self.dag, @@ -148,7 +151,7 @@ def test_sftp_to_s3_fail_on_file_not_exist(self, fail_on_file_not_exist): s3_key=self.s3_key, sftp_path=self.sftp_path, sftp_conn_id=SFTP_CONN_ID, - s3_conn_id=S3_CONN_ID, + aws_conn_id=S3_CONN_ID, fail_on_file_not_exist=fail_on_file_not_exist, task_id="test_sftp_to_s3", dag=self.dag, @@ -191,7 +194,7 @@ def test_sftp_to_s3_sftp_remote_host(self): sftp_path=SFTP_PATH, sftp_conn_id=SFTP_CONN_ID, sftp_remote_host="localhost", - s3_conn_id=S3_CONN_ID, + aws_conn_id=S3_CONN_ID, task_id="test_sftp_to_s3_remote_host", dag=self.dag, ) @@ -208,3 +211,96 @@ def test_sftp_to_s3_sftp_remote_host(self): conn.delete_object(Bucket=self.s3_bucket, Key=self.s3_key) conn.delete_bucket(Bucket=self.s3_bucket) assert not s3_hook.check_for_bucket(self.s3_bucket) + + +class TestSFTPToS3OperatorInit: + """Unit tests for SFTPToS3Operator.__init__ that do not require an SSH server.""" + + def test_s3_conn_id_deprecated(self): + """s3_conn_id is a deprecated alias for aws_conn_id and must raise DeprecationWarning.""" + with warnings.catch_warnings(record=True) as caught: + 
warnings.simplefilter("always") + op = SFTPToS3Operator( + task_id="test_deprecated", + s3_bucket=BUCKET, + s3_key=S3_KEY, + sftp_path=SFTP_PATH, + sftp_conn_id=SFTP_CONN_ID, + s3_conn_id="my_legacy_conn", + ) + deprecation_warnings = [ + w for w in caught if issubclass(w.category, AirflowProviderDeprecationWarning) + ] + assert len(deprecation_warnings) == 1 + assert "s3_conn_id" in str(deprecation_warnings[0].message) + assert op.aws_conn_id == "my_legacy_conn" + + def test_aws_conn_id_default(self): + """aws_conn_id defaults to 'aws_default' and no AirflowProviderDeprecationWarning is raised.""" + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + op = SFTPToS3Operator( + task_id="test_default", + s3_bucket=BUCKET, + s3_key=S3_KEY, + sftp_path=SFTP_PATH, + sftp_conn_id=SFTP_CONN_ID, + ) + deprecation_warnings = [ + w for w in caught if issubclass(w.category, AirflowProviderDeprecationWarning) + ] + assert not deprecation_warnings + assert op.aws_conn_id == "aws_default" + + @pytest.mark.parametrize( + ("kwargs", "expected"), + [ + ({}, {"replace": False, "encrypt": False, "gzip": False, "acl_policy": None}), + ( + {"replace": True, "encrypt": True, "gzip": True, "acl_policy": "bucket-owner-full-control"}, + { + "replace": True, + "encrypt": True, + "gzip": True, + "acl_policy": "bucket-owner-full-control", + }, + ), + ], + ) + def test_s3_upload_options(self, kwargs, expected): + """replace/encrypt/gzip/acl_policy are stored and default to False/None.""" + op = SFTPToS3Operator( + task_id="test_options", + s3_bucket=BUCKET, + s3_key=S3_KEY, + sftp_path=SFTP_PATH, + sftp_conn_id=SFTP_CONN_ID, + **kwargs, + ) + assert op.replace == expected["replace"] + assert op.encrypt == expected["encrypt"] + assert op.gzip == expected["gzip"] + assert op.acl_policy == expected["acl_policy"] + + @pytest.mark.parametrize( + ("sftp_filenames", "s3_filenames"), + [ + (None, None), + ("*", None), + ("prefix_", "renamed_"), + (["a.csv", 
"b.csv"], ["x.csv", "y.csv"]), + ], + ) + def test_multi_file_params(self, sftp_filenames, s3_filenames): + """sftp_filenames and s3_filenames are stored correctly.""" + op = SFTPToS3Operator( + task_id="test_multi", + s3_bucket=BUCKET, + s3_key=S3_KEY, + sftp_path=SFTP_PATH, + sftp_conn_id=SFTP_CONN_ID, + sftp_filenames=sftp_filenames, + s3_filenames=s3_filenames, + ) + assert op.sftp_filenames == sftp_filenames + assert op.s3_filenames == s3_filenames From 5898bcb2b926f678fb0cc3877cf99bb54a6a0b3c Mon Sep 17 00:00:00 2001 From: Daniel Standish <15932138+dstandish@users.noreply.github.com> Date: Thu, 28 May 2026 11:19:55 -0700 Subject: [PATCH 23/28] Emit OpenTelemetry spans around listener hook calls (#67347) Push span creation down into ListenerManager via pluggy's add_hookcall_monitoring so every hook call gets a `listener.` span automatically, gated on task span detail level > 1. Callers no longer need to wrap individual hook invocations. Also makes the existing _after_hookcall safe against listener exceptions: previously `outcome.get_result()` was called unconditionally and would re-raise, skipping the rest of the callback. --- .../tests/integration/otel/test_otel.py | 6 +- shared/listeners/pyproject.toml | 3 + .../src/airflow_shared/listeners/listener.py | 48 +++++++++- .../tests/listeners/test_listener_manager.py | 95 ++++++++++++++++++- .../airflow/sdk/execution_time/task_runner.py | 3 +- uv.lock | 10 +- 6 files changed, 159 insertions(+), 6 deletions(-) diff --git a/airflow-core/tests/integration/otel/test_otel.py b/airflow-core/tests/integration/otel/test_otel.py index a6af896b4374b..c543f54a921d8 100644 --- a/airflow-core/tests/integration/otel/test_otel.py +++ b/airflow-core/tests/integration/otel/test_otel.py @@ -458,7 +458,6 @@ def test_export_metrics_during_process_shutdown(self, capfd): # Additional detail spans are deferred to follow-up PRs; tracked # at https://linear.app/astronomer/issue/ACD-157. 
{ - "hook.on_starting": "startup", "_verify_bundle_access": "parse", "parse": "startup", "get_template_context": "startup", @@ -474,6 +473,11 @@ def test_export_metrics_during_process_shutdown(self, capfd): "dag_run.otel_test_dag": None, "task_run.task1": "dag_run.otel_test_dag", "worker.task1": "task_run.task1", + # OpenLineage registers a listener by default, so its + # on_task_instance_running / on_task_instance_success hook + # calls get wrapped in spans at detail level > 1. + "listener.on_task_instance_running": "_prepare", + "listener.on_task_instance_success": "finalize", }, id="detail_spans", ), diff --git a/shared/listeners/pyproject.toml b/shared/listeners/pyproject.toml index 0554778a47eff..e5a67d003cd64 100644 --- a/shared/listeners/pyproject.toml +++ b/shared/listeners/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ ] dependencies = [ + "opentelemetry-api>=1.27.0", "pluggy>=1.5.0", "structlog>=25.4.0", ] @@ -31,6 +32,8 @@ dependencies = [ [dependency-groups] dev = [ "apache-airflow-devel-common", + "apache-airflow-shared-observability", + "opentelemetry-sdk>=1.27.0", ] mypy = [ "apache-airflow-devel-common[mypy]", diff --git a/shared/listeners/src/airflow_shared/listeners/listener.py b/shared/listeners/src/airflow_shared/listeners/listener.py index d4b36c059d480..a76019ac02843 100644 --- a/shared/listeners/src/airflow_shared/listeners/listener.py +++ b/shared/listeners/src/airflow_shared/listeners/listener.py @@ -17,24 +17,70 @@ # under the License. 
from __future__ import annotations +import threading from typing import TYPE_CHECKING import pluggy import structlog +from opentelemetry import trace +from opentelemetry.trace import Status, StatusCode + +from ..observability.traces import DEFAULT_TASK_SPAN_DETAIL_LEVEL, TASK_SPAN_DETAIL_LEVEL_KEY if TYPE_CHECKING: + from opentelemetry.trace import Span from pluggy._hooks import _HookRelay log = structlog.get_logger(__name__) +tracer = trace.get_tracer(__name__) + + +def _detail_level(span: Span) -> int: + raw = span.get_span_context().trace_state.get(TASK_SPAN_DETAIL_LEVEL_KEY) + if raw is None: + return DEFAULT_TASK_SPAN_DETAIL_LEVEL + try: + return int(raw) + except (TypeError, ValueError): + return DEFAULT_TASK_SPAN_DETAIL_LEVEL + + +_span_state = threading.local() + + +def _stack() -> list: + stack = getattr(_span_state, "stack", None) + if stack is None: + stack = _span_state.stack = [] + return stack def _before_hookcall(hook_name, hook_impls, kwargs): log.debug("Calling %r with %r", hook_name, kwargs) log.debug("Hook impls: %s", hook_impls) + if not hook_impls or _detail_level(trace.get_current_span()) <= 1: + _stack().append(None) + return + cm = tracer.start_as_current_span(f"listener.{hook_name}") + span = cm.__enter__() + _stack().append((cm, span)) def _after_hookcall(outcome, hook_name, hook_impls, kwargs): - log.debug("Result from %r: %s", hook_name, outcome.get_result()) + excinfo = getattr(outcome, "excinfo", None) + if excinfo: + log.debug("Hook %r raised %s", hook_name, excinfo[0].__name__) + else: + log.debug("Result from %r: %s", hook_name, outcome.get_result()) + entry = _stack().pop() + if entry is None: + return + cm, span = entry + if excinfo: + exc_type, exc, _tb = excinfo + span.record_exception(exc) + span.set_status(Status(StatusCode.ERROR, description=f"Exception: {exc_type.__name__}")) + cm.__exit__(None, None, None) class ListenerManager: diff --git a/shared/listeners/tests/listeners/test_listener_manager.py 
b/shared/listeners/tests/listeners/test_listener_manager.py index ebf360dade08d..3843c89f88fe5 100644 --- a/shared/listeners/tests/listeners/test_listener_manager.py +++ b/shared/listeners/tests/listeners/test_listener_manager.py @@ -17,9 +17,19 @@ # under the License. from __future__ import annotations -from airflow_shared.listeners import hookimpl +from unittest import mock + +import pytest +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.trace import StatusCode +from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator + +from airflow_shared.listeners import hookimpl, listener as listener_module from airflow_shared.listeners.listener import ListenerManager from airflow_shared.listeners.spec import lifecycle, taskinstance +from airflow_shared.observability.traces import new_dagrun_trace_carrier class TestListenerManager: @@ -162,3 +172,86 @@ def on_task_instance_failed(self, previous_state, task_instance, error): ("success", mock_ti), ("failed", mock_ti, "test error"), ] + + +@pytest.fixture +def test_tracer(): + """Patch the listener module's tracer with one backed by an in-memory exporter.""" + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + tracer = provider.get_tracer("test") + with mock.patch.object(listener_module, "tracer", tracer): + yield tracer, exporter + + +def _parent_span_ctx(detail_level: int): + carrier = new_dagrun_trace_carrier(task_span_detail_level=detail_level) + return TraceContextTextMapPropagator().extract(carrier) + + +class _StartingListener: + @hookimpl + def on_starting(self, component): + pass + + +class _RaisingListener: + @hookimpl + def on_starting(self, component): + raise RuntimeError("boom") + + +class TestListenerSpan: + """Span emitted around every 
listener hook call when detail level > 1.""" + + def test_emits_span_when_detail_level_above_1(self, test_tracer): + tracer, exporter = test_tracer + lm = ListenerManager() + lm.add_hookspecs(lifecycle) + lm.add_listener(_StartingListener()) + + with tracer.start_as_current_span("parent", context=_parent_span_ctx(2)): + lm.hook.on_starting(component="x") + + names = [s.name for s in exporter.get_finished_spans()] + assert "listener.on_starting" in names + + def test_no_span_at_default_detail_level(self, test_tracer): + tracer, exporter = test_tracer + lm = ListenerManager() + lm.add_hookspecs(lifecycle) + lm.add_listener(_StartingListener()) + + with tracer.start_as_current_span("parent", context=_parent_span_ctx(1)): + lm.hook.on_starting(component="x") + + names = [s.name for s in exporter.get_finished_spans()] + assert "listener.on_starting" not in names + + def test_no_span_when_no_impls_registered(self, test_tracer): + tracer, exporter = test_tracer + lm = ListenerManager() + lm.add_hookspecs(lifecycle) + # No listeners added — pluggy still fires monitoring around the call. 
+ + with tracer.start_as_current_span("parent", context=_parent_span_ctx(2)): + lm.hook.on_starting(component="x") + + names = [s.name for s in exporter.get_finished_spans()] + assert "listener.on_starting" not in names + + def test_records_exception_on_listener_error(self, test_tracer): + tracer, exporter = test_tracer + lm = ListenerManager() + lm.add_hookspecs(lifecycle) + lm.add_listener(_RaisingListener()) + + with tracer.start_as_current_span("parent", context=_parent_span_ctx(2)): + with pytest.raises(RuntimeError): + lm.hook.on_starting(component="x") + + spans = {s.name: s for s in exporter.get_finished_spans()} + listener_span = spans["listener.on_starting"] + assert listener_span.status.status_code == StatusCode.ERROR + assert any(ev.name == "exception" for ev in listener_span.events) diff --git a/task-sdk/src/airflow/sdk/execution_time/task_runner.py b/task-sdk/src/airflow/sdk/execution_time/task_runner.py index 313309fa40100..df340f2511225 100644 --- a/task-sdk/src/airflow/sdk/execution_time/task_runner.py +++ b/task-sdk/src/airflow/sdk/execution_time/task_runner.py @@ -1022,8 +1022,7 @@ def startup(msg: StartupDetails) -> tuple[RuntimeTaskInstance, Context, Logger]: ) try: - with detail_span("hook.on_starting"): - get_listener_manager().hook.on_starting(component=TaskRunnerMarker()) + get_listener_manager().hook.on_starting(component=TaskRunnerMarker()) except Exception: log.exception("error calling listener") diff --git a/uv.lock b/uv.lock index 6fefbf9fc232b..9fbd48c147dcf 100644 --- a/uv.lock +++ b/uv.lock @@ -8311,6 +8311,7 @@ name = "apache-airflow-shared-listeners" version = "0.0" source = { editable = "shared/listeners" } dependencies = [ + { name = "opentelemetry-api" }, { name = "pluggy" }, { name = "structlog" }, ] @@ -8318,6 +8319,8 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "apache-airflow-devel-common" }, + { name = "apache-airflow-shared-observability" }, + { name = "opentelemetry-sdk" }, ] mypy = [ { name = 
"apache-airflow-devel-common", extra = ["mypy"] }, @@ -8325,12 +8328,17 @@ mypy = [ [package.metadata] requires-dist = [ + { name = "opentelemetry-api", specifier = ">=1.27.0" }, { name = "pluggy", specifier = ">=1.5.0" }, { name = "structlog", specifier = ">=25.4.0" }, ] [package.metadata.requires-dev] -dev = [{ name = "apache-airflow-devel-common", editable = "devel-common" }] +dev = [ + { name = "apache-airflow-devel-common", editable = "devel-common" }, + { name = "apache-airflow-shared-observability", editable = "shared/observability" }, + { name = "opentelemetry-sdk", specifier = ">=1.27.0" }, +] mypy = [{ name = "apache-airflow-devel-common", extras = ["mypy"], editable = "devel-common" }] [[package]] From fd950357d6b0485a962b66707be88d947c81bd06 Mon Sep 17 00:00:00 2001 From: Kaxil Naik Date: Thu, 28 May 2026 20:00:43 +0100 Subject: [PATCH 24/28] Fix KubernetesPodOperator emitting orphan timestamps for empty container writes (#67652) When a container running under `KubernetesPodOperator` writes an empty line, kubelet streams it back (with `timestamps=True`) as `" \n"` -- a timestamp followed by a separator space and an empty message. `parse_log_line` in `pod_manager.py` called `line.strip().partition(" ")`, which removed the trailing separator space before partitioning, so the function returned `timestamp=None` and the caller treated the line as a continuation of the previous buffered log record. 
The bare RFC3339 string was then appended onto the previous message and emitted as a multi-line log where only the first line carried the Airflow `[ts] {pod_manager.py:N} INFO -` prefix, leaving unprefixed timestamp rows interleaved in task logs: ``` [2026-05-28T13:07:50.160+0000] {pod_manager.py:520} INFO - [base] first test line 2026-05-28T13:07:57.030578889Z 2026-05-28T13:07:57.030581518Z 2026-05-28T13:07:57.030642740Z [2026-05-28T13:07:57.034+0000] {pod_manager.py:520} INFO - [base] last test line ``` Downstream, that breaks [`airflow.utils.log.file_task_handler._parse_timestamp`](https://github.com/apache/airflow/blob/main/airflow-core/src/airflow/utils/log/file_task_handler.py#L201-L203), which feeds the line to `pendulum.parse` after stripping `[]`: malformed fragments from these orphan rows can raise `ValueError: month must be in 1..12` and fail the task entirely. Closes #36571. ## Root cause and history Regressed in [#33675](https://github.com/apache/airflow/pull/33675) (merged 2023-08-24, shipped in cncf-kubernetes **7.5.0**), which replaced the original `line.find(" ")` split with a `line.strip().partition(" ")` pattern under the banner of a refactor: ```diff - split_at = line.find(" ") - if split_at == -1: - ... - timestamp = line[:split_at] - message = line[split_at + 1 :].rstrip() + timestamp, sep, message = line.strip().partition(" ") + if not sep: + ... ``` The pre-refactor implementation correctly handled `<timestamp> \n` because `find(" ")` matched the separator space directly and the message-side `.rstrip()` produced an empty string. The new code strips the separator off before partitioning, so the function loses its only signal that the line is well-formed. This matches the regression window the original reporter described in [#36571](https://github.com/apache/airflow/issues/36571): the bug appeared after upgrading cncf-kubernetes from 7.4.2 (pre-refactor) to 7.12.0+ (post-refactor) and is still reproducible on current `main` (10.17.x).
## Fix * `parse_log_line` no longer pre-strips the line; it only applies `rstrip("\n")` and partitions on the original separator, so empty container writes are recognised as `(timestamp, "")` rather than as continuations. If the partition yields no separator, the whole line is tried as a bare timestamp (some kubelet versions emit `<timestamp>\n` with no trailing space), and parse failures fall through to the original return-the-raw-line path. It also catches `ValueError`, not just `ParserError`, so a malformed timestamp can never escape into Airflow's downstream parsers. * The sync (`PodManager.fetch_container_logs.consume_logs`) and async (`AsyncPodManager.fetch_container_logs_before_current_sec`) log consumer loops skip emitting empty messages -- the resume marker still advances in the sync path so reconnect-since-time stays correct, but no noisy `[base] ` row is written. ## Tests * Parametrized `test_parse_log_line_handles_empty_container_writes` covers `<timestamp> \n`, `<timestamp>\n`, and `<timestamp> ` (no newline). Verified RED on `main`, GREEN with the fix. * End-to-end `test_empty_container_lines_do_not_pollute_previous_message` drives `fetch_container_logs` with the exact log sequence from the issue and asserts no orphan timestamps land in `caplog`. Also RED on `main`, GREEN with the fix. ## Gotchas * Truly empty container output (just `\n`) is no longer surfaced as a `[base]` row. That output carries no information for the task log reader and was previously the trigger for downstream pendulum failures, so dropping it is a net improvement; if a future use case needs to count blank container lines, that's separable work.
--- .../cncf/kubernetes/utils/pod_manager.py | 27 ++++++-- .../cncf/kubernetes/utils/test_pod_manager.py | 69 +++++++++++++++++++ 2 files changed, 92 insertions(+), 4 deletions(-) diff --git a/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/utils/pod_manager.py b/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/utils/pod_manager.py index e4bb7ae9115a2..835d239f209f2 100644 --- a/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/utils/pod_manager.py +++ b/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/utils/pod_manager.py @@ -566,6 +566,15 @@ def consume_logs(*, since_time: DateTime | None = None) -> tuple[DateTime | None line = raw_line.decode("utf-8", errors="backslashreplace") line_timestamp, message = parse_log_line(line) if line_timestamp: # detect new log line + if not message: + # Empty container write: advance the resume + # marker but do not emit a noisy ``[base] `` + # row or break the previous buffered message + # with a stray continuation (#36571). + self.container_log_times[ + (pod.metadata.namespace, pod.metadata.name, container_name) + ] = line_timestamp + continue if message_to_log is None: # first line in the log message_to_log = message message_timestamp = line_timestamp @@ -1108,12 +1117,17 @@ def parse_log_line(line: str) -> tuple[DateTime | None, str]: :param line: k8s log line :return: timestamp and log message """ - timestamp, sep, message = line.strip().partition(" ") - if not sep: - return None, line + # Strip only the trailing newline so an empty container write (which + # kubelet streams back as " \n" under ``timestamps=True``) + # keeps the separator space and is recognised as a real log line, not a + # continuation of the previous one (#36571). When kubelet emits "\n" + # with no trailing space, ``partition`` returns the whole line as + # ``timestamp`` and ``message`` as ``""`` -- the parse below handles both. 
+ stripped = line.rstrip("\n") + timestamp, _, message = stripped.partition(" ") try: last_log_time = cast("DateTime", pendulum.parse(timestamp)) - except ParserError: + except (ParserError, ValueError): return None, line return last_log_time, message @@ -1220,6 +1234,11 @@ async def fetch_container_logs_before_current_sec( if line_timestamp and line_timestamp.replace(microsecond=0) == now_seconds: break if line_timestamp: # detect new log line + if not message: + # Empty container write -- drop it instead of letting + # it overwrite the buffered message with "" or be + # emitted as a noisy ``[base] `` row (#36571). + continue if message_to_log is None: # first line in the log message_to_log = message else: # previous log line is complete diff --git a/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/utils/test_pod_manager.py b/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/utils/test_pod_manager.py index 27cfd87fc6d6a..e2390f90dc3a9 100644 --- a/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/utils/test_pod_manager.py +++ b/providers/cncf/kubernetes/tests/unit/cncf/kubernetes/utils/test_pod_manager.py @@ -88,6 +88,40 @@ def test_parse_log_line(): assert line == log_message +@pytest.mark.parametrize( + ("raw_line", "expected_ts"), + [ + pytest.param( + "2026-05-28T13:07:57.030578889Z \n", + "2026-05-28T13:07:57.030578889Z", + id="trailing-space-and-newline", + ), + pytest.param( + "2026-05-28T13:07:57.030581518Z\n", + "2026-05-28T13:07:57.030581518Z", + id="newline-only", + ), + pytest.param( + "2026-05-28T13:07:57.030642740Z ", + "2026-05-28T13:07:57.030642740Z", + id="trailing-space-no-newline", + ), + ], +) +def test_parse_log_line_handles_empty_container_writes(raw_line, expected_ts): + """ + Regression for #36571: an empty container write (just ``\\n``) is streamed + back by kubelet as ``" \\n"`` when ``timestamps=True``. 
The + parser must recognise it as a real (empty) log line rather than as a + continuation of the previous one, otherwise the bare timestamp is appended + onto the previous buffered message and emitted unformatted into task logs. + """ + timestamp, message = parse_log_line(raw_line) + + assert timestamp == pendulum.parse(expected_ts) + assert message == "" + + def test_log_pod_event(): """Test logging a pod event.""" mock_pod_manager = mock.Mock() @@ -782,6 +816,41 @@ def test_parse_multi_line_logs(self, mock_read_pod_logs, mock_container_is_runni assert "message3 line1" in caplog.text assert "ERROR" not in caplog.text + @mock.patch("airflow.providers.cncf.kubernetes.utils.pod_manager.PodManager.container_is_running") + @mock.patch("airflow.providers.cncf.kubernetes.utils.pod_manager.PodManager.read_pod_logs") + def test_empty_container_lines_do_not_pollute_previous_message( + self, mock_read_pod_logs, mock_container_is_running, caplog + ): + """ + Regression for #36571: when a container writes empty lines, kubelet + returns them as ``" \\n"`` rows. Previously these slipped through + ``parse_log_line`` as "no timestamp" and were appended as continuations + onto the previous buffered message, which then emitted multi-line + records where only the first line carried the Airflow log prefix -- + leaving bare ```` rows in task logs that downstream pendulum-based + parsers ``(file_task_handler._parse_timestamp)`` then choked on. 
+ """ + log = ( + "2026-05-28T13:07:50.160Z first test line\n" + "2026-05-28T13:07:57.030578889Z \n" + "2026-05-28T13:07:57.030581518Z\n" + "2026-05-28T13:07:57.030642740Z \n" + "2026-05-28T13:07:57.034Z last test line\n" + ) + mock_read_pod_logs.return_value = [bytes(line, "utf-8") for line in log.split("\n")] + mock_container_is_running.return_value = False + + with caplog.at_level(logging.INFO): + self.pod_manager.fetch_container_logs(mock.MagicMock(), "base", follow=True) + + assert "first test line" in caplog.text + assert "last test line" in caplog.text + # The empty-line timestamps must not leak into the previous message and + # must not be emitted as orphan rows. + assert "2026-05-28T13:07:57.030578889Z" not in caplog.text + assert "2026-05-28T13:07:57.030581518Z" not in caplog.text + assert "2026-05-28T13:07:57.030642740Z" not in caplog.text + @mock.patch("airflow.providers.cncf.kubernetes.utils.pod_manager.PodManager.container_is_running") @mock.patch("airflow.providers.cncf.kubernetes.utils.pod_manager.PodManager.read_pod_logs") def test_container_log_times_tracks_last_timestamp(self, mock_read_pod_logs, mock_container_is_running): From a28b726798d804f389b8572a4d7f7cc3a8f9209b Mon Sep 17 00:00:00 2001 From: Vikram Koka Date: Wed, 27 May 2026 07:26:06 -0700 Subject: [PATCH 25/28] Add AIP progress tracker example DAG for common.ai provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Demonstrates LLMOperator with Dynamic Task Mapping, structured output, UsageLimits, and HITL approval — no framework dependency. The DAG gathers AIP specs from the Confluence wiki and searches GitHub for related PRs, then uses an LLM to assess each AIP's progress and synthesize a cross-AIP report for maintainer review. 
--- .../example_aip_progress_tracker.py | 475 ++++++++++++++++++ 1 file changed, 475 insertions(+) create mode 100644 providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py diff --git a/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py b/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py new file mode 100644 index 0000000000000..114b56302f67d --- /dev/null +++ b/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py @@ -0,0 +1,475 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +AIP progress tracker -- multi-source data fusion with common.ai operators. + +Demonstrates Dynamic Task Mapping, structured LLM output, cost-controlled +synthesis, and HITL approval using only ``LLMOperator`` -- no LlamaIndex or +LangChain dependency required. + +For each active Airflow Improvement Proposal the Dag gathers evidence from +two sources (Confluence spec text, GitHub PRs and commits), asks an LLM to +assess spec-vs-implementation progress, then synthesizes a cross-AIP report +for maintainer review. + +``example_aip_progress_tracker`` (manual trigger): + +.. 
code-block:: text + + fetch_aip_list (@task) + → gather_aip_evidence (@task, mapped ×N AIPs) + → format_analysis_prompt (@task, mapped ×N) + → analyze_aip (LLMOperator, mapped ×N) + → collect_analyses (@task) + → synthesize_report (LLMOperator, with UsageLimits) + → review_report (ApprovalOperator) + +**What this makes visible that a notebook hides:** + +* Each AIP investigation is a named, logged task instance with its own + retry behaviour -- not a loop iteration buried inside one cell. +* If the GitHub API is rate-limited for one AIP, only that mapped + instance retries; the others preserve their XCom results. +* The synthesis step's inputs and token budget are fully auditable. +* A maintainer reviews the report before it goes to the dev list. + +Before running: + +1. Create an LLM connection named ``pydanticai_default`` (or the value of + ``LLM_CONN_ID``) for your chosen model provider. +2. Set ``USE_SAMPLE_DATA = False`` in the DAG file to fetch live data + from the Apache Confluence wiki and GitHub API. +""" + +from __future__ import annotations + +import json +import re +import urllib.parse +import urllib.request +from datetime import timedelta + +from pydantic import BaseModel +from pydantic_ai.usage import UsageLimits + +from airflow.providers.common.ai.operators.llm import LLMOperator +from airflow.providers.common.compat.sdk import dag, task +from airflow.providers.standard.operators.hitl import ApprovalOperator + +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +LLM_CONN_ID = "pydanticai_default" + +# Confluence wiki -- the AIP listing page is public, no auth required. 
+# https://cwiki.apache.org/confluence/display/AIRFLOW/Airflow+Improvement+Proposals +CONFLUENCE_BASE_URL = "https://cwiki.apache.org/confluence" +AIP_LISTING_PAGE_ID = "89066602" +GITHUB_REPO = "apache/airflow" + +# When True the Dag runs on built-in sample data (self-contained, no network). +# Set to False to fetch live data from Confluence and GitHub. +USE_SAMPLE_DATA = True + +# --------------------------------------------------------------------------- +# Structured output model -- enforces a schema on the per-AIP LLM response +# --------------------------------------------------------------------------- + +# [START aip_tracker_structured_output] + + +class AIPStatus(BaseModel): + """Per-AIP analysis produced by the LLM.""" + + aip_number: int + title: str + spec_summary: str + implementation_status: str + key_prs: list[str] + blockers: list[str] + next_steps: list[str] + completion_pct: int + + +# [END aip_tracker_structured_output] + +# --------------------------------------------------------------------------- +# Sample data -- replace with Confluence / GitHub API calls for production +# --------------------------------------------------------------------------- + +SAMPLE_AIPS: list[dict] = [ + {"aip_number": 76, "title": "Asset Partitions"}, + {"aip_number": 99, "title": "Common Data Access Pattern + AI"}, + {"aip_number": 103, "title": "Task State Management"}, + {"aip_number": 105, "title": "LLM Retry Policy"}, + {"aip_number": 108, "title": "Language Coordinator Layer"}, +] + +SAMPLE_EVIDENCE: dict[int, dict] = { + 76: { + "spec_text": ( + "AIP-76 adds partition awareness to Airflow assets. Instead of " + "triggering on any update to an asset, Dags can depend on specific " + "partitions (e.g. a date-based slice of a dataset). The scheduler " + "tracks which partitions have been produced and only triggers " + "downstream Dags when the required partitions are available." 
+ ), + "prs": [ + "#62400 -- Asset partition model and metadata schema", + "#63900 -- Partition-aware scheduling in the DagRun creator", + "#65100 -- UI: partition status badges on Asset views", + ], + "commits": [ + "Add AssetPartition model with composite key", + "Extend DagScheduleAssetReference for partition filters", + "Show partition status in React Asset detail view", + ], + }, + 99: { + "spec_text": ( + "AIP-99 adds first-class AI/ML operators to the common.ai provider. " + "LLMOperator wraps pydantic-ai for structured LLM calls with retries. " + "AgentOperator enables multi-turn ReAct agents with tool use. " + "LangChain and LlamaIndex hooks bridge framework models to Airflow " + "connections. DocumentLoaderOperator parses files for RAG pipelines." + ), + "prs": [ + "#61200 -- LLMOperator and PydanticAIHook", + "#62800 -- AgentOperator with tool calling and HITL", + "#64100 -- LangChain hook and integration", + "#65500 -- LlamaIndex embedding and retrieval operators", + "#66300 -- DocumentLoaderOperator for multi-format parsing", + ], + "commits": [ + "Add LLMOperator with structured output support", + "Add AgentOperator with ReAct loop and durable execution", + "Add LangChainHook bridging langchain models to connections", + "Add LlamaIndex embedding and retrieval operators", + "Add DocumentLoaderOperator with PDF and DOCX support", + ], + }, + 103: { + "spec_text": ( + "AIP-103 introduces task-level state persistence via " + "context['task_state']. Tasks can checkpoint intermediate results " + "that survive retries and restarts. The state backend stores " + "key-value pairs scoped to a task instance, enabling long-running " + "tasks to resume from the last checkpoint rather than starting " + "from scratch." 
+ ), + "prs": [ + "#65000 -- Task state storage backend and API", + "#65800 -- context['task_state'] integration in Task SDK", + "#66700 -- State-aware retry logic for LLM tasks", + ], + "commits": [ + "Add TaskState model with key-value storage", + "Expose task_state in TaskInstanceContext", + "Add state checkpoint and restore in retry path", + ], + }, + 105: { + "spec_text": ( + "AIP-105 introduces LLMRetryPolicy, an intelligent retry mechanism " + "that uses an LLM to classify task failures before deciding whether " + "to retry, fail fast, or back off. Instead of static exception-type " + "matching, the policy sends the error context to an LLM that " + "determines the appropriate action: rate-limit errors trigger " + "exponential backoff, auth errors fail immediately, transient " + "network errors retry with a short delay." + ), + "prs": [ + "#64800 -- RetryPolicy base class in Task SDK (AIP-105 prerequisite)", + "#65600 -- LLMRetryPolicy with pydantic-ai error classification", + "#66200 -- Integration tests for LLM-classified retry scenarios", + ], + "commits": [ + "Add RetryPolicy protocol and RetryRule dataclass", + "Implement LLMRetryPolicy with structured ErrorClassification output", + "Wire RetryPolicy into task runner retry loop", + ], + }, + 108: { + "spec_text": ( + "AIP-108 defines a language coordinator layer that enables tasks " + "written in Java, Go, and TypeScript to run alongside Python tasks. " + "A lightweight coordinator process manages the non-Python runtime " + "lifecycle, handles serialization between the task and the Execution " + "API, and provides the same guarantees (heartbeat, state, XCom) that " + "Python tasks get from the Task SDK." 
+ ), + "prs": [ + "#66100 -- Coordinator protocol specification and protobuf schema", + "#66800 -- Java Task SDK with coordinator bridge", + "#67200 -- Go Task SDK initial implementation", + ], + "commits": [ + "Define coordinator gRPC protocol for multi-language tasks", + "Add Java Task SDK with Maven build and coordinator client", + "Scaffold Go Task SDK with coordinator handshake", + ], + }, +} + +# --------------------------------------------------------------------------- +# HTTP helpers -- used when USE_SAMPLE_DATA is False +# --------------------------------------------------------------------------- + + +def _confluence_rest_get(path: str) -> dict: + """GET a Confluence REST API endpoint (public, no auth required).""" + url = f"{CONFLUENCE_BASE_URL}{path}" + req = urllib.request.Request(url, headers={"Accept": "application/json"}) + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read()) + + +def _github_api_get(path: str) -> dict: + """GET a GitHub REST API endpoint (public, rate-limited to 10 req/min).""" + url = f"https://api.github.com{path}" + req = urllib.request.Request(url, headers={"Accept": "application/vnd.github.v3+json"}) + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read()) + + +def _strip_html_tags(html: str) -> str: + """Remove HTML/Confluence markup, returning plain text.""" + text = re.sub(r"<[^>]+>", " ", html) + return re.sub(r"\s+", " ", text).strip() + + +def _parse_accepted_aips(listing_html: str) -> list[dict]: + """Extract accepted AIPs from the rendered AIP listing page.""" + match = re.search(r"Accepted AIPs.*?(?=]+>([^<]*AIP-(\d+)[^<]*)", section): + title = re.sub(r"\s+", " ", m.group(1)).strip() + aip_number = int(m.group(2)) + aips.append({"aip_number": aip_number, "title": title}) + return aips + + +# --------------------------------------------------------------------------- +# System prompts +# 
--------------------------------------------------------------------------- + +ANALYSIS_SYSTEM_PROMPT = """\ +You are an Airflow project analyst. Given an AIP specification and its \ +GitHub evidence (pull requests and commits), produce a structured status \ +assessment. + +Be specific about what has been implemented versus what remains. Rate \ +completion percentage based on the ratio of spec goals that have \ +corresponding PRs or commits.""" + +SYNTHESIS_SYSTEM_PROMPT = """\ +You are an Airflow release coordinator. Given individual AIP status \ +assessments, produce a concise cross-AIP progress report. + +Identify the top priorities, shared blockers across AIPs, and recommend \ +where maintainer attention is most needed. Keep the report actionable \ +and under 500 words.""" + + +# --------------------------------------------------------------------------- +# DAG +# --------------------------------------------------------------------------- + + +# [START example_aip_progress_tracker] +@dag(catchup=False, tags=["example", "aip_tracker", "common_ai"]) +def example_aip_progress_tracker(): + """ + Track AIP progress by analysing Confluence specs against GitHub evidence. + + Task graph:: + + fetch_aip_list (@task) + → gather_aip_evidence (@task ×N, via Dynamic Task Mapping) + → format_analysis_prompt (@task ×N) + → analyze_aip (LLMOperator ×N, structured output) + → collect_analyses (@task) + → synthesize_report (LLMOperator, with UsageLimits) + → review_report (ApprovalOperator) + """ + + # ------------------------------------------------------------------ + # Step 1: Fetch the list of active AIPs to investigate. + # The length of this list determines how many mapped instances are + # created in the downstream steps -- N is decided at runtime. + # ------------------------------------------------------------------ + @task + def fetch_aip_list() -> list[dict]: + if USE_SAMPLE_DATA: + return SAMPLE_AIPS + # Fetch the AIP listing page and extract the "Accepted" section. 
+ # https://cwiki.apache.org/confluence/display/AIRFLOW/Airflow+Improvement+Proposals + page = _confluence_rest_get(f"/rest/api/content/{AIP_LISTING_PAGE_ID}?expand=body.view") + accepted = _parse_accepted_aips(page["body"]["view"]["value"]) + return accepted or SAMPLE_AIPS + + aip_list = fetch_aip_list() + + # ------------------------------------------------------------------ + # Step 2: Gather evidence for each AIP from multiple sources. + # Each mapped instance fetches one AIP's spec text from the + # Confluence wiki (cwiki.apache.org) and searches GitHub for + # related PRs and commits. If the GitHub API is rate-limited + # for one AIP, only that instance retries. + # ------------------------------------------------------------------ + @task + def gather_aip_evidence(aip: dict) -> dict: + aip_number = aip["aip_number"] + if USE_SAMPLE_DATA: + evidence = SAMPLE_EVIDENCE[aip_number] + return { + "aip_number": aip_number, + "title": aip["title"], + "spec_text": evidence["spec_text"], + "prs": evidence["prs"], + "commits": evidence["commits"], + } + # Fetch spec text from the AIP's Confluence wiki page via CQL search. + # Example: https://cwiki.apache.org/confluence/display/AIRFLOW/AIP-103 + cql = urllib.parse.quote( + f'space="AIRFLOW" AND title~"AIP-{aip_number}" AND ancestor={AIP_LISTING_PAGE_ID}' + ) + results = _confluence_rest_get(f"/rest/api/content/search?cql={cql}&expand=body.view&limit=1") + spec_text = "" + if results.get("results"): + raw_html = results["results"][0]["body"]["view"]["value"] + spec_text = _strip_html_tags(raw_html)[:3000] + # Search GitHub for related PRs. + pr_query = urllib.parse.quote(f"AIP-{aip_number} repo:{GITHUB_REPO} is:pr") + pr_data = _github_api_get(f"/search/issues?q={pr_query}&per_page=10") + prs = [f"#{it['number']} -- {it['title']}" for it in pr_data.get("items", [])] + # Search GitHub for related commits. 
+ commit_query = urllib.parse.quote(f"AIP-{aip_number} repo:{GITHUB_REPO}") + commit_data = _github_api_get(f"/search/commits?q={commit_query}&per_page=10") + commits = [it["commit"]["message"].split("\n")[0] for it in commit_data.get("items", [])] + return { + "aip_number": aip_number, + "title": aip["title"], + "spec_text": spec_text, + "prs": prs, + "commits": commits, + } + + evidence = gather_aip_evidence.expand(aip=aip_list) + + # ------------------------------------------------------------------ + # Step 3: Format the gathered evidence into an LLM analysis prompt. + # Separating formatting from data gathering keeps each task focused + # and makes prompt iteration independent of API logic. + # ------------------------------------------------------------------ + @task + def format_analysis_prompt(evidence: dict) -> str: + prs_text = "\n".join(f" - {pr}" for pr in evidence["prs"]) + commits_text = "\n".join(f" - {c}" for c in evidence["commits"]) + return ( + f"Analyze AIP-{evidence['aip_number']}: {evidence['title']}\n\n" + f"Specification:\n{evidence['spec_text']}\n\n" + f"Pull Requests:\n{prs_text}\n\n" + f"Recent Commits:\n{commits_text}" + ) + + prompts = format_analysis_prompt.expand(evidence=evidence) + + # ------------------------------------------------------------------ + # Step 4: Analyze each AIP with a structured LLM call. + # Dynamic Task Mapping creates one LLMOperator instance per AIP. + # output_type=AIPStatus enforces the Pydantic schema on the response. + # ------------------------------------------------------------------ + # [START aip_tracker_dtm_analysis] + analyses = LLMOperator.partial( + task_id="analyze_aip", + llm_conn_id=LLM_CONN_ID, + system_prompt=ANALYSIS_SYSTEM_PROMPT, + output_type=AIPStatus, + ).expand(prompt=prompts) + # [END aip_tracker_dtm_analysis] + + # ------------------------------------------------------------------ + # Step 5: Collect all per-AIP analyses into a single context string + # for the synthesis step. 
+ # ------------------------------------------------------------------ + @task + def collect_analyses(analyses: list) -> str: + sections = [] + for raw in analyses: + a = json.loads(raw) if isinstance(raw, str) else raw + blockers = ", ".join(a["blockers"]) if a["blockers"] else "None identified" + next_steps = ", ".join(a["next_steps"]) if a["next_steps"] else "N/A" + sections.append( + f"## AIP-{a['aip_number']}: {a['title']}\n" + f"Status: {a['implementation_status']} " + f"({a['completion_pct']}% complete)\n" + f"Summary: {a['spec_summary']}\n" + f"Key PRs: {', '.join(a['key_prs'])}\n" + f"Blockers: {blockers}\n" + f"Next steps: {next_steps}" + ) + return "\n\n".join(sections) + + collected = collect_analyses(analyses.output) + + # ------------------------------------------------------------------ + # Step 6: Synthesize a cross-AIP progress report. + # UsageLimits caps the token spend so a runaway prompt cannot + # exhaust the API budget in a single Dag run. + # ------------------------------------------------------------------ + # [START aip_tracker_synthesis] + synthesize = LLMOperator( + task_id="synthesize_report", + llm_conn_id=LLM_CONN_ID, + system_prompt=SYNTHESIS_SYSTEM_PROMPT, + prompt="""\ +Create a cross-AIP progress report from these individual assessments. +Prioritize AIPs that are close to completion or have shared blockers. + +{{ ti.xcom_pull(task_ids='collect_analyses') }}""", + usage_limits=UsageLimits( + request_limit=5, + input_tokens_limit=20_000, + output_tokens_limit=4_000, + ), + ) + # [END aip_tracker_synthesis] + collected >> synthesize + + # ------------------------------------------------------------------ + # Step 7: A maintainer reviews the synthesized report before it is + # shared on the dev list. The Dag pauses here until the human + # approves, requests changes, or the timeout expires. 
+ # ------------------------------------------------------------------ + # [START aip_tracker_hitl] + ApprovalOperator( + task_id="review_report", + subject="Review AIP Progress Report before sharing", + body=synthesize.output, + response_timeout=timedelta(hours=24), + ) + # [END aip_tracker_hitl] + + +# [END example_aip_progress_tracker] + +example_aip_progress_tracker() From 1f440f45841f471b861e35faa7f4ede9695824c6 Mon Sep 17 00:00:00 2001 From: Vikram Koka Date: Thu, 28 May 2026 13:23:00 -0700 Subject: [PATCH 26/28] Updated AIP-tracker example for common.ai Updated to take out sample data from the DAG and go to Confluence every time. Also updated to take AIP number list as a DAG param. --- .../example_aip_progress_tracker.py | 197 +++--------------- 1 file changed, 33 insertions(+), 164 deletions(-) diff --git a/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py b/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py index 114b56302f67d..dbd16338a15f5 100644 --- a/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py +++ b/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py @@ -51,8 +51,8 @@ 1. Create an LLM connection named ``pydanticai_default`` (or the value of ``LLM_CONN_ID``) for your chosen model provider. -2. Set ``USE_SAMPLE_DATA = False`` in the DAG file to fetch live data - from the Apache Confluence wiki and GitHub API. +2. Trigger the DAG with the default ``aip_numbers`` param or edit it to + choose which AIPs to investigate. 
""" from __future__ import annotations @@ -69,6 +69,7 @@ from airflow.providers.common.ai.operators.llm import LLMOperator from airflow.providers.common.compat.sdk import dag, task from airflow.providers.standard.operators.hitl import ApprovalOperator +from airflow.sdk import Param # --------------------------------------------------------------------------- # Configuration @@ -76,15 +77,11 @@ LLM_CONN_ID = "pydanticai_default" -# Confluence wiki -- the AIP listing page is public, no auth required. -# https://cwiki.apache.org/confluence/display/AIRFLOW/Airflow+Improvement+Proposals +# Confluence wiki -- public REST API, no auth required. CONFLUENCE_BASE_URL = "https://cwiki.apache.org/confluence" -AIP_LISTING_PAGE_ID = "89066602" +AIP_LISTING_PAGE_ID = "89066602" # ancestor filter for CQL queries GITHUB_REPO = "apache/airflow" - -# When True the Dag runs on built-in sample data (self-contained, no network). -# Set to False to fetch live data from Confluence and GitHub. -USE_SAMPLE_DATA = True +DEFAULT_AIP_NUMBERS = "76,99,103,105,108" # --------------------------------------------------------------------------- # Structured output model -- enforces a schema on the per-AIP LLM response @@ -109,125 +106,7 @@ class AIPStatus(BaseModel): # [END aip_tracker_structured_output] # --------------------------------------------------------------------------- -# Sample data -- replace with Confluence / GitHub API calls for production -# --------------------------------------------------------------------------- - -SAMPLE_AIPS: list[dict] = [ - {"aip_number": 76, "title": "Asset Partitions"}, - {"aip_number": 99, "title": "Common Data Access Pattern + AI"}, - {"aip_number": 103, "title": "Task State Management"}, - {"aip_number": 105, "title": "LLM Retry Policy"}, - {"aip_number": 108, "title": "Language Coordinator Layer"}, -] - -SAMPLE_EVIDENCE: dict[int, dict] = { - 76: { - "spec_text": ( - "AIP-76 adds partition awareness to Airflow assets. 
Instead of " - "triggering on any update to an asset, Dags can depend on specific " - "partitions (e.g. a date-based slice of a dataset). The scheduler " - "tracks which partitions have been produced and only triggers " - "downstream Dags when the required partitions are available." - ), - "prs": [ - "#62400 -- Asset partition model and metadata schema", - "#63900 -- Partition-aware scheduling in the DagRun creator", - "#65100 -- UI: partition status badges on Asset views", - ], - "commits": [ - "Add AssetPartition model with composite key", - "Extend DagScheduleAssetReference for partition filters", - "Show partition status in React Asset detail view", - ], - }, - 99: { - "spec_text": ( - "AIP-99 adds first-class AI/ML operators to the common.ai provider. " - "LLMOperator wraps pydantic-ai for structured LLM calls with retries. " - "AgentOperator enables multi-turn ReAct agents with tool use. " - "LangChain and LlamaIndex hooks bridge framework models to Airflow " - "connections. DocumentLoaderOperator parses files for RAG pipelines." - ), - "prs": [ - "#61200 -- LLMOperator and PydanticAIHook", - "#62800 -- AgentOperator with tool calling and HITL", - "#64100 -- LangChain hook and integration", - "#65500 -- LlamaIndex embedding and retrieval operators", - "#66300 -- DocumentLoaderOperator for multi-format parsing", - ], - "commits": [ - "Add LLMOperator with structured output support", - "Add AgentOperator with ReAct loop and durable execution", - "Add LangChainHook bridging langchain models to connections", - "Add LlamaIndex embedding and retrieval operators", - "Add DocumentLoaderOperator with PDF and DOCX support", - ], - }, - 103: { - "spec_text": ( - "AIP-103 introduces task-level state persistence via " - "context['task_state']. Tasks can checkpoint intermediate results " - "that survive retries and restarts. 
The state backend stores " - "key-value pairs scoped to a task instance, enabling long-running " - "tasks to resume from the last checkpoint rather than starting " - "from scratch." - ), - "prs": [ - "#65000 -- Task state storage backend and API", - "#65800 -- context['task_state'] integration in Task SDK", - "#66700 -- State-aware retry logic for LLM tasks", - ], - "commits": [ - "Add TaskState model with key-value storage", - "Expose task_state in TaskInstanceContext", - "Add state checkpoint and restore in retry path", - ], - }, - 105: { - "spec_text": ( - "AIP-105 introduces LLMRetryPolicy, an intelligent retry mechanism " - "that uses an LLM to classify task failures before deciding whether " - "to retry, fail fast, or back off. Instead of static exception-type " - "matching, the policy sends the error context to an LLM that " - "determines the appropriate action: rate-limit errors trigger " - "exponential backoff, auth errors fail immediately, transient " - "network errors retry with a short delay." - ), - "prs": [ - "#64800 -- RetryPolicy base class in Task SDK (AIP-105 prerequisite)", - "#65600 -- LLMRetryPolicy with pydantic-ai error classification", - "#66200 -- Integration tests for LLM-classified retry scenarios", - ], - "commits": [ - "Add RetryPolicy protocol and RetryRule dataclass", - "Implement LLMRetryPolicy with structured ErrorClassification output", - "Wire RetryPolicy into task runner retry loop", - ], - }, - 108: { - "spec_text": ( - "AIP-108 defines a language coordinator layer that enables tasks " - "written in Java, Go, and TypeScript to run alongside Python tasks. " - "A lightweight coordinator process manages the non-Python runtime " - "lifecycle, handles serialization between the task and the Execution " - "API, and provides the same guarantees (heartbeat, state, XCom) that " - "Python tasks get from the Task SDK." 
- ), - "prs": [ - "#66100 -- Coordinator protocol specification and protobuf schema", - "#66800 -- Java Task SDK with coordinator bridge", - "#67200 -- Go Task SDK initial implementation", - ], - "commits": [ - "Define coordinator gRPC protocol for multi-language tasks", - "Add Java Task SDK with Maven build and coordinator client", - "Scaffold Go Task SDK with coordinator handshake", - ], - }, -} - -# --------------------------------------------------------------------------- -# HTTP helpers -- used when USE_SAMPLE_DATA is False +# HTTP helpers # --------------------------------------------------------------------------- @@ -253,20 +132,6 @@ def _strip_html_tags(html: str) -> str: return re.sub(r"\s+", " ", text).strip() -def _parse_accepted_aips(listing_html: str) -> list[dict]: - """Extract accepted AIPs from the rendered AIP listing page.""" - match = re.search(r"Accepted AIPs.*?(?=]+>([^<]*AIP-(\d+)[^<]*)", section): - title = re.sub(r"\s+", " ", m.group(1)).strip() - aip_number = int(m.group(2)) - aips.append({"aip_number": aip_number, "title": title}) - return aips - - # --------------------------------------------------------------------------- # System prompts # --------------------------------------------------------------------------- @@ -295,7 +160,18 @@ def _parse_accepted_aips(listing_html: str) -> list[dict]: # [START example_aip_progress_tracker] -@dag(catchup=False, tags=["example", "aip_tracker", "common_ai"]) +@dag( + schedule=None, + catchup=False, + params={ + "aip_numbers": Param( + DEFAULT_AIP_NUMBERS, + type="string", + description="Comma-separated AIP numbers to investigate (e.g. 76,99,103,105,108)", + ), + }, + tags=["example", "aip_tracker", "common_ai"], +) def example_aip_progress_tracker(): """ Track AIP progress by analysing Confluence specs against GitHub evidence. @@ -317,14 +193,20 @@ def example_aip_progress_tracker(): # created in the downstream steps -- N is decided at runtime. 
# ------------------------------------------------------------------ @task - def fetch_aip_list() -> list[dict]: - if USE_SAMPLE_DATA: - return SAMPLE_AIPS - # Fetch the AIP listing page and extract the "Accepted" section. - # https://cwiki.apache.org/confluence/display/AIRFLOW/Airflow+Improvement+Proposals - page = _confluence_rest_get(f"/rest/api/content/{AIP_LISTING_PAGE_ID}?expand=body.view") - accepted = _parse_accepted_aips(page["body"]["view"]["value"]) - return accepted or SAMPLE_AIPS + def fetch_aip_list(params: dict) -> list[dict]: + aip_numbers = [int(n.strip()) for n in params["aip_numbers"].split(",") if n.strip()] + aips = [] + for num in aip_numbers: + cql = urllib.parse.quote( + f'space="AIRFLOW" AND title~"AIP-{num}" AND ancestor={AIP_LISTING_PAGE_ID}' + ) + results = _confluence_rest_get(f"/rest/api/content/search?cql={cql}&limit=1") + if results.get("results"): + title = results["results"][0]["title"] + else: + title = f"AIP-{num}" + aips.append({"aip_number": num, "title": title}) + return aips aip_list = fetch_aip_list() @@ -338,17 +220,6 @@ def fetch_aip_list() -> list[dict]: @task def gather_aip_evidence(aip: dict) -> dict: aip_number = aip["aip_number"] - if USE_SAMPLE_DATA: - evidence = SAMPLE_EVIDENCE[aip_number] - return { - "aip_number": aip_number, - "title": aip["title"], - "spec_text": evidence["spec_text"], - "prs": evidence["prs"], - "commits": evidence["commits"], - } - # Fetch spec text from the AIP's Confluence wiki page via CQL search. - # Example: https://cwiki.apache.org/confluence/display/AIRFLOW/AIP-103 cql = urllib.parse.quote( f'space="AIRFLOW" AND title~"AIP-{aip_number}" AND ancestor={AIP_LISTING_PAGE_ID}' ) @@ -357,11 +228,9 @@ def gather_aip_evidence(aip: dict) -> dict: if results.get("results"): raw_html = results["results"][0]["body"]["view"]["value"] spec_text = _strip_html_tags(raw_html)[:3000] - # Search GitHub for related PRs. 
pr_query = urllib.parse.quote(f"AIP-{aip_number} repo:{GITHUB_REPO} is:pr") pr_data = _github_api_get(f"/search/issues?q={pr_query}&per_page=10") prs = [f"#{it['number']} -- {it['title']}" for it in pr_data.get("items", [])] - # Search GitHub for related commits. commit_query = urllib.parse.quote(f"AIP-{aip_number} repo:{GITHUB_REPO}") commit_data = _github_api_get(f"/search/commits?q={commit_query}&per_page=10") commits = [it["commit"]["message"].split("\n")[0] for it in commit_data.get("items", [])] From f7a7f3c76cbc9729836bc674ee36f491f711446b Mon Sep 17 00:00:00 2001 From: SameerMesiah97 <75502260+SameerMesiah97@users.noreply.github.com> Date: Thu, 28 May 2026 21:31:35 +0100 Subject: [PATCH 27/28] Allow deadline intervals to be configured via Airflow Variables by supporting (#64751) --- airflow-core/docs/howto/deadline-alerts.rst | 2 +- airflow-core/docs/migrations-ref.rst | 4 +- airflow-core/newsfragments/64751.feature.rst | 1 + ...16_3_3_0_add_team_name_to_trigger_table.py | 1 + ..._3_3_0_change_deadline_interval_to_json.py | 305 ++++++++++++++++++ .../src/airflow/models/deadline_alert.py | 17 +- .../src/airflow/serialization/decoders.py | 24 +- .../airflow/serialization/definitions/dag.py | 8 +- .../serialization/definitions/deadline.py | 4 +- .../src/airflow/serialization/encoders.py | 2 +- airflow-core/src/airflow/utils/db.py | 2 +- airflow-core/tests/unit/models/test_dag.py | 5 +- airflow-core/tests/unit/models/test_dagrun.py | 86 ++++- .../tests/unit/models/test_serialized_dag.py | 2 +- .../serialization/test_serialized_objects.py | 16 + .../src/airflow/sdk/definitions/deadline.py | 61 +++- .../task_sdk/definitions/test_deadline.py | 52 ++- 17 files changed, 574 insertions(+), 18 deletions(-) create mode 100644 airflow-core/newsfragments/64751.feature.rst create mode 100644 airflow-core/src/airflow/migrations/versions/0117_3_3_0_change_deadline_interval_to_json.py diff --git a/airflow-core/docs/howto/deadline-alerts.rst 
b/airflow-core/docs/howto/deadline-alerts.rst index 64f39c0244050..e36908009a0f1 100644 --- a/airflow-core/docs/howto/deadline-alerts.rst +++ b/airflow-core/docs/howto/deadline-alerts.rst @@ -42,7 +42,7 @@ Creating a Deadline Alert Creating a Deadline Alert requires three mandatory parameters: * Reference: When to start counting from -* Interval: How far before or after the reference point to trigger the alert +* Interval: How far before or after the reference point to trigger the alert (either a timedelta or a dynamic interval such as VariableInterval) * Callback: A Callback object which contains a path to a callable and optional kwargs to pass to it if the deadline is exceeded Here is how Deadlines are calculated: diff --git a/airflow-core/docs/migrations-ref.rst b/airflow-core/docs/migrations-ref.rst index 82f32c8a2fdd9..32fbc76feb015 100644 --- a/airflow-core/docs/migrations-ref.rst +++ b/airflow-core/docs/migrations-ref.rst @@ -39,7 +39,9 @@ Here's the list of all the Database Migrations that are executed via when you ru +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | Revision ID | Revises ID | Airflow Version | Description | +=========================+==================+===================+==============================================================+ -| ``acc215baed80`` (head) | ``a1b2c3d4e5f6`` | ``3.3.0`` | Add team_name to trigger table. | +| ``8812eb67b63c`` (head) | ``acc215baed80`` | ``3.3.0`` | Change Deadline interval to JSON. | ++-------------------------+------------------+-------------------+--------------------------------------------------------------+ +| ``acc215baed80`` | ``a1b2c3d4e5f6`` | ``3.3.0`` | Add team_name to trigger table. | +-------------------------+------------------+-------------------+--------------------------------------------------------------+ | ``a1b2c3d4e5f6`` | ``a7f3b2c1d4e5`` | ``3.3.0`` | Add version_data to dag_version. 
| +-------------------------+------------------+-------------------+--------------------------------------------------------------+ diff --git a/airflow-core/newsfragments/64751.feature.rst b/airflow-core/newsfragments/64751.feature.rst new file mode 100644 index 0000000000000..41d647f143d2b --- /dev/null +++ b/airflow-core/newsfragments/64751.feature.rst @@ -0,0 +1 @@ +Allow DeadlineAlert intervals to be dynamically resolved at Deadline evaluation using objects such as VariableInterval. diff --git a/airflow-core/src/airflow/migrations/versions/0116_3_3_0_add_team_name_to_trigger_table.py b/airflow-core/src/airflow/migrations/versions/0116_3_3_0_add_team_name_to_trigger_table.py index 503f1b58d7c0e..e217cbeb45ce4 100644 --- a/airflow-core/src/airflow/migrations/versions/0116_3_3_0_add_team_name_to_trigger_table.py +++ b/airflow-core/src/airflow/migrations/versions/0116_3_3_0_add_team_name_to_trigger_table.py @@ -22,6 +22,7 @@ Revision ID: acc215baed80 Revises: a1b2c3d4e5f6 Create Date: 2026-05-21 21:38:00.122692 + """ from __future__ import annotations diff --git a/airflow-core/src/airflow/migrations/versions/0117_3_3_0_change_deadline_interval_to_json.py b/airflow-core/src/airflow/migrations/versions/0117_3_3_0_change_deadline_interval_to_json.py new file mode 100644 index 0000000000000..04e5a35aa3143 --- /dev/null +++ b/airflow-core/src/airflow/migrations/versions/0117_3_3_0_change_deadline_interval_to_json.py @@ -0,0 +1,305 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Change Deadline interval to JSON. + +Revision ID: 8812eb67b63c +Revises: acc215baed80 +Create Date: 2026-05-28 17:36:56.837243 + +""" + +from __future__ import annotations + +import sqlalchemy as sa +from alembic import context, op + +# revision identifiers, used by Alembic. +revision = "8812eb67b63c" +down_revision = "acc215baed80" +branch_labels = None +depends_on = None +airflow_version = "3.3.0" + + +def upgrade(): + """Apply change deadline interval to JSON.""" + conn = op.get_bind() + dialect = conn.dialect.name + + if context.is_offline_mode(): + print( + """ + Manual conversion required: + + PostgreSQL: + + Step 1: Convert column type. + ALTER TABLE deadline_alert + ALTER COLUMN interval TYPE JSONB + USING to_json(interval); + + Step 2: Convert values. + UPDATE deadline_alert + SET interval = json_build_object( + '__classname__', 'datetime.timedelta', + '__version__', 2, + '__data__', (interval::text)::float + ) + WHERE jsonb_typeof(interval::jsonb) = 'number'; + + MySQL: + + Step 1: Convert column type. 
+ ALTER TABLE deadline_alert MODIFY COLUMN `interval` JSON; + + Step 2: Convert values + UPDATE deadline_alert + SET `interval` = JSON_OBJECT( + '__classname__', 'datetime.timedelta', + '__version__', 2, + '__data__', `interval` + ); + + SQLite: + + UPDATE deadline_alert + SET interval = + '{"__classname__":"datetime.timedelta","__version__":2,"__data__":' + || CAST(interval AS TEXT) || '}'; + """ + ) + return + + with op.batch_alter_table("deadline_alert") as batch_op: + if dialect == "postgresql": + batch_op.alter_column( + "interval", + existing_type=sa.FLOAT(), + type_=sa.JSON(), + postgresql_using="to_json(interval)", + existing_nullable=False, + ) + else: + batch_op.alter_column( + "interval", + existing_type=sa.FLOAT(), + type_=sa.JSON(), + existing_nullable=False, + ) + + if dialect == "postgresql": + op.execute(""" + UPDATE deadline_alert + SET interval = json_build_object( + '__classname__', 'datetime.timedelta', + '__version__', 2, + '__data__', (interval::text)::float + ) + WHERE jsonb_typeof(interval::jsonb) = 'number' + """) + + elif dialect == "mysql": + op.execute(""" + UPDATE deadline_alert + SET `interval` = JSON_OBJECT( + '__classname__', 'datetime.timedelta', + '__version__', 2, + '__data__', `interval` + ) + """) + + else: + op.execute(""" + UPDATE deadline_alert + SET interval = + '{"__classname__":"datetime.timedelta","__version__":' + || '2' || + ',"__data__":' || CAST(interval AS TEXT) || '}' + """) + + +def downgrade(): + """Revert deadline interval back to float.""" + conn = op.get_bind() + dialect = conn.dialect.name + + if context.is_offline_mode(): + print( + """ + Manual downgrade required: + + PostgreSQL: + + Step 1: Convert values. + UPDATE deadline_alert + SET interval = + CASE + WHEN jsonb_typeof(interval::jsonb) = 'number' + THEN interval + WHEN (interval::jsonb)->>'__classname__' = 'datetime.timedelta' + THEN to_json((interval->>'__data__')::double precision) + ELSE NULL + END; + + Step 2: Convert column type. 
+ ALTER TABLE deadline_alert + ALTER COLUMN interval TYPE DOUBLE PRECISION + USING ( + CASE + WHEN jsonb_typeof(interval::jsonb) = 'number' + THEN interval::text::double precision + WHEN (interval::jsonb)->>'__classname__' = 'datetime.timedelta' + THEN (interval->>'__data__')::double precision + ELSE NULL + END + ); + + MySQL: + + Step 1: Convert values + UPDATE deadline_alert + SET `interval` = + CASE + WHEN JSON_EXTRACT(`interval`, '$.__data__') IS NOT NULL + THEN CAST(JSON_EXTRACT(`interval`, '$.__data__') AS DOUBLE) + WHEN JSON_EXTRACT(`interval`, '$.__classname__') IS NULL + THEN CAST(`interval` AS DOUBLE) + ELSE NULL + END; + + Step 2: Convert column type + ALTER TABLE deadline_alert + MODIFY COLUMN `interval` DOUBLE; + + SQLite: + + Step 1: Convert values + UPDATE deadline_alert + SET interval = + CASE + WHEN json_extract(interval, '$.__data__') IS NOT NULL + THEN CAST(json_extract(interval, '$.__data__') AS REAL) + WHEN json_extract(interval, '$.__classname__') IS NULL + THEN CAST(interval AS REAL) + ELSE NULL + END; + + Step 2: SQLite does not support ALTER COLUMN TYPE. + Recreate the table with interval as REAL and copy data. + """ + ) + return + + if dialect == "postgresql": + op.execute(""" + UPDATE deadline_alert + SET interval = + CASE + WHEN jsonb_typeof(interval::jsonb) = 'number' + THEN interval + WHEN (interval::jsonb)->>'__classname__' = 'datetime.timedelta' + THEN to_json((interval->>'__data__')::double precision) + ELSE NULL + END + """) + + elif dialect == "mysql": + op.execute(""" + UPDATE deadline_alert + SET `interval` = + CASE + WHEN JSON_EXTRACT(`interval`, '$.__data__') IS NOT NULL + THEN CAST(JSON_EXTRACT(`interval`, '$.__data__') AS DOUBLE) + WHEN JSON_EXTRACT(`interval`, '$.__classname__') IS NULL + THEN CAST(`interval` AS DOUBLE) + ELSE NULL + END + """) + + # Serialized VariableInterval objects do not contain a numeric "__data__" field + # and therefore cannot be converted back to a float representation. 
+ # During downgrade, only timedelta-style serialized values are converted. + # Other serialized interval types (e.g. VariableInterval) will cast as null. + else: + # Detect availability of SQLite JSON functions (JSON1 extension). + json_functions_available = False + try: + conn.execute(sa.text("SELECT json_extract('{\"a\":1}', '$.a')")).fetchone() + json_functions_available = True + except Exception: + print("SQLite JSON functions not available, using string parsing as fallback.") + + if json_functions_available: + op.execute(""" + UPDATE deadline_alert + SET interval = + CASE + WHEN json_extract(interval, '$.__data__') IS NOT NULL + THEN CAST(json_extract(interval, '$.__data__') AS REAL) + WHEN json_extract(interval, '$.__classname__') IS NULL + THEN CAST(interval AS REAL) + ELSE NULL + END + """) + else: + # NOTE: This is a best-effort fallback for environments without JSON1. + # It assumes a stable JSON format and may not work for all serialized values. + op.execute(""" + UPDATE deadline_alert + SET interval = + CASE + WHEN instr(interval, '__data__') > 0 + THEN CAST( + substr( + interval, + instr(interval, '__data__') + + instr(substr(interval, instr(interval, '__data__')), ':') + ) AS FLOAT + ) + WHEN instr(interval, '__classname__') = 0 + THEN CAST(interval AS FLOAT) + ELSE NULL + END + """) + + with op.batch_alter_table("deadline_alert") as batch_op: + if dialect == "postgresql": + batch_op.alter_column( + "interval", + existing_type=sa.JSON(), + type_=sa.FLOAT(), + postgresql_using=""" + CASE + WHEN jsonb_typeof(interval::jsonb) = 'number' + THEN interval::text::double precision + WHEN (interval::jsonb)->>'__classname__' = 'datetime.timedelta' + THEN (interval->>'__data__')::double precision + ELSE NULL + END + """, + existing_nullable=False, + ) + else: + batch_op.alter_column( + "interval", + existing_type=sa.JSON(), + type_=sa.FLOAT(), + existing_nullable=False, + ) diff --git a/airflow-core/src/airflow/models/deadline_alert.py 
b/airflow-core/src/airflow/models/deadline_alert.py index 0b8a8eba9b1b1..d9b6590c0f7f1 100644 --- a/airflow-core/src/airflow/models/deadline_alert.py +++ b/airflow-core/src/airflow/models/deadline_alert.py @@ -21,7 +21,7 @@ from uuid import UUID import uuid6 -from sqlalchemy import JSON, Float, ForeignKey, String, Text, Uuid, select +from sqlalchemy import JSON, ForeignKey, String, Text, Uuid, select from sqlalchemy.exc import NoResultFound from sqlalchemy.orm import Mapped, mapped_column @@ -50,13 +50,22 @@ class DeadlineAlert(Base): name: Mapped[str | None] = mapped_column(String(250), nullable=True) description: Mapped[str | None] = mapped_column(Text, nullable=True) reference: Mapped[dict] = mapped_column(JSON, nullable=False) - interval: Mapped[float] = mapped_column(Float, nullable=False) + interval: Mapped[dict] = mapped_column(JSON, nullable=False) callback_def: Mapped[dict] = mapped_column(JSON, nullable=False) def __repr__(self): - interval_seconds = int(self.interval) - if interval_seconds >= 3600: + interval_seconds = None + + if isinstance(self.interval, (int, float)): + interval_seconds = int(self.interval) + + elif isinstance(self.interval, datetime.timedelta): + interval_seconds = int(self.interval.total_seconds()) + + if interval_seconds is None: + interval_display = "dynamic" + elif interval_seconds >= 3600: interval_display = f"{interval_seconds // 3600}h" elif interval_seconds >= 60: interval_display = f"{interval_seconds // 60}m" diff --git a/airflow-core/src/airflow/serialization/decoders.py b/airflow-core/src/airflow/serialization/decoders.py index 22683ef5d612b..b36b7a8a52499 100644 --- a/airflow-core/src/airflow/serialization/decoders.py +++ b/airflow-core/src/airflow/serialization/decoders.py @@ -156,6 +156,7 @@ def decode_deadline_alert(encoded_data: dict): :meta private: """ + from airflow.sdk.definitions.deadline import VariableInterval from airflow.sdk.serde import deserialize data = encoded_data.get(Encoding.VAR, encoded_data) @@ 
-163,9 +164,30 @@ def decode_deadline_alert(encoded_data: dict): reference_data = data[DeadlineAlertFields.REFERENCE] reference = decode_deadline_reference(reference_data) + raw_interval = data[DeadlineAlertFields.INTERVAL] + + if raw_interval is None: + raise ValueError( + "DeadlineAlert interval is missing. This can happen after downgrading " + "from a version that supports VariableInterval. Downgrade is not fully reversible." + ) + + interval: datetime.timedelta | VariableInterval + + # Backward compatibility: previously interval was stored as total_seconds() (float/int). + # Handle numeric values by converting to timedelta. + if isinstance(raw_interval, (int, float)): + interval = datetime.timedelta(seconds=raw_interval) + else: + deserialized = deserialize(raw_interval) + if isinstance(deserialized, (datetime.timedelta, VariableInterval)): + interval = deserialized + else: + raise TypeError(f"Invalid interval type: {type(deserialized).__name__}") + return SerializedDeadlineAlert( reference=reference, - interval=datetime.timedelta(seconds=data[DeadlineAlertFields.INTERVAL]), + interval=interval, callback=deserialize(data[DeadlineAlertFields.CALLBACK]), name=data.get(DeadlineAlertFields.NAME), ) diff --git a/airflow-core/src/airflow/serialization/definitions/dag.py b/airflow-core/src/airflow/serialization/definitions/dag.py index d20b9c22c039c..6fb7bf083cf76 100644 --- a/airflow-core/src/airflow/serialization/definitions/dag.py +++ b/airflow-core/src/airflow/serialization/definitions/dag.py @@ -41,6 +41,7 @@ from airflow.models.deadline_alert import DeadlineAlert as DeadlineAlertModel from airflow.models.taskinstancekey import TaskInstanceKey from airflow.models.tasklog import LogTemplate +from airflow.sdk.definitions.deadline import VariableInterval from airflow.serialization.decoders import decode_deadline_alert from airflow.serialization.definitions.deadline import DeadlineAlertFields, SerializedReferenceModels from airflow.serialization.definitions.param 
import SerializedParamsDict @@ -653,10 +654,15 @@ def _process_dagrun_deadline_alerts( } ) + interval = deserialized_deadline_alert.interval + + if isinstance(interval, VariableInterval): + interval = interval.resolve() + if isinstance(deserialized_deadline_alert.reference, SerializedReferenceModels.TYPES.DAGRUN): deadline_time = deserialized_deadline_alert.reference.evaluate_with( session=session, - interval=deserialized_deadline_alert.interval, + interval=interval, # TODO : Pretty sure we can drop these last two; verify after testing is complete dag_id=self.dag_id, run_id=orm_dagrun.run_id, diff --git a/airflow-core/src/airflow/serialization/definitions/deadline.py b/airflow-core/src/airflow/serialization/definitions/deadline.py index 58eaa46e6f721..89e231cba24dc 100644 --- a/airflow-core/src/airflow/serialization/definitions/deadline.py +++ b/airflow-core/src/airflow/serialization/definitions/deadline.py @@ -38,6 +38,8 @@ from sqlalchemy import ColumnElement from sqlalchemy.orm import Session + from airflow.sdk.definitions.deadline import VariableInterval + logger = logging.getLogger(__name__) @@ -366,6 +368,6 @@ class SerializedDeadlineAlert: """Serialized representation of a deadline alert.""" reference: SerializedReferenceModels.SerializedBaseDeadlineReference - interval: timedelta + interval: timedelta | VariableInterval callback: Any name: str | None = None diff --git a/airflow-core/src/airflow/serialization/encoders.py b/airflow-core/src/airflow/serialization/encoders.py index e97dcff26237c..b9caea4cc3722 100644 --- a/airflow-core/src/airflow/serialization/encoders.py +++ b/airflow-core/src/airflow/serialization/encoders.py @@ -226,7 +226,7 @@ def encode_deadline_alert(d: DeadlineAlert | SerializedDeadlineAlert) -> dict[st return { "name": d.name, "reference": encode_deadline_reference(d.reference), - "interval": d.interval.total_seconds(), + "interval": serialize(d.interval), "callback": serialize(d.callback), } diff --git 
a/airflow-core/src/airflow/utils/db.py b/airflow-core/src/airflow/utils/db.py index 00d512909dc5a..0d3bf5ef1aa32 100644 --- a/airflow-core/src/airflow/utils/db.py +++ b/airflow-core/src/airflow/utils/db.py @@ -116,7 +116,7 @@ class MappedClassProtocol(Protocol): "3.1.0": "cc92b33c6709", "3.1.8": "509b94a1042d", "3.2.0": "1d6611b6ab7c", - "3.3.0": "acc215baed80", + "3.3.0": "8812eb67b63c", } # Prefix used to identify tables holding data moved during migration. diff --git a/airflow-core/tests/unit/models/test_dag.py b/airflow-core/tests/unit/models/test_dag.py index f9751684e35cf..0a15c9ab09df5 100644 --- a/airflow-core/tests/unit/models/test_dag.py +++ b/airflow-core/tests/unit/models/test_dag.py @@ -2229,7 +2229,10 @@ def test_dag_with_multiple_deadlines(self, testing_dag_bundle, session): ).all() assert len(stored_alerts) == expected_num_deadlines - intervals = sorted([alert.interval for alert in stored_alerts]) + intervals = sorted( + alert.interval["__data__"] if isinstance(alert.interval, dict) else alert.interval + for alert in stored_alerts + ) assert intervals == [300.0, 600.0, 3600.0] # Now create a dagrun and verify deadlines are created diff --git a/airflow-core/tests/unit/models/test_dagrun.py b/airflow-core/tests/unit/models/test_dagrun.py index 5f555ba69d030..7ac2514cbf72d 100644 --- a/airflow-core/tests/unit/models/test_dagrun.py +++ b/airflow-core/tests/unit/models/test_dagrun.py @@ -61,7 +61,9 @@ from airflow.providers.standard.operators.python import PythonOperator, ShortCircuitOperator from airflow.sdk import DAG, BaseOperator, get_current_context, setup, task, task_group, teardown from airflow.sdk.definitions.callback import AsyncCallback -from airflow.sdk.definitions.deadline import DeadlineAlert, DeadlineReference +from airflow.sdk.definitions.deadline import DeadlineAlert, DeadlineReference, VariableInterval +from airflow.sdk.definitions.variable import Variable +from airflow.sdk.exceptions import AirflowRuntimeError from 
airflow.serialization.definitions.deadline import SerializedReferenceModels from airflow.serialization.serialized_objects import LazyDeserializedDAG from airflow.settings import get_policy_plugin_manager @@ -1326,17 +1328,28 @@ def test_dag_run_dag_versions_with_null_created_dag_version(self, dag_maker, ses assert isinstance(dag_run.dag_versions, list) assert len(dag_run.dag_versions) == 0 + @pytest.mark.parametrize( + "interval", + [ + datetime.timedelta(hours=1), + VariableInterval("my_key"), + ], + ) + @mock.patch.object(Variable, "get") @mock.patch.object(Deadline, "prune_deadlines") - def test_dagrun_success_deadline(self, _, session, deadline_test_dag): + def test_dagrun_success_deadline(self, _, mock_get, interval, session, deadline_test_dag): def on_success_callable(context): assert context["dag_run"].dag_id == "test_dag" future_date = datetime.datetime.now() + datetime.timedelta(days=365) + # First value used during resolution + mock_get.return_value = "5" + scheduler_dag = deadline_test_dag( deadline=DeadlineAlert( reference=DeadlineReference.FIXED_DATETIME(future_date), - interval=datetime.timedelta(hours=1), + interval=interval, callback=AsyncCallback(empty_callback_for_deadline), ), on_success_callback=on_success_callable, @@ -1441,6 +1454,73 @@ def test_dagrun_success_handles_empty_deadline_list(self, mock_prune, dag_maker, mock_prune.assert_not_called() assert dag_run.state == DagRunState.SUCCESS + @mock.patch.object(Variable, "get") + @mock.patch.object(Deadline, "prune_deadlines") + def test_dagrun_deadline_variable_interval_stable(self, _, mock_get, session, deadline_test_dag): + future_date = datetime.datetime.now() + datetime.timedelta(days=365) + + # First value used during resolution. 
+ mock_get.return_value = "60" + + scheduler_dag = deadline_test_dag( + deadline=DeadlineAlert( + reference=DeadlineReference.FIXED_DATETIME(future_date), + interval=VariableInterval("my_key"), + callback=AsyncCallback(empty_callback_for_deadline), + ), + ) + + dag_run = self.create_dag_run( + dag=scheduler_dag, + task_states={"task_1": TaskInstanceState.SUCCESS, "task_2": TaskInstanceState.SUCCESS}, + session=session, + ) + dag_run.dag = scheduler_dag + + # First update (interval resolved from the initial Variable value "60"). + dag_run.update_state(session=session) + + deadline = session.execute(select(Deadline)).scalars().one_or_none() + first_deadline_time = deadline.deadline_time + + # Change Variable value after resolution. + mock_get.return_value = "120" + + # Run again (This should not change existing deadline). + dag_run.update_state(session=session) + + deadline = session.execute(select(Deadline)).scalars().one_or_none() + assert deadline.deadline_time == first_deadline_time + + @mock.patch.object(Deadline, "prune_deadlines") + def test_dagrun_deadline_variable_interval_missing_variable_fails(self, _, session, deadline_test_dag): + + mock_err = mock.Mock() + mock_err.error.value = "MISSING_DEADLINE" + mock_err.detail = "missing deadline" + + with mock.patch.object( + Variable, + "get", + side_effect=AirflowRuntimeError(mock_err), + ): + future_date = datetime.datetime.now() + datetime.timedelta(days=365) + + scheduler_dag = deadline_test_dag( + deadline=DeadlineAlert( + reference=DeadlineReference.FIXED_DATETIME(future_date), + interval=VariableInterval("missing_key"), + callback=AsyncCallback(empty_callback_for_deadline), + ), + ) + + with pytest.raises(ValueError, match="not found"): + self.create_dag_run( + dag=scheduler_dag, + task_states={"task_1": TaskInstanceState.SUCCESS}, + session=session, + ) + @pytest.mark.parametrize( ("run_type", "expected_tis"), diff --git a/airflow-core/tests/unit/models/test_serialized_dag.py b/airflow-core/tests/unit/models/test_serialized_dag.py index
54438f8e82fc9..765b2adf206ae 100644 --- a/airflow-core/tests/unit/models/test_serialized_dag.py +++ b/airflow-core/tests/unit/models/test_serialized_dag.py @@ -849,7 +849,7 @@ def test_deadline_interval_change_triggers_new_serdag(self, testing_dag_bundle, # There should be a second serdag with a new hash and the new interval. assert new_serdag_count == 2 assert new_serdag.dag_hash != orig_serdag.dag_hash - assert new_alert.interval == 600.0 + assert new_alert.interval["__data__"] == 600.0 def test_deadline_name_change_updates_db_and_returns_true(self, testing_dag_bundle, session): """Name-only deadline change: UUID reused, DB row updated, write_dag returns True.""" diff --git a/airflow-core/tests/unit/serialization/test_serialized_objects.py b/airflow-core/tests/unit/serialization/test_serialized_objects.py index 1767209ca7350..6d558628e7431 100644 --- a/airflow-core/tests/unit/serialization/test_serialized_objects.py +++ b/airflow-core/tests/unit/serialization/test_serialized_objects.py @@ -513,6 +513,22 @@ def test_serialize_deserialize_deadline_alert(reference): assert deserialized.callback == original.callback +def test_deserialize_deadline_alert_none_interval_raises(): + valid = DeadlineAlert( + reference=DeadlineReference.DAGRUN_QUEUED_AT, + interval=timedelta(hours=1), + callback=AsyncCallback(TEST_CALLBACK_PATH, kwargs=TEST_CALLBACK_KWARGS), + ) + + serialized = BaseSerialization.serialize(valid) + + # Inject downgrade corruption. 
+ serialized[Encoding.VAR][DeadlineAlertFields.INTERVAL] = None + + with pytest.raises(ValueError, match="interval"): + BaseSerialization.deserialize(serialized) + + @pytest.mark.parametrize( "conn_uri", [ diff --git a/task-sdk/src/airflow/sdk/definitions/deadline.py b/task-sdk/src/airflow/sdk/definitions/deadline.py index 2ab20d056b9d5..a9da3dd3d3ea2 100644 --- a/task-sdk/src/airflow/sdk/definitions/deadline.py +++ b/task-sdk/src/airflow/sdk/definitions/deadline.py @@ -22,7 +22,11 @@ from datetime import datetime, timedelta from typing import TYPE_CHECKING, Any +import attrs + from airflow.sdk.definitions.callback import AsyncCallback, Callback, SyncCallback +from airflow.sdk.definitions.variable import Variable +from airflow.sdk.exceptions import AirflowRuntimeError if TYPE_CHECKING: from collections.abc import Callable @@ -143,7 +147,7 @@ class DeadlineAlert: def __init__( self, reference: DeadlineReferenceType, - interval: timedelta, + interval: timedelta | VariableInterval, callback: Callback, name: str | None = None, ): @@ -342,3 +346,58 @@ def decorator( return reference_class return decorator + + +@attrs.define(frozen=True) +class VariableInterval: + """ + Interval backed by an Airflow Variable. + + This allows DeadlineAlert intervals to be configured dynamically using + Airflow Variables. The variable value is interpreted as seconds and + converted into a ``timedelta`` object. + + ------ + Usage: + ------ + + .. code-block:: python + + from airflow.sdk import DAG, DeadlineAlert, DeadlineReference, AsyncCallback + + DAG( + dag_id="dag_with_variable_interval", + deadline=DeadlineAlert( + reference=DeadlineReference.DAGRUN_QUEUED_AT, + interval=VariableInterval("deadline_seconds"), + callback=AsyncCallback(my_callback), + ), + ) + + ------ + Notes: + ------ + * Resolution occurs when deadlines are evaluated (during DagRun creation). + * Changes to the Variable affect only newly parsed DAGs and future DagRuns. 
+ * Existing deadlines are not retroactively updated. + """ + + key: str + + def resolve(self) -> timedelta: + try: + value = Variable.get(self.key) + except AirflowRuntimeError as e: + raise ValueError(f"VariableInterval '{self.key}' not found") from e + + try: + seconds = int(value) + except (TypeError, ValueError) as e: + raise ValueError( + f"VariableInterval '{self.key}' must be an integer (seconds), got: {value!r}" + ) from e + + if seconds <= 0: + raise ValueError(f"VariableInterval '{self.key}' must be > 0, got: {seconds}") + + return timedelta(seconds=seconds) diff --git a/task-sdk/tests/task_sdk/definitions/test_deadline.py b/task-sdk/tests/task_sdk/definitions/test_deadline.py index 8e9e816b30705..b104980e4c986 100644 --- a/task-sdk/tests/task_sdk/definitions/test_deadline.py +++ b/task-sdk/tests/task_sdk/definitions/test_deadline.py @@ -17,12 +17,15 @@ from __future__ import annotations from datetime import datetime, timedelta +from unittest import mock import pytest from task_sdk.definitions.test_callback import TEST_CALLBACK_KWARGS, TEST_CALLBACK_PATH, UNIMPORTABLE_DOT_PATH from airflow.sdk.definitions.callback import AsyncCallback, SyncCallback -from airflow.sdk.definitions.deadline import DeadlineAlert, DeadlineReference +from airflow.sdk.definitions.deadline import DeadlineAlert, DeadlineReference, VariableInterval +from airflow.sdk.definitions.variable import Variable +from airflow.sdk.exceptions import AirflowRuntimeError DAG_ID = "dag_id_1" RUN_ID = 1 @@ -162,3 +165,50 @@ def test_deadline_alert_rejects_invalid_callback(self): interval=timedelta(hours=1), callback="not_a_callback", # type: ignore ) + + +class TestVariableInterval: + @pytest.mark.parametrize( + ("value", "expected"), + [ + ("3", timedelta(seconds=3)), + ("10", timedelta(seconds=10)), + ("05", timedelta(seconds=5)), # leading zero + ], + ) + def test_resolve_valid(self, mocker, value, expected): + mocker.patch.object(Variable, "get", return_value=value) + + interval = 
VariableInterval(key="test_interval") + + assert interval.resolve() == expected + + @pytest.mark.parametrize( + ("value", "raise_runtime", "match"), + [ + (None, True, "not found"), + ("abc", False, "must be an integer"), + ("", False, "must be an integer"), + ("0", False, "must be > 0"), + ("-5", False, "must be > 0"), + ], + ) + def test_resolve_invalid(self, mocker, value, raise_runtime, match): + + if raise_runtime: + mock_err = mock.Mock() + mock_err.error.value = "MISSING" + mock_err.detail = "missing" + + mocker.patch.object( + Variable, + "get", + side_effect=AirflowRuntimeError(mock_err), + ) + else: + mocker.patch.object(Variable, "get", return_value=value) + + interval = VariableInterval(key="test_interval") + + with pytest.raises(ValueError, match=match): + interval.resolve() From 032708d63041e568af78e9f125ea8ad5b1c2acc0 Mon Sep 17 00:00:00 2001 From: Vikram Koka Date: Thu, 28 May 2026 13:59:04 -0700 Subject: [PATCH 28/28] Add AIP progress tracker example DAG for common.ai provider Demonstrates Dynamic Task Mapping, structured LLM output (Pydantic), cost-controlled synthesis (UsageLimits), and HITL approval using LLMOperator with live data from the Apache Confluence wiki and GitHub. The DAG accepts an aip_numbers param (default: 76,99,103,105,108) to choose which AIPs to investigate. For each AIP it fetches the spec via Confluence CQL search and gathers PR/commit evidence from the GitHub Search API, then fans out structured LLM analysis via DTM and synthesizes a cross-AIP progress report for maintainer review. 
--- .../example_aip_progress_tracker.py | 344 ++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py diff --git a/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py b/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py new file mode 100644 index 0000000000000..dbd16338a15f5 --- /dev/null +++ b/providers/common/ai/src/airflow/providers/common/ai/example_dags/example_aip_progress_tracker.py @@ -0,0 +1,344 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +AIP progress tracker -- multi-source data fusion with common.ai operators. + +Demonstrates Dynamic Task Mapping, structured LLM output, cost-controlled +synthesis, and HITL approval using only ``LLMOperator`` -- no LlamaIndex or +LangChain dependency required. + +For each active Airflow Improvement Proposal the Dag gathers evidence from +two sources (Confluence spec text, GitHub PRs and commits), asks an LLM to +assess spec-vs-implementation progress, then synthesizes a cross-AIP report +for maintainer review. + +``example_aip_progress_tracker`` (manual trigger): + +.. 
code-block:: text + + fetch_aip_list (@task) + → gather_aip_evidence (@task, mapped ×N AIPs) + → format_analysis_prompt (@task, mapped ×N) + → analyze_aip (LLMOperator, mapped ×N) + → collect_analyses (@task) + → synthesize_report (LLMOperator, with UsageLimits) + → review_report (ApprovalOperator) + +**What this makes visible that a notebook hides:** + +* Each AIP investigation is a named, logged task instance with its own + retry behaviour -- not a loop iteration buried inside one cell. +* If the GitHub API is rate-limited for one AIP, only that mapped + instance retries; the others preserve their XCom results. +* The synthesis step's inputs and token budget are fully auditable. +* A maintainer reviews the report before it goes to the dev list. + +Before running: + +1. Create an LLM connection named ``pydanticai_default`` (or the value of + ``LLM_CONN_ID``) for your chosen model provider. +2. Trigger the DAG with the default ``aip_numbers`` param or edit it to + choose which AIPs to investigate. +""" + +from __future__ import annotations + +import json +import re +import urllib.parse +import urllib.request +from datetime import timedelta + +from pydantic import BaseModel +from pydantic_ai.usage import UsageLimits + +from airflow.providers.common.ai.operators.llm import LLMOperator +from airflow.providers.common.compat.sdk import dag, task +from airflow.providers.standard.operators.hitl import ApprovalOperator +from airflow.sdk import Param + +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +LLM_CONN_ID = "pydanticai_default" + +# Confluence wiki -- public REST API, no auth required. 
+CONFLUENCE_BASE_URL = "https://cwiki.apache.org/confluence" +AIP_LISTING_PAGE_ID = "89066602" # ancestor filter for CQL queries +GITHUB_REPO = "apache/airflow" +DEFAULT_AIP_NUMBERS = "76,99,103,105,108" + +# --------------------------------------------------------------------------- +# Structured output model -- enforces a schema on the per-AIP LLM response +# --------------------------------------------------------------------------- + +# [START aip_tracker_structured_output] + + +class AIPStatus(BaseModel): + """Per-AIP analysis produced by the LLM.""" + + aip_number: int + title: str + spec_summary: str + implementation_status: str + key_prs: list[str] + blockers: list[str] + next_steps: list[str] + completion_pct: int + + +# [END aip_tracker_structured_output] + +# --------------------------------------------------------------------------- +# HTTP helpers +# --------------------------------------------------------------------------- + + +def _confluence_rest_get(path: str) -> dict: + """GET a Confluence REST API endpoint (public, no auth required).""" + url = f"{CONFLUENCE_BASE_URL}{path}" + req = urllib.request.Request(url, headers={"Accept": "application/json"}) + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read()) + + +def _github_api_get(path: str) -> dict: + """GET a GitHub REST API endpoint (public, rate-limited to 10 req/min).""" + url = f"https://api.github.com{path}" + req = urllib.request.Request(url, headers={"Accept": "application/vnd.github.v3+json"}) + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read()) + + +def _strip_html_tags(html: str) -> str: + """Remove HTML/Confluence markup, returning plain text.""" + text = re.sub(r"<[^>]+>", " ", html) + return re.sub(r"\s+", " ", text).strip() + + +# --------------------------------------------------------------------------- +# System prompts +# --------------------------------------------------------------------------- + 
+ANALYSIS_SYSTEM_PROMPT = """\ +You are an Airflow project analyst. Given an AIP specification and its \ +GitHub evidence (pull requests and commits), produce a structured status \ +assessment. + +Be specific about what has been implemented versus what remains. Rate \ +completion percentage based on the ratio of spec goals that have \ +corresponding PRs or commits.""" + +SYNTHESIS_SYSTEM_PROMPT = """\ +You are an Airflow release coordinator. Given individual AIP status \ +assessments, produce a concise cross-AIP progress report. + +Identify the top priorities, shared blockers across AIPs, and recommend \ +where maintainer attention is most needed. Keep the report actionable \ +and under 500 words.""" + + +# --------------------------------------------------------------------------- +# DAG +# --------------------------------------------------------------------------- + + +# [START example_aip_progress_tracker] +@dag( + schedule=None, + catchup=False, + params={ + "aip_numbers": Param( + DEFAULT_AIP_NUMBERS, + type="string", + description="Comma-separated AIP numbers to investigate (e.g. 76,99,103,105,108)", + ), + }, + tags=["example", "aip_tracker", "common_ai"], +) +def example_aip_progress_tracker(): + """ + Track AIP progress by analysing Confluence specs against GitHub evidence. + + Task graph:: + + fetch_aip_list (@task) + → gather_aip_evidence (@task ×N, via Dynamic Task Mapping) + → format_analysis_prompt (@task ×N) + → analyze_aip (LLMOperator ×N, structured output) + → collect_analyses (@task) + → synthesize_report (LLMOperator, with UsageLimits) + → review_report (ApprovalOperator) + """ + + # ------------------------------------------------------------------ + # Step 1: Fetch the list of active AIPs to investigate. + # The length of this list determines how many mapped instances are + # created in the downstream steps -- N is decided at runtime. 
+ # ------------------------------------------------------------------ + @task + def fetch_aip_list(params: dict) -> list[dict]: + aip_numbers = [int(n.strip()) for n in params["aip_numbers"].split(",") if n.strip()] + aips = [] + for num in aip_numbers: + cql = urllib.parse.quote( + f'space="AIRFLOW" AND title~"AIP-{num}" AND ancestor={AIP_LISTING_PAGE_ID}' + ) + results = _confluence_rest_get(f"/rest/api/content/search?cql={cql}&limit=1") + if results.get("results"): + title = results["results"][0]["title"] + else: + title = f"AIP-{num}" + aips.append({"aip_number": num, "title": title}) + return aips + + aip_list = fetch_aip_list() + + # ------------------------------------------------------------------ + # Step 2: Gather evidence for each AIP from multiple sources. + # Each mapped instance fetches one AIP's spec text from the + # Confluence wiki (cwiki.apache.org) and searches GitHub for + # related PRs and commits. If the GitHub API is rate-limited + # for one AIP, only that instance retries. 
+ # ------------------------------------------------------------------ + @task + def gather_aip_evidence(aip: dict) -> dict: + aip_number = aip["aip_number"] + cql = urllib.parse.quote( + f'space="AIRFLOW" AND title~"AIP-{aip_number}" AND ancestor={AIP_LISTING_PAGE_ID}' + ) + results = _confluence_rest_get(f"/rest/api/content/search?cql={cql}&expand=body.view&limit=1") + spec_text = "" + if results.get("results"): + raw_html = results["results"][0]["body"]["view"]["value"] + spec_text = _strip_html_tags(raw_html)[:3000] + pr_query = urllib.parse.quote(f"AIP-{aip_number} repo:{GITHUB_REPO} is:pr") + pr_data = _github_api_get(f"/search/issues?q={pr_query}&per_page=10") + prs = [f"#{it['number']} -- {it['title']}" for it in pr_data.get("items", [])] + commit_query = urllib.parse.quote(f"AIP-{aip_number} repo:{GITHUB_REPO}") + commit_data = _github_api_get(f"/search/commits?q={commit_query}&per_page=10") + commits = [it["commit"]["message"].split("\n")[0] for it in commit_data.get("items", [])] + return { + "aip_number": aip_number, + "title": aip["title"], + "spec_text": spec_text, + "prs": prs, + "commits": commits, + } + + evidence = gather_aip_evidence.expand(aip=aip_list) + + # ------------------------------------------------------------------ + # Step 3: Format the gathered evidence into an LLM analysis prompt. + # Separating formatting from data gathering keeps each task focused + # and makes prompt iteration independent of API logic. 
+ # ------------------------------------------------------------------ + @task + def format_analysis_prompt(evidence: dict) -> str: + prs_text = "\n".join(f" - {pr}" for pr in evidence["prs"]) + commits_text = "\n".join(f" - {c}" for c in evidence["commits"]) + return ( + f"Analyze AIP-{evidence['aip_number']}: {evidence['title']}\n\n" + f"Specification:\n{evidence['spec_text']}\n\n" + f"Pull Requests:\n{prs_text}\n\n" + f"Recent Commits:\n{commits_text}" + ) + + prompts = format_analysis_prompt.expand(evidence=evidence) + + # ------------------------------------------------------------------ + # Step 4: Analyze each AIP with a structured LLM call. + # Dynamic Task Mapping creates one LLMOperator instance per AIP. + # output_type=AIPStatus enforces the Pydantic schema on the response. + # ------------------------------------------------------------------ + # [START aip_tracker_dtm_analysis] + analyses = LLMOperator.partial( + task_id="analyze_aip", + llm_conn_id=LLM_CONN_ID, + system_prompt=ANALYSIS_SYSTEM_PROMPT, + output_type=AIPStatus, + ).expand(prompt=prompts) + # [END aip_tracker_dtm_analysis] + + # ------------------------------------------------------------------ + # Step 5: Collect all per-AIP analyses into a single context string + # for the synthesis step. 
+ # ------------------------------------------------------------------ + @task + def collect_analyses(analyses: list) -> str: + sections = [] + for raw in analyses: + a = json.loads(raw) if isinstance(raw, str) else raw + blockers = ", ".join(a["blockers"]) if a["blockers"] else "None identified" + next_steps = ", ".join(a["next_steps"]) if a["next_steps"] else "N/A" + sections.append( + f"## AIP-{a['aip_number']}: {a['title']}\n" + f"Status: {a['implementation_status']} " + f"({a['completion_pct']}% complete)\n" + f"Summary: {a['spec_summary']}\n" + f"Key PRs: {', '.join(a['key_prs'])}\n" + f"Blockers: {blockers}\n" + f"Next steps: {next_steps}" + ) + return "\n\n".join(sections) + + collected = collect_analyses(analyses.output) + + # ------------------------------------------------------------------ + # Step 6: Synthesize a cross-AIP progress report. + # UsageLimits caps the token spend so a runaway prompt cannot + # exhaust the API budget in a single Dag run. + # ------------------------------------------------------------------ + # [START aip_tracker_synthesis] + synthesize = LLMOperator( + task_id="synthesize_report", + llm_conn_id=LLM_CONN_ID, + system_prompt=SYNTHESIS_SYSTEM_PROMPT, + prompt="""\ +Create a cross-AIP progress report from these individual assessments. +Prioritize AIPs that are close to completion or have shared blockers. + +{{ ti.xcom_pull(task_ids='collect_analyses') }}""", + usage_limits=UsageLimits( + request_limit=5, + input_tokens_limit=20_000, + output_tokens_limit=4_000, + ), + ) + # [END aip_tracker_synthesis] + collected >> synthesize + + # ------------------------------------------------------------------ + # Step 7: A maintainer reviews the synthesized report before it is + # shared on the dev list. The Dag pauses here until the human + # approves, requests changes, or the timeout expires. 
+ # ------------------------------------------------------------------ + # [START aip_tracker_hitl] + ApprovalOperator( + task_id="review_report", + subject="Review AIP Progress Report before sharing", + body=synthesize.output, + response_timeout=timedelta(hours=24), + ) + # [END aip_tracker_hitl] + + +# [END example_aip_progress_tracker] + +example_aip_progress_tracker()