diff --git a/paimon-python/pypaimon/compact/__init__.py b/paimon-python/pypaimon/compact/__init__.py new file mode 100644 index 000000000000..65b48d4d79b4 --- /dev/null +++ b/paimon-python/pypaimon/compact/__init__.py @@ -0,0 +1,17 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ diff --git a/paimon-python/pypaimon/compact/coordinator/__init__.py b/paimon-python/pypaimon/compact/coordinator/__init__.py new file mode 100644 index 000000000000..65b48d4d79b4 --- /dev/null +++ b/paimon-python/pypaimon/compact/coordinator/__init__.py @@ -0,0 +1,17 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ diff --git a/paimon-python/pypaimon/compact/coordinator/append_compact_coordinator.py b/paimon-python/pypaimon/compact/coordinator/append_compact_coordinator.py new file mode 100644 index 000000000000..0a9193dedca6 --- /dev/null +++ b/paimon-python/pypaimon/compact/coordinator/append_compact_coordinator.py @@ -0,0 +1,164 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from collections import defaultdict +from typing import Dict, List, Optional, Tuple + +from pypaimon.common.predicate import Predicate +from pypaimon.compact.coordinator.coordinator import CompactCoordinator +from pypaimon.compact.options import CompactOptions +from pypaimon.compact.task.append_compact_task import AppendCompactTask +from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.read.scanner.file_scanner import FileScanner + + +class AppendCompactCoordinator(CompactCoordinator): + """Plan compaction tasks for append-only tables (HASH_FIXED or BUCKET_UNAWARE). + + Per (partition, bucket) we filter to files smaller than the table's + target_file_size and bin-pack them into compaction tasks using the same + algorithm as Java AppendCompactCoordinator.SubCoordinator.pack: + + 1. Sort candidates by file size ascending (smaller files lead, so the + packer has the most flexibility to grow a bin without immediately + overshooting). + 2. Walk the sorted list adding each file to the current bin, accruing + (file_size + open_file_cost) — open_file_cost mirrors Java's per-file + IO weight so a bin of many tiny files drains earlier than naive + size accounting would suggest. + 3. Drain a bin as soon as it has >1 file AND its weighted size hits + target_file_size * 2. The ×2 is Java's hardcoded constant: each + task should produce roughly two target-sized output files, which + amortizes task setup cost while keeping output sizes predictable. + 4. The trailing bin is emitted only if it has at least min_file_num + files; smaller tails are dropped to avoid spending an entire task + on a couple of files that will collect company on the next plan. + + full_compaction=True relaxes both the size filter (large files also + enter packing) and the trailing-bin threshold (any non-empty tail + is emitted), matching the user-level intent of "rewrite this bucket + regardless of current shape". + """ + + def __init__( + self, + table, + compact_options: Optional[CompactOptions] = None, + partition_predicate: Optional[Predicate] = None, + ): + if table.is_primary_key_table: + raise ValueError( + "AppendCompactCoordinator only handles append-only tables; " + "use the merge-tree coordinator for primary-key tables." + ) + self.table = table + self.options = compact_options or CompactOptions() + self.partition_predicate = partition_predicate + + def plan(self) -> List[AppendCompactTask]: + manifest_entries = self._scan_live_files() + if not manifest_entries: + return [] + + # Reduce the manifest entry stream to (partition, bucket) → live files. + # We trust the manifest scanner to have already merged ADD/DELETE + # entries; whatever survives here is currently in the snapshot. 
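+        # For intuition on the packing done per group by _pick_files_for_bucket
+        # below, a hypothetical example (sizes are illustrative, not defaults):
+        # with target_file_size = 128 MiB and open_file_cost = 4 MiB, candidate
+        # files of 10/20/30/40/60 MiB accrue a weighted size of 180 MiB, never
+        # reach the 256 MiB drain threshold, and therefore ship together as a
+        # single trailing bin, provided the bucket holds at least min_file_num
+        # of them.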
+ bucket_files: Dict[Tuple[Tuple, int], List[DataFileMeta]] = defaultdict(list) + for entry in manifest_entries: + key = (tuple(entry.partition.values), entry.bucket) + bucket_files[key].append(entry.file) + + target_file_size = self.table.options.target_file_size(False) + open_file_cost = self.table.options.source_split_open_file_cost() + + tasks: List[AppendCompactTask] = [] + for (partition, bucket), files in bucket_files.items(): + for chunk in self._pick_files_for_bucket(files, target_file_size, open_file_cost): + tasks.append( + AppendCompactTask( + partition=partition, + bucket=bucket, + files=chunk, + table=self.table, + ) + ) + return tasks + + def _pick_files_for_bucket( + self, + files: List[DataFileMeta], + target_file_size: int, + open_file_cost: int, + ) -> List[List[DataFileMeta]]: + """Bin-pack one bucket's files into compaction tasks. + + Mirrors org.apache.paimon.append.AppendCompactCoordinator + .SubCoordinator.pack — see class docstring for the reasoning behind + the size-based packing, the open_file_cost weight, and the + target_file_size * 2 drain threshold. + """ + if self.options.full_compaction: + candidates = list(files) + else: + # Files already at or above target size aren't worth rewriting — + # the output would be near-identical and we'd burn IO for it. + candidates = [f for f in files if f.file_size < target_file_size] + + if not candidates: + return [] + + candidates.sort(key=lambda f: f.file_size) + + chunks: List[List[DataFileMeta]] = [] + bin_files: List[DataFileMeta] = [] + bin_size = 0 + drain_threshold = target_file_size * 2 + for f in candidates: + bin_files.append(f) + bin_size += f.file_size + open_file_cost + if len(bin_files) > 1 and bin_size >= drain_threshold: + chunks.append(bin_files) + bin_files = [] + bin_size = 0 + + # Trailing bin: under full_compaction any non-empty tail ships; + # otherwise we require min_file_num files so a tiny tail waits for + # company on the next plan instead of paying task overhead now. + min_tail = 1 if self.options.full_compaction else self.options.min_file_num + if len(bin_files) >= min_tail: + chunks.append(bin_files) + return chunks + + def _scan_live_files(self): + """Read manifest entries from the latest snapshot, applying partition filter.""" + snapshot = self.table.snapshot_manager().get_latest_snapshot() + if snapshot is None: + return [] + + from pypaimon.manifest.manifest_list_manager import ManifestListManager + manifest_list_manager = ManifestListManager(self.table) + + def manifest_scanner(): + return manifest_list_manager.read_all(snapshot), snapshot + + scanner = FileScanner( + self.table, + manifest_scanner, + partition_predicate=self.partition_predicate, + ) + return scanner.plan_files() diff --git a/paimon-python/pypaimon/compact/coordinator/coordinator.py b/paimon-python/pypaimon/compact/coordinator/coordinator.py new file mode 100644 index 000000000000..478ca1f8a470 --- /dev/null +++ b/paimon-python/pypaimon/compact/coordinator/coordinator.py @@ -0,0 +1,40 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from abc import ABC, abstractmethod +from typing import List + +from pypaimon.compact.task.compact_task import CompactTask + + +class CompactCoordinator(ABC): + """Driver-side planner that turns a snapshot into a list of CompactTask. + + The coordinator runs once per CompactJob, in the driver, and **does not** + rewrite data itself. Splitting plan() from task.run() lets us pin the + snapshot scan to a single process (no concurrent manifest re-reads) while + still letting the executor distribute the resulting tasks however it likes. + """ + + @abstractmethod + def plan(self) -> List[CompactTask]: + """Return a possibly-empty list of compact tasks for the current snapshot. + + An empty list means there is nothing worth compacting at this moment; + the job should commit nothing rather than produce an empty snapshot. + """ diff --git a/paimon-python/pypaimon/compact/coordinator/merge_tree_compact_coordinator.py b/paimon-python/pypaimon/compact/coordinator/merge_tree_compact_coordinator.py new file mode 100644 index 000000000000..b1397f640a71 --- /dev/null +++ b/paimon-python/pypaimon/compact/coordinator/merge_tree_compact_coordinator.py @@ -0,0 +1,153 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +"""Driver-side planner for primary-key (merge-tree) compaction.""" + +from collections import defaultdict +from typing import Dict, List, Optional, Tuple + +from pypaimon.common.predicate import Predicate +from pypaimon.compact.coordinator.coordinator import CompactCoordinator +from pypaimon.compact.levels import Levels +from pypaimon.compact.options import CompactOptions +from pypaimon.compact.strategy.strategy import (CompactStrategy, + pick_full_compaction) +from pypaimon.compact.strategy.universal_compaction import UniversalCompaction +from pypaimon.compact.task.merge_tree_compact_task import MergeTreeCompactTask +from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.read.reader.sort_merge_reader import builtin_key_comparator +from pypaimon.read.scanner.file_scanner import FileScanner + +DEFAULT_NUM_LEVELS = 5 + + +class MergeTreeCompactCoordinator(CompactCoordinator): + """Plan one MergeTreeCompactTask per (partition, bucket) that the strategy says to compact. + + The coordinator owns the Levels object for a bucket and asks the + strategy.pick(...) which runs to combine. full_compaction=True bypasses + the strategy entirely and picks every file in every bucket. + """ + + def __init__( + self, + table, + compact_options: Optional[CompactOptions] = None, + partition_predicate: Optional[Predicate] = None, + strategy: Optional[CompactStrategy] = None, + ): + if not table.is_primary_key_table: + raise ValueError( + "MergeTreeCompactCoordinator only handles primary-key tables; " + "use AppendCompactCoordinator for append-only tables." + ) + self.table = table + self.options = compact_options or CompactOptions() + self.partition_predicate = partition_predicate + self.num_levels = self._resolve_num_levels() + self.strategy = strategy or self._default_strategy() + self.key_comparator = builtin_key_comparator(self.table.trimmed_primary_keys_fields) + + def plan(self) -> List[MergeTreeCompactTask]: + manifest_entries = self._scan_live_files() + if not manifest_entries: + return [] + + bucket_files: Dict[Tuple[Tuple, int], List[DataFileMeta]] = defaultdict(list) + for entry in manifest_entries: + key = (tuple(entry.partition.values), entry.bucket) + bucket_files[key].append(entry.file) + + tasks: List[MergeTreeCompactTask] = [] + for (partition, bucket), files in bucket_files.items(): + levels = Levels(self.key_comparator, files, self.num_levels) + unit = self._pick_unit(levels) + if unit is None: + continue + drop_delete = self._should_drop_delete(unit, levels) + tasks.append( + MergeTreeCompactTask( + partition=partition, + bucket=bucket, + files=unit.files, + output_level=unit.output_level, + drop_delete=drop_delete, + table=self.table, + ) + ) + return tasks + + # ---- internals --------------------------------------------------------- + + def _pick_unit(self, levels: Levels): + runs = levels.level_sorted_runs() + if self.options.full_compaction: + return pick_full_compaction(levels.number_of_levels(), runs) + return self.strategy.pick(levels.number_of_levels(), runs) + + def _should_drop_delete(self, unit, levels: Levels) -> bool: + # Mirrors MergeTreeCompactManager.triggerCompaction's dropDelete rule: + # we may drop retract rows only when nothing older could need them, i.e. + # we are writing to a level >= the highest non-empty level (and never + # to L0, which by definition can have older data above and below). 
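+        # Hypothetical shape for illustration: if L3 is the highest non-empty
+        # level, a unit writing to L3 may drop deletes (no older rows remain
+        # below it for them to cancel), while a unit writing to L2 must keep
+        # them because L3 still holds older rows those deletes may target.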
+ if unit.output_level == 0: + return False + return unit.output_level >= levels.non_empty_highest_level() + + def _resolve_num_levels(self) -> int: + # Java reads num-levels off CoreOptions; pypaimon's CoreOptions doesn't + # surface it as a typed accessor yet, so we read the raw map and fall + # back to Java's CoreOptions.NUM_LEVELS default (5). Any input file + # already at a higher level wins during Levels construction anyway. + raw = self._raw_options_map().get("num-levels") + return int(raw) if raw is not None else DEFAULT_NUM_LEVELS + + def _default_strategy(self) -> CompactStrategy: + raw = self._raw_options_map() + max_size_amp = int(raw.get("compaction.max-size-amplification-percent") or 200) + size_ratio = int(raw.get("compaction.size-ratio") or 1) + trigger = int(raw.get("num-sorted-run.compaction-trigger") or 5) + return UniversalCompaction( + max_size_amp=max_size_amp, + size_ratio=size_ratio, + num_run_compaction_trigger=trigger, + ) + + def _raw_options_map(self) -> dict: + opts = self.table.options.options + if hasattr(opts, "to_map"): + return opts.to_map() + return dict(opts) if opts else {} + + def _scan_live_files(self): + snapshot = self.table.snapshot_manager().get_latest_snapshot() + if snapshot is None: + return [] + + from pypaimon.manifest.manifest_list_manager import ManifestListManager + manifest_list_manager = ManifestListManager(self.table) + + def manifest_scanner(): + return manifest_list_manager.read_all(snapshot), snapshot + + scanner = FileScanner( + self.table, + manifest_scanner, + partition_predicate=self.partition_predicate, + ) + return scanner.plan_files() diff --git a/paimon-python/pypaimon/compact/executor/__init__.py b/paimon-python/pypaimon/compact/executor/__init__.py new file mode 100644 index 000000000000..65b48d4d79b4 --- /dev/null +++ b/paimon-python/pypaimon/compact/executor/__init__.py @@ -0,0 +1,17 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ diff --git a/paimon-python/pypaimon/compact/executor/executor.py b/paimon-python/pypaimon/compact/executor/executor.py new file mode 100644 index 000000000000..4fc988ff01b9 --- /dev/null +++ b/paimon-python/pypaimon/compact/executor/executor.py @@ -0,0 +1,40 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from abc import ABC, abstractmethod +from typing import List + +from pypaimon.compact.task.compact_task import CompactTask +from pypaimon.write.commit_message import CommitMessage + + +class CompactExecutor(ABC): + """Pluggable backend that runs a list of CompactTask and returns CommitMessages. + + Implementations decide where the work happens (current process, thread pool, + Ray cluster, ...). The contract is intentionally narrow so adding RayExecutor + in Phase 4 doesn't ripple through the coordinator/job layers. + """ + + @abstractmethod + def execute(self, tasks: List[CompactTask]) -> List[CommitMessage]: + """Run all tasks and gather one CommitMessage per task. + + Order of returned messages is not significant — the driver merges them + into a single atomic commit. + """ diff --git a/paimon-python/pypaimon/compact/executor/local_executor.py b/paimon-python/pypaimon/compact/executor/local_executor.py new file mode 100644 index 000000000000..acc01f353355 --- /dev/null +++ b/paimon-python/pypaimon/compact/executor/local_executor.py @@ -0,0 +1,34 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from typing import List + +from pypaimon.compact.executor.executor import CompactExecutor +from pypaimon.compact.task.compact_task import CompactTask +from pypaimon.write.commit_message import CommitMessage + + +class LocalExecutor(CompactExecutor): + """Synchronous in-process executor — runs each task in series. + + Useful for tests, single-machine compactions, and as the default backend + so a CompactJob is functional out of the box without any cluster setup. 
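+
+    A minimal usage sketch (assuming a table opened from an existing catalog;
+    CompactJob falls back to this executor when none is supplied):
+
+        job = CompactJob(table, executor=LocalExecutor())
+        committed = job.execute()  # CommitMessages that were committed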
+ """ + + def execute(self, tasks: List[CompactTask]) -> List[CommitMessage]: + return [task.run() for task in tasks] diff --git a/paimon-python/pypaimon/compact/executor/ray_executor.py b/paimon-python/pypaimon/compact/executor/ray_executor.py new file mode 100644 index 000000000000..7e650e2c7b23 --- /dev/null +++ b/paimon-python/pypaimon/compact/executor/ray_executor.py @@ -0,0 +1,103 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Ray-backed compaction executor. + +Driver-side: serialize each CompactTask to a JSON payload and dispatch +ray.remote tasks. Worker-side: a top-level `_run_task_payload` rebuilds +the task from the payload (which includes catalog options + table +identifier so the worker can rebuild its own FileStoreTable) and runs +it, then returns a serialized CommitMessage. The driver collects them +into Python CommitMessage objects. + +`ray` is an optional dependency — installation is `pip install pypaimon[ray]` +— so we import inside execute() and present a clear error if it isn't +available. +""" + +from typing import Any, Dict, List, Optional + +from pypaimon.compact.executor.executor import CompactExecutor +from pypaimon.compact.task.compact_task import CompactTask +# Side-effect imports: each task subclass registers itself in the task +# registry at import time. Ray workers are fresh processes with an empty +# registry, so we must guarantee these modules get imported in the worker — +# importing them here means `import ray_executor` (which the worker does +# implicitly when unpickling _run_task_payload) brings the registrations +# along for the ride. +from pypaimon.compact.task import append_compact_task as _append_task # noqa: F401 +from pypaimon.compact.task import merge_tree_compact_task as _mt_task # noqa: F401 +from pypaimon.write.commit_message import CommitMessage +from pypaimon.write.commit_message_serializer import CommitMessageSerializer + + +# Worker entry point. Defined at module scope so Ray can pickle it cheaply +# and so a misbehaving subclass cannot accidentally close over driver state. +def _run_task_payload(payload: bytes) -> bytes: + task = CompactTask.deserialize(payload) + message = task.run() + return CommitMessageSerializer.serialize(message) + + +class RayExecutor(CompactExecutor): + + def __init__( + self, + num_cpus_per_task: float = 1.0, + ray_remote_args: Optional[Dict[str, Any]] = None, + ray_init_args: Optional[Dict[str, Any]] = None, + ): + """Create a RayExecutor. + + - num_cpus_per_task: per-task CPU budget; passed to ray.remote. + - ray_remote_args: extra kwargs for the ray.remote decorator + (e.g. {"max_retries": 3, "memory": 1<<30}). 
+ - ray_init_args: extra kwargs for ray.init when this executor needs + to bootstrap Ray itself. If a Ray runtime is already initialized + we leave it alone. + """ + self.num_cpus_per_task = num_cpus_per_task + self.ray_remote_args = dict(ray_remote_args or {}) + self.ray_init_args = dict(ray_init_args or {}) + + def execute(self, tasks: List[CompactTask]) -> List[CommitMessage]: + if not tasks: + return [] + + try: + import ray # type: ignore + except ImportError as e: + raise RuntimeError( + "RayExecutor requires the 'ray' package; install pypaimon[ray] " + "or 'pip install ray'." + ) from e + + if not ray.is_initialized(): + ray.init(**self.ray_init_args) + + remote_run = ray.remote(num_cpus=self.num_cpus_per_task, **self.ray_remote_args)( + _run_task_payload + ) + + # Drive serialization on the driver — gives a deterministic failure + # site (one bad task surfaces as a TypeError here, not lost in a + # remote traceback). + payloads = [task.serialize() for task in tasks] + futures = [remote_run.remote(p) for p in payloads] + result_bytes: List[bytes] = ray.get(futures) + return [CommitMessageSerializer.deserialize(b) for b in result_bytes] diff --git a/paimon-python/pypaimon/compact/job/__init__.py b/paimon-python/pypaimon/compact/job/__init__.py new file mode 100644 index 000000000000..65b48d4d79b4 --- /dev/null +++ b/paimon-python/pypaimon/compact/job/__init__.py @@ -0,0 +1,17 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ diff --git a/paimon-python/pypaimon/compact/job/compact_job.py b/paimon-python/pypaimon/compact/job/compact_job.py new file mode 100644 index 000000000000..edf25d73e1b6 --- /dev/null +++ b/paimon-python/pypaimon/compact/job/compact_job.py @@ -0,0 +1,134 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +import logging +import uuid +from typing import Any, Dict, List, Optional + +from pypaimon.common.predicate import Predicate +from pypaimon.compact.coordinator.append_compact_coordinator import \ + AppendCompactCoordinator +from pypaimon.compact.coordinator.coordinator import CompactCoordinator +from pypaimon.compact.coordinator.merge_tree_compact_coordinator import \ + MergeTreeCompactCoordinator +from pypaimon.compact.executor.executor import CompactExecutor +from pypaimon.compact.executor.local_executor import LocalExecutor +from pypaimon.compact.options import CompactOptions +from pypaimon.snapshot.snapshot import BATCH_COMMIT_IDENTIFIER +from pypaimon.write.commit_message import CommitMessage +from pypaimon.write.file_store_commit import FileStoreCommit + +logger = logging.getLogger(__name__) + + +class CompactJob: + """Driver-side orchestrator: plan → distribute → commit, in three steps. + + The flow purposely mirrors what Spark's CompactProcedure does: + 1. A Coordinator runs once on the driver and produces independent tasks. + 2. An Executor (Local in Phase 2, Ray in Phase 4) runs the tasks and + returns CommitMessage(compact_before, compact_after) per task. + 3. The driver collects all messages and commits them atomically with + commit_kind="COMPACT". + """ + + def __init__( + self, + table, + compact_options: Optional[CompactOptions] = None, + executor: Optional[CompactExecutor] = None, + partition_predicate: Optional[Predicate] = None, + commit_user: Optional[str] = None, + catalog_options: Optional[Dict[str, Any]] = None, + table_identifier: Optional[str] = None, + ): + """Construct a CompactJob. + + catalog_options + table_identifier are required when using a + distributed executor (RayExecutor) — workers need them to rebuild + the table on the worker process. LocalExecutor never reads them. + """ + self.table = table + self.compact_options = compact_options or CompactOptions() + self.executor = executor or LocalExecutor() + self.partition_predicate = partition_predicate + self.commit_user = commit_user or str(uuid.uuid4()) + self.catalog_options = dict(catalog_options) if catalog_options else None + # Identifier is a dataclass with no custom __str__; str(...) would + # return its repr ("Identifier(database=...)") and Identifier.from_string + # would refuse to parse that. Use get_full_name() so the worker can + # round-trip the identifier through CatalogFactory. + self.table_identifier = table_identifier or table.identifier.get_full_name() + + def execute(self) -> List[CommitMessage]: + """Run the compaction job and return the messages that were committed. + + Returns an empty list when there is nothing to compact. + """ + coordinator = self._build_coordinator() + tasks = coordinator.plan() + if not tasks: + logger.info( + "No compaction work for table %s at the current snapshot", + self.table.identifier, + ) + return [] + + # Distributed executors can't share the in-process FileStoreTable, so + # attach the loader spec when caller provided one. LocalExecutor + # ignores it and uses the in-process table the coordinator already + # baked into each task. 
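+        # Hypothetical example of that loader spec: catalog_options =
+        # {"warehouse": "/path/to/warehouse"} plus table_identifier =
+        # "my_db.my_table" is enough for a worker to re-open the same table
+        # through CatalogFactory on its own side.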
+ if self.catalog_options is not None: + for task in tasks: + task.with_table_loader(self.catalog_options, self.table_identifier) + + logger.info( + "Compacting table %s: %d task(s) via %s", + self.table.identifier, + len(tasks), + type(self.executor).__name__, + ) + messages = self.executor.execute(tasks) + self._commit(messages) + return messages + + def _build_coordinator(self) -> CompactCoordinator: + if self.table.is_primary_key_table: + return MergeTreeCompactCoordinator( + table=self.table, + compact_options=self.compact_options, + partition_predicate=self.partition_predicate, + ) + return AppendCompactCoordinator( + table=self.table, + compact_options=self.compact_options, + partition_predicate=self.partition_predicate, + ) + + def _commit(self, messages: List[CommitMessage]) -> None: + non_empty = [m for m in messages if not m.is_empty()] + if not non_empty: + return + snapshot_commit = self.table.new_snapshot_commit() + if snapshot_commit is None: + raise RuntimeError("Table does not provide a SnapshotCommit instance") + file_store_commit = FileStoreCommit(snapshot_commit, self.table, self.commit_user) + try: + file_store_commit.commit_compact(non_empty, BATCH_COMMIT_IDENTIFIER) + finally: + file_store_commit.close() diff --git a/paimon-python/pypaimon/compact/levels.py b/paimon-python/pypaimon/compact/levels.py new file mode 100644 index 000000000000..da08a21a62b0 --- /dev/null +++ b/paimon-python/pypaimon/compact/levels.py @@ -0,0 +1,232 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Multi-level file management for primary-key tables. + +Direct port of paimon-core/.../mergetree/Levels.java semantics: +- Level 0: every file is its own SortedRun, sorted by maxSequenceNumber DESC + (newest first) so the universal compaction strategy can read it in age + order. +- Levels 1..N: each level holds a single SortedRun whose files have + non-overlapping [min_key, max_key] intervals (compaction maintains this + invariant on output). +""" + +from collections import defaultdict +from dataclasses import dataclass +from functools import cmp_to_key +from typing import Callable, List + +from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.read.interval_partition import SortedRun +from pypaimon.table.row.generic_row import GenericRow + + +# Re-export SortedRun under the compact namespace so callers can import a +# single, stable name. 
+__all__ = ["LevelSortedRun", "Levels", "SortedRun"] + + +@dataclass +class LevelSortedRun: + """Pairs a SortedRun with the level it came from.""" + + level: int + run: SortedRun + + def total_size(self) -> int: + return sum(f.file_size for f in self.run.files) + + +KeyComparator = Callable[[GenericRow, GenericRow], int] + + +class Levels: + """Maintains the L0 + per-level structure of a single (partition, bucket).""" + + def __init__( + self, + key_comparator: KeyComparator, + input_files: List[DataFileMeta], + num_levels: int, + ): + self.key_comparator = key_comparator + + max_seen = max((f.level for f in input_files), default=-1) + restored_num_levels = max(num_levels, max_seen + 1) + if restored_num_levels < 2: + raise ValueError( + f"Number of levels must be at least 2, got {restored_num_levels}" + ) + + # Level 0: list ordered by (max_seq DESC, min_seq ASC, creation_time ASC, + # file_name ASC). We use a sorted python list rather than SortedList + # because additions are rare and full re-sorting is cheap relative to + # data sizes here. + self._level0: List[DataFileMeta] = [] + # Levels 1..N: index 0 is L1, index 1 is L2, ... + self._levels: List[SortedRun] = [SortedRun(files=[]) for _ in range(restored_num_levels - 1)] + + grouped: dict = defaultdict(list) + for f in input_files: + grouped[f.level].append(f) + for level, files in grouped.items(): + self._update_level(level, [], files) + + # Sanity check parallels Java's same Preconditions.checkState. + stored = len(self._level0) + sum(len(r.files) for r in self._levels) + if stored != len(input_files): + raise RuntimeError( + f"Levels stored {stored} files but inputs had {len(input_files)} — " + f"this is a bug in level grouping." + ) + + @property + def level0(self) -> List[DataFileMeta]: + return list(self._level0) + + def run_of_level(self, level: int) -> SortedRun: + if level <= 0: + raise ValueError("Level0 does not have one single sorted run.") + return self._levels[level - 1] + + def number_of_levels(self) -> int: + return len(self._levels) + 1 + + def max_level(self) -> int: + return len(self._levels) + + def number_of_sorted_runs(self) -> int: + n = len(self._level0) + for r in self._levels: + if r.files: + n += 1 + return n + + def non_empty_highest_level(self) -> int: + """Highest level index with at least one file, or -1 if everything is empty.""" + for i in range(len(self._levels) - 1, -1, -1): + if self._levels[i].files: + return i + 1 + return 0 if self._level0 else -1 + + def total_file_size(self) -> int: + return sum(f.file_size for f in self._level0) + sum( + sum(f.file_size for f in r.files) for r in self._levels + ) + + def all_files(self) -> List[DataFileMeta]: + out: List[DataFileMeta] = [] + for run in self.level_sorted_runs(): + out.extend(run.run.files) + return out + + def level_sorted_runs(self) -> List[LevelSortedRun]: + """L0 contributes one LevelSortedRun per file; other levels contribute + their single non-empty SortedRun.""" + runs: List[LevelSortedRun] = [] + for f in self._level0: + runs.append(LevelSortedRun(0, SortedRun(files=[f]))) + for i, run in enumerate(self._levels): + if run.files: + runs.append(LevelSortedRun(i + 1, run)) + return runs + + def update(self, before: List[DataFileMeta], after: List[DataFileMeta]) -> None: + """Apply a CompactResult: remove `before` files and add `after` files, + preserving each file's level. Mirrors Java's Levels.update(). 
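+
+        For illustration: a compaction that rewrote three L0 files into one
+        file tagged level=2 calls update(before=[the three L0 metas],
+        after=[the new meta]); the L0 list shrinks by three and L2's single
+        sorted run is re-sorted by min_key with the new file included.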
+ """ + before_by_level: dict = defaultdict(list) + after_by_level: dict = defaultdict(list) + for f in before: + before_by_level[f.level].append(f) + for f in after: + after_by_level[f.level].append(f) + + # Reject out-of-range levels with a clear error instead of letting a + # downstream IndexError leak. Constructor handles the auto-grow case + # for files restored from manifest; runtime updates must stay within + # the levels we already know about. + max_seen = max( + (lvl for lvl in list(before_by_level.keys()) + list(after_by_level.keys())), + default=-1, + ) + if max_seen >= self.number_of_levels(): + raise ValueError( + f"Cannot update Levels with file at level {max_seen}; " + f"current number_of_levels={self.number_of_levels()}. " + f"Strategies must not select an output_level above the existing top." + ) + + for level in range(self.number_of_levels()): + self._update_level( + level, + before_by_level.get(level, []), + after_by_level.get(level, []), + ) + + def _update_level( + self, + level: int, + before: List[DataFileMeta], + after: List[DataFileMeta], + ) -> None: + if not before and not after: + return + if level == 0: + before_names = {f.file_name for f in before} + self._level0 = [f for f in self._level0 if f.file_name not in before_names] + self._level0.extend(after) + self._level0.sort(key=cmp_to_key(_level0_compare)) + else: + current = list(self._levels[level - 1].files) + before_names = {f.file_name for f in before} + current = [f for f in current if f.file_name not in before_names] + current.extend(after) + current.sort(key=cmp_to_key(_min_key_compare(self.key_comparator))) + self._levels[level - 1] = SortedRun(files=current) + + +def _level0_compare(a: DataFileMeta, b: DataFileMeta) -> int: + """Order L0: file with the largest maxSequenceNumber comes first. + + Ties (concurrent writers) are broken by minSequenceNumber, then creation + time, then file name — same priority chain as Levels.java's TreeSet + comparator, so a Python recovery from a manifest with conflicting + timestamps lays out files identically to the Java side. + """ + if a.max_sequence_number != b.max_sequence_number: + return -1 if a.max_sequence_number > b.max_sequence_number else 1 + if a.min_sequence_number != b.min_sequence_number: + return -1 if a.min_sequence_number < b.min_sequence_number else 1 + if a.creation_time != b.creation_time: + # Treat None as smallest so it sorts first deterministically. + if a.creation_time is None: + return -1 + if b.creation_time is None: + return 1 + return -1 if a.creation_time < b.creation_time else 1 + if a.file_name == b.file_name: + return 0 + return -1 if a.file_name < b.file_name else 1 + + +def _min_key_compare(key_comparator: KeyComparator) -> Callable[[DataFileMeta, DataFileMeta], int]: + def cmp(a: DataFileMeta, b: DataFileMeta) -> int: + return key_comparator(a.min_key, b.min_key) + return cmp diff --git a/paimon-python/pypaimon/compact/options.py b/paimon-python/pypaimon/compact/options.py new file mode 100644 index 000000000000..08798195fec3 --- /dev/null +++ b/paimon-python/pypaimon/compact/options.py @@ -0,0 +1,66 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from dataclasses import dataclass +from typing import Any, Dict, Optional + +# Defaults mirror Java's append-only compaction options where possible. Only the +# subset that drives append/PK planning is exposed here; per-table options +# (file format, compression, target_file_size, open_file_cost) still come from +# CoreOptions on the table itself. +DEFAULT_MIN_FILE_NUM = 5 +DEFAULT_FORCE_FULL = False + + +@dataclass +class CompactOptions: + """Knobs that drive compaction planning. + + target_file_size and open_file_cost are intentionally absent — they are + sourced from the table's own CoreOptions (target_file_size via DataWriter + rolling, open_file_cost as the per-file overhead added when computing + bin size). This keeps a job's output and packing decisions consistent + with what the regular write path would produce. + + The Java AppendCompactCoordinator's `compactionFileNumLimit` / + per-task max-file-count knobs aren't surfaced here: the size-based + bin-packing in `_pick_files_for_bucket` naturally caps each task at + ~2x target_file_size of input, which is the same shape Java produces. + """ + + min_file_num: int = DEFAULT_MIN_FILE_NUM + full_compaction: bool = DEFAULT_FORCE_FULL + + def __post_init__(self): + if self.min_file_num < 1: + raise ValueError(f"min_file_num must be >= 1, got {self.min_file_num}") + + def to_dict(self) -> Dict[str, Any]: + return { + "min_file_num": self.min_file_num, + "full_compaction": self.full_compaction, + } + + @classmethod + def from_dict(cls, data: Optional[Dict[str, Any]]) -> "CompactOptions": + if not data: + return cls() + return cls( + min_file_num=data.get("min_file_num", DEFAULT_MIN_FILE_NUM), + full_compaction=data.get("full_compaction", DEFAULT_FORCE_FULL), + ) diff --git a/paimon-python/pypaimon/compact/rewriter/__init__.py b/paimon-python/pypaimon/compact/rewriter/__init__.py new file mode 100644 index 000000000000..65b48d4d79b4 --- /dev/null +++ b/paimon-python/pypaimon/compact/rewriter/__init__.py @@ -0,0 +1,17 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ diff --git a/paimon-python/pypaimon/compact/rewriter/append_compact_rewriter.py b/paimon-python/pypaimon/compact/rewriter/append_compact_rewriter.py new file mode 100644 index 000000000000..290e1cc5869e --- /dev/null +++ b/paimon-python/pypaimon/compact/rewriter/append_compact_rewriter.py @@ -0,0 +1,179 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from typing import Iterator, List, Tuple + +import pyarrow as pa +import pyarrow.dataset as ds + +from pypaimon.compact.rewriter.merge_tree_rolling_writer import \ + FILE_SOURCE_COMPACT +from pypaimon.compact.rewriter.rewriter import CompactRewriter +from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.read.split_read import format_identifier +from pypaimon.write.writer.append_only_data_writer import AppendOnlyDataWriter + + +class AppendCompactRollingWriter(AppendOnlyDataWriter): + """AppendOnlyDataWriter variant that mirrors Java RowDataRollingFileWriter + used by BaseAppendFileStoreWrite.compactRewrite. + + Two behavior tweaks vs. the regular append writer: + + 1. Sequence number bookkeeping. The base AppendOnlyDataWriter never + advances `sequence_generator.current`, which is fine for plain + writes (where every committed file ends up with min_seq == max_seq + == the batch's seed) but wrong for compaction output. Java's + LongCounter is incremented per row written, so each rolled output + file ends up with precisely [first_row_seq, last_row_seq] in its + metadata. We replicate that here, treating the seed value as the + seq for the first row written: file N covering R rows gets + [next_seq, next_seq + R - 1] in its meta. The base writer's + SequenceGenerator has an off-by-one quirk (current is "+1 after + last assigned"), so we set up the generator each call to make + super()._write_data_to_file emit the exact bounds we want without + touching the parent class. + + 2. Provenance. Java passes FileSource.COMPACT into the rewriter + constructor. The base writer hardcodes file_source=APPEND in + _write_data_to_file, so we patch the just-appended DataFileMeta + afterwards (same shape MergeTreeRollingWriter uses on the PK side). + """ + + def _write_data_to_file(self, data: pa.Table) -> None: + n = data.num_rows + if n == 0: + return + # `start` is treated as "next-to-assign" seq, matching Java's + # LongCounter semantics. The slice we're about to write covers + # [seq_start, seq_end]. 
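+        # Concretely (hypothetical numbers): a 100-row slice arriving with
+        # start = 500 yields a file whose meta records min_seq = 500 and
+        # max_seq = 599, and the next slice starts at 600.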
+ seq_start = self.sequence_generator.start + seq_end = seq_start + n - 1 + # Drive the parent's metadata accounting: parent reads + # min_seq = start, max_seq = current + # then sets start = current. Putting current at seq_end yields + # exactly the [seq_start, seq_end] range Java would have written. + self.sequence_generator.current = seq_end + + before = len(self.committed_files) + super()._write_data_to_file(data) + # Advance both fields past this batch so the next slice starts at + # seq_end + 1 (the parent only moved start to current, which is + # also seq_end now — we want next_to_assign, not last_assigned). + self.sequence_generator.start = seq_end + 1 + self.sequence_generator.current = seq_end + 1 + + # Stamp provenance on whatever the parent appended (rolling may emit + # multiple files in one super call in principle; loop defensively). + for i in range(before, len(self.committed_files)): + self.committed_files[i].file_source = FILE_SOURCE_COMPACT + + +class AppendCompactRewriter(CompactRewriter): + """Reads input append-only files and re-writes them via a Java-aligned + rolling writer. Direct port of org.apache.paimon.operation + .BaseAppendFileStoreWrite.compactRewrite. + + Sequence numbers: the rolling writer's counter is seeded with + files[0].min_sequence_number (matching Java + `LongCounter(toCompact.get(0).minSequenceNumber())`) and bumped per + row written. Output files therefore carry contiguous, non-overlapping + seq ranges starting from that seed — exactly what Java produces. + + NOTE (deferred): + - Schema evolution: input batches are read straight off disk via + pyarrow.dataset, not through table-aware ReadForCompact. Inputs + spanning a schema change need the read path to evolve them; that's + part of the schema-evolution work scheduled later. + - Deletion vectors: Java's compactRewrite accepts a dvFactory that + applies per-file DV during read and persists the resulting index + delta into CompactIncrement.{newIndexFiles, deletedIndexFiles}. + Pypaimon's compaction path will plug DV in alongside the broader + DV support phase. + """ + + def __init__(self, table): + self.table = table + + def rewrite( + self, + partition: Tuple, + bucket: int, + files: List[DataFileMeta], + ) -> List[DataFileMeta]: + if not files: + return [] + + # Java seeds the rolling writer's counter from the first file in the + # (size-sorted) input. files comes from AppendCompactCoordinator + # which sorts by size ascending — same shape Java's pack() produces — + # so files[0] is the smallest input file's min_seq. + seed_seq = files[0].min_sequence_number + + writer = AppendCompactRollingWriter( + table=self.table, + partition=partition, + bucket=bucket, + max_seq_number=seed_seq, + options=self.table.options, + write_cols=None, + ) + try: + try: + for batch in self._read_input_batches(partition, bucket, files): + if batch.num_rows > 0: + writer.write(batch) + new_files = writer.prepare_commit() + except Exception: + # Roll back any rewriter output written so far so a failed task + # doesn't leave orphan files in the warehouse. + writer.abort() + raise + finally: + writer.close() + + return new_files + + def _read_input_batches( + self, + partition: Tuple, + bucket: int, + files: List[DataFileMeta], + ) -> Iterator[pa.RecordBatch]: + """Stream record batches from each input file in order. 
+ + We resolve each file's read path locally (preferring external_path, + matching SplitRead.file_reader_supplier) instead of mutating the + DataFileMeta returned by the manifest — those objects may be cached or + shared with other readers. + """ + path_factory = self.table.path_factory() + bucket_path = path_factory.bucket_path(partition, bucket).rstrip("/") + for f in files: + read_path = f.external_path if f.external_path else ( + f.file_path if f.file_path else f"{bucket_path}/{f.file_name}" + ) + file_format = format_identifier(f.file_name) + file_path_for_pyarrow = self.table.file_io.to_filesystem_path(read_path) + dataset = ds.dataset( + file_path_for_pyarrow, + format=file_format, + filesystem=self.table.file_io.filesystem, + ) + for batch in dataset.to_batches(): + yield batch diff --git a/paimon-python/pypaimon/compact/rewriter/merge_tree_compact_rewriter.py b/paimon-python/pypaimon/compact/rewriter/merge_tree_compact_rewriter.py new file mode 100644 index 000000000000..e243cacbab53 --- /dev/null +++ b/paimon-python/pypaimon/compact/rewriter/merge_tree_compact_rewriter.py @@ -0,0 +1,219 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Primary-key (merge-tree) compaction rewriter. + +Reads each section of the input plan via SortMergeReader, applies the +table's MergeFunction (Deduplicate by default), optionally drops retract +rows, and writes the merged stream out via MergeTreeRollingWriter so the +target_file_size rolling stays consistent with the regular write path. + +Sections are produced by IntervalPartition before reaching us — that's the +existing utility used by MergeFileSplitRead, so we get identical "key +intervals don't overlap inside a sorted run" guarantees here. 
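+
+A hypothetical section for illustration: one sorted run holding key ranges
+[1, 100] and [150, 200] (disjoint within the run) and another run holding
+[50, 160] overlap each other, so SortMergeReader interleaves them into a
+single key-ordered stream before the merge function collapses duplicate keys.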
+""" + +from functools import partial +from typing import Callable, List, Optional + +import pyarrow as pa + +from pypaimon.compact.rewriter.merge_tree_rolling_writer import \ + MergeTreeRollingWriter +from pypaimon.compact.rewriter.rewriter import CompactRewriter +from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.read.interval_partition import SortedRun +from pypaimon.read.reader.concat_record_reader import ConcatRecordReader +from pypaimon.read.reader.drop_delete_reader import DropDeleteRecordReader +from pypaimon.read.reader.format_avro_reader import FormatAvroReader +from pypaimon.read.reader.format_pyarrow_reader import FormatPyArrowReader +from pypaimon.read.reader.iface.record_reader import RecordReader +from pypaimon.read.reader.key_value_wrap_reader import KeyValueWrapReader +from pypaimon.read.reader.merge_function import MergeFunctionFactory +from pypaimon.read.reader.sort_merge_reader import SortMergeReaderWithMinHeap +from pypaimon.read.split_read import (KEY_PREFIX, build_kv_file_fields, + format_identifier) +from pypaimon.schema.data_types import DataField + +# Buffer KVs from the merge stream this many at a time before handing the +# resulting RecordBatch to the writer. Sized to amortize per-row Python +# overhead without ballooning peak memory for wide PK rows. +DEFAULT_BUFFER_ROWS = 4096 + + +class MergeTreeCompactRewriter(CompactRewriter): + + def __init__( + self, + table, + mf_factory: MergeFunctionFactory, + buffer_rows: int = DEFAULT_BUFFER_ROWS, + ): + self.table = table + self.mf_factory = mf_factory + self.buffer_rows = buffer_rows + + # Schema of the on-disk KV file: [_KEY_pk, _SEQUENCE_NUMBER, + # _VALUE_KIND, value_cols...]. Computed once per rewriter to avoid + # repeated per-section work. + self._kv_fields: List[DataField] = self._build_kv_fields() + self._kv_field_names: List[str] = [f.name for f in self._kv_fields] + self._key_arity = sum( + 1 for f in self._kv_fields if f.name.startswith(KEY_PREFIX) + ) + self._value_arity = ( + len(self._kv_fields) - self._key_arity - 2 # minus seq + kind + ) + self._arrow_schema = self._build_arrow_schema() + + def rewrite( + self, + partition, + bucket: int, + output_level: int, + sections: List[List[SortedRun]], + drop_delete: bool, + ) -> List[DataFileMeta]: + if not sections: + return [] + + writer = MergeTreeRollingWriter( + table=self.table, + partition=partition, + bucket=bucket, + output_level=output_level, + options=self.table.options, + ) + + try: + try: + for section in sections: + self._consume_section(section, drop_delete, writer) + files = writer.prepare_commit() + except Exception: + writer.abort() + raise + finally: + writer.close() + + return files + + # ---- internals --------------------------------------------------------- + + def _consume_section( + self, + section: List[SortedRun], + drop_delete: bool, + writer: MergeTreeRollingWriter, + ) -> None: + # Each rewrite() call already knows its (partition, bucket); compute + # the bucket directory once here so each file's read_path is a cheap + # string concat instead of repeating path-factory work per file. 
+ partition = writer.partition + bucket_path = self.table.path_factory().bucket_path(partition, writer.bucket).rstrip("/") + + readers: List[RecordReader] = [] + for sorted_run in section: + suppliers: List[Callable[[], RecordReader]] = [] + for f in sorted_run.files: + suppliers.append(partial(self._kv_reader_supplier, f, bucket_path)) + readers.append(ConcatRecordReader(suppliers)) + + merge_reader: RecordReader = SortMergeReaderWithMinHeap( + readers=readers, + schema=self.table.table_schema, + merge_function=self.mf_factory.create(), + ) + if drop_delete: + merge_reader = DropDeleteRecordReader(merge_reader) + + try: + self._stream_to_writer(merge_reader, writer) + finally: + merge_reader.close() + + def _stream_to_writer( + self, + merge_reader: RecordReader, + writer: MergeTreeRollingWriter, + ) -> None: + buffer: List[tuple] = [] + while True: + iterator = merge_reader.read_batch() + if iterator is None: + break + while True: + kv = iterator.next() + if kv is None: + break + # Snapshot the row tuple — KeyValue is reused across calls. + buffer.append(tuple(kv.row_tuple)) + if len(buffer) >= self.buffer_rows: + writer.write(self._tuples_to_batch(buffer)) + buffer.clear() + if buffer: + writer.write(self._tuples_to_batch(buffer)) + buffer.clear() + + def _tuples_to_batch(self, tuples: List[tuple]) -> pa.RecordBatch: + # Transpose to columnar form, then build the RecordBatch with the + # KV-file schema we precomputed at __init__. + columns = list(zip(*tuples)) if tuples else [() for _ in self._kv_field_names] + arrays = [ + pa.array(list(col), type=self._arrow_schema.field(i).type) + for i, col in enumerate(columns) + ] + return pa.RecordBatch.from_arrays(arrays, schema=self._arrow_schema) + + def _kv_reader_supplier(self, file: DataFileMeta, bucket_path: str) -> RecordReader: + # Resolve read path locally (preferring external_path, matching + # SplitRead.file_reader_supplier) without mutating the manifest meta. + read_path = ( + file.external_path + if file.external_path + else (file.file_path if file.file_path else f"{bucket_path}/{file.file_name}") + ) + file_format = format_identifier(file.file_name) + file_io = self.table.file_io + if file_format == "avro": + file_batch_reader = FormatAvroReader( + file_io, read_path, self._kv_fields, push_down_predicate=None, + ) + else: + file_batch_reader = FormatPyArrowReader( + file_io, + file_format, + read_path, + read_fields=self._kv_fields, + push_down_predicate=None, + options=self.table.options, + ) + return KeyValueWrapReader(file_batch_reader, self._key_arity, self._value_arity) + + def _build_kv_fields(self) -> List[DataField]: + # Same helper SplitRead._create_key_value_fields uses, so the on-disk + # KV file schema cannot drift between read and compact paths. 
+ return build_kv_file_fields( + table_fields=self.table.fields, + trimmed_primary_keys=self.table.trimmed_primary_keys, + value_fields=self.table.fields, + ) + + def _build_arrow_schema(self) -> pa.Schema: + from pypaimon.schema.data_types import PyarrowFieldParser + return PyarrowFieldParser.from_paimon_schema(self._kv_fields) diff --git a/paimon-python/pypaimon/compact/rewriter/merge_tree_rolling_writer.py b/paimon-python/pypaimon/compact/rewriter/merge_tree_rolling_writer.py new file mode 100644 index 000000000000..ec874e460927 --- /dev/null +++ b/paimon-python/pypaimon/compact/rewriter/merge_tree_rolling_writer.py @@ -0,0 +1,131 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +import pyarrow as pa +import pyarrow.compute as pc + +from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.write.writer.data_writer import DataWriter + +# DataFileMeta.file_source convention used across pypaimon's compaction path. +FILE_SOURCE_COMPACT = 1 + + +class MergeTreeRollingWriter(DataWriter): + """Writer for compaction output of primary-key (merge-tree) tables. + + Differences from KeyValueDataWriter: + - Input batches are assumed to already carry the KV system fields + (_KEY_*, _SEQUENCE_NUMBER, _VALUE_KIND) and to be sorted by + (key ASC, sequence ASC). The writer never adds system fields itself + and never advances the sequence generator. + - After the parent class writes a file at level 0, we rewrite the just- + appended DataFileMeta with the strategy-chosen output_level, the actual + min/max sequence numbers observed in the data, the count of retract + rows, and file_source=COMPACT. + """ + + def __init__(self, table, partition, bucket, output_level: int, options=None): + super().__init__( + table=table, + partition=partition, + bucket=bucket, + max_seq_number=0, # generator is not used for KV compaction output + options=options if options is not None else table.options, + write_cols=None, + ) + self.output_level = output_level + + def _process_data(self, data: pa.RecordBatch) -> pa.Table: + # Already enriched + sorted by upstream SortMergeReader/MergeFunction. + return pa.Table.from_batches([data]) + + def _merge_data(self, existing_data: pa.Table, new_data: pa.Table) -> pa.Table: + # Both halves are already in (key, seq) order and the second half's + # smallest key is guaranteed to be >= the first half's largest key + # (caller is feeding a monotonic merge stream), so concat is enough. 
+ return pa.concat_tables([existing_data, new_data]) + + def _write_data_to_file(self, data: pa.Table): + if data.num_rows == 0: + return + + # Snapshot the file count so we can find the entry the parent appends. + before = len(self.committed_files) + super()._write_data_to_file(data) + if len(self.committed_files) <= before: + return # parent skipped (e.g. empty after processing) + + produced = self.committed_files[before:] + # The parent always writes a single file per call, but be defensive. + for idx, original in enumerate(produced): + min_seq, max_seq = self._extract_seq_bounds(data) + delete_count = self._count_retract_rows(data) + self.committed_files[before + idx] = self._patch( + original, + self.output_level, + min_seq, + max_seq, + delete_count, + ) + + @staticmethod + def _extract_seq_bounds(data: pa.Table): + seq = data.column("_SEQUENCE_NUMBER") + return pc.min(seq).as_py(), pc.max(seq).as_py() + + @staticmethod + def _count_retract_rows(data: pa.Table) -> int: + # Match RowKind.is_add_byte: INSERT(0) and UPDATE_AFTER(2) are "add", + # UPDATE_BEFORE(1) and DELETE(3) are retracts. Counting != 0 here would + # wrongly include UPDATE_AFTER and inflate delete_row_count. + kind = data.column("_VALUE_KIND") + is_retract = pc.or_(pc.equal(kind, 1), pc.equal(kind, 3)) + return int(pc.sum(pc.cast(is_retract, pa.int64())).as_py() or 0) + + @staticmethod + def _patch( + original: DataFileMeta, + level: int, + min_seq: int, + max_seq: int, + delete_count: int, + ) -> DataFileMeta: + return DataFileMeta( + file_name=original.file_name, + file_size=original.file_size, + row_count=original.row_count, + min_key=original.min_key, + max_key=original.max_key, + key_stats=original.key_stats, + value_stats=original.value_stats, + min_sequence_number=min_seq, + max_sequence_number=max_seq, + schema_id=original.schema_id, + level=level, + extra_files=original.extra_files, + creation_time=original.creation_time, + delete_row_count=delete_count, + embedded_index=original.embedded_index, + file_source=FILE_SOURCE_COMPACT, + value_stats_cols=original.value_stats_cols, + external_path=original.external_path, + first_row_id=original.first_row_id, + write_cols=original.write_cols, + file_path=original.file_path, + ) diff --git a/paimon-python/pypaimon/compact/rewriter/rewriter.py b/paimon-python/pypaimon/compact/rewriter/rewriter.py new file mode 100644 index 000000000000..05eb22c6b39f --- /dev/null +++ b/paimon-python/pypaimon/compact/rewriter/rewriter.py @@ -0,0 +1,30 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +from abc import ABC + +# CompactRewriter is intentionally a marker base class without an abstract +# rewrite() signature: append-only and merge-tree rewriters take very different +# arguments (flat file list vs. pre-partitioned sections + dropDelete + output +# level) and forcing a unified signature here would obscure their semantics. +# Concrete rewriters are still expected to expose a rewrite(...) entry point so +# callers can grep for one consistent verb. + + +class CompactRewriter(ABC): + """Marker base for compact rewriters (append-only, merge-tree, ...).""" diff --git a/paimon-python/pypaimon/compact/strategy/__init__.py b/paimon-python/pypaimon/compact/strategy/__init__.py new file mode 100644 index 000000000000..65b48d4d79b4 --- /dev/null +++ b/paimon-python/pypaimon/compact/strategy/__init__.py @@ -0,0 +1,17 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ diff --git a/paimon-python/pypaimon/compact/strategy/compact_unit.py b/paimon-python/pypaimon/compact/strategy/compact_unit.py new file mode 100644 index 000000000000..461d542f7097 --- /dev/null +++ b/paimon-python/pypaimon/compact/strategy/compact_unit.py @@ -0,0 +1,55 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from dataclasses import dataclass, field +from typing import List + +from pypaimon.compact.levels import LevelSortedRun +from pypaimon.manifest.schema.data_file_meta import DataFileMeta + + +@dataclass +class CompactUnit: + """One unit of compaction work picked by a CompactStrategy. + + `output_level` is the LSM level the rewriter should write the merged + output at. 
`file_rewrite=False` is a hint that the rewriter may simply + upgrade files in place (no key merging needed) — used by the merge-tree + rewriter for large non-overlapping inputs. The append-only path ignores + it. + """ + + output_level: int + files: List[DataFileMeta] = field(default_factory=list) + file_rewrite: bool = False + + @classmethod + def from_level_runs(cls, output_level: int, runs: List[LevelSortedRun]) -> "CompactUnit": + files: List[DataFileMeta] = [] + for run in runs: + files.extend(run.run.files) + return cls(output_level=output_level, files=files, file_rewrite=False) + + @classmethod + def from_files( + cls, + output_level: int, + files: List[DataFileMeta], + file_rewrite: bool = False, + ) -> "CompactUnit": + return cls(output_level=output_level, files=list(files), file_rewrite=file_rewrite) diff --git a/paimon-python/pypaimon/compact/strategy/strategy.py b/paimon-python/pypaimon/compact/strategy/strategy.py new file mode 100644 index 000000000000..59cdec4c9bca --- /dev/null +++ b/paimon-python/pypaimon/compact/strategy/strategy.py @@ -0,0 +1,62 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from abc import ABC, abstractmethod +from typing import List, Optional + +from pypaimon.compact.levels import LevelSortedRun +from pypaimon.compact.strategy.compact_unit import CompactUnit + + +class CompactStrategy(ABC): + """Picks which sorted runs to compact next. + + Implementations are stateful (they may track `last_full_compaction` time + or similar) but each pick() call inspects only the level snapshot it is + handed; the coordinator owns the Levels object. + """ + + @abstractmethod + def pick( + self, + num_levels: int, + runs: List[LevelSortedRun], + ) -> Optional[CompactUnit]: + """Return the next compaction unit, or None if nothing should run now.""" + + +def pick_full_compaction( + num_levels: int, + runs: List[LevelSortedRun], +) -> Optional[CompactUnit]: + """Force a single unit covering every run, output to the max level. + + Returns None when there are no runs to compact, or when the runs already + consist of a single file already at the max level (idempotent no-op). + Mirrors CompactStrategy.pickFullCompaction(int, List). 
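A minimal usage sketch of pick_full_compaction, assuming the module path shown in this patch; the empty-runs case below is the only shape that can be asserted without constructing LevelSortedRun objects.

from pypaimon.compact.strategy.strategy import pick_full_compaction

# No runs -> nothing to compact.
assert pick_full_compaction(num_levels=6, runs=[]) is None

# With any other run layout the returned CompactUnit targets
# num_levels - 1 (level 5 here) and covers every file from every run,
# except the idempotent single-file, already-max-level case described above.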
+ """ + if not runs: + return None + max_level = num_levels - 1 + if ( + len(runs) == 1 + and runs[0].level == max_level + and len(runs[0].run.files) == 1 + ): + return None + return CompactUnit.from_level_runs(max_level, runs) diff --git a/paimon-python/pypaimon/compact/strategy/universal_compaction.py b/paimon-python/pypaimon/compact/strategy/universal_compaction.py new file mode 100644 index 000000000000..837ac7bacb6a --- /dev/null +++ b/paimon-python/pypaimon/compact/strategy/universal_compaction.py @@ -0,0 +1,180 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Universal Compaction strategy. + +Direct port of paimon-core/.../mergetree/compact/UniversalCompaction.java. +Reference: https://github.com/facebook/rocksdb/wiki/Universal-Compaction. + +Three-stage decision (in order): + 1. Size amplification: when (sum of non-max-level runs) * 100 > max_size_amp% + of the max-level run, full-compact everything to the max level. + 2. Size ratio: append a candidate prefix while + candidate_size * (100 + size_ratio) / 100 >= next.run.total_size, + stopping at the first run that breaks the ratio. + 3. File-num: if total runs > num_run_compaction_trigger, force-pick at least + (size - trigger + 1) runs. + +EarlyFullCompaction and OffPeakHours from the Java side are intentionally +omitted in this first cut — they are independent triggers that can be added +later without touching the core algorithm here. +""" + +from typing import List, Optional + +from pypaimon.compact.levels import LevelSortedRun +from pypaimon.compact.strategy.compact_unit import CompactUnit +from pypaimon.compact.strategy.strategy import CompactStrategy + + +class UniversalCompaction(CompactStrategy): + + def __init__( + self, + max_size_amp: int = 200, + size_ratio: int = 1, + num_run_compaction_trigger: int = 5, + ): + if max_size_amp <= 0: + raise ValueError(f"max_size_amp must be > 0, got {max_size_amp}") + if size_ratio < 0: + raise ValueError(f"size_ratio must be >= 0, got {size_ratio}") + if num_run_compaction_trigger < 1: + raise ValueError( + f"num_run_compaction_trigger must be >= 1, got {num_run_compaction_trigger}" + ) + self.max_size_amp = max_size_amp + self.size_ratio = size_ratio + self.num_run_compaction_trigger = num_run_compaction_trigger + + def pick( + self, + num_levels: int, + runs: List[LevelSortedRun], + ) -> Optional[CompactUnit]: + max_level = num_levels - 1 + + # 1. Size amplification. + unit = self._pick_for_size_amp(max_level, runs) + if unit is not None: + return unit + + # 2. Size ratio. + unit = self._pick_for_size_ratio(max_level, runs) + if unit is not None: + return unit + + # 3. File num. 
+ if len(runs) > self.num_run_compaction_trigger: + candidate_count = len(runs) - self.num_run_compaction_trigger + 1 + return self._pick_for_size_ratio_with_count( + max_level, runs, candidate_count, force_pick=False + ) + + return None + + def force_pick_l0( + self, + num_levels: int, + runs: List[LevelSortedRun], + ) -> Optional[CompactUnit]: + """Pick all consecutive L0 runs at the head of `runs` (no-op if none).""" + candidate_count = 0 + for r in runs: + if r.level > 0: + break + candidate_count += 1 + if candidate_count == 0: + return None + return self._pick_for_size_ratio_with_count( + num_levels - 1, runs, candidate_count, force_pick=True + ) + + # ---- internal helpers -------------------------------------------------- + + def _pick_for_size_amp( + self, + max_level: int, + runs: List[LevelSortedRun], + ) -> Optional[CompactUnit]: + if len(runs) < self.num_run_compaction_trigger: + return None + candidate_size = sum(r.total_size() for r in runs[: len(runs) - 1]) + earliest_run_size = runs[-1].total_size() + # Universal compaction's amplification = non-maxLevel total / maxLevel. + if candidate_size * 100 > self.max_size_amp * earliest_run_size: + return CompactUnit.from_level_runs(max_level, runs) + return None + + def _pick_for_size_ratio( + self, + max_level: int, + runs: List[LevelSortedRun], + ) -> Optional[CompactUnit]: + if len(runs) < self.num_run_compaction_trigger: + return None + return self._pick_for_size_ratio_with_count(max_level, runs, 1, force_pick=False) + + def _pick_for_size_ratio_with_count( + self, + max_level: int, + runs: List[LevelSortedRun], + candidate_count: int, + force_pick: bool, + ) -> Optional[CompactUnit]: + candidate_size = sum(r.total_size() for r in runs[:candidate_count]) + i = candidate_count + while i < len(runs): + nxt = runs[i] + if candidate_size * (100.0 + self.size_ratio) / 100.0 < nxt.total_size(): + break + candidate_size += nxt.total_size() + candidate_count += 1 + i += 1 + + if force_pick or candidate_count > 1: + return self._create_unit(runs, max_level, candidate_count) + return None + + def _create_unit( + self, + runs: List[LevelSortedRun], + max_level: int, + run_count: int, + ) -> CompactUnit: + if run_count == len(runs): + output_level = max_level + else: + # Compact into the level just below the next, untouched run. + output_level = max(0, runs[run_count].level - 1) + + if output_level == 0: + # Output to L0 is meaningless — keep extending until we can land on + # a real level (or until we cover everything, which falls back to + # max_level below). + while run_count < len(runs): + nxt = runs[run_count] + run_count += 1 + if nxt.level != 0: + output_level = nxt.level + break + + if run_count == len(runs): + output_level = max_level + + return CompactUnit.from_level_runs(output_level, runs[:run_count]) diff --git a/paimon-python/pypaimon/compact/task/__init__.py b/paimon-python/pypaimon/compact/task/__init__.py new file mode 100644 index 000000000000..65b48d4d79b4 --- /dev/null +++ b/paimon-python/pypaimon/compact/task/__init__.py @@ -0,0 +1,17 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ diff --git a/paimon-python/pypaimon/compact/task/append_compact_task.py b/paimon-python/pypaimon/compact/task/append_compact_task.py new file mode 100644 index 000000000000..ee4f60b60d03 --- /dev/null +++ b/paimon-python/pypaimon/compact/task/append_compact_task.py @@ -0,0 +1,89 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from typing import Any, Dict, List, Tuple + +from pypaimon.compact.rewriter.append_compact_rewriter import AppendCompactRewriter +from pypaimon.compact.task.compact_task import CompactTask, register_compact_task +from pypaimon.manifest.schema.data_file_meta import (DataFileMeta, decode_value, + encode_value) +from pypaimon.write.commit_message import CommitMessage +from pypaimon.write.compact_increment import CompactIncrement + + +@register_compact_task +class AppendCompactTask(CompactTask): + """Compact a single (partition, bucket) of an append-only table. + + The driver attaches the in-process FileStoreTable so LocalExecutor can run + without rebuilding catalog state. Distributed executors (RayExecutor, added + in Phase 4) must instead populate the loader fields so the worker can + rebuild the table — see to_dict()/from_dict() for the contract. 
+ """ + + TYPE = "append-compact" + + def __init__( + self, + partition: Tuple, + bucket: int, + files: List[DataFileMeta], + table=None, + ): + self.partition = tuple(partition) + self.bucket = bucket + self.files = list(files) + self._table = table + + def with_table(self, table) -> "AppendCompactTask": + self._table = table + return self + + def run(self) -> CommitMessage: + table = self._resolve_table() + rewriter = AppendCompactRewriter(table) + after = rewriter.rewrite(self.partition, self.bucket, self.files) + return CommitMessage( + partition=self.partition, + bucket=self.bucket, + total_buckets=table.total_buckets, + compact_increment=CompactIncrement( + compact_before=list(self.files), + compact_after=list(after), + ), + ) + + def _to_payload(self) -> Dict[str, Any]: + return { + "partition": [encode_value(v) for v in self.partition], + "bucket": self.bucket, + "files": [f.to_dict() for f in self.files], + } + + @classmethod + def _from_payload(cls, payload: Dict[str, Any]) -> "AppendCompactTask": + return cls( + partition=tuple(decode_value(v) for v in payload.get("partition") or []), + bucket=payload["bucket"], + files=[DataFileMeta.from_dict(f) for f in payload.get("files") or []], + ) + + def _resolve_table(self): + if self._table is not None: + return self._table + return self._resolve_table_via_loader() diff --git a/paimon-python/pypaimon/compact/task/compact_task.py b/paimon-python/pypaimon/compact/task/compact_task.py new file mode 100644 index 000000000000..18790e7360cf --- /dev/null +++ b/paimon-python/pypaimon/compact/task/compact_task.py @@ -0,0 +1,136 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +import json +from abc import ABC, abstractmethod +from typing import Any, Dict, Optional + +from pypaimon.write.commit_message import CommitMessage + + +class CompactTask(ABC): + """A self-contained compaction unit dispatched to a worker. + + Two operating modes: + - In-process (LocalExecutor): the driver attaches the FileStoreTable + via with_table(); the worker reuses it directly. + - Distributed (RayExecutor): the driver attaches a table loader spec + via with_table_loader(catalog_options, table_identifier); to_dict + ships those + the data payload across, and from_dict on the worker + rebuilds the table from the catalog. + + Subclasses implement _to_payload / _from_payload to add their own + fields on top of this base envelope (catalog_loader + table_identifier + are handled here once). + """ + + TYPE: str = "" + + # Distributed-execution loader spec, populated by CompactJob when an + # executor that can't share the in-process table is in use. 
+ _catalog_loader_options: Optional[Dict[str, str]] = None + _table_identifier: Optional[str] = None + + @abstractmethod + def run(self) -> CommitMessage: + """Execute the compaction unit and return a CommitMessage. + + Subclasses should obtain their FileStoreTable via self._resolve_table() + rather than poking at the cached _table directly, so distributed and + local paths share the same retrieval logic. + """ + + def with_table_loader( + self, + catalog_options: Dict[str, str], + table_identifier: str, + ) -> "CompactTask": + """Attach the spec a distributed worker uses to rebuild this task's table.""" + self._catalog_loader_options = dict(catalog_options) + self._table_identifier = table_identifier + return self + + def _resolve_table_via_loader(self): + if not self._catalog_loader_options or not self._table_identifier: + raise RuntimeError( + f"{type(self).__name__} has no in-process table and no catalog loader; " + "the driver must call with_table() or with_table_loader() before " + "handing this task to an executor." + ) + # Lazy import keeps base task module decoupled from catalog code. + from pypaimon.catalog.catalog_factory import CatalogFactory + catalog = CatalogFactory.create(dict(self._catalog_loader_options)) + return catalog.get_table(self._table_identifier) + + def to_dict(self) -> Dict[str, Any]: + """Standard envelope; subclasses override _to_payload to add fields.""" + return { + "type": self.TYPE, + "catalog_options": self._catalog_loader_options, + "table_identifier": self._table_identifier, + "payload": self._to_payload(), + } + + @abstractmethod + def _to_payload(self) -> Dict[str, Any]: + """Subclass-specific data (partition / bucket / files / ...).""" + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "CompactTask": + task_type = data.get("type") + impl = _TASK_REGISTRY.get(task_type) + if impl is None: + raise ValueError(f"Unknown CompactTask type: {task_type}") + task = impl._from_payload(data.get("payload") or {}) + loader_opts = data.get("catalog_options") + identifier = data.get("table_identifier") + if loader_opts and identifier: + task.with_table_loader(loader_opts, identifier) + return task + + @classmethod + @abstractmethod + def _from_payload(cls, payload: Dict[str, Any]) -> "CompactTask": + """Construct a task from the subclass-specific payload only.""" + + def serialize(self) -> bytes: + return json.dumps(self.to_dict(), separators=(",", ":")).encode("utf-8") + + @classmethod + def deserialize(cls, payload: bytes) -> "CompactTask": + data = json.loads(payload.decode("utf-8")) + return cls.from_dict(data) + + +_TASK_REGISTRY: Dict[str, type] = {} + + +def register_compact_task(impl: type) -> type: + """Decorator to register a CompactTask subclass under its TYPE string. + + The registry powers CompactTask.deserialize() so the executor can route + payloads back to the correct subclass without a hard import. 
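A hedged sketch of the driver/worker round trip this envelope and registry enable (the catalog options, table identifier, and `files` list below are placeholders, not values from this patch):

from pypaimon.compact.task.append_compact_task import AppendCompactTask
from pypaimon.compact.task.compact_task import CompactTask

# Driver side: build the task, attach the loader spec, ship JSON bytes.
task = AppendCompactTask(partition=("2024-01-01",), bucket=0, files=files)
task.with_table_loader({"warehouse": "/tmp/warehouse"}, "default.my_table")
payload = task.serialize()

# Worker side: the TYPE string "append-compact" routes back to the class,
# the loader spec rebuilds the table, and run() produces a CommitMessage.
rebuilt = CompactTask.deserialize(payload)
commit_message = rebuilt.run()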
+ """ + if not issubclass(impl, CompactTask): + raise TypeError(f"{impl} is not a CompactTask subclass") + if not impl.TYPE: + raise ValueError(f"{impl} must define a non-empty TYPE") + if impl.TYPE in _TASK_REGISTRY and _TASK_REGISTRY[impl.TYPE] is not impl: + raise ValueError(f"CompactTask TYPE {impl.TYPE!r} already registered") + _TASK_REGISTRY[impl.TYPE] = impl + return impl diff --git a/paimon-python/pypaimon/compact/task/merge_tree_compact_task.py b/paimon-python/pypaimon/compact/task/merge_tree_compact_task.py new file mode 100644 index 000000000000..feba025c0d74 --- /dev/null +++ b/paimon-python/pypaimon/compact/task/merge_tree_compact_task.py @@ -0,0 +1,117 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from typing import Any, Dict, List, Tuple + +from pypaimon.compact.rewriter.merge_tree_compact_rewriter import \ + MergeTreeCompactRewriter +from pypaimon.compact.task.compact_task import CompactTask, register_compact_task +from pypaimon.manifest.schema.data_file_meta import (DataFileMeta, decode_value, + encode_value) +from pypaimon.read.interval_partition import IntervalPartition +from pypaimon.read.reader.merge_function import \ + create_merge_function_factory +from pypaimon.write.commit_message import CommitMessage +from pypaimon.write.compact_increment import CompactIncrement + + +@register_compact_task +class MergeTreeCompactTask(CompactTask): + """Compact a single (partition, bucket) of a primary-key table. + + Carries the picked CompactUnit's files plus the strategy-decided + output_level and drop_delete flag. The driver attaches either the + in-process FileStoreTable (LocalExecutor) or a catalog loader spec via + with_table_loader (RayExecutor); _resolve_table picks whichever is set. + """ + + TYPE = "merge-tree-compact" + + def __init__( + self, + partition: Tuple, + bucket: int, + files: List[DataFileMeta], + output_level: int, + drop_delete: bool, + table=None, + ): + self.partition = tuple(partition) + self.bucket = bucket + self.files = list(files) + self.output_level = output_level + self.drop_delete = drop_delete + self._table = table + + def with_table(self, table) -> "MergeTreeCompactTask": + self._table = table + return self + + def run(self) -> CommitMessage: + table = self._resolve_table() + + # IntervalPartition reproduces split_read.MergeFileSplitRead.create_reader's + # section grouping so the rewriter sees the same "non-overlapping + # SortedRuns per section" layout it would on a normal scan. 
+ sections = IntervalPartition(self.files).partition() + + rewriter = MergeTreeCompactRewriter( + table=table, + mf_factory=create_merge_function_factory(table.options), + ) + after = rewriter.rewrite( + partition=self.partition, + bucket=self.bucket, + output_level=self.output_level, + sections=sections, + drop_delete=self.drop_delete, + ) + + return CommitMessage( + partition=self.partition, + bucket=self.bucket, + total_buckets=table.total_buckets, + compact_increment=CompactIncrement( + compact_before=list(self.files), + compact_after=list(after), + ), + ) + + def _to_payload(self) -> Dict[str, Any]: + return { + "partition": [encode_value(v) for v in self.partition], + "bucket": self.bucket, + "files": [f.to_dict() for f in self.files], + "output_level": self.output_level, + "drop_delete": self.drop_delete, + } + + @classmethod + def _from_payload(cls, payload: Dict[str, Any]) -> "MergeTreeCompactTask": + return cls( + partition=tuple(decode_value(v) for v in payload.get("partition") or []), + bucket=payload["bucket"], + files=[DataFileMeta.from_dict(f) for f in payload.get("files") or []], + output_level=payload["output_level"], + drop_delete=payload["drop_delete"], + ) + + def _resolve_table(self): + if self._table is not None: + return self._table + return self._resolve_table_via_loader() diff --git a/paimon-python/pypaimon/manifest/schema/data_file_meta.py b/paimon-python/pypaimon/manifest/schema/data_file_meta.py index 870dde7aa7b8..7158304e737e 100644 --- a/paimon-python/pypaimon/manifest/schema/data_file_meta.py +++ b/paimon-python/pypaimon/manifest/schema/data_file_meta.py @@ -17,15 +17,19 @@ ################################################################################ from dataclasses import dataclass -from datetime import datetime -from typing import List, Optional +from datetime import date, datetime, time as dt_time +from decimal import Decimal +from base64 import b64decode, b64encode +from typing import Any, Dict, List, Optional import time from pypaimon.utils.range import Range from pypaimon.data.timestamp import Timestamp from pypaimon.manifest.schema.simple_stats import (KEY_STATS_SCHEMA, VALUE_STATS_SCHEMA, SimpleStats) +from pypaimon.schema.data_types import DataField from pypaimon.table.row.generic_row import GenericRow +from pypaimon.table.row.internal_row import RowKind from pypaimon.utils.file_store_path_factory import _is_null_or_whitespace_only @@ -224,6 +228,186 @@ def assign_sequence_number(self, min_sequence_number: int, max_sequence_number: file_path=self.file_path ) + def to_dict(self) -> Dict[str, Any]: + """Serialize to a JSON-friendly dict for cross-process transport (e.g. Ray task payloads). + + Field types preserved via tagged objects (see encode_value/decode_value). 
+ """ + return { + "file_name": self.file_name, + "file_size": self.file_size, + "row_count": self.row_count, + "min_key": _generic_row_to_dict(self.min_key), + "max_key": _generic_row_to_dict(self.max_key), + "key_stats": _simple_stats_to_dict(self.key_stats), + "value_stats": _simple_stats_to_dict(self.value_stats), + "min_sequence_number": self.min_sequence_number, + "max_sequence_number": self.max_sequence_number, + "schema_id": self.schema_id, + "level": self.level, + "extra_files": list(self.extra_files) if self.extra_files is not None else [], + "creation_time": _timestamp_to_dict(self.creation_time), + "delete_row_count": self.delete_row_count, + "embedded_index": _bytes_to_str(self.embedded_index), + "file_source": self.file_source, + "value_stats_cols": list(self.value_stats_cols) if self.value_stats_cols is not None else None, + "external_path": self.external_path, + "first_row_id": self.first_row_id, + "write_cols": list(self.write_cols) if self.write_cols is not None else None, + "file_path": self.file_path, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "DataFileMeta": + return cls( + file_name=data["file_name"], + file_size=data["file_size"], + row_count=data["row_count"], + min_key=_generic_row_from_dict(data.get("min_key")), + max_key=_generic_row_from_dict(data.get("max_key")), + key_stats=_simple_stats_from_dict(data.get("key_stats")), + value_stats=_simple_stats_from_dict(data.get("value_stats")), + min_sequence_number=data["min_sequence_number"], + max_sequence_number=data["max_sequence_number"], + schema_id=data["schema_id"], + level=data["level"], + extra_files=list(data.get("extra_files") or []), + creation_time=_timestamp_from_dict(data.get("creation_time")), + delete_row_count=data.get("delete_row_count"), + embedded_index=_bytes_from_str(data.get("embedded_index")), + file_source=data.get("file_source"), + value_stats_cols=list(data["value_stats_cols"]) if data.get("value_stats_cols") is not None else None, + external_path=data.get("external_path"), + first_row_id=data.get("first_row_id"), + write_cols=list(data["write_cols"]) if data.get("write_cols") is not None else None, + file_path=data.get("file_path"), + ) + + +def _bytes_to_str(value: Optional[bytes]) -> Optional[str]: + if value is None: + return None + return b64encode(value).decode("ascii") + + +def _bytes_from_str(value: Optional[str]) -> Optional[bytes]: + if value is None: + return None + return b64decode(value.encode("ascii")) + + +def _timestamp_to_dict(ts: Optional[Timestamp]) -> Optional[Dict[str, int]]: + if ts is None: + return None + return {"ms": ts.get_millisecond(), "ns": ts.get_nano_of_millisecond()} + + +def _timestamp_from_dict(data: Optional[Dict[str, int]]) -> Optional[Timestamp]: + if data is None: + return None + return Timestamp(data["ms"], data.get("ns", 0)) + + +def encode_value(value: Any) -> Any: + """Encode a GenericRow / SimpleStats / partition field value into a JSON-friendly form. + + Tagged dicts mark non-JSON-native types so decode_value can round-trip them. + Public so that callers serializing other field-bearing structures (e.g. partitions + in CommitMessage) can reuse the same tagged encoding. 
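For reference, a couple of round trips through the tagged encoding (values invented; the dict shapes follow encode_value directly):

from decimal import Decimal

encoded = encode_value(Decimal("12.34"))
# -> {"__t__": "decimal", "v": "12.34"}; ints, floats, strs and None pass through as-is.
assert decode_value(encoded) == Decimal("12.34")

blob = encode_value(b"\x00\x01")
# -> {"__t__": "bytes", "v": "AAE="} (base64), restored verbatim by decode_value.
assert decode_value(blob) == b"\x00\x01"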
+ """ + if value is None or isinstance(value, (bool, int, float, str)): + return value + if isinstance(value, bytes): + return {"__t__": "bytes", "v": b64encode(value).decode("ascii")} + if isinstance(value, Decimal): + return {"__t__": "decimal", "v": str(value)} + if isinstance(value, Timestamp): + return {"__t__": "ts", "ms": value.get_millisecond(), "ns": value.get_nano_of_millisecond()} + if isinstance(value, datetime): + return {"__t__": "datetime", "v": value.isoformat()} + if isinstance(value, date): + return {"__t__": "date", "v": value.isoformat()} + if isinstance(value, dt_time): + return {"__t__": "time", "v": value.isoformat()} + raise TypeError( + f"Unsupported value type for DataFileMeta serialization: {type(value).__name__}" + ) + + +def decode_value(value: Any) -> Any: + if not isinstance(value, dict) or "__t__" not in value: + return value + tag = value["__t__"] + if tag == "bytes": + return b64decode(value["v"].encode("ascii")) + if tag == "decimal": + return Decimal(value["v"]) + if tag == "ts": + return Timestamp(value["ms"], value.get("ns", 0)) + if tag == "datetime": + return datetime.fromisoformat(value["v"]) + if tag == "date": + return date.fromisoformat(value["v"]) + if tag == "time": + return dt_time.fromisoformat(value["v"]) + raise ValueError(f"Unknown tagged value type: {tag}") + + +def _generic_row_to_dict(row) -> Optional[Dict[str, Any]]: + if row is None: + return None + # GenericRow exposes .values directly; BinaryRow lazily decodes per field + # via get_field(i). Normalize both into a list of decoded Python values + # so the dict format stays uniform. + if hasattr(row, "values"): + values = row.values + else: + values = [row.get_field(i) for i in range(len(row))] + fields = getattr(row, "fields", None) + return { + "values": [encode_value(v) for v in values], + "fields": [f.to_dict() for f in fields] if fields else [], + "row_kind": row.get_row_kind().value if hasattr(row, "get_row_kind") else 0, + } + + +def _generic_row_from_dict(data: Optional[Dict[str, Any]]) -> Optional[GenericRow]: + if data is None: + return None + fields = [DataField.from_dict(f) for f in data.get("fields", [])] + values = [decode_value(v) for v in data.get("values", [])] + row_kind = RowKind(data.get("row_kind", RowKind.INSERT.value)) + return GenericRow(values, fields, row_kind) + + +def _simple_stats_to_dict(stats: Optional[SimpleStats]) -> Optional[Dict[str, Any]]: + if stats is None: + return None + # null_counts may be a Python list (writer path) or a pyarrow Array-like + # (manifest reader path). Normalize to a plain list of ints. 
+ nc = stats.null_counts + if nc is None: + null_counts = [] + elif hasattr(nc, "to_pylist"): + null_counts = nc.to_pylist() + else: + null_counts = list(nc) + return { + "min_values": _generic_row_to_dict(stats.min_values), + "max_values": _generic_row_to_dict(stats.max_values), + "null_counts": null_counts, + } + + +def _simple_stats_from_dict(data: Optional[Dict[str, Any]]) -> Optional[SimpleStats]: + if data is None: + return None + return SimpleStats( + min_values=_generic_row_from_dict(data.get("min_values")), + max_values=_generic_row_from_dict(data.get("max_values")), + null_counts=list(data.get("null_counts") or []), + ) + DATA_FILE_META_SCHEMA = { "type": "record", diff --git a/paimon-python/pypaimon/read/reader/merge_function.py b/paimon-python/pypaimon/read/reader/merge_function.py new file mode 100644 index 000000000000..9f6251d20019 --- /dev/null +++ b/paimon-python/pypaimon/read/reader/merge_function.py @@ -0,0 +1,155 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Merge functions for primary key reduction. + +A MergeFunction defines how multiple KeyValues sharing the same primary key +are reduced into one. SortMergeReader feeds a function with all KVs for a key +in ascending sequence order via reset()/add()*/get_result(), then moves on. + +Phase 3 ships the production DeduplicateMergeFunction (kept identical to the +prior in-line implementation in sort_merge_reader.py) and stubs for the other +three engines so tables tagged with those engines fail loudly instead of +silently producing wrong data. Phase 6 will fill in the stubs. +""" + +from abc import ABC, abstractmethod +from typing import Optional + +from pypaimon.common.options.core_options import CoreOptions, MergeEngine +from pypaimon.table.row.key_value import KeyValue + + +class MergeFunction(ABC): + """Reduces a sequence of KeyValues sharing the same primary key into one.""" + + @abstractmethod + def reset(self) -> None: + """Discard any state from the previous key.""" + + @abstractmethod + def add(self, kv: KeyValue) -> None: + """Accept the next KV for the current key (caller delivers in seq order).""" + + @abstractmethod + def get_result(self) -> Optional[KeyValue]: + """Return the merged value for the current key, or None to drop the row.""" + + +class MergeFunctionFactory(ABC): + """A factory exists per-engine because some engines (PartialUpdate / + Aggregate) build per-call instances bound to projected schemas.""" + + @abstractmethod + def create(self) -> MergeFunction: + """Return a fresh MergeFunction. 
Caller owns it for one merge pass.""" + + +class DeduplicateMergeFunction(MergeFunction): + """Keep the latest KV (highest sequence number) for each key. + + Because SortMergeReader hands KVs over in ascending sequence order, the + last one added is always the latest — no comparison needed here. + """ + + def __init__(self): + self.latest_kv: Optional[KeyValue] = None + + def reset(self) -> None: + self.latest_kv = None + + def add(self, kv: KeyValue) -> None: + self.latest_kv = kv + + def get_result(self) -> Optional[KeyValue]: + return self.latest_kv + + +class DeduplicateMergeFunctionFactory(MergeFunctionFactory): + def create(self) -> MergeFunction: + return DeduplicateMergeFunction() + + +# --- Stubs reserved for Phase 6 ---------------------------------------------- +# These exist so MergeFunctionFactory.create_for(options) can route every Java +# MergeEngine to a Python class today; tables tagged with these engines simply +# fail loudly instead of silently producing wrong results, and Phase 6 will +# fill in the bodies without changing any callers. + + +class _UnimplementedMergeFunction(MergeFunction): + engine_name = "" + + def reset(self) -> None: + raise NotImplementedError( + f"MergeEngine '{self.engine_name}' compaction is not implemented yet " + f"(planned for Phase 6)." + ) + + def add(self, kv: KeyValue) -> None: + raise NotImplementedError( + f"MergeEngine '{self.engine_name}' compaction is not implemented yet " + f"(planned for Phase 6)." + ) + + def get_result(self) -> Optional[KeyValue]: + raise NotImplementedError( + f"MergeEngine '{self.engine_name}' compaction is not implemented yet " + f"(planned for Phase 6)." + ) + + +class PartialUpdateMergeFunction(_UnimplementedMergeFunction): + engine_name = "partial-update" + + +class AggregateMergeFunction(_UnimplementedMergeFunction): + engine_name = "aggregation" + + +class FirstRowMergeFunction(_UnimplementedMergeFunction): + engine_name = "first-row" + + +class _UnimplementedFactory(MergeFunctionFactory): + def __init__(self, engine_name: str, impl_cls: type): + self.engine_name = engine_name + self.impl_cls = impl_cls + + def create(self) -> MergeFunction: + # Build the instance now so callers see the failure at the first call + # site they own, with the engine name in the traceback. + return self.impl_cls() + + +def create_merge_function_factory(options: CoreOptions) -> MergeFunctionFactory: + """Pick the correct factory for the table's configured merge engine. + + Unknown / unsupported engines raise here (rather than later inside the + rewriter) so the failure points back at the configuration directly. 
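A minimal sketch of how a rewriter is expected to drive the factory for one key group (deduplicate engine assumed; `table` and `kvs_for_key` are placeholders, with KVs already in ascending sequence order as SortMergeReader guarantees):

factory = create_merge_function_factory(table.options)  # DeduplicateMergeFunctionFactory here
merge_fn = factory.create()

merge_fn.reset()
for kv in kvs_for_key:
    merge_fn.add(kv)
result = merge_fn.get_result()  # latest KV for the key, or None to drop the row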
+ """ + engine = options.merge_engine() + if engine == MergeEngine.DEDUPLICATE: + return DeduplicateMergeFunctionFactory() + if engine == MergeEngine.PARTIAL_UPDATE: + return _UnimplementedFactory("partial-update", PartialUpdateMergeFunction) + if engine == MergeEngine.AGGREGATE: + return _UnimplementedFactory("aggregation", AggregateMergeFunction) + if engine == MergeEngine.FIRST_ROW: + return _UnimplementedFactory("first-row", FirstRowMergeFunction) + raise ValueError(f"Unsupported MergeEngine: {engine!r}") diff --git a/paimon-python/pypaimon/read/reader/sort_merge_reader.py b/paimon-python/pypaimon/read/reader/sort_merge_reader.py index aedd593b702b..35d04cbd63a0 100644 --- a/paimon-python/pypaimon/read/reader/sort_merge_reader.py +++ b/paimon-python/pypaimon/read/reader/sort_merge_reader.py @@ -21,6 +21,8 @@ from pypaimon.read.reader.iface.record_iterator import RecordIterator from pypaimon.read.reader.iface.record_reader import RecordReader +from pypaimon.read.reader.merge_function import (DeduplicateMergeFunction, + MergeFunction) from pypaimon.schema.data_types import DataField, Keyword from pypaimon.schema.table_schema import TableSchema from pypaimon.table.row.internal_row import InternalRow @@ -28,11 +30,21 @@ class SortMergeReaderWithMinHeap(RecordReader): - """SortMergeReader implemented with min-heap.""" - - def __init__(self, readers: List[RecordReader[KeyValue]], schema: TableSchema): + """SortMergeReader implemented with min-heap. + + `merge_function` defaults to DeduplicateMergeFunction so the existing read + path is unchanged; compaction passes a factory-built instance to honor the + table's configured merge engine. + """ + + def __init__( + self, + readers: List[RecordReader[KeyValue]], + schema: TableSchema, + merge_function: Optional[MergeFunction] = None, + ): self.next_batch_readers = list(readers) - self.merge_function = DeduplicateMergeFunction() + self.merge_function = merge_function if merge_function is not None else DeduplicateMergeFunction() if schema.partition_keys: trimmed_primary_keys = [pk for pk in schema.primary_keys if pk not in schema.partition_keys] @@ -124,22 +136,6 @@ def _next_impl(self): return True -class DeduplicateMergeFunction: - """A MergeFunction where key is primary key (unique) and value is the full record, only keep the latest one.""" - - def __init__(self): - self.latest_kv = None - - def reset(self) -> None: - self.latest_kv = None - - def add(self, kv: KeyValue): - self.latest_kv = kv - - def get_result(self) -> Optional[KeyValue]: - return self.latest_kv - - class Element: def __init__(self, kv: KeyValue, iterator: RecordIterator[KeyValue], reader: RecordReader[KeyValue]): self.kv = kv diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py index c88f49f3b065..6c604a86a17e 100644 --- a/paimon-python/pypaimon/read/split_read.py +++ b/paimon-python/pypaimon/read/split_read.py @@ -66,6 +66,31 @@ KEY_FIELD_ID_START = 1000000 NULL_FIELD_INDEX = -1 + +def build_kv_file_fields( + table_fields: List[DataField], + trimmed_primary_keys: List[str], + value_fields: List[DataField], +) -> List[DataField]: + """Build the on-disk KV file schema: [_KEY_pk*, _SEQUENCE_NUMBER, _VALUE_KIND, value_cols]. + + Centralizes the layout so that read (split_read) and write/compact paths + cannot drift. Field ids for key columns are derived from each PK field's + id offset by KEY_FIELD_ID_START (matches Java KeyValueFieldsExtractor). 
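As a worked example for a hypothetical table with primary key `id` (field id 0) and columns `id BIGINT, name STRING`, the resulting layout would be:

fields = build_kv_file_fields(
    table_fields=table.fields,          # [id, name] in this example
    trimmed_primary_keys=["id"],
    value_fields=table.fields,
)
# [f.name for f in fields]
# -> ["_KEY_id", "_SEQUENCE_NUMBER", "_VALUE_KIND", "id", "name"]
# where _KEY_id keeps id's type and gets field id 0 + KEY_FIELD_ID_START (1000000).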
+ """ + fields: List[DataField] = [] + for f in table_fields: + if f.name in trimmed_primary_keys: + fields.append(DataField( + f.id + KEY_FIELD_ID_START, + f"{KEY_PREFIX}{f.name}", + f.type, + )) + fields.append(SpecialFields.SEQUENCE_NUMBER) + fields.append(SpecialFields.VALUE_KIND) + fields.extend(value_fields) + return fields + _COMPRESS_EXTENSIONS = frozenset(['gz', 'bz2', 'deflate', 'snappy', 'lz4', 'zst']) @@ -281,23 +306,11 @@ def _get_read_data_fields(self): return read_data_fields def _create_key_value_fields(self, value_field: List[DataField]): - all_fields: List[DataField] = self.table.fields - all_data_fields = [] - - for field in all_fields: - if field.name in self.trimmed_primary_key: - key_field_name = f"{KEY_PREFIX}{field.name}" - key_field_id = field.id + KEY_FIELD_ID_START - key_field = DataField(key_field_id, key_field_name, field.type) - all_data_fields.append(key_field) - - all_data_fields.append(SpecialFields.SEQUENCE_NUMBER) - all_data_fields.append(SpecialFields.VALUE_KIND) - - for field in value_field: - all_data_fields.append(field) - - return all_data_fields + return build_kv_file_fields( + table_fields=self.table.fields, + trimmed_primary_keys=self.trimmed_primary_key, + value_fields=value_field, + ) def create_index_mapping(self): base_index_mapping = self._create_base_index_mapping(self.read_fields, self._get_read_data_fields()) diff --git a/paimon-python/pypaimon/table/file_store_table.py b/paimon-python/pypaimon/table/file_store_table.py index 4dadb234db2a..03838c539e07 100644 --- a/paimon-python/pypaimon/table/file_store_table.py +++ b/paimon-python/pypaimon/table/file_store_table.py @@ -366,6 +366,33 @@ def new_stream_read_builder(self) -> 'StreamReadBuilder': def new_batch_write_builder(self) -> BatchWriteBuilder: return BatchWriteBuilder(self) + def new_compact_job( + self, + compact_options=None, + executor=None, + partition_predicate=None, + commit_user: Optional[str] = None, + catalog_options=None, + table_identifier: Optional[str] = None, + ): + """Create a CompactJob bound to this table. + + Args mirror CompactJob — passed through so callers can construct + coordinators/executors elsewhere when they need cross-table sharing. + Pass catalog_options + table_identifier when using a distributed + executor (RayExecutor) so workers can rebuild the table. + """ + from pypaimon.compact.job.compact_job import CompactJob + return CompactJob( + table=self, + compact_options=compact_options, + executor=executor, + partition_predicate=partition_predicate, + commit_user=commit_user, + catalog_options=catalog_options, + table_identifier=table_identifier, + ) + def new_stream_write_builder(self) -> StreamWriteBuilder: return StreamWriteBuilder(self) diff --git a/paimon-python/pypaimon/table/row/key_value.py b/paimon-python/pypaimon/table/row/key_value.py index 22647c4b6d6b..41e32386882a 100644 --- a/paimon-python/pypaimon/table/row/key_value.py +++ b/paimon-python/pypaimon/table/row/key_value.py @@ -55,3 +55,12 @@ def sequence_number(self) -> int: @property def value_row_kind_byte(self) -> int: return self._row_tuple[self.key_arity + 1] + + @property + def row_tuple(self) -> tuple: + """The underlying physical row tuple (key_cols, seq, kind, value_cols). + + Compaction writers consume this verbatim when buffering KVs back into + a RecordBatch — the column order matches the on-disk KV file schema. 
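As a concrete (hypothetical) shape for a table whose trimmed primary key is a single `id` column and whose value columns are `id, name`:

# Layout: (key columns..., _SEQUENCE_NUMBER, _VALUE_KIND, value columns...)
row_tuple = (42, 17, 0, 42, "alice")
# index 0      -> _KEY_id         = 42
# index 1      -> sequence number = 17   (key_arity == 1)
# index 2      -> value kind byte = 0    (INSERT)
# indexes 3..4 -> value columns id, name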
+ """ + return self._row_tuple diff --git a/paimon-python/pypaimon/tests/commit_message_serializer_test.py b/paimon-python/pypaimon/tests/commit_message_serializer_test.py new file mode 100644 index 000000000000..d41dcbe0520c --- /dev/null +++ b/paimon-python/pypaimon/tests/commit_message_serializer_test.py @@ -0,0 +1,228 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +import unittest +from datetime import date, datetime, time as dt_time +from decimal import Decimal + +from pypaimon.data.timestamp import Timestamp +from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.manifest.schema.simple_stats import SimpleStats +from pypaimon.schema.data_types import AtomicType, DataField +from pypaimon.table.row.generic_row import GenericRow +from pypaimon.table.row.internal_row import RowKind +from pypaimon.write.commit_message import CommitMessage +from pypaimon.write.commit_message_serializer import CommitMessageSerializer +from pypaimon.write.compact_increment import CompactIncrement +from pypaimon.write.data_increment import DataIncrement + + +def _key_field(idx: int, name: str, type_str: str) -> DataField: + return DataField(idx, name, AtomicType(type_str)) + + +def _build_data_file_meta(file_name: str = "data-1.parquet") -> DataFileMeta: + pk_fields = [_key_field(0, "id", "BIGINT"), _key_field(1, "name", "STRING")] + min_key = GenericRow([1, "alice"], pk_fields) + max_key = GenericRow([99, "zoe"], pk_fields) + key_stats = SimpleStats( + min_values=GenericRow([1, "alice"], pk_fields), + max_values=GenericRow([99, "zoe"], pk_fields), + null_counts=[0, 0], + ) + value_stats = SimpleStats( + min_values=GenericRow([], []), + max_values=GenericRow([], []), + null_counts=[], + ) + return DataFileMeta.create( + file_name=file_name, + file_size=4096, + row_count=99, + min_key=min_key, + max_key=max_key, + key_stats=key_stats, + value_stats=value_stats, + min_sequence_number=10, + max_sequence_number=200, + schema_id=0, + level=0, + extra_files=["index-1.idx"], + creation_time=Timestamp.from_epoch_millis(1_700_000_000_000, 123_456), + delete_row_count=2, + embedded_index=b"\x00\x01\x02\x03embedded", + file_source=1, + value_stats_cols=["c1"], + external_path="oss://bucket/path/to/file", + first_row_id=1000, + write_cols=["id", "name"], + file_path="/abs/path/data-1.parquet", + ) + + +class DataFileMetaSerdeTest(unittest.TestCase): + + def test_to_from_dict_roundtrip(self): + original = _build_data_file_meta() + rebuilt = DataFileMeta.from_dict(original.to_dict()) + + self.assertEqual(original, rebuilt) + # spot check of complex sub-fields that use tagged encoding + 
self.assertEqual(original.embedded_index, rebuilt.embedded_index) + self.assertEqual(original.creation_time, rebuilt.creation_time) + self.assertEqual(original.min_key.values, rebuilt.min_key.values) + self.assertEqual(original.min_key.row_kind, rebuilt.min_key.row_kind) + self.assertEqual( + [f.to_dict() for f in original.min_key.fields], + [f.to_dict() for f in rebuilt.min_key.fields], + ) + + def test_value_encoding_supports_decimal_and_temporal_types(self): + fields = [ + _key_field(0, "amount", "DECIMAL(10, 2)"), + _key_field(1, "ts", "TIMESTAMP(6)"), + _key_field(2, "d", "DATE"), + _key_field(3, "t", "TIME"), + _key_field(4, "blob", "BYTES"), + ] + row = GenericRow( + values=[ + Decimal("12.34"), + datetime(2024, 1, 2, 3, 4, 5, 678901), + date(2024, 1, 2), + dt_time(13, 45, 30, 250000), + b"binary-payload", + ], + fields=fields, + row_kind=RowKind.UPDATE_AFTER, + ) + # Reuse the GenericRow encode path through SimpleStats + stats = SimpleStats(min_values=row, max_values=row, null_counts=[0, 0, 0, 0, 0]) + meta = _build_data_file_meta() + meta.key_stats = stats + + rebuilt = DataFileMeta.from_dict(meta.to_dict()) + + self.assertEqual(rebuilt.key_stats.min_values.values[0], Decimal("12.34")) + self.assertEqual(rebuilt.key_stats.min_values.values[1], datetime(2024, 1, 2, 3, 4, 5, 678901)) + self.assertEqual(rebuilt.key_stats.min_values.values[2], date(2024, 1, 2)) + self.assertEqual(rebuilt.key_stats.min_values.values[3], dt_time(13, 45, 30, 250000)) + self.assertEqual(rebuilt.key_stats.min_values.values[4], b"binary-payload") + self.assertEqual(rebuilt.key_stats.min_values.row_kind, RowKind.UPDATE_AFTER) + + +class CommitMessageSerializerTest(unittest.TestCase): + + def test_serialize_deserialize_roundtrip_for_compact_message(self): + before_files = [_build_data_file_meta(f"old-{i}.parquet") for i in range(3)] + after_files = [_build_data_file_meta("new-merged.parquet")] + message = CommitMessage( + partition=("2024-01-01", "us"), + bucket=2, + total_buckets=8, + compact_increment=CompactIncrement( + compact_before=before_files, + compact_after=after_files, + ), + check_from_snapshot=42, + ) + + payload = CommitMessageSerializer.serialize(message) + rebuilt = CommitMessageSerializer.deserialize(payload) + + self.assertIsInstance(payload, bytes) + self.assertEqual(message.partition, rebuilt.partition) + self.assertEqual(message.bucket, rebuilt.bucket) + self.assertEqual(message.total_buckets, rebuilt.total_buckets) + self.assertEqual(message.new_files, rebuilt.new_files) + self.assertEqual(message.compact_before, rebuilt.compact_before) + self.assertEqual(message.compact_after, rebuilt.compact_after) + self.assertEqual(message.check_from_snapshot, rebuilt.check_from_snapshot) + + def test_serialize_deserialize_roundtrip_for_append_message(self): + message = CommitMessage( + partition=(), + bucket=0, + data_increment=DataIncrement(new_files=[_build_data_file_meta("append-1.parquet")]), + ) + + rebuilt = CommitMessageSerializer.deserialize(CommitMessageSerializer.serialize(message)) + + self.assertEqual(message.partition, rebuilt.partition) + self.assertEqual(message.bucket, rebuilt.bucket) + self.assertEqual(message.new_files, rebuilt.new_files) + self.assertEqual([], rebuilt.compact_before) + self.assertEqual([], rebuilt.compact_after) + + def test_unsupported_version_is_rejected(self): + message = CommitMessage( + partition=(), + bucket=0, + data_increment=DataIncrement(new_files=[_build_data_file_meta()]), + ) + payload_dict = CommitMessageSerializer.to_dict(message) + 
payload_dict["version"] = CommitMessageSerializer.VERSION + 1 + + with self.assertRaises(ValueError): + CommitMessageSerializer.from_dict(payload_dict) + + def test_serialize_supports_partition_with_non_json_native_types(self): + # Partitions can carry DATE/DECIMAL/bytes columns; serializer must round-trip them. + message = CommitMessage( + partition=(date(2024, 1, 2), Decimal("99.50"), b"raw"), + bucket=0, + compact_increment=CompactIncrement(compact_after=[_build_data_file_meta()]), + ) + + rebuilt = CommitMessageSerializer.deserialize(CommitMessageSerializer.serialize(message)) + + self.assertEqual((date(2024, 1, 2), Decimal("99.50"), b"raw"), rebuilt.partition) + + def test_serialize_supports_timestamp_partition(self): + ts = Timestamp.from_epoch_millis(1_700_000_000_000, 500_000) + message = CommitMessage( + partition=(ts,), + bucket=0, + compact_increment=CompactIncrement(compact_after=[_build_data_file_meta()]), + ) + + rebuilt = CommitMessageSerializer.deserialize(CommitMessageSerializer.serialize(message)) + + self.assertEqual((ts,), rebuilt.partition) + + def test_serialize_list_round_trip(self): + messages = [ + CommitMessage( + partition=(f"p{i}",), + bucket=i, + data_increment=DataIncrement(new_files=[_build_data_file_meta(f"f{i}.parquet")]), + ) + for i in range(3) + ] + payloads = CommitMessageSerializer.serialize_list(messages) + rebuilt = CommitMessageSerializer.deserialize_list(payloads) + + self.assertEqual(len(messages), len(rebuilt)) + for original, copy in zip(messages, rebuilt): + self.assertEqual(original.partition, copy.partition) + self.assertEqual(original.bucket, copy.bucket) + self.assertEqual(original.new_files, copy.new_files) + + +if __name__ == "__main__": + unittest.main() diff --git a/paimon-python/pypaimon/tests/compact/__init__.py b/paimon-python/pypaimon/tests/compact/__init__.py new file mode 100644 index 000000000000..65b48d4d79b4 --- /dev/null +++ b/paimon-python/pypaimon/tests/compact/__init__.py @@ -0,0 +1,17 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ diff --git a/paimon-python/pypaimon/tests/compact/append_compact_coordinator_test.py b/paimon-python/pypaimon/tests/compact/append_compact_coordinator_test.py new file mode 100644 index 000000000000..66c1f3e8d145 --- /dev/null +++ b/paimon-python/pypaimon/tests/compact/append_compact_coordinator_test.py @@ -0,0 +1,163 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +import os +import shutil +import tempfile +import unittest + +import pyarrow as pa + +from pypaimon import CatalogFactory, Schema +from pypaimon.compact.coordinator.append_compact_coordinator import \ + AppendCompactCoordinator +from pypaimon.compact.options import CompactOptions +from pypaimon.common.options.core_options import CoreOptions + + +class AppendCompactCoordinatorTest(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.temp_dir = tempfile.mkdtemp() + cls.warehouse = os.path.join(cls.temp_dir, "warehouse") + cls.catalog = CatalogFactory.create({"warehouse": cls.warehouse}) + cls.catalog.create_database("compact_db", False) + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.temp_dir, ignore_errors=True) + + def _create_unaware_table(self, table_name: str, options=None) -> "FileStoreTable": # noqa: F821 + full_name = f"compact_db.{table_name}" + try: + self.catalog.file_io.delete(self.catalog.get_table_path( + self.catalog.identifier_from_string(full_name) if hasattr( + self.catalog, "identifier_from_string") else None), recursive=True) + except Exception: + pass + # Force a small target_file_size so a few rows are already "small enough" + # to be candidates without writing thousands of rows per test. We also + # zero out source.split.open-file-cost so the size-based packer's bin + # accounting degenerates to raw file_size — keeps test assertions + # crisp instead of having to reason about a 4 MB per-file overhead + # dwarfing the 1 KB test files. + opts = { + CoreOptions.BUCKET.key(): "-1", + CoreOptions.TARGET_FILE_SIZE.key(): "1mb", + CoreOptions.SOURCE_SPLIT_OPEN_FILE_COST.key(): "0", + } + if options: + opts.update(options) + pa_schema = pa.schema([("id", pa.int32()), ("name", pa.string())]) + schema = Schema.from_pyarrow_schema(pa_schema, options=opts) + self.catalog.create_table(full_name, schema, True) + return self.catalog.get_table(full_name) + + def _write_n_files(self, table, n: int, rows_per_file: int = 5): + builder = table.new_batch_write_builder() + for i in range(n): + write = builder.new_write() + commit = builder.new_commit() + data = pa.Table.from_pydict( + { + "id": pa.array( + list(range(i * rows_per_file, (i + 1) * rows_per_file)), + type=pa.int32(), + ), + "name": [f"row-{j}" for j in range(rows_per_file)], + } + ) + write.write_arrow(data) + commit.commit(write.prepare_commit()) + write.close() + commit.close() + + def test_no_tasks_when_below_min_file_num(self): + table = self._create_unaware_table("below_min") + self._write_n_files(table, n=3) # default min_file_num=5 + # Re-fetch table so it sees the new snapshots. 
+ table = self.catalog.get_table("compact_db.below_min") + + coordinator = AppendCompactCoordinator(table, CompactOptions(min_file_num=5)) + tasks = coordinator.plan() + + self.assertEqual(0, len(tasks), + "Coordinator should not plan when fewer than min_file_num small files exist") + + def test_one_task_when_threshold_met(self): + table = self._create_unaware_table("at_threshold") + self._write_n_files(table, n=6) + table = self.catalog.get_table("compact_db.at_threshold") + + coordinator = AppendCompactCoordinator(table, CompactOptions(min_file_num=5)) + tasks = coordinator.plan() + + self.assertEqual(1, len(tasks)) + self.assertEqual((), tasks[0].partition) + self.assertEqual(0, tasks[0].bucket) + self.assertGreaterEqual(len(tasks[0].files), 5) + + def test_full_compaction_overrides_threshold(self): + table = self._create_unaware_table("full_compact") + self._write_n_files(table, n=2) # well below min_file_num=5 + table = self.catalog.get_table("compact_db.full_compact") + + coordinator = AppendCompactCoordinator( + table, + CompactOptions(min_file_num=5, full_compaction=True), + ) + tasks = coordinator.plan() + + self.assertEqual(1, len(tasks), + "full_compaction should produce a task even below min_file_num") + self.assertEqual(2, len(tasks[0].files)) + + def test_many_small_files_pack_into_single_task(self): + # Real parquet files written here are ~1KB (well under the 1MB target + # set in setUp), so the size-based packer never reaches its drain + # threshold and emits a single trailing chunk containing every file. + table = self._create_unaware_table("packed_single") + self._write_n_files(table, n=12) + table = self.catalog.get_table("compact_db.packed_single") + + coordinator = AppendCompactCoordinator(table, CompactOptions(min_file_num=5)) + tasks = coordinator.plan() + + self.assertEqual(1, len(tasks)) + self.assertEqual(12, len(tasks[0].files)) + + def test_pk_table_rejected(self): + full_name = "compact_db.pk_rejected" + pa_schema = pa.schema([("id", pa.int32()), ("name", pa.string())]) + schema = Schema.from_pyarrow_schema( + pa_schema, + primary_keys=["id"], + options={CoreOptions.BUCKET.key(): "1"}, + ) + try: + self.catalog.create_table(full_name, schema, True) + except Exception: + pass + table = self.catalog.get_table(full_name) + with self.assertRaises(ValueError): + AppendCompactCoordinator(table) + + +if __name__ == "__main__": + unittest.main() diff --git a/paimon-python/pypaimon/tests/compact/append_compact_e2e_test.py b/paimon-python/pypaimon/tests/compact/append_compact_e2e_test.py new file mode 100644 index 000000000000..80261e1f1e34 --- /dev/null +++ b/paimon-python/pypaimon/tests/compact/append_compact_e2e_test.py @@ -0,0 +1,188 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +import os +import shutil +import tempfile +import unittest + +import pyarrow as pa + +from pypaimon import CatalogFactory, Schema +from pypaimon.common.options.core_options import CoreOptions +from pypaimon.compact.options import CompactOptions + + +class AppendCompactE2ETest(unittest.TestCase): + """End-to-end test: write many small files, run a CompactJob, verify + the table reads back identical data with fewer underlying files and a + new snapshot tagged commit_kind=COMPACT. + """ + + @classmethod + def setUpClass(cls): + cls.temp_dir = tempfile.mkdtemp() + cls.warehouse = os.path.join(cls.temp_dir, "warehouse") + cls.catalog = CatalogFactory.create({"warehouse": cls.warehouse}) + cls.catalog.create_database("e2e_db", False) + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.temp_dir, ignore_errors=True) + + def _make_table(self, name: str, partitioned: bool = False): + full = f"e2e_db.{name}" + opts = { + CoreOptions.BUCKET.key(): "-1", # unaware bucket + CoreOptions.TARGET_FILE_SIZE.key(): "10mb", # plenty of headroom for small writes + # Zero open-file-cost so the size-based packer doesn't drain + # mid-loop on these tiny test files (each ~1 KB; with the 4 MB + # default cost a couple of files would already weigh more than + # 2x target and trigger a premature drain). + CoreOptions.SOURCE_SPLIT_OPEN_FILE_COST.key(): "0", + } + if partitioned: + pa_schema = pa.schema([ + ("id", pa.int32()), + ("name", pa.string()), + ("dt", pa.string()), + ]) + schema = Schema.from_pyarrow_schema( + pa_schema, partition_keys=["dt"], options=opts, + ) + else: + pa_schema = pa.schema([ + ("id", pa.int32()), + ("name", pa.string()), + ]) + schema = Schema.from_pyarrow_schema(pa_schema, options=opts) + self.catalog.create_table(full, schema, True) + return self.catalog.get_table(full) + + def _write_one(self, table, batch: pa.Table): + builder = table.new_batch_write_builder() + write = builder.new_write() + commit = builder.new_commit() + write.write_arrow(batch) + commit.commit(write.prepare_commit()) + write.close() + commit.close() + + def _read_sorted(self, table, sort_col: str = "id") -> pa.Table: + rb = table.new_read_builder() + scan = rb.new_scan() + splits = scan.plan().splits() + return rb.new_read().to_arrow(splits).sort_by(sort_col) + + def _count_live_files(self, table) -> int: + from pypaimon.read.scanner.file_scanner import FileScanner + from pypaimon.manifest.manifest_list_manager import ManifestListManager + snapshot = table.snapshot_manager().get_latest_snapshot() + if snapshot is None: + return 0 + mlm = ManifestListManager(table) + + def manifest_scanner(): + return mlm.read_all(snapshot), snapshot + return len(FileScanner(table, manifest_scanner).plan_files()) + + def test_unpartitioned_compact_reduces_file_count_and_preserves_data(self): + table = self._make_table("flat") + + rows_per_write = 4 + n_writes = 6 + for i in range(n_writes): + self._write_one(table, pa.Table.from_pydict({ + "id": pa.array( + list(range(i * rows_per_write, (i + 1) * rows_per_write)), + type=pa.int32(), + ), + "name": [f"r-{j}" for j in range(rows_per_write)], + })) + + table = self.catalog.get_table("e2e_db.flat") + before_files = self._count_live_files(table) + self.assertGreaterEqual(before_files, n_writes, + "Each write should leave at least one file") + before_data = self._read_sorted(table) + + job = 
table.new_compact_job(compact_options=CompactOptions(min_file_num=5)) + messages = job.execute() + + self.assertEqual(1, len(messages), + "Single (partition, bucket) → single CommitMessage") + msg = messages[0] + self.assertEqual(n_writes, len(msg.compact_before), + "All n_writes small files should have been picked up") + self.assertGreaterEqual(len(msg.compact_after), 1) + + table = self.catalog.get_table("e2e_db.flat") + after_files = self._count_live_files(table) + self.assertLess(after_files, before_files, + f"Compact must reduce live file count ({before_files} → {after_files})") + + after_data = self._read_sorted(table) + self.assertEqual(before_data, after_data, + "Compact must preserve data identity") + + latest = table.snapshot_manager().get_latest_snapshot() + self.assertEqual("COMPACT", latest.commit_kind) + + def test_partitioned_compact_emits_per_partition_messages(self): + table = self._make_table("partitioned", partitioned=True) + for partition in ["p1", "p2"]: + for i in range(5): + self._write_one(table, pa.Table.from_pydict({ + "id": pa.array([i * 10 + k for k in range(3)], type=pa.int32()), + "name": [f"x-{k}" for k in range(3)], + "dt": [partition] * 3, + })) + + table = self.catalog.get_table("e2e_db.partitioned") + messages = table.new_compact_job( + compact_options=CompactOptions(min_file_num=5), + ).execute() + + partitions = sorted(m.partition for m in messages) + self.assertEqual([("p1",), ("p2",)], partitions) + for m in messages: + self.assertEqual(5, len(m.compact_before)) + self.assertGreaterEqual(len(m.compact_after), 1) + + def test_no_op_when_nothing_to_compact(self): + table = self._make_table("noop") + # Only 2 writes — below default min_file_num. + for i in range(2): + self._write_one(table, pa.Table.from_pydict({ + "id": pa.array([i], type=pa.int32()), + "name": [f"x-{i}"], + })) + table = self.catalog.get_table("e2e_db.noop") + snapshot_before = table.snapshot_manager().get_latest_snapshot().id + + messages = table.new_compact_job().execute() + + self.assertEqual([], messages) + table = self.catalog.get_table("e2e_db.noop") + self.assertEqual(snapshot_before, + table.snapshot_manager().get_latest_snapshot().id, + "No-op compact must not produce a new snapshot") + + +if __name__ == "__main__": + unittest.main() diff --git a/paimon-python/pypaimon/tests/compact/append_compact_packing_test.py b/paimon-python/pypaimon/tests/compact/append_compact_packing_test.py new file mode 100644 index 000000000000..4d68e5b9f6f1 --- /dev/null +++ b/paimon-python/pypaimon/tests/compact/append_compact_packing_test.py @@ -0,0 +1,164 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +"""Pure-algorithm tests for AppendCompactCoordinator._pick_files_for_bucket. + +Drives the bin-packer with hand-built DataFileMeta lists so the size-based +packing logic can be verified independently of the storage layer. Mirrors +the test cases in Java AppendCompactCoordinatorTest's pack() coverage so +divergence shows up here first. +""" + +import unittest +from datetime import datetime +from typing import List + +from pypaimon.compact.coordinator.append_compact_coordinator import \ + AppendCompactCoordinator +from pypaimon.compact.options import CompactOptions +from pypaimon.data.timestamp import Timestamp +from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.manifest.schema.simple_stats import SimpleStats +from pypaimon.table.row.generic_row import GenericRow + + +def _file(name: str, size: int) -> DataFileMeta: + return DataFileMeta.create( + file_name=name, + file_size=size, + row_count=10, + min_key=GenericRow([], []), + max_key=GenericRow([], []), + key_stats=SimpleStats.empty_stats(), + value_stats=SimpleStats.empty_stats(), + min_sequence_number=0, + max_sequence_number=10, + schema_id=0, + level=0, + extra_files=[], + creation_time=Timestamp.from_local_date_time(datetime(2024, 1, 1)), + ) + + +def _make_coord(min_file_num: int = 5, full: bool = False) -> AppendCompactCoordinator: + """Bypass __init__ — we never touch the table here, only call the pure helper.""" + coord = AppendCompactCoordinator.__new__(AppendCompactCoordinator) + coord.options = CompactOptions(min_file_num=min_file_num, full_compaction=full) + return coord + + +# Defaults matching Java's AppendCompactCoordinator +TARGET = 128 * 1024 * 1024 # 128 MB +OPEN_COST = 4 * 1024 * 1024 # 4 MB (Java's source.split.open-file-cost default) + + +class PickFilesAlgorithmTest(unittest.TestCase): + + def test_skips_files_at_or_above_target_size(self): + coord = _make_coord(min_file_num=2) + files = [_file("big-1", TARGET), _file("big-2", TARGET + 1)] + self.assertEqual([], coord._pick_files_for_bucket(files, TARGET, OPEN_COST)) + + def test_drains_bin_when_weighted_size_reaches_2x_target(self): + # 6 files of 50 MB each → weighted = 6*(50+4) = 324 MB; target*2 = 256 MB. + # Sorted ascending by size (all equal here). Walk: + # after 1 file: bin_size=54, count=1 → skip drain (bin must have >1) + # after 2 files: bin_size=108, count=2 → 108 < 256 → keep + # after 3 files: bin_size=162 → keep + # after 4 files: bin_size=216 → keep + # after 5 files: bin_size=270 → drain ✅ (chunk[0] = 5 files) + # after 6 files: bin_size=54, count=1 → tail < min_file_num=2 here + # → 1 chunk, 5 files; trailing 1 file dropped (< min_file_num=2) + coord = _make_coord(min_file_num=2) + files = [_file(f"f{i}", 50 * 1024 * 1024) for i in range(6)] + chunks = coord._pick_files_for_bucket(files, TARGET, OPEN_COST) + + self.assertEqual(1, len(chunks)) + self.assertEqual(5, len(chunks[0])) + + def test_trailing_bin_emitted_when_meets_min_file_num(self): + # 5 small files: each 10 MB. Weighted: 5*(10+4)=70 MB. Below 256 MB + # threshold → never drains mid-loop. Trailing bin has 5 files which + # equals min_file_num=5 → emitted as the only chunk. 
+ coord = _make_coord(min_file_num=5) + files = [_file(f"f{i}", 10 * 1024 * 1024) for i in range(5)] + chunks = coord._pick_files_for_bucket(files, TARGET, OPEN_COST) + + self.assertEqual(1, len(chunks)) + self.assertEqual(5, len(chunks[0])) + + def test_trailing_bin_dropped_when_below_min_file_num(self): + coord = _make_coord(min_file_num=5) + files = [_file(f"f{i}", 10 * 1024 * 1024) for i in range(4)] + self.assertEqual([], coord._pick_files_for_bucket(files, TARGET, OPEN_COST)) + + def test_sort_by_size_ascending_lets_small_files_lead(self): + # The size-asc sort means small files accumulate first, and a single + # big file lands in the bin only once it would push past the threshold. + coord = _make_coord(min_file_num=2) + small = [_file(f"s{i}", 1 * 1024 * 1024) for i in range(3)] + large = [_file("L", 120 * 1024 * 1024)] + chunks = coord._pick_files_for_bucket(small + large, TARGET, OPEN_COST) + # After 3 smalls: weighted = 3*(1+4) = 15 MB → no drain. + # Add large: weighted = 15 + (120+4) = 139 MB; still < 256 → no drain. + # End of loop: trailing bin has 4 files → meets min_file_num=2 → emitted. + # First file in chunk should be a small one (lowest size). + self.assertEqual(1, len(chunks)) + self.assertEqual(4, len(chunks[0])) + self.assertLess(chunks[0][0].file_size, chunks[0][-1].file_size) + + def test_full_compaction_includes_files_at_target_size_and_emits_short_tails(self): + # Two files at/above target size. Without full_compaction they are + # filtered out and nothing is planned; with full_compaction they count + # AND the trailing-bin minimum drops to 1 so even a single-file chunk + # is emitted. + coord = _make_coord(min_file_num=5, full=True) + files = [_file("big-1", TARGET), _file("big-2", TARGET + 1)] + chunks = coord._pick_files_for_bucket(files, TARGET, OPEN_COST) + # weighted = (TARGET+OPEN) + (TARGET+1+OPEN) > 2*TARGET → drain after 2nd file. + self.assertEqual(1, len(chunks)) + self.assertEqual(2, len(chunks[0])) + + def test_full_compaction_single_file_emits(self): + coord = _make_coord(min_file_num=5, full=True) + files = [_file("only", 1024)] + chunks = coord._pick_files_for_bucket(files, TARGET, OPEN_COST) + self.assertEqual(1, len(chunks)) + self.assertEqual(1, len(chunks[0])) + + def test_open_file_cost_pulls_drain_forward_for_many_tiny_files(self): + # 80 tiny 100KB files. Without open_file_cost weighting, weighted size + # is ~8 MB total — far below 256 MB threshold, never drains, all 80 + # land in one task. With open_file_cost=4 MB Java-style: weighted per + # file ≈ 4 MB, so drain triggers around bin size 64 (= 256/4) files. + coord = _make_coord(min_file_num=2) + files = [_file(f"t{i}", 100 * 1024) for i in range(80)] + chunks = coord._pick_files_for_bucket(files, TARGET, OPEN_COST) + self.assertGreaterEqual(len(chunks), 2, + "open_file_cost must split a 'many tiny files' bucket") + # Every emitted bin should hold > 1 file (the > 1 guard in the loop).
+ for c in chunks: + self.assertGreater(len(c), 1) + + def test_empty_input_returns_empty(self): + coord = _make_coord() + self.assertEqual([], coord._pick_files_for_bucket([], TARGET, OPEN_COST)) + + +if __name__ == "__main__": + unittest.main() diff --git a/paimon-python/pypaimon/tests/compact/append_compact_rewriter_test.py b/paimon-python/pypaimon/tests/compact/append_compact_rewriter_test.py new file mode 100644 index 000000000000..e2c012d9c6ec --- /dev/null +++ b/paimon-python/pypaimon/tests/compact/append_compact_rewriter_test.py @@ -0,0 +1,176 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +import os +import shutil +import tempfile +import unittest +from unittest.mock import patch + +import pyarrow as pa + +from pypaimon import CatalogFactory, Schema +from pypaimon.common.options.core_options import CoreOptions +from pypaimon.compact.coordinator.append_compact_coordinator import \ + AppendCompactCoordinator +from pypaimon.compact.options import CompactOptions +from pypaimon.compact.rewriter.append_compact_rewriter import \ + AppendCompactRewriter + + +class AppendCompactRewriterTest(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.temp_dir = tempfile.mkdtemp() + cls.warehouse = os.path.join(cls.temp_dir, "warehouse") + cls.catalog = CatalogFactory.create({"warehouse": cls.warehouse}) + cls.catalog.create_database("rw_db", False) + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.temp_dir, ignore_errors=True) + + def _make_unaware_table(self, name: str): + full = f"rw_db.{name}" + opts = { + CoreOptions.BUCKET.key(): "-1", + CoreOptions.TARGET_FILE_SIZE.key(): "10mb", + CoreOptions.SOURCE_SPLIT_OPEN_FILE_COST.key(): "0", + } + pa_schema = pa.schema([("id", pa.int32()), ("name", pa.string())]) + schema = Schema.from_pyarrow_schema(pa_schema, options=opts) + self.catalog.create_table(full, schema, True) + return self.catalog.get_table(full) + + def _write_n(self, table, n: int): + builder = table.new_batch_write_builder() + for i in range(n): + w = builder.new_write() + c = builder.new_commit() + data = pa.Table.from_pydict({ + "id": pa.array([i], type=pa.int32()), + "name": [f"row-{i}"], + }) + w.write_arrow(data) + c.commit(w.prepare_commit()) + w.close() + c.close() + + def test_does_not_mutate_input_metadata(self): + table = self._make_unaware_table("no_mutate") + self._write_n(table, n=5) + table = self.catalog.get_table("rw_db.no_mutate") + coord = AppendCompactCoordinator(table, CompactOptions(min_file_num=5)) + tasks = coord.plan() + self.assertEqual(1, len(tasks)) + files = tasks[0].files + + original_paths = [f.file_path for f in files] + 
self.assertTrue(all(p is None for p in original_paths), + "Coordinator should hand off manifest entries with file_path=None") + + rewriter = AppendCompactRewriter(table) + rewriter.rewrite(tasks[0].partition, tasks[0].bucket, files) + + # Rewriter must not write file_path back onto manifest-owned objects. + self.assertEqual(original_paths, [f.file_path for f in files]) + + def test_output_seq_range_starts_at_input0_min_seq_and_spans_total_rows(self): + # Mirrors Java BaseAppendFileStoreWrite.compactRewrite seeding the + # rolling writer's counter with toCompact.get(0).minSequenceNumber() + # and bumping it once per row written. After compact, the union of + # output [min_seq, max_seq] ranges must be: + # [files[0].min_seq, files[0].min_seq + total_input_rows - 1] + # contiguous, no gaps, no overlap. + table = self._make_unaware_table("seq_range") + self._write_n(table, n=5) # 5 files, 1 row each → 5 rows total + table = self.catalog.get_table("rw_db.seq_range") + coord = AppendCompactCoordinator(table, CompactOptions(min_file_num=5)) + tasks = coord.plan() + self.assertEqual(1, len(tasks)) + + # files arrive size-asc-sorted; in unaware tables every write seeded + # at seq=0, so files[0].min_sequence_number is also 0 here. + seed = tasks[0].files[0].min_sequence_number + total_rows = sum(f.row_count for f in tasks[0].files) + + rewriter = AppendCompactRewriter(table) + new_files = rewriter.rewrite(tasks[0].partition, tasks[0].bucket, list(tasks[0].files)) + + self.assertGreater(len(new_files), 0) + self.assertEqual(total_rows, sum(f.row_count for f in new_files), + "Compact must preserve total row count") + # Per-file invariant: max - min + 1 == row_count (the rolling writer + # advanced exactly once per row in this file's slice). + for f in new_files: + self.assertEqual(f.row_count, f.max_sequence_number - f.min_sequence_number + 1, + f"file {f.file_name} seq range must match its row_count") + # Cross-file invariant: starts at seed, no gaps/overlaps when sorted. + sorted_out = sorted(new_files, key=lambda f: f.min_sequence_number) + self.assertEqual(seed, sorted_out[0].min_sequence_number) + for prev, curr in zip(sorted_out, sorted_out[1:]): + self.assertEqual(prev.max_sequence_number + 1, curr.min_sequence_number, + "Adjacent output files must form a contiguous seq range") + # Total upper bound matches Java: seed + total_rows - 1. 
+ self.assertEqual(seed + total_rows - 1, sorted_out[-1].max_sequence_number) + + def test_output_files_tagged_compact_source(self): + from pypaimon.compact.rewriter.merge_tree_rolling_writer import \ + FILE_SOURCE_COMPACT + table = self._make_unaware_table("source_tag") + self._write_n(table, n=5) + table = self.catalog.get_table("rw_db.source_tag") + coord = AppendCompactCoordinator(table, CompactOptions(min_file_num=5)) + tasks = coord.plan() + + new_files = AppendCompactRewriter(table).rewrite( + tasks[0].partition, tasks[0].bucket, list(tasks[0].files), + ) + self.assertGreater(len(new_files), 0) + for f in new_files: + self.assertEqual(FILE_SOURCE_COMPACT, f.file_source) + + def test_aborts_partial_output_on_failure(self): + table = self._make_unaware_table("abort_on_failure") + self._write_n(table, n=5) + table = self.catalog.get_table("rw_db.abort_on_failure") + coord = AppendCompactCoordinator(table, CompactOptions(min_file_num=5)) + tasks = coord.plan() + self.assertEqual(1, len(tasks)) + + rewriter = AppendCompactRewriter(table) + # Force AppendOnlyDataWriter.prepare_commit to blow up after some + # batches have already been buffered/flushed; rewriter must abort + # those outputs rather than leave them on disk. + with patch( + "pypaimon.write.writer.append_only_data_writer.AppendOnlyDataWriter.prepare_commit", + side_effect=RuntimeError("boom"), + ): + with self.assertRaises(RuntimeError): + rewriter.rewrite(tasks[0].partition, tasks[0].bucket, list(tasks[0].files)) + + # Snapshot id should not have advanced (no successful commit happened), + # and no new compaction snapshot should exist. + latest = table.snapshot_manager().get_latest_snapshot() + self.assertNotEqual("COMPACT", latest.commit_kind, + "Failed compaction must not produce a COMPACT snapshot") + + +if __name__ == "__main__": + unittest.main() diff --git a/paimon-python/pypaimon/tests/compact/compact_options_test.py b/paimon-python/pypaimon/tests/compact/compact_options_test.py new file mode 100644 index 000000000000..32ae7b8422d7 --- /dev/null +++ b/paimon-python/pypaimon/tests/compact/compact_options_test.py @@ -0,0 +1,45 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +import unittest + +from pypaimon.compact.options import CompactOptions + + +class CompactOptionsTest(unittest.TestCase): + + def test_defaults(self): + opts = CompactOptions() + self.assertEqual(5, opts.min_file_num) + self.assertFalse(opts.full_compaction) + + def test_min_zero_rejected(self): + with self.assertRaises(ValueError): + CompactOptions(min_file_num=0) + + def test_to_from_dict_roundtrip(self): + opts = CompactOptions(min_file_num=2, full_compaction=True) + rebuilt = CompactOptions.from_dict(opts.to_dict()) + self.assertEqual(opts, rebuilt) + + def test_from_dict_none_returns_defaults(self): + self.assertEqual(CompactOptions(), CompactOptions.from_dict(None)) + + +if __name__ == "__main__": + unittest.main() diff --git a/paimon-python/pypaimon/tests/compact/compact_task_serde_test.py b/paimon-python/pypaimon/tests/compact/compact_task_serde_test.py new file mode 100644 index 000000000000..27008053019e --- /dev/null +++ b/paimon-python/pypaimon/tests/compact/compact_task_serde_test.py @@ -0,0 +1,125 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +import unittest +from datetime import date, datetime + +from pypaimon.compact.task.append_compact_task import AppendCompactTask +from pypaimon.compact.task.compact_task import CompactTask +from pypaimon.compact.task.merge_tree_compact_task import MergeTreeCompactTask +from pypaimon.data.timestamp import Timestamp +from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.manifest.schema.simple_stats import SimpleStats +from pypaimon.schema.data_types import AtomicType, DataField +from pypaimon.table.row.generic_row import GenericRow + +PK_FIELDS = [DataField(0, "id", AtomicType("BIGINT"))] + + +def _make_file(name: str = "data-1.parquet") -> DataFileMeta: + return DataFileMeta.create( + file_name=name, + file_size=4096, + row_count=10, + min_key=GenericRow([1], PK_FIELDS), + max_key=GenericRow([99], PK_FIELDS), + key_stats=SimpleStats.empty_stats(), + value_stats=SimpleStats.empty_stats(), + min_sequence_number=10, + max_sequence_number=20, + schema_id=0, + level=0, + extra_files=[], + creation_time=Timestamp.from_epoch_millis(1_700_000_000_000), + ) + + +class AppendCompactTaskSerdeTest(unittest.TestCase): + + def test_round_trip_with_loader(self): + original = AppendCompactTask( + partition=("p1",), + bucket=2, + files=[_make_file("a.parquet"), _make_file("b.parquet")], + ).with_table_loader({"warehouse": "/tmp/wh"}, "default.t") + + rebuilt = CompactTask.deserialize(original.serialize()) + + self.assertIsInstance(rebuilt, AppendCompactTask) + self.assertEqual(("p1",), rebuilt.partition) + self.assertEqual(2, rebuilt.bucket) + self.assertEqual(2, len(rebuilt.files)) + self.assertEqual(["a.parquet", "b.parquet"], [f.file_name for f in rebuilt.files]) + self.assertEqual({"warehouse": "/tmp/wh"}, rebuilt._catalog_loader_options) + self.assertEqual("default.t", rebuilt._table_identifier) + + def test_partition_with_non_json_native_types_round_trips(self): + original = AppendCompactTask( + partition=(date(2024, 1, 2), datetime(2024, 1, 2, 3, 4)), + bucket=0, + files=[_make_file()], + ).with_table_loader({"warehouse": "/tmp/wh"}, "default.t") + + rebuilt = CompactTask.deserialize(original.serialize()) + + self.assertEqual((date(2024, 1, 2), datetime(2024, 1, 2, 3, 4)), rebuilt.partition) + + +class MergeTreeCompactTaskSerdeTest(unittest.TestCase): + + def test_round_trip_includes_output_level_and_drop_delete(self): + original = MergeTreeCompactTask( + partition=("p1",), + bucket=0, + files=[_make_file("merge-a.parquet"), _make_file("merge-b.parquet")], + output_level=3, + drop_delete=True, + ).with_table_loader({"warehouse": "/tmp/wh"}, "default.pk") + + rebuilt = CompactTask.deserialize(original.serialize()) + + self.assertIsInstance(rebuilt, MergeTreeCompactTask) + self.assertEqual(("p1",), rebuilt.partition) + self.assertEqual(0, rebuilt.bucket) + self.assertEqual(3, rebuilt.output_level) + self.assertTrue(rebuilt.drop_delete) + self.assertEqual(2, len(rebuilt.files)) + self.assertEqual({"warehouse": "/tmp/wh"}, rebuilt._catalog_loader_options) + self.assertEqual("default.pk", rebuilt._table_identifier) + + def test_run_without_table_or_loader_raises_clear_error(self): + task = MergeTreeCompactTask( + partition=("p1",), + bucket=0, + files=[_make_file()], + output_level=2, + drop_delete=False, + ) + with self.assertRaises(RuntimeError): + task.run() + + +class CompactTaskRegistryTest(unittest.TestCase): + + def test_unknown_type_rejected(self): + with self.assertRaises(ValueError): + 
CompactTask.from_dict({"type": "bogus", "payload": {}}) + + +if __name__ == "__main__": + unittest.main() diff --git a/paimon-python/pypaimon/tests/compact/levels_test.py b/paimon-python/pypaimon/tests/compact/levels_test.py new file mode 100644 index 000000000000..a29b6a5c52b5 --- /dev/null +++ b/paimon-python/pypaimon/tests/compact/levels_test.py @@ -0,0 +1,127 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +import unittest +from datetime import datetime +from typing import List + +from pypaimon.compact.levels import Levels +from pypaimon.data.timestamp import Timestamp +from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.manifest.schema.simple_stats import SimpleStats +from pypaimon.schema.data_types import AtomicType, DataField +from pypaimon.table.row.generic_row import GenericRow + +PK_FIELDS = [DataField(0, "id", AtomicType("BIGINT"))] + + +def _key(v: int) -> GenericRow: + return GenericRow([v], PK_FIELDS) + + +def _file(name: str, level: int, *, min_k: int, max_k: int, + min_seq: int, max_seq: int, file_size: int = 1024, + ts_ms: int = 1_700_000_000_000) -> DataFileMeta: + return DataFileMeta.create( + file_name=name, + file_size=file_size, + row_count=10, + min_key=_key(min_k), + max_key=_key(max_k), + key_stats=SimpleStats.empty_stats(), + value_stats=SimpleStats.empty_stats(), + min_sequence_number=min_seq, + max_sequence_number=max_seq, + schema_id=0, + level=level, + extra_files=[], + creation_time=Timestamp.from_epoch_millis(ts_ms), + ) + + +def _key_cmp(a: GenericRow, b: GenericRow) -> int: + av = a.values[0] + bv = b.values[0] + return -1 if av < bv else (1 if av > bv else 0) + + +class LevelsTest(unittest.TestCase): + + def test_level0_orders_newest_first(self): + files: List[DataFileMeta] = [ + _file("f1", 0, min_k=1, max_k=2, min_seq=10, max_seq=20), + _file("f2", 0, min_k=3, max_k=4, min_seq=30, max_seq=40), + _file("f3", 0, min_k=5, max_k=6, min_seq=50, max_seq=60), + ] + levels = Levels(_key_cmp, files, num_levels=3) + + ordered = levels.level0 + self.assertEqual(["f3", "f2", "f1"], [f.file_name for f in ordered]) + + def test_number_of_sorted_runs_counts_l0_files_plus_nonempty_levels(self): + files = [ + _file("a", 0, min_k=1, max_k=2, min_seq=10, max_seq=20), + _file("b", 0, min_k=3, max_k=4, min_seq=30, max_seq=40), + _file("c", 1, min_k=5, max_k=8, min_seq=50, max_seq=60), + _file("d", 1, min_k=9, max_k=12, min_seq=70, max_seq=80), + _file("e", 3, min_k=20, max_k=30, min_seq=90, max_seq=100), + ] + levels = Levels(_key_cmp, files, num_levels=5) + + # L0 has 2 files (=2 runs), L1 has 1 SortedRun, L3 has 1 SortedRun → 4 + self.assertEqual(4, 
levels.number_of_sorted_runs()) + self.assertEqual(3, levels.non_empty_highest_level()) + + def test_levels_grow_to_accommodate_input_above_declared_num_levels(self): + files = [_file("z", 7, min_k=1, max_k=2, min_seq=10, max_seq=20)] + # Declare 3 but the file is at level 7 — Levels must expand. + levels = Levels(_key_cmp, files, num_levels=3) + self.assertEqual(8, levels.number_of_levels()) # levels 0..7 + self.assertEqual(7, levels.non_empty_highest_level()) + + def test_update_replaces_files_at_their_levels(self): + a = _file("a", 0, min_k=1, max_k=2, min_seq=10, max_seq=20) + b = _file("b", 0, min_k=3, max_k=4, min_seq=30, max_seq=40) + c = _file("c", 2, min_k=5, max_k=6, min_seq=50, max_seq=60) + levels = Levels(_key_cmp, [a, b, c], num_levels=4) + + merged = _file("merged", 2, min_k=1, max_k=6, min_seq=10, max_seq=60) + levels.update(before=[a, b, c], after=[merged]) + + self.assertEqual(0, len(levels.level0)) + self.assertEqual(["merged"], [f.file_name for f in levels.run_of_level(2).files]) + self.assertEqual(1, levels.number_of_sorted_runs()) + + def test_update_per_level_routing(self): + a = _file("a", 0, min_k=1, max_k=2, min_seq=10, max_seq=20) + b = _file("b", 1, min_k=5, max_k=6, min_seq=30, max_seq=40) + levels = Levels(_key_cmp, [a, b], num_levels=3) + + # Move a from L0 → new file at L1; replace b at L1 with new file. + new_at_l1 = _file("new", 1, min_k=1, max_k=6, min_seq=10, max_seq=40) + levels.update(before=[a, b], after=[new_at_l1]) + self.assertEqual([], levels.level0) + self.assertEqual(["new"], [f.file_name for f in levels.run_of_level(1).files]) + + def test_invalid_num_levels_rejected(self): + with self.assertRaises(ValueError): + Levels(_key_cmp, [], num_levels=1) + + +if __name__ == "__main__": + unittest.main() diff --git a/paimon-python/pypaimon/tests/compact/merge_function_test.py b/paimon-python/pypaimon/tests/compact/merge_function_test.py new file mode 100644 index 000000000000..8a9ff1905817 --- /dev/null +++ b/paimon-python/pypaimon/tests/compact/merge_function_test.py @@ -0,0 +1,85 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +import unittest +from unittest.mock import MagicMock + +from pypaimon.common.options import Options +from pypaimon.common.options.core_options import CoreOptions, MergeEngine +from pypaimon.read.reader.merge_function import ( + DeduplicateMergeFunction, DeduplicateMergeFunctionFactory, + create_merge_function_factory) +from pypaimon.table.row.key_value import KeyValue + + +def _kv(key: int, seq: int, value: str = "v", value_kind: int = 0) -> KeyValue: + kv = KeyValue(key_arity=1, value_arity=1) + kv.replace((key, seq, value_kind, value)) + return kv + + +class DeduplicateMergeFunctionTest(unittest.TestCase): + + def test_keeps_last_added(self): + mf = DeduplicateMergeFunction() + mf.reset() + mf.add(_kv(1, 10, "old")) + mf.add(_kv(1, 20, "new")) + result = mf.get_result() + self.assertIsNotNone(result) + self.assertEqual(20, result.sequence_number) + + def test_reset_clears_state(self): + mf = DeduplicateMergeFunction() + mf.add(_kv(1, 1)) + mf.reset() + self.assertIsNone(mf.get_result()) + + +class CreateMergeFunctionFactoryTest(unittest.TestCase): + + def _options_for(self, engine: MergeEngine) -> CoreOptions: + opts = Options({CoreOptions.MERGE_ENGINE.key(): engine.value}) + return CoreOptions(opts) + + def test_deduplicate_returns_factory(self): + factory = create_merge_function_factory(self._options_for(MergeEngine.DEDUPLICATE)) + self.assertIsInstance(factory, DeduplicateMergeFunctionFactory) + self.assertIsInstance(factory.create(), DeduplicateMergeFunction) + + def test_partial_update_factory_creates_stub_that_raises(self): + factory = create_merge_function_factory(self._options_for(MergeEngine.PARTIAL_UPDATE)) + mf = factory.create() + with self.assertRaises(NotImplementedError): + mf.add(_kv(1, 1)) + + def test_aggregate_factory_creates_stub_that_raises(self): + factory = create_merge_function_factory(self._options_for(MergeEngine.AGGREGATE)) + mf = factory.create() + with self.assertRaises(NotImplementedError): + mf.add(_kv(1, 1)) + + def test_first_row_factory_creates_stub_that_raises(self): + factory = create_merge_function_factory(self._options_for(MergeEngine.FIRST_ROW)) + mf = factory.create() + with self.assertRaises(NotImplementedError): + mf.add(_kv(1, 1)) + + +if __name__ == "__main__": + unittest.main() diff --git a/paimon-python/pypaimon/tests/compact/pk_compact_e2e_test.py b/paimon-python/pypaimon/tests/compact/pk_compact_e2e_test.py new file mode 100644 index 000000000000..0842c040b4ae --- /dev/null +++ b/paimon-python/pypaimon/tests/compact/pk_compact_e2e_test.py @@ -0,0 +1,160 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +import os +import shutil +import tempfile +import unittest + +import pyarrow as pa + +from pypaimon import CatalogFactory, Schema +from pypaimon.common.options.core_options import CoreOptions +from pypaimon.compact.options import CompactOptions + + +class PrimaryKeyCompactE2ETest(unittest.TestCase): + """End-to-end test for primary-key compaction. + + Writes multiple snapshots that each leave a new L0 file, runs the compact + job, and verifies: (1) the compacted files are tagged COMPACT in the + snapshot, (2) read-after-compact returns the deduplicated latest values, + and (3) the file count drops. + """ + + @classmethod + def setUpClass(cls): + cls.temp_dir = tempfile.mkdtemp() + cls.warehouse = os.path.join(cls.temp_dir, "warehouse") + cls.catalog = CatalogFactory.create({"warehouse": cls.warehouse}) + cls.catalog.create_database("pk_db", False) + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.temp_dir, ignore_errors=True) + + def _make_pk_table(self, name: str): + full = f"pk_db.{name}" + opts = { + CoreOptions.BUCKET.key(): "1", # single bucket → single (partition,bucket) key + CoreOptions.TARGET_FILE_SIZE.key(): "10mb", + CoreOptions.METADATA_STATS_MODE.key() + if hasattr(CoreOptions, "METADATA_STATS_MODE") + else "metadata.stats-mode": "truncate(16)", + } + pa_schema = pa.schema([("id", pa.int64()), ("name", pa.string())]) + schema = Schema.from_pyarrow_schema(pa_schema, primary_keys=["id"], options=opts) + self.catalog.create_table(full, schema, True) + return self.catalog.get_table(full) + + def _write_one(self, table, batch: pa.Table): + builder = table.new_batch_write_builder() + write = builder.new_write() + commit = builder.new_commit() + write.write_arrow(batch) + commit.commit(write.prepare_commit()) + write.close() + commit.close() + + def _read_sorted(self, table) -> pa.Table: + rb = table.new_read_builder() + scan = rb.new_scan() + splits = scan.plan().splits() + return rb.new_read().to_arrow(splits).sort_by("id") + + def _count_live_files(self, table) -> int: + from pypaimon.read.scanner.file_scanner import FileScanner + from pypaimon.manifest.manifest_list_manager import ManifestListManager + snapshot = table.snapshot_manager().get_latest_snapshot() + if snapshot is None: + return 0 + mlm = ManifestListManager(table) + + def manifest_scanner(): + return mlm.read_all(snapshot), snapshot + return len(FileScanner(table, manifest_scanner).plan_files()) + + def test_full_compaction_dedup_keeps_latest(self): + table = self._make_pk_table("dedup_keep_latest") + + # Write 3 generations of (id, name) for the same set of ids — the + # latest seen value should win after compaction. + for gen in range(3): + self._write_one(table, pa.Table.from_pydict({ + "id": pa.array([1, 2, 3, 4, 5], type=pa.int64()), + "name": [f"gen{gen}-{i}" for i in range(1, 6)], + })) + + table = self.catalog.get_table("pk_db.dedup_keep_latest") + before_files = self._count_live_files(table) + before_data = self._read_sorted(table) + # Read path already dedups — sanity check that the table sees latest. + self.assertEqual(["gen2-1", "gen2-2", "gen2-3", "gen2-4", "gen2-5"], + before_data.column("name").to_pylist()) + + messages = table.new_compact_job( + compact_options=CompactOptions(full_compaction=True), + ).execute() + + # Single bucket → single message expected. 
+ self.assertEqual(1, len(messages)) + msg = messages[0] + self.assertGreaterEqual(len(msg.compact_before), 3, + "All 3 small writes should have been picked up") + self.assertGreaterEqual(len(msg.compact_after), 1, + "Compaction must produce at least one output file") + + table = self.catalog.get_table("pk_db.dedup_keep_latest") + after_files = self._count_live_files(table) + self.assertLess(after_files, before_files, + f"File count must decrease ({before_files} → {after_files})") + + after_data = self._read_sorted(table) + self.assertEqual(before_data, after_data, + "Compact must preserve the dedup result") + + latest = table.snapshot_manager().get_latest_snapshot() + self.assertEqual("COMPACT", latest.commit_kind) + + # Output files should land at a level > 0 (the strategy promotes them). + max_level = max(f.level for f in msg.compact_after) + self.assertGreater(max_level, 0, + "Compacted output should land at a level > 0") + + def test_no_op_when_below_compaction_trigger(self): + table = self._make_pk_table("noop_below_trigger") + # Only 2 writes — far below default num-sorted-run.compaction-trigger=5. + for i in range(2): + self._write_one(table, pa.Table.from_pydict({ + "id": pa.array([i], type=pa.int64()), + "name": [f"row-{i}"], + })) + table = self.catalog.get_table("pk_db.noop_below_trigger") + snapshot_before = table.snapshot_manager().get_latest_snapshot().id + + messages = table.new_compact_job().execute() + + self.assertEqual([], messages) + table = self.catalog.get_table("pk_db.noop_below_trigger") + self.assertEqual(snapshot_before, + table.snapshot_manager().get_latest_snapshot().id, + "Strategy decided no-op → no new snapshot") + + +if __name__ == "__main__": + unittest.main() diff --git a/paimon-python/pypaimon/tests/compact/ray_executor_test.py b/paimon-python/pypaimon/tests/compact/ray_executor_test.py new file mode 100644 index 000000000000..912bfa02323e --- /dev/null +++ b/paimon-python/pypaimon/tests/compact/ray_executor_test.py @@ -0,0 +1,127 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +import os +import shutil +import tempfile +import unittest + +import pyarrow as pa + +from pypaimon import CatalogFactory, Schema +from pypaimon.common.options.core_options import CoreOptions +from pypaimon.compact.options import CompactOptions + +try: + import ray # noqa: F401 + HAS_RAY = True +except ImportError: + HAS_RAY = False + + +@unittest.skipUnless(HAS_RAY, "ray is not installed") +class RayExecutorE2ETest(unittest.TestCase): + """End-to-end Ray execution: rewrite via worker tasks, commit on driver. 
+ + Uses a real local Ray runtime — the executor under test ships task + payloads through ray.remote, which exercises the full CompactTask + serde path AppendCompactTask / MergeTreeCompactTask were extended + with this phase. + """ + + @classmethod + def setUpClass(cls): + import ray + cls.temp_dir = tempfile.mkdtemp() + cls.warehouse = os.path.join(cls.temp_dir, "warehouse") + cls.catalog_options = {"warehouse": cls.warehouse} + cls.catalog = CatalogFactory.create(cls.catalog_options) + cls.catalog.create_database("ray_db", False) + # local_mode keeps the test single-process; faster startup, no port + # races and the same code path as a real cluster. + ray.init(local_mode=True, ignore_reinit_error=True, log_to_driver=False) + + @classmethod + def tearDownClass(cls): + import ray + ray.shutdown() + shutil.rmtree(cls.temp_dir, ignore_errors=True) + + def _make_unaware_table(self, name: str): + full = f"ray_db.{name}" + opts = { + CoreOptions.BUCKET.key(): "-1", + CoreOptions.TARGET_FILE_SIZE.key(): "10mb", + # See AppendCompactE2ETest for why open-file-cost is zeroed here. + CoreOptions.SOURCE_SPLIT_OPEN_FILE_COST.key(): "0", + } + pa_schema = pa.schema([("id", pa.int32()), ("name", pa.string())]) + schema = Schema.from_pyarrow_schema(pa_schema, options=opts) + self.catalog.create_table(full, schema, True) + return self.catalog.get_table(full) + + def _write_one(self, table, batch: pa.Table): + builder = table.new_batch_write_builder() + write = builder.new_write() + commit = builder.new_commit() + write.write_arrow(batch) + commit.commit(write.prepare_commit()) + write.close() + commit.close() + + def _read_sorted(self, table) -> pa.Table: + rb = table.new_read_builder() + scan = rb.new_scan() + splits = scan.plan().splits() + return rb.new_read().to_arrow(splits).sort_by("id") + + def test_append_compact_via_ray_executor(self): + from pypaimon.compact.executor.ray_executor import RayExecutor + + table = self._make_unaware_table("ray_append") + for i in range(5): + self._write_one(table, pa.Table.from_pydict({ + "id": pa.array([i * 2, i * 2 + 1], type=pa.int32()), + "name": [f"r-{i}-a", f"r-{i}-b"], + })) + + table = self.catalog.get_table("ray_db.ray_append") + before_data = self._read_sorted(table) + + # Note: deliberately omit table_identifier — exercises the default + # path (table.identifier.get_full_name()) which the worker uses + # via Identifier.from_string. A regression here would surface as + # "Cannot get splits from 'Identifier(...)'" inside the Ray task. 
+ job = table.new_compact_job( + compact_options=CompactOptions(min_file_num=5), + executor=RayExecutor(), + catalog_options=self.catalog_options, + ) + messages = job.execute() + + self.assertEqual(1, len(messages)) + self.assertGreaterEqual(len(messages[0].compact_before), 5) + + table = self.catalog.get_table("ray_db.ray_append") + after_data = self._read_sorted(table) + self.assertEqual(before_data, after_data) + self.assertEqual("COMPACT", table.snapshot_manager().get_latest_snapshot().commit_kind) + + +if __name__ == "__main__": + unittest.main() diff --git a/paimon-python/pypaimon/tests/compact/universal_compaction_test.py b/paimon-python/pypaimon/tests/compact/universal_compaction_test.py new file mode 100644 index 000000000000..3b34424eba01 --- /dev/null +++ b/paimon-python/pypaimon/tests/compact/universal_compaction_test.py @@ -0,0 +1,124 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +import unittest +from typing import List + +from pypaimon.compact.levels import LevelSortedRun, SortedRun +from pypaimon.compact.strategy.universal_compaction import UniversalCompaction +from pypaimon.data.timestamp import Timestamp +from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.manifest.schema.simple_stats import SimpleStats +from pypaimon.schema.data_types import AtomicType, DataField +from pypaimon.table.row.generic_row import GenericRow + +PK_FIELDS = [DataField(0, "id", AtomicType("BIGINT"))] + + +def _key(v: int) -> GenericRow: + return GenericRow([v], PK_FIELDS) + + +def _file(level: int, size: int, name: str = None) -> DataFileMeta: + name = name or f"f-l{level}-{size}" + return DataFileMeta.create( + file_name=name, + file_size=size, + row_count=10, + min_key=_key(0), + max_key=_key(99), + key_stats=SimpleStats.empty_stats(), + value_stats=SimpleStats.empty_stats(), + min_sequence_number=0, + max_sequence_number=10, + schema_id=0, + level=level, + extra_files=[], + creation_time=Timestamp.from_epoch_millis(0), + ) + + +def _run(level: int, *sizes: int) -> LevelSortedRun: + return LevelSortedRun(level=level, run=SortedRun(files=[_file(level, s) for s in sizes])) + + +class UniversalCompactionTest(unittest.TestCase): + + def test_returns_none_below_trigger(self): + strategy = UniversalCompaction(num_run_compaction_trigger=5) + runs = [_run(0, 100), _run(0, 100)] + self.assertIsNone(strategy.pick(num_levels=3, runs=runs)) + + def test_size_amp_triggers_full_compaction(self): + strategy = UniversalCompaction(max_size_amp=200, num_run_compaction_trigger=5) + # 5 runs total. Top 4 sum to 1000; max-level run is 100. 1000*100 > 200*100 → trigger. 
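+        # Spelled out, assuming the same size-amplification rule as the Java
+        # UniversalCompaction: with candidate_size = sum of every run except
+        # the last = 4 * 250 = 1000 and last_run_size = 100, compaction
+        # triggers when candidate_size * 100 > max_size_amp * last_run_size,
+        # i.e. 100_000 > 200 * 100 = 20_000, so all runs move to the max level.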
+ runs = [_run(0, 250)] * 4 + [_run(2, 100)] + unit = strategy.pick(num_levels=3, runs=runs) + self.assertIsNotNone(unit) + # output_level == max_level == num_levels - 1 == 2 + self.assertEqual(2, unit.output_level) + self.assertEqual(5, len(unit.files)) + + def test_size_ratio_picks_growing_prefix(self): + # No size-amp trigger (top-4 = 100, max-level = 1000 → 100*100 < 200*1000). + # size-ratio: candidate=100; next=100 → 100*101/100=101 >= 100, include. + # candidate=200; next=100 → 200*101/100=202 >= 100, include. + # candidate=300; next=100 → include. + # candidate=400; next=1000 → 400*101/100=404 < 1000, stop. Pick 4. + strategy = UniversalCompaction(max_size_amp=200, size_ratio=1, num_run_compaction_trigger=5) + runs = [_run(0, 100)] * 4 + [_run(2, 1000)] + unit = strategy.pick(num_levels=3, runs=runs) + self.assertIsNotNone(unit) + # 4 runs picked (the L0 chunks); since not all runs included, + # output_level = max(0, runs[4].level - 1) = max(0, 2-1) = 1. + self.assertEqual(4, len(unit.files)) + self.assertEqual(1, unit.output_level) + + def test_force_pick_l0_picks_only_consecutive_l0(self): + strategy = UniversalCompaction() + runs = [_run(0, 50), _run(0, 60), _run(2, 1000)] + unit = strategy.force_pick_l0(num_levels=3, runs=runs) + self.assertIsNotNone(unit) + self.assertEqual(2, len(unit.files)) + + def test_force_pick_l0_returns_none_when_no_l0(self): + strategy = UniversalCompaction() + runs = [_run(1, 100), _run(2, 200)] + self.assertIsNone(strategy.force_pick_l0(num_levels=3, runs=runs)) + + def test_picking_all_runs_outputs_to_max_level(self): + # Construct a scenario where the size-ratio loop swallows everything. + strategy = UniversalCompaction(num_run_compaction_trigger=2) + runs = [_run(0, 100)] * 6 + unit = strategy.pick(num_levels=4, runs=runs) + self.assertIsNotNone(unit) + # All swallowed → output_level = max_level = 3. + self.assertEqual(6, len(unit.files)) + self.assertEqual(3, unit.output_level) + + def test_invalid_options_raise(self): + with self.assertRaises(ValueError): + UniversalCompaction(max_size_amp=0) + with self.assertRaises(ValueError): + UniversalCompaction(size_ratio=-1) + with self.assertRaises(ValueError): + UniversalCompaction(num_run_compaction_trigger=0) + + +if __name__ == "__main__": + unittest.main() diff --git a/paimon-python/pypaimon/tests/file_store_commit_compact_test.py b/paimon-python/pypaimon/tests/file_store_commit_compact_test.py new file mode 100644 index 000000000000..07205a242008 --- /dev/null +++ b/paimon-python/pypaimon/tests/file_store_commit_compact_test.py @@ -0,0 +1,206 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +import unittest +from datetime import datetime +from unittest.mock import Mock, patch + +from pypaimon.data.timestamp import Timestamp +from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.manifest.schema.simple_stats import SimpleStats +from pypaimon.schema.data_types import AtomicType, DataField +from pypaimon.table.row.generic_row import GenericRow +from pypaimon.write.commit_message import CommitMessage +from pypaimon.write.compact_increment import CompactIncrement +from pypaimon.write.data_increment import DataIncrement +from pypaimon.write.file_store_commit import FileStoreCommit + + +def _make_file(name: str, *, first_row_id=None) -> DataFileMeta: + return DataFileMeta.create( + file_name=name, + file_size=4096, + row_count=10, + min_key=GenericRow([], []), + max_key=GenericRow([], []), + key_stats=SimpleStats.empty_stats(), + value_stats=SimpleStats.empty_stats(), + min_sequence_number=1, + max_sequence_number=10, + schema_id=0, + level=0, + extra_files=[], + creation_time=Timestamp.from_local_date_time(datetime(2024, 1, 15, 10, 30, 0)), + first_row_id=first_row_id, + ) + + +@patch('pypaimon.write.file_store_commit.SnapshotManager') +@patch('pypaimon.write.file_store_commit.ManifestFileManager') +@patch('pypaimon.write.file_store_commit.ManifestListManager') +class TestFileStoreCommitCompact(unittest.TestCase): + """Phase 1 protocol-level tests: verify compact_before/after entries flow correctly through commit(). + + These tests stub _try_commit so we only verify the entry-construction and commit_kind selection. + Full e2e (with real manifest writes / scans) is covered in Phase 2 once the rewriter exists. + """ + + def setUp(self): + self.mock_table = Mock() + self.mock_table.partition_keys = ['dt'] + self.mock_table.partition_keys_fields = [DataField(0, 'dt', AtomicType('STRING'))] + self.mock_table.total_buckets = 4 + self.mock_table.current_branch.return_value = 'main' + self.mock_table.identifier = 'default.t' + self.mock_snapshot_commit = Mock() + + def _create_commit(self): + return FileStoreCommit( + snapshot_commit=self.mock_snapshot_commit, + table=self.mock_table, + commit_user='test_user', + ) + + def test_build_entries_emits_add_for_new_files(self, *_): + commit = self._create_commit() + msg = CommitMessage( + partition=('2024-01-15',), + bucket=2, + data_increment=DataIncrement(new_files=[_make_file('a.parquet')]), + ) + + entries = commit._build_commit_entries([msg]) + + self.assertEqual(1, len(entries)) + self.assertEqual(0, entries[0].kind) + self.assertEqual(2, entries[0].bucket) + self.assertEqual('a.parquet', entries[0].file.file_name) + self.assertEqual(['2024-01-15'], list(entries[0].partition.values)) + + def test_build_entries_emits_delete_for_compact_before_and_add_for_compact_after(self, *_): + commit = self._create_commit() + msg = CommitMessage( + partition=('2024-01-15',), + bucket=1, + compact_increment=CompactIncrement( + compact_before=[_make_file('old-1.parquet'), _make_file('old-2.parquet')], + compact_after=[_make_file('merged.parquet')], + ), + ) + + entries = commit._build_commit_entries([msg]) + + kinds = [e.kind for e in entries] + names = [e.file.file_name for e in entries] + self.assertEqual([1, 1, 0], kinds) + self.assertEqual(['old-1.parquet', 'old-2.parquet', 'merged.parquet'], names) + self.assertTrue(all(e.bucket == 1 for e in entries)) + + def test_commit_with_only_compact_messages_uses_compact_kind(self, *_): + commit = 
self._create_commit() + commit._try_commit = Mock() + msg = CommitMessage( + partition=('p1',), + bucket=0, + compact_increment=CompactIncrement( + compact_before=[_make_file('old.parquet')], + compact_after=[_make_file('new.parquet')], + ), + ) + + commit.commit([msg], commit_identifier=100) + + commit._try_commit.assert_called_once() + call_kwargs = commit._try_commit.call_args.kwargs + self.assertEqual('COMPACT', call_kwargs['commit_kind']) + self.assertEqual(100, call_kwargs['commit_identifier']) + + def test_commit_with_new_files_keeps_append_kind_even_when_compact_fields_present(self, *_): + commit = self._create_commit() + commit._try_commit = Mock() + msg = CommitMessage( + partition=('p1',), + bucket=0, + data_increment=DataIncrement(new_files=[_make_file('new.parquet')]), + compact_increment=CompactIncrement( + compact_before=[_make_file('old.parquet')], + compact_after=[_make_file('merged.parquet')], + ), + ) + + commit.commit([msg], commit_identifier=200) + + call_kwargs = commit._try_commit.call_args.kwargs + self.assertEqual('APPEND', call_kwargs['commit_kind']) + + def test_commit_compact_uses_compact_kind_and_no_conflict_detection(self, *_): + commit = self._create_commit() + commit._try_commit = Mock() + msg = CommitMessage( + partition=('p1',), + bucket=3, + compact_increment=CompactIncrement( + compact_before=[_make_file('old.parquet')], + compact_after=[_make_file('new.parquet')], + ), + ) + + commit.commit_compact([msg], commit_identifier=300) + + commit._try_commit.assert_called_once() + kwargs = commit._try_commit.call_args.kwargs + self.assertEqual('COMPACT', kwargs['commit_kind']) + self.assertEqual(300, kwargs['commit_identifier']) + self.assertFalse(kwargs['detect_conflicts']) + self.assertFalse(kwargs['allow_rollback']) + + def test_commit_compact_rejects_messages_with_new_files(self, *_): + commit = self._create_commit() + msg = CommitMessage( + partition=('p1',), + bucket=0, + data_increment=DataIncrement(new_files=[_make_file('append.parquet')]), + compact_increment=CompactIncrement( + compact_before=[_make_file('old.parquet')], + compact_after=[_make_file('new.parquet')], + ), + ) + + with self.assertRaises(ValueError): + commit.commit_compact([msg], commit_identifier=400) + + def test_commit_compact_skips_when_no_messages(self, *_): + commit = self._create_commit() + commit._try_commit = Mock() + + commit.commit_compact([], commit_identifier=500) + + commit._try_commit.assert_not_called() + + def test_commit_compact_skips_when_messages_have_no_files(self, *_): + commit = self._create_commit() + commit._try_commit = Mock() + empty_msg = CommitMessage(partition=('p1',), bucket=0) + + commit.commit_compact([empty_msg], commit_identifier=600) + + commit._try_commit.assert_not_called() + + +if __name__ == '__main__': + unittest.main() diff --git a/paimon-python/pypaimon/tests/file_store_commit_test.py b/paimon-python/pypaimon/tests/file_store_commit_test.py index 958ea85a6b7e..a537e6d0dca4 100644 --- a/paimon-python/pypaimon/tests/file_store_commit_test.py +++ b/paimon-python/pypaimon/tests/file_store_commit_test.py @@ -25,6 +25,7 @@ from pypaimon.snapshot.snapshot_commit import PartitionStatistics from pypaimon.table.row.generic_row import GenericRow from pypaimon.write.commit_message import CommitMessage +from pypaimon.write.data_increment import DataIncrement from pypaimon.write.file_store_commit import FileStoreCommit @@ -88,7 +89,7 @@ def test_generate_partition_statistics_single_partition_single_file( commit_message = CommitMessage( 
partition=('2024-01-15', 'us-east-1'), bucket=0, - new_files=[file_meta] + data_increment=DataIncrement(new_files=[file_meta]) ) # Test method @@ -153,7 +154,7 @@ def test_generate_partition_statistics_multiple_files_same_partition( commit_message = CommitMessage( partition=('2024-01-15', 'us-east-1'), bucket=0, - new_files=[file_meta_1, file_meta_2] + data_increment=DataIncrement(new_files=[file_meta_1, file_meta_2]) ) # Test method @@ -225,13 +226,13 @@ def test_generate_partition_statistics_multiple_partitions( commit_message_1 = CommitMessage( partition=('2024-01-15', 'us-east-1'), bucket=0, - new_files=[file_meta_1] + data_increment=DataIncrement(new_files=[file_meta_1]) ) commit_message_2 = CommitMessage( partition=('2024-01-15', 'us-west-2'), bucket=0, - new_files=[file_meta_2] + data_increment=DataIncrement(new_files=[file_meta_2]) ) # Test method @@ -294,7 +295,7 @@ def test_generate_partition_statistics_unpartitioned_table( commit_message = CommitMessage( partition=(), # Empty partition for unpartitioned table bucket=0, - new_files=[file_meta] + data_increment=DataIncrement(new_files=[file_meta]) ) # Test method @@ -333,7 +334,7 @@ def test_generate_partition_statistics_no_creation_time( commit_message = CommitMessage( partition=('2024-01-15', 'us-east-1'), bucket=0, - new_files=[file_meta] + data_increment=DataIncrement(new_files=[file_meta]) ) # Test method @@ -375,7 +376,7 @@ def test_generate_partition_statistics_mismatched_partition_keys( commit_message = CommitMessage( partition=('2024-01-15', 'us-east-1', 'extra-value'), # 3 values but table has 2 keys bucket=0, - new_files=[file_meta] + data_increment=DataIncrement(new_files=[file_meta]) ) # Test method diff --git a/paimon-python/pypaimon/write/commit_message.py b/paimon-python/pypaimon/write/commit_message.py index d560c5a2479f..24347ff8d001 100644 --- a/paimon-python/pypaimon/write/commit_message.py +++ b/paimon-python/pypaimon/write/commit_message.py @@ -16,18 +16,67 @@ # limitations under the License. ################################################################################ -from dataclasses import dataclass -from typing import List, Tuple, Optional +from dataclasses import dataclass, field +from typing import List, Optional, Tuple from pypaimon.manifest.schema.data_file_meta import DataFileMeta +from pypaimon.write.compact_increment import CompactIncrement +from pypaimon.write.data_increment import DataIncrement @dataclass class CommitMessage: + """File committable for sink. + + Direct port of org.apache.paimon.table.sink.CommitMessageImpl. Carries + everything one (partition, bucket) writer or compactor contributes to a + snapshot, packaged as a (data_increment, compact_increment) pair so the + same message type can describe both pure writes and compaction results. + + - partition / bucket: identify the (partition, bucket) the message + applies to. + - total_buckets: number of buckets the table had at write time, used by + the commit path to detect bucket-count changes. + - data_increment: ADD/DELETE/changelog/index deltas from a normal write. + - compact_increment: ADD/DELETE/changelog/index deltas from compaction. + - check_from_snapshot: row-tracking conflict-detection anchor; -1 means + "no check" (default). 
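+
+    Illustrative sketch (the file metas are DataFileMeta objects produced by a
+    writer or rewriter; the names below are placeholders)::
+
+        msg = CommitMessage(
+            partition=("2024-01-15",),
+            bucket=0,
+            compact_increment=CompactIncrement(
+                compact_before=[small_a, small_b],
+                compact_after=[merged],
+            ),
+        )
+        assert msg.compact_before == [small_a, small_b]
+        assert not msg.is_empty()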
+ """ + partition: Tuple bucket: int - new_files: List[DataFileMeta] + total_buckets: Optional[int] = None + data_increment: DataIncrement = field(default_factory=DataIncrement) + compact_increment: CompactIncrement = field(default_factory=CompactIncrement) check_from_snapshot: Optional[int] = -1 - def is_empty(self): - return not self.new_files + # ---- Convenience accessors --------------------------------------------- + # Mirror Java's CommitMessageImpl shape: callers usually want the + # individual file lists rather than reaching through the increment. + + @property + def new_files(self) -> List[DataFileMeta]: + return self.data_increment.new_files + + @property + def deleted_files(self) -> List[DataFileMeta]: + return self.data_increment.deleted_files + + @property + def changelog_files(self) -> List[DataFileMeta]: + return self.data_increment.changelog_files + + @property + def compact_before(self) -> List[DataFileMeta]: + return self.compact_increment.compact_before + + @property + def compact_after(self) -> List[DataFileMeta]: + return self.compact_increment.compact_after + + @property + def compact_changelog_files(self) -> List[DataFileMeta]: + return self.compact_increment.changelog_files + + def is_empty(self) -> bool: + return self.data_increment.is_empty() and self.compact_increment.is_empty() diff --git a/paimon-python/pypaimon/write/commit_message_serializer.py b/paimon-python/pypaimon/write/commit_message_serializer.py new file mode 100644 index 000000000000..1b89830d6cc7 --- /dev/null +++ b/paimon-python/pypaimon/write/commit_message_serializer.py @@ -0,0 +1,163 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +import json +from typing import Any, Dict, List + +from pypaimon.index.index_file_meta import IndexFileMeta +from pypaimon.manifest.schema.data_file_meta import (DataFileMeta, decode_value, + encode_value) +from pypaimon.write.commit_message import CommitMessage +from pypaimon.write.compact_increment import CompactIncrement +from pypaimon.write.data_increment import DataIncrement + + +class CommitMessageSerializer: + """Cross-process serializer for CommitMessage payloads. + + JSON-based on purpose: human-debuggable, version-tolerant across worker + Python versions, and avoids the security/compat pitfalls of pickle when + shipping CompactTask outputs from Ray workers back to the driver. + + The wire shape mirrors org.apache.paimon.table.sink.CommitMessageImpl: + every message is (partition, bucket, total_buckets, data_increment, + compact_increment), with each increment carrying its own new/deleted/ + changelog file lists plus index file deltas. 
Today the index slots are + populated only by tables that opt into them; the serializer round-trips + them either way so adding deletion vectors / global index later does + not need a new payload version. + """ + + VERSION = 1 + + @classmethod + def serialize(cls, message: CommitMessage) -> bytes: + return json.dumps(cls.to_dict(message), separators=(",", ":")).encode("utf-8") + + @classmethod + def deserialize(cls, payload: bytes) -> CommitMessage: + return cls.from_dict(json.loads(payload.decode("utf-8"))) + + @classmethod + def to_dict(cls, message: CommitMessage) -> Dict[str, Any]: + partition = message.partition if message.partition is not None else () + return { + "version": cls.VERSION, + "partition": [encode_value(v) for v in partition], + "bucket": message.bucket, + "total_buckets": message.total_buckets, + "data_increment": cls._data_increment_to_dict(message.data_increment), + "compact_increment": cls._compact_increment_to_dict(message.compact_increment), + "check_from_snapshot": message.check_from_snapshot, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> CommitMessage: + version = data.get("version", cls.VERSION) + if version != cls.VERSION: + raise ValueError( + f"Unsupported CommitMessage payload version: {version} (expected {cls.VERSION})" + ) + partition_values = data.get("partition") or [] + return CommitMessage( + partition=tuple(decode_value(v) for v in partition_values), + bucket=data["bucket"], + total_buckets=data.get("total_buckets"), + data_increment=cls._data_increment_from_dict(data.get("data_increment")), + compact_increment=cls._compact_increment_from_dict(data.get("compact_increment")), + check_from_snapshot=data.get("check_from_snapshot", -1), + ) + + @classmethod + def serialize_list(cls, messages: List[CommitMessage]) -> List[bytes]: + return [cls.serialize(m) for m in messages] + + @classmethod + def deserialize_list(cls, payloads: List[bytes]) -> List[CommitMessage]: + return [cls.deserialize(p) for p in payloads] + + # ---- Increment helpers ------------------------------------------------- + + @classmethod + def _data_increment_to_dict(cls, inc: DataIncrement) -> Dict[str, Any]: + return { + "new_files": [f.to_dict() for f in inc.new_files], + "deleted_files": [f.to_dict() for f in inc.deleted_files], + "changelog_files": [f.to_dict() for f in inc.changelog_files], + "new_index_files": [_index_file_to_dict(i) for i in inc.new_index_files], + "deleted_index_files": [_index_file_to_dict(i) for i in inc.deleted_index_files], + } + + @classmethod + def _data_increment_from_dict(cls, data) -> DataIncrement: + if not data: + return DataIncrement() + return DataIncrement( + new_files=[DataFileMeta.from_dict(f) for f in data.get("new_files") or []], + deleted_files=[DataFileMeta.from_dict(f) for f in data.get("deleted_files") or []], + changelog_files=[DataFileMeta.from_dict(f) for f in data.get("changelog_files") or []], + new_index_files=[_index_file_from_dict(i) for i in data.get("new_index_files") or []], + deleted_index_files=[_index_file_from_dict(i) for i in data.get("deleted_index_files") or []], + ) + + @classmethod + def _compact_increment_to_dict(cls, inc: CompactIncrement) -> Dict[str, Any]: + return { + "compact_before": [f.to_dict() for f in inc.compact_before], + "compact_after": [f.to_dict() for f in inc.compact_after], + "changelog_files": [f.to_dict() for f in inc.changelog_files], + "new_index_files": [_index_file_to_dict(i) for i in inc.new_index_files], + "deleted_index_files": [_index_file_to_dict(i) for i in 
inc.deleted_index_files], + } + + @classmethod + def _compact_increment_from_dict(cls, data) -> CompactIncrement: + if not data: + return CompactIncrement() + return CompactIncrement( + compact_before=[DataFileMeta.from_dict(f) for f in data.get("compact_before") or []], + compact_after=[DataFileMeta.from_dict(f) for f in data.get("compact_after") or []], + changelog_files=[DataFileMeta.from_dict(f) for f in data.get("changelog_files") or []], + new_index_files=[_index_file_from_dict(i) for i in data.get("new_index_files") or []], + deleted_index_files=[_index_file_from_dict(i) for i in data.get("deleted_index_files") or []], + ) + + +# IndexFileMeta has richer payloads (deletion vector ranges, global index +# meta) that aren't relevant to the basic compaction path yet — round-trip +# only the scalar identity fields here. Phase 6/7 (deletion vectors, +# changelog producer) will extend this to cover dv_ranges and +# global_index_meta as the rewriter starts producing them. +def _index_file_to_dict(idx: IndexFileMeta) -> Dict[str, Any]: + return { + "index_type": idx.index_type, + "file_name": idx.file_name, + "file_size": idx.file_size, + "row_count": idx.row_count, + "external_path": idx.external_path, + } + + +def _index_file_from_dict(data: Dict[str, Any]) -> IndexFileMeta: + return IndexFileMeta( + index_type=data["index_type"], + file_name=data["file_name"], + file_size=data["file_size"], + row_count=data["row_count"], + external_path=data.get("external_path"), + ) diff --git a/paimon-python/pypaimon/write/compact_increment.py b/paimon-python/pypaimon/write/compact_increment.py new file mode 100644 index 000000000000..ddf923ceb9c8 --- /dev/null +++ b/paimon-python/pypaimon/write/compact_increment.py @@ -0,0 +1,58 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from dataclasses import dataclass, field +from typing import List + +from pypaimon.index.index_file_meta import IndexFileMeta +from pypaimon.manifest.schema.data_file_meta import DataFileMeta + + +@dataclass +class CompactIncrement: + """Files changed before and after compaction, with changelog produced during compaction. + + Direct port of org.apache.paimon.io.CompactIncrement. + + - compact_before: input files consumed by compaction (DELETE entries). + - compact_after: rewritten output files (ADD entries). + - changelog_files: changelog files emitted while compacting (used by the + full-compaction changelog producer; empty in the basic dedup path). + - new_index_files / deleted_index_files: index file deltas attributable + to this compaction (deletion vectors / global index updates). Empty + lists by default. 
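+
+    Illustrative only (placeholder names; a real rewriter supplies the
+    DataFileMeta values)::
+
+        inc = CompactIncrement(
+            compact_before=[small_0, small_1, small_2],
+            compact_after=[merged],
+        )
+        assert not inc.is_empty()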
+ """ + + compact_before: List[DataFileMeta] = field(default_factory=list) + compact_after: List[DataFileMeta] = field(default_factory=list) + changelog_files: List[DataFileMeta] = field(default_factory=list) + new_index_files: List[IndexFileMeta] = field(default_factory=list) + deleted_index_files: List[IndexFileMeta] = field(default_factory=list) + + def is_empty(self) -> bool: + return ( + not self.compact_before + and not self.compact_after + and not self.changelog_files + and not self.new_index_files + and not self.deleted_index_files + ) + + @classmethod + def empty(cls) -> "CompactIncrement": + return cls() diff --git a/paimon-python/pypaimon/write/data_increment.py b/paimon-python/pypaimon/write/data_increment.py new file mode 100644 index 000000000000..e0b1d16f98d1 --- /dev/null +++ b/paimon-python/pypaimon/write/data_increment.py @@ -0,0 +1,61 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +from dataclasses import dataclass, field +from typing import List + +from pypaimon.index.index_file_meta import IndexFileMeta +from pypaimon.manifest.schema.data_file_meta import DataFileMeta + + +@dataclass +class DataIncrement: + """Increment of data files, changelog files and index files produced by a write. + + Direct port of org.apache.paimon.io.DataIncrement. Carries everything one + write attempt contributes to a snapshot, so a CommitMessage can be + constructed from a (DataIncrement, CompactIncrement) pair just like the + Java side. + + - new_files: data files this write created (ADD entries). + - deleted_files: data files this write removed without compaction + (e.g. row-level delete in data-evolution tables); ADD/DELETE asymmetry + is preserved by giving each list its own slot. + - changelog_files: changelog data files associated with this write. + - new_index_files / deleted_index_files: index file deltas (deletion + vectors, global index, ...). Empty lists by default. 
+ """ + + new_files: List[DataFileMeta] = field(default_factory=list) + deleted_files: List[DataFileMeta] = field(default_factory=list) + changelog_files: List[DataFileMeta] = field(default_factory=list) + new_index_files: List[IndexFileMeta] = field(default_factory=list) + deleted_index_files: List[IndexFileMeta] = field(default_factory=list) + + def is_empty(self) -> bool: + return ( + not self.new_files + and not self.deleted_files + and not self.changelog_files + and not self.new_index_files + and not self.deleted_index_files + ) + + @classmethod + def empty(cls) -> "DataIncrement": + return cls() diff --git a/paimon-python/pypaimon/write/file_store_commit.py b/paimon-python/pypaimon/write/file_store_commit.py index 832a39ba6887..28f4535bcdac 100644 --- a/paimon-python/pypaimon/write/file_store_commit.py +++ b/paimon-python/pypaimon/write/file_store_commit.py @@ -111,7 +111,12 @@ def __init__(self, snapshot_commit: SnapshotCommit, table, commit_user: str): self.rollback = CommitRollback(table_rollback) if table_rollback is not None else None def commit(self, commit_messages: List[CommitMessage], commit_identifier: int): - """Commit the given commit messages in normal append mode.""" + """Commit the given commit messages in normal append mode. + + new_files in each message generate ADD entries; compact_before/compact_after + generate DELETE/ADD entries respectively. If only compact_* fields are present + across all messages (no new_files), commit_kind becomes COMPACT. + """ if not commit_messages: return @@ -126,21 +131,12 @@ def commit(self, commit_messages: List[CommitMessage], commit_identifier: int): self.table.identifier, len(commit_messages), ) - commit_entries = [] - for msg in commit_messages: - partition = GenericRow(list(msg.partition), self.table.partition_keys_fields) - for file in msg.new_files: - commit_entries.append(ManifestEntry( - kind=0, - partition=partition, - bucket=msg.bucket, - total_buckets=self.table.total_buckets, - file=file - )) + commit_entries = self._build_commit_entries(commit_messages) + has_new_files = any(msg.new_files for msg in commit_messages) logger.info("Finished collecting changes, including: %d entries", len(commit_entries)) - commit_kind = "APPEND" + commit_kind = "APPEND" if has_new_files else "COMPACT" detect_conflicts = False allow_rollback = False if self.conflict_detection.should_be_overwrite_commit(): @@ -157,6 +153,70 @@ def commit(self, commit_messages: List[CommitMessage], commit_identifier: int): detect_conflicts=detect_conflicts, allow_rollback=allow_rollback) + def commit_compact(self, commit_messages: List[CommitMessage], commit_identifier: int): + """Commit compaction results (compact_before/compact_after only). + + Each message must carry no new_files. compact_before generate DELETE entries, + compact_after generate ADD entries. Snapshot kind is COMPACT. + """ + if not commit_messages: + return + + for msg in commit_messages: + if msg.new_files: + raise ValueError( + "commit_compact rejects messages with new_files; use commit() instead." 
+ ) + + logger.info( + "Ready to commit compact to table %s, number of commit messages: %d", + self.table.identifier, + len(commit_messages), + ) + commit_entries = self._build_commit_entries(commit_messages) + if not commit_entries: + return + + logger.info("Finished collecting compact changes: %d entries", len(commit_entries)) + + self._try_commit( + commit_kind="COMPACT", + commit_identifier=commit_identifier, + commit_entries_plan=lambda snapshot: commit_entries, + detect_conflicts=False, + allow_rollback=False, + ) + + def _build_commit_entries(self, commit_messages: List[CommitMessage]) -> List[ManifestEntry]: + entries: List[ManifestEntry] = [] + for msg in commit_messages: + partition = GenericRow(list(msg.partition), self.table.partition_keys_fields) + for file in msg.new_files: + entries.append(ManifestEntry( + kind=0, + partition=partition, + bucket=msg.bucket, + total_buckets=self.table.total_buckets, + file=file, + )) + for file in msg.compact_before: + entries.append(ManifestEntry( + kind=1, + partition=partition, + bucket=msg.bucket, + total_buckets=self.table.total_buckets, + file=file, + )) + for file in msg.compact_after: + entries.append(ManifestEntry( + kind=0, + partition=partition, + bucket=msg.bucket, + total_buckets=self.table.total_buckets, + file=file, + )) + return entries + def overwrite(self, overwrite_partition, commit_messages: List[CommitMessage], commit_identifier: int): """Commit the given commit messages in overwrite mode.""" if not commit_messages: diff --git a/paimon-python/pypaimon/write/file_store_write.py b/paimon-python/pypaimon/write/file_store_write.py index 75b1d3a7d708..e96a35a926f9 100644 --- a/paimon-python/pypaimon/write/file_store_write.py +++ b/paimon-python/pypaimon/write/file_store_write.py @@ -22,6 +22,7 @@ from pypaimon.common.options.core_options import CoreOptions from pypaimon.write.commit_message import CommitMessage +from pypaimon.write.data_increment import DataIncrement from pypaimon.write.writer.append_only_data_writer import AppendOnlyDataWriter from pypaimon.write.writer.data_blob_writer import DataBlobWriter from pypaimon.write.writer.data_writer import DataWriter @@ -109,7 +110,8 @@ def prepare_commit(self, commit_identifier) -> List[CommitMessage]: commit_message = CommitMessage( partition=partition, bucket=bucket, - new_files=committed_files + total_buckets=self.table.total_buckets, + data_increment=DataIncrement(new_files=committed_files), ) commit_messages.append(commit_message) return commit_messages diff --git a/paimon-python/pypaimon/write/table_update.py b/paimon-python/pypaimon/write/table_update.py index ec192a98a502..bd544d253bae 100644 --- a/paimon-python/pypaimon/write/table_update.py +++ b/paimon-python/pypaimon/write/table_update.py @@ -26,6 +26,7 @@ from pypaimon.manifest.schema.data_file_meta import DataFileMeta from pypaimon.read.split import DataSplit from pypaimon.write.commit_message import CommitMessage +from pypaimon.write.data_increment import DataIncrement from pypaimon.write.table_update_by_row_id import TableUpdateByRowId from pypaimon.write.table_upsert_by_key import TableUpsertByKey from pypaimon.write.writer.data_writer import DataWriter @@ -199,7 +200,13 @@ def arrow_reader(self) -> pyarrow.ipc.RecordBatchReader: def prepare_commit(self) -> List[CommitMessage]: commit_messages = [] for (partition, files) in self.dict.items(): - commit_messages.append(CommitMessage(partition, 0, files, self.snapshot_id)) + commit_messages.append(CommitMessage( + partition=partition, + bucket=0, + 
total_buckets=self.table.total_buckets, + data_increment=DataIncrement(new_files=files), + check_from_snapshot=self.snapshot_id, + )) return commit_messages def update_by_arrow_batch(self, data: pa.RecordBatch):