From 1593be7be979b2b14b89d478031444a7fb71a280 Mon Sep 17 00:00:00 2001 From: liushengsong Date: Fri, 12 Jun 2026 17:52:06 +0800 Subject: [PATCH 1/3] Remove unnecessary vacuum_set_xid_limits calls for AO/AOCO tables AO/AOCO tables have no per-tuple xmin/xmax -- visibility is managed via a visibility map at the segment level, not via per-tuple transaction IDs. The freeze limits computed by vacuum_set_xid_limits are therefore meaningless for AO tables. Worse, passing MultiXactCutoff to vac_update_relstats (VACUUM) or swap_relation_files (CLUSTER) incorrectly sets relminmxid on AO tables (whose relminmxid should remain InvalidMultiXactId), causing them to unnecessarily participate in the database-wide datminmxid calculation. Fix by: - vacuum_ao.c: remove the vacuum_set_xid_limits call, pass InvalidTransactionId/InvalidMultiXactId directly to vac_update_relstats - appendonlyam_handler.c / aocsam_handler.c: remove the vacuum_set_xid_limits call in copy_for_cluster, return InvalidTransactionId/InvalidMultiXactId to the caller - cluster.c: relax the MultiXactId assert to allow InvalidMultiXactId, and reset relminmxid to InvalidMultiXactId for AO tables (matching the existing relfrozenxid override) vacuum_set_xid_limits was a pre-PG16 API kept only for AO callers. With all callers removed, delete the function and its declaration. 
--- src/backend/access/aocs/aocsam_handler.c | 29 +-- .../access/appendonly/appendonlyam_handler.c | 29 +-- src/backend/commands/cluster.c | 13 +- src/backend/commands/vacuum.c | 199 ------------------ src/backend/commands/vacuum_ao.c | 24 +-- src/include/commands/vacuum.h | 9 - 6 files changed, 27 insertions(+), 276 deletions(-) diff --git a/src/backend/access/aocs/aocsam_handler.c b/src/backend/access/aocs/aocsam_handler.c index 5cbf2015c64..4dc7d541c5f 100644 --- a/src/backend/access/aocs/aocsam_handler.c +++ b/src/backend/access/aocs/aocsam_handler.c @@ -1462,8 +1462,6 @@ aoco_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, int natts; Datum *values; bool *isnull; - TransactionId FreezeXid; - MultiXactId MultiXactCutoff; Tuplesortstate *tuplesort; PGRUsage ru0; @@ -1523,29 +1521,12 @@ aoco_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber); /* - * Compute sane values for FreezeXid and CutoffMulti with regular - * VACUUM machinery to avoidconfising existing CLUSTER code. + * AO/AOCO tables have no per-tuple xmin/xmax, so freeze limits don't + * apply. Return Invalid values so that relfrozenxid and relminmxid + * remain unchanged after CLUSTER. */ - vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, - &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff, - NULL); - - /* - * FreezeXid will become the table's new relfrozenxid, and that mustn't go - * backwards, so take the max. - */ - if (TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid)) - FreezeXid = OldHeap->rd_rel->relfrozenxid; - - /* - * MultiXactCutoff, similarly, shouldn't go backwards either. 
- */ - if (MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid)) - MultiXactCutoff = OldHeap->rd_rel->relminmxid; - - /* return selected values to caller */ - *xid_cutoff = FreezeXid; - *multi_cutoff = MultiXactCutoff; + *xid_cutoff = InvalidTransactionId; + *multi_cutoff = InvalidMultiXactId; tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex, maintenance_work_mem, NULL, false); diff --git a/src/backend/access/appendonly/appendonlyam_handler.c b/src/backend/access/appendonly/appendonlyam_handler.c index cd37c0bbacd..cae12e110ff 100644 --- a/src/backend/access/appendonly/appendonlyam_handler.c +++ b/src/backend/access/appendonly/appendonlyam_handler.c @@ -1317,8 +1317,6 @@ appendonly_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, int natts; Datum *values; bool *isnull; - TransactionId FreezeXid; - MultiXactId MultiXactCutoff; Tuplesortstate *tuplesort; PGRUsage ru0; @@ -1380,29 +1378,12 @@ appendonly_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber); /* - * Compute sane values for FreezeXid and CutoffMulti with regular - * VACUUM machinery to avoidconfising existing CLUSTER code. + * AO/AOCO tables have no per-tuple xmin/xmax, so freeze limits don't + * apply. Return Invalid values so that relfrozenxid and relminmxid + * remain unchanged after CLUSTER. */ - vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, - &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff, - NULL); - - /* - * FreezeXid will become the table's new relfrozenxid, and that mustn't go - * backwards, so take the max. - */ - if (TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid)) - FreezeXid = OldHeap->rd_rel->relfrozenxid; - - /* - * MultiXactCutoff, similarly, shouldn't go backwards either. 
- */ - if (MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid)) - MultiXactCutoff = OldHeap->rd_rel->relminmxid; - - /* return selected values to caller */ - *xid_cutoff = FreezeXid; - *multi_cutoff = MultiXactCutoff; + *xid_cutoff = InvalidTransactionId; + *multi_cutoff = InvalidMultiXactId; tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex, maintenance_work_mem, NULL, false); diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index e31fe4cef8e..a3d2ad87bfb 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -1486,17 +1486,24 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, Assert(!TransactionIdIsValid(frozenXid) || TransactionIdIsNormal(frozenXid)); relform1->relfrozenxid = frozenXid; - Assert(MultiXactIdIsValid(cutoffMulti)); + Assert(!MultiXactIdIsValid(cutoffMulti) || + MultiXactIdPrecedesOrEquals(FirstMultiXactId, cutoffMulti)); relform1->relminmxid = cutoffMulti; } /* - * Cloudberry: append-optimized tables do not have a valid relfrozenxid. - * Overwrite the entry for both relations. + * Cloudberry: append-optimized tables do not have a valid relfrozenxid + * or relminmxid. Overwrite the entry for both relations. 
*/ if (relform1->relkind != RELKIND_INDEX && IsAccessMethodAO(relform1->relam)) + { relform1->relfrozenxid = InvalidTransactionId; + relform1->relminmxid = InvalidMultiXactId; + } if (relform2->relkind != RELKIND_INDEX && IsAccessMethodAO(relform2->relam)) + { relform2->relfrozenxid = InvalidTransactionId; + relform2->relminmxid = InvalidMultiXactId; + } /* swap size statistics too, since new rel has freshly-updated stats */ if (swap_stats) diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 80585a878a7..f4ba5615d5a 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -3914,202 +3914,3 @@ vac_cmp_itemptr(const void *left, const void *right) return 0; } - -void -vacuum_set_xid_limits(Relation rel, - int freeze_min_age, - int freeze_table_age, - int multixact_freeze_min_age, - int multixact_freeze_table_age, - TransactionId *oldestXmin, - TransactionId *freezeLimit, - TransactionId *xidFullScanLimit, - MultiXactId *multiXactCutoff, - MultiXactId *mxactFullScanLimit) -{ - int freezemin; - int mxid_freezemin; - int effective_multixact_freeze_max_age; - TransactionId limit; - TransactionId safeLimit; - MultiXactId oldestMxact; - MultiXactId mxactLimit; - MultiXactId safeMxactLimit; - - /* - * We can always ignore processes running lazy vacuum. This is because we - * use these values only for deciding which tuples we must keep in the - * tables. Since lazy vacuum doesn't write its XID anywhere (usually no - * XID assigned), it's safe to ignore it. In theory it could be - * problematic to ignore lazy vacuums in a full vacuum, but keep in mind - * that only one vacuum process can be working on a particular table at - * any time, and that each vacuum is always an independent transaction. 
- */ - *oldestXmin = GetOldestNonRemovableTransactionId(rel); - - if (OldSnapshotThresholdActive()) - { - TransactionId limit_xmin; - TimestampTz limit_ts; - - if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel, - &limit_xmin, &limit_ts)) - { - /* - * TODO: We should only set the threshold if we are pruning on the - * basis of the increased limits. Not as crucial here as it is - * for opportunistic pruning (which often happens at a much higher - * frequency), but would still be a significant improvement. - */ - SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin); - *oldestXmin = limit_xmin; - } - } - - Assert(TransactionIdIsNormal(*oldestXmin)); - - /* - * Determine the minimum freeze age to use: as specified by the caller, or - * vacuum_freeze_min_age, but in any case not more than half - * autovacuum_freeze_max_age, so that autovacuums to prevent XID - * wraparound won't occur too frequently. - */ - freezemin = freeze_min_age; - if (freezemin < 0) - freezemin = vacuum_freeze_min_age; - freezemin = Min(freezemin, autovacuum_freeze_max_age / 2); - Assert(freezemin >= 0); - - /* - * Compute the cutoff XID, being careful not to generate a "permanent" XID - */ - limit = *oldestXmin - freezemin; - if (!TransactionIdIsNormal(limit)) - limit = FirstNormalTransactionId; - - /* - * If oldestXmin is very far back (in practice, more than - * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum - * freeze age of zero. 
- */ - safeLimit = ReadNextTransactionId() - autovacuum_freeze_max_age; - if (!TransactionIdIsNormal(safeLimit)) - safeLimit = FirstNormalTransactionId; - - if (TransactionIdPrecedes(limit, safeLimit)) - { - ereport(WARNING, - (errmsg("oldest xmin is far in the past"), - errhint("Close open transactions soon to avoid wraparound problems.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - limit = *oldestXmin; - } - - *freezeLimit = limit; - - /* - * Compute the multixact age for which freezing is urgent. This is - * normally autovacuum_multixact_freeze_max_age, but may be less if we are - * short of multixact member space. - */ - effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); - - /* - * Determine the minimum multixact freeze age to use: as specified by - * caller, or vacuum_multixact_freeze_min_age, but in any case not more - * than half effective_multixact_freeze_max_age, so that autovacuums to - * prevent MultiXact wraparound won't occur too frequently. 
- */ - mxid_freezemin = multixact_freeze_min_age; - if (mxid_freezemin < 0) - mxid_freezemin = vacuum_multixact_freeze_min_age; - mxid_freezemin = Min(mxid_freezemin, - effective_multixact_freeze_max_age / 2); - Assert(mxid_freezemin >= 0); - - /* compute the cutoff multi, being careful to generate a valid value */ - oldestMxact = GetOldestMultiXactId(); - mxactLimit = oldestMxact - mxid_freezemin; - if (mxactLimit < FirstMultiXactId) - mxactLimit = FirstMultiXactId; - - safeMxactLimit = - ReadNextMultiXactId() - effective_multixact_freeze_max_age; - if (safeMxactLimit < FirstMultiXactId) - safeMxactLimit = FirstMultiXactId; - - if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit)) - { - ereport(WARNING, - (errmsg("oldest multixact is far in the past"), - errhint("Close open transactions with multixacts soon to avoid wraparound problems."))); - /* Use the safe limit, unless an older mxact is still running */ - if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit)) - mxactLimit = oldestMxact; - else - mxactLimit = safeMxactLimit; - } - - *multiXactCutoff = mxactLimit; - - if (xidFullScanLimit != NULL) - { - int freezetable; - - Assert(mxactFullScanLimit != NULL); - - /* - * Determine the table freeze age to use: as specified by the caller, - * or vacuum_freeze_table_age, but in any case not more than - * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly - * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples - * before anti-wraparound autovacuum is launched. - */ - freezetable = freeze_table_age; - if (freezetable < 0) - freezetable = vacuum_freeze_table_age; - freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95); - Assert(freezetable >= 0); - - /* - * Compute XID limit causing a full-table vacuum, being careful not to - * generate a "permanent" XID. 
- */ - limit = ReadNextTransactionId() - freezetable; - if (!TransactionIdIsNormal(limit)) - limit = FirstNormalTransactionId; - - *xidFullScanLimit = limit; - - /* - * Similar to the above, determine the table freeze age to use for - * multixacts: as specified by the caller, or - * vacuum_multixact_freeze_table_age, but in any case not more than - * autovacuum_multixact_freeze_table_age * 0.95, so that if you have - * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to - * freeze multixacts before anti-wraparound autovacuum is launched. - */ - freezetable = multixact_freeze_table_age; - if (freezetable < 0) - freezetable = vacuum_multixact_freeze_table_age; - freezetable = Min(freezetable, - effective_multixact_freeze_max_age * 0.95); - Assert(freezetable >= 0); - - /* - * Compute MultiXact limit causing a full-table vacuum, being careful - * to generate a valid MultiXact value. - */ - mxactLimit = ReadNextMultiXactId() - freezetable; - if (mxactLimit < FirstMultiXactId) - mxactLimit = FirstMultiXactId; - - *mxactFullScanLimit = mxactLimit; - } - else - { - Assert(mxactFullScanLimit == NULL); - } -} - - diff --git a/src/backend/commands/vacuum_ao.c b/src/backend/commands/vacuum_ao.c index dd0522f1986..075c73fb1c1 100644 --- a/src/backend/commands/vacuum_ao.c +++ b/src/backend/commands/vacuum_ao.c @@ -245,11 +245,6 @@ ao_vacuum_rel_post_cleanup(Relation onerel, VacuumParams *params, BufferAccessSt BlockNumber total_file_segs; int elevel; int options = params->options; - TransactionId OldestXmin; - TransactionId FreezeLimit; - MultiXactId MultiXactCutoff; - TransactionId xidFullScanLimit; - MultiXactId mxactFullScanLimit; if (options & VACOPT_VERBOSE) elevel = INFO; @@ -288,16 +283,11 @@ ao_vacuum_rel_post_cleanup(Relation onerel, VacuumParams *params, BufferAccessSt &relhasindex, &total_file_segs); - /* MERGE16_FIXME: How to set limits for ao */ - vacuum_set_xid_limits(onerel, - params->freeze_min_age, - params->freeze_table_age, - 
params->multixact_freeze_min_age, - params->multixact_freeze_table_age, - &OldestXmin, &FreezeLimit, &xidFullScanLimit, - &MultiXactCutoff, &mxactFullScanLimit); - - /* Causion: AO/AOCO use relallvisible to represent total segment file count */ + /* + * AO/AOCO tables have no per-tuple xmin/xmax, so freeze limits don't + * apply. Pass InvalidTransactionId/InvalidMultiXactId to keep + * relfrozenxid and relminmxid unchanged. + */ vac_update_relstats(onerel, relpages, reltuples, @@ -305,8 +295,8 @@ ao_vacuum_rel_post_cleanup(Relation onerel, VacuumParams *params, BufferAccessSt Heap's 'all visible pages', use this field to represent AO/AOCO's total segment file count */ relhasindex, - FreezeLimit, - MultiXactCutoff, + InvalidTransactionId, + InvalidMultiXactId, NULL, NULL, false, diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index d700f4a72cc..431cd7221a8 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -491,15 +491,6 @@ extern Size vac_max_items_to_alloc_size(int max_items); /* In postmaster/autovacuum.c */ extern void AutoVacuumUpdateCostLimit(void); extern void VacuumUpdateCosts(void); -extern void vacuum_set_xid_limits(Relation rel, - int freeze_min_age, int freeze_table_age, - int multixact_freeze_min_age, - int multixact_freeze_table_age, - TransactionId *oldestXmin, - TransactionId *freezeLimit, - TransactionId *xidFullScanLimit, - MultiXactId *multiXactCutoff, - MultiXactId *mxactFullScanLimit); /* in commands/vacuumparallel.c */ extern ParallelVacuumState *parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, int nrequested_workers, From c5c4574184f948c0a9ec906a7dfb28fa60632276 Mon Sep 17 00:00:00 2001 From: liushengsong Date: Mon, 15 Jun 2026 10:19:45 +0800 Subject: [PATCH 2/3] Remove MERGE16_FIXME in PartitionSelector and dead declarations The CreatePartitionPruneState() call in nodePartitionSelector.c is correct -- PartitionSelector only needs the pruning data structure, not the 
initial pruning and subplan map renumbering that ExecInitPartitionPruning() adds on top. Remove the incorrect FIXME. Also remove two dead declarations in execPartition.h: - ExecCreatePartitionPruneState: renamed to CreatePartitionPruneState in PG15 (commit 297daa9d435), declaration was never cleaned up - ExecFindInitialMatchingSubPlans: folded into ExecFindMatchingSubPlans in the same refactor, declaration was never cleaned up --- src/backend/executor/nodePartitionSelector.c | 1 - src/include/executor/execPartition.h | 4 ---- 2 files changed, 5 deletions(-) diff --git a/src/backend/executor/nodePartitionSelector.c b/src/backend/executor/nodePartitionSelector.c index 7999a1a4d71..1a1e3affa85 100644 --- a/src/backend/executor/nodePartitionSelector.c +++ b/src/backend/executor/nodePartitionSelector.c @@ -101,7 +101,6 @@ ExecInitPartitionSelector(PartitionSelector *node, EState *estate, int eflags) outerPlanState(psstate) = ExecInitNode(outerPlan(node), estate, eflags); /* Create the working data structure for pruning. 
*/ - /* MERGE16_FIXME: This use of ExecInitPartitionPruning may be incorrect */ psstate->prune_state = CreatePartitionPruneState(&psstate->ps, node->part_prune_info); return psstate; diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 22c4fc9a5e6..91bc97db8c6 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -130,14 +130,10 @@ extern ResultRelInfo *ExecFindPartition(ModifyTableState *mtstate, EState *estate); extern void ExecCleanupTupleRouting(ModifyTableState *mtstate, PartitionTupleRouting *proute); -extern PartitionPruneState *ExecCreatePartitionPruneState(PlanState *planstate, - PartitionPruneInfo *partitionpruneinfo); extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate, bool initial_prune, EState *estate, int nplans, List *join_prune_paramids); -extern Bitmapset *ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, - int nsubplans); extern int get_partition_for_tuple(PartitionKey key, PartitionDesc partdesc, Datum *values, bool *isnull); From fc448ae2100e50f1e67803dd42c84c9aa2b8c53b Mon Sep 17 00:00:00 2001 From: liushengsong Date: Mon, 15 Jun 2026 14:42:07 +0800 Subject: [PATCH 3/3] Fix MERGE16_FIXME: add UNSAFE_HAS_SUBPLAN flag for qual pushdown The subplan check in check_output_expressions was incorrectly using UNSAFE_NOTIN_PARTITIONBY_CLAUSE, which only prevents normal pushdown but still allows the qual to be pushed as a window run condition. Subplans in output expressions should completely block pushdown in Cloudberry's distributed execution model. Add a dedicated UNSAFE_HAS_SUBPLAN flag and include it in the fully unsafe set in qual_is_pushdown_safe, so quals referencing output columns containing subplans are never pushed down. 
--- src/backend/optimizer/path/allpaths.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 76ce1e8fbea..6bb0f9a92a5 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -74,6 +74,7 @@ bool gp_enable_sort_limit = false; #define UNSAFE_NOTIN_DISTINCTON_CLAUSE (1 << 2) #define UNSAFE_NOTIN_PARTITIONBY_CLAUSE (1 << 3) #define UNSAFE_TYPE_MISMATCH (1 << 4) +#define UNSAFE_HAS_SUBPLAN (1 << 5) /* results of subquery_is_pushdown_safe */ typedef struct pushdown_safety_info @@ -4714,11 +4715,10 @@ check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo) continue; } - /* Refuse subplans */ + /* Refuse subplans (Cloudberry-specific, see UNSAFE_HAS_SUBPLAN) */ if (contain_subplans((Node *) tle->expr)) { - /*.MERGE16_FIXME: should we add a new unsafe type? */ - safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_NOTIN_PARTITIONBY_CLAUSE; + safetyInfo->unsafeFlags[tle->resno] |= UNSAFE_HAS_SUBPLAN; continue; } } @@ -4896,7 +4896,8 @@ qual_is_pushdown_safe(Query *subquery, Index rti, RestrictInfo *rinfo, { if (safetyInfo->unsafeFlags[var->varattno] & (UNSAFE_HAS_VOLATILE_FUNC | UNSAFE_HAS_SET_FUNC | - UNSAFE_NOTIN_DISTINCTON_CLAUSE | UNSAFE_TYPE_MISMATCH)) + UNSAFE_NOTIN_DISTINCTON_CLAUSE | UNSAFE_TYPE_MISMATCH | + UNSAFE_HAS_SUBPLAN)) { safe = PUSHDOWN_UNSAFE; break;