From 5f9a56aafe1ecd73aeaf69d7ce74d649fe8daee9 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 10 Aug 2022 13:37:25 -0400 Subject: [PATCH 01/95] Fix handling of R/W expanded datums that are passed to SQL functions. fmgr_sql must make expanded-datum arguments read-only, because it's possible that the function body will pass the argument to more than one callee function. If one of those functions takes the datum's R/W property as license to scribble on it, then later callees will see an unexpected value, leading to wrong answers. From a performance standpoint, it'd be nice to skip this in the common case that the argument value is passed to only one callee. However, detecting that seems fairly hard, and certainly not something that I care to attempt in a back-patched bug fix. Per report from Adam Mackler. This has been broken since we invented expanded datums, so back-patch to all supported branches. Discussion: https://postgr.es/m/WScDU5qfoZ7PB2gXwNqwGGgDPmWzz08VdydcPFLhOwUKZcdWbblbo-0Lku-qhuEiZoXJ82jpiQU4hOjOcrevYEDeoAvz6nR0IU4IHhXnaCA=@mackler.email Discussion: https://postgr.es/m/187436.1660143060@sss.pgh.pa.us --- src/backend/executor/functions.c | 19 +++++++++++++++++-- .../regress/expected/create_function_3.out | 19 ++++++++++++++++++- .../expected/create_function_3_optimizer.out | 19 ++++++++++++++++++- src/test/regress/sql/create_function_3.sql | 13 +++++++++++++ 4 files changed, 66 insertions(+), 4 deletions(-) diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index 4236adfcf29..820d4c60cc0 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -1078,6 +1078,7 @@ postquel_sub_params(SQLFunctionCachePtr fcache, if (nargs > 0) { ParamListInfo paramLI; + Oid *argtypes = fcache->pinfo->argtypes; if (fcache->paramLI == NULL) { @@ -1094,10 +1095,24 @@ postquel_sub_params(SQLFunctionCachePtr fcache, { ParamExternData *prm = ¶mLI->params[i]; - prm->value = fcinfo->args[i].value; + /* + * If an incoming parameter value is a R/W expanded datum, we + * force it to R/O. We'd be perfectly entitled to scribble on it, + * but the problem is that if the parameter is referenced more + * than once in the function, earlier references might mutate the + * value seen by later references, which won't do at all. We + * could do better if we could be sure of the number of Param + * nodes in the function's plans; but we might not have planned + * all the statements yet, nor do we have plan tree walker + * infrastructure. (Examining the parse trees is not good enough, + * because of possible function inlining during planning.) + */ prm->isnull = fcinfo->args[i].isnull; + prm->value = MakeExpandedObjectReadOnly(fcinfo->args[i].value, + prm->isnull, + get_typlen(argtypes[i])); prm->pflags = 0; - prm->ptype = fcache->pinfo->argtypes[i]; + prm->ptype = argtypes[i]; } } else diff --git a/src/test/regress/expected/create_function_3.out b/src/test/regress/expected/create_function_3.out index 7842a3c1c82..9380779b5b4 100644 --- a/src/test/regress/expected/create_function_3.out +++ b/src/test/regress/expected/create_function_3.out @@ -672,9 +672,25 @@ SELECT * FROM voidtest5(3); ----------- (0 rows) +-- Regression tests for bugs: +-- Check that arguments that are R/W expanded datums aren't corrupted by +-- multiple uses. This test knows that array_append() returns a R/W datum +-- and will modify a R/W array input in-place. We use SETOF to prevent +-- inlining of the SQL function. 
+CREATE FUNCTION double_append(anyarray, anyelement) RETURNS SETOF anyarray +LANGUAGE SQL IMMUTABLE AS +$$ SELECT array_append($1, $2) || array_append($1, $2) $$; +SELECT double_append(array_append(ARRAY[q1], q2), q3) + FROM (VALUES(1,2,3), (4,5,6)) v(q1,q2,q3); + double_append +--------------- + {1,2,3,1,2,3} + {4,5,6,4,5,6} +(2 rows) + -- Cleanup DROP SCHEMA temp_func_test CASCADE; -NOTICE: drop cascades to 29 other objects +NOTICE: drop cascades to 30 other objects DETAIL: drop cascades to function functest_a_1(text,date) drop cascades to function functest_a_2(text[]) drop cascades to function functest_a_3() @@ -704,5 +720,6 @@ drop cascades to function voidtest2(integer,integer) drop cascades to function voidtest3(integer) drop cascades to function voidtest4(integer) drop cascades to function voidtest5(integer) +drop cascades to function double_append(anyarray,anyelement) DROP USER regress_unpriv_user; RESET search_path; diff --git a/src/test/regress/expected/create_function_3_optimizer.out b/src/test/regress/expected/create_function_3_optimizer.out index 3256709e1aa..12c57b8e092 100644 --- a/src/test/regress/expected/create_function_3_optimizer.out +++ b/src/test/regress/expected/create_function_3_optimizer.out @@ -670,9 +670,25 @@ SELECT * FROM voidtest5(3); ----------- (0 rows) +-- Regression tests for bugs: +-- Check that arguments that are R/W expanded datums aren't corrupted by +-- multiple uses. This test knows that array_append() returns a R/W datum +-- and will modify a R/W array input in-place. We use SETOF to prevent +-- inlining of the SQL function. +CREATE FUNCTION double_append(anyarray, anyelement) RETURNS SETOF anyarray +LANGUAGE SQL IMMUTABLE AS +$$ SELECT array_append($1, $2) || array_append($1, $2) $$; +SELECT double_append(array_append(ARRAY[q1], q2), q3) + FROM (VALUES(1,2,3), (4,5,6)) v(q1,q2,q3); + double_append +--------------- + {1,2,3,1,2,3} + {4,5,6,4,5,6} +(2 rows) + -- Cleanup DROP SCHEMA temp_func_test CASCADE; -NOTICE: drop cascades to 29 other objects +NOTICE: drop cascades to 30 other objects DETAIL: drop cascades to function functest_a_1(text,date) drop cascades to function functest_a_2(text[]) drop cascades to function functest_a_3() @@ -702,5 +718,6 @@ drop cascades to function voidtest2(integer,integer) drop cascades to function voidtest3(integer) drop cascades to function voidtest4(integer) drop cascades to function voidtest5(integer) +drop cascades to function double_append(anyarray,anyelement) DROP USER regress_unpriv_user; RESET search_path; diff --git a/src/test/regress/sql/create_function_3.sql b/src/test/regress/sql/create_function_3.sql index 7edd757b8f3..ef8098089d9 100644 --- a/src/test/regress/sql/create_function_3.sql +++ b/src/test/regress/sql/create_function_3.sql @@ -385,6 +385,19 @@ CREATE FUNCTION voidtest5(a int) RETURNS SETOF VOID LANGUAGE SQL AS $$ SELECT generate_series(1, a) $$ STABLE; SELECT * FROM voidtest5(3); +-- Regression tests for bugs: + +-- Check that arguments that are R/W expanded datums aren't corrupted by +-- multiple uses. This test knows that array_append() returns a R/W datum +-- and will modify a R/W array input in-place. We use SETOF to prevent +-- inlining of the SQL function. 
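+-- (Without the fix, the first array_append($1, $2) could take the shared
+-- R/W expanded-datum argument as license to scribble on it in-place, so the
+-- second reference to $1 would see an already-modified array and the
+-- concatenation would produce a wrong answer.)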
+CREATE FUNCTION double_append(anyarray, anyelement) RETURNS SETOF anyarray
+LANGUAGE SQL IMMUTABLE AS
+$$ SELECT array_append($1, $2) || array_append($1, $2) $$;
+
+SELECT double_append(array_append(ARRAY[q1], q2), q3)
+  FROM (VALUES(1,2,3), (4,5,6)) v(q1,q2,q3);
+
 -- Cleanup
 DROP SCHEMA temp_func_test CASCADE;
 DROP USER regress_unpriv_user;

From e5c72ebb3a9abfc54f2f6913e07852a6d2987438 Mon Sep 17 00:00:00 2001
From: Amit Kapila
Date: Thu, 11 Aug 2022 09:45:04 +0530
Subject: [PATCH 02/95] Fix catalog lookup with the wrong snapshot during
 logical decoding.

Previously, we relied on HEAP2_NEW_CID records and XACT_INVALIDATION
records to know if the transaction has modified the catalog, and that
information is not serialized to the snapshot. Therefore, after a restart,
if logical decoding decodes only the commit record of a transaction that
has actually modified a catalog, we will miss adding its XID to the
snapshot. Thus, we will end up looking at catalogs with the wrong
snapshot.

To fix this problem, this commit changes the snapshot builder so that it
remembers the last-running-xacts list of the decoded RUNNING_XACTS record
after restoring the previously serialized snapshot. Then, we mark the
transaction as containing catalog changes if it's in the list of initial
running transactions and its commit record has XACT_XINFO_HAS_INVALS. To
avoid ABI breakage, we store the array of the initial running transactions
in the static variables InitialRunningXacts and NInitialRunningXacts,
instead of storing those in SnapBuild or ReorderBuffer.

This approach has a false positive; we could end up adding a transaction
that didn't change the catalog to the snapshot, since we cannot
distinguish whether the transaction has catalog changes only by checking
the COMMIT record. It doesn't carry information on which (sub)transaction
has catalog changes, and XACT_XINFO_HAS_INVALS doesn't necessarily
indicate that the transaction has catalog changes. But that won't be a
problem, since we use the snapshot built during decoding only to read
system catalogs.

On the master branch, we took a more future-proof approach by writing
catalog-modifying transactions to the serialized snapshot, which avoids
the above false positive. But we cannot back-patch it because of a change
in SnapBuild.
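As a rough illustration (this is a condensed sketch of the new
catalog_change_snapshot isolation spec added below, not part of the code
change itself):

    -- Session 1
    SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot',
                                                          'test_decoding');
    BEGIN;
    TRUNCATE tbl1;    -- a catalog-modifying transaction

    -- Session 2: checkpoint emits RUNNING_XACTS, and decoding up to it
    -- lets the snapshot builder serialize its state
    CHECKPOINT;
    SELECT data FROM pg_logical_slot_get_changes('isolation_slot',
                                                 NULL, NULL);

    -- Session 1
    COMMIT;
    -- A later pg_logical_slot_get_changes() restores the serialized
    -- snapshot and decodes only this COMMIT record; previously the
    -- transaction was then not marked as having changed the catalog.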
Reported-by: Mike Oh Author: Masahiko Sawada Reviewed-by: Amit Kapila, Shi yu, Takamichi Osumi, Kyotaro Horiguchi, Bertrand Drouvot, Ahsan Hadi Backpatch-through: 10 Discussion: https://postgr.es/m/81D0D8B0-E7C4-4999-B616-1E5004DBDCD2%40amazon.com --- contrib/test_decoding/Makefile | 2 +- .../expected/catalog_change_snapshot.out | 44 ++++++ .../specs/catalog_change_snapshot.spec | 39 +++++ src/backend/replication/logical/decode.c | 15 ++ src/backend/replication/logical/snapbuild.c | 137 +++++++++++++++++- src/include/replication/snapbuild.h | 3 + 6 files changed, 232 insertions(+), 8 deletions(-) create mode 100644 contrib/test_decoding/expected/catalog_change_snapshot.out create mode 100644 contrib/test_decoding/specs/catalog_change_snapshot.spec diff --git a/contrib/test_decoding/Makefile b/contrib/test_decoding/Makefile index 9a31e0b8795..4553252d75f 100644 --- a/contrib/test_decoding/Makefile +++ b/contrib/test_decoding/Makefile @@ -8,7 +8,7 @@ REGRESS = ddl xact rewrite toast permissions decoding_in_xact \ spill slot truncate stream stats twophase twophase_stream ISOLATION = mxact delayed_startup ondisk_startup concurrent_ddl_dml \ oldest_xmin snapshot_transfer subxact_without_top concurrent_stream \ - twophase_snapshot + twophase_snapshot catalog_change_snapshot REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf ISOLATION_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf diff --git a/contrib/test_decoding/expected/catalog_change_snapshot.out b/contrib/test_decoding/expected/catalog_change_snapshot.out new file mode 100644 index 00000000000..dc4f9b7018f --- /dev/null +++ b/contrib/test_decoding/expected/catalog_change_snapshot.out @@ -0,0 +1,44 @@ +Parsed test spec with 2 sessions + +starting permutation: s0_init s0_begin s0_savepoint s0_truncate s1_checkpoint s1_get_changes s0_commit s0_begin s0_insert s1_checkpoint s1_get_changes s0_commit s1_get_changes +step s0_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? +-------- +init +(1 row) + +step s0_begin: BEGIN; +step s0_savepoint: SAVEPOINT sp1; +step s0_truncate: TRUNCATE tbl1; +step s1_checkpoint: CHECKPOINT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data +---- +(0 rows) + +step s0_commit: COMMIT; +step s0_begin: BEGIN; +step s0_insert: INSERT INTO tbl1 VALUES (1); +step s1_checkpoint: CHECKPOINT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data +--------------------------------------- +BEGIN +table public.tbl1: TRUNCATE: (no-flags) +COMMIT +(3 rows) + +step s0_commit: COMMIT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data +------------------------------------------------------------- +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:null +COMMIT +(3 rows) + +?column? +-------- +stop +(1 row) + diff --git a/contrib/test_decoding/specs/catalog_change_snapshot.spec b/contrib/test_decoding/specs/catalog_change_snapshot.spec new file mode 100644 index 00000000000..2971ddc69cb --- /dev/null +++ b/contrib/test_decoding/specs/catalog_change_snapshot.spec @@ -0,0 +1,39 @@ +# Test decoding only the commit record of the transaction that have +# modified catalogs. 
+setup +{ + DROP TABLE IF EXISTS tbl1; + CREATE TABLE tbl1 (val1 integer, val2 integer); +} + +teardown +{ + DROP TABLE tbl1; + SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot'); +} + +session "s0" +setup { SET synchronous_commit=on; } +step "s0_init" { SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); } +step "s0_begin" { BEGIN; } +step "s0_savepoint" { SAVEPOINT sp1; } +step "s0_truncate" { TRUNCATE tbl1; } +step "s0_insert" { INSERT INTO tbl1 VALUES (1); } +step "s0_commit" { COMMIT; } + +session "s1" +setup { SET synchronous_commit=on; } +step "s1_checkpoint" { CHECKPOINT; } +step "s1_get_changes" { SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); } + +# For the transaction that TRUNCATEd the table tbl1, the last decoding decodes +# only its COMMIT record, because it starts from the RUNNING_XACTS record emitted +# during the first checkpoint execution. This transaction must be marked as +# containing catalog changes while decoding the COMMIT record and the decoding +# of the INSERT record must read the pg_class with the correct historic snapshot. +# +# Note that in a case where bgwriter wrote the RUNNING_XACTS record between "s0_commit" +# and "s0_begin", this doesn't happen as the decoding starts from the RUNNING_XACTS +# record written by bgwriter. One might think we can either stop the bgwriter or +# increase LOG_SNAPSHOT_INTERVAL_MS but it's not practical via tests. +permutation "s0_init" "s0_begin" "s0_savepoint" "s0_truncate" "s1_checkpoint" "s1_get_changes" "s0_commit" "s0_begin" "s0_insert" "s1_checkpoint" "s1_get_changes" "s0_commit" "s1_get_changes" diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 68524222d71..c198baacfd4 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -635,6 +635,21 @@ DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, commit_time = parsed->origin_timestamp; } + /* + * If the COMMIT record has invalidation messages, it could have catalog + * changes. It is possible that we didn't mark this transaction as + * containing catalog changes when the decoding starts from a commit + * record without decoding the transaction's other changes. So, we ensure + * to mark such transactions as containing catalog change. + * + * This must be done before SnapBuildCommitTxn() so that we can include + * these transactions in the historic snapshot. + */ + if (parsed->xinfo & XACT_XINFO_HAS_INVALS) + SnapBuildXidSetCatalogChanges(ctx->snapshot_builder, xid, + parsed->nsubxacts, parsed->subxacts, + buf->origptr); + SnapBuildCommitTxn(ctx->snapshot_builder, buf->origptr, xid, parsed->nsubxacts, parsed->subxacts); diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 379ee3f5eb3..9788711bf6b 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -250,8 +250,38 @@ struct SnapBuild static ResourceOwner SavedResourceOwnerDuringExport = NULL; static bool ExportInProgress = false; -/* ->committed manipulation */ -static void SnapBuildPurgeCommittedTxn(SnapBuild *builder); +/* + * Array of transactions and subtransactions that were running when + * the xl_running_xacts record that we decoded was written. The array is + * sorted in xidComparator order. 
We remove xids from this array when
+ * they become old enough to matter, and then it eventually becomes empty.
+ * This array is allocated in builder->context, so its lifetime is the same
+ * as the snapshot builder's.
+ *
+ * We normally rely on some WAL record types such as HEAP2_NEW_CID to know
+ * if the transaction has changed the catalog. But it could happen that
+ * logical decoding decodes only the commit record of the transaction after
+ * restoring the previously serialized snapshot, in which case we will miss
+ * adding the xid to the snapshot and end up looking at the catalogs with the
+ * wrong snapshot.
+ *
+ * To avoid the above problem, if the COMMIT record of an xid listed in
+ * InitialRunningXacts has the XACT_XINFO_HAS_INVALS flag, we mark both the
+ * top transaction and its subtransactions as containing catalog changes.
+ *
+ * We could end up adding a transaction that didn't change the catalog
+ * to the snapshot, since we cannot distinguish whether the transaction
+ * has catalog changes only by checking the COMMIT record. It doesn't
+ * carry information on which (sub)transaction has catalog changes,
+ * and XACT_XINFO_HAS_INVALS doesn't necessarily indicate that the
+ * transaction has catalog changes. But that won't be a problem, since we
+ * use the snapshot built during decoding only for reading system catalogs.
+ */
+static TransactionId *InitialRunningXacts = NULL;
+static int	NInitialRunningXacts = 0;
+
+/* ->committed and InitialRunningXacts manipulation */
+static void SnapBuildPurgeOlderTxn(SnapBuild *builder);

 /* snapshot building/manipulation/distribution functions */
 static Snapshot SnapBuildBuildSnapshot(SnapBuild *builder);
@@ -879,12 +909,17 @@ SnapBuildAddCommittedTxn(SnapBuild *builder, TransactionId xid)
 }

 /*
- * Remove knowledge about transactions we treat as committed that are smaller
- * than ->xmin. Those won't ever get checked via the ->committed array but via
- * the clog machinery, so we don't need to waste memory on them.
+ * Remove knowledge about transactions we treat as committed and the initial
+ * running transactions that are smaller than ->xmin. Those won't ever get
+ * checked via the ->committed or InitialRunningXacts array, respectively.
+ * The committed xids will get checked via the clog machinery.
+ *
+ * We could ideally remove a transaction from the InitialRunningXacts array
+ * once it is finished (committed/aborted), but that could be costly as we
+ * need to maintain the xids' order in the array.
  */
 static void
-SnapBuildPurgeCommittedTxn(SnapBuild *builder)
+SnapBuildPurgeOlderTxn(SnapBuild *builder)
 {
 	int			off;
 	TransactionId *workspace;
@@ -919,6 +954,49 @@ SnapBuildPurgeCommittedTxn(SnapBuild *builder)
 	builder->committed.xcnt = surviving_xids;

 	pfree(workspace);
+
+	/* Quick exit if there are no initial running transactions */
+	if (NInitialRunningXacts == 0)
+		return;
+
+	/* Bound check whether there is at least one transaction to remove */
+	if (!NormalTransactionIdPrecedes(InitialRunningXacts[0],
+									 builder->xmin))
+		return;
+
+	/*
+	 * Purge xids in InitialRunningXacts as well. The purged array must also
+	 * be sorted in xidComparator order.
+	 */
+	workspace =
+		MemoryContextAlloc(builder->context,
+						   NInitialRunningXacts * sizeof(TransactionId));
+	surviving_xids = 0;
+	for (off = 0; off < NInitialRunningXacts; off++)
+	{
+		if (NormalTransactionIdPrecedes(InitialRunningXacts[off],
+										builder->xmin))
+			;					/* remove */
+		else
+			workspace[surviving_xids++] = InitialRunningXacts[off];
+	}
+
+	if (surviving_xids > 0)
+		memcpy(InitialRunningXacts, workspace,
+			   sizeof(TransactionId) * surviving_xids);
+	else
+	{
+		pfree(InitialRunningXacts);
+		InitialRunningXacts = NULL;
+	}
+
+	elog(DEBUG3, "purged initial running transactions from %u to %u, oldest running xid %u",
+		 (uint32) NInitialRunningXacts,
+		 (uint32) surviving_xids,
+		 builder->xmin);
+
+	NInitialRunningXacts = surviving_xids;
+	pfree(workspace);
 }

 /*
@@ -1126,7 +1204,7 @@ SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xact
 		builder->xmin = running->oldestRunningXid;

 	/* Remove transactions we don't need to keep track of anymore */
-	SnapBuildPurgeCommittedTxn(builder);
+	SnapBuildPurgeOlderTxn(builder);

 	/*
 	 * Advance the xmin limit for the current replication slot, to allow
@@ -1277,6 +1355,20 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
 	else if (!builder->building_full_snapshot &&
 			 SnapBuildRestore(builder, lsn))
 	{
+		int			nxacts = running->subxcnt + running->xcnt;
+		Size		sz = sizeof(TransactionId) * nxacts;
+
+		/*
+		 * Remember the transactions and subtransactions that were running
+		 * when the xl_running_xacts record that we decoded was written. We
+		 * use this later to identify the transactions that have performed
+		 * catalog changes. See SnapBuildXidSetCatalogChanges.
+		 */
+		NInitialRunningXacts = nxacts;
+		InitialRunningXacts = MemoryContextAlloc(builder->context, sz);
+		memcpy(InitialRunningXacts, running->xids, sz);
+		qsort(InitialRunningXacts, nxacts, sizeof(TransactionId), xidComparator);
+
 		/* there won't be any state to cleanup */
 		return false;
 	}
@@ -1992,3 +2084,34 @@ CheckPointSnapBuild(void)
 	}
 	FreeDir(snap_dir);
 }
+
+/*
+ * If the given xid is in the list of the initial running xacts, we mark the
+ * transaction and its subtransactions as containing catalog changes. See
+ * the comments for NInitialRunningXacts and InitialRunningXacts for
+ * additional info.
+ */
+void
+SnapBuildXidSetCatalogChanges(SnapBuild *builder, TransactionId xid, int subxcnt,
+							  TransactionId *subxacts, XLogRecPtr lsn)
+{
+	/*
+	 * Skip if there is no initial running xacts information or the
+	 * transaction is already marked as containing catalog changes.
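+	 *
+	 * (InitialRunningXacts is maintained in xidComparator order, both when
+	 * it is built and when it is purged, which is what permits the use of
+	 * bsearch() below.)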
+ */ + if (NInitialRunningXacts == 0 || + ReorderBufferXidHasCatalogChanges(builder->reorder, xid)) + return; + + if (bsearch(&xid, InitialRunningXacts, NInitialRunningXacts, + sizeof(TransactionId), xidComparator) != NULL) + { + ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn); + + for (int i = 0; i < subxcnt; i++) + { + ReorderBufferAssignChild(builder->reorder, xid, subxacts[i], lsn); + ReorderBufferXidSetCatalogChanges(builder->reorder, subxacts[i], lsn); + } + } +} diff --git a/src/include/replication/snapbuild.h b/src/include/replication/snapbuild.h index 3604621e888..a19b59e1008 100644 --- a/src/include/replication/snapbuild.h +++ b/src/include/replication/snapbuild.h @@ -90,4 +90,7 @@ extern void SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, struct xl_running_xacts *running); extern void SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn); +extern void SnapBuildXidSetCatalogChanges(SnapBuild *builder, TransactionId xid, + int subxcnt, TransactionId *subxacts, + XLogRecPtr lsn); #endif /* SNAPBUILD_H */ From 2659c7c2fbfc1772e5cbd6d4e58e144a67749cc7 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Fri, 12 Aug 2022 11:16:35 +0530 Subject: [PATCH 03/95] Back-Patch "Add wait_for_subscription_sync for TAP tests." This was originally done in commit 0c20dd33db for 16 only, to eliminate duplicate code and as an infrastructure that makes it easier to write future tests. However, it has been suggested that it would be good to back-patch this testing infrastructure to aid future tests in back-branches. Backpatch to all supported versions. Author: Masahiko Sawada Reviewed by: Amit Kapila, Shi yu Discussion: https://postgr.es/m/CAD21AoC-fvAkaKHa4t1urupwL8xbAcWRePeETvshvy80f6WV1A@mail.gmail.com Discussion: https://postgr.es/m/E1oJBIf-0006sw-SA@gemulon.postgresql.org --- src/test/perl/PostgresNode.pm | 44 +++++++++++++++++++ src/test/subscription/t/001_rep_changes.pl | 18 ++------ src/test/subscription/t/002_types.pl | 9 +--- src/test/subscription/t/004_sync.pl | 18 +++----- src/test/subscription/t/005_encoding.pl | 9 +--- src/test/subscription/t/006_rewrite.pl | 9 +--- src/test/subscription/t/008_diff_schema.pl | 12 ++--- src/test/subscription/t/010_truncate.pl | 8 +--- src/test/subscription/t/011_generated.pl | 5 +-- src/test/subscription/t/013_partition.pl | 20 +++------ src/test/subscription/t/014_binary.pl | 5 +-- src/test/subscription/t/015_stream.pl | 9 +--- src/test/subscription/t/016_stream_subxact.pl | 9 +--- src/test/subscription/t/017_stream_ddl.pl | 9 +--- .../t/018_stream_subxact_abort.pl | 9 +--- .../t/019_stream_subxact_ddl_abort.pl | 9 +--- src/test/subscription/t/021_alter_sub_pub.pl | 18 ++------ src/test/subscription/t/100_bugs.pl | 14 ++---- 18 files changed, 88 insertions(+), 146 deletions(-) diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 241ed8d49e8..ce41857a742 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -2844,6 +2844,50 @@ sub wait_for_slot_catchup =pod +=item $node->wait_for_subscription_sync(publisher, subname, dbname) + +Wait for all tables in pg_subscription_rel to complete the initial +synchronization (i.e to be either in 'syncdone' or 'ready' state). + +If the publisher node is given, additionally, check if the subscriber has +caught up to what has been committed on the primary. This is useful to +ensure that the initial data synchronization has been completed after +creating a new subscription. 
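+For example (a usage sketch; the node and subscription names are the ones
+used by the tests updated below):
+
+  $node_subscriber->wait_for_subscription_sync($node_publisher, 'tap_sub');
+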
+ +If there is no active replication connection from this peer, wait until +poll_query_until timeout. + +This is not a test. It die()s on failure. + +=cut + +sub wait_for_subscription_sync +{ + my ($self, $publisher, $subname, $dbname) = @_; + my $name = $self->name; + + $dbname = defined($dbname) ? $dbname : 'postgres'; + + # Wait for all tables to finish initial sync. + print "Waiting for all subscriptions in \"$name\" to synchronize data\n"; + my $query = + qq[SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');]; + $self->poll_query_until($dbname, $query) + or croak "timed out waiting for subscriber to synchronize data"; + + # Then, wait for the replication to catchup if required. + if (defined($publisher)) + { + croak 'subscription name must be specified' unless defined($subname); + $publisher->wait_for_catchup($subname); + } + + print "done\n"; + return; +} + +=pod + =item $node->wait_for_log(regexp, offset) Waits for the contents of the server log file, starting at the given offset, to diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl index 7dd69caacbd..2aedd9fb57a 100644 --- a/src/test/subscription/t/001_rep_changes.pl +++ b/src/test/subscription/t/001_rep_changes.pl @@ -102,13 +102,8 @@ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr' PUBLICATION tap_pub, tap_pub_ins_only" ); -$node_publisher->wait_for_catchup('tap_sub'); - -# Also wait for initial table sync to finish -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial table sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tap_sub'); my $result = $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_notrep"); @@ -237,13 +232,8 @@ "CREATE SUBSCRIPTION tap_sub_temp1 CONNECTION '$publisher_connstr' PUBLICATION tap_pub_temp1, tap_pub_temp2" ); -$node_publisher->wait_for_catchup('tap_sub_temp1'); - -# Also wait for initial table sync to finish -$synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial table sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tap_sub_temp1'); # Subscriber table will have no rows initially $result = diff --git a/src/test/subscription/t/002_types.pl b/src/test/subscription/t/002_types.pl index f915fad1ae3..f73b8efdbc2 100644 --- a/src/test/subscription/t/002_types.pl +++ b/src/test/subscription/t/002_types.pl @@ -114,13 +114,8 @@ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr' PUBLICATION tap_pub WITH (slot_name = tap_sub_slot)" ); -$node_publisher->wait_for_catchup('tap_sub'); - -# Wait for initial sync to finish as well -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('s', 'r');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tap_sub'); # Insert initial test data $node_publisher->safe_psql( diff --git a/src/test/subscription/t/004_sync.pl b/src/test/subscription/t/004_sync.pl index b3c91af21d1..959e47fad5e 
100644 --- a/src/test/subscription/t/004_sync.pl +++ b/src/test/subscription/t/004_sync.pl @@ -39,13 +39,8 @@ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr' PUBLICATION tap_pub" ); -$node_publisher->wait_for_catchup('tap_sub'); - -# Also wait for initial table sync to finish -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial table sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tap_sub'); my $result = $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep"); @@ -71,8 +66,7 @@ $node_subscriber->safe_psql('postgres', "DELETE FROM tab_rep;"); # wait for sync to finish this time -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +$node_subscriber->wait_for_subscription_sync; # check that all data is synced $result = @@ -107,8 +101,7 @@ ); # and wait for data sync to finish again -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +$node_subscriber->wait_for_subscription_sync; # check that all data is synced $result = @@ -133,8 +126,7 @@ "ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION"); # wait for sync to finish -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +$node_subscriber->wait_for_subscription_sync; $result = $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep_next"); diff --git a/src/test/subscription/t/005_encoding.pl b/src/test/subscription/t/005_encoding.pl index a3f56a452f3..407f7164ab6 100644 --- a/src/test/subscription/t/005_encoding.pl +++ b/src/test/subscription/t/005_encoding.pl @@ -32,13 +32,8 @@ "CREATE SUBSCRIPTION mysub CONNECTION '$publisher_connstr' PUBLICATION mypub;" ); -$node_publisher->wait_for_catchup('mysub'); - -# Wait for initial sync to finish as well -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('s', 'r');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, 'mysub'); $node_publisher->safe_psql('postgres', q{INSERT INTO test1 VALUES (1, E'Mot\xc3\xb6rhead')}); # hand-rolled UTF-8 diff --git a/src/test/subscription/t/006_rewrite.pl b/src/test/subscription/t/006_rewrite.pl index 37e05a401af..0aeed5f62db 100644 --- a/src/test/subscription/t/006_rewrite.pl +++ b/src/test/subscription/t/006_rewrite.pl @@ -28,13 +28,8 @@ "CREATE SUBSCRIPTION mysub CONNECTION '$publisher_connstr' PUBLICATION mypub;" ); -$node_publisher->wait_for_catchup('mysub'); - -# Wait for initial sync to finish as well -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('s', 'r');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, 'mysub'); $node_publisher->safe_psql('postgres', q{INSERT INTO test1 (a, b) VALUES (1, 'one'), (2, 'two');}); diff --git a/src/test/subscription/t/008_diff_schema.pl 
b/src/test/subscription/t/008_diff_schema.pl index a04a798a187..b296c9a9d47 100644 --- a/src/test/subscription/t/008_diff_schema.pl +++ b/src/test/subscription/t/008_diff_schema.pl @@ -38,13 +38,8 @@ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr' PUBLICATION tap_pub" ); -$node_publisher->wait_for_catchup('tap_sub'); - -# Also wait for initial table sync to finish -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial table sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tap_sub'); my $result = $node_subscriber->safe_psql('postgres', @@ -105,8 +100,7 @@ $node_subscriber->safe_psql('postgres', "ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION"); -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +$node_subscriber->wait_for_subscription_sync; # Add replica identity column. (The serial is not necessary, but it's # a convenient way to get a default on the new column so that rows diff --git a/src/test/subscription/t/010_truncate.pl b/src/test/subscription/t/010_truncate.pl index 5617469a2c3..21f25466b88 100644 --- a/src/test/subscription/t/010_truncate.pl +++ b/src/test/subscription/t/010_truncate.pl @@ -67,10 +67,7 @@ ); # Wait for initial sync of all subscriptions -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +$node_subscriber->wait_for_subscription_sync; # insert data to truncate @@ -211,8 +208,7 @@ ); # wait for initial data sync -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +$node_subscriber->wait_for_subscription_sync; # insert data to truncate diff --git a/src/test/subscription/t/011_generated.pl b/src/test/subscription/t/011_generated.pl index 29108cbcf20..0662c55f082 100644 --- a/src/test/subscription/t/011_generated.pl +++ b/src/test/subscription/t/011_generated.pl @@ -40,10 +40,7 @@ ); # Wait for initial sync of all subscriptions -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +$node_subscriber->wait_for_subscription_sync; my $result = $node_subscriber->safe_psql('postgres', "SELECT a, b FROM tab1"); is( $result, qq(1|22 diff --git a/src/test/subscription/t/013_partition.pl b/src/test/subscription/t/013_partition.pl index dfe2cb6deae..58d78b4292f 100644 --- a/src/test/subscription/t/013_partition.pl +++ b/src/test/subscription/t/013_partition.pl @@ -153,12 +153,8 @@ BEGIN }); # Wait for initial sync of all subscriptions -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_subscriber1->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; -$node_subscriber2->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +$node_subscriber1->wait_for_subscription_sync; +$node_subscriber2->wait_for_subscription_sync; # Tests for 
replication using leaf partition identity and schema @@ -483,10 +479,8 @@ BEGIN "ALTER SUBSCRIPTION sub2 SET PUBLICATION pub_lower_level, pub_all"); # Wait for initial sync of all subscriptions -$node_subscriber1->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; -$node_subscriber2->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +$node_subscriber1->wait_for_subscription_sync; +$node_subscriber2->wait_for_subscription_sync; # check that data is synced correctly $result = $node_subscriber1->safe_psql('postgres', @@ -557,8 +551,7 @@ BEGIN # make sure the subscription on the second subscriber is synced, before # continuing -$node_subscriber2->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +$node_subscriber2->wait_for_subscription_sync; # Insert a change into the leaf partition, should be replicated through # the partition root (thanks to the FOR ALL TABLES partition). @@ -813,8 +806,7 @@ BEGIN $node_subscriber2->safe_psql('postgres', "ALTER SUBSCRIPTION sub2 REFRESH PUBLICATION"); -$node_subscriber2->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +$node_subscriber2->wait_for_subscription_sync; # Make partition map cache $node_publisher->safe_psql('postgres', "INSERT INTO tab5 VALUES (1, 1)"); diff --git a/src/test/subscription/t/014_binary.pl b/src/test/subscription/t/014_binary.pl index 7260378f5e8..ac1786902bc 100644 --- a/src/test/subscription/t/014_binary.pl +++ b/src/test/subscription/t/014_binary.pl @@ -46,10 +46,7 @@ . "PUBLICATION tpub WITH (slot_name = tpub_slot, binary = true)"); # Ensure nodes are in sync with each other -$node_publisher->wait_for_catchup('tsub'); -$node_subscriber->poll_query_until('postgres', - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('s', 'r');" -) or die "Timed out while waiting for subscriber to synchronize data"; +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tsub'); # Insert some content and make sure it's replicated across $node_publisher->safe_psql( diff --git a/src/test/subscription/t/015_stream.pl b/src/test/subscription/t/015_stream.pl index 998650ac86a..79decdb830f 100644 --- a/src/test/subscription/t/015_stream.pl +++ b/src/test/subscription/t/015_stream.pl @@ -41,13 +41,8 @@ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (streaming = on)" ); -$node_publisher->wait_for_catchup($appname); - -# Also wait for initial table sync to finish -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial table sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, $appname); my $result = $node_subscriber->safe_psql('postgres', diff --git a/src/test/subscription/t/016_stream_subxact.pl b/src/test/subscription/t/016_stream_subxact.pl index 0245b0685b1..90219f78a05 100644 --- a/src/test/subscription/t/016_stream_subxact.pl +++ b/src/test/subscription/t/016_stream_subxact.pl @@ -41,13 +41,8 @@ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (streaming = on)" ); -$node_publisher->wait_for_catchup($appname); - -# 
Also wait for initial table sync to finish -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial table sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, $appname); my $result = $node_subscriber->safe_psql('postgres', diff --git a/src/test/subscription/t/017_stream_ddl.pl b/src/test/subscription/t/017_stream_ddl.pl index 35b146827d3..558f2b9d9fc 100644 --- a/src/test/subscription/t/017_stream_ddl.pl +++ b/src/test/subscription/t/017_stream_ddl.pl @@ -41,13 +41,8 @@ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (streaming = on)" ); -$node_publisher->wait_for_catchup($appname); - -# Also wait for initial table sync to finish -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial table sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, $appname); my $result = $node_subscriber->safe_psql('postgres', diff --git a/src/test/subscription/t/018_stream_subxact_abort.pl b/src/test/subscription/t/018_stream_subxact_abort.pl index 7fc60b5bde8..676d54bd2c9 100644 --- a/src/test/subscription/t/018_stream_subxact_abort.pl +++ b/src/test/subscription/t/018_stream_subxact_abort.pl @@ -40,13 +40,8 @@ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (streaming = on)" ); -$node_publisher->wait_for_catchup($appname); - -# Also wait for initial table sync to finish -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial table sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, $appname); my $result = $node_subscriber->safe_psql('postgres', diff --git a/src/test/subscription/t/019_stream_subxact_ddl_abort.pl b/src/test/subscription/t/019_stream_subxact_ddl_abort.pl index 81149b86a99..9047a273ce3 100644 --- a/src/test/subscription/t/019_stream_subxact_ddl_abort.pl +++ b/src/test/subscription/t/019_stream_subxact_ddl_abort.pl @@ -41,13 +41,8 @@ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (streaming = on)" ); -$node_publisher->wait_for_catchup($appname); - -# Also wait for initial table sync to finish -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial table sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, $appname); my $result = $node_subscriber->safe_psql('postgres', diff --git a/src/test/subscription/t/021_alter_sub_pub.pl b/src/test/subscription/t/021_alter_sub_pub.pl index 104eddb64d6..4c59d44e33f 100644 --- a/src/test/subscription/t/021_alter_sub_pub.pl +++ b/src/test/subscription/t/021_alter_sub_pub.pl @@ -38,13 +38,7 @@ ); # Wait for initial table sync to finish -my $synced_query = - "SELECT count(1) 
= 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; - -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; - -$node_publisher->wait_for_catchup('tap_sub'); +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tap_sub'); # Check the initial data of tab_1 is copied to subscriber my $result = $node_subscriber->safe_psql('postgres', @@ -68,10 +62,7 @@ "ALTER SUBSCRIPTION tap_sub DROP PUBLICATION tap_pub_1"); # Wait for initial table sync to finish -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; - -$node_publisher->wait_for_catchup('tap_sub'); +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tap_sub'); # Check the initial data of tab_drop_refresh was copied to subscriber $result = $node_subscriber->safe_psql('postgres', @@ -83,10 +74,7 @@ "ALTER SUBSCRIPTION tap_sub ADD PUBLICATION tap_pub_1"); # Wait for initial table sync to finish -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; - -$node_publisher->wait_for_catchup('tap_sub'); +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tap_sub'); # Check the initial data of tab_1 was copied to subscriber again $result = $node_subscriber->safe_psql('postgres', diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl index 424ffb79c3e..91602c43399 100644 --- a/src/test/subscription/t/100_bugs.pl +++ b/src/test/subscription/t/100_bugs.pl @@ -144,12 +144,7 @@ # We cannot rely solely on wait_for_catchup() here; it isn't sufficient # when tablesync workers might still be running. So in addition to that, # verify that tables are synced. -# XXX maybe this should be integrated in wait_for_catchup() itself. -$node_twoways->wait_for_catchup('testsub'); -my $synced_query = - "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; -$node_twoways->poll_query_until('d2', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +$node_twoways->wait_for_subscription_sync($node_twoways, 'testsub', 'd2'); is($node_twoways->safe_psql('d2', "SELECT count(f) FROM t"), $rows * 2, "2x$rows rows in t"); @@ -278,11 +273,8 @@ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr' PUBLICATION tap_pub" ); -$node_publisher->wait_for_catchup('tap_sub'); - -# Also wait for initial table sync to finish -$node_subscriber->poll_query_until('postgres', $synced_query) - or die "Timed out while waiting for subscriber to synchronize data"; +# Wait for initial table sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tap_sub'); is( $node_subscriber->safe_psql( 'postgres', "SELECT * FROM tab_replidentity_index"), From 377d3fa00344921e95e1d46cf1eb011c6aed59b9 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 12 Aug 2022 09:06:48 -0400 Subject: [PATCH 04/95] doc: clarify DROP EXTENSION dependent members text Member tracking was added in PG 13. Reported-by: David G. Johnston Discussion: https://postgr.es/m/CAKFQuwY1YtxQHVWUFYvSnOjZ5VPpXjF33V52bSKEwFjK2K=1Aw@mail.gmail.com Author: David G. 
Johnston Backpatch-through: 13 --- doc/src/sgml/ref/drop_extension.sgml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/ref/drop_extension.sgml b/doc/src/sgml/ref/drop_extension.sgml index dcc52c2ced0..484e5d9b11a 100644 --- a/doc/src/sgml/ref/drop_extension.sgml +++ b/doc/src/sgml/ref/drop_extension.sgml @@ -30,7 +30,7 @@ DROP EXTENSION [ IF EXISTS ] name [ DROP EXTENSION removes extensions from the database. - Dropping an extension causes its component objects, and other explicitly + Dropping an extension causes its member objects, and other explicitly dependent routines (see , the DEPENDS ON EXTENSION extension_name action), to be dropped as well. @@ -80,9 +80,9 @@ DROP EXTENSION [ IF EXISTS ] name [ RESTRICT - This option prevents the specified extensions from being dropped - if there exist non-extension-member objects that depend on any - of the extensions. This is the default. + This option prevents the specified extensions from being dropped if + other objects, besides these extensions, their members, and their + explicitly dependent routines, depend on them.  This is the default. From 1c3449cb00c5c063da7a76ae06b1ace048f886c3 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 12 Aug 2022 10:30:01 -0400 Subject: [PATCH 05/95] doc: improve wal_level docs for the 'minimal' level Reported-by: David G. Johnston Discussion: https://postgr.es/m/CAKFQuwZ24UcfkoyLLSW3PMGQATomOcw1nuYFRuMev-NoOF+mYw@mail.gmail.com Author: David G. Johnston Backpatch-through: 14, partial to 13 --- doc/src/sgml/config.sgml | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index bc3d0d1bd14..cdb5fabaa62 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2768,9 +2768,10 @@ include_dir 'conf.d' levels. This parameter can only be set at server start. - In minimal level, no information is logged for - permanent relations for the remainder of a transaction that creates or - rewrites them. This can make operations much faster (see + The minimal level generates the least WAL + volume. It logs no row information for permanent relations + in transactions that create or + rewrite them. This can make operations much faster (see ). Operations that initiate this optimization include: @@ -2782,10 +2783,12 @@ include_dir 'conf.d' REINDEX TRUNCATE - But minimal WAL does not contain enough information to reconstruct the - data from a base backup and the WAL logs, so replica or - higher must be used to enable WAL archiving - () and streaming replication. + However, minimal WAL does not contain sufficient information for + point-in-time recovery, so replica or + higher must be used to enable continuous archiving + () and streaming binary replication. + In fact, the server will not even start in this mode if + max_wal_senders is non-zero. Note that changing wal_level to minimal makes any base backups taken before unavailable for archive recovery and standby server, which may @@ -2793,8 +2796,8 @@ include_dir 'conf.d' In logical level, the same information is logged as - with replica, plus information needed to allow - extracting logical change sets from the WAL. Using a level of + with replica, plus information needed to + extract logical change sets from the WAL. 
Using a level of logical will increase the WAL volume, particularly if many tables are configured for REPLICA IDENTITY FULL and many UPDATE and DELETE statements are From e68b35a2ff639c99f65075a2fb9c63753c5797cb Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 12 Aug 2022 10:59:00 -0400 Subject: [PATCH 06/95] doc: clarify CREATE TABLE AS ... IF NOT EXISTS Mention that the table is not modified if it already exists. Reported-by: frank_limpert@yahoo.com Discussion: https://postgr.es/m/164441177106.9677.5991676148704507229@wrigleys.postgresql.org Backpatch-through: 10 --- doc/src/sgml/ref/create_table_as.sgml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/ref/create_table_as.sgml b/doc/src/sgml/ref/create_table_as.sgml index 936184ed83e..ec62dd39ef8 100644 --- a/doc/src/sgml/ref/create_table_as.sgml +++ b/doc/src/sgml/ref/create_table_as.sgml @@ -111,9 +111,8 @@ where storage_parameter is: IF NOT EXISTS - Do not throw an error if a relation with the same name already exists. - A notice is issued in this case. Refer to - for details. + Do not throw an error if a relation with the same name already + exists; simply issue a notice and leave the table unmodified. From 021defb24f4ef7924b36fbe63bff396c14b0f163 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 12 Aug 2022 11:26:03 -0400 Subject: [PATCH 07/95] doc: document the CREATE INDEX "USING" clause Somehow this was in the syntax but had no description. Reported-by: robertcorrington@gmail.com Discussion: https://postgr.es/m/164228771825.31954.2719791849363756957@wrigleys.postgresql.org Backpatch-through: 10 --- doc/src/sgml/ref/create_index.sgml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index a0520c8c052..dcb0ed88791 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -148,6 +148,18 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] + + USING + + + The optional USING clause specifies an index + type as described in . If not + specified, a default index type will be used based on the + data types of the columns. + + + + INCLUDE From fc165d011c475fc9593ff82f10ffb4e559f7cc58 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 12 Aug 2022 11:35:23 -0400 Subject: [PATCH 08/95] doc: clarify configuration file for Windows builds The use of file 'config.pl' was not clearly explained. Reported-by: liambowen@gmail.com Discussion: https://postgr.es/m/164246013804.31952.4958087335645367498@wrigleys.postgresql.org Backpatch-through: 10 --- doc/src/sgml/install-windows.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/install-windows.sgml b/doc/src/sgml/install-windows.sgml index 4ba220d1a22..176f15dea09 100644 --- a/doc/src/sgml/install-windows.sgml +++ b/doc/src/sgml/install-windows.sgml @@ -128,7 +128,7 @@ - Before you build, you may need to edit the file config.pl + Before you build, you can create the file config.pl to reflect any configuration options you want to change, or the paths to any third party libraries to use. 
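+   For example, a minimal <filename>config.pl</filename> might contain just
+   a line like <literal>$config->{python} = 'c:\python310';</literal>
+   (an illustrative override; the path shown is hypothetical).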
The complete configuration is determined by first reading and parsing the file config_default.pl, From 8a8a43cad697bf5554c2c240cdeece66248025f6 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 12 Aug 2022 12:02:20 -0400 Subject: [PATCH 09/95] doc: warn about security issues around log files Reported-by: Simon Riggs Discussion: https://postgr.es/m/CANP8+jJESuuXYq9Djvf-+tx2vY2OFLmfEuu+UvwHNJ1RT7iJCQ@mail.gmail.com Author: Simon Riggs Backpatch-through: 10 --- doc/src/sgml/config.sgml | 11 +++++++++++ doc/src/sgml/maintenance.sgml | 20 +++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index cdb5fabaa62..71a9942aabc 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -6665,6 +6665,13 @@ local0.* /var/log/postgresql What to Log + + + What you choose to log can have security implications; see + . + + + @@ -7257,6 +7264,10 @@ log_line_prefix = '%m [%p] %q%u@%d/%a ' planning). Set log_min_error_statement to ERROR (or lower) to log such statements. + + Logged statements might reveal sensitive data and even contain + plaintext passwords. + diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 693c92d9cba..eaffb5a6592 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -958,7 +958,25 @@ analyze threshold = analyze base threshold + analyze scale factor * number of tu It is a good idea to save the database server's log output somewhere, rather than just discarding it via /dev/null. The log output is invaluable when diagnosing - problems. However, the log output tends to be voluminous + problems. + + + + + The server log can contain sensitive information and needs to be protected, + no matter how or where it is stored, or the destination to which it is routed. + For example, some DDL statements might contain plaintext passwords or other + authentication details. Logged statements at the ERROR + level might show the SQL source code for applications + and might also contain some parts of data rows. Recording data, events and + related information is the intended function of this facility, so this is + not a leakage or a bug. Please ensure the server logs are visible only to + appropriately authorized people. + + + + + Log output tends to be voluminous (especially at higher debug levels) so you won't want to save it indefinitely. You need to rotate the log files so that new log files are started and old ones removed after a reasonable From cf901ee071c1ed1991cd8ceff570a6872fd91a7d Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 12 Aug 2022 15:05:13 -0400 Subject: [PATCH 10/95] doc: add section about heap-only tuples (HOT) Reported-by: Jonathan S. 
Katz Discussion: https://postgr.es/m/c59ffbd5-96ac-a5a5-a401-14f627ca1405@postgresql.org Backpatch-through: 11 --- doc/src/sgml/acronyms.sgml | 4 +- doc/src/sgml/btree.sgml | 3 +- doc/src/sgml/catalogs.sgml | 2 +- doc/src/sgml/config.sgml | 3 +- doc/src/sgml/indexam.sgml | 3 +- doc/src/sgml/indices.sgml | 6 ++- doc/src/sgml/monitoring.sgml | 2 +- doc/src/sgml/ref/create_table.sgml | 4 +- doc/src/sgml/storage.sgml | 70 ++++++++++++++++++++++++++++++ 9 files changed, 86 insertions(+), 11 deletions(-) diff --git a/doc/src/sgml/acronyms.sgml b/doc/src/sgml/acronyms.sgml index 9ed148ab842..2df6559accc 100644 --- a/doc/src/sgml/acronyms.sgml +++ b/doc/src/sgml/acronyms.sgml @@ -299,9 +299,7 @@ HOT - Heap-Only - Tuples + Heap-Only Tuples diff --git a/doc/src/sgml/btree.sgml b/doc/src/sgml/btree.sgml index a9200ee52e6..6f608a14bf3 100644 --- a/doc/src/sgml/btree.sgml +++ b/doc/src/sgml/btree.sgml @@ -639,7 +639,8 @@ options(relopts local_relopts *) returns accumulate and adversely affect query latency and throughput. This typically occurs with UPDATE-heavy workloads where most individual updates cannot apply the - HOT optimization. Changing the value of only + HOT optimization. + Changing the value of only one column covered by one index during an UPDATE always necessitates a new set of index tuples — one for each and every index on the diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 533856b12d7..b8b82208a40 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -4326,7 +4326,7 @@ SCRAM-SHA-256$<iteration count>:&l If true, queries must not use the index until the xmin of this pg_index row is below their TransactionXmin - event horizon, because the table may contain broken HOT chains with + event horizon, because the table may contain broken HOT chains with incompatible rows that they can see diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 71a9942aabc..514cac29e60 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4395,7 +4395,8 @@ ANY num_sync ( HOT updates + will defer cleanup of dead row versions. The default is zero transactions, meaning that dead row versions can be removed as soon as possible, that is, as soon as they are no longer visible to any open transaction. You may wish to set this to a diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index cf359fa9ffd..4f83970c851 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -45,7 +45,8 @@ extant versions of the same logical row; to an index, each tuple is an independent object that needs its own index entry. Thus, an update of a row always creates all-new index entries for the row, even if - the key values did not change. (HOT tuples are an exception to this + the key values did not change. (HOT + tuples are an exception to this statement; but indexes do not deal with those, either.) Index entries for dead tuples are reclaimed (by vacuuming) when the dead tuples themselves are reclaimed. diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index 023157d8884..2a70e02f7c4 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -103,7 +103,9 @@ CREATE INDEX test1_id_index ON test1 (id); After an index is created, the system has to keep it synchronized with the - table. This adds overhead to data manipulation operations. + table. This adds overhead to data manipulation operations. Indexes can + also prevent the creation of heap-only + tuples. 
Therefore indexes that are seldom or never used in queries should be removed. @@ -749,7 +751,7 @@ CREATE INDEX people_names ON people ((first_name || ' ' || last_name)); Index expressions are relatively expensive to maintain, because the derived expression(s) must be computed for each row insertion - and non-HOT update. However, the index expressions are + and non-HOT update. However, the index expressions are not recomputed during an indexed search, since they are already stored in the index. In both examples above, the system sees the query as just WHERE indexedcolumn = 'constant' diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 949bba7c768..ef795a752bf 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -4166,7 +4166,7 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i n_tup_upd bigint - Number of rows updated (includes HOT updated rows) + Number of rows updated (includes HOT updated rows) diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index c20cf1a5a6e..4a0b6dcfcd4 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -1487,7 +1487,9 @@ Where column_reference_storage_directive is: to the indicated percentage; the remaining space on each page is reserved for updating rows on that page. This gives UPDATE a chance to place the updated copy of a row on the same page as the - original, which is more efficient than placing it on a different page. + original, which is more efficient than placing it on a different + page, and makes heap-only tuple + updates more likely. For a table whose entries are never updated, complete packing is the best choice, but in heavily updated tables smaller fillfactors are appropriate. This parameter cannot be set for TOAST tables. diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml index 96b373f990b..96a275a1110 100644 --- a/doc/src/sgml/storage.sgml +++ b/doc/src/sgml/storage.sgml @@ -1079,4 +1079,74 @@ data. Empty in ordinary tables. + + + Heap-Only Tuples (<acronym>HOT</acronym>) + + + To allow for high concurrency, PostgreSQL + uses multiversion concurrency + control (MVCC) to store rows. However, + MVCC has some downsides for update queries. + Specifically, updates require new versions of rows to be added to + tables. This can also require new index entries for each updated row, + and removal of old versions of rows and their index entries can be + expensive. + + + + To help reduce the overhead of updates, + PostgreSQL has an optimization called + heap-only tuples (HOT). This optimization is + possible when: + + + + + The update does not modify any columns referenced by the table's + indexes, including expression and partial indexes. + + + + + There is sufficient free space on the page containing the old row + for the updated row. + + + + + In such cases, heap-only tuples provide two optimizations: + + + + + New index entries are not needed to represent updated rows. + + + + + Old versions of updated rows can be completely removed during normal + operation, including SELECTs, instead of requiring + periodic vacuum operations. (This is possible because indexes + do not reference their page + item identifiers.) + + + + + + + In summary, heap-only tuple updates can only be created + if columns used by indexes are not updated. You can + increase the likelihood of sufficient page space for + HOT updates by decreasing a table's fillfactor. 
+ If you don't, HOT updates will still happen because + new rows will naturally migrate to new pages and existing pages with + sufficient free space for new row versions. The system view pg_stat_all_tables + allows monitoring of the occurrence of HOT and non-HOT updates. + + + From 4db1c8ce959af00e44ab73b432eb335957e05267 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 12 Aug 2022 15:43:23 -0400 Subject: [PATCH 11/95] doc: add missing role attributes to user management section Reported-by: Shinya Kato Discussion: https://postgr.es/m/1ecdb1ff78e9b03dfce37e85eaca725a@oss.nttdata.com Author: Shinya Kato Backpatch-through: 10 --- doc/src/sgml/user-manag.sgml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/doc/src/sgml/user-manag.sgml b/doc/src/sgml/user-manag.sgml index 7b4b2271734..5420567896d 100644 --- a/doc/src/sgml/user-manag.sgml +++ b/doc/src/sgml/user-manag.sgml @@ -236,6 +236,39 @@ CREATE USER name; + + + inheritance of privilegesroleprivilege to inherit + + + A role is given permission to inherit the privileges of roles it is a + member of, by default. However, to create a role without the permission, + use CREATE ROLE name NOINHERIT. + + + + + + bypassing row-level securityroleprivilege to bypass + + + A role must be explicitly given permission to bypass every row-level security (RLS) policy + (except for superusers, since those bypass all permission checks). + To create such a role, use CREATE ROLE name BYPASSRLS as a superuser. + + + + + + connection limitroleprivilege to limit connection + + + Connection limit can specify how many concurrent connections a role can make. + -1 (the default) means no limit. Specify connection limit upon role creation with + CREATE ROLE name CONNECTION LIMIT 'integer'. + + + A role's attributes can be modified after creation with From 8183d6be1907303b5bb3e0235708c2472f60d13f Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Sat, 13 Aug 2022 00:00:41 +0200 Subject: [PATCH 12/95] pg_upgrade: Fix some minor code issues 96ef3b8ff1cf1950e897fd2f766d4bd9ef0d5d56 accidentally copied a not applicable comment from the float8_pass_by_value code to the data_checksums code. Remove that. 87d3b35a1ca31a9d947a8f919a6006679216dff0 changed pg_upgrade to checking the checksum version rather than just the Boolean presence of checksums, but didn't change the field type in its ControlData struct from bool. So this would not work correctly if there ever is a checksum version larger than 1. --- src/bin/pg_upgrade/pg_upgrade.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 9b1775e5435..2938ea76845 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -324,7 +324,7 @@ typedef struct bool date_is_int; bool float8_pass_by_value; uint32 data_checksum_version; - int file_encryption_method; + int file_encryption_method; } ControlData; /* From fa1ee5e00007f63693e9e9c811e9a6be95fe1686 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 13 Aug 2022 15:21:28 -0400 Subject: [PATCH 13/95] Catch stack overflow when recursing in transformFromClauseItem(). Most parts of the parser can expect that the stack overflow check in transformExprRecurse() will trigger before things get desperate. However, transformFromClauseItem() can recurse directly to self without having analyzed any expressions, so it's possible to drive it to a stack-overrun crash. Add a check to prevent that. Per bug #17583 from Egor Chindyaskin. 
Back-patch to all supported branches. Richard Guo Discussion: https://postgr.es/m/17583-33be55b9f981f75c@postgresql.org --- src/backend/parser/parse_clause.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index c5be2bb5855..b2cbcc2f78b 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -1270,6 +1270,9 @@ transformFromClauseItem(ParseState *pstate, Node *n, ParseNamespaceItem **top_nsitem, List **namespace) { + /* Guard against stack overflow due to overly deep subtree */ + check_stack_depth(); + if (IsA(n, RangeVar)) { /* Plain relation reference, or perhaps a CTE reference */ From b02db5ca24ba125a663e4f105d1ecbea44624103 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 13 Aug 2022 16:59:58 -0400 Subject: [PATCH 14/95] Avoid misbehavior when hash_table_bytes < bucket_size. It's possible to reach this case when work_mem is very small and tupsize is (relatively) very large. In that case ExecChooseHashTableSize would get an assertion failure, or with asserts off it'd compute nbuckets = 0, which'd likely cause misbehavior later (I've not checked). To fix, clamp the number of buckets to be at least 1. This is due to faulty conversion of old my_log2() coding in 28d936031. Back-patch to v13, as that was. Zhang Mingli Discussion: https://postgr.es/m/beb64ca0-91e2-44ac-bf4a-7ea36275ec02@Spark --- src/backend/executor/nodeHash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 62d3c2da790..8779d93b06f 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -965,7 +965,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew, */ bucket_size = (tupsize * gp_hashjoin_tuples_per_bucket + sizeof(HashJoinTuple)); if (hash_table_bytes < bucket_size) - sbuckets = 1; + sbuckets = 1; /* avoid pg_nextpower2_size_t(0) */ else sbuckets = pg_nextpower2_size_t(hash_table_bytes / bucket_size); sbuckets = Min(sbuckets, max_pointers); From 9133edcb4e83e5e6019c0714905cfbc605d7d5fe Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 15 Aug 2022 13:37:38 +0900 Subject: [PATCH 15/95] Fix outdated --help message for postgres -f This option switch supports a total of 8 values, as told by set_plan_disabling_options() and the documentation, but this was not reflected in the output generated by --help. Author: Junwang Zhao Discussion: https://postgr.es/m/CAEG8a3+pT3cWzyjzKs184L1XMNm8NDnoJLiSjAYSO7XqpRh_vA@mail.gmail.com Backpatch-through: 10 --- src/backend/main/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/main/main.c b/src/backend/main/main.c index d0b32114656..a003f9ccab2 100644 --- a/src/backend/main/main.c +++ b/src/backend/main/main.c @@ -395,7 +395,7 @@ help(const char *progname) printf(_(" --catalog-version output the catalog version, then exit\n")); printf(_("\nDeveloper options:\n")); - printf(_(" -f s|i|n|m|h forbid use of some plan types\n")); + printf(_(" -f s|i|o|b|t|n|m|h forbid use of some plan types\n")); printf(_(" -n do not reinitialize shared memory after abnormal exit\n")); printf(_(" -O allow system table structure changes\n")); printf(_(" -P disable system indexes\n")); From 7c07ddb106b01c43e0d20749fd26fddb0b7fc4c2 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 15 Aug 2022 15:40:07 -0400 Subject: [PATCH 16/95] Add missing bad-PGconn guards in libpq entry points. 
There's a convention that externally-visible libpq functions should check for a NULL PGconn pointer, and fail gracefully instead of crashing. PQflush() and PQisnonblocking() didn't get that memo though. Also add a similar check to PQdefaultSSLKeyPassHook_OpenSSL; while it's not clear that ordinary usage could reach that with a null conn pointer, it's cheap enough to check, so let's be consistent. Daniele Varrazzo and Tom Lane Discussion: https://postgr.es/m/CA+mi_8Zm_mVVyW1iNFgyMd9Oh0Nv8-F+7Y3-BqwMgTMHuo_h2Q@mail.gmail.com --- src/interfaces/libpq/fe-exec.c | 4 ++++ src/interfaces/libpq/fe-secure-openssl.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index ea3a78420ca..8a69cb5164c 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -3864,6 +3864,8 @@ PQsetnonblocking(PGconn *conn, int arg) int PQisnonblocking(const PGconn *conn) { + if (!conn || conn->status == CONNECTION_BAD) + return false; return pqIsnonblocking(conn); } @@ -3883,6 +3885,8 @@ PQisthreadsafe(void) int PQflush(PGconn *conn) { + if (!conn || conn->status == CONNECTION_BAD) + return -1; return pqFlush(conn); } diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c index 5c6b317caa7..c0988e10a30 100644 --- a/src/interfaces/libpq/fe-secure-openssl.c +++ b/src/interfaces/libpq/fe-secure-openssl.c @@ -1809,7 +1809,7 @@ my_SSL_set_fd(PGconn *conn, int fd) int PQdefaultSSLKeyPassHook_OpenSSL(char *buf, int size, PGconn *conn) { - if (conn->sslpassword) + if (conn && conn->sslpassword) { if (strlen(conn->sslpassword) + 1 > size) fprintf(stderr, libpq_gettext("WARNING: sslpassword truncated\n")); From a71825a509effb04ab0ffee82a95678fc4710a2b Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Tue, 16 Aug 2022 09:27:34 +0900 Subject: [PATCH 17/95] doc: fix wrong tag used in create sequence manual. In ref/create_sequence.sgml tag was used for nextval function name. This should have been tag. Author: Noboru Saito Discussion: https://postgr.es/m/CAAM3qnJTDFFfRf5JHJ4AYrNcqXgMmj0pbH0%2Bvm%3DYva%2BpJyGymA%40mail.gmail.com Backpatch-through: 10 --- doc/src/sgml/ref/create_sequence.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/ref/create_sequence.sgml b/doc/src/sgml/ref/create_sequence.sgml index e4085804a4d..7dff75fd9d3 100644 --- a/doc/src/sgml/ref/create_sequence.sgml +++ b/doc/src/sgml/ref/create_sequence.sgml @@ -288,7 +288,7 @@ SELECT * FROM name; session A might reserve values 1..10 and return nextval=1, then session B might reserve values 11..20 and return nextval=11 before session A - has generated nextval=2. Thus, with a + has generated nextval=2. Thus, with a cache setting of one it is safe to assume that nextval values are generated sequentially; with a Date: Tue, 16 Aug 2022 14:51:42 +0530 Subject: [PATCH 18/95] Fix replica identity check for a partitioned table. The current publisher code checks if UPDATE or DELETE can be executed with the replica identity of the table even if it's a partitioned table. We can skip checking the replica identity for partitioned tables because the operations are actually performed on the leaf partitions (not the partitioned table). 
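As an illustration, a minimal SQL sketch distilled from the new regression test (the table and publication names here are invented):

    CREATE TABLE parted (a int) PARTITION BY RANGE (a);
    CREATE TABLE parted1 PARTITION OF parted FOR VALUES FROM (0) TO (10);
    CREATE PUBLICATION pub FOR TABLE parted;
    -- Succeeds after the fix: the partitioned parent's replica identity is
    -- no longer checked, and no leaf-partition row is actually modified.
    UPDATE parted SET a = 1 WHERE false;
    -- Still fails at executor startup, as it should: the leaf partition is
    -- checked as before, and it has no replica identity while its ancestor's
    -- publication replicates updates.
    UPDATE parted1 SET a = 1;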
Reported-by: Brad Nicholson Author: Hou Zhijie Reviewed-by: Peter Smith, Amit Kapila Backpatch-through: 13 Discussion: https://postgr.es/m/CAMMnM%3D8i5DohH%3DYKzV0_wYuYSYvuOJoL9F5nzXTc%2ByzsG1f6rg%40mail.gmail.com --- src/backend/executor/execReplication.c | 7 +++++++ src/test/regress/expected/publication.out | 2 ++ src/test/regress/sql/publication.sql | 2 ++ 3 files changed, 11 insertions(+) diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index f9277c1d80d..8202e050ec8 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -569,6 +569,13 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd) { PublicationActions *pubactions; + /* + * Skip checking the replica identity for partitioned tables, because the + * operations are actually performed on the leaf partitions. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + return; + /* We only need to do checks for UPDATE and DELETE. */ if (cmd != CMD_UPDATE && cmd != CMD_DELETE) return; diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out index 7c7e0226658..b7ce080424f 100644 --- a/src/test/regress/expected/publication.out +++ b/src/test/regress/expected/publication.out @@ -140,6 +140,8 @@ ALTER PUBLICATION testpub_forparted ADD TABLE testpub_parted; Tables: "public.testpub_parted" +-- works despite missing REPLICA IDENTITY, because no actual update happened +UPDATE testpub_parted SET a = 1 WHERE false; -- should now fail, because parent's publication replicates updates UPDATE testpub_parted1 SET a = 1; ERROR: cannot update table "testpub_parted1" because it does not have a replica identity and publishes updates diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql index 4b7738395ec..7d5c9373845 100644 --- a/src/test/regress/sql/publication.sql +++ b/src/test/regress/sql/publication.sql @@ -85,6 +85,8 @@ UPDATE testpub_parted1 SET a = 1; -- only parent is listed as being in publication, not the partition ALTER PUBLICATION testpub_forparted ADD TABLE testpub_parted; \dRp+ testpub_forparted +-- works despite missing REPLICA IDENTITY, because no actual update happened +UPDATE testpub_parted SET a = 1 WHERE false; -- should now fail, because parent's publication replicates updates UPDATE testpub_parted1 SET a = 1; ALTER TABLE testpub_parted DETACH PARTITION testpub_parted1; From b8d417460e3a671c96ed096416ea54c8bb714a01 Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Tue, 16 Aug 2022 22:54:43 +0200 Subject: [PATCH 19/95] doc: Remove reference to tty libpq connstring param The tty connection string parameter was removed in commit 14d9b3760 but the reference to it in the docs was mistakenly kept. Fix by removing it from the libpq documentation. Backpatch through v14 where the parameter was removed. Author: Noriyoshi Shinoda Discussion: https://postgr.es/m/DM4PR84MB173433216FCC2A3961879000EE6B9@DM4PR84MB1734.NAMPRD84.PROD.OUTLOOK.COM Backpatch-through: 14 --- doc/src/sgml/libpq.sgml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 1d04494cdbb..3bde3aef54a 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -1388,15 +1388,6 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname - - tty - - - Ignored (formerly, this specified where to send server debug output). 
- - - - replication From d48a4af0fd235ee0402d23b49e701c2f1b6f9c1c Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Tue, 16 Aug 2022 23:52:10 +0200 Subject: [PATCH 20/95] Fix assert in logicalmsg_desc The assert, introduced by 9f1cf97bb5, is intended to check if the prefix is terminated by a \0 byte, but it has two flaws. Firstly, prefix_size includes the \0 byte, so prefix[prefix_size] points to the byte after the null byte. Secondly, the check ensures the byte is not equal \0, while it should be checking the opposite. Backpatch-through: 14 Discussion: https://postgr.es/m/b99b6101-2f14-3796-3dfa-4a6cd7d4326d@enterprisedb.com --- src/backend/access/rmgrdesc/logicalmsgdesc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/access/rmgrdesc/logicalmsgdesc.c b/src/backend/access/rmgrdesc/logicalmsgdesc.c index d64ce2e7eff..dee440f7d99 100644 --- a/src/backend/access/rmgrdesc/logicalmsgdesc.c +++ b/src/backend/access/rmgrdesc/logicalmsgdesc.c @@ -28,7 +28,7 @@ logicalmsg_desc(StringInfo buf, XLogReaderState *record) char *message = xlrec->message + xlrec->prefix_size; char *sep = ""; - Assert(prefix[xlrec->prefix_size] != '\0'); + Assert(prefix[xlrec->prefix_size - 1] == '\0'); appendStringInfo(buf, "%s, prefix \"%s\"; payload (%zu bytes): ", xlrec->transactional ? "transactional" : "non-transactional", From 920e86bd9a6ec68125d4ffa6e69d119ed59105e3 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 18 Aug 2022 12:11:47 -0400 Subject: [PATCH 21/95] Fix subtly-incorrect matching of parent and child partitioned indexes. When creating a partitioned index, DefineIndex tries to identify any existing indexes on the partitions that match the partitioned index, so that it can absorb those as child indexes instead of building new ones. Part of the matching is to compare IndexInfo structs --- but that wasn't done quite right. We're comparing the IndexInfo built within DefineIndex itself to one made from existing catalog contents by BuildIndexInfo. Notably, while BuildIndexInfo will run index expressions and predicates through expression preprocessing, that has not happened to DefineIndex's struct. The result is failure to match and subsequent creation of duplicate indexes. The easiest and most bulletproof fix is to build a new IndexInfo using BuildIndexInfo, thereby guaranteeing that the processing done is identical. While here, let's also extract the opfamily and collation data from the new partitioned index, removing ad-hoc logic that duplicated knowledge about how those are constructed. Per report from Christophe Pettus. Back-patch to v11 where we invented partitioned indexes. 
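A minimal sketch of the failure scenario, distilled from the new regression test (object names are invented): the expression and predicate below go through expression preprocessing when read back from the catalogs, which is why the raw IndexInfo built inside DefineIndex failed to match.

    CREATE TABLE parted (a int, b int, d bool) PARTITION BY RANGE (a);
    CREATE TABLE parted1 PARTITION OF parted FOR VALUES FROM (0) TO (10);
    -- An expression index with a predicate on the partition.
    CREATE INDEX ON parted1 ((a + b)) WHERE d = true;
    -- Creating the matching partitioned index on the parent should absorb
    -- the existing child index rather than build a duplicate; with the fix,
    -- both IndexInfos come from BuildIndexInfo() and so compare equal.
    CREATE INDEX parted_expr_idx ON parted ((a + b)) WHERE d = true;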
Richard Guo and Tom Lane Discussion: https://postgr.es/m/8864BFAA-81FD-4BF9-8E06-7DEB8D4164ED@thebuild.com --- src/backend/commands/indexcmds.c | 23 ++++++--- src/test/regress/expected/indexing.out | 66 +++++++++++++++++++++++++- src/test/regress/sql/indexing.sql | 15 +++++- 3 files changed, 96 insertions(+), 8 deletions(-) diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 7d91d604443..6618d75cdbd 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1666,18 +1666,27 @@ DefineIndex(Oid relationId, int nparts = partdesc->nparts; Oid *part_oids = palloc(sizeof(Oid) * nparts); bool invalidate_parent = false; + Relation parentIndex; TupleDesc parentDesc; - Oid *opfamOids; pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL, nparts); + /* Make a local copy of partdesc->oids[], just for safety */ memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts); + /* + * We'll need an IndexInfo describing the parent index. The one + * built above is almost good enough, but not quite, because (for + * example) its predicate expression if any hasn't been through + * expression preprocessing. The most reliable way to get an + * IndexInfo that will match those for child indexes is to build + * it the same way, using BuildIndexInfo(). + */ + parentIndex = index_open(indexRelationId, lockmode); + indexInfo = BuildIndexInfo(parentIndex); + parentDesc = RelationGetDescr(rel); - opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes); - for (i = 0; i < numberOfKeyAttributes; i++) - opfamOids[i] = get_opclass_family(classObjectId[i]); /* * For each partition, scan all existing indexes; if one matches @@ -1748,9 +1757,9 @@ DefineIndex(Oid relationId, cldIdxInfo = BuildIndexInfo(cldidx); if (CompareIndexInfo(cldIdxInfo, indexInfo, cldidx->rd_indcollation, - collationObjectId, + parentIndex->rd_indcollation, cldidx->rd_opfamily, - opfamOids, + parentIndex->rd_opfamily, attmap)) { Oid cldConstrOid = InvalidOid; @@ -1877,6 +1886,8 @@ DefineIndex(Oid relationId, free_attrmap(attmap); } + index_close(parentIndex, lockmode); + /* * The pg_index row we inserted for this index was marked * indisvalid=true. But if we attached an existing index that is diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out index 7850108de8a..6573c0040cc 100644 --- a/src/test/regress/expected/indexing.out +++ b/src/test/regress/expected/indexing.out @@ -383,7 +383,7 @@ drop table idxpart; -- When a table is attached a partition and it already has an index, a -- duplicate index should not get created, but rather the index becomes -- attached to the parent's index. 
-create table idxpart (a int, b int, c text) partition by range (a); +create table idxpart (a int, b int, c text, d bool) partition by range (a); create index idxparti on idxpart (a); create index idxparti2 on idxpart (b, c); create table idxpart1 (like idxpart including indexes); @@ -394,6 +394,7 @@ create table idxpart1 (like idxpart including indexes); a | integer | | | b | integer | | | c | text | | | + d | boolean | | | Indexes: "idxpart1_a_idx" btree (a) "idxpart1_b_c_idx" btree (b, c) @@ -420,6 +421,7 @@ alter table idxpart attach partition idxpart1 for values from (0) to (10); a | integer | | | b | integer | | | c | text | | | + d | boolean | | | Partition of: idxpart FOR VALUES FROM (0) TO (10) Indexes: "idxpart1_a_idx" btree (a) @@ -439,6 +441,68 @@ select relname, relkind, inhparent::regclass idxparti2 | I | (6 rows) +-- While here, also check matching when creating an index after the fact. +create index on idxpart1 ((a+b)) where d = true; +\d idxpart1 + Table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | + d | boolean | | | +Partition of: idxpart FOR VALUES FROM (0) TO (10) +Indexes: + "idxpart1_a_idx" btree (a) + "idxpart1_b_c_idx" btree (b, c) + "idxpart1_expr_idx" btree ((a + b)) WHERE d = true + +select relname, relkind, inhparent::regclass + from pg_class left join pg_index ix on (indexrelid = oid) + left join pg_inherits on (ix.indexrelid = inhrelid) + where relname like 'idxpart%' order by relname; + relname | relkind | inhparent +-------------------+---------+----------- + idxpart | p | + idxpart1 | r | + idxpart1_a_idx | i | idxparti + idxpart1_b_c_idx | i | idxparti2 + idxpart1_expr_idx | i | + idxparti | I | + idxparti2 | I | +(7 rows) + +create index idxparti3 on idxpart ((a+b)) where d = true; +\d idxpart1 + Table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | + d | boolean | | | +Partition of: idxpart FOR VALUES FROM (0) TO (10) +Indexes: + "idxpart1_a_idx" btree (a) + "idxpart1_b_c_idx" btree (b, c) + "idxpart1_expr_idx" btree ((a + b)) WHERE d = true + +select relname, relkind, inhparent::regclass + from pg_class left join pg_index ix on (indexrelid = oid) + left join pg_inherits on (ix.indexrelid = inhrelid) + where relname like 'idxpart%' order by relname; + relname | relkind | inhparent +-------------------+---------+----------- + idxpart | p | + idxpart1 | r | + idxpart1_a_idx | i | idxparti + idxpart1_b_c_idx | i | idxparti2 + idxpart1_expr_idx | i | idxparti3 + idxparti | I | + idxparti2 | I | + idxparti3 | I | +(8 rows) + drop table idxpart; -- Verify that attaching an invalid index does not mark the parent index valid. -- On the other hand, attaching a valid index marks not only its direct diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql index 3d083f37089..0053083c224 100644 --- a/src/test/regress/sql/indexing.sql +++ b/src/test/regress/sql/indexing.sql @@ -198,7 +198,7 @@ drop table idxpart; -- When a table is attached a partition and it already has an index, a -- duplicate index should not get created, but rather the index becomes -- attached to the parent's index. 
-create table idxpart (a int, b int, c text) partition by range (a); +create table idxpart (a int, b int, c text, d bool) partition by range (a); create index idxparti on idxpart (a); create index idxparti2 on idxpart (b, c); create table idxpart1 (like idxpart including indexes); @@ -209,6 +209,19 @@ select relname, relkind, inhparent::regclass where relname like 'idxpart%' order by relname; alter table idxpart attach partition idxpart1 for values from (0) to (10); \d idxpart1 +select relname, relkind, inhparent::regclass + from pg_class left join pg_index ix on (indexrelid = oid) + left join pg_inherits on (ix.indexrelid = inhrelid) + where relname like 'idxpart%' order by relname; +-- While here, also check matching when creating an index after the fact. +create index on idxpart1 ((a+b)) where d = true; +\d idxpart1 +select relname, relkind, inhparent::regclass + from pg_class left join pg_index ix on (indexrelid = oid) + left join pg_inherits on (ix.indexrelid = inhrelid) + where relname like 'idxpart%' order by relname; +create index idxparti3 on idxpart ((a+b)) where d = true; +\d idxpart1 select relname, relkind, inhparent::regclass from pg_class left join pg_index ix on (indexrelid = oid) left join pg_inherits on (ix.indexrelid = inhrelid) From c1bdf8a0273298f1fae56bab6e80487f6c08c976 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Tue, 23 Aug 2022 09:24:51 +0530 Subject: [PATCH 22/95] Add CHECK_FOR_INTERRUPTS while decoding changes. While decoding changes in a loop, if we skip all the changes there is no CFI, making the loop uninterruptible. Reported-by: Whale Song and Andrey Borodin Bug: 17580 Author: Masahiko Sawada Reviewed-by: Amit Kapila Backpatch-through: 10 Discussion: https://postgr.es/m/17580-849c1d5b6d7eb422@postgresql.org Discussion: https://postgr.es/m/B319ECD6-9A28-4CDF-A8F4-3591E0BF2369@yandex-team.ru --- src/backend/replication/logical/reorderbuffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index e59d1396b53..3194e418511 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -2078,6 +2078,8 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation = NULL; Oid reloid; + CHECK_FOR_INTERRUPTS(); + /* * We can't call start stream callback before processing first * change. From 092029339b7582476a486c0292c6b71a947c3098 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 23 Aug 2022 09:41:37 -0400 Subject: [PATCH 23/95] Doc: prefer sysctl to /proc/sys in docs and comments. sysctl is more portable than Linux's /proc/sys file tree, and often easier to use too. That's why most of our docs refer to sysctl when talking about how to adjust kernel parameters. Bring the few stragglers into line. Discussion: https://postgr.es/m/361175.1661187463@sss.pgh.pa.us --- doc/src/sgml/runtime.sgml | 11 ++++++----- src/backend/postmaster/postmaster.c | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index 375644059db..5b3551c0847 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1277,11 +1277,12 @@ default:\ - On Linux - /proc/sys/fs/file-max determines the - maximum number of open files that the kernel will support. It can - be changed by writing a different number into the file or by - adding an assignment in /etc/sysctl.conf.
+ On Linux the kernel parameter + fs.file-max determines the maximum number of open + files that the kernel will support. It can be changed with + sysctl -w fs.file-max=N. + To make the setting persist across reboots, add an assignment + in /etc/sysctl.conf. The maximum limit of files per process is fixed at the time the kernel is compiled; see /usr/src/linux/Documentation/proc.txt for diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 92e830164e8..4500c711ff5 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -5511,7 +5511,7 @@ SubPostmasterMain(int argc, char *argv[]) * If testing EXEC_BACKEND on Linux, you should run this as root before * starting the postmaster: * - * echo 0 >/proc/sys/kernel/randomize_va_space + * sysctl -w kernel.randomize_va_space=0 * * This prevents using randomized stack and code addresses that cause the * child process's memory map to be different from the parent's, making it From 9092ea8056ed97921f61ce086e2eef959bc6a34b Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 23 Aug 2022 09:55:37 -0400 Subject: [PATCH 24/95] Doc: document possible need to raise kernel's somaxconn limit. On fast machines, it's possible for applications such as pgbench to issue connection requests so quickly that the postmaster's listen queue overflows in the kernel, resulting in unexpected failures (with not-very-helpful error messages). Most modern OSes allow the queue size to be increased, so document how to do that. Per report from Kevin McKibbin. Discussion: https://postgr.es/m/CADc_NKg2d+oZY9mg4DdQdoUcGzN2kOYXBu-3--RW_hEe0tUV=g@mail.gmail.com --- doc/src/sgml/runtime.sgml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index 5b3551c0847..479eb62f4ce 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1318,6 +1318,22 @@ default:\ linkend="guc-max-files-per-process"/> configuration parameter to limit the consumption of open files. + + + Another kernel limit that may be of concern when supporting large + numbers of client connections is the maximum socket connection queue + length. If more than that many connection requests arrive within a very + short period, some may get rejected before the postmaster can service + the requests, with those clients receiving unhelpful connection failure + errors such as Resource temporarily unavailable or + Connection refused. The default queue length limit is 128 + on many platforms. To raise it, adjust the appropriate kernel parameter + via sysctl, then restart the postmaster. + The parameter is variously named net.core.somaxconn + on Linux, kern.ipc.soacceptqueue on newer FreeBSD, + and kern.ipc.somaxconn on macOS and other BSD + variants. + From 5a38548b338efc2664c73036895823911b8b978c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 24 Aug 2022 13:01:40 -0400 Subject: [PATCH 25/95] Defend against stack overrun in a few more places. SplitToVariants() in the ispell code, lseg_inside_poly() in geo_ops.c, and regex_selectivity_sub() in selectivity estimation could recurse until stack overflow; fix by adding check_stack_depth() calls. So could next() in the regex compiler, but that case is better fixed by converting its tail recursion to a loop. (We probably get better code that way too, since next() can now be inlined into its sole caller.) 
There remains a reachable stack overrun in the Turkish stemmer, but we'll need some advice from the Snowball people about how to fix that. Per report from Egor Chindyaskin and Alexander Lakhin. These mistakes are old, so back-patch to all supported branches. Richard Guo and Tom Lane Discussion: https://postgr.es/m/1661334672.728714027@f473.i.mail.ru --- src/backend/regex/regc_lex.c | 5 +++-- src/backend/tsearch/spell.c | 4 ++++ src/backend/utils/adt/geo_ops.c | 3 +++ src/backend/utils/adt/like_support.c | 4 ++++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c index 7673dab76f4..826203e35d2 100644 --- a/src/backend/regex/regc_lex.c +++ b/src/backend/regex/regc_lex.c @@ -201,6 +201,8 @@ next(struct vars *v) { chr c; +next_restart: /* loop here after eating a comment */ + /* errors yield an infinite sequence of failures */ if (ISERR()) return 0; /* the error has set nexttype to EOS */ @@ -493,8 +495,7 @@ next(struct vars *v) if (!ATEOS()) v->now++; assert(v->nexttype == v->lasttype); - return next(v); - break; + goto next_restart; case CHR('='): /* positive lookahead */ NOTE(REG_ULOOKAROUND); RETV(LACON, LATYPE_AHEAD_POS); diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index ebc89604ac2..961eb2709ca 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -63,6 +63,7 @@ #include "postgres.h" #include "catalog/pg_collation.h" +#include "miscadmin.h" #include "tsearch/dicts/spell.h" #include "tsearch/ts_locale.h" #include "utils/memutils.h" @@ -2399,6 +2400,9 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int char *notprobed; int compoundflag = 0; + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + notprobed = (char *) palloc(wordlen); memset(notprobed, 1, wordlen); var = CopyVar(orig, 1); diff --git a/src/backend/utils/adt/geo_ops.c b/src/backend/utils/adt/geo_ops.c index 9484dbc2273..bfd9ff6a36c 100644 --- a/src/backend/utils/adt/geo_ops.c +++ b/src/backend/utils/adt/geo_ops.c @@ -3919,6 +3919,9 @@ lseg_inside_poly(Point *a, Point *b, POLYGON *poly, int start) bool res = true, intersection = false; + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + t.p[0] = *a; t.p[1] = *b; s.p[0] = poly->p[(start == 0) ? 
(poly->npts - 1) : (start - 1)]; diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 241e6f0f598..cba2d1376f0 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -44,6 +44,7 @@ #include "catalog/pg_statistic.h" #include "catalog/pg_type.h" #include "mb/pg_wchar.h" +#include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/supportnodes.h" @@ -1338,6 +1339,9 @@ regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive) int paren_pos = 0; /* dummy init to keep compiler quiet */ int pos; + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + for (pos = 0; pos < pattlen; pos++) { if (patt[pos] == '(') From f9b6d77318011578c2114eeeb1a439a6e60ae499 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Sat, 27 Aug 2022 15:22:11 +0900 Subject: [PATCH 26/95] Use correct connection for cancellation in frontend's parallel slots While waiting for slots to become available in wait_on_slots() in parallel_slot.c, the cancellation always relied on the first connection in the set to do the job. This could cause problems when this slot's socket is gone as PQgetCancel() would return NULL in this case. Rather than always using the first connection, this changes the logic to use the first valid connection for the cancellation. Author: Ranier Vilela Reviewed-by: Justin Pryzby Discussion: https://postgr.es/m/CAEudQAokk1h_pUwGXsYS4oVOuf35s1O2o3TXGHpV8=AWikvgHA@mail.gmail.com Backpatch-through: 14 --- src/fe_utils/parallel_slot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fe_utils/parallel_slot.c b/src/fe_utils/parallel_slot.c index 69581157c29..dcdad9e30c5 100644 --- a/src/fe_utils/parallel_slot.c +++ b/src/fe_utils/parallel_slot.c @@ -237,7 +237,7 @@ wait_on_slots(ParallelSlotArray *sa) if (cancelconn == NULL) return false; - SetCancelConn(sa->slots->connection); + SetCancelConn(cancelconn); i = select_loop(maxFd, &slotset); ResetCancelConn(); From c3daf012e16e05cc9f16c3a3d1177b5ee49065bd Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 28 Aug 2022 10:44:52 -0400 Subject: [PATCH 27/95] Doc: fix example of recursive query. Compute total number of sub-parts correctly, per jason@banfelder.net Simon Riggs Discussion: https://postgr.es/m/166161184718.1235920.6304070286124217754@wrigleys.postgresql.org --- doc/src/sgml/queries.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml index 516fbcbf375..1428d99d0f8 100644 --- a/doc/src/sgml/queries.sgml +++ b/doc/src/sgml/queries.sgml @@ -2195,7 +2195,7 @@ SELECT sum(n) FROM t; WITH RECURSIVE included_parts(sub_part, part, quantity) AS ( SELECT sub_part, part, quantity FROM parts WHERE part = 'our_product' UNION ALL - SELECT p.sub_part, p.part, p.quantity + SELECT p.sub_part, p.part, p.quantity * pr.quantity FROM included_parts pr, parts p WHERE p.part = pr.sub_part ) From e3b6a02878fd669910d6043f47a1e7c6d4953af2 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Mon, 29 Aug 2022 10:47:12 -0400 Subject: [PATCH 28/95] Prevent WAL corruption after a standby promotion. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a PostgreSQL instance performing archive recovery but not using standby mode is promoted, and the last WAL segment that it attempted to read ended in a partial record, the previous code would create invalid WAL on the new timeline. 
The WAL from the previous timeline would be copied to the new timeline up until the end of the last valid record, but instead of beginning to write WAL immediately afterwards, the promoted server would write an overwrite contrecord at the beginning of the next segment. The end of the previous segment would be left as all-zeroes, resulting in failures if anything tried to read WAL from that file. The root of the issue is that ReadRecord() decides whether to set abortedRecPtr and missingContrecPtr based on the value of StandbyMode, but ReadRecord() switches to a new timeline based on the value of ArchiveRecoveryRequested. We shouldn't try to write an overwrite contrecord if we're switching to a new timeline, so change the test in ReadRecord() to check ArchiveRecoveryRequested instead. Code fix by Dilip Kumar. Comments by me incorporating suggested language from Álvaro Herrera. Further review from Kyotaro Horiguchi and Sami Imseih. Discussion: http://postgr.es/m/CAFiTN-t7umki=PK8dT1tcPV=mOUe2vNhHML6b3T7W7qqvvajjg@mail.gmail.com Discussion: http://postgr.es/m/FB0DEA0B-E14E-43A0-811F-C1AE93D00FF3%40amazon.com --- src/backend/access/transam/xlog.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index bfba08fa267..f0229322dd6 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4468,12 +4468,18 @@ ReadRecord(XLogReaderState *xlogreader, int emode, if (record == NULL) { /* - * When not in standby mode we find that WAL ends in an incomplete - * record, keep track of that record. After recovery is done, - * we'll write a record to indicate downstream WAL readers that - * that portion is to be ignored. + * When we find that WAL ends in an incomplete record, keep track + * of that record. After recovery is done, we'll write a record to + * indicate to downstream WAL readers that that portion is to be + * ignored. + * + * However, when ArchiveRecoveryRequested = true, we're going to + * switch to a new timeline at the end of recovery. We will only + * copy WAL over to the new timeline up to the end of the last + * complete record, so if we did this, we would later create an + * overwrite contrecord in the wrong place, breaking everything. */ - if (!StandbyMode && + if (!ArchiveRecoveryRequested && !XLogRecPtrIsInvalid(xlogreader->abortedRecPtr)) { abortedRecPtr = xlogreader->abortedRecPtr; @@ -8264,6 +8270,14 @@ StartupXLOG(void) */ if (!XLogRecPtrIsInvalid(missingContrecPtr)) { + /* + * We should only have a missingContrecPtr if we're not switching to + * a new timeline. When a timeline switch occurs, WAL is copied from + * the old timeline to the new only up to the end of the last complete + * record, so there can't be an incomplete WAL record that we need to + * disregard. + */ + Assert(ThisTimeLineID == PrevTimeLineID); Assert(!XLogRecPtrIsInvalid(abortedRecPtr)); EndOfLog = missingContrecPtr; } From 22180bb417048a3f491cc578ea295a6a9b0cc7d1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 30 Aug 2022 17:28:32 -0400 Subject: [PATCH 29/95] On NetBSD, force dynamic symbol resolution at postmaster start. The default of lazy symbol resolution means that when the postmaster first reaches the select() call in ServerLoop, it'll need to resolve the link to that libc entry point.
NetBSD's dynamic loader takes an internal lock while doing that, and if a signal interrupts the operation then there is a risk of self-deadlock should the signal handler do anything that requires that lock, as several of the postmaster signal handlers do. The window for this is pretty narrow, and timing considerations make it unlikely that a signal would arrive right then anyway. But it's semi-repeatable on slow single-CPU machines, and in principle the race could happen with any hardware. The least messy solution to this is to force binding of dynamic symbols at postmaster start, using the "-z now" linker option. While we're at it, also use "-z relro" so as to provide a small security gain. It's not entirely clear whether any other platforms share this issue, but for now we'll assume it's NetBSD-specific. (We might later try to use "-z now" on more platforms for performance reasons, but that would not likely be something to back-patch.) Report and patch by me; the idea to fix it this way is from Andres Freund. Discussion: https://postgr.es/m/3384826.1661802235@sss.pgh.pa.us --- src/template/netbsd | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/template/netbsd b/src/template/netbsd index aaa560cd92c..550e8f09737 100644 --- a/src/template/netbsd +++ b/src/template/netbsd @@ -2,3 +2,12 @@ # Extra CFLAGS for code that will go into a shared library CFLAGS_SL="-fPIC -DPIC" + +# We must resolve all dynamic linking in the core server at program start. +# Otherwise the postmaster can self-deadlock due to signals interrupting +# resolution of calls, since NetBSD's linker takes a lock while doing that and +# some postmaster signal handlers do things that will also acquire that lock. +# As long as we need "-z now", might as well specify "-z relro" too. +# While there's not a hard reason to adopt these settings for our other +# executables, there's also little reason not to, so just add them to LDFLAGS. +LDFLAGS="$LDFLAGS -Wl,-z,now -Wl,-z,relro" From d1f57dbdfda26b4220cca404c1de91baf05ee0a8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 31 Aug 2022 10:42:05 -0400 Subject: [PATCH 30/95] In the Snowball dictionary, don't try to stem excessively-long words. If the input word exceeds 1000 bytes, don't pass it to the stemmer; just return it as-is after case folding. Such an input is surely not a word in any human language, so whatever the stemmer might do to it would be pretty dubious in the first place. Adding this restriction protects us against a known recursion-to-stack-overflow problem in the Turkish stemmer, and it seems like good insurance against any other safety or performance issues that may exist in the Snowball stemmers. (I note, for example, that they contain no CHECK_FOR_INTERRUPTS calls, so we really don't want them running for a long time.) The threshold of 1000 bytes is arbitrary. An alternative definition could have been to treat such words as stopwords, but that seems like a bigger break from the old behavior. Per report from Egor Chindyaskin and Alexander Lakhin. Thanks to Olly Betts for the recommendation to fix it this way. 
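The effect is visible from SQL; a quick sketch using the built-in english_stem dictionary:

    -- Under 1000 bytes: stemmed as usual.
    SELECT ts_lexize('english_stem', 'stemming');          -- {stem}
    -- Over 1000 bytes: returned case-folded but unstemmed, and quickly.
    SELECT ts_lexize('english_stem', repeat('A', 2000));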
Discussion: https://postgr.es/m/1661334672.728714027@f473.i.mail.ru --- src/backend/snowball/dict_snowball.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/backend/snowball/dict_snowball.c b/src/backend/snowball/dict_snowball.c index 8c25f3ebbf2..11624145d65 100644 --- a/src/backend/snowball/dict_snowball.c +++ b/src/backend/snowball/dict_snowball.c @@ -275,8 +275,24 @@ dsnowball_lexize(PG_FUNCTION_ARGS) char *txt = lowerstr_with_len(in, len); TSLexeme *res = palloc0(sizeof(TSLexeme) * 2); - if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) + /* + * Do not pass strings exceeding 1000 bytes to the stemmer, as they're + * surely not words in any human language. This restriction avoids + * wasting cycles on stuff like base64-encoded data, and it protects us + * against possible inefficiency or misbehavior in the stemmer. (For + * example, the Turkish stemmer has an indefinite recursion, so it can + * crash on long-enough strings.) However, Snowball dictionaries are + * defined to recognize all strings, so we can't reject the string as an + * unknown word. + */ + if (len > 1000) + { + /* return the lexeme lowercased, but otherwise unmodified */ + res->lexeme = txt; + } + else if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) { + /* empty or stopword, so report as stopword */ pfree(txt); } else From 47109a91c8a65d0bf6dc5fc1dcb0e4ac6e367037 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 31 Aug 2022 16:23:20 -0400 Subject: [PATCH 31/95] Prevent long-term memory leakage in autovacuum launcher. get_database_list() failed to restore the caller's memory context, instead leaving current context set to TopMemoryContext which is how CommitTransactionCommand() leaves it. The callers both think they are using short-lived contexts, for the express purpose of not having to worry about cleaning up individual allocations. The net effect therefore is that supposedly short-lived allocations could accumulate indefinitely in the launcher's TopMemoryContext. Although this has been broken for a long time, it seems we didn't have any obvious memory leak here until v15's rearrangement of the stats logic. I (tgl) am not entirely convinced that there's no other leak at all, though, and we're surely at risk of adding one in future back-patched fixes. So back-patch to all supported branches, even though this may be only a latent bug in pre-v15. 
Reid Thompson Discussion: https://postgr.es/m/972a4e12b68b0f96db514777a150ceef7dcd2e0f.camel@crunchydata.com --- src/backend/postmaster/autovacuum.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 0379659f994..638494a080c 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -2031,6 +2031,9 @@ get_database_list(void) CommitTransactionCommand(); + /* Be sure to restore caller's memory context */ + MemoryContextSwitchTo(resultcxt); + return dblist; } From 8d7ca603bb04772982e117ebe6a8b83e016dcdd3 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 31 Aug 2022 17:08:44 -0400 Subject: [PATCH 32/95] doc: simplify WITH clause syntax in CREATE DATABASE Reported-by: Rob Discussion: https://postgr.es/m/20211016171149.yaouvlw5kvux6dvk@localhost Author: Rob Backpatch-through: 10 --- doc/src/sgml/ref/create_database.sgml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml index 41cb4068ec2..ec831bb57f9 100644 --- a/doc/src/sgml/ref/create_database.sgml +++ b/doc/src/sgml/ref/create_database.sgml @@ -22,7 +22,7 @@ PostgreSQL documentation CREATE DATABASE name - [ [ WITH ] [ OWNER [=] user_name ] + [ WITH ] [ OWNER [=] user_name ] [ TEMPLATE [=] template ] [ ENCODING [=] encoding ] [ LOCALE [=] locale ] @@ -31,7 +31,7 @@ CREATE DATABASE name [ TABLESPACE [=] tablespace_name ] [ ALLOW_CONNECTIONS [=] allowconn ] [ CONNECTION LIMIT [=] connlimit ] - [ IS_TEMPLATE [=] istemplate ] ] + [ IS_TEMPLATE [=] istemplate ] From d02ba671edbcef907aac418b872135d79f9d47a6 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 31 Aug 2022 19:28:42 -0400 Subject: [PATCH 33/95] doc: show direction is optional in FETCH/MOVE's FROM/IN syntax It used to show direction was required for FROM/IN. 
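A small sketch confirming the corrected synopsis (the cursor name is arbitrary):

    BEGIN;
    DECLARE c CURSOR FOR SELECT * FROM generate_series(1, 10) g;
    FETCH FROM c;            -- direction omitted; defaults to NEXT
    FETCH FORWARD 2 FROM c;  -- direction together with FROM
    MOVE IN c;               -- the same applies to MOVE
    COMMIT;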
Reported-by: Rob Discussion: https://postgr.es/m/20211015165248.isqjceyilelhnu3k@localhost Author: Rob Backpatch-through: 10 --- doc/src/sgml/ref/fetch.sgml | 5 +++-- doc/src/sgml/ref/move.sgml | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/ref/fetch.sgml b/doc/src/sgml/ref/fetch.sgml index ec843f56844..83d58e54b9d 100644 --- a/doc/src/sgml/ref/fetch.sgml +++ b/doc/src/sgml/ref/fetch.sgml @@ -27,9 +27,10 @@ PostgreSQL documentation -FETCH [ direction [ FROM | IN ] ] cursor_name +FETCH [ direction ] [ FROM | IN ] cursor_name -where direction can be empty or one of: +where direction can +be one of: NEXT PRIOR diff --git a/doc/src/sgml/ref/move.sgml b/doc/src/sgml/ref/move.sgml index 4c7d1dca391..8378439debb 100644 --- a/doc/src/sgml/ref/move.sgml +++ b/doc/src/sgml/ref/move.sgml @@ -27,9 +27,10 @@ PostgreSQL documentation -MOVE [ direction [ FROM | IN ] ] cursor_name +MOVE [ direction ] [ FROM | IN ] cursor_name -where direction can be empty or one of: +where direction can +be one of: NEXT PRIOR From e5ecc39646a7f6eba6cc73f6022c83ee537c4a34 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 31 Aug 2022 19:43:06 -0400 Subject: [PATCH 34/95] doc: document the maximum char/varchar length value Reported-by: Japin Li Discussion: https://postgr.es/m/MEYP282MB1669B13E98AE531617CB1386B6979@MEYP282MB1669.AUSP282.PROD.OUTLOOK.COM Backpatch-through: 10 --- doc/src/sgml/datatype.sgml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 77c7b17e6a6..0e89b768c5d 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1177,6 +1177,8 @@ SELECT '52093.89'::money::numeric::float8; char(n) are aliases for character varying(n) and character(n), respectively. + If specified, the length must be greater than zero and cannot exceed + 10485760. character without length specifier is equivalent to character(1). If character varying is used without length specifier, the type accepts strings of any size. The From bd6ad9d532d338eefae606987e7091af37a4c7de Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 31 Aug 2022 20:27:27 -0400 Subject: [PATCH 35/95] doc: mention that SET TIME ZONE often needs to be quoted Also mention that time zone abbreviations are not supported. Reported-by: philippe.godfrin@nov.com Discussion: https://postgr.es/m/163888728952.1269.5167822676466793158@wrigleys.postgresql.org Backpatch-through: 10 --- doc/src/sgml/ref/set.sgml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/ref/set.sgml b/doc/src/sgml/ref/set.sgml index c4aab56a2d3..9a7b387b29f 100644 --- a/doc/src/sgml/ref/set.sgml +++ b/doc/src/sgml/ref/set.sgml @@ -22,7 +22,7 @@ PostgreSQL documentation SET [ SESSION | LOCAL ] configuration_parameter { TO | = } { value | 'value' | DEFAULT } -SET [ SESSION | LOCAL ] TIME ZONE { timezone | LOCAL | DEFAULT } +SET [ SESSION | LOCAL ] TIME ZONE { value | 'value' | LOCAL | DEFAULT } @@ -190,8 +190,8 @@ SELECT setseed(value); TIME ZONE - SET TIME ZONE value is an alias - for SET timezone TO value. The + SET TIME ZONE 'value' is an alias + for SET timezone TO 'value'. The syntax SET TIME ZONE allows special syntax for the time zone specification. Here are examples of valid values: @@ -252,7 +252,8 @@ SELECT setseed(value); - See for more information + Time zone abbreviations are not supported by SET; + see for more information about time zones. 
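To illustrate the quoting that the revised synopsis above describes (a sketch; the zone name is arbitrary):

    SET TIME ZONE 'America/New_York';    -- most zone names must be quoted
    SET timezone TO 'America/New_York';  -- the equivalent plain-parameter form
    SET TIME ZONE LOCAL;                 -- keyword forms take no quotes
    SHOW timezone;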
From 8b5399e00612f78c1ab58d6f0a8abf1a059bbe72 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 31 Aug 2022 21:10:37 -0400 Subject: [PATCH 36/95] doc: warn of SECURITY DEFINER schemas for non-sql_body functions Non-sql_body functions are evaluated at runtime. Reported-by: Erki Eessaar Discussion: https://postgr.es/m/AM9PR01MB8268BF5E74E119828251FD34FE409@AM9PR01MB8268.eurprd01.prod.exchangelabs.com Backpatch-through: 10 --- doc/src/sgml/ref/create_function.sgml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/ref/create_function.sgml b/doc/src/sgml/ref/create_function.sgml index 66e52555a8f..dc36a171a96 100644 --- a/doc/src/sgml/ref/create_function.sgml +++ b/doc/src/sgml/ref/create_function.sgml @@ -780,7 +780,10 @@ SELECT * FROM dup(42); Because a SECURITY DEFINER function is executed with the privileges of the user that owns it, care is needed to - ensure that the function cannot be misused. For security, + ensure that the function cannot be misused. This is particularly + important for non-sql_body functions because + their function bodies are evaluated at run-time, not creation time. + For security, should be set to exclude any schemas writable by untrusted users. This prevents malicious users from creating objects (e.g., tables, functions, and From a10a5a5bd5a5b80742e236fca867a5e3f42e4d01 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 31 Aug 2022 21:46:14 -0400 Subject: [PATCH 37/95] doc: split out the NATURAL/CROSS JOIN in SELECT syntax This allows the syntax to be more accurate about what clauses are supported. Also switch an example query to use the ANSI join syntax. Reported-by: Joel Jacobson Discussion: https://postgr.es/m/67b71d3e-0c22-44df-a223-351f14418319@www.fastmail.com Backpatch-through: 11 --- doc/src/sgml/ref/select.sgml | 46 +++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index a6b1aad991f..7b8b24b9af8 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -59,7 +59,9 @@ SELECT [ ALL | DISTINCT [ ON ( expressionfunction_name ( [ argument [, ...] ] ) AS ( column_definition [, ...] ) [ LATERAL ] ROWS FROM( function_name ( [ argument [, ...] ] ) [ AS ( column_definition [, ...] ) ] [, ...] ) [ WITH ORDINALITY ] [ [ AS ] alias [ ( column_alias [, ...] ) ] ] - from_item [ NATURAL ] join_type from_item [ ON join_condition | USING ( join_column [, ...] ) [ AS join_using_alias ] ] + from_item join_type from_item { ON join_condition | USING ( join_column [, ...] ) [ AS join_using_alias ] } + from_item NATURAL join_type from_item + from_item CROSS JOIN from_item and grouping_element can be one of: @@ -600,19 +602,15 @@ TABLE [ ONLY ] table_name [ * ] FULL [ OUTER ] JOIN - - CROSS JOIN - For the INNER and OUTER join types, a join condition must be specified, namely exactly one of - NATURAL, ON join_condition, or + ON join_condition, USING (join_column [, ...]). - See below for the meaning. For CROSS JOIN, - none of these clauses can appear. + class="parameter">join_column [, ...]), + or NATURAL. See below for the meaning. @@ -623,17 +621,9 @@ TABLE [ ONLY ] table_name [ * ] In the absence of parentheses, JOINs nest left-to-right. In any case JOIN binds more tightly than the commas separating FROM-list items. 
- - - CROSS JOIN and INNER JOIN - produce a simple Cartesian product, the same result as you get from - listing the two tables at the top level of FROM, - but restricted by the join condition (if any). - CROSS JOIN is equivalent to INNER JOIN ON - (TRUE), that is, no rows are removed by qualification. - These join types are just a notational convenience, since they - do nothing you couldn't do with plain FROM and - WHERE. + All the JOIN options are just a notational + convenience, since they do nothing you couldn't do with plain + FROM and WHERE. LEFT OUTER JOIN returns all rows in the qualified @@ -714,6 +704,19 @@ TABLE [ ONLY ] table_name [ * ] + + CROSS JOIN + + + CROSS JOIN is equivalent to INNER JOIN ON + (TRUE), that is, no rows are removed by qualification. + They produce a simple Cartesian product, the same result as you get from + listing the two tables at the top level of FROM, + but restricted by the join condition (if any). + + + + LATERAL @@ -1754,8 +1757,7 @@ SELECT * FROM name SELECT f.title, f.did, d.name, f.date_prod, f.kind - FROM distributors d, films f - WHERE f.did = d.did + FROM distributors d JOIN films f USING (did); title | did | name | date_prod | kind -------------------+-----+--------------+------------+---------- From 0effb243eab6db5dda5c1e37f693baf423547848 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 31 Aug 2022 22:04:36 -0400 Subject: [PATCH 38/95] doc: clarify that pgcrypto's gen_random_uuid calls core func. Previously it was just marked as a duplicate of the core function. Reported-by: Andreas Dijkman Discussion: https://postgr.es/m/17349-24d61e214429e8c1@postgresql.org Backpatch-through: 13 --- doc/src/sgml/pgcrypto.sgml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/pgcrypto.sgml b/doc/src/sgml/pgcrypto.sgml index b2a4a59098d..c2e537c81d9 100644 --- a/doc/src/sgml/pgcrypto.sgml +++ b/doc/src/sgml/pgcrypto.sgml @@ -1145,8 +1145,9 @@ gen_random_bytes(count integer) returns bytea gen_random_uuid() returns uuid - Returns a version 4 (random) UUID. (Obsolete, this function is now also - included in core PostgreSQL.) + Returns a version 4 (random) UUID. (Obsolete, this function + internally calls the core + function of the same name.) From 1bbf6a4ccf29b7c6b84c53b5c052686775164715 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 31 Aug 2022 22:19:06 -0400 Subject: [PATCH 39/95] doc: use FILTER in aggregate example Reported-by: michal.palenik@freemap.sk Discussion: https://postgr.es/m/163499710897.684.7420075366995883688@wrigleys.postgresql.org Backpatch-through: 10 --- doc/src/sgml/query.sgml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/src/sgml/query.sgml b/doc/src/sgml/query.sgml index 71d644f4323..9046d7c9fbe 100644 --- a/doc/src/sgml/query.sgml +++ b/doc/src/sgml/query.sgml @@ -726,19 +726,20 @@ SELECT city, max(temp_lo) which gives us one output row per city. Each aggregate result is computed over the table rows matching that city. 
We can filter these grouped - rows using HAVING: + rows using HAVING and the output count using + FILTER: -SELECT city, max(temp_lo) +SELECT city, max(temp_lo), count(*) FILTER (WHERE temp_lo < 30) FROM weather GROUP BY city HAVING max(temp_lo) < 40; - city | max ----------+----- - Hayward | 37 + city | max | count +---------+-----+------- + Hayward | 37 | 5 (1 row) @@ -748,7 +749,7 @@ SELECT city, max(temp_lo) names begin with S, we might do: -SELECT city, max(temp_lo) +SELECT city, max(temp_lo), count(*) FILTER (WHERE temp_lo < 30) FROM weather WHERE city LIKE 'S%' -- GROUP BY city From 8d9b46ff7a5fec3b4189135b69976534c08730a4 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 31 Aug 2022 22:35:09 -0400 Subject: [PATCH 40/95] doc: mention "bloom" as a possible index access method Also remove USING erroneously added recently. Reported-by: Jeff Janes Discussion: https://postgr.es/m/CAMkU=1zhCpC7hottyMWM5Pimr9vRLprSwzLg+7PgajWhKZqRzw@mail.gmail.com Backpatch-through: 10 --- doc/src/sgml/indices.sgml | 3 ++- doc/src/sgml/ref/create_index.sgml | 17 +++-------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index 2a70e02f7c4..0c3fcfd62f8 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -117,7 +117,8 @@ CREATE INDEX test1_id_index ON test1 (id); PostgreSQL provides several index types: - B-tree, Hash, GiST, SP-GiST, GIN and BRIN. + B-tree, Hash, GiST, SP-GiST, GIN, BRIN, and the extension bloom. Each index type uses a different algorithm that is best suited to different types of queries. By default, the CREATE diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index dcb0ed88791..70604f96764 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -148,18 +148,6 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] - - USING - - - The optional USING clause specifies an index - type as described in . If not - specified, a default index type will be used based on the - data types of the columns. - - - - INCLUDE @@ -246,8 +234,9 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] The name of the index method to be used. Choices are btree, hash, - gist, spgist, gin, and - brin. + gist, spgist, gin, + brin, or user-installed access methods like + bloom. The default method is btree. From 1b38111b32a743a6d5fbc012a20fe08c372d7f01 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 31 Aug 2022 23:11:46 -0400 Subject: [PATCH 41/95] doc: in create statistics docs, mention analyze for parent info Discussion: https://postgr.es/m/Yv1Bw8J+1pYfHiRl@momjian.us Backpatch-through: 10 --- doc/src/sgml/ref/create_statistics.sgml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml index 9a8c904c088..e3e5c297ddb 100644 --- a/doc/src/sgml/ref/create_statistics.sgml +++ b/doc/src/sgml/ref/create_statistics.sgml @@ -142,7 +142,9 @@ CREATE STATISTICS [ IF NOT EXISTS ] statistics_na The name (optionally schema-qualified) of the table containing the - column(s) the statistics are computed on. + column(s) the statistics are computed on; see for an explanation of the handling of + inheritance and partitions. 
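As a concrete illustration of the behavior that new cross-reference points to, ANALYZE on an inheritance parent produces two statistics sets, distinguishable by the "inherited" column of pg_stats (a minimal sketch; table and column names are invented):

    CREATE TABLE stats_parent (x int);
    CREATE TABLE stats_child () INHERITS (stats_parent);
    INSERT INTO stats_parent SELECT generate_series(1, 10);
    INSERT INTO stats_child  SELECT generate_series(1, 1000);
    ANALYZE stats_parent;
    -- expect two rows: inherited = f (parent rows only)
    -- and inherited = t (parent plus all children)
    SELECT inherited, n_distinct
      FROM pg_stats
     WHERE tablename = 'stats_parent' AND attname = 'x';
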
From f03ffaf0886b6827c3ba85faf760a4cc50589c47 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Thu, 1 Sep 2022 19:22:35 +1200 Subject: [PATCH 42/95] Fix some possibly latent bugs in slab.c Primarily, this fixes an incorrect calculation in SlabCheck which was looking in the wrong byte for the sentinel check. The reason that we've never noticed this before in the form of a failing sentinel check is because the pre-check to this always fails because all current core users of slab contexts have a chunk size which is already MAXALIGNed, therefore there's never any space for the sentinel byte. It is possible that an extension needs to use a slab context and if they do with a chunk size that's not MAXALIGNed, then they'll likely get errors about overwritten sentinel bytes. Additionally, this patch changes various calculations which are being done based on the sizeof(SlabBlock). Currently, sizeof(SlabBlock) is a multiple of 8, therefore sizeof(SlabBlock) is the same as MAXALIGN(sizeof(SlabBlock)), however, if we were to ever have to add any fields to that struct as part of a bug fix, then SlabAlloc could end up returning a non-MAXALIGNed pointer. To be safe, let's ensure we always MAXALIGN sizeof(SlabBlock) before using it in any calculations. This patch has already been applied to master in d5ee4db0e. Diagnosed-by: Tomas Vondra, Tom Lane Author: Tomas Vondra, David Rowley Discussion: https://postgr.es/m/CAA4eK1%2B1JyW5TiL%3DyV-3Uq1CrfnTyn0Xrk5uArt31Z%3D8rgPhXQ%40mail.gmail.com Backpatch-through: 10 --- src/backend/utils/mmgr/slab.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c index 6469d8c4c6f..54487cc25ac 100644 --- a/src/backend/utils/mmgr/slab.c +++ b/src/backend/utils/mmgr/slab.c @@ -58,6 +58,8 @@ #include "utils/gp_alloc.h" #include "lib/ilist.h" +#define Slab_BLOCKHDRSZ MAXALIGN(sizeof(SlabBlock)) + /* * SlabContext is a specialized implementation of MemoryContext. */ @@ -118,10 +120,10 @@ typedef struct SlabChunk #define SlabChunkGetPointer(chk) \ ((void *)(((char *)(chk)) + sizeof(SlabChunk))) #define SlabBlockGetChunk(slab, block, idx) \ - ((SlabChunk *) ((char *) (block) + sizeof(SlabBlock) \ + ((SlabChunk *) ((char *) (block) + Slab_BLOCKHDRSZ \ + (idx * slab->fullChunkSize))) #define SlabBlockStart(block) \ - ((char *) block + sizeof(SlabBlock)) + ((char *) block + Slab_BLOCKHDRSZ) #define SlabChunkIndex(slab, block, chunk) \ (((char *) chunk - SlabBlockStart(block)) / slab->fullChunkSize) @@ -175,7 +177,7 @@ static const MemoryContextMethods SlabMethods = { * chunkSize: allocation chunk size * * The chunkSize may not exceed: - * MAXALIGN_DOWN(SIZE_MAX) - MAXALIGN(sizeof(SlabBlock)) - sizeof(SlabChunk) + * MAXALIGN_DOWN(SIZE_MAX) - MAXALIGN(Slab_BLOCKHDRSZ) - sizeof(SlabChunk) */ MemoryContext SlabContextCreate(MemoryContext parent, @@ -205,12 +207,12 @@ SlabContextCreate(MemoryContext parent, fullChunkSize = sizeof(SlabChunk) + MAXALIGN(chunkSize); /* Make sure the block can store at least one chunk. */ - if (blockSize < fullChunkSize + sizeof(SlabBlock)) + if (blockSize < fullChunkSize + Slab_BLOCKHDRSZ) elog(ERROR, "block size %zu for slab is too small for %zu chunks", blockSize, chunkSize); /* Compute maximum number of chunks per block */ - chunksPerBlock = (blockSize - sizeof(SlabBlock)) / fullChunkSize; + chunksPerBlock = (blockSize - Slab_BLOCKHDRSZ) / fullChunkSize; /* The freelist starts with 0, ends with chunksPerBlock. 
*/ freelistSize = sizeof(dlist_head) * (chunksPerBlock + 1); @@ -778,7 +780,7 @@ SlabCheck(MemoryContext context) /* there might be sentinel (thanks to alignment) */ if (slab->chunkSize < (slab->fullChunkSize - sizeof(SlabChunk))) - if (!sentinel_ok(chunk, slab->chunkSize)) + if (!sentinel_ok(chunk, sizeof(SlabChunk) + slab->chunkSize)) elog(WARNING, "problem in slab %s: detected write past chunk end in block %p, chunk %p", name, block, chunk); } From c94c562ceb0efa22b35103ee2c4179bbcf3627dc Mon Sep 17 00:00:00 2001 From: Etsuro Fujita Date: Fri, 2 Sep 2022 16:45:03 +0900 Subject: [PATCH 43/95] Doc: Update struct Trigger definition. Commit 487e9861d added a new field to struct Trigger, but failed to update the documentation to match; backpatch to v13 where that came in. Reviewed by Richard Guo. Discussion: https://postgr.es/m/CAPmGK17NY92CyxJ%2BBG7A3JZurmng4jfRfzPiBTtNupGMF0xW1g%40mail.gmail.com --- doc/src/sgml/trigger.sgml | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/src/sgml/trigger.sgml b/doc/src/sgml/trigger.sgml index 7e2654493bb..8f403cad97b 100644 --- a/doc/src/sgml/trigger.sgml +++ b/doc/src/sgml/trigger.sgml @@ -699,6 +699,7 @@ typedef struct Trigger int16 tgtype; char tgenabled; bool tgisinternal; + bool tgisclone; Oid tgconstrrelid; Oid tgconstrindid; Oid tgconstraint; From c7460b6df5d2914e073b5d67227926440066ed5a Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 2 Sep 2022 21:57:41 -0400 Subject: [PATCH 44/95] doc: clarify recursion internal behavior Reported-by: Drew DeVault Discussion: https://postgr.es/m/20211018091720.31299-1-sir@cmpwn.com Backpatch-through: 10 --- doc/src/sgml/queries.sgml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml index 1428d99d0f8..4c5a83c9cbf 100644 --- a/doc/src/sgml/queries.sgml +++ b/doc/src/sgml/queries.sgml @@ -2172,9 +2172,8 @@ SELECT sum(n) FROM t; - Strictly speaking, this process is iteration not recursion, but - RECURSIVE is the terminology chosen by the SQL standards - committee. + While RECURSIVE allows queries to be specified + recursively, internally all queries are evaluated iteratively. From 68d670dab12a388ae56e07f624b56c288ddae7af Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 2 Sep 2022 23:32:19 -0400 Subject: [PATCH 45/95] doc: simplify docs about analyze and inheritance/partitions Discussion: https://postgr.es/m/YxAqYijOsLzgLQgy@momjian.us Backpatch-through: 10 --- doc/src/sgml/ref/analyze.sgml | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/doc/src/sgml/ref/analyze.sgml b/doc/src/sgml/ref/analyze.sgml index b968f740cb8..2ba115d1ade 100644 --- a/doc/src/sgml/ref/analyze.sgml +++ b/doc/src/sgml/ref/analyze.sgml @@ -250,12 +250,13 @@ ANALYZE [ VERBOSE ] [ table_and_columns - If the table being analyzed has one or more children, - ANALYZE will gather statistics twice: once on the - rows of the parent table only, and a second time on the rows of the - parent table with all of its children. This second set of statistics - is needed when planning queries that traverse the entire inheritance - tree. The autovacuum daemon, however, will only consider inserts or + If the table being analyzed has inheritance children, + ANALYZE gathers two sets of statistics: one on the rows + of the parent table only, and a second including rows of both the parent + table and all of its children. This second set of statistics is needed when + planning queries that process the inheritance tree as a whole. 
The child + tables themselves are not individually analyzed in this case. + The autovacuum daemon, however, will only consider inserts or updates on the parent table itself when deciding whether to trigger an automatic analyze for that table. If that table is rarely inserted into or updated, the inheritance statistics will not be up to date unless you @@ -271,15 +272,6 @@ ANALYZE [ VERBOSE ] [ table_and_columns - - By contrast, if the table being analyzed has inheritance children, - ANALYZE gathers two sets of statistics: one on the rows - of the parent table only, and a second including rows of both the parent - table and all of its children. This second set of statistics is needed when - planning queries that process the inheritance tree as a whole. The child - tables themselves are not individually analyzed in this case. - - The autovacuum daemon does not process partitioned tables, nor does it process inheritance parents if only the children are ever modified. From 7fe7ec01aad3459e8c9bd01f23258463b77ee85e Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Sat, 3 Sep 2022 20:57:30 +0900 Subject: [PATCH 46/95] doc: Fix two queries related to jsonb functions These have been updated by the revert done in 2f2b18b, but the pre-revert state was correct. Note that the result was incorrectly formatted in the first case. Author: Erik Rijkers Discussion: https://postgr.es/m/13777e96-24b6-396b-cb16-8ad01b6ac130@xs4all.nl Backpatch-through: 13 --- doc/src/sgml/func.sgml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 16ad120dd23..99647f56fe9 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -15873,7 +15873,7 @@ table2-mapping jsonb_set_lax('[{"f1":1,"f2":null},2,null,3]', '{0,f1}', null) - [{"f1":null,"f2":null},2,null,3] + [{"f1": null, "f2": null}, 2, null, 3] jsonb_set_lax('[{"f1":99,"f2":null},2]', '{0,f3}', null, true, 'return_target') @@ -16112,7 +16112,7 @@ table2-mapping comparisons. - jsonb_path_exists_tz('["2015-08-01 12:00:00 -05"]', '$[*] ? (@.datetime() < "2015-08-02".datetime())') + jsonb_path_exists_tz('["2015-08-01 12:00:00-05"]', '$[*] ? (@.datetime() < "2015-08-02".datetime())') t From bc985dccad4e3dbf90f6943c7729c1cde0f7faa2 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Mon, 5 Sep 2022 18:44:11 +1200 Subject: [PATCH 47/95] Doc: clarify partitioned table limitations Improve documentation regarding the limitations of unique and primary key constraints on partitioned tables. The existing documentation didn't make it clear that the constraint columns had to be present in the partition key as bare columns. The reader could be led to believe that it was ok to include the constraint columns as part of a function call's parameters or as part of an expression. Additionally, the documentation didn't mention anything about the fact that we disallow unique and primary key constraints if the partition keys contain *any* function calls or expressions, regardless of if the constraint columns appear as columns elsewhere in the partition key. The confusion here was highlighted by a report on the general mailing list by James Vanns. 
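For example (table names invented, error wording paraphrased), both of the following are rejected:

    -- constraint columns must include every partition key column
    CREATE TABLE pk_demo1 (a int, b int, UNIQUE (a))
        PARTITION BY RANGE (b);
    -- fails: the constraint does not include partition key column "b"

    -- the partition key must not contain expressions or function calls
    CREATE TABLE pk_demo2 (a int, b int, UNIQUE (a, b))
        PARTITION BY RANGE ((a + b));
    -- fails even though both a and b appear in the constraint
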
Discussion: https://postgr.es/m/CAH7vdhNF0EdYZz3GLpgE3RSJLwWLhEk7A_fiKS9dPBT3Dz_3eA@mail.gmail.com Discussion: https://postgr.es/m/CAApHDvoU-u9iTqKjteYRFfi+UNEk7dbSAcyxEQD==vZt9B1KnA@mail.gmail.com Reviewed-by: Erik Rijkers Backpatch-through: 11 --- doc/src/sgml/ddl.sgml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index c85e92b3a2f..e49d19716c1 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -4057,12 +4057,13 @@ ALTER INDEX measurement_city_id_logdate_key - Unique constraints (and hence primary keys) on partitioned tables must - include all the partition key columns. This limitation exists because - the individual indexes making up the constraint can only directly - enforce uniqueness within their own partitions; therefore, the - partition structure itself must guarantee that there are not - duplicates in different partitions. + To create a unique or primary key constraint on a partitioned table, + the partition keys must not include any expressions or function calls + and the constraint's columns must include all of the partition key + columns. This limitation exists because the individual indexes making + up the constraint can only directly enforce uniqueness within their own + partitions; therefore, the partition structure itself must guarantee + that there are not duplicates in different partitions. From 857b4903ccaf9917afb400d6302d6244a4d311c5 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Thu, 8 Sep 2022 13:17:02 +0200 Subject: [PATCH 48/95] Choose FK name correctly during partition attachment During ALTER TABLE ATTACH PARTITION, if the name of a parent's foreign key constraint is already used on the partition, the code tries to choose another one before the FK attributes list has been populated, so the resulting constraint name was "__fkey" instead of "__fkey". Repair, and add a test case. Backpatch to 12. In 11, the code to attach a partition was not smart enough to cope with conflicting constraint names, so the problem doesn't exist there. Author: Jehan-Guillaume de Rorthais Discussion: https://postgr.es/m/20220901184156.738ebee5@karst --- src/backend/commands/tablecmds.c | 20 +++++++++---------- src/test/regress/input/constraints.source | 19 ++++++++++++++++++ src/test/regress/output/constraints.source | 23 ++++++++++++++++++++++ 3 files changed, 52 insertions(+), 10 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 07f00a212b0..ba985fc99ae 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -12117,16 +12117,6 @@ CloneFkReferencing(List **wqueue, Relation parentRel, Relation partRel) /* No dice. 
Set up to create our own constraint */ fkconstraint = makeNode(Constraint); - if (ConstraintNameIsUsed(CONSTRAINT_RELATION, - RelationGetRelid(partRel), - NameStr(constrForm->conname))) - fkconstraint->conname = - ChooseConstraintName(RelationGetRelationName(partRel), - ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs), - "fkey", - RelationGetNamespace(partRel), NIL); - else - fkconstraint->conname = pstrdup(NameStr(constrForm->conname)); fkconstraint->fk_upd_action = constrForm->confupdtype; fkconstraint->fk_del_action = constrForm->confdeltype; fkconstraint->deferrable = constrForm->condeferrable; @@ -12141,6 +12131,16 @@ CloneFkReferencing(List **wqueue, Relation parentRel, Relation partRel) fkconstraint->fk_attrs = lappend(fkconstraint->fk_attrs, makeString(NameStr(att->attname))); } + if (ConstraintNameIsUsed(CONSTRAINT_RELATION, + RelationGetRelid(partRel), + NameStr(constrForm->conname))) + fkconstraint->conname = + ChooseConstraintName(RelationGetRelationName(partRel), + ChooseForeignKeyConstraintNameAddition(fkconstraint->fk_attrs), + "fkey", + RelationGetNamespace(partRel), NIL); + else + fkconstraint->conname = pstrdup(NameStr(constrForm->conname)); indexOid = constrForm->conindid; constrOid = diff --git a/src/test/regress/input/constraints.source b/src/test/regress/input/constraints.source index e3afd458c27..0482216d7f8 100644 --- a/src/test/regress/input/constraints.source +++ b/src/test/regress/input/constraints.source @@ -449,6 +449,25 @@ INSERT INTO parted_uniq_tbl VALUES (1); -- OK now, fail at commit COMMIT; DROP TABLE parted_uniq_tbl; +-- test naming a constraint in a partition when a conflict exists +CREATE TABLE parted_fk_naming ( + id bigint NOT NULL default 1, + id_abc bigint, + CONSTRAINT dummy_constr FOREIGN KEY (id_abc) + REFERENCES parted_fk_naming (id), + PRIMARY KEY (id) +) +PARTITION BY LIST (id); +CREATE TABLE parted_fk_naming_1 ( + id bigint NOT NULL default 1, + id_abc bigint, + PRIMARY KEY (id), + CONSTRAINT dummy_constr CHECK (true) +); +ALTER TABLE parted_fk_naming ATTACH PARTITION parted_fk_naming_1 FOR VALUES IN ('1'); +SELECT conname FROM pg_constraint WHERE conrelid = 'parted_fk_naming_1'::regclass AND contype = 'f'; +DROP TABLE parted_fk_naming; + -- test a HOT update that invalidates the conflicting tuple. -- the trigger should still fire and catch the violation diff --git a/src/test/regress/output/constraints.source b/src/test/regress/output/constraints.source index 3ab2def4026..cab0a0480e1 100644 --- a/src/test/regress/output/constraints.source +++ b/src/test/regress/output/constraints.source @@ -618,6 +618,29 @@ COMMIT; ERROR: duplicate key value violates unique constraint "parted_uniq_tbl_1_i_key" DETAIL: Key (i)=(1) already exists. 
DROP TABLE parted_uniq_tbl; +-- test naming a constraint in a partition when a conflict exists +CREATE TABLE parted_fk_naming ( + id bigint NOT NULL default 1, + id_abc bigint, + CONSTRAINT dummy_constr FOREIGN KEY (id_abc) + REFERENCES parted_fk_naming (id), + PRIMARY KEY (id) +) +PARTITION BY LIST (id); +CREATE TABLE parted_fk_naming_1 ( + id bigint NOT NULL default 1, + id_abc bigint, + PRIMARY KEY (id), + CONSTRAINT dummy_constr CHECK (true) +); +ALTER TABLE parted_fk_naming ATTACH PARTITION parted_fk_naming_1 FOR VALUES IN ('1'); +SELECT conname FROM pg_constraint WHERE conrelid = 'parted_fk_naming_1'::regclass AND contype = 'f'; + conname +-------------------------------- + parted_fk_naming_1_id_abc_fkey +(1 row) + +DROP TABLE parted_fk_naming; -- test a HOT update that invalidates the conflicting tuple. -- the trigger should still fire and catch the violation BEGIN; From 463d9e94e554c953aeb361103524bef1db9065e1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 9 Sep 2022 12:41:36 -0400 Subject: [PATCH 49/95] Reject bogus output from uuid_create(3). When using the BSD UUID functions, contrib/uuid-ossp expects uuid_create() to produce a version-1 UUID. FreeBSD still does so, but in recent NetBSD releases that function produces a version-4 (random) UUID instead. That's not acceptable for our purposes: if the user wanted v4 she would have asked for v4, not v1. Hence, check the version digit and complain if it's not '1'. Also drop the documentation's claim that the NetBSD implementation is usable. It might be, depending on which OS version you're using, but we're not going to get into that kind of detail. (Maybe someday we should ditch all these external libraries and just write our own UUID code, but today is not that day.) Nazir Bilal Yavuz, with cosmetic adjustments and docs by me. Backpatch to all supported versions. Discussion: https://postgr.es/m/3848059.1661038772@sss.pgh.pa.us Discussion: https://postgr.es/m/17358-89806e7420797025@postgresql.org --- contrib/uuid-ossp/uuid-ossp.c | 12 ++++++++++++ doc/src/sgml/installation.sgml | 2 +- doc/src/sgml/uuid-ossp.sgml | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/contrib/uuid-ossp/uuid-ossp.c b/contrib/uuid-ossp/uuid-ossp.c index 5eda8150659..2b0e8a6f055 100644 --- a/contrib/uuid-ossp/uuid-ossp.c +++ b/contrib/uuid-ossp/uuid-ossp.c @@ -288,6 +288,18 @@ uuid_generate_internal(int v, unsigned char *ns, const char *ptr, int len) { strlcpy(strbuf, str, 37); + /* + * In recent NetBSD, uuid_create() has started + * producing v4 instead of v1 UUIDs. Check the + * version field and complain if it's not v1. 
+ */ + if (strbuf[14] != '1') + ereport(ERROR, + (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), + /* translator: %c will be a hex digit */ + errmsg("uuid_create() produced a version %c UUID instead of the expected version 1", + strbuf[14]))); + /* * PTR, if set, replaces the trailing characters of * the uuid; this is to support v1mc, where a random diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index 2a5437ce59a..98337bd4975 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -1118,7 +1118,7 @@ build-postgresql: - to use the UUID functions found in FreeBSD, NetBSD, + to use the UUID functions found in FreeBSD and some other BSD-derived systems diff --git a/doc/src/sgml/uuid-ossp.sgml b/doc/src/sgml/uuid-ossp.sgml index 359d3c01289..26bfb908dae 100644 --- a/doc/src/sgml/uuid-ossp.sgml +++ b/doc/src/sgml/uuid-ossp.sgml @@ -214,7 +214,7 @@ SELECT uuid_generate_v3(uuid_ns_url(), 'http://www.postgresql.org'); at , it is not well maintained, and is becoming increasingly difficult to port to newer platforms. uuid-ossp can now be built without the OSSP - library on some platforms. On FreeBSD, NetBSD, and some other BSD-derived + library on some platforms. On FreeBSD and some other BSD-derived platforms, suitable UUID creation functions are included in the core libc library. On Linux, macOS, and some other platforms, suitable functions are provided in the libuuid From a7e1544dd121bf242a32770677c07efdd77c6cc5 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 9 Sep 2022 15:34:04 -0400 Subject: [PATCH 50/95] Fix possible omission of variable storage markers in ECPG. The ECPG preprocessor converted code such as static varchar str1[10], str2[20], str3[30]; into static struct varchar_1 { int len; char arr[ 10 ]; } str1 ; struct varchar_2 { int len; char arr[ 20 ]; } str2 ; struct varchar_3 { int len; char arr[ 30 ]; } str3 ; thus losing the storage attribute for the later variables. Repeat the declaration for each such variable. (Note that this occurred only for variables declared "varchar" or "bytea", which may help explain how it escaped detection for so long.) 
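With the fix, the storage clause is repeated for each such variable, so the
same input now expands along the lines of

    static struct varchar_1 { int len; char arr[ 10 ]; }  str1 ;
    static struct varchar_2 { int len; char arr[ 20 ]; }  str2 ;
    static struct varchar_3 { int len; char arr[ 30 ]; }  str3 ;

and all three variables keep static storage.
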
Andrey Sokolov Discussion: https://postgr.es/m/942241662288242@mail.yandex.ru --- src/interfaces/ecpg/preproc/ecpg.trailer | 8 +- src/interfaces/ecpg/preproc/type.h | 1 + .../ecpg/test/expected/preproc-variable.c | 93 +++++----- .../test/expected/preproc-variable.stderr | 162 +++++++++--------- .../test/expected/preproc-variable.stdout | 1 + src/interfaces/ecpg/test/preproc/variable.pgc | 9 + 6 files changed, 151 insertions(+), 123 deletions(-) diff --git a/src/interfaces/ecpg/preproc/ecpg.trailer b/src/interfaces/ecpg/preproc/ecpg.trailer index 9105c7e3d5a..9893128c690 100644 --- a/src/interfaces/ecpg/preproc/ecpg.trailer +++ b/src/interfaces/ecpg/preproc/ecpg.trailer @@ -476,9 +476,10 @@ type_declaration: S_TYPEDEF $$ = mm_strdup(""); }; -var_declaration: storage_declaration - var_type +var_declaration: + storage_declaration var_type { + actual_type[struct_level].type_storage = $1; actual_type[struct_level].type_enum = $2.type_enum; actual_type[struct_level].type_str = $2.type_str; actual_type[struct_level].type_dimension = $2.type_dimension; @@ -493,6 +494,7 @@ var_declaration: storage_declaration } | var_type { + actual_type[struct_level].type_storage = EMPTY; actual_type[struct_level].type_enum = $1.type_enum; actual_type[struct_level].type_str = $1.type_str; actual_type[struct_level].type_dimension = $1.type_dimension; @@ -873,7 +875,7 @@ variable_list: variable | variable_list ',' variable { if (actual_type[struct_level].type_enum == ECPGt_varchar || actual_type[struct_level].type_enum == ECPGt_bytea) - $$ = cat_str(3, $1, mm_strdup(";"), $3); + $$ = cat_str(4, $1, mm_strdup(";"), mm_strdup(actual_type[struct_level].type_storage), $3); else $$ = cat_str(3, $1, mm_strdup(","), $3); } diff --git a/src/interfaces/ecpg/preproc/type.h b/src/interfaces/ecpg/preproc/type.h index fb20be53e0b..08b739e5f35 100644 --- a/src/interfaces/ecpg/preproc/type.h +++ b/src/interfaces/ecpg/preproc/type.h @@ -114,6 +114,7 @@ struct exec struct this_type { + char *type_storage; enum ECPGttype type_enum; char *type_str; char *type_dimension; diff --git a/src/interfaces/ecpg/test/expected/preproc-variable.c b/src/interfaces/ecpg/test/expected/preproc-variable.c index 3954f837690..cce377021be 100644 --- a/src/interfaces/ecpg/test/expected/preproc-variable.c +++ b/src/interfaces/ecpg/test/expected/preproc-variable.c @@ -71,6 +71,8 @@ main (void) + + #line 27 "variable.pgc" struct personal_struct { @@ -98,27 +100,33 @@ main (void) } ; struct t2 { #line 32 "variable.pgc" struct varchar_3 { int len; char arr[ BUFFERSIZ ]; } name ; - } ;/* exec sql end declare section */ + } ; #line 33 "variable.pgc" + static struct varchar_4 { int len; char arr[ 50 ]; } vc1 ; static struct varchar_5 { int len; char arr[ 50 ]; } vc2 ; static struct varchar_6 { int len; char arr[ 255 ]; } vc3 ; + +#line 34 "variable.pgc" + static int i1 , i2 , i3 ; +/* exec sql end declare section */ +#line 35 "variable.pgc" -#line 35 "variable.pgc" +#line 37 "variable.pgc" char * married = NULL ; -#line 35 "variable.pgc" +#line 37 "variable.pgc" -#line 36 "variable.pgc" +#line 38 "variable.pgc" long ind_married ; -#line 36 "variable.pgc" +#line 38 "variable.pgc" -#line 37 "variable.pgc" +#line 39 "variable.pgc" ind children ; -#line 37 "variable.pgc" +#line 39 "variable.pgc" int loopcount; char msg[128]; @@ -127,78 +135,78 @@ main (void) strcpy(msg, "connect"); { ECPGconnect(__LINE__, 0, "ecpg1_regression" , NULL, NULL , NULL, 0); -#line 44 "variable.pgc" +#line 46 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 44 "variable.pgc" +#line 
46 "variable.pgc" strcpy(msg, "set"); { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "set datestyle to iso", ECPGt_EOIT, ECPGt_EORT); -#line 47 "variable.pgc" +#line 49 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 47 "variable.pgc" +#line 49 "variable.pgc" strcpy(msg, "create"); { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "create table family ( name char ( 8 ) , born integer , age smallint , married date , children integer )", ECPGt_EOIT, ECPGt_EORT); -#line 50 "variable.pgc" +#line 52 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 50 "variable.pgc" +#line 52 "variable.pgc" strcpy(msg, "insert"); { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "insert into family ( name , married , children ) values ( 'Mum' , '19870714' , 3 )", ECPGt_EOIT, ECPGt_EORT); -#line 53 "variable.pgc" +#line 55 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 53 "variable.pgc" +#line 55 "variable.pgc" { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "insert into family ( name , born , married , children ) values ( 'Dad' , '19610721' , '19870714' , 3 )", ECPGt_EOIT, ECPGt_EORT); -#line 54 "variable.pgc" +#line 56 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 54 "variable.pgc" +#line 56 "variable.pgc" { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "insert into family ( name , age ) values ( 'Child 1' , 16 )", ECPGt_EOIT, ECPGt_EORT); -#line 55 "variable.pgc" +#line 57 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 55 "variable.pgc" +#line 57 "variable.pgc" { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "insert into family ( name , age ) values ( 'Child 2' , 14 )", ECPGt_EOIT, ECPGt_EORT); -#line 56 "variable.pgc" +#line 58 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 56 "variable.pgc" +#line 58 "variable.pgc" { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "insert into family ( name , age ) values ( 'Child 3' , 9 )", ECPGt_EOIT, ECPGt_EORT); -#line 57 "variable.pgc" +#line 59 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 57 "variable.pgc" +#line 59 "variable.pgc" strcpy(msg, "commit"); { ECPGtrans(__LINE__, NULL, "commit"); -#line 60 "variable.pgc" +#line 62 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 60 "variable.pgc" +#line 62 "variable.pgc" strcpy(msg, "open"); { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "declare cur cursor for select name , born , age , married , children from family", ECPGt_EOIT, ECPGt_EORT); -#line 63 "variable.pgc" +#line 65 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 63 "variable.pgc" +#line 65 "variable.pgc" /* exec sql whenever not found break ; */ -#line 65 "variable.pgc" +#line 67 "variable.pgc" p=&personal; @@ -217,13 +225,13 @@ if (sqlca.sqlcode < 0) exit (1);} ECPGt_long,&(ind_married),(long)1,(long)1,sizeof(long), ECPGt_int,&(children.integer),(long)1,(long)1,sizeof(int), ECPGt_short,&(ind_children.smallint),(long)1,(long)1,sizeof(short), ECPGt_EORT); -#line 72 "variable.pgc" +#line 74 "variable.pgc" if (sqlca.sqlcode == ECPG_NOT_FOUND) break; -#line 72 "variable.pgc" +#line 74 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 72 "variable.pgc" +#line 74 "variable.pgc" printf("%8.8s", personal.name.arr); if (i->ind_birth.born >= 0) @@ -242,35 +250,42 @@ if (sqlca.sqlcode < 0) exit (1);} strcpy(msg, "close"); { ECPGdo(__LINE__, 0, 1, NULL, 0, ECPGst_normal, "close cur", ECPGt_EOIT, ECPGt_EORT); -#line 89 "variable.pgc" +#line 91 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 89 "variable.pgc" +#line 91 "variable.pgc" strcpy(msg, "drop"); { ECPGdo(__LINE__, 0, 1, 
NULL, 0, ECPGst_normal, "drop table family", ECPGt_EOIT, ECPGt_EORT); -#line 92 "variable.pgc" +#line 94 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 92 "variable.pgc" +#line 94 "variable.pgc" strcpy(msg, "commit"); { ECPGtrans(__LINE__, NULL, "commit"); -#line 95 "variable.pgc" +#line 97 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 95 "variable.pgc" +#line 97 "variable.pgc" strcpy(msg, "disconnect"); { ECPGdisconnect(__LINE__, "CURRENT"); -#line 98 "variable.pgc" +#line 100 "variable.pgc" if (sqlca.sqlcode < 0) exit (1);} -#line 98 "variable.pgc" +#line 100 "variable.pgc" + + /* this just to silence unused-variable warnings: */ + vc1.len = vc2.len = vc3.len = 0; + i1 = i2 = i3 = 0; + printf("%d %d %d %d %d %d\n", + vc1.len, vc2.len, vc3.len, + i1, i2, i3); return 0; } diff --git a/src/interfaces/ecpg/test/expected/preproc-variable.stderr b/src/interfaces/ecpg/test/expected/preproc-variable.stderr index 08cdcc695f0..0fb5b032218 100644 --- a/src/interfaces/ecpg/test/expected/preproc-variable.stderr +++ b/src/interfaces/ecpg/test/expected/preproc-variable.stderr @@ -2,167 +2,167 @@ [NO_PID]: sqlca: code: 0, state: 00000 [NO_PID]: ECPGconnect: opening database ecpg1_regression on port [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 47: query: set datestyle to iso; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 49: query: set datestyle to iso; with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 47: using PQexec +[NO_PID]: ecpg_execute on line 49: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on line 47: OK: SET +[NO_PID]: ecpg_process_output on line 49: OK: SET [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 50: query: create table family ( name char ( 8 ) , born integer , age smallint , married date , children integer ); with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 52: query: create table family ( name char ( 8 ) , born integer , age smallint , married date , children integer ); with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 50: using PQexec +[NO_PID]: ecpg_execute on line 52: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on line 50: OK: CREATE TABLE +[NO_PID]: ecpg_process_output on line 52: OK: CREATE TABLE [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 53: query: insert into family ( name , married , children ) values ( 'Mum' , '19870714' , 3 ); with 0 parameter(s) on connection ecpg1_regression -[NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 53: using PQexec -[NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on line 53: OK: INSERT 0 1 -[NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 54: query: insert into family ( name , born , married , children ) values ( 'Dad' , '19610721' , '19870714' , 3 ); with 0 parameter(s) on connection ecpg1_regression -[NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 54: using PQexec -[NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on line 54: OK: INSERT 0 1 -[NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 55: query: insert into family ( name , age ) values ( 'Child 1' , 16 ); with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute 
on line 55: query: insert into family ( name , married , children ) values ( 'Mum' , '19870714' , 3 ); with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 [NO_PID]: ecpg_execute on line 55: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 [NO_PID]: ecpg_process_output on line 55: OK: INSERT 0 1 [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 56: query: insert into family ( name , age ) values ( 'Child 2' , 14 ); with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 56: query: insert into family ( name , born , married , children ) values ( 'Dad' , '19610721' , '19870714' , 3 ); with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 [NO_PID]: ecpg_execute on line 56: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 [NO_PID]: ecpg_process_output on line 56: OK: INSERT 0 1 [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 57: query: insert into family ( name , age ) values ( 'Child 3' , 9 ); with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 57: query: insert into family ( name , age ) values ( 'Child 1' , 16 ); with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 [NO_PID]: ecpg_execute on line 57: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 [NO_PID]: ecpg_process_output on line 57: OK: INSERT 0 1 [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ECPGtrans on line 60: action "commit"; connection "ecpg1_regression" +[NO_PID]: ecpg_execute on line 58: query: insert into family ( name , age ) values ( 'Child 2' , 14 ); with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_execute on line 58: using PQexec +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_process_output on line 58: OK: INSERT 0 1 +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_execute on line 59: query: insert into family ( name , age ) values ( 'Child 3' , 9 ); with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_execute on line 59: using PQexec +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ecpg_process_output on line 59: OK: INSERT 0 1 +[NO_PID]: sqlca: code: 0, state: 00000 +[NO_PID]: ECPGtrans on line 62: action "commit"; connection "ecpg1_regression" [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 63: query: declare cur cursor for select name , born , age , married , children from family; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 65: query: declare cur cursor for select name , born , age , married , children from family; with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 63: using PQexec +[NO_PID]: ecpg_execute on line 65: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on line 63: OK: DECLARE CURSOR +[NO_PID]: ecpg_process_output on line 65: OK: DECLARE CURSOR [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 72: query: fetch cur; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 74: query: fetch cur; with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 72: using PQexec +[NO_PID]: ecpg_execute on line 74: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on 
line 72: correctly got 1 tuples with 5 fields +[NO_PID]: ecpg_process_output on line 74: correctly got 1 tuples with 5 fields [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: Mum offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: Mum offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_store_result on line 72: allocating memory for 1 tuples +[NO_PID]: ecpg_store_result on line 74: allocating memory for 1 tuples [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: 1987-07-14 offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: 1987-07-14 offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: 3 offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: 3 offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 72: query: fetch cur; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 74: query: fetch cur; with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 72: using PQexec +[NO_PID]: ecpg_execute on line 74: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on line 72: correctly got 1 tuples with 5 fields +[NO_PID]: ecpg_process_output on line 74: correctly got 1 tuples with 5 fields [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: Dad offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: Dad offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: 19610721 offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: 19610721 offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_store_result on line 72: allocating memory for 1 tuples +[NO_PID]: ecpg_store_result on line 74: allocating memory for 1 tuples [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: 1987-07-14 offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: 1987-07-14 offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: 3 offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: 3 offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 72: query: fetch cur; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 74: query: fetch cur; with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 72: using PQexec +[NO_PID]: ecpg_execute on line 74: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on line 72: correctly got 1 tuples with 5 fields +[NO_PID]: ecpg_process_output on line 74: correctly got 1 tuples with 5 fields [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: 
RESULT: Child 1 offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: Child 1 offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: 16 offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: 16 offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_store_result on line 72: allocating memory for 1 tuples +[NO_PID]: ecpg_store_result on line 74: allocating memory for 1 tuples [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 72: query: fetch cur; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 74: query: fetch cur; with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 72: using PQexec +[NO_PID]: ecpg_execute on line 74: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on line 72: correctly got 1 tuples with 5 fields +[NO_PID]: ecpg_process_output on line 74: correctly got 1 tuples with 5 fields [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: Child 2 offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: Child 2 offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: 14 offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: 14 offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_store_result on line 72: allocating memory for 1 tuples +[NO_PID]: ecpg_store_result on line 74: allocating memory for 1 tuples [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 72: query: fetch cur; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 74: query: fetch cur; with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 72: using PQexec +[NO_PID]: ecpg_execute on line 74: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on line 72: correctly got 1 tuples with 5 fields +[NO_PID]: ecpg_process_output on line 74: correctly got 1 tuples with 5 fields [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: Child 3 offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: Child 3 offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: offset: -1; array: no +[NO_PID]: ecpg_get_data 
on line 74: RESULT: offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: 9 offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: 9 offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_store_result on line 72: allocating memory for 1 tuples +[NO_PID]: ecpg_store_result on line 74: allocating memory for 1 tuples [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_get_data on line 72: RESULT: offset: -1; array: no +[NO_PID]: ecpg_get_data on line 74: RESULT: offset: -1; array: no [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 72: query: fetch cur; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 74: query: fetch cur; with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 72: using PQexec +[NO_PID]: ecpg_execute on line 74: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on line 72: correctly got 0 tuples with 5 fields +[NO_PID]: ecpg_process_output on line 74: correctly got 0 tuples with 5 fields [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: raising sqlcode 100 on line 72: no data found on line 72 +[NO_PID]: raising sqlcode 100 on line 74: no data found on line 74 [NO_PID]: sqlca: code: 100, state: 02000 -[NO_PID]: ecpg_execute on line 89: query: close cur; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 91: query: close cur; with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 89: using PQexec +[NO_PID]: ecpg_execute on line 91: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on line 89: OK: CLOSE CURSOR +[NO_PID]: ecpg_process_output on line 91: OK: CLOSE CURSOR [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 92: query: drop table family; with 0 parameter(s) on connection ecpg1_regression +[NO_PID]: ecpg_execute on line 94: query: drop table family; with 0 parameter(s) on connection ecpg1_regression [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_execute on line 92: using PQexec +[NO_PID]: ecpg_execute on line 94: using PQexec [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ecpg_process_output on line 92: OK: DROP TABLE +[NO_PID]: ecpg_process_output on line 94: OK: DROP TABLE [NO_PID]: sqlca: code: 0, state: 00000 -[NO_PID]: ECPGtrans on line 95: action "commit"; connection "ecpg1_regression" +[NO_PID]: ECPGtrans on line 97: action "commit"; connection "ecpg1_regression" [NO_PID]: sqlca: code: 0, state: 00000 [NO_PID]: ecpg_finish: connection ecpg1_regression closed [NO_PID]: sqlca: code: 0, state: 00000 diff --git a/src/interfaces/ecpg/test/expected/preproc-variable.stdout b/src/interfaces/ecpg/test/expected/preproc-variable.stdout index f4ee9c3b15c..89e841fe663 100644 --- a/src/interfaces/ecpg/test/expected/preproc-variable.stdout +++ b/src/interfaces/ecpg/test/expected/preproc-variable.stdout @@ -3,3 +3,4 @@ Dad , born 19610721, married 1987-07-14, children = 3 Child 1 , age = 16 Child 2 , age = 14 Child 3 , age = 9 +0 0 0 0 0 0 diff --git a/src/interfaces/ecpg/test/preproc/variable.pgc b/src/interfaces/ecpg/test/preproc/variable.pgc index 423a01c16e2..032c2fe57b8 100644 --- 
a/src/interfaces/ecpg/test/preproc/variable.pgc +++ b/src/interfaces/ecpg/test/preproc/variable.pgc @@ -30,6 +30,8 @@ exec sql begin declare section; } ind_personal, *i; ind ind_children; struct t1 { str name; }; struct t2 { str name; }; + static varchar vc1[50], vc2[50], vc3[255]; + static int i1, i2, i3; exec sql end declare section; exec sql char *married = NULL; @@ -97,5 +99,12 @@ exec sql end declare section; strcpy(msg, "disconnect"); exec sql disconnect; + /* this just to silence unused-variable warnings: */ + vc1.len = vc2.len = vc3.len = 0; + i1 = i2 = i3 = 0; + printf("%d %d %d %d %d %d\n", + vc1.len, vc2.len, vc3.len, + i1, i2, i3); + return 0; } From 89eba88bb52e7229dda99baa240b806d4db02c15 Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Mon, 12 Sep 2022 12:59:06 +0200 Subject: [PATCH 51/95] Fix NaN comparison in circle_same test Commit c4c340088 changed geometric operators to use float4 and float8 functions, and handle NaN's in a better way. The circle sameness test had a typo in the code which resulted in all comparisons with the left circle having a NaN radius considered same. postgres=# select '<(0,0),NaN>'::circle ~= '<(0,0),1>'::circle; ?column? ---------- t (1 row) This fixes the sameness test to consider the radius of both the left and right circle. Backpatch to v12 where this was introduced. Author: Ranier Vilela Discussion: https://postgr.es/m/CAEudQAo8dK=yctg2ZzjJuzV4zgOPBxRU5+Kb+yatFiddtQk6Rw@mail.gmail.com Backpatch-through: v12 --- contrib/pax_storage/src/test/regress/expected/geometry.out | 3 +-- src/backend/utils/adt/geo_ops.c | 2 +- src/test/regress/expected/geometry.out | 3 +-- src/test/singlenode_regress/expected/geometry.out | 3 +-- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/contrib/pax_storage/src/test/regress/expected/geometry.out b/contrib/pax_storage/src/test/regress/expected/geometry.out index 83f15159660..0f06d37b839 100644 --- a/contrib/pax_storage/src/test/regress/expected/geometry.out +++ b/contrib/pax_storage/src/test/regress/expected/geometry.out @@ -4344,9 +4344,8 @@ SELECT c1.f1, c2.f1 FROM CIRCLE_TBL c1, CIRCLE_TBL c2 WHERE c1.f1 ~= c2.f1; <(100,200),10> | <(100,200),10> <(100,1),115> | <(100,1),115> <(3,5),0> | <(3,5),0> - <(3,5),NaN> | <(3,5),0> <(3,5),NaN> | <(3,5),NaN> -(9 rows) +(8 rows) -- Overlap with circle SELECT c1.f1, c2.f1 FROM CIRCLE_TBL c1, CIRCLE_TBL c2 WHERE c1.f1 && c2.f1; diff --git a/src/backend/utils/adt/geo_ops.c b/src/backend/utils/adt/geo_ops.c index bfd9ff6a36c..c93a8441aee 100644 --- a/src/backend/utils/adt/geo_ops.c +++ b/src/backend/utils/adt/geo_ops.c @@ -4763,7 +4763,7 @@ circle_same(PG_FUNCTION_ARGS) CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0); CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1); - PG_RETURN_BOOL(((isnan(circle1->radius) && isnan(circle1->radius)) || + PG_RETURN_BOOL(((isnan(circle1->radius) && isnan(circle2->radius)) || FPeq(circle1->radius, circle2->radius)) && point_eq_point(&circle1->center, &circle2->center)); } diff --git a/src/test/regress/expected/geometry.out b/src/test/regress/expected/geometry.out index 83f15159660..0f06d37b839 100644 --- a/src/test/regress/expected/geometry.out +++ b/src/test/regress/expected/geometry.out @@ -4344,9 +4344,8 @@ SELECT c1.f1, c2.f1 FROM CIRCLE_TBL c1, CIRCLE_TBL c2 WHERE c1.f1 ~= c2.f1; <(100,200),10> | <(100,200),10> <(100,1),115> | <(100,1),115> <(3,5),0> | <(3,5),0> - <(3,5),NaN> | <(3,5),0> <(3,5),NaN> | <(3,5),NaN> -(9 rows) +(8 rows) -- Overlap with circle SELECT c1.f1, c2.f1 FROM CIRCLE_TBL c1, CIRCLE_TBL c2 WHERE c1.f1 && c2.f1; diff 
--git a/src/test/singlenode_regress/expected/geometry.out b/src/test/singlenode_regress/expected/geometry.out index 974e2ec43a4..4bb1679157d 100644 --- a/src/test/singlenode_regress/expected/geometry.out +++ b/src/test/singlenode_regress/expected/geometry.out @@ -4342,9 +4342,8 @@ SELECT c1.f1, c2.f1 FROM CIRCLE_TBL c1, CIRCLE_TBL c2 WHERE c1.f1 ~= c2.f1; <(100,200),10> | <(100,200),10> <(100,1),115> | <(100,1),115> <(3,5),0> | <(3,5),0> - <(3,5),NaN> | <(3,5),0> <(3,5),NaN> | <(3,5),NaN> -(9 rows) +(8 rows) -- Overlap with circle SELECT c1.f1, c2.f1 FROM CIRCLE_TBL c1, CIRCLE_TBL c2 WHERE c1.f1 && c2.f1; From 5cc1d31da9ecb19953ac979a22611065d47d9af5 Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Mon, 12 Sep 2022 22:17:17 +0200 Subject: [PATCH 52/95] doc: Fix link to FreeBSD documentation project The FreeBSD site was changed with a redirect, which in turn seems to lead to a 404. Replace with the working link. Author: James Coleman Discussion: https://postgr.es/m/CAAaqYe_JZRj+KPn=hACtwsg1iLRYs=jYvxG1NW4AnDeUL1GD-Q@mail.gmail.com --- doc/src/sgml/docguide.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/docguide.sgml b/doc/src/sgml/docguide.sgml index 05dd9a8b44e..e1bac68604f 100644 --- a/doc/src/sgml/docguide.sgml +++ b/doc/src/sgml/docguide.sgml @@ -63,7 +63,7 @@ a complete O'Reilly book for your online reading pleasure. The NewbieDoc Docbook Guide is very helpful for beginners. - The + The FreeBSD Documentation Project also uses DocBook and has some good information, including a number of style guidelines that might be worth considering. From 3d95002c8de17eebee2a80dca2b93530ac974195 Mon Sep 17 00:00:00 2001 From: reshke Date: Mon, 16 Mar 2026 10:43:40 +0000 Subject: [PATCH 53/95] Fix RAT check violation from e5c72eb --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index 0133ae21dc7..0e000093399 100644 --- a/pom.xml +++ b/pom.xml @@ -531,6 +531,7 @@ code or new licensing patterns. contrib/test_decoding/specs/subxact_without_top.spec contrib/test_decoding/specs/snapshot_transfer.spec contrib/test_decoding/specs/concurrent_stream.spec + contrib/test_decoding/specs/catalog_change_snapshot.spec contrib/test_decoding/logical.conf contrib/hstore_plpython/hstore_plpython.c From 493f7c50db8504e199f16faa60f44a7305aa26c3 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Tue, 13 Sep 2022 11:05:13 +1200 Subject: [PATCH 54/95] Don't reference out-of-bounds array elements in brin_minmax_multi.c The primary fix here is to fix has_matching_range() so it does not reference ranges->values[-1] when nranges == 0. Similar problems existed in AssertCheckRanges() too. It does not look like any of these problems could lead to a crash as the array in question is at the end of the Ranges struct, and values[-1] is memory that belongs to other fields in the struct. However, let's get rid of these rather unsafe coding practices. In passing, I (David) adjusted some comments to try to make it more clear what some of the fields are for in the Ranges struct. I had to study the code to find out what nsorted was for as I couldn't tell from the comments. Author: Ranier Vilela Discussion: https://postgr.es/m/CAEudQAqJQzPitufX-jR=YUbJafpCDAKUnwgdbX_MzSc93wuvdw@mail.gmail.com Backpatch-through: 14, where multi-range brin was added. 
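In miniature, the hazard in has_matching_range() was reading the boundary
values before checking whether any ranges exist; the fix is simply to test
first (a sketch of the access pattern, not the full function):

    Datum	minvalue;
    Datum	maxvalue;

    if (ranges->nranges == 0)
        return false;		/* no ranges; bail out before touching values[] */

    /* only now is it safe to read the range boundaries */
    minvalue = ranges->values[0];
    maxvalue = ranges->values[2 * ranges->nranges - 1];

With nranges == 0, the old ordering computed values[2 * 0 - 1], i.e. the
values[-1] reference described above.
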
--- src/backend/access/brin/brin_minmax_multi.c | 172 ++++++++++---------- 1 file changed, 88 insertions(+), 84 deletions(-) diff --git a/src/backend/access/brin/brin_minmax_multi.c b/src/backend/access/brin/brin_minmax_multi.c index 0af2f2f27bc..1e4afafe622 100644 --- a/src/backend/access/brin/brin_minmax_multi.c +++ b/src/backend/access/brin/brin_minmax_multi.c @@ -142,19 +142,23 @@ typedef struct MinMaxMultiOptions * The Ranges struct stores the boundary values in a single array, but we * treat regular and single-point ranges differently to save space. For * regular ranges (with different boundary values) we have to store both - * values, while for "single-point ranges" we only need to save one value. + * the lower and upper bound of the range, while for "single-point ranges" + * we only need to store a single value. * * The 'values' array stores boundary values for regular ranges first (there * are 2*nranges values to store), and then the nvalues boundary values for * single-point ranges. That is, we have (2*nranges + nvalues) boundary * values in the array. * - * +---------------------------------+-------------------------------+ - * | ranges (sorted pairs of values) | sorted values (single points) | - * +---------------------------------+-------------------------------+ + * +-------------------------+----------------------------------+ + * | ranges (2 * nranges of) | single point values (nvalues of) | + * +-------------------------+----------------------------------+ * * This allows us to quickly add new values, and store outliers without - * making the other ranges very wide. + * having to widen any of the existing range values. + * + * 'nsorted' denotes how many of 'nvalues' in the values[] array are sorted. + * When nsorted == nvalues, all single point values are sorted. * * We never store more than maxvalues values (as set by values_per_range * reloption). If needed we merge some of the ranges. @@ -173,10 +177,10 @@ typedef struct Ranges FmgrInfo *cmp; /* (2*nranges + nvalues) <= maxvalues */ - int nranges; /* number of ranges in the array (stored) */ - int nsorted; /* number of sorted values (ranges + points) */ - int nvalues; /* number of values in the data array (all) */ - int maxvalues; /* maximum number of values (reloption) */ + int nranges; /* number of ranges in the values[] array */ + int nsorted; /* number of nvalues which are sorted */ + int nvalues; /* number of point values in values[] array */ + int maxvalues; /* number of elements in the values[] array */ /* * We simply add the values into a large buffer, without any expensive @@ -318,102 +322,99 @@ AssertCheckRanges(Ranges *ranges, FmgrInfo *cmpFn, Oid colloid) * Check that none of the values are not covered by ranges (both sorted * and unsorted) */ - for (i = 0; i < ranges->nvalues; i++) + if (ranges->nranges > 0) { - Datum compar; - int start, - end; - Datum minvalue, - maxvalue; - - Datum value = ranges->values[2 * ranges->nranges + i]; - - if (ranges->nranges == 0) - break; - - minvalue = ranges->values[0]; - maxvalue = ranges->values[2 * ranges->nranges - 1]; - - /* - * Is the value smaller than the minval? If yes, we'll recurse to the - * left side of range array. - */ - compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue); - - /* smaller than the smallest value in the first range */ - if (DatumGetBool(compar)) - continue; - - /* - * Is the value greater than the maxval? If yes, we'll recurse to the - * right side of range array. 
- */ - compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value); - - /* larger than the largest value in the last range */ - if (DatumGetBool(compar)) - continue; - - start = 0; /* first range */ - end = ranges->nranges - 1; /* last range */ - while (true) + for (i = 0; i < ranges->nvalues; i++) { - int midpoint = (start + end) / 2; - - /* this means we ran out of ranges in the last step */ - if (start > end) - break; + Datum compar; + int start, + end; + Datum minvalue = ranges->values[0]; + Datum maxvalue = ranges->values[2 * ranges->nranges - 1]; + Datum value = ranges->values[2 * ranges->nranges + i]; - /* copy the min/max values from the ranges */ - minvalue = ranges->values[2 * midpoint]; - maxvalue = ranges->values[2 * midpoint + 1]; + compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue); /* - * Is the value smaller than the minval? If yes, we'll recurse to - * the left side of range array. + * If the value is smaller than the lower bound in the first range + * then it cannot possibly be in any of the ranges. */ - compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue); - - /* smaller than the smallest value in this range */ if (DatumGetBool(compar)) - { - end = (midpoint - 1); continue; - } - /* - * Is the value greater than the minval? If yes, we'll recurse to - * the right side of range array. - */ compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value); - /* larger than the largest value in this range */ + /* + * Likewise, if the value is larger than the upper bound of the + * final range, then it cannot possibly be inside any of the + * ranges. + */ if (DatumGetBool(compar)) - { - start = (midpoint + 1); continue; - } - /* hey, we found a matching range */ - Assert(false); + /* bsearch the ranges to see if 'value' fits within any of them */ + start = 0; /* first range */ + end = ranges->nranges - 1; /* last range */ + while (true) + { + int midpoint = (start + end) / 2; + + /* this means we ran out of ranges in the last step */ + if (start > end) + break; + + /* copy the min/max values from the ranges */ + minvalue = ranges->values[2 * midpoint]; + maxvalue = ranges->values[2 * midpoint + 1]; + + /* + * Is the value smaller than the minval? If yes, we'll recurse + * to the left side of range array. + */ + compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue); + + /* smaller than the smallest value in this range */ + if (DatumGetBool(compar)) + { + end = (midpoint - 1); + continue; + } + + /* + * Is the value greater than the minval? If yes, we'll recurse + * to the right side of range array. 
+ */ + compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value); + + /* larger than the largest value in this range */ + if (DatumGetBool(compar)) + { + start = (midpoint + 1); + continue; + } + + /* hey, we found a matching range */ + Assert(false); + } } } - /* and values in the unsorted part must not be in sorted part */ - for (i = ranges->nsorted; i < ranges->nvalues; i++) + /* and values in the unsorted part must not be in the sorted part */ + if (ranges->nsorted > 0) { compare_context cxt; - Datum value = ranges->values[2 * ranges->nranges + i]; - - if (ranges->nsorted == 0) - break; cxt.colloid = ranges->colloid; cxt.cmpFn = ranges->cmp; - Assert(bsearch_arg(&value, &ranges->values[2 * ranges->nranges], - ranges->nsorted, sizeof(Datum), - compare_values, (void *) &cxt) == NULL); + for (i = ranges->nsorted; i < ranges->nvalues; i++) + { + Datum value = ranges->values[2 * ranges->nranges + i]; + + Assert(bsearch_arg(&value, &ranges->values[2 * ranges->nranges], + ranges->nsorted, sizeof(Datum), + compare_values, (void *) &cxt) == NULL); + } } #endif } @@ -924,8 +925,8 @@ has_matching_range(BrinDesc *bdesc, Oid colloid, Ranges *ranges, { Datum compar; - Datum minvalue = ranges->values[0]; - Datum maxvalue = ranges->values[2 * ranges->nranges - 1]; + Datum minvalue; + Datum maxvalue; FmgrInfo *cmpLessFn; FmgrInfo *cmpGreaterFn; @@ -937,6 +938,9 @@ has_matching_range(BrinDesc *bdesc, Oid colloid, Ranges *ranges, if (ranges->nranges == 0) return false; + minvalue = ranges->values[0]; + maxvalue = ranges->values[2 * ranges->nranges - 1]; + /* * Otherwise, need to compare the new value with boundaries of all the * ranges. First check if it's less than the absolute minimum, which is From 2034027ac297ecb2867bccb67e9c5be1b13ddcde Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 14 Sep 2022 06:04:24 +0200 Subject: [PATCH 55/95] Expand palloc/pg_malloc API for more type safety This adds additional variants of palloc, pg_malloc, etc. that encapsulate common usage patterns and provide more type safety. Specifically, this adds palloc_object(), palloc_array(), and repalloc_array(), which take the type name of the object to be allocated as its first argument and cast the return as a pointer to that type. There are also palloc0_object() and palloc0_array() variants for initializing with zero, and pg_malloc_*() variants of all of the above. Inspired by the talloc library. This is backpatched from master so that future backpatchable code can make use of these APIs. This patch by itself does not contain any users of these APIs. 
Reviewed-by: Tom Lane Discussion: https://www.postgresql.org/message-id/flat/bb755632-2a43-d523-36f8-a1e7a389a907@enterprisedb.com --- src/include/common/fe_memutils.h | 28 ++++++++++++++++++++++++++++ src/include/utils/palloc.h | 22 ++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/src/include/common/fe_memutils.h b/src/include/common/fe_memutils.h index a8b59f5db73..b706aa195b2 100644 --- a/src/include/common/fe_memutils.h +++ b/src/include/common/fe_memutils.h @@ -29,6 +29,28 @@ extern void *pg_malloc_extended(size_t size, int flags); extern void *pg_realloc(void *pointer, size_t size); extern void pg_free(void *pointer); +/* + * Variants with easier notation and more type safety + */ + +/* + * Allocate space for one object of type "type" + */ +#define pg_malloc_object(type) ((type *) pg_malloc(sizeof(type))) +#define pg_malloc0_object(type) ((type *) pg_malloc0(sizeof(type))) + +/* + * Allocate space for "count" objects of type "type" + */ +#define pg_malloc_array(type, count) ((type *) pg_malloc(sizeof(type) * (count))) +#define pg_malloc0_array(type, count) ((type *) pg_malloc0(sizeof(type) * (count))) + +/* + * Change size of allocation pointed to by "pointer" to have space for "count" + * objects of type "type" + */ +#define pg_realloc_array(pointer, type, count) ((type *) pg_realloc(pointer, sizeof(type) * (count))) + /* Equivalent functions, deliberately named the same as backend functions */ extern char *pstrdup(const char *in); extern char *pnstrdup(const char *in, Size size); @@ -38,6 +60,12 @@ extern void *palloc_extended(Size size, int flags); extern void *repalloc(void *pointer, Size size); extern void pfree(void *pointer); +#define palloc_object(type) ((type *) palloc(sizeof(type))) +#define palloc0_object(type) ((type *) palloc0(sizeof(type))) +#define palloc_array(type, count) ((type *) palloc(sizeof(type) * (count))) +#define palloc0_array(type, count) ((type *) palloc0(sizeof(type) * (count))) +#define repalloc_array(pointer, type, count) ((type *) repalloc(pointer, sizeof(type) * (count))) + /* sprintf into a palloc'd buffer --- these are in psprintf.c */ extern char *psprintf(const char *fmt,...) pg_attribute_printf(1, 2); extern size_t pvsnprintf(char *buf, size_t len, const char *fmt, va_list args) pg_attribute_printf(3, 0); diff --git a/src/include/utils/palloc.h b/src/include/utils/palloc.h index 03eef856214..d768cabb973 100644 --- a/src/include/utils/palloc.h +++ b/src/include/utils/palloc.h @@ -139,6 +139,28 @@ extern void *palloc_extended(Size size, int flags); extern pg_nodiscard void *repalloc(void *pointer, Size size); extern void pfree(void *pointer); +/* + * Variants with easier notation and more type safety + */ + +/* + * Allocate space for one object of type "type" + */ +#define palloc_object(type) ((type *) palloc(sizeof(type))) +#define palloc0_object(type) ((type *) palloc0(sizeof(type))) + +/* + * Allocate space for "count" objects of type "type" + */ +#define palloc_array(type, count) ((type *) palloc(sizeof(type) * (count))) +#define palloc0_array(type, count) ((type *) palloc0(sizeof(type) * (count))) + +/* + * Change size of allocation pointed to by "pointer" to have space for "count" + * objects of type "type" + */ +#define repalloc_array(pointer, type, count) ((type *) repalloc(pointer, sizeof(type) * (count))) + /* * The result of palloc() is always word-aligned, so we can skip testing * alignment of the pointer when deciding which MemSet variant to use. 
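To show what the new notation buys, here is a hypothetical before-and-after
sketch; DemoState and the variable names are invented, not taken from the
patch:

    typedef struct DemoState    /* hypothetical type, illustration only */
    {
        int         count;
        char       *name;
    } DemoState;

    /* Old spelling: the type name appears in both the cast and the sizeof. */
    DemoState  *state = (DemoState *) palloc0(sizeof(DemoState));

    /* New spelling: the type is written once and the cast is generated. */
    DemoState  *state2 = palloc0_object(DemoState);
    int        *vals = palloc_array(int, 64);

    /* Growing an array no longer repeats the sizeof arithmetic either. */
    vals = repalloc_array(vals, int, 128);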
From 65a5481bb40a6412b7f70bd860c396b304edfdc1 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Wed, 14 Sep 2022 14:52:28 +0900
Subject: [PATCH 56/95] Fix incorrect value for "strategy" with
 deflateParams() in walmethods.c

The zlib documentation mentions the values supported for the compression
strategy, but this code has been using a hardcoded value of 0 rather than
Z_DEFAULT_STRATEGY.  This commit adjusts the code to use
Z_DEFAULT_STRATEGY.

Backpatch down to where this code has been added to ease the backport of
any future patch touching this area.

Reported-by: Tom Lane
Discussion: https://postgr.es/m/1400032.1662217889@sss.pgh.pa.us
Backpatch-through: 10
---
 src/bin/pg_basebackup/walmethods.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c
index be98057d88c..e91eb8f42ff 100644
--- a/src/bin/pg_basebackup/walmethods.c
+++ b/src/bin/pg_basebackup/walmethods.c
@@ -706,7 +706,7 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 		return NULL;

 	/* Turn off compression for header */
-	if (deflateParams(tar_data->zp, 0, 0) != Z_OK)
+	if (deflateParams(tar_data->zp, 0, Z_DEFAULT_STRATEGY) != Z_OK)
 	{
 		tar_set_error("could not change compression parameters");
 		return NULL;
@@ -746,7 +746,8 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
 		return NULL;

 	/* Re-enable compression for the rest of the file */
-	if (deflateParams(tar_data->zp, tar_data->compression, 0) != Z_OK)
+	if (deflateParams(tar_data->zp, tar_data->compression,
+					  Z_DEFAULT_STRATEGY) != Z_OK)
 	{
 		tar_set_error("could not change compression parameters");
 		return NULL;
@@ -960,7 +961,7 @@ tar_close(Walfile f, WalCloseMethod method)
 	else
 	{
 		/* Turn off compression */
-		if (deflateParams(tar_data->zp, 0, 0) != Z_OK)
+		if (deflateParams(tar_data->zp, 0, Z_DEFAULT_STRATEGY) != Z_OK)
 		{
 			tar_set_error("could not change compression parameters");
 			return -1;
@@ -972,7 +973,8 @@ tar_close(Walfile f, WalCloseMethod method)
 			return -1;

 		/* Turn compression back on */
-		if (deflateParams(tar_data->zp, tar_data->compression, 0) != Z_OK)
+		if (deflateParams(tar_data->zp, tar_data->compression,
+						  Z_DEFAULT_STRATEGY) != Z_OK)
 		{
 			tar_set_error("could not change compression parameters");
 			return -1;

From c04f3d97ce10605e153f29d483cf1185a28b60e0 Mon Sep 17 00:00:00 2001
From: Etsuro Fujita
Date: Wed, 14 Sep 2022 18:45:03 +0900
Subject: [PATCH 57/95] postgres_fdw: Avoid 'variable not found in subplan
 target list' error.

The tlist of the EvalPlanQual outer plan for a ForeignScan node is
adjusted to produce a tuple whose descriptor matches the scan tuple slot
for the ForeignScan node.  But in the case where the outer plan contains
an extra Sort node, if the new tlist contained columns required only for
evaluating PlaceHolderVars or columns required only for evaluating local
conditions, this would cause setrefs.c to fail with that error.  The
cause of this is that when creating the outer plan by injecting the Sort
node into an alternative local join plan that could emit such extra
columns as well, we fail to arrange for the outer plan to propagate them
up through the Sort node, causing setrefs.c to fail to match the new
tlist entries to what is available from the outer plan.  Repair.

Per report from Alexander Pyhalov.

Richard Guo and Etsuro Fujita, reviewed by Alexander Pyhalov and Tom
Lane.  Backpatch to all supported versions.
Discussion: http://postgr.es/m/cfb17bf6dfdf876467bd5ef533852d18%40postgrespro.ru --- .../postgres_fdw/expected/postgres_fdw.out | 79 +++++++++++++++++++ contrib/postgres_fdw/postgres_fdw.c | 49 ++++++++++++ contrib/postgres_fdw/sql/postgres_fdw.sql | 13 +++ 3 files changed, 141 insertions(+) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 2874e69c7e5..67fde96a858 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2467,6 +2467,85 @@ SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = f RESET enable_nestloop; RESET enable_hashjoin; +-- test that add_paths_with_pathkeys_for_rel() arranges for the epq_path to +-- return columns needed by the parent ForeignScan node +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.*, COALESCE(ft1.c3 || ft2.c3, 'foobar') FROM ft1 INNER JOIN ft2 ON (ft1.c1 = ft2.c1 AND ft1.c1 < 100)) ss ON (local_tbl.c1 = ss.c1) ORDER BY local_tbl.c1 FOR UPDATE OF local_tbl; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + LockRows + Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, (COALESCE((ft1.c3 || ft2.c3), 'foobar'::text)), local_tbl.ctid, ft1.*, ft2.* + -> Merge Left Join + Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, (COALESCE((ft1.c3 || ft2.c3), 'foobar'::text)), local_tbl.ctid, ft1.*, ft2.* + Merge Cond: (local_tbl.c1 = ft1.c1) + -> Index Scan using local_tbl_pkey on public.local_tbl + Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid + -> Materialize + Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, (COALESCE((ft1.c3 || ft2.c3), 'foobar'::text)) + -> Foreign Scan + Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, COALESCE((ft1.c3 || ft2.c3), 'foobar'::text) + Relations: (public.ft1) INNER JOIN (public.ft2) + Remote SQL: SELECT r4."C 1", r4.c2, r4.c3, r4.c4, r4.c5, r4.c6, r4.c7, r4.c8, CASE WHEN (r4.*)::text IS NOT NULL THEN ROW(r4."C 1", r4.c2, r4.c3, r4.c4, r4.c5, r4.c6, r4.c7, r4.c8) END, CASE WHEN (r5.*)::text IS NOT NULL THEN ROW(r5."C 1", r5.c2, r5.c3, r5.c4, r5.c5, r5.c6, r5.c7, r5.c8) END, r5.c3 FROM ("S 1"."T 1" r4 INNER JOIN "S 1"."T 1" r5 ON (((r4."C 1" = r5."C 1")) AND ((r4."C 1" < 100)))) ORDER BY r4."C 1" ASC NULLS LAST + -> Result + Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, ft2.c3 + -> Sort + Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, (COALESCE((ft1.c3 || ft2.c3), 'foobar'::text)), ft2.c3 + Sort Key: ft1.c1 + -> Hash Join + Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, COALESCE((ft1.c3 || ft2.c3), 'foobar'::text), ft2.c3 + Hash Cond: (ft1.c1 = ft2.c1) + -> Foreign Scan on public.ft1 + Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.* + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, 
c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) + -> Hash + Output: ft2.*, ft2.c1, ft2.c3 + -> Foreign Scan on public.ft2 + Output: ft2.*, ft2.c1, ft2.c3 + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" +(29 rows) + +ALTER SERVER loopback OPTIONS (DROP extensions); +ALTER SERVER loopback OPTIONS (ADD fdw_startup_cost '10000.0'); +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.* FROM ft1 INNER JOIN ft2 ON (ft1.c1 = ft2.c1 AND ft1.c1 < 100 AND ft1.c1 = postgres_fdw_abs(ft2.c2))) ss ON (local_tbl.c3 = ss.c3) ORDER BY local_tbl.c1 FOR UPDATE OF local_tbl; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + LockRows + Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, local_tbl.ctid, ft1.*, ft2.* + -> Nested Loop Left Join + Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, local_tbl.ctid, ft1.*, ft2.* + Join Filter: (local_tbl.c3 = ft1.c3) + -> Index Scan using local_tbl_pkey on public.local_tbl + Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid + -> Materialize + Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.* + -> Foreign Scan + Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.* + Filter: (ft1.c1 = postgres_fdw_abs(ft2.c2)) + Relations: (public.ft1) INNER JOIN (public.ft2) + Remote SQL: SELECT r4."C 1", r4.c2, r4.c3, r4.c4, r4.c5, r4.c6, r4.c7, r4.c8, CASE WHEN (r4.*)::text IS NOT NULL THEN ROW(r4."C 1", r4.c2, r4.c3, r4.c4, r4.c5, r4.c6, r4.c7, r4.c8) END, CASE WHEN (r5.*)::text IS NOT NULL THEN ROW(r5."C 1", r5.c2, r5.c3, r5.c4, r5.c5, r5.c6, r5.c7, r5.c8) END, r5.c2 FROM ("S 1"."T 1" r4 INNER JOIN "S 1"."T 1" r5 ON (((r4."C 1" = r5."C 1")) AND ((r4."C 1" < 100)))) ORDER BY r4.c3 ASC NULLS LAST + -> Sort + Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, ft2.c2 + Sort Key: ft1.c3 + -> Merge Join + Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, ft2.c2 + Merge Cond: ((ft1.c1 = (postgres_fdw_abs(ft2.c2))) AND (ft1.c1 = ft2.c1)) + -> Sort + Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.* + Sort Key: ft1.c1 + -> Foreign Scan on public.ft1 + Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.* + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) + -> Sort + Output: ft2.*, ft2.c1, ft2.c2, (postgres_fdw_abs(ft2.c2)) + Sort Key: (postgres_fdw_abs(ft2.c2)), ft2.c1 + -> Foreign Scan on public.ft2 + Output: ft2.*, ft2.c1, ft2.c2, postgres_fdw_abs(ft2.c2) + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST +(32 rows) + +ALTER SERVER loopback OPTIONS (DROP fdw_startup_cost); +ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw'); DROP TABLE local_tbl; -- check join pushdown in situations where multiple userids are involved CREATE ROLE regress_view_owner SUPERUSER; diff --git a/contrib/postgres_fdw/postgres_fdw.c 
b/contrib/postgres_fdw/postgres_fdw.c index b5adcb86c09..03ea032cb4f 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -5813,6 +5813,55 @@ add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel, useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel); + /* + * Before creating sorted paths, arrange for the passed-in EPQ path, if + * any, to return columns needed by the parent ForeignScan node so that + * they will propagate up through Sort nodes injected below, if necessary. + */ + if (epq_path != NULL && useful_pathkeys_list != NIL) + { + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) rel->fdw_private; + PathTarget *target = copy_pathtarget(epq_path->pathtarget); + + /* Include columns required for evaluating PHVs in the tlist. */ + add_new_columns_to_pathtarget(target, + pull_var_clause((Node *) target->exprs, + PVC_RECURSE_PLACEHOLDERS)); + + /* Include columns required for evaluating the local conditions. */ + foreach(lc, fpinfo->local_conds) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + add_new_columns_to_pathtarget(target, + pull_var_clause((Node *) rinfo->clause, + PVC_RECURSE_PLACEHOLDERS)); + } + + /* + * If we have added any new columns, adjust the tlist of the EPQ path. + * + * Note: the plan created using this path will only be used to execute + * EPQ checks, where accuracy of the plan cost and width estimates + * would not be important, so we do not do set_pathtarget_cost_width() + * for the new pathtarget here. See also postgresGetForeignPlan(). + */ + if (list_length(target->exprs) > list_length(epq_path->pathtarget->exprs)) + { + /* The EPQ path is a join path, so it is projection-capable. */ + Assert(is_projection_capable_path(epq_path)); + + /* + * Use create_projection_path() here, so as to avoid modifying it + * in place. + */ + epq_path = (Path *) create_projection_path(root, + rel, + epq_path, + target); + } + } + /* Create one path for each set of pathkeys we found above. 
*/ foreach(lc, useful_pathkeys_list) { diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 3663ca3bdf4..f8c813d2175 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -628,6 +628,19 @@ SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = f AND ft1.c2 = ft5.c1 AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft2.c1 < 100 FOR UPDATE; RESET enable_nestloop; RESET enable_hashjoin; + +-- test that add_paths_with_pathkeys_for_rel() arranges for the epq_path to +-- return columns needed by the parent ForeignScan node +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.*, COALESCE(ft1.c3 || ft2.c3, 'foobar') FROM ft1 INNER JOIN ft2 ON (ft1.c1 = ft2.c1 AND ft1.c1 < 100)) ss ON (local_tbl.c1 = ss.c1) ORDER BY local_tbl.c1 FOR UPDATE OF local_tbl; + +ALTER SERVER loopback OPTIONS (DROP extensions); +ALTER SERVER loopback OPTIONS (ADD fdw_startup_cost '10000.0'); +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.* FROM ft1 INNER JOIN ft2 ON (ft1.c1 = ft2.c1 AND ft1.c1 < 100 AND ft1.c1 = postgres_fdw_abs(ft2.c2))) ss ON (local_tbl.c3 = ss.c3) ORDER BY local_tbl.c1 FOR UPDATE OF local_tbl; +ALTER SERVER loopback OPTIONS (DROP fdw_startup_cost); +ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw'); + DROP TABLE local_tbl; -- check join pushdown in situations where multiple userids are involved From ae5a90080ddde8f27dbcf8a76f919aa9e2f0ba95 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 15 Sep 2022 17:17:53 -0400 Subject: [PATCH 58/95] Detect format-string mistakes in the libpq_pipeline test module. I happened to notice that libpq_pipeline's private implementation of pg_fatal lacked any pg_attribute_printf decoration. Indeed, adding that turned up a mistake! We'd likely never have noticed because the error exits in this code are unlikely to get hit, but still, it's a bug. We're so used to having the compiler check this stuff for us that a printf-like function without pg_attribute_printf is a land mine. I wonder if there is a way to detect such omissions. Back-patch to v14 where this code came in. --- src/test/modules/libpq_pipeline/libpq_pipeline.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/modules/libpq_pipeline/libpq_pipeline.c b/src/test/modules/libpq_pipeline/libpq_pipeline.c index dfab924965d..9d07681b297 100644 --- a/src/test/modules/libpq_pipeline/libpq_pipeline.c +++ b/src/test/modules/libpq_pipeline/libpq_pipeline.c @@ -28,6 +28,8 @@ static void exit_nicely(PGconn *conn); +static void pg_attribute_noreturn() pg_fatal_impl(int line, const char *fmt,...) + pg_attribute_printf(2, 3); static bool process_result(PGconn *conn, PGresult *res, int results, int numsent); @@ -916,7 +918,7 @@ test_prepared(PGconn *conn) if (PQresultStatus(res) != PGRES_COMMAND_OK) pg_fatal("expected COMMAND_OK, got %s", PQresStatus(PQresultStatus(res))); if (PQnfields(res) != lengthof(expected_oids)) - pg_fatal("expected %d columns, got %d", + pg_fatal("expected %zd columns, got %d", lengthof(expected_oids), PQnfields(res)); for (int i = 0; i < PQnfields(res); i++) { From 15bd0479cb51a2f1dfc2369b67c006f171038e46 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 16 Sep 2022 13:23:01 -0400 Subject: [PATCH 59/95] Improve plpgsql's ability to handle arguments declared as RECORD. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Treat arguments declared as RECORD as if that were a polymorphic type (which it is, sort of), in that we substitute the actual argument type while forming the function cache lookup key. This allows the specific composite type to be known in some cases where it was not before, at the cost of making a separate function cache entry for each named composite type that's passed to the function during a session. The particular symptom discussed in bug #17610 could be solved in other more-efficient ways, but only at the cost of considerable development work, and there are other cases where we'd still fail without this. Per bug #17610 from Martin Jurča. Back-patch to v11 where we first allowed plpgsql functions to be declared as taking type RECORD. Discussion: https://postgr.es/m/17610-fb1eef75bf6c2364@postgresql.org --- .../plpgsql/src/expected/plpgsql_record.out | 35 +++++++++++++++++++ src/pl/plpgsql/src/pl_comp.c | 34 +++++++++++++++--- src/pl/plpgsql/src/sql/plpgsql_record.sql | 11 ++++++ 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/src/pl/plpgsql/src/expected/plpgsql_record.out b/src/pl/plpgsql/src/expected/plpgsql_record.out index 84817589883..f390328671a 100644 --- a/src/pl/plpgsql/src/expected/plpgsql_record.out +++ b/src/pl/plpgsql/src/expected/plpgsql_record.out @@ -12,6 +12,7 @@ -- s/NOTICE: old.ctid = \(0,[12]\)/ NOTICE: old.ctid = \(0,XXX\)/ -- end_matchsubs create type two_int4s as (f1 int4, f2 int4); +create type more_int4s as (f0 text, f1 int4, f2 int4); create type two_int8s as (q1 int8, q2 int8); create type nested_int8s as (c1 two_int8s, c2 two_int8s); -- base-case return of a composite type @@ -436,6 +437,18 @@ select getf1(row(1,2)); 1 (1 row) +select getf1(row(1,2)::two_int4s); + getf1 +------- + 1 +(1 row) + +select getf1(row('foo',123,456)::more_int4s); + getf1 +------- + 123 +(1 row) + -- the context stack is different when debug_discard_caches -- is set, so suppress context output \set SHOW_CONTEXT never @@ -448,6 +461,28 @@ select getf1(row(1,2)); 1 (1 row) +-- this seemingly-equivalent case behaves a bit differently, +-- because the core parser's handling of $N symbols is simplistic +create function getf2(record) returns int language plpgsql as +$$ begin return $1.f2; end $$; +select getf2(row(1,2)); -- ideally would work, but does not +ERROR: could not identify column "f2" in record data type +LINE 1: $1.f2 + ^ +QUERY: $1.f2 +CONTEXT: PL/pgSQL function getf2(record) line 1 at RETURN +select getf2(row(1,2)::two_int4s); + getf2 +------- + 2 +(1 row) + +select getf2(row('foo',123,456)::more_int4s); + getf2 +------- + 456 +(1 row) + -- check behavior when assignment to FOR-loop variable requires coercion do $$ declare r two_int8s; diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index 76ea85d8be6..9175358bf52 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -2524,9 +2524,15 @@ compute_function_hashkey(FunctionCallInfo fcinfo, /* * This is the same as the standard resolve_polymorphic_argtypes() function, - * but with a special case for validation: assume that polymorphic arguments - * are integer, integer-array or integer-range. Also, we go ahead and report - * the error if we can't resolve the types. + * except that: + * 1. We go ahead and report the error if we can't resolve the types. + * 2. 
We treat RECORD-type input arguments (not output arguments) as if + * they were polymorphic, replacing their types with the actual input + * types if we can determine those. This allows us to create a separate + * function cache entry for each named composite type passed to such an + * argument. + * 3. In validation mode, we have no inputs to look at, so assume that + * polymorphic arguments are integer, integer-array or integer-range. */ static void plpgsql_resolve_polymorphic_argtypes(int numargs, @@ -2538,6 +2544,8 @@ plpgsql_resolve_polymorphic_argtypes(int numargs, if (!forValidator) { + int inargno; + /* normal case, pass to standard routine */ if (!resolve_polymorphic_argtypes(numargs, argtypes, argmodes, call_expr)) @@ -2546,10 +2554,28 @@ plpgsql_resolve_polymorphic_argtypes(int numargs, errmsg("could not determine actual argument " "type for polymorphic function \"%s\"", proname))); + /* also, treat RECORD inputs (but not outputs) as polymorphic */ + inargno = 0; + for (i = 0; i < numargs; i++) + { + char argmode = argmodes ? argmodes[i] : PROARGMODE_IN; + + if (argmode == PROARGMODE_OUT || argmode == PROARGMODE_TABLE) + continue; + if (argtypes[i] == RECORDOID || argtypes[i] == RECORDARRAYOID) + { + Oid resolvedtype = get_call_expr_argtype(call_expr, + inargno); + + if (OidIsValid(resolvedtype)) + argtypes[i] = resolvedtype; + } + inargno++; + } } else { - /* special validation case */ + /* special validation case (no need to do anything for RECORD) */ for (i = 0; i < numargs; i++) { switch (argtypes[i]) diff --git a/src/pl/plpgsql/src/sql/plpgsql_record.sql b/src/pl/plpgsql/src/sql/plpgsql_record.sql index fa9f06eab11..d6242efbb81 100644 --- a/src/pl/plpgsql/src/sql/plpgsql_record.sql +++ b/src/pl/plpgsql/src/sql/plpgsql_record.sql @@ -12,6 +12,7 @@ -- s/NOTICE: old.ctid = \(0,[12]\)/ NOTICE: old.ctid = \(0,XXX\)/ -- end_matchsubs create type two_int4s as (f1 int4, f2 int4); +create type more_int4s as (f0 text, f1 int4, f2 int4); create type two_int8s as (q1 int8, q2 int8); create type nested_int8s as (c1 two_int8s, c2 two_int8s); @@ -266,6 +267,8 @@ create function getf1(x record) returns int language plpgsql as $$ begin return x.f1; end $$; select getf1(1); select getf1(row(1,2)); +select getf1(row(1,2)::two_int4s); +select getf1(row('foo',123,456)::more_int4s); -- the context stack is different when debug_discard_caches -- is set, so suppress context output \set SHOW_CONTEXT never @@ -273,6 +276,14 @@ select getf1(row(1,2)::two_int8s); \set SHOW_CONTEXT errors select getf1(row(1,2)); +-- this seemingly-equivalent case behaves a bit differently, +-- because the core parser's handling of $N symbols is simplistic +create function getf2(record) returns int language plpgsql as +$$ begin return $1.f2; end $$; +select getf2(row(1,2)); -- ideally would work, but does not +select getf2(row(1,2)::two_int4s); +select getf2(row('foo',123,456)::more_int4s); + -- check behavior when assignment to FOR-loop variable requires coercion do $$ declare r two_int8s; From 80bb3377121c04e0cb1ad0256c42bc711dd3ab95 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Sat, 17 Sep 2022 09:21:59 -0700 Subject: [PATCH 60/95] Include c.h instead of postgres.h in src/port/*p{read,write}*.c Frontend code shouldn't include postgres.h. Some files in src/port/ need to include postgres.h/postgres_fe.h, but these files don't. 
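For context, the include convention being enforced amounts to the following
sketch of project practice, not an excerpt from the tree:

    /* backend-only translation unit */
    #include "postgres.h"

    /* frontend-only translation unit */
    #include "postgres_fe.h"

    /* src/port/ code compiled for both environments, as patched here */
    #include "c.h"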
Discussion: https://postgr.es/m/20220915022626.5xx3ccgkzpkqw5mq@awork3.anarazel.de
Backpatch: 12-, where 3fd2a7932ef introduced (some) of these files
---
 src/port/pread.c   | 2 +-
 src/port/preadv.c  | 2 +-
 src/port/pwrite.c  | 2 +-
 src/port/pwritev.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/port/pread.c b/src/port/pread.c
index 486f07a7dff..a35d3f4b07c 100644
--- a/src/port/pread.c
+++ b/src/port/pread.c
@@ -15,7 +15,7 @@
  */

-#include "postgres.h"
+#include "c.h"

 #ifdef WIN32
 #include <windows.h>

diff --git a/src/port/preadv.c b/src/port/preadv.c
index eb153ca5028..d83dab6718f 100644
--- a/src/port/preadv.c
+++ b/src/port/preadv.c
@@ -15,7 +15,7 @@
  */

-#include "postgres.h"
+#include "c.h"

 #ifdef WIN32
 #include <windows.h>

diff --git a/src/port/pwrite.c b/src/port/pwrite.c
index 282b27115e5..e3a132aa7aa 100644
--- a/src/port/pwrite.c
+++ b/src/port/pwrite.c
@@ -15,7 +15,7 @@
  */

-#include "postgres.h"
+#include "c.h"

 #ifdef WIN32
 #include <windows.h>

diff --git a/src/port/pwritev.c b/src/port/pwritev.c
index 2e8ef7e3785..082bed9abfc 100644
--- a/src/port/pwritev.c
+++ b/src/port/pwritev.c
@@ -15,7 +15,7 @@
  */

-#include "postgres.h"
+#include "c.h"

 #ifdef WIN32
 #include <windows.h>

From 36e38a48a59a4ee327187cd05147b1ee398dc5a5 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan
Date: Sat, 17 Sep 2022 16:54:14 -0700
Subject: [PATCH 61/95] Make check_usermap() parameter names consistent.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function has a bool argument named "case_insensitive", but that was
spelled "case_sensitive" in the declaration.  Make them consistent now
to avoid confusion in the future.

Author: Peter Geoghegan
Reviewed-By: Michael Paquier
Discussion: https://postgr.es/m/CAH2-WznJt9CMM9KJTMjJh_zbL5hD9oX44qdJ4aqZtjFi-zA3Tg@mail.gmail.com
Backpatch: 10-
---
 src/include/libpq/hba.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/include/libpq/hba.h b/src/include/libpq/hba.h
index 793e3a5d85b..705e2196365 100644
--- a/src/include/libpq/hba.h
+++ b/src/include/libpq/hba.h
@@ -141,7 +141,7 @@ extern const char *hba_authname(UserAuth auth_method);
 extern void hba_getauthmethod(hbaPort *port);
 extern int	check_usermap(const char *usermap_name,
 						  const char *pg_role, const char *auth_user,
-						  bool case_sensitive);
+						  bool case_insensitive);
 extern bool check_same_host_or_net(SockAddr *raddr, IPCompareMethod method);
 extern bool pg_isblank(const char c);

From c6128ebab9ab3f552c85b2b7760524add11b5aa8 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 19 Sep 2022 12:16:02 -0400
Subject: [PATCH 62/95] Future-proof the recursion inside ExecShutdownNode().

The API contract for planstate_tree_walker() callbacks is that they
take a PlanState pointer and a context pointer.  Somebody figured they
could save a couple lines of code by ignoring that, and passing
ExecShutdownNode itself as the walker even though it has but one
argument.  Somewhat remarkably, we've gotten away with that so far.
However, it seems clear that the upcoming C2x standard means to forbid
such cases, and compilers that actively break such code likely won't be
far behind.  So spend the extra few lines of code to do it honestly
with a separate walker function.

In HEAD, we might as well go further and remove ExecShutdownNode's
useless return value.  I left that as-is in back branches though, to
forestall complaints about ABI breakage.

Back-patch, with the thought that this might become of practical
importance before our stable branches are all out of service.
It doesn't seem to be fixing any live bug on any currently known platform, however. Discussion: https://postgr.es/m/208054.1663534665@sss.pgh.pa.us --- src/backend/executor/execProcnode.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 1bae66ef334..92328429d5a 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -170,6 +170,7 @@ static TupleTableSlot *ExecProcNodeFirst(PlanState *node); static TupleTableSlot *ExecProcNodeInstr(PlanState *node); #endif static TupleTableSlot *ExecProcNodeGPDB(PlanState *node); +static bool ExecShutdownNode_walker(PlanState *node, void *context); /* ------------------------------------------------------------------------ @@ -1348,6 +1349,12 @@ planstate_walk_kids(PlanState *planstate, */ bool ExecShutdownNode(PlanState *node) +{ + return ExecShutdownNode_walker(node, NULL); +} + +static bool +ExecShutdownNode_walker(PlanState *node, void *context) { if (node == NULL) return false; @@ -1367,7 +1374,7 @@ ExecShutdownNode(PlanState *node) if (node->instrument && node->instrument->running) InstrStartNode(node->instrument); - planstate_tree_walker(node, ExecShutdownNode, NULL); + planstate_tree_walker(node, ExecShutdownNode_walker, context); switch (nodeTag(node)) { From f512e2246c54673c7cb6a1f11a6d18b7ddc78a4f Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 20 Sep 2022 18:13:46 +0900 Subject: [PATCH 63/95] Fix incorrect variable types for origin IDs in decode.c These variables used XLogRecPtr instead of RepOriginId. Author: Masahiko Sawada Discussion: https://postgr.es/m/CAD21AoBm-vNyBSXGp4bmJGvhr=S-EGc5q1dtV70cFTcJvLhC=Q@mail.gmail.com Backpatch-through: 14 --- src/backend/replication/logical/decode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index c198baacfd4..7a6323c3989 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -736,7 +736,7 @@ DecodePrepare(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, SnapBuild *builder = ctx->snapshot_builder; XLogRecPtr origin_lsn = parsed->origin_lsn; TimestampTz prepare_time = parsed->xact_time; - XLogRecPtr origin_id = XLogRecGetOrigin(buf->record); + RepOriginId origin_id = XLogRecGetOrigin(buf->record); int i; TransactionId xid = parsed->twophase_xid; @@ -812,7 +812,7 @@ DecodeAbort(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, int i; XLogRecPtr origin_lsn = InvalidXLogRecPtr; TimestampTz abort_time = parsed->xact_time; - XLogRecPtr origin_id = XLogRecGetOrigin(buf->record); + RepOriginId origin_id = XLogRecGetOrigin(buf->record); bool skip_xact; if (parsed->xinfo & XACT_XINFO_HAS_ORIGIN) From 3387c2295a05b53d53a70c45fa1c85654f76410d Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 20 Sep 2022 19:28:47 +0900 Subject: [PATCH 64/95] doc: Fix parameter name for pg_create_logical_replication_slot() The parameter controlling if two-phase transactions can be decoded was named "two_phase" in the documentation while its procedure defines "twophase". 
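As a usage illustration (not part of the patch), here is a libpq sketch of
a call that exercises the parameter as the function actually spells it; the
slot and plugin names are invented and error handling is omitted:

    /* Assumes libpq-fe.h and an established PGconn *conn.  The fourth
     * argument is the flag the documentation now spells "twophase". */
    PGresult   *res = PQexec(conn,
                             "SELECT * FROM pg_create_logical_replication_slot("
                             "'demo_slot', 'test_decoding', false, true)");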
Author: Florin Irion Discussion: https://postgr.es/m/5eeabd10-1aff-ea61-f92d-9fa0d9a7e207@gmail.com Backpatch-through: 14 --- doc/src/sgml/func.sgml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 99647f56fe9..9d0714bf935 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -25910,7 +25910,7 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); pg_create_logical_replication_slot - pg_create_logical_replication_slot ( slot_name name, plugin name , temporary boolean, two_phase boolean ) + pg_create_logical_replication_slot ( slot_name name, plugin name , temporary boolean, twophase boolean ) record ( slot_name name, lsn pg_lsn ) @@ -25923,7 +25923,7 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); the slot should not be permanently stored to disk and is only meant for use by the current session. Temporary slots are also released upon any error. The optional fourth parameter, - two_phase, when set to true, specifies + twophase, when set to true, specifies that the decoding of prepared transactions is enabled for this slot. A call to this function has the same effect as the replication protocol command CREATE_REPLICATION_SLOT ... LOGICAL. From 088c3a6cca45e7a69f6b9c83547d54c65bac3a11 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 20 Sep 2022 12:04:37 -0400 Subject: [PATCH 65/95] Suppress variable-set-but-not-used warnings from clang 15. clang 15+ will issue a set-but-not-used warning when the only use of a variable is in autoincrements (e.g., "foo++;"). That's perfectly sensible, but it detects a few more cases that we'd not noticed before. Silence the warnings with our usual methods, such as PG_USED_FOR_ASSERTS_ONLY, or in one case by actually removing a useless variable. One thing that we can't nicely get rid of is that with %pure-parser, Bison emits "yynerrs" as a local variable that falls foul of this warning. To silence those, I inserted "(void) yynerrs;" in the top-level productions of affected grammars. Per recently-established project policy, this is a candidate for back-patching into out-of-support branches: it suppresses annoying compiler warnings but changes no behavior. Hence, back-patch to 9.5, which is as far as these patches go without issues. (A preliminary check shows that the prior branches need some other set-but-not-used cleanups too, so I'll leave them for another day.) 
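In sketch form, the two suppression idioms the patch applies look like this;
the variable and function names here are invented:

    /* A counter consulted only by assertions: annotate it so clang 15
     * does not complain when assertions are compiled out. */
    int			nmatches PG_USED_FOR_ASSERTS_ONLY = 0;

    while (fetch_next_match())	/* hypothetical loop, for illustration */
        nmatches++;
    Assert(nmatches > 0);

    /* Bison's %pure-parser emits yynerrs whether or not the grammar reads
     * it; an explicit void cast in a top-level production marks it used. */
    (void) yynerrs;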
Discussion: https://postgr.es/m/514615.1663615243@sss.pgh.pa.us --- src/backend/access/gist/gistxlog.c | 2 +- src/backend/access/transam/xlog.c | 2 +- src/backend/parser/gram.y | 1 + src/backend/utils/adt/array_typanalyze.c | 4 +--- src/backend/utils/adt/jsonpath_gram.y | 1 + src/bin/pgbench/exprparse.y | 5 ++++- 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index 6464cb9281b..76723fa805e 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -81,7 +81,7 @@ gistRedoPageUpdateRecord(XLogReaderState *record) char *begin; char *data; Size datalen; - int ninserted = 0; + int ninserted PG_USED_FOR_ASSERTS_ONLY = 0; data = begin = XLogRecGetBlockData(record, 0, &datalen); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f0229322dd6..1ec6b82938a 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2200,7 +2200,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic) XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr; XLogRecPtr NewPageBeginPtr; XLogPageHeader NewPage; - int npages = 0; + int npages pg_attribute_unused() = 0; LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index b982ba3a29a..a36a456d318 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -1343,6 +1343,7 @@ parse_toplevel: stmtmulti { pg_yyget_extra(yyscanner)->parsetree = $1; + (void) yynerrs; /* suppress compiler warning */ } | MODE_TYPE_NAME Typename { diff --git a/src/backend/utils/adt/array_typanalyze.c b/src/backend/utils/adt/array_typanalyze.c index e873d228592..8993d23e18b 100644 --- a/src/backend/utils/adt/array_typanalyze.c +++ b/src/backend/utils/adt/array_typanalyze.c @@ -218,7 +218,6 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, { ArrayAnalyzeExtraData *extra_data; int num_mcelem; - int null_cnt = 0; int null_elem_cnt = 0; int analyzed_rows = 0; @@ -320,8 +319,7 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, value = fetchfunc(stats, array_no, &isnull); if (isnull) { - /* array is null, just count that */ - null_cnt++; + /* ignore arrays that are null overall */ continue; } diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y index b5cf865f3c1..3377cc81cbf 100644 --- a/src/backend/utils/adt/jsonpath_gram.y +++ b/src/backend/utils/adt/jsonpath_gram.y @@ -130,6 +130,7 @@ result: *result = palloc(sizeof(JsonPathParseResult)); (*result)->expr = $2; (*result)->lax = $1; + (void) yynerrs; } | /* EMPTY */ { *result = NULL; } ; diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y index 56f75ccd253..b3dd0488b2a 100644 --- a/src/bin/pgbench/exprparse.y +++ b/src/bin/pgbench/exprparse.y @@ -80,7 +80,10 @@ static PgBenchExpr *make_case(yyscan_t yyscanner, PgBenchExprList *when_then_lis %% -result: expr { expr_parse_result = $1; } +result: expr { + expr_parse_result = $1; + (void) yynerrs; /* suppress compiler warning */ + } elist: { $$ = NULL; } | expr { $$ = make_elist($1, NULL); } From b563f026dbcbe8fb6b9603e08b6fad0fe5cdb91e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 21 Sep 2022 13:52:38 -0400 Subject: [PATCH 66/95] Suppress more variable-set-but-not-used warnings from clang 15. Mop up assorted set-but-not-used warnings in the back branches. 
This includes back-patching relevant fixes from commit 152c9f7b8 the
rest of the way, but there are also several cases that did not appear
in HEAD.  Some of those we'd fixed in a retail way but not back-patched,
and others I think just got rewritten out of existence during nearby
refactoring.

While here, also back-patch b1980f6d0 (PL/Tcl: Fix compiler warnings
with Tcl 8.6) into 9.2, so that that branch compiles warning-free with
modern Tcl.

Per project policy, this is a candidate for back-patching into
out-of-support branches: it suppresses annoying compiler warnings but
changes no behavior.  Hence, back-patch all the way to 9.2.

Discussion: https://postgr.es/m/514615.1663615243@sss.pgh.pa.us
---
 src/backend/optimizer/util/var.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c
index 84248d31d30..997acdc323b 100644
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -935,16 +935,13 @@ flatten_join_alias_vars_mutator(Node *node,
 				RowExpr    *rowexpr;
 				List	   *fields = NIL;
 				List	   *colnames = NIL;
-				AttrNumber	attnum;
 				ListCell   *lv;
 				ListCell   *ln;

-				attnum = 0;
 				Assert(list_length(rte->joinaliasvars) == list_length(rte->eref->colnames));
 				forboth(lv, rte->joinaliasvars, ln, rte->eref->colnames)
 				{
 					newvar = (Node *) lfirst(lv);
-					attnum++;

 					/* Ignore dropped columns */
 					if (newvar == NULL)
 						continue;

From cdbfa1d080e917b6faf819665b2955ef61bb23a7 Mon Sep 17 00:00:00 2001
From: Fujii Masao
Date: Thu, 22 Sep 2022 12:54:26 +0900
Subject: [PATCH 67/95] docs: Fix snapshot name in SET TRANSACTION docs.

Commit 6c2003f8a1 changed the snapshot names mentioned in the SET
TRANSACTION docs; however, there was one place where the commit missed
updating the name.

Back-patch to all supported versions.

Author: Japin Li
Reviewed-by: Fujii Masao
Discussion: https://postgr.es/m/MEYP282MB1669BD4280044501165F8B07B64F9@MEYP282MB1669.AUSP282.PROD.OUTLOOK.COM
---
 doc/src/sgml/ref/set_transaction.sgml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/src/sgml/ref/set_transaction.sgml b/doc/src/sgml/ref/set_transaction.sgml
index e062e2461e1..d394e622f8a 100644
--- a/doc/src/sgml/ref/set_transaction.sgml
+++ b/doc/src/sgml/ref/set_transaction.sgml
@@ -164,7 +164,7 @@ SET SESSION CHARACTERISTICS AS TRANSACTION transa
    snapshot identifier, which must be given to SET TRANSACTION SNAPSHOT
    to specify which snapshot is to be imported.  The identifier must be
    written as a string literal in this command, for example
-   '000003A1-1'.
+   '00000003-0000001B-1'.
    SET TRANSACTION SNAPSHOT
    can only be executed at the start of a transaction, before the first
    query or data-modification statement (SELECT,

From 5be2501be345e4821fa471c945681aebcadedd98 Mon Sep 17 00:00:00 2001
From: Etsuro Fujita
Date: Thu, 22 Sep 2022 15:55:03 +0900
Subject: [PATCH 68/95] Fix thinko in comment.

This comment has been wrong since its introduction in commit 0d5f05cde;
backpatch to v12 where that came in.

Discussion: https://postgr.es/m/CAPmGK14VGf-xQjGQN4o1QyAbXAaxugU5%3DqfcmTDh1iufUDnV_w%40mail.gmail.com
---
 src/backend/commands/copyfrom.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 9cdee90fbe5..1fcd9643db2 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1805,8 +1805,8 @@ CopyFrom(CopyFromState cstate)
 		 * For partitioned tables we can't support multi-inserts when there
 		 * are any statement level insert triggers.
It might be possible to * allow partitioned tables with such triggers in the future, but for - * now, CopyMultiInsertInfoFlush expects that any before row insert - * and statement level insert triggers are on the same relation. + * now, CopyMultiInsertInfoFlush expects that any after row insert and + * statement level insert triggers are on the same relation. */ insertMethod = CIM_SINGLE; } From 356361d1a6df977b7b7f445210516542a92b7258 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Thu, 22 Sep 2022 10:58:49 -0700 Subject: [PATCH 69/95] Fix race condition where heap_delete() fails to pin VM page. Similar to 5f12bc94dc, the code must re-check PageIsAllVisible() after buffer lock is re-acquired. Backpatching to the same version, 12. Discussion: https://postgr.es/m/CAEP4nAw9jYQDKd_5Y+-s2E4YiUJq1vqiikFjYGpLShtp-K3gag@mail.gmail.com Reported-by: Robins Tharakan Reviewed-by: Robins Tharakan Backpatch-through: 12 --- src/backend/access/heap/heapam.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0003425b79f..b398e307c97 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2884,6 +2884,15 @@ heap_delete(Relation relation, ItemPointer tid, LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid)); + Assert(ItemIdIsNormal(lp)); + + tp.t_tableOid = RelationGetRelid(relation); + tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); + tp.t_len = ItemIdGetLength(lp); + tp.t_self = *tid; + +l1: /* * If we didn't pin the visibility map page and the page has become all * visible while we were busy locking the buffer, we'll have to unlock and @@ -2897,15 +2906,6 @@ heap_delete(Relation relation, ItemPointer tid, LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); } - lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid)); - Assert(ItemIdIsNormal(lp)); - - tp.t_tableOid = RelationGetRelid(relation); - tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); - tp.t_len = ItemIdGetLength(lp); - tp.t_self = *tid; - -l1: result = HeapTupleSatisfiesUpdate(relation, &tp, cid, buffer); if (result == TM_Invisible) @@ -2964,8 +2964,12 @@ heap_delete(Relation relation, ItemPointer tid, * If xwait had just locked the tuple then some other xact * could update this tuple before we get to this point. Check * for xmax change, and start over if so. + * + * We also must start over if we didn't pin the VM page, and + * the page has become all visible. */ - if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) || + if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) || + xmax_infomask_changed(tp.t_data->t_infomask, infomask) || !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data), xwait)) goto l1; @@ -2997,8 +3001,12 @@ heap_delete(Relation relation, ItemPointer tid, * xwait is done, but if xwait had just locked the tuple then some * other xact could update this tuple before we get to this point. * Check for xmax change, and start over if so. + * + * We also must start over if we didn't pin the VM page, and the + * page has become all visible. 
*/ - if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) || + if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) || + xmax_infomask_changed(tp.t_data->t_infomask, infomask) || !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data), xwait)) goto l1; From bd97eafbe9d8d9f44c732b97d7881801e7648607 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Fri, 23 Sep 2022 18:11:48 +0200 Subject: [PATCH 70/95] Stop using PQsendQuery in libpq_pipeline The "emulation" I wrote for PQsendQuery in pipeline mode to use extended query protocol, in commit acb7e4eb6b1c, is problematic. Due to numerous bugs we'll soon remove it. As a first step and for all branches back to 14, stop using PQsendQuery in libpq_pipeline. Also remove a few test lines that will no longer be relevant. Backpatch to 14. Discussion: https://postgr.es/m/CA+mi_8ZGSQNmW6-mk_iSR4JZB_LJ4ww3suOF+1vGNs3MrLsv4g@mail.gmail.com --- .../modules/libpq_pipeline/libpq_pipeline.c | 138 +----------------- .../traces/pipeline_abort.trace | 6 +- .../libpq_pipeline/traces/pipeline_idle.trace | 67 +-------- 3 files changed, 13 insertions(+), 198 deletions(-) diff --git a/src/test/modules/libpq_pipeline/libpq_pipeline.c b/src/test/modules/libpq_pipeline/libpq_pipeline.c index 9d07681b297..ff43f4b7d0b 100644 --- a/src/test/modules/libpq_pipeline/libpq_pipeline.c +++ b/src/test/modules/libpq_pipeline/libpq_pipeline.c @@ -499,7 +499,7 @@ test_pipeline_abort(PGconn *conn) PQerrorMessage(conn)); /* Try to send two queries in one command */ - if (PQsendQuery(conn, "SELECT 1; SELECT 2") != 1) + if (PQsendQueryParams(conn, "SELECT 1; SELECT 2", 0, NULL, NULL, NULL, NULL, 0) != 1) pg_fatal("failed to send query: %s", PQerrorMessage(conn)); if (PQpipelineSync(conn) != 1) pg_fatal("pipeline sync failed: %s", PQerrorMessage(conn)); @@ -531,7 +531,8 @@ test_pipeline_abort(PGconn *conn) fprintf(stderr, "ok\n"); /* Test single-row mode with an error partways */ - if (PQsendQuery(conn, "SELECT 1.0/g FROM generate_series(3, -1, -1) g") != 1) + if (PQsendQueryParams(conn, "SELECT 1.0/g FROM generate_series(3, -1, -1) g", + 0, NULL, NULL, NULL, NULL, 0) != 1) pg_fatal("failed to send query: %s", PQerrorMessage(conn)); if (PQpipelineSync(conn) != 1) pg_fatal("pipeline sync failed: %s", PQerrorMessage(conn)); @@ -991,133 +992,10 @@ test_pipeline_idle(PGconn *conn) PQsetNoticeProcessor(conn, notice_processor, &n_notices); - /* - * Cause a Close message to be sent to the server, and watch libpq's - * reaction to the resulting CloseComplete. libpq must not get in IDLE - * state until that has been received. 
- */ - if (PQenterPipelineMode(conn) != 1) - pg_fatal("failed to enter pipeline mode: %s", PQerrorMessage(conn)); - - if (PQsendQuery(conn, "SELECT 1") != 1) - pg_fatal("failed to send query: %s", PQerrorMessage(conn)); - PQsendFlushRequest(conn); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when there's a pipeline item: %s", - PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("Unexpected result code %s from first pipeline item", - PQresStatus(PQresultStatus(res))); - PQclear(res); - - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("expected NULL result"); - - if (PQpipelineSync(conn) != 1) - pg_fatal("pipeline sync failed: %s", PQerrorMessage(conn)); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when there's a pipeline item: %s", - PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_PIPELINE_SYNC) - pg_fatal("Unexpected result code %s instead of PGRES_PIPELINE_SYNC, error: %s", - PQresStatus(PQresultStatus(res)), PQerrorMessage(conn)); - PQclear(res); - res = NULL; - - if (PQexitPipelineMode(conn) != 1) - pg_fatal("attempt to exit pipeline mode failed when it should've succeeded: %s", - PQerrorMessage(conn)); - - /* - * Must not have got any notices here; note bug as described in - * https://postgr.es/m/CA+mi_8bvD0_CW3sumgwPvWdNzXY32itoG_16tDYRu_1S2gV2iw@mail.gmail.com - */ - if (n_notices > 0) - pg_fatal("got %d notice(s)", n_notices); - fprintf(stderr, "ok - 1\n"); - - /* - * Verify that we can send a query using simple query protocol after one - * in pipeline mode. - */ - if (PQenterPipelineMode(conn) != 1) - pg_fatal("failed to enter pipeline mode: %s", PQerrorMessage(conn)); - if (PQsendQuery(conn, "SELECT 1") != 1) - pg_fatal("failed to send query: %s", PQerrorMessage(conn)); - PQsendFlushRequest(conn); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when there's a pipeline item: %s", - PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("unexpected result code %s from first pipeline item", - PQresStatus(PQresultStatus(res))); - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("got unexpected non-null result"); - /* We can exit pipeline mode now */ - if (PQexitPipelineMode(conn) != 1) - pg_fatal("attempt to exit pipeline mode failed when it should've succeeded: %s", - PQerrorMessage(conn)); - res = PQexec(conn, "SELECT 2"); - if (n_notices > 0) - pg_fatal("got %d notice(s)", n_notices); - if (res == NULL) - pg_fatal("PQexec returned NULL"); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("unexpected result code %s from non-pipeline query", - PQresStatus(PQresultStatus(res))); - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("did not receive terminating NULL"); - if (n_notices > 0) - pg_fatal("got %d notice(s)", n_notices); - fprintf(stderr, "ok - 2\n"); - - /* - * Case 2: exiting pipeline mode is not OK if a second command is sent. 
- */ - - if (PQenterPipelineMode(conn) != 1) - pg_fatal("failed to enter pipeline mode: %s", PQerrorMessage(conn)); - if (PQsendQuery(conn, "SELECT 1") != 1) - pg_fatal("failed to send query: %s", PQerrorMessage(conn)); - PQsendFlushRequest(conn); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when there's a pipeline item: %s", - PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("unexpected result code %s from first pipeline item", - PQresStatus(PQresultStatus(res))); - if (PQsendQuery(conn, "SELECT 2") != 1) - pg_fatal("failed to send query: %s", PQerrorMessage(conn)); - PQsendFlushRequest(conn); - /* read terminating null from first query */ - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("did not receive terminating NULL"); - res = PQgetResult(conn); - if (res == NULL) - pg_fatal("PQgetResult returned null when there's a pipeline item: %s", - PQerrorMessage(conn)); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pg_fatal("unexpected result code %s from first pipeline item", - PQresStatus(PQresultStatus(res))); - res = PQgetResult(conn); - if (res != NULL) - pg_fatal("did not receive terminating NULL"); - if (PQexitPipelineMode(conn) != 1) - pg_fatal("attempt to exit pipeline mode failed when it should've succeeded: %s", - PQerrorMessage(conn)); - /* Try to exit pipeline mode in pipeline-idle state */ if (PQenterPipelineMode(conn) != 1) pg_fatal("failed to enter pipeline mode: %s", PQerrorMessage(conn)); - if (PQsendQuery(conn, "SELECT 1") != 1) + if (PQsendQueryParams(conn, "SELECT 1", 0, NULL, NULL, NULL, NULL, 0) != 1) pg_fatal("failed to send query: %s", PQerrorMessage(conn)); PQsendFlushRequest(conn); res = PQgetResult(conn); @@ -1131,7 +1009,7 @@ test_pipeline_idle(PGconn *conn) res = PQgetResult(conn); if (res != NULL) pg_fatal("did not receive terminating NULL"); - if (PQsendQuery(conn, "SELECT 2") != 1) + if (PQsendQueryParams(conn, "SELECT 2", 0, NULL, NULL, NULL, NULL, 0) != 1) pg_fatal("failed to send query: %s", PQerrorMessage(conn)); if (PQexitPipelineMode(conn) == 1) pg_fatal("exiting pipeline succeeded when it shouldn't"); @@ -1153,12 +1031,12 @@ test_pipeline_idle(PGconn *conn) if (n_notices > 0) pg_fatal("got %d notice(s)", n_notices); - fprintf(stderr, "ok - 3\n"); + fprintf(stderr, "ok - 1\n"); /* Have a WARNING in the middle of a resultset */ if (PQenterPipelineMode(conn) != 1) pg_fatal("entering pipeline mode failed: %s", PQerrorMessage(conn)); - if (PQsendQuery(conn, "SELECT pg_catalog.pg_advisory_unlock(1,1)") != 1) + if (PQsendQueryParams(conn, "SELECT pg_catalog.pg_advisory_unlock(1,1)", 0, NULL, NULL, NULL, NULL, 0) != 1) pg_fatal("failed to send query: %s", PQerrorMessage(conn)); PQsendFlushRequest(conn); res = PQgetResult(conn); @@ -1168,7 +1046,7 @@ test_pipeline_idle(PGconn *conn) pg_fatal("unexpected result code %s", PQresStatus(PQresultStatus(res))); if (PQexitPipelineMode(conn) != 1) pg_fatal("failed to exit pipeline mode: %s", PQerrorMessage(conn)); - fprintf(stderr, "ok - 4\n"); + fprintf(stderr, "ok - 2\n"); } static void diff --git a/src/test/modules/libpq_pipeline/traces/pipeline_abort.trace b/src/test/modules/libpq_pipeline/traces/pipeline_abort.trace index 3fce548b995..cf6ccec6b9d 100644 --- a/src/test/modules/libpq_pipeline/traces/pipeline_abort.trace +++ b/src/test/modules/libpq_pipeline/traces/pipeline_abort.trace @@ -35,18 +35,16 @@ B 4 NoData B 15 CommandComplete "INSERT 0 1" B 5 ReadyForQuery I F 26 Parse "" "SELECT 1; SELECT 2" 0 -F 12 Bind "" "" 0 0 0 +F 14 Bind "" 
"" 0 0 1 0 F 6 Describe P "" F 9 Execute "" 0 -F 6 Close P "" F 4 Sync B NN ErrorResponse S "ERROR" V "ERROR" C "42601" M "cannot insert multiple commands into a prepared statement" F "SSSS" L "SSSS" R "SSSS" \x00 B 5 ReadyForQuery I F 54 Parse "" "SELECT 1.0/g FROM generate_series(3, -1, -1) g" 0 -F 12 Bind "" "" 0 0 0 +F 14 Bind "" "" 0 0 1 0 F 6 Describe P "" F 9 Execute "" 0 -F 6 Close P "" F 4 Sync B 4 ParseComplete B 4 BindComplete diff --git a/src/test/modules/libpq_pipeline/traces/pipeline_idle.trace b/src/test/modules/libpq_pipeline/traces/pipeline_idle.trace index 3957ee4dfe1..83ee415b03e 100644 --- a/src/test/modules/libpq_pipeline/traces/pipeline_idle.trace +++ b/src/test/modules/libpq_pipeline/traces/pipeline_idle.trace @@ -1,87 +1,27 @@ F 16 Parse "" "SELECT 1" 0 -F 12 Bind "" "" 0 0 0 +F 14 Bind "" "" 0 0 1 0 F 6 Describe P "" F 9 Execute "" 0 -F 6 Close P "" F 4 Flush B 4 ParseComplete B 4 BindComplete B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 B 11 DataRow 1 1 '1' B 13 CommandComplete "SELECT 1" -B 4 CloseComplete -F 4 Sync -B 5 ReadyForQuery I -F 16 Parse "" "SELECT 1" 0 -F 12 Bind "" "" 0 0 0 -F 6 Describe P "" -F 9 Execute "" 0 -F 6 Close P "" -F 4 Flush -B 4 ParseComplete -B 4 BindComplete -B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 -B 11 DataRow 1 1 '1' -B 13 CommandComplete "SELECT 1" -B 4 CloseComplete -F 13 Query "SELECT 2" -B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 -B 11 DataRow 1 1 '2' -B 13 CommandComplete "SELECT 1" -B 5 ReadyForQuery I -F 16 Parse "" "SELECT 1" 0 -F 12 Bind "" "" 0 0 0 -F 6 Describe P "" -F 9 Execute "" 0 -F 6 Close P "" -F 4 Flush -B 4 ParseComplete -B 4 BindComplete -B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 -B 11 DataRow 1 1 '1' -B 13 CommandComplete "SELECT 1" -B 4 CloseComplete -F 16 Parse "" "SELECT 2" 0 -F 12 Bind "" "" 0 0 0 -F 6 Describe P "" -F 9 Execute "" 0 -F 6 Close P "" -F 4 Flush -B 4 ParseComplete -B 4 BindComplete -B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 -B 11 DataRow 1 1 '2' -B 13 CommandComplete "SELECT 1" -B 4 CloseComplete -F 16 Parse "" "SELECT 1" 0 -F 12 Bind "" "" 0 0 0 -F 6 Describe P "" -F 9 Execute "" 0 -F 6 Close P "" -F 4 Flush -B 4 ParseComplete -B 4 BindComplete -B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 -B 11 DataRow 1 1 '1' -B 13 CommandComplete "SELECT 1" -B 4 CloseComplete F 16 Parse "" "SELECT 2" 0 -F 12 Bind "" "" 0 0 0 +F 14 Bind "" "" 0 0 1 0 F 6 Describe P "" F 9 Execute "" 0 -F 6 Close P "" F 4 Flush B 4 ParseComplete B 4 BindComplete B 33 RowDescription 1 "?column?" 
NNNN 0 NNNN 4 -1 0 B 11 DataRow 1 1 '2' B 13 CommandComplete "SELECT 1" -B 4 CloseComplete F 49 Parse "" "SELECT pg_catalog.pg_advisory_unlock(1,1)" 0 -F 12 Bind "" "" 0 0 0 +F 14 Bind "" "" 0 0 1 0 F 6 Describe P "" F 9 Execute "" 0 -F 6 Close P "" F 4 Flush B 4 ParseComplete B 4 BindComplete @@ -89,5 +29,4 @@ B 43 RowDescription 1 "pg_advisory_unlock" NNNN 0 NNNN 1 -1 0 B NN NoticeResponse S "WARNING" V "WARNING" C "01000" M "you don't own a lock of type ExclusiveLock" F "SSSS" L "SSSS" R "SSSS" \x00 B 11 DataRow 1 1 'f' B 13 CommandComplete "SELECT 1" -B 4 CloseComplete F 4 Terminate From 1315662fc5870d030ed5fe04cc50ee38a309c060 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Sun, 25 Sep 2022 17:48:03 +0200 Subject: [PATCH 71/95] Add missing source files to pg_waldump/nls.mk --- src/bin/pg_waldump/nls.mk | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/bin/pg_waldump/nls.mk b/src/bin/pg_waldump/nls.mk index 9104a4a417b..9aebaef4a75 100644 --- a/src/bin/pg_waldump/nls.mk +++ b/src/bin/pg_waldump/nls.mk @@ -1,6 +1,7 @@ # src/bin/pg_waldump/nls.mk CATALOG_NAME = pg_waldump AVAIL_LANGUAGES = cs de el es fr ja ko ru sv tr uk zh_CN -GETTEXT_FILES = $(FRONTEND_COMMON_GETTEXT_FILES) pg_waldump.c -GETTEXT_TRIGGERS = $(FRONTEND_COMMON_GETTEXT_TRIGGERS) fatal_error -GETTEXT_FLAGS = $(FRONTEND_COMMON_GETTEXT_FLAGS) fatal_error:1:c-format +GETTEXT_FILES = $(FRONTEND_COMMON_GETTEXT_FILES) pg_waldump.c xlogreader.c +GETTEXT_TRIGGERS = $(FRONTEND_COMMON_GETTEXT_TRIGGERS) fatal_error report_invalid_record:2 +GETTEXT_FLAGS = $(FRONTEND_COMMON_GETTEXT_FLAGS) fatal_error:1:c-format \ + report_invalid_record:2:c-format From 608424a51529eb45fbeac4e1c8bf1f7f067ff401 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Wed, 28 Sep 2022 17:14:53 +0200 Subject: [PATCH 72/95] Change some errdetail() to errdetail_internal() This prevents marking the argument string for translation for gettext, and it also prevents the given string (which is already translated) from being translated at runtime. Also, mark the strings used as arguments to check_rolespec_name for translation. Backpatch all the way back as appropriate. None of this is caught by any tests (necessarily so), so I verified it manually. --- src/backend/catalog/dependency.c | 6 +++--- src/backend/commands/user.c | 4 ++-- src/backend/utils/adt/acl.c | 4 ++-- src/backend/utils/adt/jsonfuncs.c | 4 ++-- src/common/jsonapi.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 39994474faf..d7bf9f7ca5b 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1257,14 +1257,14 @@ reportDependentObjects(const ObjectAddresses *targetObjects, (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST), errmsg("cannot drop %s because other objects depend on it", getObjectDescription(origObject, false)), - errdetail("%s", clientdetail.data), + errdetail_internal("%s", clientdetail.data), errdetail_log("%s", logdetail.data), errhint("Use DROP ... CASCADE to drop the dependent objects too."))); else ereport(ERROR, (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST), errmsg("cannot drop desired object(s) because other objects depend on them"), - errdetail("%s", clientdetail.data), + errdetail_internal("%s", clientdetail.data), errdetail_log("%s", logdetail.data), errhint("Use DROP ... 
CASCADE to drop the dependent objects too."))); } @@ -1276,7 +1276,7 @@ reportDependentObjects(const ObjectAddresses *targetObjects, "drop cascades to %d other objects", numReportedClient + numNotReportedClient, numReportedClient + numNotReportedClient), - errdetail("%s", clientdetail.data), + errdetail_internal("%s", clientdetail.data), errdetail_log("%s", logdetail.data))); } else if (numReportedClient == 1) diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c index 54f4df765ec..f7dcff72494 100644 --- a/src/backend/commands/user.c +++ b/src/backend/commands/user.c @@ -990,7 +990,7 @@ AlterRole(AlterRoleStmt *stmt) } check_rolespec_name(stmt->role, - "Cannot alter reserved roles."); + _("Cannot alter reserved roles.")); now = GetCurrentTimestamp(); @@ -1932,7 +1932,7 @@ AlterRoleSet(AlterRoleSetStmt *stmt) if (stmt->role) { check_rolespec_name(stmt->role, - "Cannot alter reserved roles."); + _("Cannot alter reserved roles.")); roletuple = get_rolespec_tuple(stmt->role); roleform = (Form_pg_authid) GETSTRUCT(roletuple); diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c index e3463f636ae..906480c5137 100644 --- a/src/backend/utils/adt/acl.c +++ b/src/backend/utils/adt/acl.c @@ -5552,7 +5552,7 @@ get_rolespec_name(const RoleSpec *role) /* * Given a RoleSpec, throw an error if the name is reserved, using detail_msg, - * if provided. + * if provided (which must be already translated). * * If node is NULL, no error is thrown. If detail_msg is NULL then no detail * message is provided. @@ -5573,7 +5573,7 @@ check_rolespec_name(const RoleSpec *role, const char *detail_msg) (errcode(ERRCODE_RESERVED_NAME), errmsg("role name \"%s\" is reserved", role->rolename), - errdetail("%s", detail_msg))); + errdetail_internal("%s", detail_msg))); else ereport(ERROR, (errcode(ERRCODE_RESERVED_NAME), diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index 5fd54b64b56..0364765dc0e 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -615,13 +615,13 @@ json_ereport_error(JsonParseErrorType error, JsonLexContext *lex) ereport(ERROR, (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER), errmsg("unsupported Unicode escape sequence"), - errdetail("%s", json_errdetail(error, lex)), + errdetail_internal("%s", json_errdetail(error, lex)), report_json_context(lex))); else ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type %s", "json"), - errdetail("%s", json_errdetail(error, lex)), + errdetail_internal("%s", json_errdetail(error, lex)), report_json_context(lex))); } diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index d376ab152d4..ade13aed3a4 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -1052,7 +1052,7 @@ report_parse_error(JsonParseContext ctx, JsonLexContext *lex) } /* - * Construct a detail message for a JSON error. + * Construct an (already translated) detail message for a JSON error. */ char * json_errdetail(JsonParseErrorType error, JsonLexContext *lex) From 0ac462cd52d182666acfa0aa351460ccf278f5fe Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 28 Sep 2022 13:05:20 -0400 Subject: [PATCH 73/95] revert "warn of SECURITY DEFINER schemas for non-sql_body funcs" doc revert of commit 1703726488. Change was applied to irrelevant branches, and was not detailed enough to be helpful in relevant branches. 
Reported-by: Peter Eisentraut, Noah Misch

Discussion: https://postgr.es/m/a2dc9de4-24fc-3222-87d3-0def8057d7d8@enterprisedb.com

Backpatch-through: 10
---
 doc/src/sgml/ref/create_function.sgml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/doc/src/sgml/ref/create_function.sgml b/doc/src/sgml/ref/create_function.sgml
index dc36a171a96..66e52555a8f 100644
--- a/doc/src/sgml/ref/create_function.sgml
+++ b/doc/src/sgml/ref/create_function.sgml
@@ -780,10 +780,7 @@ SELECT * FROM dup(42);
     Because a SECURITY DEFINER function is executed
     with the privileges of the user that owns it, care is needed to
-    ensure that the function cannot be misused. This is particularly
-    important for non-sql_body functions because
-    their function bodies are evaluated at run-time, not creation time.
-    For security,
+    ensure that the function cannot be misused. For security,
     should be set to exclude any schemas writable by untrusted users.
     This prevents malicious users from creating objects (e.g., tables,
     functions, and

From 75298ac271847fd472e7b1d84a2f0b4def4bd793 Mon Sep 17 00:00:00 2001
From: Bruce Momjian
Date: Wed, 28 Sep 2022 13:14:38 -0400
Subject: [PATCH 74/95] doc: clarify internal behavior of RECURSIVE CTE queries

Reported-by: Tom Lane

Discussion: https://postgr.es/m/3976627.1662651004@sss.pgh.pa.us

Backpatch-through: 10
---
 doc/src/sgml/queries.sgml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
index 4c5a83c9cbf..95559ef1ac4 100644
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -2173,7 +2173,7 @@ SELECT sum(n) FROM t;
    While RECURSIVE allows queries to be specified
-   recursively, internally all queries are evaluated iteratively.
+   recursively, internally such queries are evaluated iteratively.

From 64ff75fd3ea830241fa88a96fa3f2d39021e315d Mon Sep 17 00:00:00 2001
From: Etsuro Fujita
Date: Thu, 29 Sep 2022 16:55:03 +0900
Subject: [PATCH 75/95] Update comment in ExecInsert() regarding batch insertion.

Remove the stale text that is a leftover from an earlier version of the
patch to add support for batch insertion, and adjust the wording in the
remaining text.

Back-patch to v14 where batch insertion came in.

Review and wording adjustment by Tom Lane.

Discussion: https://postgr.es/m/CAPmGK14goatHPHQv2Aeu_UTKqZ%2BBO%2BP%2Bzd3HKv5D%2BdyyfWKDSw%40mail.gmail.com
---
 src/backend/executor/nodeModifyTable.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 27b48152195..4e4d67bd4f9 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -766,9 +766,8 @@ ExecInsert(ModifyTableState *mtstate,
 	if (resultRelInfo->ri_BatchSize > 1)
 	{
 		/*
-		 * If a certain number of tuples have already been accumulated, or
-		 * a tuple has come for a different relation than that for the
-		 * accumulated tuples, perform the batch insert
+		 * When we've reached the desired batch size, perform the
+		 * insertion.
 		 */
 		if (resultRelInfo->ri_NumSlots == resultRelInfo->ri_BatchSize)
 		{

From 6005f1d52756cbb0ec4ab18e750ebe2f9cdaa9b2 Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson
Date: Fri, 30 Sep 2022 12:03:48 +0200
Subject: [PATCH 76/95] doc: Fix PQsslAttribute docs for compression

The compression parameter to PQsslAttribute has never returned the
compression method used; it has always returned "on" or "off" since it
was added in commit 91fa7b4719ac.

Backpatch through v10.
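For illustration, a client that wants to know whether compression is in
use would do something like the following minimal sketch (the connection
string is hypothetical; any SSL-enabled server will do):

    #include <stdio.h>
    #include "libpq-fe.h"

    int
    main(void)
    {
        /* hypothetical connection string */
        PGconn     *conn = PQconnectdb("host=localhost sslmode=require");
        const char *compression;

        if (PQstatus(conn) != CONNECTION_OK)
        {
            fprintf(stderr, "connection failed: %s", PQerrorMessage(conn));
            PQfinish(conn);
            return 1;
        }

        /* Returns "on" or "off", never an algorithm name; NULL without SSL */
        compression = PQsslAttribute(conn, "compression");
        printf("SSL compression: %s\n", compression ? compression : "no SSL");

        PQfinish(conn);
        return 0;
    }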
Reviewed-by: Tom Lane Discussion: https://postgr.es/m/B9EC60EC-F665-47E8-A221-398C76E382C9@yesql.se Backpatch-through: v10 --- doc/src/sgml/libpq.sgml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 3bde3aef54a..fd230d9fd0e 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -2564,9 +2564,7 @@ const char *PQsslAttribute(const PGconn *conn, const char *attribute_name); compression - If SSL compression is in use, returns the name of the compression - algorithm, or "on" if compression is used but the algorithm is - not known. If compression is not in use, returns "off". + Returns "on" if SSL compression is in use, else it returns "off". From 51785eaba08e5812041b8ce085412dd996dc64b2 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 30 Sep 2022 19:36:46 -0400 Subject: [PATCH 77/95] Avoid improbable PANIC during heap_update, redux. Commit 34f581c39 intended to ensure that RelationGetBufferForTuple would acquire a visibility-map page pin in case the otherBuffer's all-visible bit had become set since we last had lock on that page. But I missed a case: when we're extending the relation, VM concerns were dealt with only in the relatively-less-likely case that we fail to conditionally lock the otherBuffer. I think I'd believed that we couldn't need to worry about it if the conditional lock succeeds, which is true for the target buffer; but the otherBuffer was unlocked for awhile so its bit might be set anyway. So we need to do the GetVisibilityMapPins dance, and then also recheck the page's free space, in both cases. Per report from Jaime Casanova. Back-patch to v12 as the previous patch was (although there's still no evidence that the bug is reachable pre-v14). Discussion: https://postgr.es/m/E1lWLjP-00006Y-Ml@gemulon.postgresql.org --- src/backend/access/heap/hio.c | 41 ++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index c3bd9a68974..8c46967a676 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -678,29 +678,34 @@ RelationGetBufferForTuple(Relation relation, Size len, LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + } - /* - * Because the buffers were unlocked for a while, it's possible, - * although unlikely, that an all-visible flag became set or that - * somebody used up the available space in the new page. We can - * use GetVisibilityMapPins to deal with the first case. In the - * second case, just retry from start. - */ - GetVisibilityMapPins(relation, otherBuffer, buffer, - otherBlock, targetBlock, vmbuffer_other, - vmbuffer); + /* + * Because the buffers were unlocked for a while, it's possible, + * although unlikely, that an all-visible flag became set or that + * somebody used up the available space in the new page. We can use + * GetVisibilityMapPins to deal with the first case. In the second + * case, just retry from start. 
+ */ + GetVisibilityMapPins(relation, otherBuffer, buffer, + otherBlock, targetBlock, vmbuffer_other, + vmbuffer); - if (len > PageGetHeapFreeSpace(page)) - { - LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK); - UnlockReleaseBuffer(buffer); + /* + * Note that we have to check the available space even if our + * conditional lock succeeded, because GetVisibilityMapPins might've + * transiently released lock on the target buffer to acquire a VM pin + * for the otherBuffer. + */ + if (len > PageGetHeapFreeSpace(page)) + { + LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK); + UnlockReleaseBuffer(buffer); - goto loop; - } + goto loop; } } - - if (len > PageGetHeapFreeSpace(page)) + else if (len > PageGetHeapFreeSpace(page)) { /* We should not get here given the test at the top */ elog(PANIC, "tuple is too big: size %zu", len); From b715be826120bd57158002cce5df329675600ee2 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Fri, 7 Oct 2022 19:37:48 +0200 Subject: [PATCH 78/95] Fix self-referencing foreign keys with partitioned tables There are a number of bugs in this area. Two of them are fixed here, namely: 1. get_relation_idx_constraint_oid does not restrict the type of constraint that's returned, so with sufficient bad luck it can return the OID of a foreign key constraint. This has the effect that a primary key in a partition can end up as a child of a foreign key, which makes no sense (it needs to be the child of the equivalent primary key.) Change the API contract so that only index-backed constraints are returned, mimicking get_constraint_index(). 2. Both CloneFkReferenced and CloneFkReferencing clone a self-referencing foreign key, so the partition ends up with a duplicate foreign key. Change the former function to ignore such constraints. Add some tests to verify that things are better now. (However, these new tests show some additional misbehavior that will be fixed later -- namely that there's a constraint marked NOT VALID.) Backpatch to 12, where these constraints are possible at all. Author: Jehan-Guillaume de Rorthais Discussion: https://postgr.es/m/20220603154232.1715b14c@karst --- src/backend/catalog/pg_constraint.c | 13 +++- src/backend/commands/tablecmds.c | 13 ++++ src/test/regress/expected/foreign_key.out | 81 +++++++++++++++++++++++ src/test/regress/sql/foreign_key.sql | 55 +++++++++++++++ 4 files changed, 161 insertions(+), 1 deletion(-) diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index 40beade7c54..9a4055c3b6b 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -977,8 +977,12 @@ get_relation_constraint_attnos(Oid relid, const char *conname, } /* - * Return the OID of the constraint associated with the given index in the + * Return the OID of the constraint enforced by the given index in the * given relation; or InvalidOid if no such index is catalogued. + * + * Much like get_constraint_index, this function is concerned only with the + * one constraint that "owns" the given index. Therefore, constraints of + * types other than unique, primary-key, and exclusion are ignored. 
*/ Oid get_relation_idx_constraint_oid(Oid relationId, Oid indexId) @@ -1003,6 +1007,13 @@ get_relation_idx_constraint_oid(Oid relationId, Oid indexId) Form_pg_constraint constrForm; constrForm = (Form_pg_constraint) GETSTRUCT(tuple); + + /* See above */ + if (constrForm->contype != CONSTRAINT_PRIMARY && + constrForm->contype != CONSTRAINT_UNIQUE && + constrForm->contype != CONSTRAINT_EXCLUSION) + continue; + if (constrForm->conindid == indexId) { constraintId = constrForm->oid; diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index ba985fc99ae..401b22e095f 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -11842,6 +11842,8 @@ CloneForeignKeyConstraints(List **wqueue, Relation parentRel, * clone those constraints to the given partition. This is to be called * when the partition is being created or attached. * + * This ignores self-referencing FKs; those are handled by CloneFkReferencing. + * * This recurses to partitions, if the relation being attached is partitioned. * Recursion is done by calling addFkRecurseReferenced. */ @@ -11917,6 +11919,17 @@ CloneFkReferenced(Relation parentRel, Relation partitionRel) continue; } + /* + * Don't clone self-referencing foreign keys, which can be in the + * partitioned table or in the partition-to-be. + */ + if (constrForm->conrelid == RelationGetRelid(parentRel) || + constrForm->conrelid == RelationGetRelid(partitionRel)) + { + ReleaseSysCache(tuple); + continue; + } + /* * Because we're only expanding the key space at the referenced side, * we don't need to prevent any operation in the referencing table, so diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out index c5df3a5be06..ab6ec72653f 100644 --- a/src/test/regress/expected/foreign_key.out +++ b/src/test/regress/expected/foreign_key.out @@ -1927,6 +1927,87 @@ drop table other_partitioned_fk; reset role; revoke all on fk_notpartitioned_pk from regress_other_partitioned_fk_owner; drop role regress_other_partitioned_fk_owner; +-- +-- Test self-referencing foreign key with partition. 
+-- This should create only one fk constraint per partition +-- +CREATE TABLE parted_self_fk ( + id bigint NOT NULL PRIMARY KEY, + id_abc bigint, + FOREIGN KEY (id_abc) REFERENCES parted_self_fk(id) +) +PARTITION BY RANGE (id); +CREATE TABLE part1_self_fk ( + id bigint NOT NULL PRIMARY KEY, + id_abc bigint +); +ALTER TABLE parted_self_fk ATTACH PARTITION part1_self_fk FOR VALUES FROM (0) TO (10); +CREATE TABLE part2_self_fk PARTITION OF parted_self_fk FOR VALUES FROM (10) TO (20); +CREATE TABLE part3_self_fk ( -- a partitioned partition + id bigint NOT NULL PRIMARY KEY, + id_abc bigint +) PARTITION BY RANGE (id); +CREATE TABLE part32_self_fk PARTITION OF part3_self_fk FOR VALUES FROM (20) TO (30); +ALTER TABLE parted_self_fk ATTACH PARTITION part3_self_fk FOR VALUES FROM (20) TO (40); +CREATE TABLE part33_self_fk ( + id bigint NOT NULL PRIMARY KEY, + id_abc bigint +); +ALTER TABLE part3_self_fk ATTACH PARTITION part33_self_fk FOR VALUES FROM (30) TO (40); +SELECT cr.relname, co.conname, co.contype, co.convalidated, + p.conname AS conparent, p.convalidated, cf.relname AS foreignrel +FROM pg_constraint co +JOIN pg_class cr ON cr.oid = co.conrelid +LEFT JOIN pg_class cf ON cf.oid = co.confrelid +LEFT JOIN pg_constraint p ON p.oid = co.conparentid +WHERE cr.oid IN (SELECT relid FROM pg_partition_tree('parted_self_fk')) +ORDER BY co.contype, cr.relname, co.conname, p.conname; + relname | conname | contype | convalidated | conparent | convalidated | foreignrel +----------------+----------------------------+---------+--------------+----------------------------+--------------+---------------- + part1_self_fk | parted_self_fk_id_abc_fkey | f | t | parted_self_fk_id_abc_fkey | t | parted_self_fk + part2_self_fk | parted_self_fk_id_abc_fkey | f | t | parted_self_fk_id_abc_fkey | t | parted_self_fk + part32_self_fk | parted_self_fk_id_abc_fkey | f | f | parted_self_fk_id_abc_fkey | t | parted_self_fk + part33_self_fk | parted_self_fk_id_abc_fkey | f | t | parted_self_fk_id_abc_fkey | t | parted_self_fk + part3_self_fk | parted_self_fk_id_abc_fkey | f | t | parted_self_fk_id_abc_fkey | t | parted_self_fk + parted_self_fk | parted_self_fk_id_abc_fkey | f | t | | | parted_self_fk + part1_self_fk | part1_self_fk_pkey | p | t | parted_self_fk_pkey | t | + part2_self_fk | part2_self_fk_pkey | p | t | parted_self_fk_pkey | t | + part32_self_fk | part32_self_fk_pkey | p | t | part3_self_fk_pkey | t | + part33_self_fk | part33_self_fk_pkey | p | t | part3_self_fk_pkey | t | + part3_self_fk | part3_self_fk_pkey | p | t | parted_self_fk_pkey | t | + parted_self_fk | parted_self_fk_pkey | p | t | | | +(12 rows) + +-- detach and re-attach multiple times just to ensure everything is kosher +ALTER TABLE parted_self_fk DETACH PARTITION part2_self_fk; +ALTER TABLE parted_self_fk ATTACH PARTITION part2_self_fk FOR VALUES FROM (10) TO (20); +ALTER TABLE parted_self_fk DETACH PARTITION part2_self_fk; +ALTER TABLE parted_self_fk ATTACH PARTITION part2_self_fk FOR VALUES FROM (10) TO (20); +SELECT cr.relname, co.conname, co.contype, co.convalidated, + p.conname AS conparent, p.convalidated, cf.relname AS foreignrel +FROM pg_constraint co +JOIN pg_class cr ON cr.oid = co.conrelid +LEFT JOIN pg_class cf ON cf.oid = co.confrelid +LEFT JOIN pg_constraint p ON p.oid = co.conparentid +WHERE cr.oid IN (SELECT relid FROM pg_partition_tree('parted_self_fk')) +ORDER BY co.contype, cr.relname, co.conname, p.conname; + relname | conname | contype | convalidated | conparent | convalidated | foreignrel 
+----------------+----------------------------+---------+--------------+----------------------------+--------------+---------------- + part1_self_fk | parted_self_fk_id_abc_fkey | f | t | parted_self_fk_id_abc_fkey | t | parted_self_fk + part2_self_fk | parted_self_fk_id_abc_fkey | f | t | parted_self_fk_id_abc_fkey | t | parted_self_fk + part32_self_fk | parted_self_fk_id_abc_fkey | f | f | parted_self_fk_id_abc_fkey | t | parted_self_fk + part33_self_fk | parted_self_fk_id_abc_fkey | f | t | parted_self_fk_id_abc_fkey | t | parted_self_fk + part3_self_fk | parted_self_fk_id_abc_fkey | f | t | parted_self_fk_id_abc_fkey | t | parted_self_fk + parted_self_fk | parted_self_fk_id_abc_fkey | f | t | | | parted_self_fk + part1_self_fk | part1_self_fk_pkey | p | t | parted_self_fk_pkey | t | + part2_self_fk | part2_self_fk_pkey | p | t | parted_self_fk_pkey | t | + part32_self_fk | part32_self_fk_pkey | p | t | part3_self_fk_pkey | t | + part33_self_fk | part33_self_fk_pkey | p | t | part3_self_fk_pkey | t | + part3_self_fk | part3_self_fk_pkey | p | t | parted_self_fk_pkey | t | + parted_self_fk | parted_self_fk_pkey | p | t | | | +(12 rows) + +-- Leave this table around, for pg_upgrade/pg_dump tests -- Test creating a constraint at the parent that already exists in partitions. -- There should be no duplicated constraints, and attempts to drop the -- constraint in partitions should raise appropriate errors. diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql index 835569a2198..a8c9e77b800 100644 --- a/src/test/regress/sql/foreign_key.sql +++ b/src/test/regress/sql/foreign_key.sql @@ -1390,6 +1390,61 @@ reset role; revoke all on fk_notpartitioned_pk from regress_other_partitioned_fk_owner; drop role regress_other_partitioned_fk_owner; +-- +-- Test self-referencing foreign key with partition. 
+-- This should create only one fk constraint per partition +-- +CREATE TABLE parted_self_fk ( + id bigint NOT NULL PRIMARY KEY, + id_abc bigint, + FOREIGN KEY (id_abc) REFERENCES parted_self_fk(id) +) +PARTITION BY RANGE (id); +CREATE TABLE part1_self_fk ( + id bigint NOT NULL PRIMARY KEY, + id_abc bigint +); +ALTER TABLE parted_self_fk ATTACH PARTITION part1_self_fk FOR VALUES FROM (0) TO (10); +CREATE TABLE part2_self_fk PARTITION OF parted_self_fk FOR VALUES FROM (10) TO (20); +CREATE TABLE part3_self_fk ( -- a partitioned partition + id bigint NOT NULL PRIMARY KEY, + id_abc bigint +) PARTITION BY RANGE (id); +CREATE TABLE part32_self_fk PARTITION OF part3_self_fk FOR VALUES FROM (20) TO (30); +ALTER TABLE parted_self_fk ATTACH PARTITION part3_self_fk FOR VALUES FROM (20) TO (40); +CREATE TABLE part33_self_fk ( + id bigint NOT NULL PRIMARY KEY, + id_abc bigint +); +ALTER TABLE part3_self_fk ATTACH PARTITION part33_self_fk FOR VALUES FROM (30) TO (40); + +SELECT cr.relname, co.conname, co.contype, co.convalidated, + p.conname AS conparent, p.convalidated, cf.relname AS foreignrel +FROM pg_constraint co +JOIN pg_class cr ON cr.oid = co.conrelid +LEFT JOIN pg_class cf ON cf.oid = co.confrelid +LEFT JOIN pg_constraint p ON p.oid = co.conparentid +WHERE cr.oid IN (SELECT relid FROM pg_partition_tree('parted_self_fk')) +ORDER BY co.contype, cr.relname, co.conname, p.conname; + +-- detach and re-attach multiple times just to ensure everything is kosher +ALTER TABLE parted_self_fk DETACH PARTITION part2_self_fk; +ALTER TABLE parted_self_fk ATTACH PARTITION part2_self_fk FOR VALUES FROM (10) TO (20); +ALTER TABLE parted_self_fk DETACH PARTITION part2_self_fk; +ALTER TABLE parted_self_fk ATTACH PARTITION part2_self_fk FOR VALUES FROM (10) TO (20); + +SELECT cr.relname, co.conname, co.contype, co.convalidated, + p.conname AS conparent, p.convalidated, cf.relname AS foreignrel +FROM pg_constraint co +JOIN pg_class cr ON cr.oid = co.conrelid +LEFT JOIN pg_class cf ON cf.oid = co.confrelid +LEFT JOIN pg_constraint p ON p.oid = co.conparentid +WHERE cr.oid IN (SELECT relid FROM pg_partition_tree('parted_self_fk')) +ORDER BY co.contype, cr.relname, co.conname, p.conname; + +-- Leave this table around, for pg_upgrade/pg_dump tests + + -- Test creating a constraint at the parent that already exists in partitions. -- There should be no duplicated constraints, and attempts to drop the -- constraint in partitions should raise appropriate errors. From c400954dbf6538c835cf5857be266027d23e6c18 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Tue, 11 Oct 2022 09:56:13 +0200 Subject: [PATCH 79/95] Ensure all perl test modules are installed PostgreSQL::Test::Cluster and ::Utils were not being installed. This is very hard to notice, as it only seems to affect external modules that want to run tests from 15 back in earlier versions. Oversight in b235d41d9646. This applies only to branches 14 and back, because 15 had already been made correct in commit b3b4d8e68ae8. 
Discussion: https://postgr.es/m/20221010093415.poplkyn7pjeiv2y7@alvherre.pgsql --- src/test/perl/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/test/perl/Makefile b/src/test/perl/Makefile index 3d3a95b52fd..811acf7cd59 100644 --- a/src/test/perl/Makefile +++ b/src/test/perl/Makefile @@ -16,7 +16,7 @@ include $(top_builddir)/src/Makefile.global ifeq ($(enable_tap_tests),yes) installdirs: - $(MKDIR_P) '$(DESTDIR)$(pgxsdir)/$(subdir)' + $(MKDIR_P) '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test' install: all installdirs $(INSTALL_DATA) $(srcdir)/TestLib.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/TestLib.pm' @@ -24,6 +24,8 @@ install: all installdirs $(INSTALL_DATA) $(srcdir)/RecursiveCopy.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/RecursiveCopy.pm' $(INSTALL_DATA) $(srcdir)/PostgresNode.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgresNode.pm' $(INSTALL_DATA) $(srcdir)/PostgresVersion.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgresVersion.pm' + $(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/Cluster.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Cluster.pm' + $(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/Utils.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Utils.pm' uninstall: rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/TestLib.pm' @@ -31,5 +33,7 @@ uninstall: rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/RecursiveCopy.pm' rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgresNode.pm' rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgresVersion.pm' + rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Cluster.pm' + rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Utils.pm' endif From 8860332f353efbe11324fa224e026a15f9db1331 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 11 Oct 2022 18:54:31 -0400 Subject: [PATCH 80/95] Harden pmsignal.c against clobbered shared memory. The postmaster is not supposed to do anything that depends fundamentally on shared memory contents, because that creates the risk that a backend crash that trashes shared memory will take the postmaster down with it, preventing automatic recovery. In commit 969d7cd43 I lost sight of this principle and coded AssignPostmasterChildSlot() in such a way that it could fail or even crash if the shared PMSignalState structure became corrupted. Remarkably, we've not seen field reports of such crashes; but I managed to induce one while testing the recent changes around palloc chunk headers. To fix, make a semi-duplicative state array inside the postmaster so that we need consult only local state while choosing a "child slot" for a new backend. Ensure that other postmaster-executed routines in pmsignal.c don't have critical dependencies on the shared state, either. Corruption of PMSignalState might now lead ReleasePostmasterChildSlot() to conclude that backend X failed, when actually backend Y was the one that trashed things. But that doesn't matter, because we'll force a cluster-wide reset regardless. Back-patch to all supported branches, since this is an old bug. 
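Boiled down, the defensive pattern looks like this sketch (fixed slot
count, invented names, and a simplified flag encoding; not the actual
postmaster code): decisions are made from a parent-private array that no
child can clobber, while the shared flags are written only for the
children's benefit.

    #include <signal.h>
    #include <stdbool.h>

    #define NUM_SLOTS 16

    /* Shared with children; a crashing child may have scribbled on it */
    typedef struct
    {
        volatile sig_atomic_t slot_flags[NUM_SLOTS];
    } SharedState;

    /* Parent-local mirror: the only copy the parent trusts */
    static bool slot_in_use[NUM_SLOTS];
    static int  next_slot = 0;

    static int
    assign_slot(SharedState *shared)
    {
        int         slot = next_slot;

        for (int n = NUM_SLOTS; n > 0; n--)
        {
            if (--slot < 0)
                slot = NUM_SLOTS - 1;
            if (!slot_in_use[slot])     /* decide from local state only */
            {
                slot_in_use[slot] = true;
                shared->slot_flags[slot] = 1;   /* advisory, for children */
                next_slot = slot;
                return slot;
            }
        }
        return -1;                      /* all slots in use */
    }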
Discussion: https://postgr.es/m/3436789.1665187055@sss.pgh.pa.us --- src/backend/storage/ipc/pmsignal.c | 56 +++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/src/backend/storage/ipc/pmsignal.c b/src/backend/storage/ipc/pmsignal.c index 280c2395c9e..0c5e5f0b6fa 100644 --- a/src/backend/storage/ipc/pmsignal.c +++ b/src/backend/storage/ipc/pmsignal.c @@ -26,6 +26,7 @@ #include "replication/walsender.h" #include "storage/pmsignal.h" #include "storage/shmem.h" +#include "utils/memutils.h" /* @@ -75,12 +76,21 @@ struct PMSignalData QuitSignalReason sigquit_reason; /* why SIGQUIT was sent */ /* per-child-process flags */ int num_child_flags; /* # of entries in PMChildFlags[] */ - int next_child_flag; /* next slot to try to assign */ sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER]; }; +/* PMSignalState pointer is valid in both postmaster and child processes */ NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL; +/* + * These static variables are valid only in the postmaster. We keep a + * duplicative private array so that we can trust its state even if some + * failing child has clobbered the PMSignalData struct in shared memory. + */ +static int num_child_inuse; /* # of entries in PMChildInUse[] */ +static int next_child_inuse; /* next slot to try to assign */ +static bool *PMChildInUse; /* true if i'th flag slot is assigned */ + /* * Signal handler to be notified if postmaster dies. */ @@ -142,7 +152,25 @@ PMSignalShmemInit(void) { /* initialize all flags to zeroes */ MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize()); - PMSignalState->num_child_flags = MaxLivePostmasterChildren(); + num_child_inuse = MaxLivePostmasterChildren(); + PMSignalState->num_child_flags = num_child_inuse; + + /* + * Also allocate postmaster's private PMChildInUse[] array. We + * might've already done that in a previous shared-memory creation + * cycle, in which case free the old array to avoid a leak. (Do it + * like this to support the possibility that MaxLivePostmasterChildren + * changed.) In a standalone backend, we do not need this. + */ + if (PostmasterContext != NULL) + { + if (PMChildInUse) + pfree(PMChildInUse); + PMChildInUse = (bool *) + MemoryContextAllocZero(PostmasterContext, + num_child_inuse * sizeof(bool)); + } + next_child_inuse = 0; } } @@ -218,21 +246,24 @@ GetQuitSignalReason(void) int AssignPostmasterChildSlot(void) { - int slot = PMSignalState->next_child_flag; + int slot = next_child_inuse; int n; /* - * Scan for a free slot. We track the last slot assigned so as not to - * waste time repeatedly rescanning low-numbered slots. + * Scan for a free slot. Notice that we trust nothing about the contents + * of PMSignalState, but use only postmaster-local data for this decision. + * We track the last slot assigned so as not to waste time repeatedly + * rescanning low-numbered slots. 
*/ - for (n = PMSignalState->num_child_flags; n > 0; n--) + for (n = num_child_inuse; n > 0; n--) { if (--slot < 0) - slot = PMSignalState->num_child_flags - 1; - if (PMSignalState->PMChildFlags[slot] == PM_CHILD_UNUSED) + slot = num_child_inuse - 1; + if (!PMChildInUse[slot]) { + PMChildInUse[slot] = true; PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED; - PMSignalState->next_child_flag = slot; + next_child_inuse = slot; return slot + 1; } } @@ -254,7 +285,7 @@ ReleasePostmasterChildSlot(int slot) { bool result; - Assert(slot > 0 && slot <= PMSignalState->num_child_flags); + Assert(slot > 0 && slot <= num_child_inuse); slot--; /* @@ -264,17 +295,18 @@ ReleasePostmasterChildSlot(int slot) */ result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED); PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED; + PMChildInUse[slot] = false; return result; } /* * IsPostmasterChildWalSender - check if given slot is in use by a - * walsender process. + * walsender process. This is called only by the postmaster. */ bool IsPostmasterChildWalSender(int slot) { - Assert(slot > 0 && slot <= PMSignalState->num_child_flags); + Assert(slot > 0 && slot <= num_child_inuse); slot--; if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER) From 41ad4235a610ceb6faee80d7a39fc1bac23ca893 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 12 Oct 2022 10:51:11 -0400 Subject: [PATCH 81/95] Doc: improve recommended systemd unit file. Add After=network-online.target Wants=network-online.target to the suggested unit file for starting a Postgres server. This delays startup until the network interfaces have been configured; without that, any attempt to bind to a specific IP address will fail. If listen_addresses is set to "localhost" or "*", it might be possible to get away with the less restrictive "network.target", but I don't think we need to get into such detail here. Per suggestion from Pablo Federico. Discussion: https://postgr.es/m/166552157407.591805.10036014441784710940@wrigleys.postgresql.org --- doc/src/sgml/runtime.sgml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index 479eb62f4ce..982fff3c3db 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -480,6 +480,8 @@ fi [Unit] Description=PostgreSQL database server Documentation=man:postgres(1) +After=network-online.target +Wants=network-online.target [Service] Type=notify From ed23788b0f363098c6801ff962e9cda77279d75f Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Thu, 13 Oct 2022 13:36:14 +0200 Subject: [PATCH 82/95] Fix typo in CREATE PUBLICATION reference page While at it, simplify wording a bit. Author: Takamichi Osumi Reviewed-by: Peter Smith Discussion: https://postgr.es/m/TYCPR01MB8373F93F5D094A2BE648990DED259@TYCPR01MB8373.jpnprd01.prod.outlook.com --- doc/src/sgml/ref/create_publication.sgml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml index 1205df5f88c..7ed48f50b0b 100644 --- a/doc/src/sgml/ref/create_publication.sgml +++ b/doc/src/sgml/ref/create_publication.sgml @@ -184,8 +184,8 @@ CREATE PUBLICATION name For an INSERT ... ON CONFLICT command, the publication will - publish the operation that actually results from the command. So depending - of the outcome, it may be published as either INSERT or + publish the operation that results from the command. 
Depending + on the outcome, it may be published as either INSERT or UPDATE, or it may not be published at all. From dbe07d193ce847d8ed30399dae5004cf19a7e770 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Fri, 14 Oct 2022 19:06:26 +0200 Subject: [PATCH 83/95] libpq: Reset singlerow flag correctly in pipeline mode When a query whose results were requested in single-row mode is the last in the queue by the time those results are being read, the single-row flag was not being reset, because we were returning early from pqPipelineProcessQueue. Move that stanza up so that the flag is always reset at the end of sending that query's results. Add a test for the situation. Backpatch to 14. Author: Denis Laxalde Discussion: https://postgr.es/m/01af18c5-dacc-a8c8-07ee-aecc7650c3e8@dalibo.com --- src/interfaces/libpq/fe-exec.c | 12 ++--- .../modules/libpq_pipeline/libpq_pipeline.c | 45 ++++++++++++++++++- .../libpq_pipeline/traces/singlerow.trace | 20 +++++++++ 3 files changed, 70 insertions(+), 7 deletions(-) diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index 8a69cb5164c..031c89336e7 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -3123,6 +3123,12 @@ pqPipelineProcessQueue(PGconn *conn) break; } + /* + * Reset single-row processing mode. (Client has to set it up for each + * query, if desired.) + */ + conn->singleRowMode = false; + /* * If there are no further commands to process in the queue, get us in * "real idle" mode now. @@ -3136,12 +3142,6 @@ pqPipelineProcessQueue(PGconn *conn) /* Initialize async result-accumulation state */ pqClearAsyncResult(conn); - /* - * Reset single-row processing mode. (Client has to set it up for each - * query, if desired.) - */ - conn->singleRowMode = false; - if (conn->pipelineStatus == PQ_PIPELINE_ABORTED && conn->cmd_queue_head->queryclass != PGQUERY_SYNC) { diff --git a/src/test/modules/libpq_pipeline/libpq_pipeline.c b/src/test/modules/libpq_pipeline/libpq_pipeline.c index ff43f4b7d0b..95e653c8c3b 100644 --- a/src/test/modules/libpq_pipeline/libpq_pipeline.c +++ b/src/test/modules/libpq_pipeline/libpq_pipeline.c @@ -1143,11 +1143,11 @@ test_singlerowmode(PGconn *conn) int i; bool pipeline_ended = false; - /* 1 pipeline, 3 queries in it */ if (PQenterPipelineMode(conn) != 1) pg_fatal("failed to enter pipeline mode: %s", PQerrorMessage(conn)); + /* One series of three commands, using single-row mode for the first two. */ for (i = 0; i < 3; i++) { char *param[1]; @@ -1239,6 +1239,49 @@ test_singlerowmode(PGconn *conn) pg_fatal("didn't get expected terminating TUPLES_OK"); } + /* + * Now issue one command, get its results in with single-row mode, then + * issue another command, and get its results in normal mode; make sure + * the single-row mode flag is reset as expected. 
+ */ + if (PQsendQueryParams(conn, "SELECT generate_series(0, 0)", + 0, NULL, NULL, NULL, NULL, 0) != 1) + pg_fatal("failed to send query: %s", + PQerrorMessage(conn)); + if (PQsendFlushRequest(conn) != 1) + pg_fatal("failed to send flush request"); + if (PQsetSingleRowMode(conn) != 1) + pg_fatal("PQsetSingleRowMode() failed"); + res = PQgetResult(conn); + if (res == NULL) + pg_fatal("unexpected NULL"); + if (PQresultStatus(res) != PGRES_SINGLE_TUPLE) + pg_fatal("Expected PGRES_SINGLE_TUPLE, got %s", + PQresStatus(PQresultStatus(res))); + res = PQgetResult(conn); + if (res == NULL) + pg_fatal("unexpected NULL"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("Expected PGRES_TUPLES_OK, got %s", + PQresStatus(PQresultStatus(res))); + if (PQgetResult(conn) != NULL) + pg_fatal("expected NULL result"); + + if (PQsendQueryParams(conn, "SELECT 1", + 0, NULL, NULL, NULL, NULL, 0) != 1) + pg_fatal("failed to send query: %s", + PQerrorMessage(conn)); + if (PQsendFlushRequest(conn) != 1) + pg_fatal("failed to send flush request"); + res = PQgetResult(conn); + if (res == NULL) + pg_fatal("unexpected NULL"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("Expected PGRES_TUPLES_OK, got %s", + PQresStatus(PQresultStatus(res))); + if (PQgetResult(conn) != NULL) + pg_fatal("expected NULL result"); + if (PQexitPipelineMode(conn) != 1) pg_fatal("failed to end pipeline mode: %s", PQerrorMessage(conn)); diff --git a/src/test/modules/libpq_pipeline/traces/singlerow.trace b/src/test/modules/libpq_pipeline/traces/singlerow.trace index 9de99befcc1..83043e1407e 100644 --- a/src/test/modules/libpq_pipeline/traces/singlerow.trace +++ b/src/test/modules/libpq_pipeline/traces/singlerow.trace @@ -36,4 +36,24 @@ B 12 DataRow 1 2 '45' B 12 DataRow 1 2 '46' B 13 CommandComplete "SELECT 5" B 5 ReadyForQuery I +F 36 Parse "" "SELECT generate_series(0, 0)" 0 +F 14 Bind "" "" 0 0 1 0 +F 6 Describe P "" +F 9 Execute "" 0 +F 4 Flush +B 4 ParseComplete +B 4 BindComplete +B 40 RowDescription 1 "generate_series" NNNN 0 NNNN 4 -1 0 +B 11 DataRow 1 1 '0' +B 13 CommandComplete "SELECT 1" +F 16 Parse "" "SELECT 1" 0 +F 14 Bind "" "" 0 0 1 0 +F 6 Describe P "" +F 9 Execute "" 0 +F 4 Flush +B 4 ParseComplete +B 4 BindComplete +B 33 RowDescription 1 "?column?" NNNN 0 NNNN 4 -1 0 +B 11 DataRow 1 1 '1' +B 13 CommandComplete "SELECT 1" F 4 Terminate From 1f3c51b6f6c2385d2b589601c688331725715971 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 16 Oct 2022 11:47:44 -0400 Subject: [PATCH 84/95] Use libc's snprintf, not sprintf, for special cases in snprintf.c. snprintf.c has always fallen back on libc's *printf implementation when printing pointers (%p) and floats. When this code originated, we were still supporting some platforms that lacked native snprintf, so we used sprintf for that. That's not actually unsafe in our usage, but nonetheless builds on macOS are starting to complain about sprintf being unconditionally deprecated; and I wouldn't be surprised if other platforms follow suit. There seems little reason to believe that any platform supporting C99 wouldn't have standards-compliant snprintf, so let's just use that instead to suppress such warnings. Back-patch to v12, which is where we started to require C99. It's also where we started to use our snprintf.c everywhere, so this wouldn't be enough to suppress the warning in older branches anyway --- that is, in older branches these aren't necessarily all our usages of libc's sprintf. 
It is enough in v12+ because any deprecation annotation attached to libc's sprintf won't apply to pg_sprintf. (Whether all our usages of pg_sprintf are adequately safe is not a matter I intend to address here, but perhaps it could do with some review.) Per report from Andres Freund and local testing. Discussion: https://postgr.es/m/20221015211955.q4cwbsfkyk3c4ty3@awork3.anarazel.de --- src/port/snprintf.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/port/snprintf.c b/src/port/snprintf.c index 7c214293699..87525663907 100644 --- a/src/port/snprintf.c +++ b/src/port/snprintf.c @@ -1002,8 +1002,8 @@ fmtptr(const void *value, PrintfTarget *target) int vallen; char convert[64]; - /* we rely on regular C library's sprintf to do the basic conversion */ - vallen = sprintf(convert, "%p", value); + /* we rely on regular C library's snprintf to do the basic conversion */ + vallen = snprintf(convert, sizeof(convert), "%p", value); if (vallen < 0) target->failed = true; else @@ -1129,11 +1129,11 @@ fmtfloat(double value, char type, int forcesign, int leftjust, int padlen; /* amount to pad with spaces */ /* - * We rely on the regular C library's sprintf to do the basic conversion, + * We rely on the regular C library's snprintf to do the basic conversion, * then handle padding considerations here. * * The dynamic range of "double" is about 1E+-308 for IEEE math, and not - * too wildly more than that with other hardware. In "f" format, sprintf + * too wildly more than that with other hardware. In "f" format, snprintf * could therefore generate at most 308 characters to the left of the * decimal point; while we need to allow the precision to get as high as * 308+17 to ensure that we don't truncate significant digits from very @@ -1185,14 +1185,14 @@ fmtfloat(double value, char type, int forcesign, int leftjust, fmt[2] = '*'; fmt[3] = type; fmt[4] = '\0'; - vallen = sprintf(convert, fmt, prec, value); + vallen = snprintf(convert, sizeof(convert), fmt, prec, value); } else { fmt[0] = '%'; fmt[1] = type; fmt[2] = '\0'; - vallen = sprintf(convert, fmt, value); + vallen = snprintf(convert, sizeof(convert), fmt, value); } if (vallen < 0) goto fail; @@ -1321,7 +1321,7 @@ pg_strfromd(char *str, size_t count, int precision, double value) fmt[2] = '*'; fmt[3] = 'g'; fmt[4] = '\0'; - vallen = sprintf(convert, fmt, precision, value); + vallen = snprintf(convert, sizeof(convert), fmt, precision, value); if (vallen < 0) { target.failed = true; From b0c74233312d0e18dea492f0352b4d034bba1998 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 16 Oct 2022 15:27:04 -0400 Subject: [PATCH 85/95] Rename parser token REF to REF_P to avoid a symbol conflict. In the latest version of Apple's macOS SDK, fails to compile if "REF" is #define'd as something. Apple may or may not agree that this is a bug, and even if they do accept the bug report I filed, they probably won't fix it very quickly. In the meantime, our back branches will all fail to compile gram.y. v15 and HEAD currently escape the problem thanks to the refactoring done in 98e93a1fc, but that's purely accidental. Moreover, since that patch removed a widely-visible inclusion of , back-patching it seems too likely to break third-party code. Instead, change the token's code name to REF_P, following our usual convention for naming parser tokens that are likely to have symbol conflicts. The effects of that should be localized to the grammar and immediately surrounding files, so it seems like a safer answer. 
Per project policy that we want to keep recently-out-of-support branches buildable on modern systems, back-patch all the way to 9.2. Discussion: https://postgr.es/m/1803927.1665938411@sss.pgh.pa.us --- src/backend/parser/gram.y | 8 ++++---- src/include/parser/kwlist.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index a36a456d318..a58ea00f763 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -812,7 +812,7 @@ static void check_expressions_in_partition_key(PartitionSpec *spec, core_yyscan_ QUOTE - RANGE READ REAL REASSIGN RECHECK RECURSIVE REF REFERENCES REFERENCING + RANGE READ REAL REASSIGN RECHECK RECURSIVE REF_P REFERENCES REFERENCING REFRESH REINDEX RELATIVE_P RELEASE RENAME REPEATABLE REPLACE REPLICA RESET RESTART RESTRICT RETRIEVE RETURN RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROLLUP ROUTINE ROUTINES ROW ROWS RULE @@ -18424,7 +18424,7 @@ xmlexists_argument: ; xml_passing_mech: - BY REF + BY REF_P | BY VALUE_P ; @@ -19844,7 +19844,7 @@ unreserved_keyword: | REASSIGN | RECHECK | RECURSIVE - | REF + | REF_P | REFERENCING | REFRESH | REINDEX @@ -20853,7 +20853,7 @@ bare_label_keyword: | REASSIGN | RECHECK | RECURSIVE - | REF + | REF_P | REFERENCES | REFERENCING | REFRESH diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 06ab8f7d61f..5d7959af6b0 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -395,7 +395,7 @@ PG_KEYWORD("real", REAL, COL_NAME_KEYWORD, BARE_LABEL) PG_KEYWORD("reassign", REASSIGN, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("recheck", RECHECK, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("recursive", RECURSIVE, UNRESERVED_KEYWORD, BARE_LABEL) -PG_KEYWORD("ref", REF, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("ref", REF_P, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("references", REFERENCES, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("referencing", REFERENCING, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("refresh", REFRESH, UNRESERVED_KEYWORD, BARE_LABEL) From df52658a2436b63bddc7e00c8a889257e16c7054 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 16 Oct 2022 19:18:08 -0400 Subject: [PATCH 86/95] Fix EXPLAIN of SEARCH BREADTH FIRST with a constant initial value. If the non-recursive term of a SEARCH BREADTH FIRST recursive query has only constants in its target list, the planner will fold the starting RowExpr added by rewrite into a simple Const of type RECORD. The executor doesn't have any problem with that --- but EXPLAIN VERBOSE will encounter the Const as the ultimate source of truth about what the field names of the SET column are, and it didn't know what to do with that. Fortunately, we can pull the identifying typmod out of the Const, in much the same way that record_out would. For reasons that remain a bit obscure to me, this only fails with SEARCH BREADTH FIRST, not SEARCH DEPTH FIRST or CYCLE. But I added regression test cases for both of those options too, just to make sure we don't break it in future. Per bug #17644 from Matthijs van der Vleuten. Back-patch to v14 where these constructs were added. 
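Condensed, the new logic in get_expr_result_type() pulls the row-type
identity straight out of the composite datum's header, much as record_out
does. A backend-only fragment, assuming the usual headers such as
access/htup_details.h and utils/typcache.h:

    /* sketch of the new RECORD-Const case (expr checked non-null above) */
    Const      *con = (Const *) expr;
    HeapTupleHeader rec = DatumGetHeapTupleHeader(con->constvalue);
    Oid         tupType = HeapTupleHeaderGetTypeId(rec);
    int32       tupTypmod = HeapTupleHeaderGetTypMod(rec);

    if (tupType != RECORDOID || tupTypmod >= 0)
    {
        /* registered typmod: the typcache can resolve the field names */
        TupleDesc   tupdesc = lookup_rowtype_tupdesc_copy(tupType, tupTypmod);

        /* EXPLAIN VERBOSE can now label the SET column's fields */
    }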
Discussion: https://postgr.es/m/17644-3bd1f3036d6d7a16@postgresql.org --- src/backend/utils/adt/ruleutils.c | 3 +- src/backend/utils/fmgr/funcapi.c | 34 ++++++++ src/test/regress/expected/with.out | 120 +++++++++++++++++++++++++++++ src/test/regress/sql/with.sql | 52 +++++++++++++ 4 files changed, 208 insertions(+), 1 deletion(-) diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index ea8156bebad..c6a3dd40166 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -7508,7 +7508,8 @@ get_name_for_var_field(Var *var, int fieldno, /* * If it's a RowExpr that was expanded from a whole-row Var, use the - * column names attached to it. + * column names attached to it. (We could let get_expr_result_tupdesc() + * handle this, but it's much cheaper to just pull out the name we need.) */ if (IsA(var, RowExpr)) { diff --git a/src/backend/utils/fmgr/funcapi.c b/src/backend/utils/fmgr/funcapi.c index 487a46c30f7..eda366ceb86 100644 --- a/src/backend/utils/fmgr/funcapi.c +++ b/src/backend/utils/fmgr/funcapi.c @@ -338,6 +338,40 @@ get_expr_result_type(Node *expr, *resultTupleDesc = BlessTupleDesc(tupdesc); return TYPEFUNC_COMPOSITE; } + else if (expr && IsA(expr, Const) && + ((Const *) expr)->consttype == RECORDOID && + !((Const *) expr)->constisnull) + { + /* + * When EXPLAIN'ing some queries with SEARCH/CYCLE clauses, we may + * need to resolve field names of a RECORD-type Const. The datum + * should contain a typmod that will tell us that. + */ + HeapTupleHeader rec; + Oid tupType; + int32 tupTypmod; + + rec = DatumGetHeapTupleHeader(((Const *) expr)->constvalue); + tupType = HeapTupleHeaderGetTypeId(rec); + tupTypmod = HeapTupleHeaderGetTypMod(rec); + if (resultTypeId) + *resultTypeId = tupType; + if (tupType != RECORDOID || tupTypmod >= 0) + { + /* Should be able to look it up */ + if (resultTupleDesc) + *resultTupleDesc = lookup_rowtype_tupdesc_copy(tupType, + tupTypmod); + return TYPEFUNC_COMPOSITE; + } + else + { + /* This shouldn't really happen ... 
*/ + if (resultTupleDesc) + *resultTupleDesc = NULL; + return TYPEFUNC_RECORD; + } + } else { /* handle as a generic expression; no chance to resolve RECORD */ diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out index 6c643fec8d4..6ded94e998a 100644 --- a/src/test/regress/expected/with.out +++ b/src/test/regress/expected/with.out @@ -792,6 +792,83 @@ select * from search_graph order by seq; 4 | 5 | arc 4 -> 5 | (1,4,5) (7 rows) +-- a constant initial value causes issues for EXPLAIN +explain (verbose, costs off) +with recursive test as ( + select 1 as x + union all + select x + 1 + from test +) search depth first by x set y +select * from test limit 5; + QUERY PLAN +----------------------------------------------------------------------------------------- + Limit + Output: test.x, test.y + CTE test + -> Recursive Union + -> Result + Output: 1, '{(1)}'::record[] + -> WorkTable Scan on test test_1 + Output: (test_1.x + 1), array_cat(test_1.y, ARRAY[ROW((test_1.x + 1))]) + -> CTE Scan on test + Output: test.x, test.y +(10 rows) + +with recursive test as ( + select 1 as x + union all + select x + 1 + from test +) search depth first by x set y +select * from test limit 5; + x | y +---+----------------------- + 1 | {(1)} + 2 | {(1),(2)} + 3 | {(1),(2),(3)} + 4 | {(1),(2),(3),(4)} + 5 | {(1),(2),(3),(4),(5)} +(5 rows) + +explain (verbose, costs off) +with recursive test as ( + select 1 as x + union all + select x + 1 + from test +) search breadth first by x set y +select * from test limit 5; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Limit + Output: test.x, test.y + CTE test + -> Recursive Union + -> Result + Output: 1, '(0,1)'::record + -> WorkTable Scan on test test_1 + Output: (test_1.x + 1), ROW(int8inc((test_1.y)."*DEPTH*"), (test_1.x + 1)) + -> CTE Scan on test + Output: test.x, test.y +(10 rows) + +with recursive test as ( + select 1 as x + union all + select x + 1 + from test +) search breadth first by x set y +select * from test limit 5; + x | y +---+------- + 1 | (0,1) + 2 | (1,2) + 3 | (2,3) + 4 | (3,4) + 5 | (4,5) +(5 rows) + -- various syntax errors with recursive search_graph(f, t, label) as ( select * from graph0 g @@ -1103,6 +1180,49 @@ select * from search_graph; 2 | 3 | arc 2 -> 3 | N | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"} (25 rows) +explain (verbose, costs off) +with recursive test as ( + select 0 as x + union all + select (x + 1) % 10 + from test +) cycle x set is_cycle using path +select * from test; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + CTE Scan on test + Output: test.x, test.is_cycle, test.path + CTE test + -> Recursive Union + -> Result + Output: 0, false, '{(0)}'::record[] + -> WorkTable Scan on test test_1 + Output: ((test_1.x + 1) % 10), CASE WHEN (ROW(((test_1.x + 1) % 10)) = ANY (test_1.path)) THEN true ELSE false END, array_cat(test_1.path, ARRAY[ROW(((test_1.x + 1) % 10))]) + Filter: (NOT test_1.is_cycle) +(9 rows) + +with recursive test as ( + select 0 as x + union all + select (x + 1) % 10 + from test +) cycle x set is_cycle using path +select * from test; + x | is_cycle | path +---+----------+----------------------------------------------- + 0 | f | {(0)} + 1 | f | {(0),(1)} + 2 | f | {(0),(1),(2)} + 3 | f | {(0),(1),(2),(3)} + 4 | f | {(0),(1),(2),(3),(4)} + 5 | f | {(0),(1),(2),(3),(4),(5)} + 6 
| f | {(0),(1),(2),(3),(4),(5),(6)} + 7 | f | {(0),(1),(2),(3),(4),(5),(6),(7)} + 8 | f | {(0),(1),(2),(3),(4),(5),(6),(7),(8)} + 9 | f | {(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)} + 0 | t | {(0),(1),(2),(3),(4),(5),(6),(7),(8),(9),(0)} +(11 rows) + -- multiple CTEs with recursive graph(f, t, label) as ( diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql index 0b3da6cd98c..4dd6dc86b5e 100644 --- a/src/test/regress/sql/with.sql +++ b/src/test/regress/sql/with.sql @@ -422,6 +422,41 @@ with recursive search_graph(f, t, label) as ( ) search breadth first by f, t set seq select * from search_graph order by seq; +-- a constant initial value causes issues for EXPLAIN +explain (verbose, costs off) +with recursive test as ( + select 1 as x + union all + select x + 1 + from test +) search depth first by x set y +select * from test limit 5; + +with recursive test as ( + select 1 as x + union all + select x + 1 + from test +) search depth first by x set y +select * from test limit 5; + +explain (verbose, costs off) +with recursive test as ( + select 1 as x + union all + select x + 1 + from test +) search breadth first by x set y +select * from test limit 5; + +with recursive test as ( + select 1 as x + union all + select x + 1 + from test +) search breadth first by x set y +select * from test limit 5; + -- various syntax errors with recursive search_graph(f, t, label) as ( select * from graph0 g @@ -561,6 +596,23 @@ with recursive search_graph(f, t, label) as ( ) cycle f, t set is_cycle to 'Y' default 'N' using path select * from search_graph; +explain (verbose, costs off) +with recursive test as ( + select 0 as x + union all + select (x + 1) % 10 + from test +) cycle x set is_cycle using path +select * from test; + +with recursive test as ( + select 0 as x + union all + select (x + 1) % 10 + from test +) cycle x set is_cycle using path +select * from test; + -- multiple CTEs with recursive graph(f, t, label) as ( From f96cd98c1a73ab76e8898bcd88464eeba30a04de Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 17 Oct 2022 11:35:23 -0400 Subject: [PATCH 87/95] Guard against table-AM-less relations in planner. The executor will dump core if it's asked to execute a seqscan on a relation having no table AM, such as a view. While that shouldn't really happen, it's possible to get there via catalog corruption, such as a missing ON SELECT rule. It seems worth installing a defense against that. There are multiple plausible places for such a defense, but I picked the planner's get_relation_info(). Per discussion of bug #17646 from Kui Liu. Back-patch to v12 where the tableam APIs were introduced; in older versions you won't get a SIGSEGV, so it seems less pressing. Discussion: https://postgr.es/m/17646-70c93cfa40365776@postgresql.org --- src/backend/optimizer/util/plancat.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 5417db71431..46d85f3c324 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -136,6 +136,22 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, */ relation = table_open(relationObjectId, NoLock); + /* + * Relations without a table AM can be used in a query only if they are of + * special-cased relkinds. This check prevents us from crashing later if, + * for example, a view's ON SELECT rule has gone missing. Note that + * table_open() already rejected indexes and composite types. 
+ */ + if (!relation->rd_tableam) + { + if (!(relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE || + relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot open relation \"%s\"", + RelationGetRelationName(relation)))); + } + /* Temporary and unlogged relations are inaccessible during recovery. */ if (!RelationIsPermanent(relation) && RecoveryInProgress()) ereport(ERROR, From ef24a36e32907085a07cb3632c003a60745eb0a8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 17 Oct 2022 12:14:39 -0400 Subject: [PATCH 88/95] Reject non-ON-SELECT rules that are named "_RETURN". DefineQueryRewrite() has long required that ON SELECT rules be named "_RETURN". But we overlooked the converse case: we should forbid non-ON-SELECT rules that are named "_RETURN". In particular this prevents using CREATE OR REPLACE RULE to overwrite a view's _RETURN rule with some other kind of rule, thereby breaking the view. Per bug #17646 from Kui Liu. Back-patch to all supported branches. Discussion: https://postgr.es/m/17646-70c93cfa40365776@postgresql.org --- src/backend/rewrite/rewriteDefine.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index 19973c195da..969d4968b1a 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -578,6 +578,18 @@ DefineQueryRewrite(const char *rulename, RelationGetDescr(event_relation), false, false); } + + /* + * And finally, if it's not an ON SELECT rule then it must *not* be + * named _RETURN. This prevents accidentally or maliciously replacing + * a view's ON SELECT rule with some other kind of rule. + */ + if (strcmp(rulename, ViewSelectRuleName) == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("non-view rule for \"%s\" must not be named \"%s\"", + RelationGetRelationName(event_relation), + ViewSelectRuleName))); } /* From 71363969b75ef6177c6b70ba53873687ca70a666 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Mon, 17 Oct 2022 15:07:03 -0400 Subject: [PATCH 89/95] doc: warn pg_stat_reset() can cause vacuum/analyze problems The fix is to run ANALYZE. Discussion: https://postgr.es/m/YzRr+ys98UzVQJvK@momjian.us, https://postgr.es/m/flat/CAKJS1f8DTbCHf9gedU0He6ARsd58E6qOhEHM1caomqj_r9MOiQ%40mail.gmail.com, https://postgr.es/m/CAKJS1f80o98hcfSk8j%3DfdN09S7Sjz%2BvuzhEwbyQqvHJb_sZw0g%40mail.gmail.com Backpatch-through: 10 --- doc/src/sgml/monitoring.sgml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index ef795a752bf..ce6c34d6ea1 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -5193,6 +5193,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i + + + Using pg_stat_reset() also resets counters that + autovacuum uses to determine when to trigger a vacuum or an analyze. + Resetting these counters can cause autovacuum to not perform necessary + work, which can cause problems such as table bloat or out-dated + table statistics. A database-wide ANALYZE is + recommended after the statistics have been reset. 
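As a hedged illustration of that advice (not part of the patch; assumes sufficient privileges), the recovery step is just:

-- Resetting cumulative statistics also clears the counters autovacuum uses,
-- so immediately rebuild fresh statistics with a database-wide ANALYZE:
SELECT pg_stat_reset();
ANALYZE;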
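Returning to the two guards added above for bug #17646, here is a minimal sketch of the scenarios they close off. The object names are invented for illustration, and the catalog surgery requires superuser and belongs only on a throwaway test cluster:

CREATE TABLE demo_t (a int);
CREATE VIEW demo_v AS SELECT a FROM demo_t;
-- The rewriter now refuses to give a non-ON-SELECT rule the reserved name,
-- so a view's internal rule can no longer be clobbered:
CREATE OR REPLACE RULE "_RETURN" AS ON INSERT TO demo_v DO INSTEAD NOTHING;
-- ERROR:  non-view rule for "demo_v" must not be named "_RETURN"
-- And if a view's ON SELECT rule goes missing anyway, the planner now raises
-- an error instead of letting a seqscan crash on the missing table AM:
DELETE FROM pg_rewrite WHERE ev_class = 'demo_v'::regclass;
SELECT * FROM demo_v;  -- in a new session: clean error, not a SIGSEGV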
+ + + pg_stat_get_activity, the underlying function of the pg_stat_activity view, returns a set of records From ba080038dbd1e51da63f60fcdf71fb5ea58bfc4e Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Mon, 17 Oct 2022 15:21:29 -0400 Subject: [PATCH 90/95] doc: move the mention of aggregate JSON functions up in section It was previously easily overlooked at the end of several tables. Reported-by: Alex Denman Discussion: https://postgr.es/m/166335888474.659.16897487975376230364@wrigleys.postgresql.org Backpatch-through: 10 --- doc/src/sgml/func.sgml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 9d0714bf935..7960cc2a5aa 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -14728,6 +14728,12 @@ table2-mapping jsonb, though not for json. The comparison operators follow the ordering rules for B-tree operations outlined in . + See also for the aggregate + function json_agg which aggregates record + values as JSON, the aggregate function + json_object_agg which aggregates pairs of values + into a JSON object, and their jsonb equivalents, + jsonb_agg and jsonb_object_agg. @@ -16183,15 +16189,6 @@ table2-mapping
- - - See also for the aggregate - function json_agg which aggregates record - values as JSON, the aggregate function - json_object_agg which aggregates pairs of values - into a JSON object, and their jsonb equivalents, - jsonb_agg and jsonb_object_agg. - From af0af1569efc37ce84e6bcafa23457dfd9c590f5 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Wed, 19 Oct 2022 22:32:14 +1300 Subject: [PATCH 91/95] Track LLVM 15 changes. Per https://llvm.org/docs/OpaquePointers.html, support for non-opaque pointers still exists and we can request that on our context. We have until LLVM 16 to move to opaque pointers, a much larger change. Back-patch to 11, where LLVM support arrived. Author: Thomas Munro Author: Andres Freund Discussion: https://postgr.es/m/CAMHz58Sf_xncdyqsekoVsNeKcruKootLtVH6cYXVhhUR1oKPCg%40mail.gmail.com --- configure | 89 +++++++++++++++++++++++++ configure.ac | 3 + src/backend/jit/llvm/llvmjit.c | 18 +++++ src/backend/jit/llvm/llvmjit_inline.cpp | 1 + 4 files changed, 111 insertions(+) diff --git a/configure b/configure index 3592340e7c9..b61f7eda7b6 100755 --- a/configure +++ b/configure @@ -7739,6 +7739,95 @@ if test x"$pgac_cv_prog_CLANGXX_cxxflags__fexcess_precision_standard" = x"yes"; fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CLANG} supports -Xclang -no-opaque-pointers, for BITCODE_CFLAGS" >&5 +$as_echo_n "checking whether ${CLANG} supports -Xclang -no-opaque-pointers, for BITCODE_CFLAGS... " >&6; } +if ${pgac_cv_prog_CLANG_cflags__Xclang__no_opaque_pointers+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +pgac_save_CC=$CC +CC=${CLANG} +CFLAGS="${BITCODE_CFLAGS} -Xclang -no-opaque-pointers" +ac_save_c_werror_flag=$ac_c_werror_flag +ac_c_werror_flag=yes +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + pgac_cv_prog_CLANG_cflags__Xclang__no_opaque_pointers=yes +else + pgac_cv_prog_CLANG_cflags__Xclang__no_opaque_pointers=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_c_werror_flag=$ac_save_c_werror_flag +CFLAGS="$pgac_save_CFLAGS" +CC="$pgac_save_CC" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CLANG_cflags__Xclang__no_opaque_pointers" >&5 +$as_echo "$pgac_cv_prog_CLANG_cflags__Xclang__no_opaque_pointers" >&6; } +if test x"$pgac_cv_prog_CLANG_cflags__Xclang__no_opaque_pointers" = x"yes"; then + BITCODE_CFLAGS="${BITCODE_CFLAGS} -Xclang -no-opaque-pointers" +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CLANGXX} supports -Xclang -no-opaque-pointers, for BITCODE_CXXFLAGS" >&5 +$as_echo_n "checking whether ${CLANGXX} supports -Xclang -no-opaque-pointers, for BITCODE_CXXFLAGS... " >&6; } +if ${pgac_cv_prog_CLANGXX_cxxflags__Xclang__no_opaque_pointers+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CXXFLAGS=$CXXFLAGS +pgac_save_CXX=$CXX +CXX=${CLANGXX} +CXXFLAGS="${BITCODE_CXXFLAGS} -Xclang -no-opaque-pointers" +ac_save_cxx_werror_flag=$ac_cxx_werror_flag +ac_cxx_werror_flag=yes +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + pgac_cv_prog_CLANGXX_cxxflags__Xclang__no_opaque_pointers=yes +else + pgac_cv_prog_CLANGXX_cxxflags__Xclang__no_opaque_pointers=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_cxx_werror_flag=$ac_save_cxx_werror_flag +CXXFLAGS="$pgac_save_CXXFLAGS" +CXX="$pgac_save_CXX" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CLANGXX_cxxflags__Xclang__no_opaque_pointers" >&5 +$as_echo "$pgac_cv_prog_CLANGXX_cxxflags__Xclang__no_opaque_pointers" >&6; } +if test x"$pgac_cv_prog_CLANGXX_cxxflags__Xclang__no_opaque_pointers" = x"yes"; then + BITCODE_CXXFLAGS="${BITCODE_CXXFLAGS} -Xclang -no-opaque-pointers" +fi + + NOT_THE_CFLAGS="" { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CLANG} supports -Wunused-command-line-argument, for NOT_THE_CFLAGS" >&5 $as_echo_n "checking whether ${CLANG} supports -Wunused-command-line-argument, for NOT_THE_CFLAGS... " >&6; } diff --git a/configure.ac b/configure.ac index 46ab6f50d21..d2ec14cf926 100644 --- a/configure.ac +++ b/configure.ac @@ -672,6 +672,9 @@ if test "$with_llvm" = yes ; then PGAC_PROG_VARCC_VARFLAGS_OPT(CLANG, BITCODE_CFLAGS, [-fexcess-precision=standard]) PGAC_PROG_VARCXX_VARFLAGS_OPT(CLANGXX, BITCODE_CXXFLAGS, [-fexcess-precision=standard]) + PGAC_PROG_VARCC_VARFLAGS_OPT(CLANG, BITCODE_CFLAGS, [-Xclang -no-opaque-pointers]) + PGAC_PROG_VARCXX_VARFLAGS_OPT(CLANGXX, BITCODE_CXXFLAGS, [-Xclang -no-opaque-pointers]) + NOT_THE_CFLAGS="" PGAC_PROG_VARCC_VARFLAGS_OPT(CLANG, NOT_THE_CFLAGS, [-Wunused-command-line-argument]) if test -n "$NOT_THE_CFLAGS"; then diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c index fb294495737..199fff4f773 100644 --- a/src/backend/jit/llvm/llvmjit.c +++ b/src/backend/jit/llvm/llvmjit.c @@ -798,6 +798,16 @@ llvm_session_initialize(void) LLVMInitializeNativeAsmPrinter(); LLVMInitializeNativeAsmParser(); + /* + * When targeting an LLVM version with opaque pointers enabled by + * default, turn them off for the context we build our code in. We don't + * need to do so for other contexts (e.g. llvm_ts_context). Once the IR is + * generated, it carries the necessary information. + */ +#if LLVM_VERSION_MAJOR > 14 + LLVMContextSetOpaquePointers(LLVMGetGlobalContext(), false); +#endif + /* * Synchronize types early, as that also includes inferring the target * triple. @@ -1112,7 +1122,11 @@ llvm_resolve_symbols(LLVMOrcDefinitionGeneratorRef GeneratorObj, void *Ctx, LLVMOrcJITDylibRef JD, LLVMOrcJITDylibLookupFlags JDLookupFlags, LLVMOrcCLookupSet LookupSet, size_t LookupSetSize) { +#if LLVM_VERSION_MAJOR > 14 + LLVMOrcCSymbolMapPairs symbols = palloc0(sizeof(LLVMOrcCSymbolMapPair) * LookupSetSize); +#else LLVMOrcCSymbolMapPairs symbols = palloc0(sizeof(LLVMJITCSymbolMapPair) * LookupSetSize); +#endif LLVMErrorRef error; LLVMOrcMaterializationUnitRef mu; @@ -1230,7 +1244,11 @@ llvm_create_jit_instance(LLVMTargetMachineRef tm) * Symbol resolution support for "special" functions, e.g. a call into an * SQL callable function. 
*/ +#if LLVM_VERSION_MAJOR > 14 + ref_gen = LLVMOrcCreateCustomCAPIDefinitionGenerator(llvm_resolve_symbols, NULL, NULL); +#else ref_gen = LLVMOrcCreateCustomCAPIDefinitionGenerator(llvm_resolve_symbols, NULL); +#endif LLVMOrcJITDylibAddGenerator(LLVMOrcLLJITGetMainJITDylib(lljit), ref_gen); return lljit; diff --git a/src/backend/jit/llvm/llvmjit_inline.cpp b/src/backend/jit/llvm/llvmjit_inline.cpp index 9bb4b672a73..774d9e8b66d 100644 --- a/src/backend/jit/llvm/llvmjit_inline.cpp +++ b/src/backend/jit/llvm/llvmjit_inline.cpp @@ -62,6 +62,7 @@ extern "C" #include #include #include +#include /* From 9944b6e139cd8d900aaa73fab3d1d8b81c1c5b3e Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 20 Oct 2022 09:34:18 +0530 Subject: [PATCH 92/95] Fix assertion failures while processing NEW_CID record in logical decoding. When the logical decoding restarts from NEW_CID, since there is no association between the top transaction and its subtransaction, both are created as top transactions and have the same LSN. This caused the assertion failure in AssertTXNLsnOrder(). This patch skips the assertion check until we reach the LSN at which we start decoding the contents of the transaction, specifically start_decoding_at LSN in SnapBuild. This is okay because we don't guarantee to make the association between top transaction and subtransaction until we try to decode the actual contents of transaction. The ordering of the records prior to the start_decoding_at LSN should have been checked before the restart. The other assertion failure is due to the reason that we forgot to track that we have considered top-level transaction id in the list of catalog changing transactions that were committed when one of its subtransactions is marked as containing catalog change. Reported-by: Tomas Vondra, Osumi Takamichi Author: Masahiko Sawada, Kuroda Hayato Reviewed-by: Amit Kapila, Dilip Kumar, Kuroda Hayato, Kyotaro Horiguchi, Masahiko Sawada Backpatch-through: 10 Discussion: https://postgr.es/m/a89b46b6-0239-2fd5-71a9-b19b1f7a7145%40enterprisedb.com Discussion: https://postgr.es/m/TYCPR01MB83733C6CEAE47D0280814D5AED7A9%40TYCPR01MB8373.jpnprd01.prod.outlook.com --- .../expected/catalog_change_snapshot.out | 45 +++++++++++++++++++ .../specs/catalog_change_snapshot.spec | 16 +++++++ .../replication/logical/reorderbuffer.c | 14 ++++++ src/backend/replication/logical/snapbuild.c | 3 ++ 4 files changed, 78 insertions(+) diff --git a/contrib/test_decoding/expected/catalog_change_snapshot.out b/contrib/test_decoding/expected/catalog_change_snapshot.out index dc4f9b7018f..1d75cf5af02 100644 --- a/contrib/test_decoding/expected/catalog_change_snapshot.out +++ b/contrib/test_decoding/expected/catalog_change_snapshot.out @@ -42,3 +42,48 @@ COMMIT stop (1 row) + +starting permutation: s0_init s0_begin s0_savepoint s0_insert s1_checkpoint s1_get_changes s0_insert2 s0_commit s0_begin s0_insert s1_checkpoint s1_get_changes s0_commit s1_get_changes +step s0_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? 
+-------- +init +(1 row) + +step s0_begin: BEGIN; +step s0_savepoint: SAVEPOINT sp1; +step s0_insert: INSERT INTO tbl1 VALUES (1); +step s1_checkpoint: CHECKPOINT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data +---- +(0 rows) + +step s0_insert2: INSERT INTO user_cat VALUES (1); +step s0_commit: COMMIT; +step s0_begin: BEGIN; +step s0_insert: INSERT INTO tbl1 VALUES (1); +step s1_checkpoint: CHECKPOINT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data +------------------------------------------------------------- +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:null +table public.user_cat: INSERT: val1[integer]:1 +COMMIT +(4 rows) + +step s0_commit: COMMIT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data +------------------------------------------------------------- +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:null +COMMIT +(3 rows) + +?column? +-------- +stop +(1 row) + diff --git a/contrib/test_decoding/specs/catalog_change_snapshot.spec b/contrib/test_decoding/specs/catalog_change_snapshot.spec index 2971ddc69cb..2ad1edeaa87 100644 --- a/contrib/test_decoding/specs/catalog_change_snapshot.spec +++ b/contrib/test_decoding/specs/catalog_change_snapshot.spec @@ -4,11 +4,13 @@ setup { DROP TABLE IF EXISTS tbl1; CREATE TABLE tbl1 (val1 integer, val2 integer); + CREATE TABLE user_cat (val1 integer) WITH (user_catalog_table = true); } teardown { DROP TABLE tbl1; + DROP TABLE user_cat; SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot'); } @@ -19,6 +21,7 @@ step "s0_begin" { BEGIN; } step "s0_savepoint" { SAVEPOINT sp1; } step "s0_truncate" { TRUNCATE tbl1; } step "s0_insert" { INSERT INTO tbl1 VALUES (1); } +step "s0_insert2" { INSERT INTO user_cat VALUES (1); } step "s0_commit" { COMMIT; } session "s1" @@ -37,3 +40,16 @@ step "s1_get_changes" { SELECT data FROM pg_logical_slot_get_changes('isolation_ # record written by bgwriter. One might think we can either stop the bgwriter or # increase LOG_SNAPSHOT_INTERVAL_MS but it's not practical via tests. permutation "s0_init" "s0_begin" "s0_savepoint" "s0_truncate" "s1_checkpoint" "s1_get_changes" "s0_commit" "s0_begin" "s0_insert" "s1_checkpoint" "s1_get_changes" "s0_commit" "s1_get_changes" + +# Test that we can handle the case where there is no association between top-level +# transaction and its subtransactions. The last decoding restarts from the first +# checkpoint, decodes NEW_CID generated by "s0_insert2", and marks the subtransaction +# as containing catalog changes while adding tuple cids to its top-level transaction. +# During that, both transaction entries are created in ReorderBuffer as top-level +# transactions and have the same LSN. We check if the assertion check for the order +# of transaction LSNs in AssertTXNLsnOrder() is skipped since we are still before the +# LSN at which we start replaying the contents of transactions. Besides, when decoding +# the commit record of the top-level transaction, we must force the top-level +# transaction to do timetravel since one of its subtransactions has been marked as +# containing catalog changes. 
+permutation "s0_init" "s0_begin" "s0_savepoint" "s0_insert" "s1_checkpoint" "s1_get_changes" "s0_insert2" "s0_commit" "s0_begin" "s0_insert" "s1_checkpoint" "s1_get_changes" "s0_commit" "s1_get_changes" diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 3194e418511..d55782e0106 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -871,10 +871,24 @@ static void AssertTXNLsnOrder(ReorderBuffer *rb) { #ifdef USE_ASSERT_CHECKING + LogicalDecodingContext *ctx = rb->private_data; dlist_iter iter; XLogRecPtr prev_first_lsn = InvalidXLogRecPtr; XLogRecPtr prev_base_snap_lsn = InvalidXLogRecPtr; + /* + * Skip the verification if we don't reach the LSN at which we start + * decoding the contents of transactions yet because until we reach the + * LSN, we could have transactions that don't have the association between + * the top-level transaction and subtransaction yet and consequently have + * the same LSN. We don't guarantee this association until we try to + * decode the actual contents of transaction. The ordering of the records + * prior to the start_decoding_at LSN should have been checked before the + * restart. + */ + if (SnapBuildXactNeedsSkip(ctx->snapshot_builder, ctx->reader->EndRecPtr)) + return; + dlist_foreach(iter, &rb->toplevel_by_lsn) { ReorderBufferTXN *cur_txn = dlist_container(ReorderBufferTXN, node, diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 9788711bf6b..b96f40c27b2 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -1092,6 +1092,9 @@ SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, else if (sub_needs_timetravel) { /* track toplevel txn as well, subxact alone isn't meaningful */ + elog(DEBUG2, "forced transaction %u to do timetravel due to one of its subtransactions", + xid); + needs_timetravel = true; SnapBuildAddCommittedTxn(builder, xid); } else if (needs_timetravel) From fdd6da4b2aeda736c62af11b280b6cf3fc557406 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 21 Oct 2022 09:30:27 +1300 Subject: [PATCH 93/95] Doc: fix outdated wording about parallel seq scans 56788d215 adjusted the parallel seq scan code so that instead of handing out a single block at a time to parallel workers, it now hands out ranges of blocks. Here we update the documentation which still claimed that workers received just 1 block at a time. Reported-by: Zhang Mingli Discussion: https://postgr.es/m/17c99615-2c3b-4e4e-9d0b-424a66a7bccd@Spark Backpatch-through: 14, where 56788d215 was added. --- doc/src/sgml/parallel.sgml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml index 13479d7e5e3..3e8326d46c8 100644 --- a/doc/src/sgml/parallel.sgml +++ b/doc/src/sgml/parallel.sgml @@ -272,8 +272,9 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; In a parallel sequential scan, the table's blocks will - be divided among the cooperating processes. Blocks are handed out one - at a time, so that access to the table remains sequential. + be divided into ranges and shared among the cooperating processes. Each + worker process will complete the scanning of its given range of blocks before + requesting an additional range of blocks. 
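A quick way to observe such a plan from psql, as a hedged sketch (the cost GUCs are zeroed only to coax the planner into parallelism; pgbench_accounts is the example table used elsewhere in this chapter):

SET max_parallel_workers_per_gather = 2;
SET parallel_setup_cost = 0;
SET parallel_tuple_cost = 0;
-- Expect a Gather node over a Parallel Seq Scan; internally each worker now
-- claims a contiguous range of blocks instead of a single block at a time.
EXPLAIN (COSTS OFF) SELECT count(*) FROM pgbench_accounts;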
From dcaf16fbd647c4e83ca1fb719790d3cc98604625 Mon Sep 17 00:00:00 2001
From: Amit Kapila
Date: Fri, 21 Oct 2022 09:52:44 +0530
Subject: [PATCH 94/95] Fix executing invalidation messages generated by
 subtransactions during decoding.

This problem was introduced by commit 272248a0c1, where we started
assigning the subtransactions to the top-level transaction when we mark
both the top-level transaction and its subtransactions as containing
catalog changes. After we assign subtransactions to the top-level
transaction, we can no longer execute any invalidations associated with
it when we decide to skip the transaction.

The reason for assigning the subtransactions to the top-level
transaction was to avoid the assertion failure in AssertTXNLsnOrder(),
since they have the same LSN when we sometimes start accumulating
transaction changes for partial transactions after the restart. Now
that, with commit 64ff0fe4e8, we skip this assertion check until we
reach the LSN at which we start decoding the contents of the
transaction, there is no reason for such an assignment anymore.

The assignment change was introduced in 15 and prior versions, but this
bug doesn't exist in branches prior to 14 since we don't add
invalidation messages to subtransactions there. We decided to backpatch
through 11 for consistency, but not 10 since its final release is near.

Reported-by: Kuroda Hayato
Author: Masahiko Sawada
Reviewed-by: Amit Kapila
Backpatch-through: 11
Discussion: https://postgr.es/m/TYAPR01MB58660803BCAA7849C8584AA4F57E9%40TYAPR01MB5866.jpnprd01.prod.outlook.com
Discussion: https://postgr.es/m/a89b46b6-0239-2fd5-71a9-b19b1f7a7145%40enterprisedb.com
---
 .../expected/catalog_change_snapshot.out      | 45 +++++++++++++++++++
 .../specs/catalog_change_snapshot.spec        |  7 +++
 src/backend/replication/logical/snapbuild.c   |  3 --
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/contrib/test_decoding/expected/catalog_change_snapshot.out b/contrib/test_decoding/expected/catalog_change_snapshot.out
index 1d75cf5af02..b33e49c0b1c 100644
--- a/contrib/test_decoding/expected/catalog_change_snapshot.out
+++ b/contrib/test_decoding/expected/catalog_change_snapshot.out
@@ -87,3 +87,48 @@ COMMIT
 stop
 (1 row)
 
+
+starting permutation: s0_init s0_begin s0_savepoint s0_insert s1_checkpoint s1_get_changes s0_truncate s0_commit s0_begin s0_insert s1_checkpoint s1_get_changes s0_commit s1_get_changes
+step s0_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+-------- +init +(1 row) + +step s0_begin: BEGIN; +step s0_savepoint: SAVEPOINT sp1; +step s0_insert: INSERT INTO tbl1 VALUES (1); +step s1_checkpoint: CHECKPOINT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data +---- +(0 rows) + +step s0_truncate: TRUNCATE tbl1; +step s0_commit: COMMIT; +step s0_begin: BEGIN; +step s0_insert: INSERT INTO tbl1 VALUES (1); +step s1_checkpoint: CHECKPOINT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data +------------------------------------------------------------- +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:null +table public.tbl1: TRUNCATE: (no-flags) +COMMIT +(4 rows) + +step s0_commit: COMMIT; +step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); +data +------------------------------------------------------------- +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:null +COMMIT +(3 rows) + +?column? +-------- +stop +(1 row) + diff --git a/contrib/test_decoding/specs/catalog_change_snapshot.spec b/contrib/test_decoding/specs/catalog_change_snapshot.spec index 2ad1edeaa87..770dbd642d6 100644 --- a/contrib/test_decoding/specs/catalog_change_snapshot.spec +++ b/contrib/test_decoding/specs/catalog_change_snapshot.spec @@ -53,3 +53,10 @@ permutation "s0_init" "s0_begin" "s0_savepoint" "s0_truncate" "s1_checkpoint" "s # transaction to do timetravel since one of its subtransactions has been marked as # containing catalog changes. permutation "s0_init" "s0_begin" "s0_savepoint" "s0_insert" "s1_checkpoint" "s1_get_changes" "s0_insert2" "s0_commit" "s0_begin" "s0_insert" "s1_checkpoint" "s1_get_changes" "s0_commit" "s1_get_changes" + +# The last decoding restarts from the first checkpoint and adds invalidation +# messages generated by "s0_truncate" to the subtransaction. While +# processing the commit record for the top-level transaction, we decide +# to skip this xact but ensure that corresponding invalidation messages +# get processed. +permutation "s0_init" "s0_begin" "s0_savepoint" "s0_insert" "s1_checkpoint" "s1_get_changes" "s0_truncate" "s0_commit" "s0_begin" "s0_insert" "s1_checkpoint" "s1_get_changes" "s0_commit" "s1_get_changes" diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index b96f40c27b2..50df199f01f 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -2112,9 +2112,6 @@ SnapBuildXidSetCatalogChanges(SnapBuild *builder, TransactionId xid, int subxcnt ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn); for (int i = 0; i < subxcnt; i++) - { - ReorderBufferAssignChild(builder->reorder, xid, subxacts[i], lsn); ReorderBufferXidSetCatalogChanges(builder->reorder, subxacts[i], lsn); - } } } From 398b6fb8dff2310f7eb78b8da11525fcda4c8ec6 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Fri, 21 Oct 2022 12:33:47 +0530 Subject: [PATCH 95/95] Add CHECK_FOR_INTERRUPTS while restoring changes during decoding. Previously in commit 42681dffaf, we added CFI during decoding changes but missed another similar case that can happen while restoring changes spilled to disk back into memory in a loop. 
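A rough reproduction sketch (assumes a branch that has the
logical_decoding_work_mem GUC and the tbl1 table from the test_decoding
specs; the row count is arbitrary):

-- Make the transaction big enough that its changes spill to disk, then
-- decode them; with this fix the restore loop responds to query cancel.
SET logical_decoding_work_mem = '64kB';
SELECT 'init' FROM pg_create_logical_replication_slot('interrupt_slot', 'test_decoding');
BEGIN;
INSERT INTO tbl1 SELECT i, i FROM generate_series(1, 100000) AS i;
COMMIT;
SELECT count(*) FROM pg_logical_slot_get_changes('interrupt_slot', NULL, NULL);
SELECT pg_drop_replication_slot('interrupt_slot');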
Reported-by: Robert Haas Author: Amit Kapila Backpatch-through: 10 Discussion: https://postgr.es/m/CA+TgmoaLObg0QbstbC8ykDwOdD1bDkr4AbPpB=0DPgA2JW0mFg@mail.gmail.com --- src/backend/replication/logical/reorderbuffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index d55782e0106..271a74c6908 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -4103,6 +4103,8 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn, int readBytes; ReorderBufferDiskChange *ondisk; + CHECK_FOR_INTERRUPTS(); + if (*fd == -1) { char path[MAXPGPATH];