From a826073fd8508bc08809a53f1faa5b7c786b057b Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 31 Jul 2022 13:43:17 -0400 Subject: [PATCH 01/12] Fix trim_array() for zero-dimensional array argument. The code tried to access ARR_DIMS(v)[0] and ARR_LBOUND(v)[0] whether or not those values exist. This made the range check on the "n" argument unstable --- it might or might not fail, and if it did it would report garbage for the allowed upper limit. These bogus accesses would probably annoy Valgrind, and if you were very unlucky even lead to SIGSEGV. Report and fix by Martin Kalcher. Back-patch to v14 where this function was added. Discussion: https://postgr.es/m/baaeb413-b8a8-4656-5757-ef347e5ec11f@aboutsource.net --- src/backend/utils/adt/arrayfuncs.c | 9 ++++++--- src/test/regress/expected/arrays.out | 2 ++ src/test/regress/sql/arrays.sql | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 26cd7458961..6d00be72b16 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -6802,7 +6802,7 @@ trim_array(PG_FUNCTION_ARGS) { ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); int n = PG_GETARG_INT32(1); - int array_length = ARR_DIMS(v)[0]; + int array_length = (ARR_NDIM(v) > 0) ? 
ARR_DIMS(v)[0] : 0; int16 elmlen; bool elmbyval; char elmalign; @@ -6822,8 +6822,11 @@ trim_array(PG_FUNCTION_ARGS) /* Set all the bounds as unprovided except the first upper bound */ memset(lowerProvided, false, sizeof(lowerProvided)); memset(upperProvided, false, sizeof(upperProvided)); - upper[0] = ARR_LBOUND(v)[0] + array_length - n - 1; - upperProvided[0] = true; + if (ARR_NDIM(v) > 0) + { + upper[0] = ARR_LBOUND(v)[0] + array_length - n - 1; + upperProvided[0] = true; + } /* Fetch the needed information about the element type */ get_typlenbyvalalign(ARR_ELEMTYPE(v), &elmlen, &elmbyval, &elmalign); diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index a8686405408..d6a8f8f8545 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -2586,3 +2586,5 @@ SELECT trim_array(ARRAY[1, 2, 3], -1); -- fail ERROR: number of elements to trim must be between 0 and 3 SELECT trim_array(ARRAY[1, 2, 3], 10); -- fail ERROR: number of elements to trim must be between 0 and 3 +SELECT trim_array(ARRAY[]::int[], 1); -- fail +ERROR: number of elements to trim must be between 0 and 0 diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index d10278e32f0..cefe5b05b3b 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -875,3 +875,4 @@ FROM SELECT trim_array(ARRAY[1, 2, 3], -1); -- fail SELECT trim_array(ARRAY[1, 2, 3], 10); -- fail +SELECT trim_array(ARRAY[]::int[], 1); -- fail From 41f287c2b39eb49f4645cfe899d0d32031090793 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 3 Aug 2022 17:33:42 -0400 Subject: [PATCH 02/12] Fix incorrect tests for SRFs in relation_can_be_sorted_early(). Commit fac1b470a thought we could check for set-returning functions by testing only the top-level node in an expression tree. 
This is wrong in itself, and to make matters worse it encouraged others to make the same mistake, by exporting tlist.c's special-purpose IS_SRF_CALL() as a widely-visible macro. I can't find any evidence that anyone's taken the bait, but it was only a matter of time. Use expression_returns_set() instead, and stuff the IS_SRF_CALL() genie back in its bottle, this time with a warning label. I also added a couple of cross-reference comments. After a fair amount of fooling around, I've despaired of making a robust test case that exposes the bug reliably, so no test case here. (Note that the test case added by fac1b470a is itself broken, in that it doesn't notice if you remove the code change. The repro given by the bug submitter currently doesn't fail either in v15 or HEAD, though I suspect that may indicate an unrelated bug.) Per bug #17564 from Martijn van Oosterhout. Back-patch to v13, as the faulty patch was. Discussion: https://postgr.es/m/17564-c7472c2f90ef2da3@postgresql.org --- src/backend/nodes/nodeFuncs.c | 6 ++++++ src/backend/optimizer/path/equivclass.c | 4 ++-- src/backend/optimizer/util/tlist.c | 11 +++++++++++ src/include/optimizer/optimizer.h | 5 ----- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 8e2b9230cb3..6c45d349524 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -754,6 +754,12 @@ expression_returns_set_walker(Node *node, void *context) /* else fall through to check args */ } + /* + * If you add any more cases that return sets, also fix + * expression_returns_set_rows() in clauses.c and IS_SRF_CALL() in + * tlist.c. 
+ */ + /* Avoid recursion for some cases that parser checks not to return a set */ if (IsA(node, Aggref)) return false; diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 547b2550917..6a941c662c7 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -1005,7 +1005,7 @@ relation_can_be_sorted_early(PlannerInfo *root, RelOptInfo *rel, * one are effectively checking properties of targetexpr, so there's * no point in asking whether some other EC member would be better.) */ - if (IS_SRF_CALL((Node *) em->em_expr)) + if (expression_returns_set((Node *) em->em_expr)) continue; /* @@ -1033,7 +1033,7 @@ relation_can_be_sorted_early(PlannerInfo *root, RelOptInfo *rel, * member in this case; since SRFs can't appear in WHERE, they cannot * belong to multi-member ECs.) */ - if (IS_SRF_CALL((Node *) em->em_expr)) + if (expression_returns_set((Node *) em->em_expr)) return false; return true; diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 9d0f3274dbe..bb16a8d2c38 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -32,6 +32,17 @@ typedef struct maxSortGroupRef_context static bool maxSortGroupRef_walker(Node *node, maxSortGroupRef_context *cxt); +/* + * Test if an expression node represents a SRF call. Beware multiple eval! + * + * Please note that this is only meant for use in split_pathtarget_at_srfs(); + * if you use it anywhere else, your code is almost certainly wrong for SRFs + * nested within expressions. Use expression_returns_set() instead. + */ +#define IS_SRF_CALL(node) \ + ((IsA(node, FuncExpr) && ((FuncExpr *) (node))->funcretset) || \ + (IsA(node, OpExpr) && ((OpExpr *) (node))->opretset)) + /* * Data structures for split_pathtarget_at_srfs(). 
To preserve the identity * of sortgroupref items even if they are textually equal(), what we track is diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index f8400206288..c2553891186 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -25,11 +25,6 @@ #include "nodes/parsenodes.h" #include "nodes/plannodes.h" -/* Test if an expression node represents a SRF call. Beware multiple eval! */ -#define IS_SRF_CALL(node) \ - ((IsA(node, FuncExpr) && ((FuncExpr *) (node))->funcretset) || \ - (IsA(node, OpExpr) && ((OpExpr *) (node))->opretset)) - /* * We don't want to include nodes/pathnodes.h here, because non-planner * code should generally treat PlannerInfo as an opaque typedef. From 7d648ad4f37629f5d4522c4d0affbcaed39763a5 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Thu, 4 Aug 2022 15:29:25 +0700 Subject: [PATCH 03/12] Clarify DROP EXTENSION docs regarding explicitly dependent routines Per suggestion from Robert Haas Backpatch to v14 Discussion: https://www.postgresql.org/message-id/CA%2BTgmoZ1QvHquYHLkMy1oHKqz4-E7QQctj6e0ocq_GP1B5%2B9bA%40mail.gmail.com --- doc/src/sgml/ref/drop_extension.sgml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/ref/drop_extension.sgml b/doc/src/sgml/ref/drop_extension.sgml index c01ddace84c..4ea20131c79 100644 --- a/doc/src/sgml/ref/drop_extension.sgml +++ b/doc/src/sgml/ref/drop_extension.sgml @@ -32,7 +32,8 @@ DROP EXTENSION [ IF EXISTS ] name [ DROP EXTENSION removes extensions from the database. Dropping an extension causes its component objects, and other explicitly dependent routines (see , - the depends on extension action), to be dropped as well. + the DEPENDS ON EXTENSION extension_name + action), to be dropped as well. 
From a7f7ff100fbd4d260bb42fb72345f47e54e321d1 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Thu, 4 Aug 2022 15:59:32 +0700 Subject: [PATCH 04/12] Fix assorted doc typos Erik Rijkers and Justin Pryzby Backpatch to v14 Discussion: https://www.postgresql.org/message-id/b79bfeff-d0e3-29a3-2576-0e325848dede%40xs4all.nl --- doc/src/sgml/brin.sgml | 2 +- doc/src/sgml/ref/drop_extension.sgml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/brin.sgml b/doc/src/sgml/brin.sgml index 4ee8908b65a..71697155d7c 100644 --- a/doc/src/sgml/brin.sgml +++ b/doc/src/sgml/brin.sgml @@ -75,7 +75,7 @@ summarized will cause the summary information to be updated with data from the new tuples. When a new page is created that does not fall within the last - summarized range, the range that the new page belongs into + summarized range, the range that the new page belongs to does not automatically acquire a summary tuple; those tuples remain unsummarized until a summarization run is invoked later, creating the initial summary for that range. diff --git a/doc/src/sgml/ref/drop_extension.sgml b/doc/src/sgml/ref/drop_extension.sgml index 4ea20131c79..dcc52c2ced0 100644 --- a/doc/src/sgml/ref/drop_extension.sgml +++ b/doc/src/sgml/ref/drop_extension.sgml @@ -81,8 +81,8 @@ DROP EXTENSION [ IF EXISTS ] name [ This option prevents the specified extensions from being dropped - if there exists non-extension-member objects that depends on any - the extensions. This is the default. + if there exist non-extension-member objects that depend on any + of the extensions. This is the default. From 69a8bff0308bd639eb2c4646a9b9dfd3858b2586 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 4 Aug 2022 11:11:22 -0400 Subject: [PATCH 05/12] Add proper regression test for the recent SRFs-in-pathkeys problem. Remove the test case added by commit fac1b470a, which never actually worked to expose the problem it claimed to test. 
Replace it with a case that does expose the problem, and also covers the SRF-not-at-the-top deficiency repaired in 1aa8dad41. Richard Guo, with some editorialization by me Discussion: https://postgr.es/m/17564-c7472c2f90ef2da3@postgresql.org --- .../regress/expected/incremental_sort.out | 12 --------- .../expected/incremental_sort_optimizer.out | 14 ---------- src/test/regress/expected/select_parallel.out | 27 +++++++++++++++++++ src/test/regress/sql/incremental_sort.sql | 2 -- src/test/regress/sql/select_parallel.sql | 6 +++++ 5 files changed, 33 insertions(+), 28 deletions(-) diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index c5f52e1e97f..8a25141d8f5 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1804,15 +1804,3 @@ order by 1, 2; -> Function Scan on generate_series (7 rows) --- Disallow pushing down sort when pathkey is an SRF. -explain (costs off) select unique1 from tenk1 order by unnest('{1,2}'::int[]); - QUERY PLAN -------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) - Merge Key: (unnest('{1,2}'::anyarray)) - -> Sort - Sort Key: (unnest('{1,2}'::anyarray)) - -> ProjectSet - -> Index Only Scan using tenk1_unique1 on tenk1 -(6 rows) - diff --git a/src/test/regress/expected/incremental_sort_optimizer.out b/src/test/regress/expected/incremental_sort_optimizer.out index f5fd24f81ba..0648bb03799 100644 --- a/src/test/regress/expected/incremental_sort_optimizer.out +++ b/src/test/regress/expected/incremental_sort_optimizer.out @@ -1658,17 +1658,3 @@ order by 1, 2; Optimizer: Postgres query optimizer (8 rows) --- Disallow pushing down sort when pathkey is an SRF. 
-explain (costs off) select unique1 from tenk1 order by unnest('{1,2}'::int[]); - QUERY PLAN ------------------------------------------------------ - Result - -> Gather Motion 3:1 (slice1; segments: 3) - Merge Key: (unnest('{1,2}'::anyarray)) - -> Sort - Sort Key: (unnest('{1,2}'::anyarray)) - -> ProjectSet - -> Seq Scan on tenk1 - Optimizer: Pivotal Optimizer (GPORCA) -(8 rows) - diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index 4de01f4f632..db579197c3a 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -1217,6 +1217,33 @@ SELECT generate_series(1, two), array(select generate_series(1, two)) Optimizer: Postgres query optimizer (18 rows) +-- must disallow pushing sort below gather when pathkey contains an SRF +EXPLAIN (VERBOSE, COSTS OFF) +SELECT unnest(ARRAY[]::integer[]) + 1 AS pathkey + FROM tenk1 t1 JOIN tenk1 t2 ON TRUE + ORDER BY pathkey; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 6:1 (slice1; segments: 6) + Output: (((unnest('{}'::integer[])) + 1)) + Merge Key: (((unnest('{}'::integer[])) + 1)) + -> Sort + Output: (((unnest('{}'::integer[])) + 1)) + Sort Key: (((unnest('{}'::integer[])) + 1)) + -> Result + Output: ((unnest('{}'::integer[])) + 1) + -> ProjectSet + Output: unnest('{}'::integer[]) + -> Nested Loop + -> Parallel Seq Scan on public.tenk1 t1 + Output: t1.unique1, t1.unique2, t1.two, t1.four, t1.ten, t1.twenty, t1.hundred, t1.thousand, t1.twothousand, t1.fivethous, t1.tenthous, t1.odd, t1.even, t1.stringu1, t1.stringu2, t1.string4 + -> Materialize + -> Broadcast Motion 3:6 (slice2; segments: 3) + -> Seq Scan on public.tenk1 t2 + Settings: enable_parallel = 'on', min_parallel_table_scan_size = '0', optimizer = 'off', 
parallel_setup_cost = '0', parallel_tuple_cost = '0' + Optimizer: Postgres query optimizer +(18 rows) + -- test passing expanded-value representations to workers CREATE FUNCTION make_some_array(int,int) returns int[] as $$declare x int[]; diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index afd1dab2045..648eced7e14 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -294,5 +294,3 @@ from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; explain (costs off) select sub.unique1, stringu1 || random()::text from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub order by 1, 2; --- Disallow pushing down sort when pathkey is an SRF. -explain (costs off) select unique1 from tenk1 order by unnest('{1,2}'::int[]); diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql index 846066fad05..4a22a001ef9 100644 --- a/src/test/regress/sql/select_parallel.sql +++ b/src/test/regress/sql/select_parallel.sql @@ -443,6 +443,12 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT generate_series(1, two), array(select generate_series(1, two)) FROM tenk1 ORDER BY tenthous; +-- must disallow pushing sort below gather when pathkey contains an SRF +EXPLAIN (VERBOSE, COSTS OFF) +SELECT unnest(ARRAY[]::integer[]) + 1 AS pathkey + FROM tenk1 t1 JOIN tenk1 t2 ON TRUE + ORDER BY pathkey; + -- test passing expanded-value representations to workers CREATE FUNCTION make_some_array(int,int) returns int[] as $$declare x int[]; From a64e62b9b5fadf5744476d11b245b4ca88eae7ae Mon Sep 17 00:00:00 2001 From: Etsuro Fujita Date: Fri, 5 Aug 2022 17:15:03 +0900 Subject: [PATCH 06/12] postgres_fdw: Disable batch insertion when there are WCO constraints. 
When inserting into a view referencing a foreign table that has WITH CHECK OPTION constraints, in single-insert mode postgres_fdw retrieves the data that was actually inserted on the remote side so that the WITH CHECK OPTION constraints are enforced with the data locally, but in batch-insert mode it cannot currently retrieve the data (except for the row first inserted through the view), resulting in enforcing the WITH CHECK OPTION constraints with the data passed from the core (except for the first-inserted row), which led to incorrect results when inserting into a view referencing a foreign table in which a remote BEFORE ROW INSERT trigger changes the rows inserted through the view so that they violate the view's WITH CHECK OPTION constraint. Also, the query inserting into the view caused an assertion failure in assert-enabled builds. Fix these by disabling batch insertion when inserting into such a view. Back-patch to v14 where batch insertion was added. Discussion: https://postgr.es/m/CAPmGK17LpbTZs4m4a_6THP54UBeK9fHvX8aVVA%2BC6yEZDZwQcg%40mail.gmail.com --- .../postgres_fdw/expected/postgres_fdw.out | 44 +++++++++++++++++++ contrib/postgres_fdw/postgres_fdw.c | 6 ++- contrib/postgres_fdw/sql/postgres_fdw.sql | 16 +++++++ 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 655bdced4f1..2874e69c7e5 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -6486,6 +6486,29 @@ SELECT * FROM foreign_tbl; 20 | 30 (1 row) +-- We don't allow batch insert when there are any WCO constraints +ALTER SERVER loopback OPTIONS (ADD batch_size '10'); +EXPLAIN (VERBOSE, COSTS OFF) +INSERT INTO rw_view VALUES (0, 15), (0, 5); + QUERY PLAN +-------------------------------------------------------------------------------- + Insert on public.foreign_tbl + Remote SQL: INSERT INTO public.base_tbl(a, b) VALUES ($1, $2) 
RETURNING a, b + Batch Size: 1 + -> Values Scan on "*VALUES*" + Output: "*VALUES*".column1, "*VALUES*".column2 +(5 rows) + +INSERT INTO rw_view VALUES (0, 15), (0, 5); -- should fail +ERROR: new row violates check option for view "rw_view" +DETAIL: Failing row contains (10, 5). +SELECT * FROM foreign_tbl; + a | b +----+---- + 20 | 30 +(1 row) + +ALTER SERVER loopback OPTIONS (DROP batch_size); DROP FOREIGN TABLE foreign_tbl CASCADE; NOTICE: drop cascades to view rw_view DROP TRIGGER row_before_insupd_trigger ON base_tbl; @@ -6578,6 +6601,27 @@ SELECT * FROM foreign_tbl; 20 | 30 (1 row) +-- We don't allow batch insert when there are any WCO constraints +ALTER SERVER loopback OPTIONS (ADD batch_size '10'); +EXPLAIN (VERBOSE, COSTS OFF) +INSERT INTO rw_view VALUES (0, 15), (0, 5); + QUERY PLAN +-------------------------------------------------------- + Insert on public.parent_tbl + -> Values Scan on "*VALUES*" + Output: "*VALUES*".column1, "*VALUES*".column2 +(3 rows) + +INSERT INTO rw_view VALUES (0, 15), (0, 5); -- should fail +ERROR: new row violates check option for view "rw_view" +DETAIL: Failing row contains (10, 5). +SELECT * FROM foreign_tbl; + a | b +----+---- + 20 | 30 +(1 row) + +ALTER SERVER loopback OPTIONS (DROP batch_size); DROP FOREIGN TABLE foreign_tbl CASCADE; DROP TRIGGER row_before_insupd_trigger ON child_tbl; DROP TABLE parent_tbl CASCADE; diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 58599c7aeaa..b5adcb86c09 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -2046,8 +2046,9 @@ postgresGetForeignModifyBatchSize(ResultRelInfo *resultRelInfo) batch_size = get_batch_size_option(resultRelInfo->ri_RelationDesc); /* - * Disable batching when we have to use RETURNING or there are any - * BEFORE/AFTER ROW INSERT triggers on the foreign table. 
+ * Disable batching when we have to use RETURNING, there are any + * BEFORE/AFTER ROW INSERT triggers on the foreign table, or there are any + * WITH CHECK OPTION constraints from parent views. * * When there are any BEFORE ROW INSERT triggers on the table, we can't * support it, because such triggers might query the table we're inserting @@ -2055,6 +2056,7 @@ postgresGetForeignModifyBatchSize(ResultRelInfo *resultRelInfo) * and prepared for insertion are not there. */ if (resultRelInfo->ri_projectReturning != NULL || + resultRelInfo->ri_WithCheckOptions != NIL || (resultRelInfo->ri_TrigDesc && (resultRelInfo->ri_TrigDesc->trig_insert_before_row || resultRelInfo->ri_TrigDesc->trig_insert_after_row))) diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 724c1802b28..3663ca3bdf4 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -1442,6 +1442,14 @@ UPDATE rw_view SET b = b + 15; UPDATE rw_view SET b = b + 15; -- ok SELECT * FROM foreign_tbl; +-- We don't allow batch insert when there are any WCO constraints +ALTER SERVER loopback OPTIONS (ADD batch_size '10'); +EXPLAIN (VERBOSE, COSTS OFF) +INSERT INTO rw_view VALUES (0, 15), (0, 5); +INSERT INTO rw_view VALUES (0, 15), (0, 5); -- should fail +SELECT * FROM foreign_tbl; +ALTER SERVER loopback OPTIONS (DROP batch_size); + DROP FOREIGN TABLE foreign_tbl CASCADE; DROP TRIGGER row_before_insupd_trigger ON base_tbl; DROP TABLE base_tbl; @@ -1480,6 +1488,14 @@ UPDATE rw_view SET b = b + 15; UPDATE rw_view SET b = b + 15; -- ok SELECT * FROM foreign_tbl; +-- We don't allow batch insert when there are any WCO constraints +ALTER SERVER loopback OPTIONS (ADD batch_size '10'); +EXPLAIN (VERBOSE, COSTS OFF) +INSERT INTO rw_view VALUES (0, 15), (0, 5); +INSERT INTO rw_view VALUES (0, 15), (0, 5); -- should fail +SELECT * FROM foreign_tbl; +ALTER SERVER loopback OPTIONS (DROP batch_size); + DROP FOREIGN TABLE foreign_tbl 
CASCADE; DROP TRIGGER row_before_insupd_trigger ON child_tbl; DROP TABLE parent_tbl CASCADE; From 5b92e1b284bfdaffcad0dafd033383377740fcd8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 5 Aug 2022 13:58:37 -0400 Subject: [PATCH 07/12] Fix non-bulletproof ScalarArrayOpExpr code for extended statistics. statext_is_compatible_clause_internal() checked that the arguments of a ScalarArrayOpExpr are one Var and one Const, but it would allow cases where the Const was on the left. Subsequent uses of the clause are not expecting that and would suffer assertion failures or core dumps. mcv.c also had not bothered to cope with the case of a NULL array constant, which seems really unacceptably sloppy of somebody. (Although our tools failed us there too, since AFAIK neither Coverity nor any compiler warned of the obvious use-of-uninitialized-variable condition.) It seems best to handle that by having statext_is_compatible_clause_internal() reject it. Noted while fixing bug #17570. Back-patch to v13 where the extended stats code grew some awareness of ScalarArrayOpExpr. 
--- src/backend/statistics/extended_stats.c | 12 +++++++--- src/backend/statistics/mcv.c | 23 ++++++++----------- src/test/regress/expected/stats_ext.out | 22 ++++++++++++++---- .../regress/expected/stats_ext_optimizer.out | 22 ++++++++++++++---- src/test/regress/sql/stats_ext.sql | 14 +++++++---- 5 files changed, 65 insertions(+), 28 deletions(-) diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index ee1c25416bd..d91d80357ff 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1333,8 +1333,8 @@ choose_best_statistics(List *stats, char requiredkind, * * (c) combinations using AND/OR/NOT * - * (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (array)) or (Var/Expr - * op ALL (array)) + * (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (Const)) or + * (Var/Expr op ALL (Const)) * * In the future, the range of supported clauses may be expanded to more * complex cases, for example (Var op Var). @@ -1454,13 +1454,19 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, RangeTblEntry *rte = root->simple_rte_array[relid]; ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; Node *clause_expr; + Const *cst; + bool expronleft; /* Only expressions with two arguments are considered compatible. 
*/ if (list_length(expr->args) != 2) return false; /* Check if the expression has the right shape (one Var, one Const) */ - if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL)) + if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft)) + return false; + + /* We only support Var on left and non-null array constants */ + if (!expronleft || cst->constisnull) return false; /* diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index e6a60865282..9cbd093fce7 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -1746,20 +1746,17 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft)) elog(ERROR, "incompatible clause"); - /* ScalarArrayOpExpr has the Var always on the left */ - Assert(expronleft); + /* We expect Var on left and non-null constant on right */ + if (!expronleft || cst->constisnull) + elog(ERROR, "incompatible clause"); - /* XXX what if (cst->constisnull == NULL)? 
*/ - if (!cst->constisnull) - { - arrayval = DatumGetArrayTypeP(cst->constvalue); - get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), - &elmlen, &elmbyval, &elmalign); - deconstruct_array(arrayval, - ARR_ELEMTYPE(arrayval), - elmlen, elmbyval, elmalign, - &elem_values, &elem_nulls, &num_elems); - } + arrayval = DatumGetArrayTypeP(cst->constvalue); + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), + &elmlen, &elmbyval, &elmalign); + deconstruct_array(arrayval, + ARR_ELEMTYPE(arrayval), + elmlen, elmbyval, elmalign, + &elem_values, &elem_nulls, &num_elems); /* match the attribute/expression to a dimension of the statistic */ idx = mcv_match_expression(clause_expr, keys, exprs, &collid); diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 06aff4d5bd0..60c7915eff0 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -1835,7 +1835,8 @@ CREATE TABLE mcv_lists ( b VARCHAR, filler3 DATE, c INT, - d TEXT + d TEXT, + ia INT[] ) WITH (autovacuum_enabled = off); -- random data (no MCV list) @@ -1905,8 +1906,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 A -- 100 distinct combinations, all in the MCV list TRUNCATE mcv_lists; DROP STATISTICS mcv_lists_stats; -INSERT INTO mcv_lists (a, b, c, filler1) - SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i); +INSERT INTO mcv_lists (a, b, c, ia, filler1) + SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i + FROM generate_series(1,5000) s(i); ANALYZE mcv_lists; SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); estimated | actual @@ -2046,8 +2048,14 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY 1 | 100 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); + estimated | actual +-----------+-------- + 4 | 50 +(1 row) + -- create statistics 
-CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c, ia FROM mcv_lists; ANALYZE mcv_lists; SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); estimated | actual @@ -2193,6 +2201,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY 100 | 100 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); + estimated | actual +-----------+-------- + 4 | 50 +(1 row) + -- check change of unrelated column type does not reset the MCV statistics ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64); SELECT d.stxdmcv IS NOT NULL diff --git a/src/test/regress/expected/stats_ext_optimizer.out b/src/test/regress/expected/stats_ext_optimizer.out index dafbf0a28b4..9ad26109b59 100644 --- a/src/test/regress/expected/stats_ext_optimizer.out +++ b/src/test/regress/expected/stats_ext_optimizer.out @@ -1857,7 +1857,8 @@ CREATE TABLE mcv_lists ( b VARCHAR, filler3 DATE, c INT, - d TEXT + d TEXT, + ia INT[] ) WITH (autovacuum_enabled = off); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'filler1' as the Apache Cloudberry data distribution key for this table. 
@@ -1929,8 +1930,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 A -- 100 distinct combinations, all in the MCV list TRUNCATE mcv_lists; DROP STATISTICS mcv_lists_stats; -INSERT INTO mcv_lists (a, b, c, filler1) - SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i); +INSERT INTO mcv_lists (a, b, c, ia, filler1) + SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i + FROM generate_series(1,5000) s(i); ANALYZE mcv_lists; SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); estimated | actual @@ -2070,8 +2072,14 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY 22 | 100 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); + estimated | actual +-----------+-------- + 4 | 50 +(1 row) + -- create statistics -CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c, ia FROM mcv_lists; ANALYZE mcv_lists; SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'''); estimated | actual @@ -2217,6 +2225,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY 22 | 100 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); + estimated | actual +-----------+-------- + 4 | 50 +(1 row) + -- check change of unrelated column type does not reset the MCV statistics ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64); SELECT d.stxdmcv IS NOT NULL diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 744bb00c161..abd1d0b8608 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -923,7 +923,8 @@ CREATE TABLE mcv_lists ( b VARCHAR, filler3 DATE, c INT, - d TEXT + d TEXT, + ia INT[] ) WITH (autovacuum_enabled = off); @@ -972,8 +973,9 @@ 
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 A TRUNCATE mcv_lists; DROP STATISTICS mcv_lists_stats; -INSERT INTO mcv_lists (a, b, c, filler1) - SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i); +INSERT INTO mcv_lists (a, b, c, ia, filler1) + SELECT mod(i,100), mod(i,50), mod(i,25), array[mod(i,25)], i + FROM generate_series(1,5000) s(i); ANALYZE mcv_lists; @@ -1023,8 +1025,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); + -- create statistics -CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; +CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c, ia FROM mcv_lists; ANALYZE mcv_lists; @@ -1076,6 +1080,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); + -- check change of unrelated column type does not reset the MCV statistics ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64); From c4f7cb66c76d413c0eeb7e46ae99222e4139bfef Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 5 Aug 2022 15:00:03 -0400 Subject: [PATCH 08/12] Fix handling of bare boolean expressions in mcv_get_match_bitmap. Since v14, the extended stats machinery will try to estimate for otherwise-unsupported boolean expressions if they match an expression available from an extended stats object. mcv.c did not get the memo about this, and would spit up with "unknown clause type". 
Fortunately the case is easy to handle, since we can expect the expression yields boolean. While here, replace some not-terribly-on-point assertions with simpler runtime tests for lookup failure. That seems appropriate so that we get an elog not a crash if we somehow get to the new it-should-be-a-bool-expression code with a subexpression that doesn't match any stats column. Per report from Danny Shemesh. Thanks to Justin Pryzby for preliminary investigation. Discussion: https://postgr.es/m/CAFZC=QqD6=27wQPOW1pbRa98KPyuyn+7cL_Ay_Ck-roZV84vHg@mail.gmail.com --- src/backend/statistics/mcv.c | 51 ++++++++++++------- src/test/regress/expected/stats_ext.out | 19 +++++-- .../regress/expected/stats_ext_optimizer.out | 23 ++++++--- src/test/regress/sql/stats_ext.sql | 17 +++++-- 4 files changed, 76 insertions(+), 34 deletions(-) diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 9cbd093fce7..50c65c3fdeb 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -1531,13 +1531,13 @@ pg_mcv_list_send(PG_FUNCTION_ARGS) /* * match the attribute/expression to a dimension of the statistic * - * Match the attribute/expression to statistics dimension. Optionally - * determine the collation. + * Returns the zero-based index of the matching statistics dimension. + * Optionally determines the collation. 
*/ static int mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid) { - int idx = -1; + int idx; if (IsA(expr, Var)) { @@ -1549,20 +1549,19 @@ mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid) idx = bms_member_index(keys, var->varattno); - /* make sure the index is valid */ - Assert((idx >= 0) && (idx <= bms_num_members(keys))); + if (idx < 0) + elog(ERROR, "variable not found in statistics object"); } else { + /* expression - lookup in stats expressions */ ListCell *lc; - /* expressions are stored after the simple columns */ - idx = bms_num_members(keys); - if (collid) *collid = exprCollation(expr); - /* expression - lookup in stats expressions */ + /* expressions are stored after the simple columns */ + idx = bms_num_members(keys); foreach(lc, exprs) { Node *stat_expr = (Node *) lfirst(lc); @@ -1573,13 +1572,10 @@ mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid) idx++; } - /* make sure the index is valid */ - Assert((idx >= bms_num_members(keys)) && - (idx <= bms_num_members(keys) + list_length(exprs))); + if (lc == NULL) + elog(ERROR, "expression not found in statistics object"); } - Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs))); - return idx; } @@ -1659,8 +1655,6 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, /* match the attribute/expression to a dimension of the statistic */ idx = mcv_match_expression(clause_expr, keys, exprs, &collid); - Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs))); - /* * Walk through the MCV items and evaluate the current clause. 
We * can skip items that were already ruled out, and terminate if @@ -1944,7 +1938,30 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, } } else - elog(ERROR, "unknown clause type: %d", clause->type); + { + /* Otherwise, it must be a bare boolean-returning expression */ + int idx; + + /* match the expression to a dimension of the statistic */ + idx = mcv_match_expression(clause, keys, exprs, NULL); + + /* + * Walk through the MCV items and evaluate the current clause. We + * can skip items that were already ruled out, and terminate if + * there are no remaining MCV items that might possibly match. + */ + for (i = 0; i < mcvlist->nitems; i++) + { + bool match; + MCVItem *item = &mcvlist->items[i]; + + /* "match" just means it's bool TRUE */ + match = !item->isnull[idx] && DatumGetBool(item->values[idx]); + + /* now, update the match bitmap, depending on OR/AND type */ + matches[i] = RESULT_MERGE(matches[i], is_or, match); + } + } } return matches; diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 60c7915eff0..ba2914d3c7a 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -270,14 +270,23 @@ SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_3'; CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1; -- date_trunc on timestamp is immutable CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1; +-- check use of a boolean-returning expression +CREATE STATISTICS ab1_exprstat_6 ON + (case a when 1 then true else false end), b FROM ab1; -- insert some data and run analyze, to test that these cases build properly INSERT INTO ab1 -SELECT - generate_series(1,10), - generate_series(1,10), - generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'), - generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day'); +SELECT x / 10, x / 3, + '2020-10-01'::timestamp + x * interval '1 
day', + '2020-10-01'::timestamptz + x * interval '1 day' +FROM generate_series(1, 100) x; ANALYZE ab1; +-- apply some stats +SELECT * FROM check_estimated_rows('SELECT * FROM ab1 WHERE (case a when 1 then true else false end) AND b=2'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + DROP TABLE ab1; -- Verify supported object types for extended statistics CREATE schema tststats; diff --git a/src/test/regress/expected/stats_ext_optimizer.out b/src/test/regress/expected/stats_ext_optimizer.out index 9ad26109b59..655f7ab1147 100644 --- a/src/test/regress/expected/stats_ext_optimizer.out +++ b/src/test/regress/expected/stats_ext_optimizer.out @@ -278,14 +278,23 @@ SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_3'; CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1; -- date_trunc on timestamp is immutable CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1; +-- check use of a boolean-returning expression +CREATE STATISTICS ab1_exprstat_6 ON + (case a when 1 then true else false end), b FROM ab1; -- insert some data and run analyze, to test that these cases build properly INSERT INTO ab1 -SELECT - generate_series(1,10), - generate_series(1,10), - generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'), - generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day'); +SELECT x / 10, x / 3, + '2020-10-01'::timestamp + x * interval '1 day', + '2020-10-01'::timestamptz + x * interval '1 day' +FROM generate_series(1, 100) x; ANALYZE ab1; +-- apply some stats +SELECT * FROM check_estimated_rows('SELECT * FROM ab1 WHERE (case a when 1 then true else false end) AND b=2'); + estimated | actual +-----------+-------- + 3 | 0 +(1 row) + DROP TABLE ab1; -- Verify supported object types for extended statistics CREATE schema tststats; @@ -2075,7 +2084,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY SELECT * FROM 
check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); estimated | actual -----------+-------- - 4 | 50 + 72 | 50 (1 row) -- create statistics @@ -2228,7 +2237,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[4,5]) AND 4 = ANY(ia)'); estimated | actual -----------+-------- - 4 | 50 + 72 | 50 (1 row) -- check change of unrelated column type does not reset the MCV statistics diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index abd1d0b8608..c2a91675045 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -176,14 +176,21 @@ CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1; -- date_trunc on timestamp is immutable CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1; +-- check use of a boolean-returning expression +CREATE STATISTICS ab1_exprstat_6 ON + (case a when 1 then true else false end), b FROM ab1; + -- insert some data and run analyze, to test that these cases build properly INSERT INTO ab1 -SELECT - generate_series(1,10), - generate_series(1,10), - generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'), - generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day'); +SELECT x / 10, x / 3, + '2020-10-01'::timestamp + x * interval '1 day', + '2020-10-01'::timestamptz + x * interval '1 day' +FROM generate_series(1, 100) x; ANALYZE ab1; + +-- apply some stats +SELECT * FROM check_estimated_rows('SELECT * FROM ab1 WHERE (case a when 1 then true else false end) AND b=2'); + DROP TABLE ab1; -- Verify supported object types for extended statistics From aed00266f714b30362dbf2cae5f4a568c3c734da Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 5 Aug 2022 15:57:46 -0400 Subject: [PATCH 09/12] Partially undo commit 94da73281. 
On closer inspection, mcv.c isn't as broken for ScalarArrayOpExpr as I thought. The Var-on-right issue is real enough, but actually it does cope fine with a NULL array constant --- I was misled by an XXX comment suggesting it didn't. Undo that part of the code change, and replace the XXX comment with something less misleading. --- src/backend/statistics/extended_stats.c | 7 +++---- src/backend/statistics/mcv.c | 25 ++++++++++++++++--------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index d91d80357ff..134a047f8df 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1454,7 +1454,6 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, RangeTblEntry *rte = root->simple_rte_array[relid]; ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; Node *clause_expr; - Const *cst; bool expronleft; /* Only expressions with two arguments are considered compatible. 
*/ @@ -1462,11 +1461,11 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, return false; /* Check if the expression has the right shape (one Var, one Const) */ - if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft)) + if (!examine_opclause_args(expr->args, &clause_expr, NULL, &expronleft)) return false; - /* We only support Var on left and non-null array constants */ - if (!expronleft || cst->constisnull) + /* We only support Var on left, Const on right */ + if (!expronleft) return false; /* diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 50c65c3fdeb..01837758534 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -1740,17 +1740,24 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft)) elog(ERROR, "incompatible clause"); - /* We expect Var on left and non-null constant on right */ - if (!expronleft || cst->constisnull) + /* We expect Var on left */ + if (!expronleft) elog(ERROR, "incompatible clause"); - arrayval = DatumGetArrayTypeP(cst->constvalue); - get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), - &elmlen, &elmbyval, &elmalign); - deconstruct_array(arrayval, - ARR_ELEMTYPE(arrayval), - elmlen, elmbyval, elmalign, - &elem_values, &elem_nulls, &num_elems); + /* + * Deconstruct the array constant, unless it's NULL (we'll cover + * that case below) + */ + if (!cst->constisnull) + { + arrayval = DatumGetArrayTypeP(cst->constvalue); + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), + &elmlen, &elmbyval, &elmalign); + deconstruct_array(arrayval, + ARR_ELEMTYPE(arrayval), + elmlen, elmbyval, elmalign, + &elem_values, &elem_nulls, &num_elems); + } /* match the attribute/expression to a dimension of the statistic */ idx = mcv_match_expression(clause_expr, keys, exprs, &collid); From 442c2c7c612c59dee8dd978e4cdfdd89b239ca60 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 5 Aug 2022 17:38:53 -0400 
Subject: [PATCH 10/12] First-draft release notes for 14.5. As usual, the release notes for older branches will be made by cutting these down, but put them up for community review first. Due to the out-of-cycle release of 14.4, there are a number of commits that appeared in 14.4 that are not yet shipped in the earlier branches. This draft repeats those release note entries for convenience in preparing the older-branch notes later. They'll be stripped out of the 14.5 section after that's done. --- doc/src/sgml/release-14.sgml | 1199 ++++++++++++++++++++++++++++++++++ 1 file changed, 1199 insertions(+) diff --git a/doc/src/sgml/release-14.sgml b/doc/src/sgml/release-14.sgml index 95a6bfe037f..981993e70e3 100644 --- a/doc/src/sgml/release-14.sgml +++ b/doc/src/sgml/release-14.sgml @@ -1,6 +1,1205 @@ + + Release 14.5 + + + Release date: + 2022-08-11 + + + + This release contains a variety of fixes from 14.4. + For information about new features in major release 14, see + . + + + + Migration to Version 14.5 + + + A dump/restore is not required for those running 14.X. + + + + However, if you are upgrading from a version earlier than 14.4, + see . + + + + + Changes + + + + + + + Fix replay of CREATE DATABASE WAL + records on standby servers + (Kyotaro Horiguchi, Asim R Praveen, Paul Guo) + + + + Standby servers may encounter missing tablespace directories + when replaying database-creation WAL records. Prior to this + patch, a standby would fail to recover in such a case; + however, such directories could be legitimately missing. + Create the tablespace (as a plain directory), then check that it + has been dropped again once replay reaches a consistent state. + + + + + + + Support in place tablespaces + (Thomas Munro, Michael Paquier, Álvaro Herrera) + + + + Normally a Postgres tablespace is a symbolic link to a directory on + some other filesystem. This change allows it to just be a plain + directory. 
While this has no use for separating tables onto + different filesystems, it is a convenient setup for testing. + Moreover, it is necessary to support the CREATE + DATABASE replay fix, which transiently creates a missing + tablespace as an in place tablespace. + + + + + + + Fix permissions checks in CREATE INDEX (Nathan + Bossart, Noah Misch) + + + + The fix for CVE-2022-1552 caused CREATE INDEX to + apply the table owner's permissions while performing lookups of + operator classes and other objects, where formerly the calling + user's permissions were used. This broke dump/restore scenarios, + because pg_dump issues CREATE + INDEX before re-granting permissions. + + + + + + + In extended query protocol, force an immediate commit + after CREATE DATABASE and other commands that + can't run in a transaction block (Tom Lane) + + + + If the client does not send a Sync message immediately after such a + command, but instead sends another command, any failure in that + command would lead to rolling back the preceding command, typically + leaving inconsistent state on-disk (such as a missing or extra + database directory). The mechanisms intended to prevent that + situation turn out to work for multiple commands in a simple-Query + message, but not for a series of extended-protocol messages. To + prevent inconsistency without breaking use-cases that work today, + force an implicit commit after such commands. + + + + + + + Fix race condition when checking transaction visibility (Simon Riggs) + + + + TransactionIdIsInProgress could + report false before the subject transaction is + considered visible, leading to various misbehaviors. The race + condition window is normally very narrow, but use of synchronous + replication makes it much wider, because the wait for a synchronous + replica happens in that window. 
+ + + + + + + Fix queries in which a whole-row variable references + the result of a function that returns a domain over composite type + (Tom Lane) + + + + + + + Fix variable not found in subplan target list planner + error when pulling up a sub-SELECT that's + referenced in a GROUPING function (Richard Guo) + + + + + + + Fix incorrect plans when sorting by an expression that contains a + non-top-level set-returning function (Richard Guo, Tom Lane) + + + + + + + Fix incorrect permissions-checking code for extended statistics + (Richard Guo) + + + + If there are extended statistics on a table that the user has only + partial SELECT permissions on, some queries would + fail with unrecognized node type errors. + + + + + + + Fix extended statistics machinery to handle MCV-type statistics on + boolean-valued expressions (Tom Lane) + + + + Statistics collection worked fine, but a query containing such an + expression in WHERE would fail + with unknown clause type. + + + + + + + Avoid planner core dump with constant + = ANY(array) clauses when + there are MCV-type extended statistics on + the array variable (Tom Lane) + + + + + + + Fix ALTER TABLE ... ENABLE/DISABLE TRIGGER to + handle recursion correctly for triggers on partitioned tables + (Álvaro Herrera, Amit Langote) + + + + In certain cases, a trigger does not exist failure + would occur because the command would try to adjust the trigger on a + child partition that doesn't have it. + + + + + + + Allow cancellation of ANALYZE while it is + computing extended statistics (Tom Lane, Justin Pryzby) + + + + In some scenarios with high statistics targets, it was possible to + spend many seconds in an un-cancellable sort operation. 
+ + + + + + + Improve syntax error messages for type jsonpath + (Andrew Dunstan) + + + + + + + Prevent pg_stat_get_subscription() from + possibly returning an extra row containing garbage values + (Kuntal Ghosh) + + + + + + + Ensure that pg_stop_backup() cleans up session + state properly (Fujii Masao) + + + + This omission could lead to assertion failures or crashes later in + the session. + + + + + + + Fix trim_array() to handle a zero-dimensional + array argument sanely (Martin Kalcher) + + + + + + + Fix join alias matching in FOR [KEY] UPDATE/SHARE + clauses (Dean Rasheed) + + + + In corner cases, a misleading error could be reported. + + + + + + + Avoid crashing if too many column aliases are attached to + an XMLTABLE or JSON_TABLE + construct (Álvaro Herrera) + + + + + + + Reject ROW() expressions and functions + in FROM that have too many columns (Tom Lane) + + + + Cases with more than about 1600 columns are unsupported, and + have always failed at execution. However, it emerges that some + earlier code could be driven to assertion failures or crashes by + queries with more than 32K columns. Add a parse-time check to + prevent that. + + + + + + + When decompiling a view or rule, show a SELECT + output column's AS "?column?" alias clause + if it could be referenced elsewhere (Tom Lane) + + + + Previously, this auto-generated alias was always hidden; but there + are corner cases where doing so results in a non-restorable view or + rule definition. + + + + + + + Fix dumping of a view using a function in FROM + that returns a composite type, when column(s) of the composite type + have been dropped since the view was made (Tom Lane) + + + + This oversight could lead to dump/reload + or pg_upgrade failures, as the dumped + view would have too many column aliases for the function. 
+ + + + + + + Report implicitly-created operator families to event triggers + (Masahiko Sawada) + + + + If CREATE OPERATOR CLASS results in the implicit + creation of an operator family, that object was not reported to + event triggers that should capture such events. + + + + + + + Fix control file updates made when a restartpoint is running during + promotion of a standby server (Kyotaro Horiguchi) + + + + Previously, when the restartpoint completed it could incorrectly + update the last-checkpoint fields of the control file, potentially + leading to PANIC and failure to restart if the server crashes before + the next normal checkpoint completes. + + + + + + + Prevent triggering of + standby's wal_receiver_timeout during logical + replication of large transactions (Wang Wei, Amit Kapila) + + + + If a large transaction on the primary server sends no data to the + standby (perhaps because no table it changes is published), it was + possible for the standby to timeout. Fix that by ensuring we send + keepalive messages periodically in such situations. + + + + + + + Disallow nested backup operations in logical replication walsenders + (Fujii Masao) + + + + + + + Fix memory leak in logical replication subscribers (Hou Zhijie) + + + + + + + Fix logical replication's checking of replica identity when the + target table is partitioned (Shi Yu, Hou Zhijie) + + + + The replica identity columns have to be re-identified for the child + partition. + + + + + + + Fix failures to update cached schema data in a logical replication + subscriber after a schema change on the publisher (Shi Yu, Hou + Zhijie) + + + + + + + Ignore heap-rewrite temporary tables for materialized views in + logical replication (Euler Taveira) + + + + A FOR ALL TABLES publication will try to publish + temporary tables if left to its own devices. There is a heuristic + to suppress these, but it failed to cover internal temporary tables + created while rewriting a materialized view. 
This created a risk of + logical replication target relation ... does not + exist failures during REFRESH MATERIALIZED + VIEW. + + + + + + + Prevent open-file leak when reading an invalid timezone abbreviation + file (Kyotaro Horiguchi) + + + + Such cases could result in harmless warning messages. + + + + + + + Allow custom server parameters to have short descriptions that are + NULL (Steve Chavez) + + + + Previously, although extensions could choose to create such + settings, some code paths would crash while processing them. + + + + + + + Fix WAL consistency checking logic to correctly + handle BRIN_EVACUATE_PAGE flags (Haiyang Wang) + + + + + + + Fix erroneous assertion checks in shared hashtable management + (Thomas Munro) + + + + + + + Avoid assertion failure + when min_dynamic_shared_memory is set to a + non-default value (Thomas Munro) + + + + + + + Arrange to clean up after commit-time errors + within SPI_commit(), rather than expecting + callers to do that (Peter Eisentraut, Tom Lane) + + + + Proper cleanup is complicated and requires use of low-level + facilities, so it's not surprising that no known caller got it + right. This led to misbehaviors when a PL procedure + issued COMMIT but a failure occurred (such as a + deferred constraint check). To improve matters, + redefine SPI_commit() as starting a new + transaction, so that it becomes equivalent + to SPI_commit_and_chain() except that you get + default transaction characteristics instead of preserving the prior + transaction's characteristics. To make this somewhat transparent + API-wise, redefine SPI_start_transaction() as a + no-op. All known callers of SPI_commit() + immediately call SPI_start_transaction(), so + they will not notice any change. Similar remarks apply + to SPI_rollback(). + + + + Also fix PL/Python, which omitted any handling of such errors at all, + resulting in jumping out of the Python interpreter. This is + reported to crash Python 3.11. 
Older Python releases leak some + memory but seem okay with it otherwise. + + + + + + + Remove misguided SSL key file ownership check + in libpq (Tom Lane) + + + + In the previous minor releases, we copied the server's permission + checking rules for SSL private key files into libpq. But we should + not have also copied the server's file-ownership check. While that + works in normal use-cases, it can result in an unexpected failure + for clients running as root, and perhaps in other cases. + + + + + + + Improve libpq's handling of idle states + in pipeline mode (Álvaro Herrera, Kyotaro Horiguchi) + + + + This fixes message type 0x33 arrived from server while + idle warnings, as well as possible loss of end-of-query NULL + results from PQgetResult(). + + + + + + + Ensure ecpg reports server connection loss + sanely (Tom Lane) + + + + Misprocessing of a libpq-generated error result, such as a report of + lost connection, would lead to printing (null) + instead of a useful error message; or in older releases it would + lead to a crash. + + + + + + + Avoid core dump in ecpglib with + unexpected orders of operations (Tom Lane) + + + + Certain operations such as EXEC SQL PREPARE would + crash (rather than reporting an error as expected) if called before + establishing any database connection. + + + + + + + In ecpglib, avoid + redundant newlocale() calls (Noah Misch) + + + + Allocate a C locale object once per process when first connecting, + rather than creating and freeing locale objects once per query. + This mitigates a libc memory leak on AIX, and may offer some + performance benefit everywhere. + + + + + + + In psql's \watch + command, echo a newline after cancellation with control-C + (Pavel Stehule) + + + + This prevents libedit (and possibly also libreadline) from becoming + confused about which column the cursor is in. 
+ + + + + + + Fix pg_upgrade to detect non-upgradable + usages of functions taking anyarray (Justin Pryzby) + + + + Version 14 changed some built-in functions to take + type anycompatiblearray instead + of anyarray. While this is mostly transparent, + user-defined aggregates and operators built atop these functions + have to be declared with exactly matching types. The presence of an + object referencing the old signature will + cause pg_upgrade to fail, so change it to + detect and report such cases before beginning the upgrade. + + + + + + + Fix possible report of wrong error condition + after clone() failure + in pg_upgrade + with option (Justin Pryzby) + + + + + + + Fix contrib/pg_stat_statements to avoid + problems with very large query-text files on 32-bit platforms + (Tom Lane) + + + + + + + In contrib/postgres_fdw, prevent batch + insertion when there are WITH CHECK OPTION + constraints (Etsuro Fujita) + + + + Such constraints cannot be checked properly if more than one row is + inserted at a time. + + + + + + + Fix contrib/postgres_fdw to detect failure to + send an asynchronous data fetch query (Fujii Masao) + + + + + + + Ensure that contrib/postgres_fdw sends + constants of regconfig and other reg* + types with proper schema qualification (Tom Lane) + + + + + + + Block signals while allocating dynamic shared memory on Linux + (Thomas Munro) + + + + This avoids problems when a signal + interrupts posix_fallocate(). + + + + + + + Detect unexpected EEXIST error + from shm_open() (Thomas Munro) + + + + This avoids a possible crash on Solaris. + + + + + + + Avoid using signalfd() + on illumos systems (Thomas Munro) + + + + This appears to trigger hangs and kernel panics, so avoid the + function until a fix is available. 
+ + + + + + + Adjust PL/Perl test case so it will work under Perl 5.36 + (Dagfinn Ilmari Mannsåker) + + + + + + + Avoid incorrectly using an + out-of-date libldap_r library when + multiple OpenLDAP installations are + present while building PostgreSQL + (Tom Lane) + + + + + + + + Release 14.4 From ae70b55c82082f4837b1e1af7a3431648a9fa68e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 7 Aug 2022 15:46:27 -0400 Subject: [PATCH 11/12] Release notes for 14.5, 13.8, 12.12, 11.17, 10.22. --- doc/src/sgml/release-14.sgml | 330 ++--------------------------------- 1 file changed, 16 insertions(+), 314 deletions(-) diff --git a/doc/src/sgml/release-14.sgml b/doc/src/sgml/release-14.sgml index 981993e70e3..e7da94b025a 100644 --- a/doc/src/sgml/release-14.sgml +++ b/doc/src/sgml/release-14.sgml @@ -58,6 +58,14 @@ Branch: REL_13_STABLE [b76e136ce] 2022-07-29 18:17:36 -0400 Branch: REL_12_STABLE [4349a7615] 2022-07-29 18:17:42 -0400 Branch: REL_11_STABLE [3f9c20536] 2022-07-29 18:17:49 -0400 Branch: REL_10_STABLE [c308003d2] 2022-07-29 18:17:55 -0400 +Author: Alvaro Herrera +Branch: master [6c1c9f88a] 2022-08-06 15:52:10 +0200 +Branch: REL_15_STABLE [6390bc740] 2022-08-06 15:52:10 +0200 +Branch: REL_14_STABLE [9d5c96d9b] 2022-08-06 15:52:10 +0200 +Branch: REL_13_STABLE [8c5d9ccca] 2022-08-06 15:52:10 +0200 +Branch: REL_12_STABLE [782e5631e] 2022-08-06 15:52:10 +0200 +Branch: REL_11_STABLE [772e6383d] 2022-08-06 15:52:10 +0200 +Branch: REL_10_STABLE [ad0e08394] 2022-08-06 15:52:10 +0200 --> Fix replay of CREATE DATABASE WAL @@ -189,39 +197,6 @@ Branch: REL_10_STABLE [4822b4627] 2022-06-27 08:24:38 +0300 - - Fix queries in which a whole-row variable references - the result of a function that returns a domain over composite type - (Tom Lane) - - - - - - - Fix variable not found in subplan target list planner - error when pulling up a sub-SELECT that's - referenced in a GROUPING function (Richard Guo) - - - - - - - Prevent pg_stat_get_subscription() from - possibly returning an
extra row containing garbage values - (Kuntal Ghosh) - - - - - - - Avoid crashing if too many column aliases are attached to - an XMLTABLE or JSON_TABLE - construct (Álvaro Herrera) - - - - - - - When decompiling a view or rule, show a SELECT - output column's AS "?column?" alias clause - if it could be referenced elsewhere (Tom Lane) - - - - Previously, this auto-generated alias was always hidden; but there - are corner cases where doing so results in a non-restorable view or - rule definition. - - - - - - - Report implicitly-created operator families to event triggers - (Masahiko Sawada) - - - - If CREATE OPERATOR CLASS results in the implicit - creation of an operator family, that object was not reported to - event triggers that should capture such events. - - - - - - - Fix control file updates made when a restartpoint is running during - promotion of a standby server (Kyotaro Horiguchi) - - - - Previously, when the restartpoint completed it could incorrectly - update the last-checkpoint fields of the control file, potentially - leading to PANIC and failure to restart if the server crashes before - the next normal checkpoint completes. - - - - - - - Prevent triggering of - standby's wal_receiver_timeout during logical - replication of large transactions (Wang Wei, Amit Kapila) - - - - If a large transaction on the primary server sends no data to the - standby (perhaps because no table it changes is published), it was - possible for the standby to timeout. Fix that by ensuring we send - keepalive messages periodically in such situations. - - - - - - - Ignore heap-rewrite temporary tables for materialized views in - logical replication (Euler Taveira) - - - - A FOR ALL TABLES publication will try to publish - temporary tables if left to its own devices. There is a heuristic - to suppress these, but it failed to cover internal temporary tables - created while rewriting a materialized view. This created a risk of - logical replication target relation ... 
does not - exist failures during REFRESH MATERIALIZED - VIEW. - - - - - - - Prevent open-file leak when reading an invalid timezone abbreviation - file (Kyotaro Horiguchi) - - - - Such cases could result in harmless warning messages. - - - - - - - Allow custom server parameters to have short descriptions that are - NULL (Steve Chavez) - - - - Previously, although extensions could choose to create such - settings, some code paths would crash while processing them. - - - - - Fix WAL consistency checking logic to correctly @@ -826,30 +615,6 @@ Branch: REL_11_STABLE [6d61aef5d] 2022-07-18 19:38:24 +0200 - - Remove misguided SSL key file ownership check - in libpq (Tom Lane) - - - - In the previous minor releases, we copied the server's permission - checking rules for SSL private key files into libpq. But we should - not have also copied the server's file-ownership check. While that - works in normal use-cases, it can result in an unexpected failure - for clients running as root, and perhaps in other cases. - - - - - - - Ensure ecpg reports server connection loss - sanely (Tom Lane) - - - - Misprocessing of a libpq-generated error result, such as a report of - lost connection, would lead to printing (null) - instead of a useful error message; or in older releases it would - lead to a crash. - - - - - - - Adjust PL/Perl test case so it will work under Perl 5.36 - (Dagfinn Ilmari Mannsåker) - - - - - - - Avoid incorrectly using an - out-of-date libldap_r library when - multiple OpenLDAP installations are - present while building PostgreSQL - (Tom Lane) - - - From c4d6fa2985bb6a6bde6ef0c9d673b3b505be3660 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 8 Aug 2022 11:28:47 -0400 Subject: [PATCH 12/12] Last-minute updates for release notes. 
Security: CVE-2022-2625 --- doc/src/sgml/release-14.sgml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/doc/src/sgml/release-14.sgml b/doc/src/sgml/release-14.sgml index e7da94b025a..b5f91109812 100644 --- a/doc/src/sgml/release-14.sgml +++ b/doc/src/sgml/release-14.sgml @@ -35,6 +35,41 @@ + + Do not let extension scripts replace objects not already belonging + to the extension (Tom Lane) + + + + This change prevents extension scripts from doing CREATE + OR REPLACE if there is an existing object that does not + belong to the extension. It also prevents CREATE IF NOT + EXISTS in the same situation. This prevents a form of + trojan-horse attack in which a hostile database user could become + the owner of an extension object and then modify it to compromise + future uses of the object by other users. As a side benefit, it + also reduces the risk of accidentally replacing objects one did + not mean to. + + + + The PostgreSQL Project thanks + Sven Klemm for reporting this problem. + (CVE-2022-2625) + + + + +