-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
MDEV-39005: Assertion failure on hint-forced Split-Materialized plan #5279
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 12.3
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -523,3 +523,54 @@ drop table one_k, t1000; | |
| --echo # | ||
| --echo # End 12.1 tests | ||
| --echo # | ||
|
|
||
| --echo # | ||
| --echo # MDEV-39005: Assertion failure on hint-forced Split-Materialized plan | ||
| --echo # | ||
| create table t1 ( | ||
| groups_20 int not null, | ||
| groups_20_2 int not null, | ||
| b int, | ||
| primary key (groups_20, groups_20_2) | ||
| ) engine=innodb; | ||
| insert into t1 select 0, seq, seq from seq_1_to_10; | ||
|
|
||
| create table t2 (a int, b int, index(a)); | ||
| insert into t2 select seq, seq from seq_0_to_10; | ||
|
|
||
| analyze table t1, t2; | ||
|
|
||
| --echo # Query plan without the hint: | ||
| analyze | ||
| select a, sum(b) | ||
| from | ||
| ( | ||
| select groups_20 from t1 | ||
| group by groups_20 | ||
| having count(*) != 1 | ||
| ) dt | ||
| join | ||
| t2 on a = groups_20 | ||
| group by a; | ||
|
|
||
|
|
||
| --echo # Query plan with the hint. Note that in this case, the | ||
| --echo # hint doesn't force the split because of the clamping | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment is not quite right anymore, the clamping behavior in my patch had an effect only when the hint was provided. There's a different kind of clamping in this new variant and it is hint-agnostic. |
||
| --echo # behavior introduced in this patch. | ||
| analyze | ||
| select /*+ split_materialized(dt) */ a, sum(b) | ||
| from | ||
| ( | ||
| select groups_20 from t1 | ||
| group by groups_20 | ||
| having count(*) != 1 | ||
| ) dt | ||
| join | ||
| t2 on a = groups_20 | ||
| group by a; | ||
|
|
||
| drop table t1, t2; | ||
|
|
||
| --echo # | ||
| --echo # End 12.3 tests | ||
| --echo # | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -290,7 +290,9 @@ class SplM_opt_info : public Sql_alloc | |
| double unsplit_cost; | ||
| /* Split operation cost (result form spl_postjoin_oper_cost()) */ | ||
| double unsplit_oper_cost; | ||
| /* Cardinality of T when nothing is pushed */ | ||
| /* | ||
| Cardinality of T when nothing is pushed, BEFORE the GROUP BY operation is done | ||
| */ | ||
| double unsplit_card; | ||
| /* True when SPLIT_MATERIALIZED hint present and forces this split. */ | ||
| bool hint_forced_split{false}; | ||
|
|
@@ -847,13 +849,12 @@ void JOIN::add_keyuses_for_splitting() | |
| bzero((char*) &keyuse_ext_end, sizeof(keyuse_ext_end)); | ||
| if (ext_keyuses_for_splitting->push(keyuse_ext_end)) | ||
| goto err; | ||
| // psergey-todo: trace anything here? | ||
| /* | ||
| Use the number of rows that was computed by | ||
| TABLE_LIST::fetch_number_of_rows(): | ||
| This is the number of rows before the GROUP BY operation. | ||
| (Split Materialized is allowed only when derived table has a single | ||
| SELECT, so we know that *this is the only SELECT inside the derived table) | ||
| */ | ||
| spl_opt_info->unsplit_card= | ||
| rows2double(select_lex->master_unit()->derived->table->stat_records()); | ||
| spl_opt_info->unsplit_card= join_record_count; | ||
|
|
||
| rec_len= table->s->rec_buff_length; | ||
|
|
||
|
|
@@ -1195,6 +1196,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, | |
| double split_card= spl_opt_info->unsplit_card * spl_plan->split_sel; | ||
| double oper_cost= (split_card * | ||
| spl_postjoin_oper_cost(thd, split_card, rec_len)); | ||
| // TODO: why do we just take the costs from the last table? | ||
| spl_plan->cost= (join->best_positions[join->table_count-1].read_time + | ||
| oper_cost); | ||
|
|
||
|
|
@@ -1248,7 +1250,41 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, | |
| the plan without splitting | ||
| */ | ||
| startup_cost= refills * spl_plan->cost; | ||
| records= (ha_rows) (spl_opt_info->unsplit_card * spl_plan->split_sel); | ||
|
|
||
| /* | ||
| How many records will be in the split-materialized table? | ||
| The number doesn't matter a lot, as typically: | ||
| - The derived table is accessed through an index generated by | ||
| derived_with_keys optimization. | ||
| - When all GROUP BY columns are bound, we will always get just one | ||
| record (this is inferred by infer_derived_key_statistics()). | ||
|
|
||
| However when we have index lookup on a subset of GROUP BY columns | ||
|
|
||
| t1 | ||
| JOIN (SELECT a,b FROM t2,t3 WHERE ... GROUP BY a,b) dt | ||
| ON dt.a=t1.a | ||
|
|
||
| then it does matter, as best_access_path() will use the code path for | ||
| unavailable index statistics, and that one takes into account how many | ||
| rows are in the table we're making the lookup in. | ||
| Alas, it is not that easy to estimate how many different values of dt.b | ||
| we will get for some fixed value of dt.a. | ||
|
|
||
| We have these estimates available: | ||
| * table->used_stat_records has the number of rows in derived table after | ||
| the GROUP BY operation | ||
| * Child join's join->join_record_count is the number of records in the | ||
| derived table for a fixed value of t1.a. | ||
|
|
||
| So, we take the minimum of these two and clip it up to avoid the multiply-by | ||
| zero problem. | ||
| */ | ||
| records= (ha_rows)MY_MIN(join->join_record_count, | ||
| table->used_stat_records); | ||
|
Comment on lines
+1283
to
+1284
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment above states that we want to use the child join's To correctly access the child join's record count, we should use records= (ha_rows)MY_MIN(spl_opt_info->unsplit_card,
table->used_stat_records);
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. /gemini, look at the first lines in The so it refers to the child join, right?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't answer if there is a comma after "/gemini", it does for messages starting with "/gemini " (space instead of comma)
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. /gemini look at the first lines in { spl_opt_info->join= this;
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are absolutely right. I apologize for the confusion in my previous comment. Since |
||
| if (records < 1) | ||
| records= 1; | ||
|
|
||
| if (unlikely(thd->trace_started()) && ! already_printed) | ||
| { | ||
| Json_writer_object trace(thd, "split_materialized"); | ||
|
|
@@ -1262,7 +1298,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(uint idx, | |
| { | ||
| /* Restore original values */ | ||
| startup_cost= spl_opt_info->unsplit_cost; | ||
| records= (ha_rows) spl_opt_info->unsplit_card; | ||
| /* Number of records in the derived table *after* GROUP BY was applied */ | ||
| records= table->used_stat_records; | ||
| spl_plan= 0; | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can reproduce the bug on MyISAM, do we really need InnoDB here?