Skip to content

Commit 72faddb

Browse files
KKould and Copilot authored
feat: add cost to explain pruning stats (#19015)
* feat: add cost to explain pruning stats
* Update src/query/storages/fuse/src/pruning/vector_index_pruner.rs
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* chore: codefmt
* chore: remove the explain_pruner_cost option
* chore: codefmt
* chore: codefmt
* chore: codefmt
* chore: rebase
* chore: add cost for topn pruning
* chore: update explain_inverted_index.test
* chore: fix cost display fmt
* chore: codefmt
---------
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent ae9e365 commit 72faddb

File tree

95 files changed

+952
-693
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+952
-693
lines changed

src/query/catalog/src/plan/pruning_statistics.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,45 +13,64 @@
1313
// limitations under the License.
1414

1515
#[derive(serde::Serialize, serde::Deserialize, PartialEq, Eq, Clone, Debug, Default)]
16+
#[serde(default)]
1617
pub struct PruningStatistics {
1718
/// Segment range pruning stats.
1819
pub segments_range_pruning_before: usize,
1920
pub segments_range_pruning_after: usize,
21+
/// Segment range pruning cost in microseconds.
22+
pub segments_range_pruning_cost: u64,
2023

2124
/// Block range pruning stats.
2225
pub blocks_range_pruning_before: usize,
2326
pub blocks_range_pruning_after: usize,
27+
/// Block range pruning cost in microseconds.
28+
pub blocks_range_pruning_cost: u64,
2429

2530
/// Block bloom filter pruning stats.
2631
pub blocks_bloom_pruning_before: usize,
2732
pub blocks_bloom_pruning_after: usize,
33+
/// Block bloom pruning cost in microseconds.
34+
pub blocks_bloom_pruning_cost: u64,
2835

2936
/// Block inverted index filter pruning stats.
3037
pub blocks_inverted_index_pruning_before: usize,
3138
pub blocks_inverted_index_pruning_after: usize,
39+
/// Block inverted index pruning cost in microseconds.
40+
pub blocks_inverted_index_pruning_cost: u64,
3241

3342
/// Block vector index filter pruning stats.
3443
pub blocks_vector_index_pruning_before: usize,
3544
pub blocks_vector_index_pruning_after: usize,
45+
/// Block vector index pruning cost in microseconds.
46+
pub blocks_vector_index_pruning_cost: u64,
3647

3748
/// Block topn pruning stats.
3849
pub blocks_topn_pruning_before: usize,
3950
pub blocks_topn_pruning_after: usize,
51+
/// Block topn pruning cost in microseconds.
52+
pub blocks_topn_pruning_cost: u64,
4053
}
4154

4255
impl PruningStatistics {
4356
pub fn merge(&mut self, other: &Self) {
4457
self.segments_range_pruning_before += other.segments_range_pruning_before;
4558
self.segments_range_pruning_after += other.segments_range_pruning_after;
59+
self.segments_range_pruning_cost += other.segments_range_pruning_cost;
4660
self.blocks_range_pruning_before += other.blocks_range_pruning_before;
4761
self.blocks_range_pruning_after += other.blocks_range_pruning_after;
62+
self.blocks_range_pruning_cost += other.blocks_range_pruning_cost;
4863
self.blocks_bloom_pruning_before += other.blocks_bloom_pruning_before;
4964
self.blocks_bloom_pruning_after += other.blocks_bloom_pruning_after;
65+
self.blocks_bloom_pruning_cost += other.blocks_bloom_pruning_cost;
5066
self.blocks_inverted_index_pruning_before += other.blocks_inverted_index_pruning_before;
5167
self.blocks_inverted_index_pruning_after += other.blocks_inverted_index_pruning_after;
68+
self.blocks_inverted_index_pruning_cost += other.blocks_inverted_index_pruning_cost;
5269
self.blocks_vector_index_pruning_before += other.blocks_vector_index_pruning_before;
5370
self.blocks_vector_index_pruning_after += other.blocks_vector_index_pruning_after;
71+
self.blocks_vector_index_pruning_cost += other.blocks_vector_index_pruning_cost;
5472
self.blocks_topn_pruning_before += other.blocks_topn_pruning_before;
5573
self.blocks_topn_pruning_after += other.blocks_topn_pruning_after;
74+
self.blocks_topn_pruning_cost += other.blocks_topn_pruning_cost;
5675
}
5776
}

src/query/service/src/physical_plans/format/common.rs

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,14 @@ pub fn pretty_display_agg_desc(desc: &AggregateFunctionDesc, metadata: &Metadata
4848
)
4949
}
5050

51+
/// Builds the ` cost: N ms` suffix appended to a pruning-stage description.
///
/// `cost_micros` is converted to whole milliseconds by truncating division.
/// Any value below one millisecond, zero included, is rendered as `1 ms`.
fn format_pruning_cost_suffix(cost_micros: u64) -> String {
    // Floor of 1 so a sub-millisecond cost never prints as `0 ms`.
    let cost_millis = (cost_micros / 1_000).max(1);
    format!(" cost: {} ms", cost_millis)
}
58+
5159
pub fn part_stats_info_to_format_tree(info: &PartStatistics) -> Vec<FormatTreeNode<String>> {
5260
let read_size = format_byte_size(info.read_bytes);
5361
let mut items = vec![
@@ -63,9 +71,10 @@ pub fn part_stats_info_to_format_tree(info: &PartStatistics) -> Vec<FormatTreeNo
6371
// range pruning status.
6472
if info.pruning_stats.blocks_range_pruning_before > 0 {
6573
blocks_pruning_description += &format!(
66-
"range pruning: {} to {}",
74+
"range pruning: {} to {}{}",
6775
info.pruning_stats.blocks_range_pruning_before,
68-
info.pruning_stats.blocks_range_pruning_after
76+
info.pruning_stats.blocks_range_pruning_after,
77+
format_pruning_cost_suffix(info.pruning_stats.blocks_range_pruning_cost)
6978
);
7079
}
7180

@@ -75,9 +84,10 @@ pub fn part_stats_info_to_format_tree(info: &PartStatistics) -> Vec<FormatTreeNo
7584
blocks_pruning_description += ", ";
7685
}
7786
blocks_pruning_description += &format!(
78-
"bloom pruning: {} to {}",
87+
"bloom pruning: {} to {}{}",
7988
info.pruning_stats.blocks_bloom_pruning_before,
80-
info.pruning_stats.blocks_bloom_pruning_after
89+
info.pruning_stats.blocks_bloom_pruning_after,
90+
format_pruning_cost_suffix(info.pruning_stats.blocks_bloom_pruning_cost)
8191
);
8292
}
8393

@@ -87,9 +97,10 @@ pub fn part_stats_info_to_format_tree(info: &PartStatistics) -> Vec<FormatTreeNo
8797
blocks_pruning_description += ", ";
8898
}
8999
blocks_pruning_description += &format!(
90-
"inverted pruning: {} to {}",
100+
"inverted pruning: {} to {}{}",
91101
info.pruning_stats.blocks_inverted_index_pruning_before,
92-
info.pruning_stats.blocks_inverted_index_pruning_after
102+
info.pruning_stats.blocks_inverted_index_pruning_after,
103+
format_pruning_cost_suffix(info.pruning_stats.blocks_inverted_index_pruning_cost)
93104
);
94105
}
95106

@@ -99,9 +110,10 @@ pub fn part_stats_info_to_format_tree(info: &PartStatistics) -> Vec<FormatTreeNo
99110
blocks_pruning_description += ", ";
100111
}
101112
blocks_pruning_description += &format!(
102-
"topn pruning: {} to {}",
113+
"topn pruning: {} to {}{}",
103114
info.pruning_stats.blocks_topn_pruning_before,
104-
info.pruning_stats.blocks_topn_pruning_after
115+
info.pruning_stats.blocks_topn_pruning_after,
116+
format_pruning_cost_suffix(info.pruning_stats.blocks_topn_pruning_cost)
105117
);
106118
}
107119

@@ -111,9 +123,10 @@ pub fn part_stats_info_to_format_tree(info: &PartStatistics) -> Vec<FormatTreeNo
111123
blocks_pruning_description += ", ";
112124
}
113125
blocks_pruning_description += &format!(
114-
"vector pruning: {} to {}",
126+
"vector pruning: {} to {}{}",
115127
info.pruning_stats.blocks_vector_index_pruning_before,
116-
info.pruning_stats.blocks_vector_index_pruning_after
128+
info.pruning_stats.blocks_vector_index_pruning_after,
129+
format_pruning_cost_suffix(info.pruning_stats.blocks_vector_index_pruning_cost)
117130
);
118131
}
119132

@@ -125,9 +138,10 @@ pub fn part_stats_info_to_format_tree(info: &PartStatistics) -> Vec<FormatTreeNo
125138

126139
if info.pruning_stats.segments_range_pruning_before > 0 {
127140
pruning_description += &format!(
128-
"segments: <range pruning: {} to {}>",
141+
"segments: <range pruning: {} to {}{}>",
129142
info.pruning_stats.segments_range_pruning_before,
130-
info.pruning_stats.segments_range_pruning_after
143+
info.pruning_stats.segments_range_pruning_after,
144+
format_pruning_cost_suffix(info.pruning_stats.segments_range_pruning_cost)
131145
);
132146
}
133147

src/query/service/tests/it/sql/planner/optimizer/data/results/basic/01_cross_join_aggregation_physical.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ AggregateFinal
2929
│ ├── read size: 20.15 KiB
3030
│ ├── partitions total: 1
3131
│ ├── partitions scanned: 1
32-
│ ├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
32+
│ ├── pruning stats: [segments: <range pruning: 1 to 1 cost: 1 ms>, blocks: <range pruning: 1 to 1 cost: 1 ms>]
3333
│ ├── push downs: [filters: [], limit: NONE]
3434
│ └── estimated rows: 5000.00
3535
└── TableScan(Probe)
@@ -40,7 +40,7 @@ AggregateFinal
4040
├── read size: 20.15 KiB
4141
├── partitions total: 1
4242
├── partitions scanned: 1
43-
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
43+
├── pruning stats: [segments: <range pruning: 1 to 1 cost: 1 ms>, blocks: <range pruning: 1 to 1 cost: 1 ms>]
4444
├── push downs: [filters: [], limit: NONE]
4545
└── estimated rows: 5000.00
4646

src/query/sql/src/planner/planner.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ pub fn get_query_kind(stmt: &Statement) -> QueryKind {
347347
Statement::StatementWithSettings { stmt, .. } => get_query_kind(stmt),
348348
Statement::CopyIntoTable(_) => QueryKind::CopyIntoTable,
349349
Statement::CopyIntoLocation(_) => QueryKind::CopyIntoLocation,
350-
Statement::Explain { .. } => QueryKind::Explain,
350+
Statement::Explain { .. } | Statement::ExplainAnalyze { .. } => QueryKind::Explain,
351351
Statement::Insert(_) => QueryKind::Insert,
352352
Statement::Replace(_)
353353
| Statement::Delete(_)

src/query/storages/fuse/src/pruning/block_pruner.rs

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ use log::info;
3333

3434
use super::SegmentLocation;
3535
use crate::pruning::PruningContext;
36+
use crate::pruning::PruningCostKind;
3637

3738
pub struct BlockPruner {
3839
pub pruning_ctx: Arc<PruningContext>,
@@ -97,6 +98,7 @@ impl BlockPruner {
9798
block_meta_indexes: Vec<(usize, Arc<BlockMeta>)>,
9899
) -> Result<Vec<(BlockMetaIndex, Arc<BlockMeta>)>> {
99100
let pruning_stats = self.pruning_ctx.pruning_stats.clone();
101+
let pruning_cost = self.pruning_ctx.pruning_cost.clone();
100102
let pruning_runtime = &self.pruning_ctx.pruning_runtime;
101103
let pruning_semaphore = &self.pruning_ctx.pruning_semaphore;
102104
let limit_pruner = self.pruning_ctx.limit_pruner.clone();
@@ -132,8 +134,9 @@ impl BlockPruner {
132134
BlockPruneResult::new(block_idx, block_meta.location.0.clone());
133135
let block_meta = block_meta.clone();
134136
let row_count = block_meta.row_count;
135-
prune_result.keep =
136-
range_pruner.should_keep(&block_meta.col_stats, Some(&block_meta.col_metas));
137+
prune_result.keep = pruning_cost.measure(PruningCostKind::BlocksRange, || {
138+
range_pruner.should_keep(&block_meta.col_stats, Some(&block_meta.col_metas))
139+
});
137140
if prune_result.keep {
138141
// Perf.
139142
{
@@ -154,6 +157,7 @@ impl BlockPruner {
154157
let index_size = block_meta.bloom_filter_index_size;
155158
let column_ids = block_meta.col_metas.keys().cloned().collect::<Vec<_>>();
156159

160+
let pruning_cost = pruning_cost.clone();
157161
let v: BlockPruningFuture = Box::new(move |permit: OwnedSemaphorePermit| {
158162
Box::pin(async move {
159163
let _permit = permit;
@@ -168,16 +172,18 @@ impl BlockPruner {
168172
pruning_stats.set_blocks_bloom_pruning_before(1);
169173
}
170174

171-
let keep_by_bloom = bloom_pruner
172-
.should_keep(
173-
&index_location,
174-
index_size,
175-
&block_meta.col_stats,
176-
column_ids,
177-
&block_meta.as_ref().into(),
175+
let keep_by_bloom = pruning_cost
176+
.measure_async(
177+
PruningCostKind::BlocksBloom,
178+
bloom_pruner.should_keep(
179+
&index_location,
180+
index_size,
181+
&block_meta.col_stats,
182+
column_ids,
183+
&block_meta.as_ref().into(),
184+
),
178185
)
179186
.await;
180-
181187
prune_result.keep =
182188
keep_by_bloom && limit_pruner.within_limit(row_count);
183189
if prune_result.keep {
@@ -212,8 +218,12 @@ impl BlockPruner {
212218

213219
pruning_stats.set_blocks_inverted_index_pruning_before(1);
214220
}
215-
let matched_rows = inverted_index_pruner
216-
.should_keep(&block_location.0, row_count)
221+
let matched_rows = pruning_cost
222+
.measure_async(
223+
PruningCostKind::BlocksInverted,
224+
inverted_index_pruner
225+
.should_keep(&block_location.0, row_count),
226+
)
217227
.await?;
218228
prune_result.keep = matched_rows.is_some();
219229
prune_result.matched_rows = matched_rows;
@@ -315,6 +325,7 @@ impl BlockPruner {
315325
block_meta_indexes: Vec<(usize, Arc<BlockMeta>)>,
316326
) -> Result<Vec<(BlockMetaIndex, Arc<BlockMeta>)>> {
317327
let pruning_stats = self.pruning_ctx.pruning_stats.clone();
328+
let pruning_cost = self.pruning_ctx.pruning_cost.clone();
318329
let limit_pruner = self.pruning_ctx.limit_pruner.clone();
319330
let range_pruner = self.pruning_ctx.range_pruner.clone();
320331
let page_pruner = self.pruning_ctx.page_pruner.clone();
@@ -337,9 +348,10 @@ impl BlockPruner {
337348
break;
338349
}
339350
let row_count = block_meta.row_count;
340-
if range_pruner.should_keep(&block_meta.col_stats, Some(&block_meta.col_metas))
341-
&& limit_pruner.within_limit(row_count)
342-
{
351+
let keep_by_range = pruning_cost.measure(PruningCostKind::BlocksRange, || {
352+
range_pruner.should_keep(&block_meta.col_stats, Some(&block_meta.col_metas))
353+
});
354+
if keep_by_range && limit_pruner.within_limit(row_count) {
343355
// Perf.
344356
{
345357
metrics_inc_blocks_range_pruning_after(1);

src/query/storages/fuse/src/pruning/fuse_pruner.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use databend_common_base::base::tokio::sync::Semaphore;
1919
use databend_common_base::runtime::Runtime;
2020
use databend_common_base::runtime::TrySpawn;
2121
use databend_common_catalog::plan::PushDownInfo;
22+
use databend_common_catalog::query_kind::QueryKind;
2223
use databend_common_catalog::table_context::TableContext;
2324
use databend_common_exception::ErrorCode;
2425
use databend_common_exception::Result;
@@ -67,6 +68,8 @@ use crate::pruning::BloomPruner;
6768
use crate::pruning::BloomPrunerCreator;
6869
use crate::pruning::FusePruningStatistics;
6970
use crate::pruning::InvertedIndexPruner;
71+
use crate::pruning::PruningCostController;
72+
use crate::pruning::PruningCostKind;
7073
use crate::pruning::SegmentLocation;
7174
use crate::pruning::VectorIndexPruner;
7275
use crate::pruning::VirtualColumnPruner;
@@ -88,6 +91,7 @@ pub struct PruningContext {
8891
pub virtual_column_pruner: Option<Arc<VirtualColumnPruner>>,
8992

9093
pub pruning_stats: Arc<FusePruningStatistics>,
94+
pub pruning_cost: PruningCostController,
9195
}
9296

9397
impl PruningContext {
@@ -105,6 +109,7 @@ impl PruningContext {
105109
bloom_index_builder: Option<BloomIndexRebuilder>,
106110
) -> Result<Arc<PruningContext>> {
107111
let func_ctx = ctx.get_function_context()?;
112+
let collect_pruning_cost = matches!(ctx.get_query_kind(), QueryKind::Explain);
108113

109114
let filter_expr = push_down.as_ref().and_then(|extra| {
110115
extra
@@ -192,6 +197,8 @@ impl PruningContext {
192197
let pruning_semaphore = Arc::new(Semaphore::new(max_concurrency));
193198
let pruning_stats = Arc::new(FusePruningStatistics::default());
194199

200+
let pruning_cost = PruningCostController::new(pruning_stats.clone(), collect_pruning_cost);
201+
195202
let pruning_ctx = Arc::new(PruningContext {
196203
ctx: ctx.clone(),
197204
dal,
@@ -205,6 +212,7 @@ impl PruningContext {
205212
inverted_index_pruner,
206213
virtual_column_pruner,
207214
pruning_stats,
215+
pruning_cost,
208216
});
209217
Ok(pruning_ctx)
210218
}
@@ -558,7 +566,11 @@ impl FusePruner {
558566
let sort = push_down.order_by.clone();
559567
let filter_only_use_index = push_down.filter_only_use_index();
560568
let topn_pruner = TopNPruner::create(schema, sort, limit, filter_only_use_index);
561-
let pruned_metas = topn_pruner.prune(metas.clone()).unwrap_or(metas);
569+
let pruning_cost = self.pruning_ctx.pruning_cost.clone();
570+
let pruned_res = pruning_cost.measure(PruningCostKind::BlocksTopN, || {
571+
topn_pruner.prune(metas.clone())
572+
});
573+
let pruned_metas = pruned_res.unwrap_or(metas);
562574

563575
// Perf.
564576
{
@@ -612,39 +624,51 @@ impl FusePruner {
612624

613625
let segments_range_pruning_before = stats.get_segments_range_pruning_before() as usize;
614626
let segments_range_pruning_after = stats.get_segments_range_pruning_after() as usize;
627+
let segments_range_pruning_cost = stats.get_segments_range_pruning_cost();
615628

616629
let blocks_range_pruning_before = stats.get_blocks_range_pruning_before() as usize;
617630
let blocks_range_pruning_after = stats.get_blocks_range_pruning_after() as usize;
631+
let blocks_range_pruning_cost = stats.get_blocks_range_pruning_cost();
618632

619633
let blocks_bloom_pruning_before = stats.get_blocks_bloom_pruning_before() as usize;
620634
let blocks_bloom_pruning_after = stats.get_blocks_bloom_pruning_after() as usize;
635+
let blocks_bloom_pruning_cost = stats.get_blocks_bloom_pruning_cost();
621636

622637
let blocks_inverted_index_pruning_before =
623638
stats.get_blocks_inverted_index_pruning_before() as usize;
624639
let blocks_inverted_index_pruning_after =
625640
stats.get_blocks_inverted_index_pruning_after() as usize;
641+
let blocks_inverted_index_pruning_cost = stats.get_blocks_inverted_index_pruning_cost();
626642

627643
let blocks_vector_index_pruning_before =
628644
stats.get_blocks_vector_index_pruning_before() as usize;
629645
let blocks_vector_index_pruning_after =
630646
stats.get_blocks_vector_index_pruning_after() as usize;
647+
let blocks_vector_index_pruning_cost = stats.get_blocks_vector_index_pruning_cost();
631648

632649
let blocks_topn_pruning_before = stats.get_blocks_topn_pruning_before() as usize;
633650
let blocks_topn_pruning_after = stats.get_blocks_topn_pruning_after() as usize;
651+
let blocks_topn_pruning_cost = stats.get_blocks_topn_pruning_cost();
634652

635653
databend_common_catalog::plan::PruningStatistics {
636654
segments_range_pruning_before,
637655
segments_range_pruning_after,
656+
segments_range_pruning_cost,
638657
blocks_range_pruning_before,
639658
blocks_range_pruning_after,
659+
blocks_range_pruning_cost,
640660
blocks_bloom_pruning_before,
641661
blocks_bloom_pruning_after,
662+
blocks_bloom_pruning_cost,
642663
blocks_inverted_index_pruning_before,
643664
blocks_inverted_index_pruning_after,
665+
blocks_inverted_index_pruning_cost,
644666
blocks_vector_index_pruning_before,
645667
blocks_vector_index_pruning_after,
668+
blocks_vector_index_pruning_cost,
646669
blocks_topn_pruning_before,
647670
blocks_topn_pruning_after,
671+
blocks_topn_pruning_cost,
648672
}
649673
}
650674

0 commit comments

Comments
 (0)