From 58e828d40513e9dd06cb89ad1ee536099a9308a4 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 3 Mar 2026 02:33:50 -0500 Subject: [PATCH 1/6] Add explain plans for ClickBench queries --- .../sqllogictest/test_files/clickbench.slt | 896 +++++++++++++++++- 1 file changed, 891 insertions(+), 5 deletions(-) diff --git a/datafusion/sqllogictest/test_files/clickbench.slt b/datafusion/sqllogictest/test_files/clickbench.slt index 42b7cfafdaa63..6bff399a2db6b 100644 --- a/datafusion/sqllogictest/test_files/clickbench.slt +++ b/datafusion/sqllogictest/test_files/clickbench.slt @@ -15,10 +15,9 @@ # specific language governing permissions and limitations # under the License. - -# This file contains the clickbench schema and queries -# and the first 10 rows of data. Since ClickBench contains case sensitive queries -# this is also a good test of that usecase too +## Notes: This file contains the ClickBench schema and queries and the first 10 +## rows of data. Since ClickBench contains case-sensitive identifiers (e.g. +## "EventDate") this is also a good test of that use case # create.sql came from # https://github.com/ClickHouse/ClickBench/blob/8b9e3aa05ea18afa427f14909ddc678b8ef0d5e6/datafusion/create.sql @@ -31,6 +30,8 @@ STORED AS PARQUET LOCATION '../core/tests/data/clickbench_hits_10.parquet'; # ClickBench encodes EventDate as UInt16 days since epoch. 
+# So we define this view to convert it to the correct DATE type (this is done +# in the ClickBench runner as well, see https://github.com/ClickHouse/ClickBench/pull/803) statement ok CREATE VIEW hits AS SELECT * EXCEPT ("EventDate"), @@ -38,6 +39,7 @@ SELECT * EXCEPT ("EventDate"), FROM hits_raw; # Verify EventDate transformation from UInt16 to DATE + query D SELECT "EventDate" FROM hits LIMIT 1; ---- @@ -52,45 +54,197 @@ SELECT "EventDate" FROM hits_raw LIMIT 1; # queries.sql came from # https://github.com/ClickHouse/ClickBench/blob/8b9e3aa05ea18afa427f14909ddc678b8ef0d5e6/datafusion/queries.sql +## Q0 +query TT +EXPLAIN SELECT COUNT(*) FROM hits; +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: hits +04)------TableScan: hits_raw projection=[] +physical_plan +01)ProjectionExec: expr=[10 as count(*)] +02)--PlaceholderRowExec + query I SELECT COUNT(*) FROM hits; ---- 10 +## Q1 +query TT +EXPLAIN SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0; +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: hits +04)------Projection: +05)--------Filter: hits_raw.AdvEngineID != Int16(0) +06)----------TableScan: hits_raw projection=[AdvEngineID], partial_filters=[hits_raw.AdvEngineID != Int16(0)] +physical_plan +01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: AdvEngineID@0 != 0, projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[AdvEngineID], file_type=parquet, predicate=AdvEngineID@40 != 0, 
pruning_predicate=AdvEngineID_null_count@2 != row_count@3 AND (AdvEngineID_min@0 != 0 OR 0 != AdvEngineID_max@1), required_guarantees=[AdvEngineID not in (0)] + query I SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0; ---- 0 +query TT +EXPLAIN SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits; +---- +logical_plan +01)Projection: sum(hits.AdvEngineID), count(Int64(1)) AS count(*), avg(hits.ResolutionWidth) +02)--Aggregate: groupBy=[[]], aggr=[[sum(CAST(hits.AdvEngineID AS Int64)), count(Int64(1)), avg(CAST(hits.ResolutionWidth AS Float64))]] +03)----SubqueryAlias: hits +04)------TableScan: hits_raw projection=[ResolutionWidth, AdvEngineID] +physical_plan +01)ProjectionExec: expr=[sum(hits.AdvEngineID)@0 as sum(hits.AdvEngineID), count(Int64(1))@1 as count(*), avg(hits.ResolutionWidth)@2 as avg(hits.ResolutionWidth)] +02)--AggregateExec: mode=Single, gby=[], aggr=[sum(hits.AdvEngineID), count(Int64(1)), avg(hits.ResolutionWidth)] +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[ResolutionWidth, AdvEngineID], file_type=parquet + query IIR SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits; ---- 0 10 0 +query TT +EXPLAIN SELECT AVG("UserID") FROM hits; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[avg(CAST(hits.UserID AS Float64))]] +02)--SubqueryAlias: hits +03)----TableScan: hits_raw projection=[UserID] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[avg(hits.UserID)] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[UserID], file_type=parquet + query R SELECT AVG("UserID") FROM hits; ---- -304548765855551740 +query TT +EXPLAIN SELECT COUNT(DISTINCT "UserID") FROM hits; +---- +logical_plan +01)Projection: count(alias1) AS count(DISTINCT hits.UserID) +02)--Aggregate: groupBy=[[]], aggr=[[count(alias1)]] +03)----Aggregate: 
groupBy=[[hits.UserID AS alias1]], aggr=[[]] +04)------SubqueryAlias: hits +05)--------TableScan: hits_raw projection=[UserID] +physical_plan +01)ProjectionExec: expr=[count(alias1)@0 as count(DISTINCT hits.UserID)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(alias1)] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(alias1)] +05)--------AggregateExec: mode=FinalPartitioned, gby=[alias1@0 as alias1], aggr=[] +06)----------RepartitionExec: partitioning=Hash([alias1@0], 4), input_partitions=1 +07)------------AggregateExec: mode=Partial, gby=[UserID@0 as alias1], aggr=[] +08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[UserID], file_type=parquet + query I SELECT COUNT(DISTINCT "UserID") FROM hits; ---- 5 +query TT +EXPLAIN SELECT COUNT(DISTINCT "SearchPhrase") FROM hits; +---- +logical_plan +01)Projection: count(alias1) AS count(DISTINCT hits.SearchPhrase) +02)--Aggregate: groupBy=[[]], aggr=[[count(alias1)]] +03)----Aggregate: groupBy=[[hits.SearchPhrase AS alias1]], aggr=[[]] +04)------SubqueryAlias: hits +05)--------TableScan: hits_raw projection=[SearchPhrase] +physical_plan +01)ProjectionExec: expr=[count(alias1)@0 as count(DISTINCT hits.SearchPhrase)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(alias1)] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(alias1)] +05)--------AggregateExec: mode=FinalPartitioned, gby=[alias1@0 as alias1], aggr=[] +06)----------RepartitionExec: partitioning=Hash([alias1@0], 4), input_partitions=1 +07)------------AggregateExec: mode=Partial, gby=[SearchPhrase@0 as alias1], aggr=[] +08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[SearchPhrase], file_type=parquet + query I SELECT COUNT(DISTINCT "SearchPhrase") FROM hits; ---- 1 +query TT +EXPLAIN SELECT 
MIN("EventDate"), MAX("EventDate") FROM hits; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[min(hits.EventDate), max(hits.EventDate)]] +02)--SubqueryAlias: hits +03)----Projection: CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) AS EventDate +04)------TableScan: hits_raw projection=[EventDate] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[min(hits.EventDate), max(hits.EventDate)] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[CAST(CAST(EventDate@5 AS Int32) AS Date32) as EventDate], file_type=parquet + query DD SELECT MIN("EventDate"), MAX("EventDate") FROM hits; ---- 2013-07-15 2013-07-15 +query TT +EXPLAIN SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC; +---- +logical_plan +01)Projection: hits.AdvEngineID, count(*) +02)--Sort: count(Int64(1)) AS count(*) AS count(*) DESC NULLS FIRST +03)----Projection: hits.AdvEngineID, count(Int64(1)) AS count(*), count(Int64(1)) +04)------Aggregate: groupBy=[[hits.AdvEngineID]], aggr=[[count(Int64(1))]] +05)--------SubqueryAlias: hits +06)----------Filter: hits_raw.AdvEngineID != Int16(0) +07)------------TableScan: hits_raw projection=[AdvEngineID], partial_filters=[hits_raw.AdvEngineID != Int16(0)] +physical_plan +01)ProjectionExec: expr=[AdvEngineID@0 as AdvEngineID, count(*)@1 as count(*)] +02)--SortPreservingMergeExec: [count(Int64(1))@2 DESC] +03)----SortExec: expr=[count(*)@1 DESC], preserve_partitioning=[true] +04)------ProjectionExec: expr=[AdvEngineID@0 as AdvEngineID, count(Int64(1))@1 as count(*), count(Int64(1))@1 as count(Int64(1))] +05)--------AggregateExec: mode=FinalPartitioned, gby=[AdvEngineID@0 as AdvEngineID], aggr=[count(Int64(1))] +06)----------RepartitionExec: partitioning=Hash([AdvEngineID@0], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[AdvEngineID@0 as AdvEngineID], aggr=[count(Int64(1))] 
+08)--------------FilterExec: AdvEngineID@0 != 0 +09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +10)------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[AdvEngineID], file_type=parquet, predicate=AdvEngineID@40 != 0, pruning_predicate=AdvEngineID_null_count@2 != row_count@3 AND (AdvEngineID_min@0 != 0 OR 0 != AdvEngineID_max@1), required_guarantees=[AdvEngineID not in (0)] + query II SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC; ---- +query TT +EXPLAIN SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10; +---- +logical_plan +01)Sort: u DESC NULLS FIRST, fetch=10 +02)--Projection: hits.RegionID, count(alias1) AS u +03)----Aggregate: groupBy=[[hits.RegionID]], aggr=[[count(alias1)]] +04)------Aggregate: groupBy=[[hits.RegionID, hits.UserID AS alias1]], aggr=[[]] +05)--------SubqueryAlias: hits +06)----------TableScan: hits_raw projection=[RegionID, UserID] +physical_plan +01)SortPreservingMergeExec: [u@1 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[u@1 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[RegionID@0 as RegionID, count(alias1)@1 as u] +04)------AggregateExec: mode=FinalPartitioned, gby=[RegionID@0 as RegionID], aggr=[count(alias1)] +05)--------RepartitionExec: partitioning=Hash([RegionID@0], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[RegionID@0 as RegionID], aggr=[count(alias1)] +07)------------AggregateExec: mode=FinalPartitioned, gby=[RegionID@0 as RegionID, alias1@1 as alias1], aggr=[] +08)--------------RepartitionExec: partitioning=Hash([RegionID@0, alias1@1], 4), input_partitions=1 +09)----------------AggregateExec: mode=Partial, gby=[RegionID@0 as RegionID, UserID@1 as alias1], aggr=[] +10)------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[RegionID, UserID], file_type=parquet + query II rowsort SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10; ---- @@ -99,6 +253,24 @@ SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" O 39 1 839 2 +query TT +EXPLAIN SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10; +---- +logical_plan +01)Sort: c DESC NULLS FIRST, fetch=10 +02)--Projection: hits.RegionID, sum(hits.AdvEngineID), count(Int64(1)) AS count(*) AS c, avg(hits.ResolutionWidth), count(DISTINCT hits.UserID) +03)----Aggregate: groupBy=[[hits.RegionID]], aggr=[[sum(CAST(hits.AdvEngineID AS Int64)), count(Int64(1)), avg(CAST(hits.ResolutionWidth AS Float64)), count(DISTINCT hits.UserID)]] +04)------SubqueryAlias: hits +05)--------TableScan: hits_raw projection=[RegionID, UserID, ResolutionWidth, AdvEngineID] +physical_plan +01)SortPreservingMergeExec: [c@2 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[RegionID@0 as RegionID, sum(hits.AdvEngineID)@1 as sum(hits.AdvEngineID), count(Int64(1))@2 as c, avg(hits.ResolutionWidth)@3 as avg(hits.ResolutionWidth), count(DISTINCT hits.UserID)@4 as count(DISTINCT hits.UserID)] +04)------AggregateExec: mode=FinalPartitioned, gby=[RegionID@0 as RegionID], aggr=[sum(hits.AdvEngineID), count(Int64(1)), avg(hits.ResolutionWidth), count(DISTINCT hits.UserID)] +05)--------RepartitionExec: partitioning=Hash([RegionID@0], 4), input_partitions=1 +06)----------AggregateExec: mode=Partial, gby=[RegionID@0 as RegionID], aggr=[sum(hits.AdvEngineID), count(Int64(1)), avg(hits.ResolutionWidth), count(DISTINCT hits.UserID)] +07)------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[RegionID, UserID, ResolutionWidth, AdvEngineID], file_type=parquet + query IIIRI rowsort SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10; ---- @@ -107,26 +279,163 @@ SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), CO 39 0 1 0 1 839 0 6 0 2 +query TT +EXPLAIN SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10; +---- +logical_plan +01)Sort: u DESC NULLS FIRST, fetch=10 +02)--Projection: hits.MobilePhoneModel, count(alias1) AS u +03)----Aggregate: groupBy=[[hits.MobilePhoneModel]], aggr=[[count(alias1)]] +04)------Aggregate: groupBy=[[hits.MobilePhoneModel, hits.UserID AS alias1]], aggr=[[]] +05)--------SubqueryAlias: hits +06)----------Filter: hits_raw.MobilePhoneModel != Utf8View("") +07)------------TableScan: hits_raw projection=[UserID, MobilePhoneModel], partial_filters=[hits_raw.MobilePhoneModel != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [u@1 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[u@1 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[MobilePhoneModel@0 as MobilePhoneModel, count(alias1)@1 as u] +04)------AggregateExec: mode=FinalPartitioned, gby=[MobilePhoneModel@0 as MobilePhoneModel], aggr=[count(alias1)] +05)--------RepartitionExec: partitioning=Hash([MobilePhoneModel@0], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[MobilePhoneModel@0 as MobilePhoneModel], aggr=[count(alias1)] +07)------------AggregateExec: mode=FinalPartitioned, gby=[MobilePhoneModel@0 as MobilePhoneModel, alias1@1 as alias1], aggr=[] +08)--------------RepartitionExec: partitioning=Hash([MobilePhoneModel@0, alias1@1], 4), input_partitions=4 +09)----------------AggregateExec: mode=Partial, 
gby=[MobilePhoneModel@1 as MobilePhoneModel, UserID@0 as alias1], aggr=[] +10)------------------FilterExec: MobilePhoneModel@1 != +11)--------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +12)----------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[UserID, MobilePhoneModel], file_type=parquet, predicate=MobilePhoneModel@34 != , pruning_predicate=MobilePhoneModel_null_count@2 != row_count@3 AND (MobilePhoneModel_min@0 != OR != MobilePhoneModel_max@1), required_guarantees=[MobilePhoneModel not in ()] + query TI SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10; ---- +query TT +EXPLAIN SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10; +---- +logical_plan +01)Sort: u DESC NULLS FIRST, fetch=10 +02)--Projection: hits.MobilePhone, hits.MobilePhoneModel, count(alias1) AS u +03)----Aggregate: groupBy=[[hits.MobilePhone, hits.MobilePhoneModel]], aggr=[[count(alias1)]] +04)------Aggregate: groupBy=[[hits.MobilePhone, hits.MobilePhoneModel, hits.UserID AS alias1]], aggr=[[]] +05)--------SubqueryAlias: hits +06)----------Filter: hits_raw.MobilePhoneModel != Utf8View("") +07)------------TableScan: hits_raw projection=[UserID, MobilePhone, MobilePhoneModel], partial_filters=[hits_raw.MobilePhoneModel != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [u@2 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[u@2 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[MobilePhone@0 as MobilePhone, MobilePhoneModel@1 as MobilePhoneModel, count(alias1)@2 as u] +04)------AggregateExec: mode=FinalPartitioned, gby=[MobilePhone@0 as MobilePhone, MobilePhoneModel@1 as MobilePhoneModel], aggr=[count(alias1)] 
+05)--------RepartitionExec: partitioning=Hash([MobilePhone@0, MobilePhoneModel@1], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[MobilePhone@0 as MobilePhone, MobilePhoneModel@1 as MobilePhoneModel], aggr=[count(alias1)] +07)------------AggregateExec: mode=FinalPartitioned, gby=[MobilePhone@0 as MobilePhone, MobilePhoneModel@1 as MobilePhoneModel, alias1@2 as alias1], aggr=[] +08)--------------RepartitionExec: partitioning=Hash([MobilePhone@0, MobilePhoneModel@1, alias1@2], 4), input_partitions=4 +09)----------------AggregateExec: mode=Partial, gby=[MobilePhone@1 as MobilePhone, MobilePhoneModel@2 as MobilePhoneModel, UserID@0 as alias1], aggr=[] +10)------------------FilterExec: MobilePhoneModel@2 != +11)--------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +12)----------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[UserID, MobilePhone, MobilePhoneModel], file_type=parquet, predicate=MobilePhoneModel@34 != , pruning_predicate=MobilePhoneModel_null_count@2 != row_count@3 AND (MobilePhoneModel_min@0 != OR != MobilePhoneModel_max@1), required_guarantees=[MobilePhoneModel not in ()] + query ITI SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10; ---- +query TT +EXPLAIN SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; +---- +logical_plan +01)Sort: c DESC NULLS FIRST, fetch=10 +02)--Projection: hits.SearchPhrase, count(Int64(1)) AS count(*) AS c +03)----Aggregate: groupBy=[[hits.SearchPhrase]], aggr=[[count(Int64(1))]] +04)------SubqueryAlias: hits +05)--------Filter: hits_raw.SearchPhrase != Utf8View("") +06)----------TableScan: hits_raw projection=[SearchPhrase], partial_filters=[hits_raw.SearchPhrase != Utf8View("")] 
+physical_plan +01)SortPreservingMergeExec: [c@1 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[c@1 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase, count(Int64(1))@1 as c] +04)------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as SearchPhrase], aggr=[count(Int64(1))] +05)--------RepartitionExec: partitioning=Hash([SearchPhrase@0], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[SearchPhrase@0 as SearchPhrase], aggr=[count(Int64(1))] +07)------------FilterExec: SearchPhrase@0 != +08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND (SearchPhrase_min@0 != OR != SearchPhrase_max@1), required_guarantees=[SearchPhrase not in ()] + query TI SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; ---- +query TT +EXPLAIN SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10; +---- +logical_plan +01)Sort: u DESC NULLS FIRST, fetch=10 +02)--Projection: hits.SearchPhrase, count(alias1) AS u +03)----Aggregate: groupBy=[[hits.SearchPhrase]], aggr=[[count(alias1)]] +04)------Aggregate: groupBy=[[hits.SearchPhrase, hits.UserID AS alias1]], aggr=[[]] +05)--------SubqueryAlias: hits +06)----------Filter: hits_raw.SearchPhrase != Utf8View("") +07)------------TableScan: hits_raw projection=[UserID, SearchPhrase], partial_filters=[hits_raw.SearchPhrase != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [u@1 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[u@1 DESC], preserve_partitioning=[true] +03)----ProjectionExec: 
expr=[SearchPhrase@0 as SearchPhrase, count(alias1)@1 as u] +04)------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as SearchPhrase], aggr=[count(alias1)] +05)--------RepartitionExec: partitioning=Hash([SearchPhrase@0], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[SearchPhrase@0 as SearchPhrase], aggr=[count(alias1)] +07)------------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as SearchPhrase, alias1@1 as alias1], aggr=[] +08)--------------RepartitionExec: partitioning=Hash([SearchPhrase@0, alias1@1], 4), input_partitions=4 +09)----------------AggregateExec: mode=Partial, gby=[SearchPhrase@1 as SearchPhrase, UserID@0 as alias1], aggr=[] +10)------------------FilterExec: SearchPhrase@1 != +11)--------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +12)----------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[UserID, SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND (SearchPhrase_min@0 != OR != SearchPhrase_max@1), required_guarantees=[SearchPhrase not in ()] + query TI SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10; ---- +query TT +EXPLAIN SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10; +---- +logical_plan +01)Sort: c DESC NULLS FIRST, fetch=10 +02)--Projection: hits.SearchEngineID, hits.SearchPhrase, count(Int64(1)) AS count(*) AS c +03)----Aggregate: groupBy=[[hits.SearchEngineID, hits.SearchPhrase]], aggr=[[count(Int64(1))]] +04)------SubqueryAlias: hits +05)--------Filter: hits_raw.SearchPhrase != Utf8View("") +06)----------TableScan: hits_raw projection=[SearchEngineID, SearchPhrase], 
partial_filters=[hits_raw.SearchPhrase != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [c@2 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[SearchEngineID@0 as SearchEngineID, SearchPhrase@1 as SearchPhrase, count(Int64(1))@2 as c] +04)------AggregateExec: mode=FinalPartitioned, gby=[SearchEngineID@0 as SearchEngineID, SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))] +05)--------RepartitionExec: partitioning=Hash([SearchEngineID@0, SearchPhrase@1], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[SearchEngineID@0 as SearchEngineID, SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))] +07)------------FilterExec: SearchPhrase@1 != +08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[SearchEngineID, SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND (SearchPhrase_min@0 != OR != SearchPhrase_max@1), required_guarantees=[SearchPhrase not in ()] + query ITI SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10; ---- +query TT +EXPLAIN SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10; +---- +logical_plan +01)Projection: hits.UserID, count(*) +02)--Sort: count(Int64(1)) AS count(*) AS count(*) DESC NULLS FIRST, fetch=10 +03)----Projection: hits.UserID, count(Int64(1)) AS count(*), count(Int64(1)) +04)------Aggregate: groupBy=[[hits.UserID]], aggr=[[count(Int64(1))]] +05)--------SubqueryAlias: hits +06)----------TableScan: hits_raw projection=[UserID] +physical_plan +01)ProjectionExec: expr=[UserID@0 as UserID, count(*)@1 as count(*)] +02)--SortPreservingMergeExec: 
[count(Int64(1))@2 DESC], fetch=10 +03)----SortExec: TopK(fetch=10), expr=[count(*)@1 DESC], preserve_partitioning=[true] +04)------ProjectionExec: expr=[UserID@0 as UserID, count(Int64(1))@1 as count(*), count(Int64(1))@1 as count(Int64(1))] +05)--------AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID], aggr=[count(Int64(1))] +06)----------RepartitionExec: partitioning=Hash([UserID@0], 4), input_partitions=1 +07)------------AggregateExec: mode=Partial, gby=[UserID@0 as UserID], aggr=[count(Int64(1))] +08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[UserID], file_type=parquet + query II rowsort SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10; ---- @@ -136,6 +445,26 @@ SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIM 519640690937130534 2 7418527520126366595 1 +query TT +EXPLAIN SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; +---- +logical_plan +01)Projection: hits.UserID, hits.SearchPhrase, count(*) +02)--Sort: count(Int64(1)) AS count(*) AS count(*) DESC NULLS FIRST, fetch=10 +03)----Projection: hits.UserID, hits.SearchPhrase, count(Int64(1)) AS count(*), count(Int64(1)) +04)------Aggregate: groupBy=[[hits.UserID, hits.SearchPhrase]], aggr=[[count(Int64(1))]] +05)--------SubqueryAlias: hits +06)----------TableScan: hits_raw projection=[UserID, SearchPhrase] +physical_plan +01)ProjectionExec: expr=[UserID@0 as UserID, SearchPhrase@1 as SearchPhrase, count(*)@2 as count(*)] +02)--SortPreservingMergeExec: [count(Int64(1))@3 DESC], fetch=10 +03)----SortExec: TopK(fetch=10), expr=[count(*)@2 DESC], preserve_partitioning=[true] +04)------ProjectionExec: expr=[UserID@0 as UserID, SearchPhrase@1 as SearchPhrase, count(Int64(1))@2 as count(*), count(Int64(1))@2 as count(Int64(1))] +05)--------AggregateExec: mode=FinalPartitioned, 
gby=[UserID@0 as UserID, SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))] +06)----------RepartitionExec: partitioning=Hash([UserID@0, SearchPhrase@1], 4), input_partitions=1 +07)------------AggregateExec: mode=Partial, gby=[UserID@0 as UserID, SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))] +08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[UserID, SearchPhrase], file_type=parquet + query ITI rowsort SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; ---- @@ -145,6 +474,23 @@ SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPh 519640690937130534 (empty) 2 7418527520126366595 (empty) 1 +query TT +EXPLAIN SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10; +---- +logical_plan +01)Projection: hits.UserID, hits.SearchPhrase, count(Int64(1)) AS count(*) +02)--Limit: skip=0, fetch=10 +03)----Aggregate: groupBy=[[hits.UserID, hits.SearchPhrase]], aggr=[[count(Int64(1))]] +04)------SubqueryAlias: hits +05)--------TableScan: hits_raw projection=[UserID, SearchPhrase] +physical_plan +01)ProjectionExec: expr=[UserID@0 as UserID, SearchPhrase@1 as SearchPhrase, count(Int64(1))@2 as count(*)] +02)--CoalescePartitionsExec: fetch=10 +03)----AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID, SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))] +04)------RepartitionExec: partitioning=Hash([UserID@0, SearchPhrase@1], 4), input_partitions=1 +05)--------AggregateExec: mode=Partial, gby=[UserID@0 as UserID, SearchPhrase@1 as SearchPhrase], aggr=[count(Int64(1))] +06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[UserID, SearchPhrase], file_type=parquet + query ITI rowsort SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", 
"SearchPhrase" LIMIT 10; ---- @@ -154,6 +500,26 @@ SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPh 519640690937130534 (empty) 2 7418527520126366595 (empty) 1 +query TT +EXPLAIN SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; +---- +logical_plan +01)Projection: hits.UserID, m, hits.SearchPhrase, count(*) +02)--Sort: count(Int64(1)) AS count(*) AS count(*) DESC NULLS FIRST, fetch=10 +03)----Projection: hits.UserID, date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime)) AS m, hits.SearchPhrase, count(Int64(1)) AS count(*), count(Int64(1)) +04)------Aggregate: groupBy=[[hits.UserID, date_part(Utf8("MINUTE"), to_timestamp_seconds(hits.EventTime)), hits.SearchPhrase]], aggr=[[count(Int64(1))]] +05)--------SubqueryAlias: hits +06)----------TableScan: hits_raw projection=[EventTime, UserID, SearchPhrase] +physical_plan +01)ProjectionExec: expr=[UserID@0 as UserID, m@1 as m, SearchPhrase@2 as SearchPhrase, count(*)@3 as count(*)] +02)--SortPreservingMergeExec: [count(Int64(1))@4 DESC], fetch=10 +03)----SortExec: TopK(fetch=10), expr=[count(*)@3 DESC], preserve_partitioning=[true] +04)------ProjectionExec: expr=[UserID@0 as UserID, date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime))@1 as m, SearchPhrase@2 as SearchPhrase, count(Int64(1))@3 as count(*), count(Int64(1))@3 as count(Int64(1))] +05)--------AggregateExec: mode=FinalPartitioned, gby=[UserID@0 as UserID, date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime))@1 as date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime)), SearchPhrase@2 as SearchPhrase], aggr=[count(Int64(1))] +06)----------RepartitionExec: partitioning=Hash([UserID@0, date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime))@1, SearchPhrase@2], 4), input_partitions=1 +07)------------AggregateExec: mode=Partial, gby=[UserID@1 as UserID, 
date_part(MINUTE, to_timestamp_seconds(EventTime@0)) as date_part(Utf8("MINUTE"),to_timestamp_seconds(hits.EventTime)), SearchPhrase@2 as SearchPhrase], aggr=[count(Int64(1))] +08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[EventTime, UserID, SearchPhrase], file_type=parquet + query IITI rowsort SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; ---- @@ -168,61 +534,323 @@ SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, "S 519640690937130534 36 (empty) 1 7418527520126366595 18 (empty) 1 +query TT +EXPLAIN SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449; +---- +logical_plan +01)SubqueryAlias: hits +02)--Filter: hits_raw.UserID = Int64(435090932899640449) +03)----TableScan: hits_raw projection=[UserID], partial_filters=[hits_raw.UserID = Int64(435090932899640449)] +physical_plan +01)FilterExec: UserID@0 = 435090932899640449 +02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[UserID], file_type=parquet, predicate=UserID@9 = 435090932899640449, pruning_predicate=UserID_null_count@2 != row_count@3 AND UserID_min@0 <= 435090932899640449 AND 435090932899640449 <= UserID_max@1, required_guarantees=[UserID in (435090932899640449)] + query I SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449; ---- +query TT +EXPLAIN SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%'; +---- +logical_plan +01)Projection: count(Int64(1)) AS count(*) +02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]] +03)----SubqueryAlias: hits +04)------Projection: +05)--------Filter: hits_raw.URL LIKE Utf8View("%google%") +06)----------TableScan: hits_raw projection=[URL], 
partial_filters=[hits_raw.URL LIKE Utf8View("%google%")] +physical_plan +01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)] +02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] +03)----CoalescePartitionsExec +04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] +05)--------FilterExec: URL@0 LIKE %google%, projection=[] +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[URL], file_type=parquet, predicate=URL@13 LIKE %google% + query I SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%'; ---- 0 +query TT +EXPLAIN SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; +---- +logical_plan +01)Sort: c DESC NULLS FIRST, fetch=10 +02)--Projection: hits.SearchPhrase, min(hits.URL), count(Int64(1)) AS count(*) AS c +03)----Aggregate: groupBy=[[hits.SearchPhrase]], aggr=[[min(hits.URL), count(Int64(1))]] +04)------SubqueryAlias: hits +05)--------Filter: hits_raw.URL LIKE Utf8View("%google%") AND hits_raw.SearchPhrase != Utf8View("") +06)----------TableScan: hits_raw projection=[URL, SearchPhrase], partial_filters=[hits_raw.URL LIKE Utf8View("%google%"), hits_raw.SearchPhrase != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [c@2 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase, min(hits.URL)@1 as min(hits.URL), count(Int64(1))@2 as c] +04)------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as SearchPhrase], aggr=[min(hits.URL), count(Int64(1))] +05)--------RepartitionExec: partitioning=Hash([SearchPhrase@0], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[SearchPhrase@1 as SearchPhrase], aggr=[min(hits.URL), 
count(Int64(1))] +07)------------FilterExec: URL@0 LIKE %google% AND SearchPhrase@1 != +08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[URL, SearchPhrase], file_type=parquet, predicate=URL@13 LIKE %google% AND SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@4 != row_count@5 AND (SearchPhrase_min@2 != OR != SearchPhrase_max@3), required_guarantees=[SearchPhrase not in ()] + query TTI SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; ---- +query TT +EXPLAIN SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; +---- +logical_plan +01)Sort: c DESC NULLS FIRST, fetch=10 +02)--Projection: hits.SearchPhrase, min(hits.URL), min(hits.Title), count(Int64(1)) AS count(*) AS c, count(DISTINCT hits.UserID) +03)----Aggregate: groupBy=[[hits.SearchPhrase]], aggr=[[min(hits.URL), min(hits.Title), count(Int64(1)), count(DISTINCT hits.UserID)]] +04)------SubqueryAlias: hits +05)--------Filter: hits_raw.Title LIKE Utf8View("%Google%") AND hits_raw.URL NOT LIKE Utf8View("%.google.%") AND hits_raw.SearchPhrase != Utf8View("") +06)----------TableScan: hits_raw projection=[Title, UserID, URL, SearchPhrase], partial_filters=[hits_raw.Title LIKE Utf8View("%Google%"), hits_raw.URL NOT LIKE Utf8View("%.google.%"), hits_raw.SearchPhrase != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [c@3 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[c@3 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase, min(hits.URL)@1 as min(hits.URL), min(hits.Title)@2 as 
min(hits.Title), count(Int64(1))@3 as c, count(DISTINCT hits.UserID)@4 as count(DISTINCT hits.UserID)] +04)------AggregateExec: mode=FinalPartitioned, gby=[SearchPhrase@0 as SearchPhrase], aggr=[min(hits.URL), min(hits.Title), count(Int64(1)), count(DISTINCT hits.UserID)] +05)--------RepartitionExec: partitioning=Hash([SearchPhrase@0], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[SearchPhrase@3 as SearchPhrase], aggr=[min(hits.URL), min(hits.Title), count(Int64(1)), count(DISTINCT hits.UserID)] +07)------------FilterExec: Title@0 LIKE %Google% AND URL@2 NOT LIKE %.google.% AND SearchPhrase@3 != +08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[Title, UserID, URL, SearchPhrase], file_type=parquet, predicate=Title@2 LIKE %Google% AND URL@13 NOT LIKE %.google.% AND SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@6 != row_count@7 AND (SearchPhrase_min@4 != OR != SearchPhrase_max@5), required_guarantees=[SearchPhrase not in ()] + query TTTII SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; ---- -query IITIIIIIIIIITTIIIIIIIIIITIIITIIIITTIIITIIIIIIIIIITIIIIITIIIIIITIIIIIIIIIITTTTIIIIIIIITITTITTTTTTTTTTIIIID +query TT +EXPLAIN SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10; +---- +logical_plan +01)Sort: hits.EventTime ASC NULLS LAST, fetch=10 +02)--SubqueryAlias: hits +03)----Projection: hits_raw.WatchID, hits_raw.JavaEnable, hits_raw.Title, hits_raw.GoodEvent, hits_raw.EventTime, hits_raw.CounterID, hits_raw.ClientIP, hits_raw.RegionID, hits_raw.UserID, hits_raw.CounterClass, hits_raw.OS, hits_raw.UserAgent, hits_raw.URL, hits_raw.Referer, 
hits_raw.IsRefresh, hits_raw.RefererCategoryID, hits_raw.RefererRegionID, hits_raw.URLCategoryID, hits_raw.URLRegionID, hits_raw.ResolutionWidth, hits_raw.ResolutionHeight, hits_raw.ResolutionDepth, hits_raw.FlashMajor, hits_raw.FlashMinor, hits_raw.FlashMinor2, hits_raw.NetMajor, hits_raw.NetMinor, hits_raw.UserAgentMajor, hits_raw.UserAgentMinor, hits_raw.CookieEnable, hits_raw.JavascriptEnable, hits_raw.IsMobile, hits_raw.MobilePhone, hits_raw.MobilePhoneModel, hits_raw.Params, hits_raw.IPNetworkID, hits_raw.TraficSourceID, hits_raw.SearchEngineID, hits_raw.SearchPhrase, hits_raw.AdvEngineID, hits_raw.IsArtifical, hits_raw.WindowClientWidth, hits_raw.WindowClientHeight, hits_raw.ClientTimeZone, hits_raw.ClientEventTime, hits_raw.SilverlightVersion1, hits_raw.SilverlightVersion2, hits_raw.SilverlightVersion3, hits_raw.SilverlightVersion4, hits_raw.PageCharset, hits_raw.CodeVersion, hits_raw.IsLink, hits_raw.IsDownload, hits_raw.IsNotBounce, hits_raw.FUniqID, hits_raw.OriginalURL, hits_raw.HID, hits_raw.IsOldCounter, hits_raw.IsEvent, hits_raw.IsParameter, hits_raw.DontCountHits, hits_raw.WithHash, hits_raw.HitColor, hits_raw.LocalEventTime, hits_raw.Age, hits_raw.Sex, hits_raw.Income, hits_raw.Interests, hits_raw.Robotness, hits_raw.RemoteIP, hits_raw.WindowName, hits_raw.OpenerName, hits_raw.HistoryLength, hits_raw.BrowserLanguage, hits_raw.BrowserCountry, hits_raw.SocialNetwork, hits_raw.SocialAction, hits_raw.HTTPError, hits_raw.SendTiming, hits_raw.DNSTiming, hits_raw.ConnectTiming, hits_raw.ResponseStartTiming, hits_raw.ResponseEndTiming, hits_raw.FetchTiming, hits_raw.SocialSourceNetworkID, hits_raw.SocialSourcePage, hits_raw.ParamPrice, hits_raw.ParamOrderID, hits_raw.ParamCurrency, hits_raw.ParamCurrencyID, hits_raw.OpenstatServiceName, hits_raw.OpenstatCampaignID, hits_raw.OpenstatAdID, hits_raw.OpenstatSourceID, hits_raw.UTMSource, hits_raw.UTMMedium, hits_raw.UTMCampaign, hits_raw.UTMContent, hits_raw.UTMTerm, hits_raw.FromTag, hits_raw.HasGCLID, 
hits_raw.RefererHash, hits_raw.URLHash, hits_raw.CLID, CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) AS EventDate +04)------Filter: hits_raw.URL LIKE Utf8View("%google%") +05)--------TableScan: hits_raw projection=[WatchID, JavaEnable, Title, GoodEvent, EventTime, EventDate, CounterID, ClientIP, RegionID, UserID, CounterClass, OS, UserAgent, URL, Referer, IsRefresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, ClientEventTime, SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, FUniqID, OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, LocalEventTime, Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, RefererHash, URLHash, CLID], partial_filters=[hits_raw.URL LIKE Utf8View("%google%")] +physical_plan +01)SortPreservingMergeExec: [EventTime@4 ASC NULLS LAST], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[EventTime@4 ASC NULLS LAST], preserve_partitioning=[true] +03)----ProjectionExec: expr=[WatchID@0 as WatchID, JavaEnable@1 as JavaEnable, Title@2 as Title, GoodEvent@3 as GoodEvent, EventTime@4 as EventTime, CounterID@6 
as CounterID, ClientIP@7 as ClientIP, RegionID@8 as RegionID, UserID@9 as UserID, CounterClass@10 as CounterClass, OS@11 as OS, UserAgent@12 as UserAgent, URL@13 as URL, Referer@14 as Referer, IsRefresh@15 as IsRefresh, RefererCategoryID@16 as RefererCategoryID, RefererRegionID@17 as RefererRegionID, URLCategoryID@18 as URLCategoryID, URLRegionID@19 as URLRegionID, ResolutionWidth@20 as ResolutionWidth, ResolutionHeight@21 as ResolutionHeight, ResolutionDepth@22 as ResolutionDepth, FlashMajor@23 as FlashMajor, FlashMinor@24 as FlashMinor, FlashMinor2@25 as FlashMinor2, NetMajor@26 as NetMajor, NetMinor@27 as NetMinor, UserAgentMajor@28 as UserAgentMajor, UserAgentMinor@29 as UserAgentMinor, CookieEnable@30 as CookieEnable, JavascriptEnable@31 as JavascriptEnable, IsMobile@32 as IsMobile, MobilePhone@33 as MobilePhone, MobilePhoneModel@34 as MobilePhoneModel, Params@35 as Params, IPNetworkID@36 as IPNetworkID, TraficSourceID@37 as TraficSourceID, SearchEngineID@38 as SearchEngineID, SearchPhrase@39 as SearchPhrase, AdvEngineID@40 as AdvEngineID, IsArtifical@41 as IsArtifical, WindowClientWidth@42 as WindowClientWidth, WindowClientHeight@43 as WindowClientHeight, ClientTimeZone@44 as ClientTimeZone, ClientEventTime@45 as ClientEventTime, SilverlightVersion1@46 as SilverlightVersion1, SilverlightVersion2@47 as SilverlightVersion2, SilverlightVersion3@48 as SilverlightVersion3, SilverlightVersion4@49 as SilverlightVersion4, PageCharset@50 as PageCharset, CodeVersion@51 as CodeVersion, IsLink@52 as IsLink, IsDownload@53 as IsDownload, IsNotBounce@54 as IsNotBounce, FUniqID@55 as FUniqID, OriginalURL@56 as OriginalURL, HID@57 as HID, IsOldCounter@58 as IsOldCounter, IsEvent@59 as IsEvent, IsParameter@60 as IsParameter, DontCountHits@61 as DontCountHits, WithHash@62 as WithHash, HitColor@63 as HitColor, LocalEventTime@64 as LocalEventTime, Age@65 as Age, Sex@66 as Sex, Income@67 as Income, Interests@68 as Interests, Robotness@69 as Robotness, RemoteIP@70 as RemoteIP, 
WindowName@71 as WindowName, OpenerName@72 as OpenerName, HistoryLength@73 as HistoryLength, BrowserLanguage@74 as BrowserLanguage, BrowserCountry@75 as BrowserCountry, SocialNetwork@76 as SocialNetwork, SocialAction@77 as SocialAction, HTTPError@78 as HTTPError, SendTiming@79 as SendTiming, DNSTiming@80 as DNSTiming, ConnectTiming@81 as ConnectTiming, ResponseStartTiming@82 as ResponseStartTiming, ResponseEndTiming@83 as ResponseEndTiming, FetchTiming@84 as FetchTiming, SocialSourceNetworkID@85 as SocialSourceNetworkID, SocialSourcePage@86 as SocialSourcePage, ParamPrice@87 as ParamPrice, ParamOrderID@88 as ParamOrderID, ParamCurrency@89 as ParamCurrency, ParamCurrencyID@90 as ParamCurrencyID, OpenstatServiceName@91 as OpenstatServiceName, OpenstatCampaignID@92 as OpenstatCampaignID, OpenstatAdID@93 as OpenstatAdID, OpenstatSourceID@94 as OpenstatSourceID, UTMSource@95 as UTMSource, UTMMedium@96 as UTMMedium, UTMCampaign@97 as UTMCampaign, UTMContent@98 as UTMContent, UTMTerm@99 as UTMTerm, FromTag@100 as FromTag, HasGCLID@101 as HasGCLID, RefererHash@102 as RefererHash, URLHash@103 as URLHash, CLID@104 as CLID, CAST(CAST(EventDate@5 AS Int32) AS Date32) as EventDate] +04)------FilterExec: URL@13 LIKE %google% +05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[WatchID, JavaEnable, Title, GoodEvent, EventTime, EventDate, CounterID, ClientIP, RegionID, UserID, CounterClass, OS, UserAgent, URL, Referer, IsRefresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, 
WindowClientHeight, ClientTimeZone, ClientEventTime, SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, FUniqID, OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, LocalEventTime, Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, RefererHash, URLHash, CLID], file_type=parquet, predicate=URL@13 LIKE %google% AND DynamicFilter [ empty ] +query IITIIIIIIIIITTIIIIIIIIIITIIITIIIITTIIITIIIIIIIIIITIIIIITIIIIIITIIIIIIIIIITTTTIIIIIIIITITTITTTTTTTTTTIIIID SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10; ---- +query TT +EXPLAIN SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10; +---- +logical_plan +01)Projection: hits.SearchPhrase +02)--Sort: hits.EventTime ASC NULLS LAST, fetch=10 +03)----Projection: hits.SearchPhrase, hits.EventTime +04)------SubqueryAlias: hits +05)--------Filter: hits_raw.SearchPhrase != Utf8View("") +06)----------TableScan: hits_raw projection=[EventTime, SearchPhrase], partial_filters=[hits_raw.SearchPhrase != Utf8View("")] +physical_plan +01)ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase] +02)--SortPreservingMergeExec: [EventTime@1 ASC NULLS LAST], fetch=10 +03)----SortExec: TopK(fetch=10), expr=[EventTime@1 ASC NULLS LAST], preserve_partitioning=[true] +04)------ProjectionExec: expr=[SearchPhrase@1 as SearchPhrase, EventTime@0 as EventTime] +05)--------FilterExec: SearchPhrase@1 != 
+06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[EventTime, SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 != AND DynamicFilter [ empty ], pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND (SearchPhrase_min@0 != OR != SearchPhrase_max@1), required_guarantees=[SearchPhrase not in ()] + query T SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10; ---- +query TT +EXPLAIN SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10; +---- +logical_plan +01)Sort: hits.SearchPhrase ASC NULLS LAST, fetch=10 +02)--SubqueryAlias: hits +03)----Filter: hits_raw.SearchPhrase != Utf8View("") +04)------TableScan: hits_raw projection=[SearchPhrase], partial_filters=[hits_raw.SearchPhrase != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [SearchPhrase@0 ASC NULLS LAST], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[SearchPhrase@0 ASC NULLS LAST], preserve_partitioning=[true] +03)----FilterExec: SearchPhrase@0 != +04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 != AND DynamicFilter [ empty ], pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND (SearchPhrase_min@0 != OR != SearchPhrase_max@1), required_guarantees=[SearchPhrase not in ()] + query T SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10; ---- +query TT +EXPLAIN SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10; +---- +logical_plan +01)Projection: hits.SearchPhrase +02)--Sort: hits.EventTime ASC NULLS LAST, 
hits.SearchPhrase ASC NULLS LAST, fetch=10 +03)----Projection: hits.SearchPhrase, hits.EventTime +04)------SubqueryAlias: hits +05)--------Filter: hits_raw.SearchPhrase != Utf8View("") +06)----------TableScan: hits_raw projection=[EventTime, SearchPhrase], partial_filters=[hits_raw.SearchPhrase != Utf8View("")] +physical_plan +01)ProjectionExec: expr=[SearchPhrase@0 as SearchPhrase] +02)--SortPreservingMergeExec: [EventTime@1 ASC NULLS LAST, SearchPhrase@0 ASC NULLS LAST], fetch=10 +03)----SortExec: TopK(fetch=10), expr=[EventTime@1 ASC NULLS LAST, SearchPhrase@0 ASC NULLS LAST], preserve_partitioning=[true] +04)------ProjectionExec: expr=[SearchPhrase@1 as SearchPhrase, EventTime@0 as EventTime] +05)--------FilterExec: SearchPhrase@1 != +06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[EventTime, SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 != AND DynamicFilter [ empty ], pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND (SearchPhrase_min@0 != OR != SearchPhrase_max@1), required_guarantees=[SearchPhrase not in ()] + query T SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10; ---- +query TT +EXPLAIN SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +---- +logical_plan +01)Sort: l DESC NULLS FIRST, fetch=25 +02)--Projection: hits.CounterID, avg(length(hits.URL)) AS l, count(Int64(1)) AS count(*) AS c +03)----Filter: count(Int64(1)) > Int64(100000) +04)------Aggregate: groupBy=[[hits.CounterID]], aggr=[[avg(CAST(character_length(hits.URL) AS length(hits.URL) AS Float64)), count(Int64(1))]] +05)--------SubqueryAlias: hits +06)----------Filter: hits_raw.URL != Utf8View("") +07)------------TableScan: hits_raw 
projection=[CounterID, URL], partial_filters=[hits_raw.URL != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [l@1 DESC], fetch=25 +02)--SortExec: TopK(fetch=25), expr=[l@1 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[CounterID@0 as CounterID, avg(length(hits.URL))@1 as l, count(Int64(1))@2 as c] +04)------FilterExec: count(Int64(1))@2 > 100000 +05)--------AggregateExec: mode=FinalPartitioned, gby=[CounterID@0 as CounterID], aggr=[avg(length(hits.URL)), count(Int64(1))] +06)----------RepartitionExec: partitioning=Hash([CounterID@0], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[CounterID@0 as CounterID], aggr=[avg(length(hits.URL)), count(Int64(1))] +08)--------------FilterExec: URL@1 != +09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +10)------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[CounterID, URL], file_type=parquet, predicate=URL@13 != , pruning_predicate=URL_null_count@2 != row_count@3 AND (URL_min@0 != OR != URL_max@1), required_guarantees=[URL not in ()] + query IRI SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; ---- +query TT +EXPLAIN SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +---- +logical_plan +01)Sort: l DESC NULLS FIRST, fetch=25 +02)--Projection: regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1")) AS k, avg(length(hits.Referer)) AS l, count(Int64(1)) AS count(*) AS c, min(hits.Referer) +03)----Filter: count(Int64(1)) > Int64(100000) +04)------Aggregate: groupBy=[[regexp_replace(hits.Referer, Utf8View("^https?://(?:www\.)?([^/]+)/.*$"), 
Utf8View("\1")) AS regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))]], aggr=[[avg(CAST(character_length(hits.Referer) AS length(hits.Referer) AS Float64)), count(Int64(1)), min(hits.Referer)]] +05)--------SubqueryAlias: hits +06)----------Filter: hits_raw.Referer != Utf8View("") +07)------------TableScan: hits_raw projection=[Referer], partial_filters=[hits_raw.Referer != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [l@1 DESC], fetch=25 +02)--SortExec: TopK(fetch=25), expr=[l@1 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))@0 as k, avg(length(hits.Referer))@1 as l, count(Int64(1))@2 as c, min(hits.Referer)@3 as min(hits.Referer)] +04)------FilterExec: count(Int64(1))@2 > 100000 +05)--------AggregateExec: mode=FinalPartitioned, gby=[regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))@0 as regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))], aggr=[avg(length(hits.Referer)), count(Int64(1)), min(hits.Referer)] +06)----------RepartitionExec: partitioning=Hash([regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))@0], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[regexp_replace(Referer@0, ^https?://(?:www\.)?([^/]+)/.*$, \1) as regexp_replace(hits.Referer,Utf8("^https?://(?:www\.)?([^/]+)/.*$"),Utf8("\1"))], aggr=[avg(length(hits.Referer)), count(Int64(1)), min(hits.Referer)] +08)--------------FilterExec: Referer@0 != +09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +10)------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[Referer], file_type=parquet, predicate=Referer@14 != , pruning_predicate=Referer_null_count@2 != row_count@3 AND (Referer_min@0 != OR != Referer_max@1), 
required_guarantees=[Referer not in ()] + query TRIT SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; ---- +query TT +EXPLAIN SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), 
SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS hits.ResolutionWidth), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(1)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(2)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(3)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(4)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(5)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(6)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(7)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(8)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(9)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(10)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(11)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(12)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(13)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(14)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(15)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(16)), sum(__common_expr_1 AS 
hits.ResolutionWidth + Int64(17)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(18)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(19)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(20)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(21)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(22)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(23)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(24)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(25)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(26)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(27)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(28)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(29)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(30)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(31)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(32)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(33)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(34)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(35)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(36)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(37)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(38)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(39)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(40)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(41)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(42)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(43)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(44)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(45)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(46)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(47)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(48)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(49)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(50)), sum(__common_expr_1 AS hits.ResolutionWidth + 
Int64(51)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(52)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(53)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(54)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(55)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(56)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(57)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(58)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(59)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(60)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(61)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(62)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(63)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(64)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(65)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(66)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(67)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(68)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(69)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(70)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(71)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(72)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(73)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(74)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(75)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(76)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(77)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(78)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(79)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(80)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(81)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(82)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(83)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(84)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(85)), 
sum(__common_expr_1 AS hits.ResolutionWidth + Int64(86)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(87)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(88)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(89))]] +02)--Projection: CAST(hits.ResolutionWidth AS Int64) AS __common_expr_1 +03)----SubqueryAlias: hits +04)------TableScan: hits_raw projection=[ResolutionWidth] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[sum(hits.ResolutionWidth), sum(hits.ResolutionWidth + Int64(1)), sum(hits.ResolutionWidth + Int64(2)), sum(hits.ResolutionWidth + Int64(3)), sum(hits.ResolutionWidth + Int64(4)), sum(hits.ResolutionWidth + Int64(5)), sum(hits.ResolutionWidth + Int64(6)), sum(hits.ResolutionWidth + Int64(7)), sum(hits.ResolutionWidth + Int64(8)), sum(hits.ResolutionWidth + Int64(9)), sum(hits.ResolutionWidth + Int64(10)), sum(hits.ResolutionWidth + Int64(11)), sum(hits.ResolutionWidth + Int64(12)), sum(hits.ResolutionWidth + Int64(13)), sum(hits.ResolutionWidth + Int64(14)), sum(hits.ResolutionWidth + Int64(15)), sum(hits.ResolutionWidth + Int64(16)), sum(hits.ResolutionWidth + Int64(17)), sum(hits.ResolutionWidth + Int64(18)), sum(hits.ResolutionWidth + Int64(19)), sum(hits.ResolutionWidth + Int64(20)), sum(hits.ResolutionWidth + Int64(21)), sum(hits.ResolutionWidth + Int64(22)), sum(hits.ResolutionWidth + Int64(23)), sum(hits.ResolutionWidth + Int64(24)), sum(hits.ResolutionWidth + Int64(25)), sum(hits.ResolutionWidth + Int64(26)), sum(hits.ResolutionWidth + Int64(27)), sum(hits.ResolutionWidth + Int64(28)), sum(hits.ResolutionWidth + Int64(29)), sum(hits.ResolutionWidth + Int64(30)), sum(hits.ResolutionWidth + Int64(31)), sum(hits.ResolutionWidth + Int64(32)), sum(hits.ResolutionWidth + Int64(33)), sum(hits.ResolutionWidth + Int64(34)), sum(hits.ResolutionWidth + Int64(35)), sum(hits.ResolutionWidth + Int64(36)), sum(hits.ResolutionWidth + Int64(37)), sum(hits.ResolutionWidth + Int64(38)), sum(hits.ResolutionWidth + Int64(39)), 
sum(hits.ResolutionWidth + Int64(40)), sum(hits.ResolutionWidth + Int64(41)), sum(hits.ResolutionWidth + Int64(42)), sum(hits.ResolutionWidth + Int64(43)), sum(hits.ResolutionWidth + Int64(44)), sum(hits.ResolutionWidth + Int64(45)), sum(hits.ResolutionWidth + Int64(46)), sum(hits.ResolutionWidth + Int64(47)), sum(hits.ResolutionWidth + Int64(48)), sum(hits.ResolutionWidth + Int64(49)), sum(hits.ResolutionWidth + Int64(50)), sum(hits.ResolutionWidth + Int64(51)), sum(hits.ResolutionWidth + Int64(52)), sum(hits.ResolutionWidth + Int64(53)), sum(hits.ResolutionWidth + Int64(54)), sum(hits.ResolutionWidth + Int64(55)), sum(hits.ResolutionWidth + Int64(56)), sum(hits.ResolutionWidth + Int64(57)), sum(hits.ResolutionWidth + Int64(58)), sum(hits.ResolutionWidth + Int64(59)), sum(hits.ResolutionWidth + Int64(60)), sum(hits.ResolutionWidth + Int64(61)), sum(hits.ResolutionWidth + Int64(62)), sum(hits.ResolutionWidth + Int64(63)), sum(hits.ResolutionWidth + Int64(64)), sum(hits.ResolutionWidth + Int64(65)), sum(hits.ResolutionWidth + Int64(66)), sum(hits.ResolutionWidth + Int64(67)), sum(hits.ResolutionWidth + Int64(68)), sum(hits.ResolutionWidth + Int64(69)), sum(hits.ResolutionWidth + Int64(70)), sum(hits.ResolutionWidth + Int64(71)), sum(hits.ResolutionWidth + Int64(72)), sum(hits.ResolutionWidth + Int64(73)), sum(hits.ResolutionWidth + Int64(74)), sum(hits.ResolutionWidth + Int64(75)), sum(hits.ResolutionWidth + Int64(76)), sum(hits.ResolutionWidth + Int64(77)), sum(hits.ResolutionWidth + Int64(78)), sum(hits.ResolutionWidth + Int64(79)), sum(hits.ResolutionWidth + Int64(80)), sum(hits.ResolutionWidth + Int64(81)), sum(hits.ResolutionWidth + Int64(82)), sum(hits.ResolutionWidth + Int64(83)), sum(hits.ResolutionWidth + Int64(84)), sum(hits.ResolutionWidth + Int64(85)), sum(hits.ResolutionWidth + Int64(86)), sum(hits.ResolutionWidth + Int64(87)), sum(hits.ResolutionWidth + Int64(88)), sum(hits.ResolutionWidth + Int64(89))] +02)--DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[CAST(ResolutionWidth@20 AS Int64) as __common_expr_1], file_type=parquet + query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), 
SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits; ---- 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 310 320 330 340 350 360 370 380 390 400 410 420 430 440 450 460 470 480 490 500 510 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660 670 680 690 700 710 720 730 740 750 760 770 780 790 800 810 820 830 840 850 860 870 880 890 +query TT +EXPLAIN SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10; +---- +logical_plan +01)Sort: c DESC NULLS FIRST, fetch=10 +02)--Projection: hits.SearchEngineID, hits.ClientIP, count(Int64(1)) AS count(*) AS c, sum(hits.IsRefresh), avg(hits.ResolutionWidth) +03)----Aggregate: groupBy=[[hits.SearchEngineID, hits.ClientIP]], aggr=[[count(Int64(1)), sum(CAST(hits.IsRefresh AS Int64)), avg(CAST(hits.ResolutionWidth AS Float64))]] +04)------SubqueryAlias: hits +05)--------Projection: hits_raw.ClientIP, hits_raw.IsRefresh, hits_raw.ResolutionWidth, hits_raw.SearchEngineID +06)----------Filter: hits_raw.SearchPhrase 
!= Utf8View("") +07)------------TableScan: hits_raw projection=[ClientIP, IsRefresh, ResolutionWidth, SearchEngineID, SearchPhrase], partial_filters=[hits_raw.SearchPhrase != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [c@2 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[SearchEngineID@0 as SearchEngineID, ClientIP@1 as ClientIP, count(Int64(1))@2 as c, sum(hits.IsRefresh)@3 as sum(hits.IsRefresh), avg(hits.ResolutionWidth)@4 as avg(hits.ResolutionWidth)] +04)------AggregateExec: mode=FinalPartitioned, gby=[SearchEngineID@0 as SearchEngineID, ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh), avg(hits.ResolutionWidth)] +05)--------RepartitionExec: partitioning=Hash([SearchEngineID@0, ClientIP@1], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[SearchEngineID@3 as SearchEngineID, ClientIP@0 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh), avg(hits.ResolutionWidth)] +07)------------FilterExec: SearchPhrase@4 != , projection=[ClientIP@0, IsRefresh@1, ResolutionWidth@2, SearchEngineID@3] +08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[ClientIP, IsRefresh, ResolutionWidth, SearchEngineID, SearchPhrase], file_type=parquet, predicate=SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND (SearchPhrase_min@0 != OR != SearchPhrase_max@1), required_guarantees=[SearchPhrase not in ()] + query IIIIR SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10; ---- +query TT +EXPLAIN SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> 
'' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; +---- +logical_plan +01)Sort: c DESC NULLS FIRST, fetch=10 +02)--Projection: hits.WatchID, hits.ClientIP, count(Int64(1)) AS count(*) AS c, sum(hits.IsRefresh), avg(hits.ResolutionWidth) +03)----Aggregate: groupBy=[[hits.WatchID, hits.ClientIP]], aggr=[[count(Int64(1)), sum(CAST(hits.IsRefresh AS Int64)), avg(CAST(hits.ResolutionWidth AS Float64))]] +04)------SubqueryAlias: hits +05)--------Projection: hits_raw.WatchID, hits_raw.ClientIP, hits_raw.IsRefresh, hits_raw.ResolutionWidth +06)----------Filter: hits_raw.SearchPhrase != Utf8View("") +07)------------TableScan: hits_raw projection=[WatchID, ClientIP, IsRefresh, ResolutionWidth, SearchPhrase], partial_filters=[hits_raw.SearchPhrase != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [c@2 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[WatchID@0 as WatchID, ClientIP@1 as ClientIP, count(Int64(1))@2 as c, sum(hits.IsRefresh)@3 as sum(hits.IsRefresh), avg(hits.ResolutionWidth)@4 as avg(hits.ResolutionWidth)] +04)------AggregateExec: mode=FinalPartitioned, gby=[WatchID@0 as WatchID, ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh), avg(hits.ResolutionWidth)] +05)--------RepartitionExec: partitioning=Hash([WatchID@0, ClientIP@1], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[WatchID@0 as WatchID, ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh), avg(hits.ResolutionWidth)] +07)------------FilterExec: SearchPhrase@4 != , projection=[WatchID@0, ClientIP@1, IsRefresh@2, ResolutionWidth@3] +08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[WatchID, ClientIP, IsRefresh, ResolutionWidth, SearchPhrase], file_type=parquet, 
predicate=SearchPhrase@39 != , pruning_predicate=SearchPhrase_null_count@2 != row_count@3 AND (SearchPhrase_min@0 != OR != SearchPhrase_max@1), required_guarantees=[SearchPhrase not in ()] + query IIIIR SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; ---- +query TT +EXPLAIN SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; +---- +logical_plan +01)Sort: c DESC NULLS FIRST, fetch=10 +02)--Projection: hits.WatchID, hits.ClientIP, count(Int64(1)) AS count(*) AS c, sum(hits.IsRefresh), avg(hits.ResolutionWidth) +03)----Aggregate: groupBy=[[hits.WatchID, hits.ClientIP]], aggr=[[count(Int64(1)), sum(CAST(hits.IsRefresh AS Int64)), avg(CAST(hits.ResolutionWidth AS Float64))]] +04)------SubqueryAlias: hits +05)--------TableScan: hits_raw projection=[WatchID, ClientIP, IsRefresh, ResolutionWidth] +physical_plan +01)SortPreservingMergeExec: [c@2 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[WatchID@0 as WatchID, ClientIP@1 as ClientIP, count(Int64(1))@2 as c, sum(hits.IsRefresh)@3 as sum(hits.IsRefresh), avg(hits.ResolutionWidth)@4 as avg(hits.ResolutionWidth)] +04)------AggregateExec: mode=FinalPartitioned, gby=[WatchID@0 as WatchID, ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh), avg(hits.ResolutionWidth)] +05)--------RepartitionExec: partitioning=Hash([WatchID@0, ClientIP@1], 4), input_partitions=1 +06)----------AggregateExec: mode=Partial, gby=[WatchID@0 as WatchID, ClientIP@1 as ClientIP], aggr=[count(Int64(1)), sum(hits.IsRefresh), avg(hits.ResolutionWidth)] +07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[WatchID, ClientIP, IsRefresh, 
ResolutionWidth], file_type=parquet + query IIIIR rowsort SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; ---- @@ -237,6 +865,24 @@ SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWi 8924809397503602651 -1216690514 1 0 0 9110818468285196899 -1216690514 1 0 0 +query TT +EXPLAIN SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10; +---- +logical_plan +01)Sort: c DESC NULLS FIRST, fetch=10 +02)--Projection: hits.URL, count(Int64(1)) AS count(*) AS c +03)----Aggregate: groupBy=[[hits.URL]], aggr=[[count(Int64(1))]] +04)------SubqueryAlias: hits +05)--------TableScan: hits_raw projection=[URL] +physical_plan +01)SortPreservingMergeExec: [c@1 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[c@1 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[URL@0 as URL, count(Int64(1))@1 as c] +04)------AggregateExec: mode=FinalPartitioned, gby=[URL@0 as URL], aggr=[count(Int64(1))] +05)--------RepartitionExec: partitioning=Hash([URL@0], 4), input_partitions=1 +06)----------AggregateExec: mode=Partial, gby=[URL@0 as URL], aggr=[count(Int64(1))] +07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[URL], file_type=parquet + query TI rowsort SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10; ---- @@ -247,6 +893,24 @@ http://bonprix.ru/index.ru/cinema/art/A00387,3797); ru)&bL 1 http://holodilnik.ru/russia/05jul2013&model=0 1 http://tours/Ekategoriya%2F&sr=http://slovareniye 1 +query TT +EXPLAIN SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10; +---- +logical_plan +01)Sort: c DESC NULLS FIRST, fetch=10 +02)--Projection: Int64(1), hits.URL, count(Int64(1)) AS c +03)----Aggregate: groupBy=[[hits.URL]], aggr=[[count(Int64(1))]] +04)------SubqueryAlias: hits 
+05)--------TableScan: hits_raw projection=[URL] +physical_plan +01)SortPreservingMergeExec: [c@2 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[c@2 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[1 as Int64(1), URL@0 as URL, count(Int64(1))@1 as c] +04)------AggregateExec: mode=FinalPartitioned, gby=[URL@0 as URL], aggr=[count(Int64(1))] +05)--------RepartitionExec: partitioning=Hash([URL@0], 4), input_partitions=1 +06)----------AggregateExec: mode=Partial, gby=[URL@0 as URL], aggr=[count(Int64(1))] +07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[URL], file_type=parquet + query ITI rowsort SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10; ---- @@ -257,6 +921,25 @@ SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 1 http://holodilnik.ru/russia/05jul2013&model=0 1 1 http://tours/Ekategoriya%2F&sr=http://slovareniye 1 +query TT +EXPLAIN SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10; +---- +logical_plan +01)Sort: c DESC NULLS FIRST, fetch=10 +02)--Projection: hits.ClientIP, hits.ClientIP - Int64(1), hits.ClientIP - Int64(2), hits.ClientIP - Int64(3), count(Int64(1)) AS count(*) AS c +03)----Aggregate: groupBy=[[hits.ClientIP, __common_expr_1 AS hits.ClientIP - Int64(1), __common_expr_1 AS hits.ClientIP - Int64(2), __common_expr_1 AS hits.ClientIP - Int64(3)]], aggr=[[count(Int64(1))]] +04)------Projection: CAST(hits.ClientIP AS Int64) AS __common_expr_1, hits.ClientIP +05)--------SubqueryAlias: hits +06)----------TableScan: hits_raw projection=[ClientIP] +physical_plan +01)SortPreservingMergeExec: [c@4 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[c@4 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[ClientIP@0 as ClientIP, 
hits.ClientIP - Int64(1)@1 as hits.ClientIP - Int64(1), hits.ClientIP - Int64(2)@2 as hits.ClientIP - Int64(2), hits.ClientIP - Int64(3)@3 as hits.ClientIP - Int64(3), count(Int64(1))@4 as c] +04)------AggregateExec: mode=FinalPartitioned, gby=[ClientIP@0 as ClientIP, hits.ClientIP - Int64(1)@1 as hits.ClientIP - Int64(1), hits.ClientIP - Int64(2)@2 as hits.ClientIP - Int64(2), hits.ClientIP - Int64(3)@3 as hits.ClientIP - Int64(3)], aggr=[count(Int64(1))] +05)--------RepartitionExec: partitioning=Hash([ClientIP@0, hits.ClientIP - Int64(1)@1, hits.ClientIP - Int64(2)@2, hits.ClientIP - Int64(3)@3], 4), input_partitions=1 +06)----------AggregateExec: mode=Partial, gby=[ClientIP@1 as ClientIP, __common_expr_1@0 - 1 as hits.ClientIP - Int64(1), __common_expr_1@0 - 2 as hits.ClientIP - Int64(2), __common_expr_1@0 - 3 as hits.ClientIP - Int64(3)], aggr=[count(Int64(1))] +07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[CAST(ClientIP@7 AS Int64) as __common_expr_1, ClientIP], file_type=parquet + query IIIII rowsort SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10; ---- @@ -265,46 +948,249 @@ SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c 1568366281 1568366280 1568366279 1568366278 2 1615432634 1615432633 1615432632 1615432631 1 +query TT +EXPLAIN SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10; +---- +logical_plan +01)Sort: pageviews DESC NULLS FIRST, fetch=10 +02)--Projection: hits.URL, count(Int64(1)) AS count(*) AS pageviews +03)----Aggregate: groupBy=[[hits.URL]], aggr=[[count(Int64(1))]] +04)------SubqueryAlias: hits 
+05)--------Projection: hits_raw.URL +06)----------Filter: hits_raw.CounterID = Int32(62) AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND hits_raw.DontCountHits = Int16(0) AND hits_raw.IsRefresh = Int16(0) AND hits_raw.URL != Utf8View("") +07)------------TableScan: hits_raw projection=[EventDate, CounterID, URL, IsRefresh, DontCountHits], partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31"), hits_raw.DontCountHits = Int16(0), hits_raw.IsRefresh = Int16(0), hits_raw.URL != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [pageviews@1 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[pageviews@1 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[URL@0 as URL, count(Int64(1))@1 as pageviews] +04)------AggregateExec: mode=FinalPartitioned, gby=[URL@0 as URL], aggr=[count(Int64(1))] +05)--------RepartitionExec: partitioning=Hash([URL@0], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[URL@0 as URL], aggr=[count(Int64(1))] +07)------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) <= 2013-07-31 AND DontCountHits@4 = 0 AND IsRefresh@3 = 0 AND URL@2 != , projection=[URL@2] +08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[EventDate, CounterID, URL, IsRefresh, DontCountHits], file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-31 AND DontCountHits@61 = 0 AND IsRefresh@15 = 0 AND URL@13 != , 
pruning_predicate=CounterID_null_count@2 != row_count@3 AND CounterID_min@0 <= 62 AND 62 <= CounterID_max@1 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_max@4 AS Int32) AS Date32) >= 2013-07-01 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_min@6 AS Int32) AS Date32) <= 2013-07-31 AND DontCountHits_null_count@9 != row_count@3 AND DontCountHits_min@7 <= 0 AND 0 <= DontCountHits_max@8 AND IsRefresh_null_count@12 != row_count@3 AND IsRefresh_min@10 <= 0 AND 0 <= IsRefresh_max@11 AND URL_null_count@15 != row_count@3 AND (URL_min@13 != OR != URL_max@14), required_guarantees=[CounterID in (62), DontCountHits in (0), IsRefresh in (0), URL not in ()] + query TI SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10; ---- +query TT +EXPLAIN SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews DESC LIMIT 10; +---- +logical_plan +01)Sort: pageviews DESC NULLS FIRST, fetch=10 +02)--Projection: hits.Title, count(Int64(1)) AS count(*) AS pageviews +03)----Aggregate: groupBy=[[hits.Title]], aggr=[[count(Int64(1))]] +04)------SubqueryAlias: hits +05)--------Projection: hits_raw.Title +06)----------Filter: hits_raw.CounterID = Int32(62) AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND hits_raw.DontCountHits = Int16(0) AND hits_raw.IsRefresh = Int16(0) AND hits_raw.Title != Utf8View("") +07)------------TableScan: hits_raw projection=[Title, EventDate, CounterID, IsRefresh, DontCountHits], partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate AS Int32) 
AS Date32) >= Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31"), hits_raw.DontCountHits = Int16(0), hits_raw.IsRefresh = Int16(0), hits_raw.Title != Utf8View("")] +physical_plan +01)SortPreservingMergeExec: [pageviews@1 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[pageviews@1 DESC], preserve_partitioning=[true] +03)----ProjectionExec: expr=[Title@0 as Title, count(Int64(1))@1 as pageviews] +04)------AggregateExec: mode=FinalPartitioned, gby=[Title@0 as Title], aggr=[count(Int64(1))] +05)--------RepartitionExec: partitioning=Hash([Title@0], 4), input_partitions=4 +06)----------AggregateExec: mode=Partial, gby=[Title@0 as Title], aggr=[count(Int64(1))] +07)------------FilterExec: CounterID@2 = 62 AND CAST(CAST(EventDate@1 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@1 AS Int32) AS Date32) <= 2013-07-31 AND DontCountHits@4 = 0 AND IsRefresh@3 = 0 AND Title@0 != , projection=[Title@0] +08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[Title, EventDate, CounterID, IsRefresh, DontCountHits], file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-31 AND DontCountHits@61 = 0 AND IsRefresh@15 = 0 AND Title@2 != , pruning_predicate=CounterID_null_count@2 != row_count@3 AND CounterID_min@0 <= 62 AND 62 <= CounterID_max@1 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_max@4 AS Int32) AS Date32) >= 2013-07-01 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_min@6 AS Int32) AS Date32) <= 2013-07-31 AND DontCountHits_null_count@9 != row_count@3 AND DontCountHits_min@7 <= 0 AND 0 <= DontCountHits_max@8 AND IsRefresh_null_count@12 != row_count@3 AND IsRefresh_min@10 <= 0 AND 0 <= 
IsRefresh_max@11 AND Title_null_count@15 != row_count@3 AND (Title_min@13 != OR != Title_max@14), required_guarantees=[CounterID in (62), DontCountHits in (0), IsRefresh in (0), Title not in ()] + query TI SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews DESC LIMIT 10; ---- +query TT +EXPLAIN SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +---- +logical_plan +01)Limit: skip=1000, fetch=10 +02)--Sort: pageviews DESC NULLS FIRST, fetch=1010 +03)----Projection: hits.URL, count(Int64(1)) AS count(*) AS pageviews +04)------Aggregate: groupBy=[[hits.URL]], aggr=[[count(Int64(1))]] +05)--------SubqueryAlias: hits +06)----------Projection: hits_raw.URL +07)------------Filter: hits_raw.CounterID = Int32(62) AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND hits_raw.IsRefresh = Int16(0) AND hits_raw.IsLink != Int16(0) AND hits_raw.IsDownload = Int16(0) +08)--------------TableScan: hits_raw projection=[EventDate, CounterID, URL, IsRefresh, IsLink, IsDownload], partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31"), hits_raw.IsRefresh = Int16(0), hits_raw.IsLink != Int16(0), hits_raw.IsDownload = Int16(0)] +physical_plan +01)GlobalLimitExec: skip=1000, fetch=10 +02)--SortPreservingMergeExec: [pageviews@1 DESC], fetch=1010 +03)----SortExec: TopK(fetch=1010), expr=[pageviews@1 DESC], preserve_partitioning=[true] +04)------ProjectionExec: 
expr=[URL@0 as URL, count(Int64(1))@1 as pageviews] +05)--------AggregateExec: mode=FinalPartitioned, gby=[URL@0 as URL], aggr=[count(Int64(1))] +06)----------RepartitionExec: partitioning=Hash([URL@0], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[URL@0 as URL], aggr=[count(Int64(1))] +08)--------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh@3 = 0 AND IsLink@4 != 0 AND IsDownload@5 = 0, projection=[URL@2] +09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +10)------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[EventDate, CounterID, URL, IsRefresh, IsLink, IsDownload], file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh@15 = 0 AND IsLink@52 != 0 AND IsDownload@53 = 0, pruning_predicate=CounterID_null_count@2 != row_count@3 AND CounterID_min@0 <= 62 AND 62 <= CounterID_max@1 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_max@4 AS Int32) AS Date32) >= 2013-07-01 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_min@6 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh_null_count@9 != row_count@3 AND IsRefresh_min@7 <= 0 AND 0 <= IsRefresh_max@8 AND IsLink_null_count@12 != row_count@3 AND (IsLink_min@10 != 0 OR 0 != IsLink_max@11) AND IsDownload_null_count@15 != row_count@3 AND IsDownload_min@13 <= 0 AND 0 <= IsDownload_max@14, required_guarantees=[CounterID in (62), IsDownload in (0), IsLink not in (0), IsRefresh in (0)] + query TI SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY 
"URL" ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; ---- +query TT +EXPLAIN SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +---- +logical_plan +01)Limit: skip=1000, fetch=10 +02)--Sort: pageviews DESC NULLS FIRST, fetch=1010 +03)----Projection: hits.TraficSourceID, hits.SearchEngineID, hits.AdvEngineID, CASE WHEN hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END AS src, hits.URL AS dst, count(Int64(1)) AS count(*) AS pageviews +04)------Aggregate: groupBy=[[hits.TraficSourceID, hits.SearchEngineID, hits.AdvEngineID, CASE WHEN hits.SearchEngineID = Int16(0) AND hits.AdvEngineID = Int16(0) THEN hits.Referer ELSE Utf8View("") END AS CASE WHEN hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END, hits.URL]], aggr=[[count(Int64(1))]] +05)--------SubqueryAlias: hits +06)----------Projection: hits_raw.URL, hits_raw.Referer, hits_raw.TraficSourceID, hits_raw.SearchEngineID, hits_raw.AdvEngineID +07)------------Filter: hits_raw.CounterID = Int32(62) AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND hits_raw.IsRefresh = Int16(0) +08)--------------TableScan: hits_raw projection=[EventDate, CounterID, URL, Referer, IsRefresh, TraficSourceID, SearchEngineID, AdvEngineID], partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31"), hits_raw.IsRefresh = Int16(0)] +physical_plan 
+01)GlobalLimitExec: skip=1000, fetch=10 +02)--SortPreservingMergeExec: [pageviews@5 DESC], fetch=1010 +03)----SortExec: TopK(fetch=1010), expr=[pageviews@5 DESC], preserve_partitioning=[true] +04)------ProjectionExec: expr=[TraficSourceID@0 as TraficSourceID, SearchEngineID@1 as SearchEngineID, AdvEngineID@2 as AdvEngineID, CASE WHEN hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END@3 as src, URL@4 as dst, count(Int64(1))@5 as pageviews] +05)--------AggregateExec: mode=FinalPartitioned, gby=[TraficSourceID@0 as TraficSourceID, SearchEngineID@1 as SearchEngineID, AdvEngineID@2 as AdvEngineID, CASE WHEN hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END@3 as CASE WHEN hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END, URL@4 as URL], aggr=[count(Int64(1))] +06)----------RepartitionExec: partitioning=Hash([TraficSourceID@0, SearchEngineID@1, AdvEngineID@2, CASE WHEN hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END@3, URL@4], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[TraficSourceID@2 as TraficSourceID, SearchEngineID@3 as SearchEngineID, AdvEngineID@4 as AdvEngineID, CASE WHEN SearchEngineID@3 = 0 AND AdvEngineID@4 = 0 THEN Referer@1 ELSE END as CASE WHEN hits.SearchEngineID = Int64(0) AND hits.AdvEngineID = Int64(0) THEN hits.Referer ELSE Utf8("") END, URL@0 as URL], aggr=[count(Int64(1))] +08)--------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh@4 = 0, projection=[URL@2, Referer@3, TraficSourceID@5, SearchEngineID@6, AdvEngineID@7] +09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +10)------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[EventDate, CounterID, URL, Referer, IsRefresh, TraficSourceID, SearchEngineID, AdvEngineID], file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh@15 = 0, pruning_predicate=CounterID_null_count@2 != row_count@3 AND CounterID_min@0 <= 62 AND 62 <= CounterID_max@1 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_max@4 AS Int32) AS Date32) >= 2013-07-01 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_min@6 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh_null_count@9 != row_count@3 AND IsRefresh_min@7 <= 0 AND 0 <= IsRefresh_max@8, required_guarantees=[CounterID in (62), IsRefresh in (0)] + query IIITTI SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; ---- +query TT +EXPLAIN SELECT "URLHash", "EventDate", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +---- +logical_plan +01)Limit: skip=100, fetch=10 +02)--Sort: pageviews DESC NULLS FIRST, fetch=110 +03)----Projection: hits.URLHash, hits.EventDate, count(Int64(1)) AS count(*) AS pageviews +04)------Aggregate: groupBy=[[hits.URLHash, hits.EventDate]], aggr=[[count(Int64(1))]] +05)--------SubqueryAlias: hits +06)----------Projection: hits_raw.URLHash, 
CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) AS EventDate +07)------------Filter: hits_raw.CounterID = Int32(62) AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND hits_raw.IsRefresh = Int16(0) AND (hits_raw.TraficSourceID = Int16(-1) OR hits_raw.TraficSourceID = Int16(6)) AND hits_raw.RefererHash = Int64(3594120000172545465) +08)--------------TableScan: hits_raw projection=[EventDate, CounterID, IsRefresh, TraficSourceID, RefererHash, URLHash], partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31"), hits_raw.IsRefresh = Int16(0), hits_raw.TraficSourceID = Int16(-1) OR hits_raw.TraficSourceID = Int16(6), hits_raw.RefererHash = Int64(3594120000172545465)] +physical_plan +01)GlobalLimitExec: skip=100, fetch=10 +02)--SortPreservingMergeExec: [pageviews@2 DESC], fetch=110 +03)----SortExec: TopK(fetch=110), expr=[pageviews@2 DESC], preserve_partitioning=[true] +04)------ProjectionExec: expr=[URLHash@0 as URLHash, EventDate@1 as EventDate, count(Int64(1))@2 as pageviews] +05)--------AggregateExec: mode=FinalPartitioned, gby=[URLHash@0 as URLHash, EventDate@1 as EventDate], aggr=[count(Int64(1))] +06)----------RepartitionExec: partitioning=Hash([URLHash@0, EventDate@1], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[URLHash@0 as URLHash, EventDate@1 as EventDate], aggr=[count(Int64(1))] +08)--------------ProjectionExec: expr=[URLHash@1 as URLHash, CAST(CAST(EventDate@0 AS Int32) AS Date32) as EventDate] +09)----------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh@2 = 0 AND (TraficSourceID@3 = -1 OR TraficSourceID@3 = 6) AND RefererHash@4 = 3594120000172545465, 
projection=[EventDate@0, URLHash@5] +10)------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +11)--------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[EventDate, CounterID, IsRefresh, TraficSourceID, RefererHash, URLHash], file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh@15 = 0 AND (TraficSourceID@37 = -1 OR TraficSourceID@37 = 6) AND RefererHash@102 = 3594120000172545465, pruning_predicate=CounterID_null_count@2 != row_count@3 AND CounterID_min@0 <= 62 AND 62 <= CounterID_max@1 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_max@4 AS Int32) AS Date32) >= 2013-07-01 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_min@6 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh_null_count@9 != row_count@3 AND IsRefresh_min@7 <= 0 AND 0 <= IsRefresh_max@8 AND (TraficSourceID_null_count@12 != row_count@3 AND TraficSourceID_min@10 <= -1 AND -1 <= TraficSourceID_max@11 OR TraficSourceID_null_count@12 != row_count@3 AND TraficSourceID_min@10 <= 6 AND 6 <= TraficSourceID_max@11) AND RefererHash_null_count@15 != row_count@3 AND RefererHash_min@13 <= 3594120000172545465 AND 3594120000172545465 <= RefererHash_max@14, required_guarantees=[CounterID in (62), IsRefresh in (0), RefererHash in (3594120000172545465), TraficSourceID in (-1, 6)] + query IDI SELECT "URLHash", "EventDate", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY PageViews DESC LIMIT 10 OFFSET 100; ---- +query TT +EXPLAIN SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND 
"EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +---- +logical_plan +01)Limit: skip=10000, fetch=10 +02)--Sort: pageviews DESC NULLS FIRST, fetch=10010 +03)----Projection: hits.WindowClientWidth, hits.WindowClientHeight, count(Int64(1)) AS count(*) AS pageviews +04)------Aggregate: groupBy=[[hits.WindowClientWidth, hits.WindowClientHeight]], aggr=[[count(Int64(1))]] +05)--------SubqueryAlias: hits +06)----------Projection: hits_raw.WindowClientWidth, hits_raw.WindowClientHeight +07)------------Filter: hits_raw.CounterID = Int32(62) AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01") AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31") AND hits_raw.IsRefresh = Int16(0) AND hits_raw.DontCountHits = Int16(0) AND hits_raw.URLHash = Int64(2868770270353813622) +08)--------------TableScan: hits_raw projection=[EventDate, CounterID, IsRefresh, WindowClientWidth, WindowClientHeight, DontCountHits, URLHash], partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-01"), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-31"), hits_raw.IsRefresh = Int16(0), hits_raw.DontCountHits = Int16(0), hits_raw.URLHash = Int64(2868770270353813622)] +physical_plan +01)GlobalLimitExec: skip=10000, fetch=10 +02)--SortPreservingMergeExec: [pageviews@2 DESC], fetch=10010 +03)----SortExec: TopK(fetch=10010), expr=[pageviews@2 DESC], preserve_partitioning=[true] +04)------ProjectionExec: expr=[WindowClientWidth@0 as WindowClientWidth, WindowClientHeight@1 as WindowClientHeight, count(Int64(1))@2 as pageviews] +05)--------AggregateExec: mode=FinalPartitioned, gby=[WindowClientWidth@0 as WindowClientWidth, WindowClientHeight@1 as WindowClientHeight], aggr=[count(Int64(1))] 
+06)----------RepartitionExec: partitioning=Hash([WindowClientWidth@0, WindowClientHeight@1], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[WindowClientWidth@0 as WindowClientWidth, WindowClientHeight@1 as WindowClientHeight], aggr=[count(Int64(1))] +08)--------------FilterExec: CounterID@1 = 62 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@0 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh@2 = 0 AND DontCountHits@5 = 0 AND URLHash@6 = 2868770270353813622, projection=[WindowClientWidth@3, WindowClientHeight@4] +09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +10)------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[EventDate, CounterID, IsRefresh, WindowClientWidth, WindowClientHeight, DontCountHits, URLHash], file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) >= 2013-07-01 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh@15 = 0 AND DontCountHits@61 = 0 AND URLHash@103 = 2868770270353813622, pruning_predicate=CounterID_null_count@2 != row_count@3 AND CounterID_min@0 <= 62 AND 62 <= CounterID_max@1 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_max@4 AS Int32) AS Date32) >= 2013-07-01 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_min@6 AS Int32) AS Date32) <= 2013-07-31 AND IsRefresh_null_count@9 != row_count@3 AND IsRefresh_min@7 <= 0 AND 0 <= IsRefresh_max@8 AND DontCountHits_null_count@12 != row_count@3 AND DontCountHits_min@10 <= 0 AND 0 <= DontCountHits_max@11 AND URLHash_null_count@15 != row_count@3 AND URLHash_min@13 <= 2868770270353813622 AND 2868770270353813622 <= URLHash_max@14, required_guarantees=[CounterID in (62), DontCountHits in (0), IsRefresh in (0), URLHash in (2868770270353813622)] + query III SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) 
AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; ---- +query TT +EXPLAIN SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-14' AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000; +---- +logical_plan +01)Limit: skip=1000, fetch=10 +02)--Sort: date_trunc(Utf8("minute"), m) ASC NULLS LAST, fetch=1010 +03)----Projection: date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime)) AS m, count(Int64(1)) AS count(*) AS pageviews +04)------Aggregate: groupBy=[[date_trunc(Utf8("minute"), to_timestamp_seconds(hits.EventTime))]], aggr=[[count(Int64(1))]] +05)--------SubqueryAlias: hits +06)----------Projection: hits_raw.EventTime +07)------------Filter: hits_raw.CounterID = Int32(62) AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-14") AND CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-15") AND hits_raw.IsRefresh = Int16(0) AND hits_raw.DontCountHits = Int16(0) +08)--------------TableScan: hits_raw projection=[EventTime, EventDate, CounterID, IsRefresh, DontCountHits], partial_filters=[hits_raw.CounterID = Int32(62), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) >= Date32("2013-07-14"), CAST(CAST(hits_raw.EventDate AS Int32) AS Date32) <= Date32("2013-07-15"), hits_raw.IsRefresh = Int16(0), hits_raw.DontCountHits = Int16(0)] +physical_plan +01)GlobalLimitExec: skip=1000, fetch=10 +02)--SortPreservingMergeExec: [date_trunc(minute, m@0) ASC NULLS LAST], fetch=1010 +03)----SortExec: TopK(fetch=1010), expr=[date_trunc(minute, m@0) ASC NULLS LAST], 
preserve_partitioning=[true] +04)------ProjectionExec: expr=[date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))@0 as m, count(Int64(1))@1 as pageviews] +05)--------AggregateExec: mode=FinalPartitioned, gby=[date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))@0 as date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))], aggr=[count(Int64(1))] +06)----------RepartitionExec: partitioning=Hash([date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))@0], 4), input_partitions=4 +07)------------AggregateExec: mode=Partial, gby=[date_trunc(minute, to_timestamp_seconds(EventTime@0)) as date_trunc(Utf8("minute"),to_timestamp_seconds(hits.EventTime))], aggr=[count(Int64(1))] +08)--------------FilterExec: CounterID@2 = 62 AND CAST(CAST(EventDate@1 AS Int32) AS Date32) >= 2013-07-14 AND CAST(CAST(EventDate@1 AS Int32) AS Date32) <= 2013-07-15 AND IsRefresh@3 = 0 AND DontCountHits@4 = 0, projection=[EventTime@0] +09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +10)------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[EventTime, EventDate, CounterID, IsRefresh, DontCountHits], file_type=parquet, predicate=CounterID@6 = 62 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) >= 2013-07-14 AND CAST(CAST(EventDate@5 AS Int32) AS Date32) <= 2013-07-15 AND IsRefresh@15 = 0 AND DontCountHits@61 = 0, pruning_predicate=CounterID_null_count@2 != row_count@3 AND CounterID_min@0 <= 62 AND 62 <= CounterID_max@1 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_max@4 AS Int32) AS Date32) >= 2013-07-14 AND EventDate_null_count@5 != row_count@3 AND CAST(CAST(EventDate_min@6 AS Int32) AS Date32) <= 2013-07-15 AND IsRefresh_null_count@9 != row_count@3 AND IsRefresh_min@7 <= 0 AND 0 <= IsRefresh_max@8 AND DontCountHits_null_count@12 != row_count@3 AND DontCountHits_min@10 <= 0 AND 0 <= DontCountHits_max@11, 
required_guarantees=[CounterID in (62), DontCountHits in (0), IsRefresh in (0)] + query PI SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-14' AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000; ---- # Clickbench "Extended" queries that test count distinct +query TT +EXPLAIN SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), COUNT(DISTINCT "MobilePhoneModel") FROM hits; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[count(DISTINCT hits.SearchPhrase), count(DISTINCT hits.MobilePhone), count(DISTINCT hits.MobilePhoneModel)]] +02)--SubqueryAlias: hits +03)----TableScan: hits_raw projection=[MobilePhone, MobilePhoneModel, SearchPhrase] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[count(DISTINCT hits.SearchPhrase), count(DISTINCT hits.MobilePhone), count(DISTINCT hits.MobilePhoneModel)] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[MobilePhone, MobilePhoneModel, SearchPhrase], file_type=parquet + query III SELECT COUNT(DISTINCT "SearchPhrase"), COUNT(DISTINCT "MobilePhone"), COUNT(DISTINCT "MobilePhoneModel") FROM hits; ---- 1 1 1 +query TT +EXPLAIN SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), COUNT(DISTINCT "BrowserLanguage") FROM hits; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[count(DISTINCT hits.HitColor), count(DISTINCT hits.BrowserCountry), count(DISTINCT hits.BrowserLanguage)]] +02)--SubqueryAlias: hits +03)----TableScan: hits_raw projection=[HitColor, BrowserLanguage, BrowserCountry] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[count(DISTINCT hits.HitColor), count(DISTINCT hits.BrowserCountry), count(DISTINCT hits.BrowserLanguage)] 
+02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[HitColor, BrowserLanguage, BrowserCountry], file_type=parquet + query III SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), COUNT(DISTINCT "BrowserLanguage") FROM hits; ---- 1 1 1 +query TT +EXPLAIN SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10; +---- +logical_plan +01)Sort: count(DISTINCT hits.SocialNetwork) DESC NULLS FIRST, fetch=10 +02)--Aggregate: groupBy=[[hits.BrowserCountry]], aggr=[[count(DISTINCT hits.SocialNetwork), count(DISTINCT hits.HitColor), count(DISTINCT hits.BrowserLanguage), count(DISTINCT hits.SocialAction)]] +03)----SubqueryAlias: hits +04)------TableScan: hits_raw projection=[HitColor, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction] +physical_plan +01)SortPreservingMergeExec: [count(DISTINCT hits.SocialNetwork)@1 DESC], fetch=10 +02)--SortExec: TopK(fetch=10), expr=[count(DISTINCT hits.SocialNetwork)@1 DESC], preserve_partitioning=[true] +03)----AggregateExec: mode=FinalPartitioned, gby=[BrowserCountry@0 as BrowserCountry], aggr=[count(DISTINCT hits.SocialNetwork), count(DISTINCT hits.HitColor), count(DISTINCT hits.BrowserLanguage), count(DISTINCT hits.SocialAction)] +04)------RepartitionExec: partitioning=Hash([BrowserCountry@0], 4), input_partitions=1 +05)--------AggregateExec: mode=Partial, gby=[BrowserCountry@2 as BrowserCountry], aggr=[count(DISTINCT hits.SocialNetwork), count(DISTINCT hits.HitColor), count(DISTINCT hits.BrowserLanguage), count(DISTINCT hits.SocialAction)] +06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[HitColor, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction], file_type=parquet + query TIIII SELECT 
"BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10; ---- From 5ad7342c0fa4bc90ed3c9274973f0f9335613f53 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 3 Mar 2026 02:39:20 -0500 Subject: [PATCH 2/6] Add per-query labels --- .../sqllogictest/test_files/clickbench.slt | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/datafusion/sqllogictest/test_files/clickbench.slt b/datafusion/sqllogictest/test_files/clickbench.slt index 6bff399a2db6b..10059664adad7 100644 --- a/datafusion/sqllogictest/test_files/clickbench.slt +++ b/datafusion/sqllogictest/test_files/clickbench.slt @@ -115,6 +115,7 @@ SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits; ---- 0 10 0 +## Q3 query TT EXPLAIN SELECT AVG("UserID") FROM hits; ---- @@ -131,6 +132,7 @@ SELECT AVG("UserID") FROM hits; ---- -304548765855551740 +## Q4 query TT EXPLAIN SELECT COUNT(DISTINCT "UserID") FROM hits; ---- @@ -155,6 +157,7 @@ SELECT COUNT(DISTINCT "UserID") FROM hits; ---- 5 +## Q5 query TT EXPLAIN SELECT COUNT(DISTINCT "SearchPhrase") FROM hits; ---- @@ -179,6 +182,7 @@ SELECT COUNT(DISTINCT "SearchPhrase") FROM hits; ---- 1 +## Q6 query TT EXPLAIN SELECT MIN("EventDate"), MAX("EventDate") FROM hits; ---- @@ -196,6 +200,7 @@ SELECT MIN("EventDate"), MAX("EventDate") FROM hits; ---- 2013-07-15 2013-07-15 +## Q7 query TT EXPLAIN SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC; ---- @@ -223,6 +228,7 @@ query II SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC; ---- +## Q8 query TT EXPLAIN SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10; ---- @@ -253,6 +259,7 @@ SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" O 39 1 839 2 +## Q9 
query TT EXPLAIN SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10; ---- @@ -279,6 +286,7 @@ SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), CO 39 0 1 0 1 839 0 6 0 2 +## Q10 query TT EXPLAIN SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10; ---- @@ -308,6 +316,7 @@ query TI SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10; ---- +## Q11 query TT EXPLAIN SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10; ---- @@ -337,6 +346,7 @@ query ITI SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10; ---- +## Q12 query TT EXPLAIN SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; ---- @@ -362,6 +372,7 @@ query TI SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; ---- +## Q13 query TT EXPLAIN SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10; ---- @@ -391,6 +402,7 @@ query TI SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10; ---- +## Q14 query TT EXPLAIN SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10; ---- @@ -416,6 +428,7 @@ query ITI SELECT 
"SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10; ---- +## Q15 query TT EXPLAIN SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10; ---- @@ -445,6 +458,7 @@ SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIM 519640690937130534 2 7418527520126366595 1 +## Q16 query TT EXPLAIN SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; ---- @@ -474,6 +488,7 @@ SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPh 519640690937130534 (empty) 2 7418527520126366595 (empty) 1 +## Q17 query TT EXPLAIN SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10; ---- @@ -500,6 +515,7 @@ SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPh 519640690937130534 (empty) 2 7418527520126366595 (empty) 1 +## Q18 query TT EXPLAIN SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; ---- @@ -534,6 +550,7 @@ SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, "S 519640690937130534 36 (empty) 1 7418527520126366595 18 (empty) 1 +## Q19 query TT EXPLAIN SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449; ---- @@ -550,6 +567,7 @@ query I SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449; ---- +## Q20 query TT EXPLAIN SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%'; ---- @@ -574,6 +592,7 @@ SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%'; ---- 0 +## Q21 query TT EXPLAIN SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; ---- @@ -599,6 +618,7 @@ query TTI SELECT "SearchPhrase", 
MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; ---- +## Q22 query TT EXPLAIN SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; ---- @@ -624,6 +644,7 @@ query TTTII SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; ---- +## Q23 query TT EXPLAIN SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10; ---- @@ -645,6 +666,7 @@ query IITIIIIIIIIITTIIIIIIIIIITIIITIIIITTIIITIIIIIIIIIITIIIIITIIIIIITIIIIIIIIIIT SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10; ---- +## Q24 query TT EXPLAIN SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10; ---- @@ -668,6 +690,7 @@ query T SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10; ---- +## Q25 query TT EXPLAIN SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10; ---- @@ -687,6 +710,7 @@ query T SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10; ---- +## Q26 query TT EXPLAIN SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10; ---- @@ -710,6 +734,7 @@ query T SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10; ---- +## Q27 query TT EXPLAIN SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; ---- @@ -737,6 +762,7 @@ query IRI SELECT "CounterID", AVG(length("URL")) 
AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; ---- +## Q28 query TT EXPLAIN SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; ---- @@ -764,6 +790,7 @@ query TRIT SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; ---- +## Q29 query TT EXPLAIN SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), 
SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits; ---- @@ -781,6 +808,7 @@ SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" ---- 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 310 320 330 340 350 360 370 380 390 400 410 420 430 440 450 460 470 480 490 500 510 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660 670 680 690 700 710 720 730 740 750 760 770 780 790 800 810 820 830 840 850 860 870 880 890 +## Q30 query TT EXPLAIN SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10; ---- 
@@ -807,6 +835,7 @@ query IIIIR SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10; ---- +## Q31 query TT EXPLAIN SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; ---- @@ -833,6 +862,7 @@ query IIIIR SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; ---- +## Q32 query TT EXPLAIN SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; ---- @@ -865,6 +895,7 @@ SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWi 8924809397503602651 -1216690514 1 0 0 9110818468285196899 -1216690514 1 0 0 +## Q33 query TT EXPLAIN SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10; ---- @@ -893,6 +924,7 @@ http://bonprix.ru/index.ru/cinema/art/A00387,3797); ru)&bL 1 http://holodilnik.ru/russia/05jul2013&model=0 1 http://tours/Ekategoriya%2F&sr=http://slovareniye 1 +## Q34 query TT EXPLAIN SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10; ---- @@ -921,6 +953,7 @@ SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 1 http://holodilnik.ru/russia/05jul2013&model=0 1 1 http://tours/Ekategoriya%2F&sr=http://slovareniye 1 +## Q35 query TT EXPLAIN SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10; ---- @@ -948,6 +981,7 @@ SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c 1568366281 1568366280 1568366279 1568366278 2 
1615432634 1615432633 1615432632 1615432631 1 +## Q36 query TT EXPLAIN SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10; ---- @@ -974,6 +1008,7 @@ query TI SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10; ---- +## Q37 query TT EXPLAIN SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews DESC LIMIT 10; ---- @@ -1000,6 +1035,7 @@ query TI SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews DESC LIMIT 10; ---- +## Q38 query TT EXPLAIN SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; ---- @@ -1028,6 +1064,7 @@ query TI SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; ---- +## Q39 query TT EXPLAIN SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= 
'2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; ---- @@ -1056,6 +1093,7 @@ query IIITTI SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; ---- +## Q40 query TT EXPLAIN SELECT "URLHash", "EventDate", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY PageViews DESC LIMIT 10 OFFSET 100; ---- @@ -1085,6 +1123,7 @@ query IDI SELECT "URLHash", "EventDate", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY PageViews DESC LIMIT 10 OFFSET 100; ---- +## Q41 query TT EXPLAIN SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; ---- @@ -1113,6 +1152,7 @@ query III SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 
2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; ---- +## Q42 query TT EXPLAIN SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-14' AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000; ---- From 974d9d100cf50270260d90726a99f8208ef62398 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 3 Mar 2026 03:21:01 -0500 Subject: [PATCH 3/6] Improve AggregateUDFImpl::simplify documentation --- datafusion/expr/src/expr.rs | 2 +- datafusion/expr/src/function.rs | 36 +++++++++++++++------------------ datafusion/expr/src/udaf.rs | 9 +++++---- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 87e8e029a6ee5..33b4b2020aae1 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -901,7 +901,7 @@ impl<'a> TreeNodeContainer<'a, Expr> for Sort { } } -/// Aggregate function +/// Aggregate Function /// /// See also [`ExprFunctionExt`] to set these fields on `Expr` /// diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 68d2c9073241b..d358497cc11df 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -27,6 +27,8 @@ pub use datafusion_functions_aggregate_common::accumulator::{ AccumulatorArgs, AccumulatorFactoryFunction, StateFieldsArgs, }; +use crate::expr::{AggregateFunction, WindowFunction}; +use crate::simplify::SimplifyContext; pub use datafusion_functions_window_common::expr::ExpressionArgs; pub use datafusion_functions_window_common::field::WindowUDFFieldArgs; pub use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; @@ -64,28 +66,22 @@ pub type PartitionEvaluatorFactory = pub type 
StateTypeFunction = Arc<dyn Fn(&DataType) -> Result<Arc<Vec<DataType>>> + Send + Sync>; -/// [crate::udaf::AggregateUDFImpl::simplify] simplifier closure -/// A closure with two arguments: -/// * 'aggregate_function': [crate::expr::AggregateFunction] for which simplified has been invoked -/// * 'info': [crate::simplify::SimplifyContext] +/// Return type for [crate::udaf::AggregateUDFImpl::simplify] +/// +/// This closure is invoked with two arguments: +/// * 'aggregate_function': [AggregateFunction] with already simplified arguments +/// * 'info': [SimplifyContext] /// /// Closure returns simplified [Expr] or an error. -pub type AggregateFunctionSimplification = Box< - dyn Fn( - crate::expr::AggregateFunction, - &crate::simplify::SimplifyContext, - ) -> Result<Expr>, ->; +pub type AggregateFunctionSimplification = + Box<dyn Fn(AggregateFunction, &SimplifyContext) -> Result<Expr>>; -/// [crate::udwf::WindowUDFImpl::simplify] simplifier closure -/// A closure with two arguments: -/// * 'window_function': [crate::expr::WindowFunction] for which simplified has been invoked -/// * 'info': [crate::simplify::SimplifyContext] +/// Return type for [crate::udwf::WindowUDFImpl::simplify] +/// +/// This closure is invoked with two arguments: +/// * 'window_function': [WindowFunction] for which simplify has been invoked +/// * 'info': [SimplifyContext] /// /// Closure returns simplified [Expr] or an error. -pub type WindowFunctionSimplification = Box< - dyn Fn( - crate::expr::WindowFunction, - &crate::simplify::SimplifyContext, - ) -> Result<Expr>, ->; +pub type WindowFunctionSimplification = + Box<dyn Fn(WindowFunction, &SimplifyContext) -> Result<Expr>>; diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index ee38077dbf304..06094a8e44001 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -651,10 +651,10 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync { AggregateOrderSensitivity::HardRequirement } - /// Optionally apply per-UDaF simplification / rewrite rules. + /// Return a closure for simplifying a user defined aggregate. 
/// /// This can be used to apply function specific simplification rules during - /// optimization (e.g. `arrow_cast` --> `Expr::Cast`). The default + /// optimization (e.g. `percentile_cont(x, 0.0)` --> `min(x)`). The default /// implementation does nothing. /// /// Note that DataFusion handles simplifying arguments and "constant @@ -664,10 +664,11 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// /// # Returns /// - /// [None] if simplify is not defined or, + /// [None] if simplify is not defined /// /// Or, a closure with two arguments: - /// * 'aggregate_function': [AggregateFunction] for which simplified has been invoked + /// * 'aggregate_function': [AggregateFunction], which includes already simplified + /// arguments /// * 'info': [crate::simplify::SimplifyContext] /// /// closure returns simplified [Expr] or an error. From 2b2eb42d8117a13b29686bb880ffb83b53ab41f1 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 3 Mar 2026 04:45:44 -0500 Subject: [PATCH 4/6] Add rewrite SUM(expr+C) --> SUM(expr) + COUNT(expr)*C --- datafusion/expr/src/expr.rs | 2 +- datafusion/functions-aggregate/src/sum.rs | 96 +++++++++++++++++++++-- 2 files changed, 92 insertions(+), 6 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 33b4b2020aae1..de5e6dda34c2c 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -598,7 +598,7 @@ impl Alias { } } -/// Binary expression +/// Binary expression for [`Expr::BinaryExpr`] #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] pub struct BinaryExpr { /// Left-hand side of the expression diff --git a/datafusion/functions-aggregate/src/sum.rs b/datafusion/functions-aggregate/src/sum.rs index 198ba54adfa2a..1b0d4ca5c2198 100644 --- a/datafusion/functions-aggregate/src/sum.rs +++ b/datafusion/functions-aggregate/src/sum.rs @@ -32,12 +32,16 @@ use datafusion_common::types::{ logical_int64, logical_uint8, logical_uint16, logical_uint32, logical_uint64, }; use 
datafusion_common::{HashMap, Result, ScalarValue, exec_err, not_impl_err}; -use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::expr::{AggregateFunction, AggregateFunctionParams}; +use datafusion_expr::function::{ + AccumulatorArgs, AggregateFunctionSimplification, StateFieldsArgs, +}; +use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::utils::{AggregateOrderSensitivity, format_state_name}; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, Coercion, Documentation, Expr, GroupsAccumulator, - ReversedUDAF, SetMonotonicity, Signature, TypeSignature, TypeSignatureClass, - Volatility, + Accumulator, AggregateUDFImpl, BinaryExpr, Coercion, Documentation, Expr, + GroupsAccumulator, Operator, ReversedUDAF, SetMonotonicity, Signature, TypeSignature, + TypeSignatureClass, Volatility, }; use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator; use datafusion_functions_aggregate_common::aggregate::sum_distinct::DistinctSumAccumulator; @@ -54,7 +58,7 @@ make_udaf_expr_and_func!( ); pub fn sum_distinct(expr: Expr) -> Expr { - Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction::new_udf( + Expr::AggregateFunction(AggregateFunction::new_udf( sum_udaf(), vec![expr], true, @@ -346,6 +350,88 @@ impl AggregateUDFImpl for Sum { _ => SetMonotonicity::NotMonotonic, } } + + /// Simplification Rules + fn simplify(&self) -> Option<AggregateFunctionSimplification> { + Some(Box::new(sum_simplifier)) + } +} + +/// Implement ClickBench Q29 specific optimization: +/// `SUM(arg + constant)` --> `SUM(arg) + constant * COUNT(arg)` +/// +/// Backstory: Q29 computes `SUM(col + 1)` ... `SUM(col + 89)`; after this rewrite all of those terms are computed from one `SUM(col)` and one `COUNT(col)` +/// NOTE(review): operand types are not checked here; confirm the rewrite is acceptable for float/decimal sums (rounding, overflow) +fn sum_simplifier(mut agg: AggregateFunction, _info: &SimplifyContext) -> Result<Expr> { + // Explicitly destructure to ensure we check all relevant fields + let AggregateFunctionParams { + args, + distinct, + filter, + order_by, + null_treatment, + } = &agg.params; + + if *distinct + || filter.is_some() + || !order_by.is_empty() + || 
null_treatment.is_some() + || args.len() != 1 + { + return Ok(Expr::AggregateFunction(agg)); + } + + // otherwise check whether the single argument is `<expr> + <literal>` + let (arg, lit) = match SplitResult::new(agg.params.args.swap_remove(0)) { + SplitResult::Original(expr) => { + agg.params.args.push(expr); // put it back + return Ok(Expr::AggregateFunction(agg)); + } + SplitResult::Split { arg, lit } => (arg, lit), + }; + + // Rewrite to SUM(arg) + agg.params.args.push(arg.clone()); + let sum_agg = Expr::AggregateFunction(agg); + + // sum(arg) + scalar * COUNT(arg) + Ok(sum_agg + (lit * crate::count::count(arg))) +} + +/// Result of trying to split an expression into an arg and constant +#[derive(Debug, Clone)] +enum SplitResult { + /// if the expression is either of + /// * `<arg> <op> <lit>` + /// * `<lit> <op> <arg>` + /// + /// When `op` is `+` + Split { arg: Expr, lit: Expr }, + /// If the expression is something else + Original(Expr), +} + +impl SplitResult { + fn new(expr: Expr) -> Self { + let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr else { + return Self::Original(expr); + }; + if op != Operator::Plus { + return Self::Original(Expr::BinaryExpr(BinaryExpr { left, op, right })); + } + + match (left.as_ref(), right.as_ref()) { + (Expr::Literal(..), _) => Self::Split { + arg: *right, + lit: *left, + }, + (_, Expr::Literal(..)) => Self::Split { + arg: *left, + lit: *right, + }, + _ => Self::Original(Expr::BinaryExpr(BinaryExpr { left, op, right })), + } + } } /// This accumulator computes SUM incrementally From 19143422ffb2a37087afa16aa8f525aeb38a424a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 3 Mar 2026 05:15:40 -0500 Subject: [PATCH 5/6] Update the explain plan --- datafusion/sqllogictest/test_files/clickbench.slt | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/datafusion/sqllogictest/test_files/clickbench.slt b/datafusion/sqllogictest/test_files/clickbench.slt index 10059664adad7..063d39ed0818f 100644 --- 
a/datafusion/sqllogictest/test_files/clickbench.slt +++ b/datafusion/sqllogictest/test_files/clickbench.slt @@ -795,13 +795,15 @@ query TT EXPLAIN SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), 
SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits; ---- logical_plan -01)Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS hits.ResolutionWidth), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(1)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(2)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(3)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(4)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(5)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(6)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(7)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(8)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(9)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(10)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(11)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(12)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(13)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(14)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(15)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(16)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(17)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(18)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(19)), sum(__common_expr_1 AS 
hits.ResolutionWidth + Int64(20)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(21)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(22)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(23)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(24)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(25)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(26)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(27)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(28)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(29)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(30)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(31)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(32)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(33)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(34)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(35)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(36)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(37)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(38)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(39)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(40)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(41)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(42)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(43)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(44)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(45)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(46)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(47)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(48)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(49)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(50)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(51)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(52)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(53)), sum(__common_expr_1 AS hits.ResolutionWidth + 
Int64(54)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(55)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(56)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(57)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(58)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(59)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(60)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(61)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(62)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(63)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(64)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(65)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(66)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(67)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(68)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(69)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(70)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(71)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(72)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(73)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(74)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(75)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(76)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(77)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(78)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(79)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(80)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(81)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(82)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(83)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(84)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(85)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(86)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(87)), sum(__common_expr_1 AS hits.ResolutionWidth + Int64(88)), 
sum(__common_expr_1 AS hits.ResolutionWidth + Int64(89))]] -02)--Projection: CAST(hits.ResolutionWidth AS Int64) AS __common_expr_1 -03)----SubqueryAlias: hits -04)------TableScan: hits_raw projection=[ResolutionWidth] +01)Projection: __common_expr_4 AS sum(hits.ResolutionWidth), __common_expr_2 + __common_expr_3 AS sum(hits.ResolutionWidth + Int64(1)), __common_expr_2 + Int64(2) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(2)), __common_expr_2 + Int64(3) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(3)), __common_expr_2 + Int64(4) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(4)), __common_expr_2 + Int64(5) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(5)), __common_expr_2 + Int64(6) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(6)), __common_expr_2 + Int64(7) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(7)), __common_expr_2 + Int64(8) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(8)), __common_expr_2 + Int64(9) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(9)), __common_expr_2 + Int64(10) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(10)), __common_expr_2 + Int64(11) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(11)), __common_expr_2 + Int64(12) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(12)), __common_expr_2 + Int64(13) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(13)), __common_expr_2 + Int64(14) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(14)), __common_expr_2 + Int64(15) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(15)), __common_expr_2 + Int64(16) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(16)), __common_expr_2 + Int64(17) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(17)), __common_expr_2 + Int64(18) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(18)), __common_expr_2 + Int64(19) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(19)), __common_expr_2 + Int64(20) * __common_expr_3 AS sum(hits.ResolutionWidth + 
Int64(20)), __common_expr_2 + Int64(21) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(21)), __common_expr_2 + Int64(22) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(22)), __common_expr_2 + Int64(23) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(23)), __common_expr_2 + Int64(24) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(24)), __common_expr_2 + Int64(25) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(25)), __common_expr_2 + Int64(26) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(26)), __common_expr_2 + Int64(27) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(27)), __common_expr_2 + Int64(28) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(28)), __common_expr_2 + Int64(29) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(29)), __common_expr_2 + Int64(30) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(30)), __common_expr_2 + Int64(31) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(31)), __common_expr_2 + Int64(32) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(32)), __common_expr_2 + Int64(33) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(33)), __common_expr_2 + Int64(34) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(34)), __common_expr_2 + Int64(35) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(35)), __common_expr_2 + Int64(36) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(36)), __common_expr_2 + Int64(37) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(37)), __common_expr_2 + Int64(38) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(38)), __common_expr_2 + Int64(39) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(39)), __common_expr_2 + Int64(40) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(40)), __common_expr_2 + Int64(41) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(41)), __common_expr_2 + Int64(42) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(42)), __common_expr_2 + Int64(43) * __common_expr_3 AS 
sum(hits.ResolutionWidth + Int64(43)), __common_expr_2 + Int64(44) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(44)), __common_expr_2 + Int64(45) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(45)), __common_expr_2 + Int64(46) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(46)), __common_expr_2 + Int64(47) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(47)), __common_expr_2 + Int64(48) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(48)), __common_expr_2 + Int64(49) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(49)), __common_expr_2 + Int64(50) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(50)), __common_expr_2 + Int64(51) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(51)), __common_expr_2 + Int64(52) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(52)), __common_expr_2 + Int64(53) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(53)), __common_expr_2 + Int64(54) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(54)), __common_expr_2 + Int64(55) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(55)), __common_expr_2 + Int64(56) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(56)), __common_expr_2 + Int64(57) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(57)), __common_expr_2 + Int64(58) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(58)), __common_expr_2 + Int64(59) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(59)), __common_expr_2 + Int64(60) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(60)), __common_expr_2 + Int64(61) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(61)), __common_expr_2 + Int64(62) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(62)), __common_expr_2 + Int64(63) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(63)), __common_expr_2 + Int64(64) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(64)), __common_expr_2 + Int64(65) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(65)), __common_expr_2 + 
Int64(66) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(66)), __common_expr_2 + Int64(67) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(67)), __common_expr_2 + Int64(68) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(68)), __common_expr_2 + Int64(69) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(69)), __common_expr_2 + Int64(70) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(70)), __common_expr_2 + Int64(71) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(71)), __common_expr_2 + Int64(72) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(72)), __common_expr_2 + Int64(73) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(73)), __common_expr_2 + Int64(74) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(74)), __common_expr_2 + Int64(75) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(75)), __common_expr_2 + Int64(76) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(76)), __common_expr_2 + Int64(77) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(77)), __common_expr_2 + Int64(78) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(78)), __common_expr_2 + Int64(79) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(79)), __common_expr_2 + Int64(80) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(80)), __common_expr_2 + Int64(81) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(81)), __common_expr_2 + Int64(82) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(82)), __common_expr_2 + Int64(83) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(83)), __common_expr_2 + Int64(84) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(84)), __common_expr_2 + Int64(85) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(85)), __common_expr_2 + Int64(86) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(86)), __common_expr_2 + Int64(87) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(87)), __common_expr_2 + Int64(88) * __common_expr_3 AS sum(hits.ResolutionWidth + 
Int64(88)), __common_expr_2 + Int64(89) * __common_expr_3 AS sum(hits.ResolutionWidth + Int64(89)) +02)--Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1) AS __common_expr_2, count(__common_expr_1) AS __common_expr_3, sum(__common_expr_1 AS hits.ResolutionWidth) AS __common_expr_4]] +03)----Projection: CAST(hits.ResolutionWidth AS Int64) AS __common_expr_1 +04)------SubqueryAlias: hits +05)--------TableScan: hits_raw projection=[ResolutionWidth] physical_plan -01)AggregateExec: mode=Single, gby=[], aggr=[sum(hits.ResolutionWidth), sum(hits.ResolutionWidth + Int64(1)), sum(hits.ResolutionWidth + Int64(2)), sum(hits.ResolutionWidth + Int64(3)), sum(hits.ResolutionWidth + Int64(4)), sum(hits.ResolutionWidth + Int64(5)), sum(hits.ResolutionWidth + Int64(6)), sum(hits.ResolutionWidth + Int64(7)), sum(hits.ResolutionWidth + Int64(8)), sum(hits.ResolutionWidth + Int64(9)), sum(hits.ResolutionWidth + Int64(10)), sum(hits.ResolutionWidth + Int64(11)), sum(hits.ResolutionWidth + Int64(12)), sum(hits.ResolutionWidth + Int64(13)), sum(hits.ResolutionWidth + Int64(14)), sum(hits.ResolutionWidth + Int64(15)), sum(hits.ResolutionWidth + Int64(16)), sum(hits.ResolutionWidth + Int64(17)), sum(hits.ResolutionWidth + Int64(18)), sum(hits.ResolutionWidth + Int64(19)), sum(hits.ResolutionWidth + Int64(20)), sum(hits.ResolutionWidth + Int64(21)), sum(hits.ResolutionWidth + Int64(22)), sum(hits.ResolutionWidth + Int64(23)), sum(hits.ResolutionWidth + Int64(24)), sum(hits.ResolutionWidth + Int64(25)), sum(hits.ResolutionWidth + Int64(26)), sum(hits.ResolutionWidth + Int64(27)), sum(hits.ResolutionWidth + Int64(28)), sum(hits.ResolutionWidth + Int64(29)), sum(hits.ResolutionWidth + Int64(30)), sum(hits.ResolutionWidth + Int64(31)), sum(hits.ResolutionWidth + Int64(32)), sum(hits.ResolutionWidth + Int64(33)), sum(hits.ResolutionWidth + Int64(34)), sum(hits.ResolutionWidth + Int64(35)), sum(hits.ResolutionWidth + Int64(36)), sum(hits.ResolutionWidth + Int64(37)), sum(hits.ResolutionWidth 
+ Int64(38)), sum(hits.ResolutionWidth + Int64(39)), sum(hits.ResolutionWidth + Int64(40)), sum(hits.ResolutionWidth + Int64(41)), sum(hits.ResolutionWidth + Int64(42)), sum(hits.ResolutionWidth + Int64(43)), sum(hits.ResolutionWidth + Int64(44)), sum(hits.ResolutionWidth + Int64(45)), sum(hits.ResolutionWidth + Int64(46)), sum(hits.ResolutionWidth + Int64(47)), sum(hits.ResolutionWidth + Int64(48)), sum(hits.ResolutionWidth + Int64(49)), sum(hits.ResolutionWidth + Int64(50)), sum(hits.ResolutionWidth + Int64(51)), sum(hits.ResolutionWidth + Int64(52)), sum(hits.ResolutionWidth + Int64(53)), sum(hits.ResolutionWidth + Int64(54)), sum(hits.ResolutionWidth + Int64(55)), sum(hits.ResolutionWidth + Int64(56)), sum(hits.ResolutionWidth + Int64(57)), sum(hits.ResolutionWidth + Int64(58)), sum(hits.ResolutionWidth + Int64(59)), sum(hits.ResolutionWidth + Int64(60)), sum(hits.ResolutionWidth + Int64(61)), sum(hits.ResolutionWidth + Int64(62)), sum(hits.ResolutionWidth + Int64(63)), sum(hits.ResolutionWidth + Int64(64)), sum(hits.ResolutionWidth + Int64(65)), sum(hits.ResolutionWidth + Int64(66)), sum(hits.ResolutionWidth + Int64(67)), sum(hits.ResolutionWidth + Int64(68)), sum(hits.ResolutionWidth + Int64(69)), sum(hits.ResolutionWidth + Int64(70)), sum(hits.ResolutionWidth + Int64(71)), sum(hits.ResolutionWidth + Int64(72)), sum(hits.ResolutionWidth + Int64(73)), sum(hits.ResolutionWidth + Int64(74)), sum(hits.ResolutionWidth + Int64(75)), sum(hits.ResolutionWidth + Int64(76)), sum(hits.ResolutionWidth + Int64(77)), sum(hits.ResolutionWidth + Int64(78)), sum(hits.ResolutionWidth + Int64(79)), sum(hits.ResolutionWidth + Int64(80)), sum(hits.ResolutionWidth + Int64(81)), sum(hits.ResolutionWidth + Int64(82)), sum(hits.ResolutionWidth + Int64(83)), sum(hits.ResolutionWidth + Int64(84)), sum(hits.ResolutionWidth + Int64(85)), sum(hits.ResolutionWidth + Int64(86)), sum(hits.ResolutionWidth + Int64(87)), sum(hits.ResolutionWidth + Int64(88)), sum(hits.ResolutionWidth + 
Int64(89))] -02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[CAST(ResolutionWidth@20 AS Int64) as __common_expr_1], file_type=parquet +01)ProjectionExec: expr=[__common_expr_4@2 as sum(hits.ResolutionWidth), __common_expr_2@0 + __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(1)), __common_expr_2@0 + 2 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(2)), __common_expr_2@0 + 3 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(3)), __common_expr_2@0 + 4 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(4)), __common_expr_2@0 + 5 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(5)), __common_expr_2@0 + 6 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(6)), __common_expr_2@0 + 7 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(7)), __common_expr_2@0 + 8 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(8)), __common_expr_2@0 + 9 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(9)), __common_expr_2@0 + 10 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(10)), __common_expr_2@0 + 11 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(11)), __common_expr_2@0 + 12 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(12)), __common_expr_2@0 + 13 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(13)), __common_expr_2@0 + 14 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(14)), __common_expr_2@0 + 15 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(15)), __common_expr_2@0 + 16 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(16)), __common_expr_2@0 + 17 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(17)), __common_expr_2@0 + 18 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(18)), __common_expr_2@0 + 19 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(19)), __common_expr_2@0 + 20 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(20)), __common_expr_2@0 + 21 * 
__common_expr_3@1 as sum(hits.ResolutionWidth + Int64(21)), __common_expr_2@0 + 22 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(22)), __common_expr_2@0 + 23 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(23)), __common_expr_2@0 + 24 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(24)), __common_expr_2@0 + 25 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(25)), __common_expr_2@0 + 26 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(26)), __common_expr_2@0 + 27 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(27)), __common_expr_2@0 + 28 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(28)), __common_expr_2@0 + 29 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(29)), __common_expr_2@0 + 30 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(30)), __common_expr_2@0 + 31 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(31)), __common_expr_2@0 + 32 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(32)), __common_expr_2@0 + 33 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(33)), __common_expr_2@0 + 34 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(34)), __common_expr_2@0 + 35 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(35)), __common_expr_2@0 + 36 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(36)), __common_expr_2@0 + 37 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(37)), __common_expr_2@0 + 38 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(38)), __common_expr_2@0 + 39 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(39)), __common_expr_2@0 + 40 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(40)), __common_expr_2@0 + 41 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(41)), __common_expr_2@0 + 42 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(42)), __common_expr_2@0 + 43 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(43)), __common_expr_2@0 + 44 * __common_expr_3@1 as 
sum(hits.ResolutionWidth + Int64(44)), __common_expr_2@0 + 45 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(45)), __common_expr_2@0 + 46 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(46)), __common_expr_2@0 + 47 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(47)), __common_expr_2@0 + 48 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(48)), __common_expr_2@0 + 49 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(49)), __common_expr_2@0 + 50 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(50)), __common_expr_2@0 + 51 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(51)), __common_expr_2@0 + 52 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(52)), __common_expr_2@0 + 53 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(53)), __common_expr_2@0 + 54 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(54)), __common_expr_2@0 + 55 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(55)), __common_expr_2@0 + 56 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(56)), __common_expr_2@0 + 57 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(57)), __common_expr_2@0 + 58 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(58)), __common_expr_2@0 + 59 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(59)), __common_expr_2@0 + 60 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(60)), __common_expr_2@0 + 61 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(61)), __common_expr_2@0 + 62 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(62)), __common_expr_2@0 + 63 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(63)), __common_expr_2@0 + 64 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(64)), __common_expr_2@0 + 65 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(65)), __common_expr_2@0 + 66 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(66)), __common_expr_2@0 + 67 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(67)), 
__common_expr_2@0 + 68 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(68)), __common_expr_2@0 + 69 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(69)), __common_expr_2@0 + 70 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(70)), __common_expr_2@0 + 71 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(71)), __common_expr_2@0 + 72 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(72)), __common_expr_2@0 + 73 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(73)), __common_expr_2@0 + 74 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(74)), __common_expr_2@0 + 75 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(75)), __common_expr_2@0 + 76 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(76)), __common_expr_2@0 + 77 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(77)), __common_expr_2@0 + 78 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(78)), __common_expr_2@0 + 79 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(79)), __common_expr_2@0 + 80 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(80)), __common_expr_2@0 + 81 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(81)), __common_expr_2@0 + 82 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(82)), __common_expr_2@0 + 83 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(83)), __common_expr_2@0 + 84 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(84)), __common_expr_2@0 + 85 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(85)), __common_expr_2@0 + 86 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(86)), __common_expr_2@0 + 87 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(87)), __common_expr_2@0 + 88 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(88)), __common_expr_2@0 + 89 * __common_expr_3@1 as sum(hits.ResolutionWidth + Int64(89))] +02)--AggregateExec: mode=Single, gby=[], aggr=[__common_expr_2, __common_expr_3, __common_expr_4] +03)----DataSourceExec: 
file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/clickbench_hits_10.parquet]]}, projection=[CAST(ResolutionWidth@20 AS Int64) as __common_expr_1], file_type=parquet query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), 
SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits; From 4ff7980bfef7fb6bf984e0f4026d943cd0cf349f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 3 Mar 2026 05:48:44 -0500 Subject: [PATCH 6/6] Add some new simplify tests --- .../test_files/aggregates_simplify.slt | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 datafusion/sqllogictest/test_files/aggregates_simplify.slt diff --git a/datafusion/sqllogictest/test_files/aggregates_simplify.slt b/datafusion/sqllogictest/test_files/aggregates_simplify.slt new file mode 100644 index 0000000000000..c053a8796bd0d --- /dev/null +++ b/datafusion/sqllogictest/test_files/aggregates_simplify.slt @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +####### +# Tests for aggregate optimizations / simplifications +####### + +statement ok +CREATE TABLE sum_simplify_t AS VALUES (1), (2), (NULL); + +####### +# Positive EXPLAIN cases for SUM(arg + literal) simplification +####### + +# Expect to see one COUNT and one SUM in each query below +query TT +EXPLAIN SELECT SUM(column1 + 1), SUM(column1 + 2) FROM sum_simplify_t; +---- +logical_plan +01)Projection: __common_expr_1 + __common_expr_2 AS sum(sum_simplify_t.column1 + Int64(1)), __common_expr_1 + Int64(2) * __common_expr_2 AS sum(sum_simplify_t.column1 + Int64(2)) +02)--Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1) AS __common_expr_1, count(sum_simplify_t.column1) AS __common_expr_2]] +03)----TableScan: sum_simplify_t projection=[column1] +physical_plan +01)ProjectionExec: expr=[__common_expr_1@0 + __common_expr_2@1 as sum(sum_simplify_t.column1 + Int64(1)), __common_expr_1@0 + 2 * __common_expr_2@1 as sum(sum_simplify_t.column1 + Int64(2))] +02)--AggregateExec: mode=Single, gby=[], aggr=[__common_expr_1, __common_expr_2] +03)----DataSourceExec: partitions=1, partition_sizes=[1] + +query TT +EXPLAIN SELECT SUM(1 + column1), SUM(column1 + 2) FROM sum_simplify_t; +---- +logical_plan +01)Projection: __common_expr_1 + __common_expr_2 AS sum(Int64(1) + sum_simplify_t.column1), __common_expr_1 + Int64(2) * __common_expr_2 AS sum(sum_simplify_t.column1 + Int64(2)) +02)--Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1) AS __common_expr_1, count(sum_simplify_t.column1) AS __common_expr_2]] +03)----TableScan: sum_simplify_t 
projection=[column1] +physical_plan +01)ProjectionExec: expr=[__common_expr_1@0 + __common_expr_2@1 as sum(Int64(1) + sum_simplify_t.column1), __common_expr_1@0 + 2 * __common_expr_2@1 as sum(sum_simplify_t.column1 + Int64(2))] +02)--AggregateExec: mode=Single, gby=[], aggr=[__common_expr_1, __common_expr_2] +03)----DataSourceExec: partitions=1, partition_sizes=[1] + +####### +# Cases where rewrite should not apply +####### + +query TT +EXPLAIN SELECT SUM(DISTINCT column1 + 1), SUM(DISTINCT column1 + 2) FROM sum_simplify_t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[sum(DISTINCT sum_simplify_t.column1 + Int64(1)), sum(DISTINCT sum_simplify_t.column1 + Int64(2))]] +02)--TableScan: sum_simplify_t projection=[column1] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[sum(DISTINCT sum_simplify_t.column1 + Int64(1)), sum(DISTINCT sum_simplify_t.column1 + Int64(2))] +02)--DataSourceExec: partitions=1, partition_sizes=[1] + +query TT +EXPLAIN SELECT SUM(column1 + 1) FILTER (WHERE column1 > 1), SUM(column1 + 2) FILTER (WHERE column1 > 2 ) FROM sum_simplify_t; +---- +logical_plan +01)Aggregate: groupBy=[[]], aggr=[[sum(sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(1)), sum(sum_simplify_t.column1 + Int64(2)) FILTER (WHERE sum_simplify_t.column1 > Int64(2))]] +02)--TableScan: sum_simplify_t projection=[column1] +physical_plan +01)AggregateExec: mode=Single, gby=[], aggr=[sum(sum_simplify_t.column1 + Int64(1)) FILTER (WHERE sum_simplify_t.column1 > Int64(1)), sum(sum_simplify_t.column1 + Int64(2)) FILTER (WHERE sum_simplify_t.column1 > Int64(2))] +02)--DataSourceExec: partitions=1, partition_sizes=[1] + +# This test should work +query error +SELECT SUM(random() + 1), SUM(random() + 2) FROM sum_simplify_t; + + +####### +# Reproducers for known issues +####### + +# Blocking: single rewritten SUM fails with "Invalid aggregate expression" +query error +SELECT SUM(column1 + 1) FROM sum_simplify_t; + +# Blocking: CSE can fail 
with "No field named ... Valid fields are __common_expr_1" +query error +SELECT SUM(column1), SUM(column1 + 1) FROM sum_simplify_t; + + +statement ok +DROP TABLE sum_simplify_t;