@@ -35,7 +35,9 @@ use databend_common_pipeline::core::Pipe;
3535use databend_common_pipeline:: core:: PipeItem ;
3636use databend_common_pipeline:: core:: ProcessorPtr ;
3737use databend_common_sql:: optimizer:: ir:: SExpr ;
38+ use databend_common_sql:: plans:: FunctionCall ;
3839use databend_common_sql:: plans:: Join ;
40+ use databend_common_sql:: plans:: JoinEquiCondition ;
3941use databend_common_sql:: plans:: JoinType ;
4042use databend_common_sql:: ColumnEntry ;
4143use databend_common_sql:: ColumnSet ;
@@ -52,6 +54,7 @@ use crate::physical_plans::format::PhysicalFormat;
5254use crate :: physical_plans:: physical_plan:: IPhysicalPlan ;
5355use crate :: physical_plans:: physical_plan:: PhysicalPlan ;
5456use crate :: physical_plans:: physical_plan:: PhysicalPlanMeta ;
57+ use crate :: physical_plans:: resolve_scalar;
5558use crate :: physical_plans:: runtime_filter:: build_runtime_filter;
5659use crate :: physical_plans:: Exchange ;
5760use crate :: physical_plans:: PhysicalPlanBuilder ;
@@ -99,6 +102,12 @@ type MergedFieldsResult = (
99102 Vec < ( usize , ( bool , bool ) ) > ,
100103) ;
101104
105+ #[ derive( Clone , Debug , serde:: Serialize , serde:: Deserialize ) ]
106+ pub struct NestedLoopFilterInfo {
107+ pub predicates : Vec < RemoteExpr > ,
108+ pub projection : Vec < usize > ,
109+ }
110+
102111#[ derive( Clone , Debug , serde:: Serialize , serde:: Deserialize ) ]
103112pub struct HashJoin {
104113 pub meta : PhysicalPlanMeta ,
@@ -140,6 +149,7 @@ pub struct HashJoin {
140149
141150 pub runtime_filter : PhysicalRuntimeFilters ,
142151 pub broadcast_id : Option < u32 > ,
152+ pub nested_loop_filter : NestedLoopFilterInfo ,
143153}
144154
145155#[ typetag:: serde]
@@ -261,6 +271,7 @@ impl IPhysicalPlan for HashJoin {
261271 build_side_cache_info : self . build_side_cache_info . clone ( ) ,
262272 runtime_filter : self . runtime_filter . clone ( ) ,
263273 broadcast_id : self . broadcast_id ,
274+ nested_loop_filter : self . nested_loop_filter . clone ( ) ,
264275 } )
265276 }
266277
@@ -1184,80 +1195,23 @@ impl PhysicalPlanBuilder {
11841195 . collect :: < Result < _ > > ( )
11851196 }
11861197
1187- /// Creates a HashJoin physical plan
1188- ///
1189- /// # Arguments
1190- /// * `join` - Join operation
1191- /// * `probe_side` - Probe side physical plan
1192- /// * `build_side` - Build side physical plan
1193- /// * `is_broadcast` - Whether this is a broadcast join
1194- /// * `projections` - Column projections
1195- /// * `probe_projections` - Probe side projections
1196- /// * `build_projections` - Build side projections
1197- /// * `left_join_conditions` - Left join conditions
1198- /// * `right_join_conditions` - Right join conditions
1199- /// * `is_null_equal` - Null equality flags
1200- /// * `non_equi_conditions` - Non-equi conditions
1201- /// * `probe_to_build` - Probe to build mapping
1202- /// * `output_schema` - Output schema
1203- /// * `build_side_cache_info` - Build side cache info
1204- /// * `runtime_filter` - Runtime filter
1205- /// * `stat_info` - Statistics info
1206- ///
1207- /// # Returns
1208- /// * `Result<PhysicalPlan>` - The HashJoin physical plan
1209- #[ allow( clippy:: too_many_arguments) ]
1210- fn create_hash_join (
1198+ fn build_nested_loop_filter_info (
12111199 & self ,
1212- s_expr : & SExpr ,
12131200 join : & Join ,
1214- probe_side : PhysicalPlan ,
1215- build_side : PhysicalPlan ,
1216- projections : ColumnSet ,
1217- probe_projections : ColumnSet ,
1218- build_projections : ColumnSet ,
1219- left_join_conditions : Vec < RemoteExpr > ,
1220- right_join_conditions : Vec < RemoteExpr > ,
1221- is_null_equal : Vec < bool > ,
1222- non_equi_conditions : Vec < RemoteExpr > ,
1223- probe_to_build : Vec < ( usize , ( bool , bool ) ) > ,
1224- output_schema : DataSchemaRef ,
1225- build_side_cache_info : Option < ( usize , HashMap < IndexType , usize > ) > ,
1226- runtime_filter : PhysicalRuntimeFilters ,
1227- stat_info : PlanStatsInfo ,
1228- ) -> Result < PhysicalPlan > {
1229- let build_side_data_distribution = s_expr. build_side_child ( ) . get_data_distribution ( ) ?;
1230- let broadcast_id = if build_side_data_distribution
1231- . as_ref ( )
1232- . is_some_and ( |e| matches ! ( e, databend_common_sql:: plans:: Exchange :: NodeToNodeHash ( _) ) )
1233- {
1234- Some ( self . ctx . get_next_broadcast_id ( ) )
1235- } else {
1236- None
1237- } ;
1238- Ok ( PhysicalPlan :: new ( HashJoin {
1239- projections,
1240- build_projections,
1241- probe_projections,
1242- build : build_side,
1243- probe : probe_side,
1244- join_type : join. join_type ,
1245- build_keys : right_join_conditions,
1246- probe_keys : left_join_conditions,
1247- is_null_equal,
1248- non_equi_conditions,
1249- marker_index : join. marker_index ,
1250- meta : PhysicalPlanMeta :: new ( "HashJoin" ) ,
1251- from_correlated_subquery : join. from_correlated_subquery ,
1252- probe_to_build,
1253- output_schema,
1254- need_hold_hash_table : join. need_hold_hash_table ,
1255- stat_info : Some ( stat_info) ,
1256- single_to_inner : join. single_to_inner ,
1257- build_side_cache_info,
1258- runtime_filter,
1259- broadcast_id,
1260- } ) )
1201+ merged_schema : & DataSchemaRef ,
1202+ ) -> Result < NestedLoopFilterInfo > {
1203+ let predicates = join
1204+ . non_equi_conditions
1205+ . iter ( )
1206+ . map ( |c| Ok ( c. clone ( ) ) )
1207+ . chain ( join. equi_conditions . iter ( ) . map ( condition_to_expr) )
1208+ . map ( |scalar| resolve_scalar ( & scalar?, merged_schema) )
1209+ . collect :: < Result < _ > > ( ) ?;
1210+
1211+ Ok ( NestedLoopFilterInfo {
1212+ predicates,
1213+ projection : vec ! [ ] ,
1214+ } )
12611215 }
12621216
12631217 pub async fn build_hash_join (
@@ -1332,6 +1286,8 @@ impl PhysicalPlanBuilder {
13321286 // Step 10: Process non-equi conditions
13331287 let non_equi_conditions = self . process_non_equi_conditions ( join, & merged_schema) ?;
13341288
1289+ let nested_loop_filter = self . build_nested_loop_filter_info ( join, & merged_schema) ?;
1290+
13351291 // Step 11: Build runtime filter
13361292 let runtime_filter = build_runtime_filter (
13371293 self . ctx . clone ( ) ,
@@ -1345,23 +1301,63 @@ impl PhysicalPlanBuilder {
13451301 . await ?;
13461302
13471303 // Step 12: Create and return the HashJoin
1348- self . create_hash_join (
1349- s_expr,
1350- join,
1351- probe_side,
1352- build_side,
1304+ let build_side_data_distribution = s_expr. build_side_child ( ) . get_data_distribution ( ) ?;
1305+ let broadcast_id = if build_side_data_distribution
1306+ . as_ref ( )
1307+ . is_some_and ( |e| matches ! ( e, databend_common_sql:: plans:: Exchange :: NodeToNodeHash ( _) ) )
1308+ {
1309+ Some ( self . ctx . get_next_broadcast_id ( ) )
1310+ } else {
1311+ None
1312+ } ;
1313+ Ok ( PhysicalPlan :: new ( HashJoin {
13531314 projections,
1354- probe_projections,
13551315 build_projections,
1356- left_join_conditions,
1357- right_join_conditions,
1316+ probe_projections,
1317+ build : build_side,
1318+ probe : probe_side,
1319+ join_type : join. join_type ,
1320+ build_keys : right_join_conditions,
1321+ probe_keys : left_join_conditions,
13581322 is_null_equal,
13591323 non_equi_conditions,
1324+ marker_index : join. marker_index ,
1325+ meta : PhysicalPlanMeta :: new ( "HashJoin" ) ,
1326+ from_correlated_subquery : join. from_correlated_subquery ,
13601327 probe_to_build,
13611328 output_schema,
1329+ need_hold_hash_table : join. need_hold_hash_table ,
1330+ stat_info : Some ( stat_info) ,
1331+ single_to_inner : join. single_to_inner ,
13621332 build_side_cache_info,
13631333 runtime_filter,
1364- stat_info,
1365- )
1334+ broadcast_id,
1335+ nested_loop_filter,
1336+ } ) )
1337+ }
1338+ }
1339+
1340+ fn condition_to_expr ( condition : & JoinEquiCondition ) -> Result < ScalarExpr > {
1341+ let left_type = condition. left . data_type ( ) ?;
1342+ let right_type = condition. right . data_type ( ) ?;
1343+
1344+ let arguments = match ( & left_type, & right_type) {
1345+ ( DataType :: Nullable ( left) , right) if * * left == * right => vec ! [
1346+ condition. left. clone( ) ,
1347+ condition. right. clone( ) . unify_to_data_type( & left_type) ,
1348+ ] ,
1349+ ( left, DataType :: Nullable ( right) ) if * left == * * right => vec ! [
1350+ condition. left. clone( ) . unify_to_data_type( & right_type) ,
1351+ condition. right. clone( ) ,
1352+ ] ,
1353+ _ => vec ! [ condition. left. clone( ) , condition. right. clone( ) ] ,
1354+ } ;
1355+
1356+ Ok ( FunctionCall {
1357+ span : condition. left . span ( ) ,
1358+ func_name : "eq" . to_string ( ) ,
1359+ params : vec ! [ ] ,
1360+ arguments,
13661361 }
1362+ . into ( ) )
13671363}
0 commit comments