1- use crate :: test_utils:: localhost:: start_localhost_context;
1+ use crate :: { DistributedExt , test_utils:: localhost:: start_localhost_context} ;
22use crate :: DefaultSessionBuilder ;
33use arrow:: record_batch:: RecordBatch ;
44use async_trait:: async_trait;
@@ -8,7 +8,6 @@ use datafusion::{
88 execution:: context:: SessionContext ,
99 logical_expr:: LogicalPlan ,
1010} ;
11- use rand:: Rng ;
1211
1312/// Fuzzing database with distributed session context and helper functions
1413pub struct FuzzDB {
@@ -22,98 +21,58 @@ pub struct FuzzDB {
2221
2322/// Configuration parameters for a fuzzing session, supplied by the caller
2423#[ derive( Debug , Clone ) ]
25- pub struct SessionConfig {
24+ pub struct FuzzConfig {
2625 pub num_workers : usize ,
27- pub tasks_per_file : usize ,
28- pub cardinality_task_count_factor : usize ,
29- pub target_partitions : usize ,
30- }
31-
32- impl Default for SessionConfig {
33- fn default ( ) -> Self {
34- Self {
35- num_workers : 4 ,
36- tasks_per_file : 4 ,
37- cardinality_task_count_factor : 4 ,
38- target_partitions : 8 ,
39- }
40- }
26+ pub files_per_task : usize ,
27+ pub cardinality_task_count_factor : f64 ,
4128}
4229
4330impl FuzzDB {
4431 /// Create a new FuzzDB from the supplied configuration and setup function
45- pub async fn new < F , Fut > ( setup : F ) -> Result < Self >
32+ pub async fn new < F , Fut > ( cfg : FuzzConfig , setup : F ) -> Result < Self >
4633 where
4734 F : Fn ( SessionContext ) -> Fut + Send + Sync ,
4835 Fut : std:: future:: Future < Output = Result < ( ) > > + Send
4936 {
50- let config = randomize_session_config ( ) ;
51- create_db ( config, setup) . await
37+ create_db ( cfg, setup) . await
5238 }
5339
5440 /// Execute a query and validate results using all oracles
5541 pub async fn run ( & self , query : & str ) -> Result < Vec < RecordBatch > > {
5642
5743 // Execute on distributed context
5844 let df = self . distributed_ctx . sql ( query) . await ?;
59- let _logical_plan = df. logical_plan ( ) . clone ( ) ;
45+ let logical_plan = df. logical_plan ( ) . clone ( ) ;
6046 let results = df. collect ( ) . await ?;
6147
6248 // Run oracles
63- let _single_node_oracle = SingleNodeOracle :: new ( & self . single_node_ctx ) ;
64- let _ordering_oracle = OrderingOracle :: new ( ) ;
49+ let single_node_oracle = SingleNodeOracle :: new ( & self . single_node_ctx ) ;
50+ let ordering_oracle = OrderingOracle :: new ( ) ;
6551
6652 // Validate with SingleNodeOracle
67- // single_node_oracle.validate(&self.distributed_ctx, query, &results).await?;
53+ single_node_oracle. validate ( & self . distributed_ctx , query, & results) . await ?;
6854
6955 // Validate with OrderingOracle
70- // ordering_oracle.validate_with_plan(&logical_plan, &results).await?;
56+ ordering_oracle. validate_with_plan ( & logical_plan, & results) . await ?;
7157
7258 Ok ( results)
7359 }
7460}
7561
76- /// Randomize session configuration parameters
77- fn randomize_session_config ( ) -> SessionConfig {
78- let mut rng = rand:: thread_rng ( ) ;
79-
80- let config = SessionConfig {
81- num_workers : rng. gen_range ( 2 ..=8 ) ,
82- tasks_per_file : rng. gen_range ( 1 ..=8 ) ,
83- cardinality_task_count_factor : rng. gen_range ( 1 ..=8 ) ,
84- target_partitions : rng. gen_range ( 4 ..=16 ) ,
85- } ;
86-
87- println ! ( "Generated random session config: {:?}" , config) ;
88- config
89- }
62+
9063
9164/// Create distributed session context with specified configuration
92- async fn create_db < F , Fut > ( config : SessionConfig , setup : F ) -> Result < FuzzDB >
65+ async fn create_db < F , Fut > ( cfg : FuzzConfig , setup : F ) -> Result < FuzzDB >
9366where
9467 F : Fn ( SessionContext ) -> Fut + Send + Sync ,
9568 Fut : std:: future:: Future < Output = Result < ( ) > > + Send
9669{
97- println ! ( "Creating FuzzDB with {} workers" , config . num_workers) ;
70+ println ! ( "Creating FuzzDB with {} workers" , cfg . num_workers) ;
9871
9972 // Start localhost context with workers using DefaultSessionBuilder
100- let ( distributed_ctx, worker_tasks) = start_localhost_context ( config. num_workers , DefaultSessionBuilder ) . await ;
101-
102- // Configure session parameters
103- {
104- let mut session_config = distributed_ctx. state ( ) . config ( ) . clone ( ) ;
105- session_config. options_mut ( ) . execution . target_partitions = config. target_partitions ;
106- session_config. options_mut ( ) . optimizer . enable_round_robin_repartition = true ;
107-
108- // Set additional distributed-specific parameters
109- if let Ok ( tasks_per_file) = std:: env:: var ( "DATAFUSION_EXECUTION_TASKS_PER_FILE" ) {
110- println ! ( "Using DATAFUSION_EXECUTION_TASKS_PER_FILE from environment: {}" , tasks_per_file) ;
111- } else {
112- unsafe {
113- std:: env:: set_var ( "DATAFUSION_EXECUTION_TASKS_PER_FILE" , config. tasks_per_file . to_string ( ) ) ;
114- }
115- }
116- }
73+ let ( mut distributed_ctx, worker_tasks) = start_localhost_context ( cfg. num_workers , DefaultSessionBuilder ) . await ;
74+ distributed_ctx. set_distributed_files_per_task ( cfg. files_per_task ) ?;
75+ distributed_ctx. set_distributed_cardinality_effect_task_scale_factor ( cfg. cardinality_task_count_factor ) ?;
11776
11877 // Create single node context for oracle comparison
11978 let single_node_ctx = SessionContext :: new ( ) ;
@@ -122,19 +81,6 @@ where
12281 setup ( distributed_ctx. clone ( ) ) . await ?;
12382 setup ( single_node_ctx. clone ( ) ) . await ?;
12483
125- // Log worker configuration
126- println ! ( "Session configuration:" ) ;
127- println ! ( " Number of workers: {}" , config. num_workers) ;
128- println ! ( " Tasks per file: {}" , config. tasks_per_file) ;
129- println ! ( " Cardinality task count factor: {}" , config. cardinality_task_count_factor) ;
130- println ! ( " Target partitions: {}" , config. target_partitions) ;
131-
132- // Get worker ports from distributed context (this is a simplified approach)
133- // In a real implementation, you might want to extract actual port information
134- for i in 0 ..config. num_workers {
135- println ! ( " Worker {}: localhost:random_port" , i) ;
136- }
137-
13884 Ok ( FuzzDB {
13985 distributed_ctx,
14086 single_node_ctx,
@@ -163,8 +109,6 @@ impl<'a> SingleNodeOracle<'a> {
163109#[ async_trait]
164110impl < ' a > Oracle for SingleNodeOracle < ' a > {
165111 async fn validate ( & self , _distributed_ctx : & SessionContext , query : & str , distributed_results : & [ RecordBatch ] ) -> Result < ( ) > {
166- println ! ( "SingleNodeOracle: Validating query against single-node execution" ) ;
167-
168112 // Execute the same query on single node context
169113 let single_node_df = self . single_node_ctx . sql ( query) . await ?;
170114 let single_node_results = single_node_df. collect ( ) . await ?;
0 commit comments