2121
2222static pthread_barrier_t thread_barrier ;
2323
// Number of send partitions that correspond to one receive partition: the receive side is
// initialized with num_send_partitions / ratio_send_to_receive partitions, each holding
// partition_size * ratio_send_to_receive elements.
24+ static int ratio_send_to_receive = 1 ;
25+
2426int tst_threaded_ring_partitioned_init (struct tst_env * env )
2527{
2628 int comm_rank ;
@@ -66,15 +68,16 @@ int tst_threaded_ring_partitioned_init(struct tst_env *env)
6668 return 0 ;
6769}
6870
69- // busy wait until partition arrived
71+
72+ // busy wait until partition arrived, using exponential backoff with initial backoff time given.
7073// returns 1 if the partition has arrived and 0 if waiting was interupted
71- static int wait_for_partition (MPI_Request * recv_request , int partition_num )
74+ static int wait_for_partition (MPI_Request * recv_request , int partition_num , useconds_t backoff_time )
7275{
7376 int flag = 0 ;
7477 do
7578 {
7679 MPI_CHECK (MPI_Parrived (* recv_request , partition_num , & flag ));
77- } while (flag == 0 && usleep (2000 ) == 0 );
80+ } while (flag == 0 && usleep (( backoff_time = ( backoff_time * 3 ) / 2 ) ) == 0 );
7881
7982 return flag ;
8083}
@@ -129,26 +132,31 @@ int tst_threaded_ring_partitioned_run(struct tst_env *env)
129132 send_to , recv_from , env -> tag );
130133
131134 // number of partitions and values per partition
132- int num_partitions = num_worker_threads ;
133- int partition_size = env -> values_num ; // number of elements
135+ int num_send_partitions = num_worker_threads ;
136+ int num_recv_partitions = num_send_partitions / ratio_send_to_receive ;
137+ int partition_size = env -> values_num ; // number of elements per send partition
138+
139+ // partition numbers for this thread
140+ int send_partition_num = thread_num ;
141+ int recv_partition_num = (thread_num % ratio_send_to_receive == 0 ) ? thread_num / ratio_send_to_receive : -1 ;
134142
135143 // init send and recv and start both
136144 if (thread_num == TST_THREAD_MASTER )
137145 {
138- tst_output_printf (DEBUG_LOG , TST_REPORT_MAX , "(Rank:%i, Thread:%i) initializing send to %i and recv from %i with %i partitions of size %i*%i bytes\n" ,
146+ tst_output_printf (DEBUG_LOG , TST_REPORT_MAX ,"(Rank:%i, Thread:%i) initializing send to %i and recv from %i with %i partitions of size %i*%i bytes\n" ,
139147 comm_rank , thread_num ,
140- send_to , recv_from , num_partitions , partition_size , type_extent );
148+ send_to , recv_from , num_send_partitions , partition_size , type_extent );
141149
142- MPI_CHECK (MPI_Psend_init (env -> send_buffer , num_partitions , partition_size , type , send_to ,
150+ MPI_CHECK (MPI_Psend_init (env -> send_buffer , num_send_partitions , partition_size , type , send_to ,
143151 0 , comm , MPI_INFO_NULL , send_request ));
144- MPI_CHECK (MPI_Precv_init (env -> recv_buffer , num_partitions , partition_size , type , recv_from ,
152+ MPI_CHECK (MPI_Precv_init (env -> recv_buffer , num_recv_partitions , partition_size * ratio_send_to_receive , type , recv_from ,
145153 0 , comm , MPI_INFO_NULL , recv_request ));
146154
147155 MPI_CHECK (MPI_Startall (2 , env -> req_buffer ));
148156
149157 // wait for all ranks to become ready
150158 MPI_CHECK (MPI_Barrier (MPI_COMM_WORLD ));
151- };
159+ }
152160
153161 pthread_barrier_wait (& thread_barrier );
154162
@@ -157,30 +165,32 @@ int tst_threaded_ring_partitioned_run(struct tst_env *env)
157165 if (thread_num == TST_THREAD_MASTER )
158166 time_init = MPI_Wtime ();
159167
160- if (thread_num >= 0 && thread_num < num_partitions )
168+ if (send_partition_num >= 0 && send_partition_num < num_send_partitions )
161169 {
162170 // allow this partition to be sent
163- MPI_CHECK (MPI_Pready (thread_num , * send_request ));
171+ MPI_CHECK (MPI_Pready (send_partition_num , * send_request ));
164172 }
165173
166- if (thread_num >= 0 && thread_num < num_partitions )
174+ if (recv_partition_num >= 0 && recv_partition_num < num_recv_partitions )
167175 {
168- wait_for_partition (recv_request , thread_num );
176+ wait_for_partition (recv_request , recv_partition_num , 512 );
169177 }
170178 }
171179 else
172180 {
173- if (thread_num >= 0 && thread_num < num_partitions )
181+ if (send_partition_num >= 0 && send_partition_num < num_send_partitions )
174182 {
175- wait_for_partition (recv_request , thread_num );
183+ if (recv_partition_num >= 0 && recv_partition_num < num_recv_partitions ) {
184+ wait_for_partition (recv_request , recv_partition_num , 128 );
185+ }
176186
177187 // simply copy data from input to output buffer
178- int begin_index = partition_size * thread_num * type_extent ;
188+ int begin_index = partition_size * send_partition_num * type_extent ;
179189 int size = partition_size * type_extent ;
180190 memcpy (& env -> send_buffer [begin_index ], & env -> recv_buffer [begin_index ], size );
181191
182192 // allow sending of this partition
183- MPI_CHECK (MPI_Pready (thread_num , * send_request ));
193+ MPI_CHECK (MPI_Pready (send_partition_num , * send_request ));
184194 }
185195 }
186196
0 commit comments