@@ -310,6 +310,56 @@ mca_btl_ofi_context_t *get_ofi_context_rr(mca_btl_ofi_module_t *btl)
310310 return & btl -> contexts [rr_num ++ % btl -> num_contexts ];
311311}
312312
313+ static void inline complete_op_context (mca_btl_ofi_context_t * context ,
314+ void * op_context , int rc )
315+ {
316+ mca_btl_ofi_completion_context_t * c_ctx =
317+ (mca_btl_ofi_completion_context_t * ) op_context ;
318+ /* We are casting to every type here just for simplicity. */
319+ mca_btl_ofi_base_completion_t * comp =
320+ (mca_btl_ofi_base_completion_t * ) c_ctx -> comp ;
321+ mca_btl_ofi_frag_completion_t * frag_comp =
322+ (mca_btl_ofi_frag_completion_t * ) c_ctx -> comp ;
323+ mca_btl_ofi_rdma_completion_t * rdma_comp
324+ = (mca_btl_ofi_rdma_completion_t * ) c_ctx -> comp ;
325+
326+ switch (comp -> type ) {
327+ case MCA_BTL_OFI_TYPE_GET :
328+ case MCA_BTL_OFI_TYPE_PUT :
329+ case MCA_BTL_OFI_TYPE_AOP :
330+ case MCA_BTL_OFI_TYPE_AFOP :
331+ case MCA_BTL_OFI_TYPE_CSWAP :
332+ /* call the callback */
333+ if (rdma_comp -> cbfunc ) {
334+ rdma_comp -> cbfunc (comp -> btl , comp -> endpoint , rdma_comp -> local_address ,
335+ rdma_comp -> local_handle , rdma_comp -> cbcontext ,
336+ rdma_comp -> cbdata , rc );
337+ }
338+
339+ MCA_BTL_OFI_NUM_RDMA_DEC ((mca_btl_ofi_module_t * ) comp -> btl );
340+ break ;
341+
342+ case MCA_BTL_OFI_TYPE_RECV :
343+ mca_btl_ofi_recv_frag ((mca_btl_ofi_module_t * ) comp -> btl ,
344+ (mca_btl_ofi_endpoint_t * ) comp -> endpoint , context ,
345+ frag_comp -> frag , rc );
346+ break ;
347+
348+ case MCA_BTL_OFI_TYPE_SEND :
349+ MCA_BTL_OFI_NUM_SEND_DEC ((mca_btl_ofi_module_t * ) comp -> btl );
350+ mca_btl_ofi_frag_complete (frag_comp -> frag , rc );
351+ break ;
352+
353+ default :
354+ /* catasthrophic */
355+ BTL_ERROR (("unknown completion type" ));
356+ MCA_BTL_OFI_ABORT ();
357+ }
358+
359+ /* return the completion handler */
360+ opal_free_list_return (comp -> my_list , (opal_free_list_item_t * ) comp );
361+ }
362+
313363int mca_btl_ofi_context_progress (mca_btl_ofi_context_t * context )
314364{
315365
@@ -319,61 +369,14 @@ int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context)
319369 struct fi_cq_entry cq_entry [MCA_BTL_OFI_DEFAULT_MAX_CQE ];
320370 struct fi_cq_err_entry cqerr = {0 };
321371
322- mca_btl_ofi_completion_context_t * c_ctx ;
323- mca_btl_ofi_base_completion_t * comp ;
324- mca_btl_ofi_rdma_completion_t * rdma_comp ;
325- mca_btl_ofi_frag_completion_t * frag_comp ;
326-
327372 ret = fi_cq_read (context -> cq , & cq_entry , mca_btl_ofi_component .num_cqe_read );
328373
329374 if (0 < ret ) {
330375 events_read = ret ;
331376 for (int i = 0 ; i < events_read ; i ++ ) {
332377 if (NULL != cq_entry [i ].op_context ) {
333378 ++ events ;
334-
335- c_ctx = (mca_btl_ofi_completion_context_t * ) cq_entry [i ].op_context ;
336-
337- /* We are casting to every type here just for simplicity. */
338- comp = (mca_btl_ofi_base_completion_t * ) c_ctx -> comp ;
339- frag_comp = (mca_btl_ofi_frag_completion_t * ) c_ctx -> comp ;
340- rdma_comp = (mca_btl_ofi_rdma_completion_t * ) c_ctx -> comp ;
341-
342- switch (comp -> type ) {
343- case MCA_BTL_OFI_TYPE_GET :
344- case MCA_BTL_OFI_TYPE_PUT :
345- case MCA_BTL_OFI_TYPE_AOP :
346- case MCA_BTL_OFI_TYPE_AFOP :
347- case MCA_BTL_OFI_TYPE_CSWAP :
348- /* call the callback */
349- if (rdma_comp -> cbfunc ) {
350- rdma_comp -> cbfunc (comp -> btl , comp -> endpoint , rdma_comp -> local_address ,
351- rdma_comp -> local_handle , rdma_comp -> cbcontext ,
352- rdma_comp -> cbdata , OPAL_SUCCESS );
353- }
354-
355- MCA_BTL_OFI_NUM_RDMA_DEC ((mca_btl_ofi_module_t * ) comp -> btl );
356- break ;
357-
358- case MCA_BTL_OFI_TYPE_RECV :
359- mca_btl_ofi_recv_frag ((mca_btl_ofi_module_t * ) comp -> btl ,
360- (mca_btl_ofi_endpoint_t * ) comp -> endpoint , context ,
361- frag_comp -> frag );
362- break ;
363-
364- case MCA_BTL_OFI_TYPE_SEND :
365- MCA_BTL_OFI_NUM_SEND_DEC ((mca_btl_ofi_module_t * ) comp -> btl );
366- mca_btl_ofi_frag_complete (frag_comp -> frag , OPAL_SUCCESS );
367- break ;
368-
369- default :
370- /* catasthrophic */
371- BTL_ERROR (("unknown completion type" ));
372- MCA_BTL_OFI_ABORT ();
373- }
374-
375- /* return the completion handler */
376- opal_free_list_return (comp -> my_list , (opal_free_list_item_t * ) comp );
379+ complete_op_context (context , cq_entry [i ].op_context , OPAL_SUCCESS );
377380 }
378381 }
379382 } else if (OPAL_UNLIKELY (ret == - FI_EAVAIL )) {
@@ -383,10 +386,31 @@ int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context)
383386 if (0 > ret ) {
384387 BTL_ERROR (("%s:%d: Error returned from fi_cq_readerr: %s(%d)" , __FILE__ , __LINE__ ,
385388 fi_strerror (- ret ), ret ));
386- } else {
387- BTL_ERROR (("fi_cq_readerr: (provider err_code = %d)\n" , cqerr .prov_errno ));
389+ MCA_BTL_OFI_ABORT ();
390+ } else if (NULL != cqerr .op_context ){
391+ switch (cqerr .err ) {
392+ case - FI_EIO : {
393+ mca_btl_ofi_completion_context_t * c_ctx =
394+ (mca_btl_ofi_completion_context_t * ) cqerr .op_context ;
395+ mca_btl_ofi_base_completion_t * comp =
396+ (mca_btl_ofi_base_completion_t * ) c_ctx -> comp ;
397+ mca_btl_ofi_module_t * ofi_btl =
398+ (mca_btl_ofi_module_t * ) comp -> btl ;
399+ if (ofi_btl -> ofi_error_cb ){
400+ ofi_btl -> ofi_error_cb (comp -> btl , 0 , comp -> endpoint -> ep_proc ,
401+ "IO error reported by libfabric" );
402+ }
403+
404+ ++ events ;
405+ complete_op_context (context , cqerr .op_context , OPAL_ERR_UNREACH );
406+ break ;
407+ }
408+ default :
409+ BTL_ERROR (("fi_cq_readerr: %s(%d) (provider err_code = %d)\n" ,
410+ fi_strerror (- cqerr .err ), cqerr .err , cqerr .prov_errno ));
411+ MCA_BTL_OFI_ABORT ();
412+ }
388413 }
389- MCA_BTL_OFI_ABORT ();
390414 }
391415#ifdef FI_EINTR
392416 /* sometimes, sockets provider complain about interrupt. We do nothing. */
0 commit comments