Skip to content

Commit 7376b5c

Browse files
committed
Add fallback for deterministic mode
1 parent ba9554a commit 7376b5c

1 file changed

Lines changed: 17 additions & 2 deletions

File tree

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,10 +267,10 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t
267267
}
268268

269269
bool hasHIPTrigger = false;
270-
271270
if (hipFilterOn) {
272271
hasHIPTrigger = work_group_any(thisThreadHasTrigger);
273272
} else {
273+
// Need a barrier here even if HIP filter is disabled
274274
GPUbarrier();
275275
}
276276

@@ -458,6 +458,21 @@ GPUd() void GPUTPCCFHIPTailConnector::Thread<0>(int32_t nBlocks, int32_t nThread
458458
// HIP TAILS: indexing starts at 1, so 0 index indicates no connection
459459
HIPTailDescriptor* tails = GetHIPTails(clusterer, row);
460460

461+
#ifdef GPUCA_DETERMINISTIC_MODE
462+
// Races in tail comparisons and atomic swap can lead to slightly different clusters.
463+
// A sequential fallback is therefore needed in deterministic mode.
464+
if (iThread > 0) {
465+
return;
466+
}
467+
nThreads = 1;
468+
GPUCommonAlgorithm::sortInBlock(tails + 1, tails + nTails + 1, [](auto &&t1, auto &&t2) {
469+
if (t1.pad != t2.pad) {
470+
return t1.pad < t2.pad;
471+
}
472+
return t1.tailStart < t2.tailStart;
473+
});
474+
#endif
475+
461476
for (uint32_t iTail = iThread + 1; iTail <= nTails; iTail += nThreads) {
462477
auto* tail = &tails[iTail];
463478

@@ -543,7 +558,7 @@ GPUd() void GPUTPCCFHIPClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
543558
float padSigma = CAMath::Sqrt(CAMath::Max(0.f, padSqSum / weightSum - padMean * padMean));
544559
float timeSigma = CAMath::Sqrt(CAMath::Max(0.f, timeSqSum / weightSum - timeMean * timeMean));
545560

546-
o2::tpc::ClusterNative cn;
561+
tpc::ClusterNative cn;
547562
cn.qMax = qMax;
548563
cn.qTot = (uint16_t)CAMath::Min(qTot, 65535.f);
549564
float clusterTime = fragment.start + timeMean - clusterer.Param().rec.tpc.clustersShiftTimebinsClusterizer;

0 commit comments

Comments
 (0)