diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 893d8ad29..ff17dfadb 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -256,6 +256,9 @@ endif ()
 if (ENABLE_CUDA)
     find_package(CUDAToolkit REQUIRED)
     target_compile_definitions(mmseqs-framework PUBLIC -DHAVE_CUDA=1)
+    if (NOT DEFINED USE_GPU_SEM OR USE_GPU_SEM)
+        target_compile_definitions(mmseqs-framework PUBLIC -DUSE_GPU_SEM=1)
+    endif ()
     target_link_libraries(mmseqs-framework marv)
     if (FORCE_STATIC_DEPS)
         # link to rt explicitly so it doesn't get statically compiled and adds GLIBC_PRIVATE symbols
diff --git a/src/commons/GpuUtil.h b/src/commons/GpuUtil.h
index 1fa40e875..5689c32df 100644
--- a/src/commons/GpuUtil.h
+++ b/src/commons/GpuUtil.h
@@ -4,6 +4,14 @@
 #include <atomic>
 #include <string>
 #include <vector>
+#include <sched.h>
+#ifdef USE_GPU_SEM
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <fcntl.h>
+#include <semaphore.h>
+#endif
 #include "marv.h"
 
 struct GPUSharedMemory {
@@ -52,4 +60,44 @@ struct GPUSharedMemory {
 };
 
+struct GPUSharedMemorySem {
+#ifdef USE_GPU_SEM
+    sem_t* sem;
+    std::string shmName;
+
+    GPUSharedMemorySem() : sem(SEM_FAILED) {}
+    void create(const std::string& name) {
+        shmName = name;
+        std::string semName = "/" + name + "_sem";
+        sem_unlink(semName.c_str());
+        sem = sem_open(semName.c_str(), O_CREAT, 0660, 0);
+        if (sem == SEM_FAILED) {
+            perror(("sem_open(create) " + semName).c_str());
+            exit(EXIT_FAILURE);
+        }
+    }
+    void open(const std::string& name) {
+        shmName = name;
+        std::string semName = "/" + name + "_sem";
+        sem = sem_open(semName.c_str(), 0);
+        if (sem == SEM_FAILED) {
+            perror(("sem_open " + semName).c_str());
+            exit(EXIT_FAILURE);
+        }
+    }
+    void wait() { while (sem_wait(sem) == -1 && errno == EINTR) {} }
+    void post() { if (sem != SEM_FAILED) sem_post(sem); }
+    void close() { if (sem != SEM_FAILED) { sem_close(sem); sem = SEM_FAILED; } }
+    void destroy() { close(); sem_unlink(("/" + shmName + "_sem").c_str()); }
+#else
+    GPUSharedMemorySem() {}
+    void create(const std::string&) {}
+    void open(const std::string&) {}
+    void wait() { sched_yield(); }
+    void post() {}
+    void close() {}
+    void destroy() {}
+#endif
+};
+
 #endif
 
diff --git a/src/prefiltering/ungappedprefilter.cpp b/src/prefiltering/ungappedprefilter.cpp
index e497835a3..981fda424 100644
--- a/src/prefiltering/ungappedprefilter.cpp
+++ b/src/prefiltering/ungappedprefilter.cpp
@@ -118,6 +118,7 @@ void runFilterOnGpu(Parameters & par, BaseMatrix * subMat,
     std::vector<size_t> offsets;
     std::vector<int32_t> lengths;
     GPUSharedMemory* layout = NULL;
+    GPUSharedMemorySem gpuSem;
     if (hash.empty()) {
         offsets.reserve(tdbr->getSize() + 1);
         lengths.reserve(tdbr->getSize());
@@ -130,6 +131,7 @@ void runFilterOnGpu(Parameters & par, BaseMatrix * subMat,
         lengthData = lengths.data();
     } else {
         layout = GPUSharedMemory::openSharedMemory(hash);
+        gpuSem.open(hash);
     }
 
     const bool serverMode = par.gpuServer;
@@ -220,6 +222,7 @@ void runFilterOnGpu(Parameters & par, BaseMatrix * subMat,
             std::atomic_thread_fence(std::memory_order_release);
             // Debug(Debug::ERROR) << "switch to ready\n";
             layout->state.store(GPUSharedMemory::READY, std::memory_order_release);
+            gpuSem.post();
 
             while (true) {
                 if (layout->serverExit.load(std::memory_order_acquire) == true) {
@@ -323,6 +326,7 @@ void runFilterOnGpu(Parameters & par, BaseMatrix * subMat,
     if (marv != NULL) {
         delete marv;
     } else {
+        gpuSem.close();
         GPUSharedMemory::unmap(layout);
     }
 
diff --git a/src/util/gpuserver.cpp b/src/util/gpuserver.cpp
index 12b39ee86..a0f12524f 100644
--- a/src/util/gpuserver.cpp
+++ b/src/util/gpuserver.cpp
@@ -16,9 +16,16 @@
 #include <signal.h>
 #include <thread>
 
+#ifdef HAVE_CUDA
+GPUSharedMemorySem gpuSemaphore;
+#endif
+
 volatile sig_atomic_t keepRunning = 1;
 void intHandler(int) {
     keepRunning = 0;
+#ifdef HAVE_CUDA
+    gpuSemaphore.post();
+#endif
 }
 
 int gpuserver(int argc, const char **argv, const Command& command) {
@@ -64,6 +71,12 @@ int gpuserver(int argc, const char **argv, const Command& command) {
     marv.setDb(h1);
     marv.prefetch();
 
+    std::string shmFile = GPUSharedMemory::getShmHash(par.db1);
+    GPUSharedMemory* layout = GPUSharedMemory::alloc(shmFile, par.maxSeqLen, par.maxResListLen);
+    Debug(Debug::WARNING) << shmFile << "\n";
+
+    gpuSemaphore.create(shmFile);
+
     struct sigaction act;
     memset(&act, 0, sizeof(act));
     act.sa_handler = intHandler;
@@ -72,10 +85,11 @@ int gpuserver(int argc, const char **argv, const Command& command) {
     sigaction(SIGINT, &act, NULL);
     sigaction(SIGTERM, &act, NULL);
 
-    std::string shmFile = GPUSharedMemory::getShmHash(par.db1);
-    GPUSharedMemory* layout = GPUSharedMemory::alloc(shmFile, par.maxSeqLen, par.maxResListLen);
-    Debug(Debug::WARNING) << shmFile << "\n";
 
     while (keepRunning) {
+        gpuSemaphore.wait();
+        if (!keepRunning) {
+            break;
+        }
         if (layout->state.load(std::memory_order_acquire) == GPUSharedMemory::READY) {
             std::atomic_thread_fence(std::memory_order_acquire);
@@ -85,8 +99,6 @@ int gpuserver(int argc, const char **argv, const Command& command) {
             std::atomic_thread_fence(std::memory_order_release);
             // Debug(Debug::ERROR) << "switch to done\n";
             layout->state.store(GPUSharedMemory::DONE, std::memory_order_release);
-        } else {
-            std::this_thread::yield();
         }
     }
 
@@ -95,6 +107,7 @@ int gpuserver(int argc, const char **argv, const Command& command) {
     layout->serverExit.store(true, std::memory_order_release);
     std::atomic_thread_fence(std::memory_order_release);
     GPUSharedMemory::dealloc(layout, shmFile);
+    gpuSemaphore.destroy();
 #endif
 
     return EXIT_SUCCESS;