@@ -838,12 +838,9 @@ void runTest(int version, size_t M, size_t K, size_t N,
838838
839839 // Initialize Kernel and bind GPU buffers
840840 // pre-allocate for async dispatch
841- std::array<std::promise<void >, nIter> promises;
842- std::array<std::future<void >, nIter> futures;
843841 std::array<Kernel, nIter> kernels;
844842 std::array<Tensor, nIter> outputs;
845843 for (int i = 0 ; i < nIter; i++) {
846- futures[i] = promises[i].get_future ();
847844 outputs[i] = createTensor (ctx, Shape{M, N}, numtype);
848845 kernels[i] = selectMatmul (ctx, version, {input, weights, outputs[i]}, M, K, N, numtype);
849846 }
@@ -854,10 +851,7 @@ void runTest(int version, size_t M, size_t K, size_t N,
854851 // Dispatch kernel nIter times
855852 auto start = std::chrono::high_resolution_clock::now ();
856853 for (int i = 0 ; i < nIter; i++) {
857- dispatchKernel (ctx, kernels[i], promises[i]);
858- }
859- for (int i = 0 ; i < nIter; i++) {
860- wait (ctx, futures[i]);
854+ dispatchKernel (ctx, kernels[i]);
861855 }
862856 auto end = std::chrono::high_resolution_clock::now ();
863857
0 commit comments