@@ -57,7 +57,7 @@ HIP_DEBUG_FLAGS += --offload-arch=$(GPU_ARCH)
# Host C++ compiler flags: C++17, -O3 optimisation, OpenMP enabled.
CXX_FLAGS = -std=c++17 -O3 -fopenmp

# Profiling flags
# NSYS_FLAGS is passed to `nsys profile` by the profile-cuda target.
#   --cuda-event-trace=false  one option token; must not be split at the hyphen
#   --force-overwrite=true    nsys switches take an explicit true/false value;
#                             a bare --force-overwrite would swallow the next
#                             argument (the -o that profile-cuda appends)
NSYS_FLAGS = --cuda-event-trace=false --force-overwrite=true
# NOTE(review): %s looks like a placeholder for the per-target output name, but
# the code that substitutes it is not visible here - confirm against the
# profile-hip recipe.
ROCPROF_FLAGS = --hip-trace --stats --output-file %s.csv
6262
6363# Directories
@@ -144,7 +144,7 @@ ifeq ($(BUILD_CUDA),1)
144144 @for target in $(CUDA_TARGETS); do \
145145 if [ -f $$target ]; then \
146146 echo "Profiling $$target..."; \
147- nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
147+ nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
148148 fi; \
149149 done
150150endif
@@ -171,11 +171,12 @@ run: all
171171# Performance profiling targets
# profile-cuda: run each built CUDA example under Nsight Systems (`nsys profile`).
#
# For every path in $(CUDA_TARGETS) that exists as a regular file, the recipe
# writes two files into $(PROFILE_DIR), where <name> is the target's basename:
#   <name>.nsys-rep  - the report path handed to nsys via -o
#   <name>.nsys.log  - captured stdout and stderr of the nsys run
# Extra nsys options are taken from $(NSYS_FLAGS).
#
# Every target is attempted even if an earlier one fails; the recipe then exits
# non-zero if any nsys invocation failed, so an early failure is not masked by
# a later success.
.PHONY: profile-cuda
profile-cuda: $(CUDA_TARGETS)
	@echo "Profiling CUDA examples with nsys..."
	@mkdir -p "$(PROFILE_DIR)"
	@status=0; \
	for target in $(CUDA_TARGETS); do \
		if [ -f "$$target" ]; then \
			name=$$(basename "$$target"); \
			echo "Profiling $$target..."; \
			nsys profile $(NSYS_FLAGS) -o "$(PROFILE_DIR)/$${name}.nsys-rep" "$$target" \
				> "$(PROFILE_DIR)/$${name}.nsys.log" 2>&1 \
				|| { echo "nsys failed for $$target (see $(PROFILE_DIR)/$${name}.nsys.log)" >&2; status=1; }; \
		fi; \
	done; \
	exit $$status
181182
@@ -330,7 +331,7 @@ help:
330331 @echo " validate - Validate optimization correctness"
331332 @echo " "
332333 @echo " Profiling Targets:"
333- @echo " profile-cuda - Profile CUDA examples with nvprof "
334+ @echo " profile-cuda - Profile CUDA examples with nsys "
334335 @echo " profile-hip - Profile HIP examples with rocprof"
335336 @echo " profile-detailed-cuda - Detailed profiling with Nsight Compute"
336337 @echo " memcheck-cuda - Run CUDA memory checker"
0 commit comments