diff --git a/scripts/cpu-gpu-monitor.sh b/scripts/cpu-gpu-monitor.sh
new file mode 100755
index 0000000..a80dd79
--- /dev/null
+++ b/scripts/cpu-gpu-monitor.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Periodically sample cascade_server CPU usage and GPU utilization into
+# timestamped .dat files under ./data (one sample every 3 seconds).
+
+mkdir -p data
+cd data || exit 1
+
+# apt-get install moreutils
+# needed for ts (prepends a unix timestamp to each line)
+
+while true
+do
+    top -n 1 -o +%CPU -b | grep cascade_server | sed -e 's/\s\+/,/g' | ts %s >> cpu_utilization.dat
+    { date +%s | sed -z 's/\n/, /g'; nvidia-smi --query-gpu=utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv,noheader; } >> gpu_utilization.dat
+    nvidia-smi --query-compute-apps=process_name,pid,used_memory --format=csv,noheader | ts %s"," >> gpu_by_process.dat
+    sleep 3
+done
diff --git a/scripts/gpu_test.py b/scripts/gpu_test.py
new file mode 100644
index 0000000..97be7ad
--- /dev/null
+++ b/scripts/gpu_test.py
@@ -0,0 +1,31 @@
+from numba import jit, cuda
+import numpy as np
+# to measure exec time
+from timeit import default_timer as timer
+
+@cuda.jit
+def func(a):
+    """Increment every element of *a* by 1, one CUDA thread per element."""
+    # Thread id in a 1D block
+    tx = cuda.threadIdx.x
+    # Block id in a 1D grid
+    ty = cuda.blockIdx.x
+    # Block width, i.e. number of threads per block
+    bw = cuda.blockDim.x
+    # Compute flattened index inside the array
+    pos = tx + ty * bw
+    # Bounds guard for surplus threads in the last block; use the actual
+    # array length rather than a hard-coded constant so n can change.
+    if pos < a.size:
+        a[pos] += 1
+
+if __name__ == "__main__":
+    n = 10000000
+    a = np.ones(n, dtype=np.float64)
+
+    threadsperblock = 10
+    # Ceiling division: enough blocks to cover all n elements. The
+    # original omitted "// threadsperblock", launching ~n blocks.
+    blockspergrid = (n + (threadsperblock - 1)) // threadsperblock
+    for i in range(10000000):
+        func[blockspergrid, threadsperblock](a)