diff --git a/.dockerignore b/.dockerignore index 7797741..96a401f 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,5 @@ +**/tmp* +test_data .ruff_cache .tox *.egg-info diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3907571 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +**/benchmark_results diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassification.xml b/superpixel_classification/SuperpixelClassification/SuperpixelClassification.xml index 38c7b77..4767aa9 100644 --- a/superpixel_classification/SuperpixelClassification/SuperpixelClassification.xml +++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassification.xml @@ -61,7 +61,7 @@ Superpixel parameters gensuperpixels - generate-superpxiels + generate-superpixels If an image does not have an annotation with superpixels, generate one true @@ -100,6 +100,13 @@ true + + useCuda + usecuda + Whether or not to use GPU/cuda (true) or cpu (false). + + false + batchSize batchsize @@ -198,5 +205,12 @@ 4 The number of worker threads for superpixel and feature generation + + cutoff + cutoff + + 500 + Number of unannotated superpixels to use per slide for features, training and predictions + diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py index cd82ded..a9d1353 100644 --- a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py +++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py @@ -204,7 +204,7 @@ def progCallback(step, count, total): print('Create superpixels for %s' % item['name']) imagePath = os.path.join(tempdir, item['name']) gc.downloadFile(item['largeImage']['fileId'], imagePath) - outImagePath = os.path.join(tempdir, 'superpixel.tiff') + outImagePath = os.path.join(tempdir, '%s.pixelmap.tiff' % item['name']) outAnnotationPath = os.path.join(tempdir, '%s.anot' % annotationName) if True: @@ -332,7 +332,7 @@ def createFeatureListFromPatchAndMaskList(self, patch_list, mask_list, maskvals_ ) return feature_list - def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patchSize, prog): + def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patchSize, prog, cutoff): import large_image print('Create feature', fileName) @@ -349,17 +349,35 @@ def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patch gc.downloadFile(maskItem['largeImage']['fileId'], maskPath) tsMask = large_image.open(maskPath) + num_values = len(elem['values']) + labeled_samples = set([i for i, x in enumerate(elem['values']) if x > 0]) + # background is used if we have a bounding box of 1 pixel in top left corner that is unlabeled. We do not want to extract features for that + has_background = elem['user']['bbox'][:4] == [0,0,1,1] + start_index = 1 if has_background else 0 + unlabeled_samples = [i for i, x in enumerate(elem['values'][start_index:], start=start_index) if x == 0] + + if num_values - len(labeled_samples) > cutoff: + # only select a subset of unlabeled samples, i.e., prune the feature list + random.shuffle(unlabeled_samples) + unlabeled_samples = unlabeled_samples[:cutoff] + indices = list(sorted(list(labeled_samples) + unlabeled_samples)) + with h5py.File(filePath, 'w') as fptr: batch_size = 1024 # TODO: Is this the best value? 
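+            # `indices` (built above) holds every labeled superpixel plus at most `cutoff`
+            # randomly chosen unlabeled ones; the batching below iterates over these indices
+            # instead of over all of elem['values'], and the selected positions are recorded
+            # in the 'used_indices' dataset written after the loop so later stages can map
+            # stored features back to their superpixels.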
- for batch_start in range(0, len(elem['values']), batch_size): - batch_list = elem['values'][batch_start: batch_start + batch_size] + total_size = len(indices) + for batch_start in range(0, total_size, batch_size): + #batch_list = elem['values'][batch_start: batch_start + batch_size] + batch_list = indices[batch_start: batch_start + batch_size] patch_list = [] mask_list = [] maskvals_list = [] - for idx, _ in enumerate(batch_list, start=batch_start): - prog.item_progress(item, 0.9 * idx / len(elem['values'])) - bbox = elem['user']['bbox'][idx * 4: idx * 4 + 4] + + for idx, i in enumerate(batch_list, start=batch_start): + prog.item_progress(item, 0.9 * idx / total_size) + bbox = elem['user']['bbox'][i * 4: i * 4 + 4] # use masked superpixel + if len(bbox) < 4: + pass patch = ts.getRegion( region=dict( left=int(bbox[0]), top=int(bbox[1]), @@ -384,7 +402,7 @@ def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patch if mask.shape[2] == 4: mask = mask[:, :, :-1] maskvals = [[val % 256, val // 256 % 256, val // 65536 % 256] - for val in [idx * 2, idx * 2 + 1]] + for val in [(i + 1) * 2, (i + 1) * 2 + 1]] patch_list.append(patch) mask_list.append(mask) maskvals_list.append(maskvals) @@ -409,6 +427,8 @@ def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patch (time.time() - starttime)), item['name']) del batch_list, patch_list, mask_list, maskvals_list, feature_list + used_indices_ds = fptr.create_dataset( + 'used_indices', data=np.array(indices), dtype='i') print(ds.shape, len(elem['values']), '%5.3f' % (time.time() - starttime), item['name']) prog.item_progress(item, 0.9) @@ -418,30 +438,38 @@ def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patch prog.item_progress(item, 1) return file - def createFeatures(self, gc, folderId, annotationName, featureFolderId, patchSize, numWorkers, - prog): - itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName) + def createFeatures(self, gc, folderId, annotationName, itemsAndAnnot, featureFolderId, patchSize, numWorkers, + prog, cutoff): + # itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName) prog.message('Creating features') prog.progress(0) prog.items([item for item, _, _ in itemsAndAnnot]) results = {} futures = [] + featureFiles = [ + f for item in gc.listItem(featureFolderId) for f in gc.listFile(item['_id']) + ] with concurrent.futures.ThreadPoolExecutor(max_workers=numWorkers) as executor: for item, _, elem in itemsAndAnnot: - bbox = elem['user']['bbox'] - hashval = repr(dict( - itemId=item['_id'], bbox=[int(v) for v in bbox], patchSize=patchSize)) - hashval = hashlib.new('sha256', hashval.encode()).hexdigest() - fileName = 'feature-%s.h5' % (hashval) - found = False - for existing in gc.listItem(featureFolderId, name=fileName): - results[item['_id']] = next(gc.listFile(existing['_id'], limit=1)) - found = True - break - if not found: - futures.append((item, executor.submit( - self.createFeaturesForItem, gc, item, elem, featureFolderId, fileName, - patchSize, prog))) + match = [ + f for f in featureFiles if + re.match('^%s.*[.]feature.h5$' % re.escape(item['name']), f['name']) + ] + if len(match): + results[item['_id']] = match[0] + else: # fallback to hash-based naming - generate features if necessary + bbox = elem['user']['bbox'] + hashval = repr(dict( + itemId=item['_id'], bbox=[int(v) for v in bbox], patchSize=patchSize)) + hashval = hashlib.new('sha256', hashval.encode()).hexdigest() + fileName = 'feature-%s.h5' % (hashval) + match = 
[f for f in featureFiles if f['name'] == fileName] + if len(match): + results[item['_id']] = match[0] + else: + futures.append((item, executor.submit( + self.createFeaturesForItem, gc, item, elem, featureFolderId, + '%s.feature.h5' % (item['name']), patchSize, prog, cutoff))) for item, future in futures: file = future.result() try: @@ -461,12 +489,20 @@ def trainModelAddItem(self, gc, record, item, annotrec, elem, feature, item['name'], annotrec['annotation']['name'], annotrec['_id'], annotrec['_version'])) featurePath = os.path.join(record['tempdir'], feature['name']) gc.downloadFile(feature['_id'], featurePath) + print(f"Downloaded '{feature['_id']}' to '{featurePath}'") with h5py.File(featurePath, 'r') as ffptr: fds = ffptr['images'] - for idx, labelnum in enumerate(elem['values']): - if labelnum and labelnum < len(elem['categories']): + if 'used_indices' in ffptr: + indices = ffptr['used_indices'] + else: + indices = range(len(elem['values'])) + skipped_excluded = 0 + for i,idx in enumerate(indices): + labelnum = elem['values'][idx] + if 0 < labelnum < len(elem['categories']): labelname = elem['categories'][labelnum]['label'] if labelname in excludeLabelList: + skipped_excluded += 1 continue if labelname not in record['groups']: record['groups'][labelname] = elem['categories'][labelnum] @@ -475,7 +511,7 @@ def trainModelAddItem(self, gc, record, item, annotrec, elem, feature, labelname = labelList[labelnum - 1] else: continue - patch = fds[idx] + patch = fds[i] if not record['ds']: record['ds'] = record['fptr'].create_dataset( 'images', (1,) + patch.shape, maxshape=(None,) + patch.shape, @@ -494,11 +530,11 @@ def trainModelAddItem(self, gc, record, item, annotrec, elem, feature, record['lastlog'] = time.time() print(record['ds'].shape, record['counts'], '%5.3f' % (time.time() - record['starttime'])) + print(f"Skipped {skipped_excluded} samples with labels that were excluded") - def trainModel(self, gc, folderId, annotationName, features, modelFolderId, + def trainModel(self, gc, annotationName, itemsAndAnnot, features, modelFolderId, batchSize, epochs, trainingSplit, randomInput, labelList, - excludeLabelList, prog): - itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName) + excludeLabelList, use_cuda, prog): with tempfile.TemporaryDirectory(dir=os.getcwd()) as tempdir: trainingPath = os.path.join(tempdir, 'training.h5') with h5py.File(trainingPath, 'w') as fptr: @@ -526,7 +562,7 @@ def trainModel(self, gc, folderId, annotationName, features, modelFolderId, prog.progress(1) if not record['ds']: print('No labeled data') - return + return None, None record['labelds'] = fptr.create_dataset( 'labels', (len(record['labelvals']),), dtype=int) record['labelds'] = np.array(record['labelvals'], dtype=int) @@ -536,7 +572,7 @@ def trainModel(self, gc, folderId, annotationName, features, modelFolderId, prog.progress(0) history, modelPath = self.trainModelDetails( record, annotationName, batchSize, epochs, itemsAndAnnot, prog, tempdir, - trainingSplit) + trainingSplit, use_cuda) modTrainingPath = os.path.join(tempdir, '%s ModTraining Epoch %d.h5' % ( annotationName, self.getCurrentEpoch(itemsAndAnnot))) @@ -551,16 +587,16 @@ def trainModel(self, gc, folderId, annotationName, features, modelFolderId, for attempt in tenacity.Retrying(stop=tenacity.stop_after_attempt(self.uploadRetries)): with attempt: modelFile = gc.uploadFileToFolder(modelFolderId, modelPath) - print('Saved model') + print(f'Saved model to {modelFolderId}') for attempt in 
tenacity.Retrying(stop=tenacity.stop_after_attempt(self.uploadRetries)): with attempt: modTrainingFile = gc.uploadFileToFolder(modelFolderId, modTrainingPath) - print('Saved modTraining') + print(f'Saved modTraining to {modelFolderId}') return modelFile, modTrainingFile - def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir, model, item, + def predictLabelsForItem(self, gc, annotationName, tempdir, model, item, annotrec, elem, feature, curEpoch, userId, labels, groups, - makeHeatmaps, radius, magnification, certainty, batchSize, prog): + makeHeatmaps, radius, magnification, certainty, batchSize, use_cuda, prog): import al_bench.factory print('Predicting %s' % (item['name'])) @@ -571,6 +607,8 @@ def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir, # Figure out which samples are already labeled labeled_samples: NDArray[np.int_] = np.nonzero(np.array(elem['values'])) + number_annotations = len(elem['values']) + tiny = np.finfo(np.float32).tiny print(f'{labeled_samples = }') print(f'certainty_type = {certainty!r}') @@ -581,9 +619,17 @@ def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir, # In case we are computing batchbald compCertainty.set_batchbald_num_samples(16) compCertainty.set_batchbald_batch_size(100) - compCertainty.set_batchbald_excluded_samples(labeled_samples) + #compCertainty.set_batchbald_excluded_samples(labeled_samples) with h5py.File(featurePath, 'r') as ffptr: + if 'used_indices' in ffptr: + used_indices = set(list(ffptr['used_indices'])) + else: + used_indices = set(range(number_annotations)) + all_indices = set(range(number_annotations)) + unused_indices = list(sorted(all_indices.difference(used_indices))) + compCertainty.set_batchbald_excluded_samples(np.array(unused_indices)) + prog.item_progress(item, 0) # Create predicted annotation annot = copy.deepcopy(annotrec) @@ -592,21 +638,29 @@ def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir, annot['elements'][0]['categories'] = [groups[key] for key in labels] ds = ffptr['images'] prog.item_progress(item, 0.05) - catWeights, predictions = self.predictLabelsForItemDetails( - batchSize, ds, item, model, prog) - catWeights = np.array(catWeights) - predictions = np.array(predictions) + _catWeights, _predictions, indices = self.predictLabelsForItemDetails( + batchSize, ds, np.array(list(used_indices), dtype=np.int64), item, model, use_cuda, prog) + # expand catWeights and predictions to be length of elem['values'] instead of just `cutoff` samples + # then copy in results from predictions + catWeights = np.zeros((number_annotations,) + _catWeights.shape[1:], dtype=np.float32 if str(_catWeights.dtype).endswith("32") else np.float64) + predictions = np.zeros((number_annotations,) + _predictions.shape[1:], dtype=np.float32 if str(_predictions.dtype).endswith("32") else np.float64) + for cw,p,idx in zip(_catWeights, _predictions, indices): + catWeights[idx] = cw + predictions[idx] = p + print_fully('predictions', predictions) prog.item_progress(item, 0.7) # compCertainty needs catWeights to have shape (num_superpixels, # bayesian_samples, num_classes) if 'batchbald' is selected, otherwise the # shape should be (num_superpixels, num_classes). 
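+        # (for the torch batchbald path this is (num_superpixels, bayesian_samples, num_classes),
+        # e.g. 12 MC-dropout samples for the Bayesian vector model shown in the torch backend;
+        # the tensorflow path yields (num_superpixels, num_classes)); adding `tiny` below keeps
+        # the all-zero rows left for unused indices away from exact zero, presumably to avoid
+        # degenerate values inside the certainty computation.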
- print_fully('catWeights', catWeights) # Ask compCertainty to compute certainties - cert = compCertainty.from_numpy_array(catWeights) + cert = compCertainty.from_numpy_array(catWeights + tiny) + print_fully('catWeights', catWeights) + # After the call to compCertainty, those numbers that end up as values for # annot's keys 'values', 'confidence', 'categoryConfidence', and 'certainty' # should have shape (num_superpixels, num_classes). + print_fully('cert', cert) scores = cert[certainty]['scores'] print_fully('scores', scores) @@ -617,14 +671,28 @@ epsilon = 1e-50 predictions = np.log(catWeights + epsilon) cats = np.argmax(catWeights, axis=-1) - indices = np.arange(cats.shape[0]) - conf = catWeights[indices, cats[indices]] + # 0 means we didn't make a prediction, so increment by one + #cats[indices] += 1 + conf = catWeights[list(all_indices), cats[np.arange(cats.shape[0])]] print_fully('cats', cats) print_fully('conf', conf) + # give unused_indices the highest possible confidence so that they show up last in the active learning UI + # (because it sorts by confidence in descending order) + scores[unused_indices] = np.finfo(scores.dtype).max + # additionally, ensure that labels that are already labeled also end up last or late in the recommendations + # for the DSA UI, this prevents labeled samples from being shown again to the user + scores[labeled_samples] = np.finfo(scores.dtype).max + + cats = cats.tolist() conf = conf.tolist() - # Should this be from predictions for from catWeights?!!! + + # Should this be from predictions or from catWeights?!!!
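+            # replace any -inf entries in predictions with the smallest finite value for the
+            # dtype so the per-category confidences stored on the annotation below stay finite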
+ predictions[np.isneginf(predictions)] = np.finfo(predictions.dtype).min catConf = predictions.tolist() scores = scores.tolist() annot['elements'][0]['values'] = cats @@ -761,10 +829,10 @@ def makeHeatmapsForItem(self, gc, annotationName, userId, tempdir, radius, item, 'fileId': item['largeImage']['fileId'], 'userId': userId})) - def predictLabels(self, gc, folderId, annotationName, features, modelFolderId, + def predictLabels(self, gc, folderId, annotationName, itemsAndAnnot, features, modelFolderId, annotationFolderId, saliencyMaps, radius, magnification, - certainty, batchSize, prog): - itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName) + certainty, batchSize, use_cuda, prog): + #itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName) curEpoch = self.getCurrentEpoch(itemsAndAnnot) folder = gc.getFolder(folderId) userId = folder['creatorId'] @@ -779,7 +847,7 @@ def predictLabels(self, gc, folderId, annotationName, features, modelFolderId, modelFile = next(gc.listFile(item['_id'], limit=1)) break if not modelFile: - print('No model file found') + print(f'No model file found in {modelFolderId}') return print(modelFile['name'], item) modelPath = os.path.join(tempdir, modelFile['name']) @@ -792,7 +860,7 @@ def predictLabels(self, gc, folderId, annotationName, features, modelFolderId, modTrainingFile = next(gc.listFile(item['_id'], limit=1)) break if not modTrainingFile: - print('No modTraining file found') + print(f'No modTraining file found in {modelFolderId}') return print(modTrainingFile['name'], item) modTrainingPath = os.path.join(tempdir, modTrainingFile['name']) @@ -823,20 +891,26 @@ def predictLabels(self, gc, folderId, annotationName, features, modelFolderId, if item['_id'] not in features: continue self.predictLabelsForItem( - gc, annotationName, annotationFolderId, tempdir, model, item, annotrec, elem, + gc, annotationName, tempdir, model, item, annotrec, elem, features.get(item['_id']), curEpoch, userId, labels, groups, saliencyMaps, - radius, magnification, certainty, batchSize, prog) + radius, magnification, certainty, batchSize, use_cuda, prog) prog.progress(1) - def main(self, args): + def main(self, args, gc = None): self.feature_is_image = args.feature != 'vector' self.certainty = args.certainty print('\n>> CLI Parameters ...\n') pprint.pprint(vars(args)) - gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) - gc.token = args.girderToken + if gc is None: + gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) + gc.token = args.girderToken + gc.authenticate('admin', 'password') + + # check to make sure we have access to server + if not [x for x in list(gc.listCollection()) if x['name'] == 'Active Learning']: + raise Exception("Unable to authenticate with girder") with ProgressHelper( 'Superpixel Classification', 'Superpixel classification', args.progress) as prog: @@ -845,16 +919,24 @@ def main(self, args): gc, args.images, args.annotationName, args.radius, args.magnification, args.annotationDir, args.numWorkers, prog) + itemsAndAnnot = self.getItemsAndAnnotations(gc, args.images, args.annotationName) + print("Creating features...") features = self.createFeatures( - gc, args.images, args.annotationName, args.features, args.patchSize, - args.numWorkers, prog) + gc, args.images, args.annotationName, itemsAndAnnot, args.features, args.patchSize, + args.numWorkers, prog, args.cutoff) + print("Done creating features...") if args.train: + print("Training...") self.trainModel( - gc, args.images, args.annotationName, features, 
args.modeldir, args.batchSize, - args.epochs, args.split, args.randominput, args.labels, args.exclude, prog) + gc, args.annotationName, itemsAndAnnot, features, args.modeldir, args.batchSize, + args.epochs, args.split, args.randominput, args.labels, args.exclude, args.useCuda, prog) + print("Done training...") + print("Predicting labels...") self.predictLabels( - gc, args.images, args.annotationName, features, args.modeldir, args.annotationDir, - args.heatmaps, args.radius, args.magnification, args.certainty, args.batchSize, + gc, args.images, args.annotationName, itemsAndAnnot, features, args.modeldir, args.annotationDir, + args.heatmaps, args.radius, args.magnification, args.certainty, args.batchSize, args.useCuda, prog) + print("Done predicting labels...") + print("Done, exiting") diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTensorflow.py b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTensorflow.py index 0af02d8..e50cd8a 100644 --- a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTensorflow.py +++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTensorflow.py @@ -3,6 +3,7 @@ from typing import Optional import h5py +import numpy as np import tensorflow as tf from SuperpixelClassificationBase import SuperpixelClassificationBase @@ -35,33 +36,56 @@ class SuperpixelClassificationTensorflow(SuperpixelClassificationBase): def __init__(self): self.training_optimal_batchsize: Optional[int] = None self.prediction_optimal_batchsize: Optional[int] = None + self.use_cuda = False def trainModelDetails(self, record, annotationName, batchSize, epochs, itemsAndAnnot, prog, - tempdir, trainingSplit): - # print(f'Tensorflow trainModelDetails(batchSize={batchSize}, ...)') - # make model - num_classes = len(record['labels']) - model = tf.keras.Sequential([ - tf.keras.layers.Rescaling(1.0 / 255), - tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'), - tf.keras.layers.MaxPooling2D(), - tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'), - tf.keras.layers.MaxPooling2D(), - tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'), - tf.keras.layers.MaxPooling2D(), - tf.keras.layers.Flatten(), - # tf.keras.layers.Dropout(0.2), - tf.keras.layers.Dense(128, activation='relu'), - tf.keras.layers.Dense(num_classes)]) - prog.progress(0.2) - model.compile(optimizer='adam', - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=['accuracy']) + tempdir, trainingSplit, use_cuda): + self.use_cuda = use_cuda + + # Enable GPU memory growth globally to avoid precondition errors + gpus = tf.config.list_physical_devices('GPU') + if gpus and self.use_cuda: + try: + for gpu in gpus: + tf.config.experimental.set_memory_growth(gpu, True) + except RuntimeError as e: + print(f"Could not set memory growth: {e}") + if not self.use_cuda: + tf.config.set_visible_devices([], 'GPU') + device = "gpu" if use_cuda else "cpu" + print(f"Using device: {device}") + + # Dataset preparation (outside strategy scope) + ds_h5 = record['ds'] + labelds_h5 = record['labelds'] + # Fully load to memory and break h5py reference + ds_numpy = np.array(ds_h5[:]) + labelds_numpy = np.array(labelds_h5[:]) + + strategy = tf.distribute.MirroredStrategy() + with strategy.scope(): + num_classes = len(record['labels']) + model = tf.keras.Sequential([ + tf.keras.layers.Rescaling(1.0 / 255), + tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'), + 
tf.keras.layers.MaxPooling2D(), + tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'), + tf.keras.layers.MaxPooling2D(), + tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'), + tf.keras.layers.MaxPooling2D(), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(128, activation='relu'), + tf.keras.layers.Dense(num_classes)]) + prog.progress(0.2) + model.compile(optimizer='adam', + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=['accuracy']) + prog.progress(0.7) - # generate split - full_ds = tf.data.Dataset.from_tensor_slices((record['ds'], record['labelds'])) - full_ds = full_ds.shuffle(1000) # add seed=123 ? - count = len(full_ds) + # generate split using numpy arrays + full_ds = tf.data.Dataset.from_tensor_slices((ds_numpy, labelds_numpy)) + full_ds = full_ds.shuffle(1000) + count = len(ds_numpy) train_size = int(count * trainingSplit) if batchSize < 1: batchSize = self.findOptimalBatchSize(model, full_ds, training=True) @@ -85,24 +109,53 @@ def trainModelDetails(self, record, annotationName, batchSize, epochs, itemsAndA self.saveModel(model, modelPath) return history, modelPath + def _get_device(self, use_cuda): + if tf.config.list_physical_devices('GPU') and use_cuda: + return '/GPU:0' + return '/CPU:0' + def predictLabelsForItemDetails( - self, batchSize, ds: h5py._hl.dataset.Dataset, item, model, prog, + self, batchSize, ds: h5py._hl.dataset.Dataset, indices, item, model, use_cuda, prog, ): - # print(f'Tensorflow predictLabelsForItemDetails(batchSize={batchSize}, ...)') if batchSize < 1: batchSize = self.findOptimalBatchSize( model, tf.data.Dataset.from_tensor_slices(ds), training=False, ) print(f'Optimal batch size for prediction = {batchSize}') - predictions = model.predict( - ds, - batch_size=batchSize, - callbacks=[_LogTensorflowProgress( - prog, (ds.shape[0] + batchSize - 1) // batchSize, 0.05, 0.35, item)]) - prog.item_progress(item, 0.4) - # softmax to scale to 0 to 1 - catWeights = tf.nn.softmax(predictions) - return catWeights, predictions + + device = self._get_device(use_cuda) + with tf.device(device): + # Create a dataset that pairs the data with their indices + dataset = tf.data.Dataset.from_tensor_slices((ds, indices)) + dataset = dataset.batch(batchSize) + + # Initialize arrays to store results + all_predictions = [] + all_cat_weights = [] + all_indices = [] + + # Iterate through batches manually to keep track of indices + for data, batch_indices in dataset: + batch_predictions = model.predict( + data, + batch_size=batchSize, + verbose=0) # Set verbose=0 to avoid multiple progress bars + + # Apply softmax to scale to 0 to 1 + batch_cat_weights = tf.nn.softmax(batch_predictions) + + all_predictions.append(batch_predictions) + all_cat_weights.append(batch_cat_weights) + all_indices.append(batch_indices) + + prog.item_progress(item, 0.4) + + # Concatenate all results + predictions = tf.concat(all_predictions, axis=0) + catWeights = tf.concat(all_cat_weights, axis=0) + final_indices = tf.concat(all_indices, axis=0) + + return catWeights.numpy(), predictions.numpy(), final_indices.numpy().astype(np.int64) def findOptimalBatchSize(self, model, ds, training) -> int: if training and self.training_optimal_batchsize is not None: diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTorch.py b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTorch.py index e06d247..e8acb68 100644 --- 
a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTorch.py +++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTorch.py @@ -66,12 +66,10 @@ class _BayesianPatchTorchModel(bbald.consistent_mc_dropout.BayesianModule): # A Bayesian model that takes patches (2-dimensional shape) rather than vectors # (1-dimensional shape) as input. It is useful when feature != 'vector' and # SuperpixelClassificationBase.certainty == 'batchbald'. - def __init__(self, num_classes: int) -> None: + def __init__(self, num_classes: int, device : torch.device) -> None: # Set `self.device` as early as possible so that other code does not lock out # what we want. - self.device: str = torch.device( - ('cuda' if torch.cuda.is_available() and torch.cuda.device_count() > 0 else 'cpu'), - ) + self.device : torch.device = device # print(f'Initial model.device = {self.device}') super(_BayesianPatchTorchModel, self).__init__() @@ -134,18 +132,16 @@ class _VectorTorchModel(torch.nn.Module): # (2-dimensional shape) as input. It is useful when feature == 'vector' and # SuperpixelClassificationBase.certainty != 'batchbald'. - def __init__(self, input_dim: int, num_classes: int) -> None: + def __init__(self, input_dim: int, num_classes: int, device : torch.device) -> None: # Set `self.device` as early as possible so that other code does not lock out # what we want. - self.device: str = torch.device( - ('cuda' if torch.cuda.is_available() and torch.cuda.device_count() > 0 else 'cpu'), - ) + self.device: torch.device = device # print(f'Initial model.device = {self.device}') super(_VectorTorchModel, self).__init__() self.input_dim: int = input_dim self.num_classes: int = num_classes - self.fc: torch.Module = torch.nn.Linear(input_dim, num_classes) + self.fc: torch.Linear = torch.nn.Linear(input_dim, num_classes) def forward(self, input: torch.Tensor) -> torch.Tensor: # TODO: Is torch.mul appropriate here? @@ -161,20 +157,18 @@ class _BayesianVectorTorchModel(bbald.consistent_mc_dropout.BayesianModule): # (2-dimensional shape) as input. It is useful when feature == 'vector' and # SuperpixelClassificationBase.certainty == 'batchbald'. - def __init__(self, input_dim: int, num_classes: int) -> None: + def __init__(self, input_dim: int, num_classes: int, device : torch.device) -> None: # Set `self.device` as early as possible so that other code does not lock out # what we want. - self.device: str = torch.device( - ('cuda' if torch.cuda.is_available() and torch.cuda.device_count() > 0 else 'cpu'), - ) + self.device = device # print(f'Initial model.device = {self.device}') super(_BayesianVectorTorchModel, self).__init__() self.input_dim: int = input_dim self.num_classes: int = num_classes self.bayesian_samples: int = 12 - self.fc: torch.Module = torch.nn.Linear(input_dim, num_classes) - self.fc_drop: torch.Module = bbald.consistent_mc_dropout.ConsistentMCDropout() + self.fc: torch.Linear = torch.nn.Linear(input_dim, num_classes) + self.fc_drop: torch.ConsistentMCDropout = bbald.consistent_mc_dropout.ConsistentMCDropout() def mc_forward_impl(self, input: torch.Tensor) -> torch.Tensor: # TODO: Is torch.mul appropriate here? 
@@ -311,14 +305,17 @@ def trainModelDetails( prog: ProgressHelper, tempdir: str, trainingSplit: float, + cuda : bool, ): + device = torch.device("cuda" if cuda else "cpu") + print(f"Using device: {device}") # make model num_classes: int = len(record['labels']) model: torch.nn.Module if self.feature_is_image: # Feature is patch if self.certainty == 'batchbald': - model = _BayesianPatchTorchModel(num_classes) + model = _BayesianPatchTorchModel(num_classes, device) else: mesg = 'Expected torch model for input of type image to be Bayesian' raise ValueError(mesg) @@ -326,9 +323,9 @@ def trainModelDetails( # Feature is vector input_dim: int = record['ds'].shape[1] if self.certainty == 'batchbald': - model = _BayesianVectorTorchModel(input_dim, num_classes) + model = _BayesianVectorTorchModel(input_dim, num_classes, device) else: - model = _VectorTorchModel(input_dim, num_classes) + model = _VectorTorchModel(input_dim, num_classes, device) model.to(model.device) # print(f'Torch trainModelDetails(batchSize={batchSize}, ...)') @@ -348,6 +345,7 @@ def trainModelDetails( val_ds: torch.utils.data.TensorDataset train_dl: torch.utils.data.DataLoader val_dl: torch.utils.data.DataLoader + prog.message('Loading features for model training') train_arg1 = ( torch.from_numpy(record['ds'][train_indices].transpose((0, 3, 2, 1))) if self.feature_is_image @@ -507,7 +505,7 @@ def fitModel( return history def predictLabelsForItemDetails( - self, batchSize: int, ds_h5, item, model: torch.nn.Module, prog: ProgressHelper, + self, batchSize: int, ds_h5, indices, item, model: torch.nn.Module, use_cuda : bool, prog: ProgressHelper, ): # print(f'Torch predictLabelsForItemDetails(batchSize={batchSize}, ...)') num_superpixels: int = ds_h5.shape[0] @@ -517,6 +515,9 @@ def predictLabelsForItemDetails( num_classes: int = model.num_classes # print(f'{num_classes = }') + # also set on model.device, ideally + #device = torch.device("cuda" if use_cuda else "cpu") + callbacks = [ _LogTorchProgress(prog, 1 + (num_superpixels - 1) // batchSize, 0.05, 0.35, item), ] @@ -532,12 +533,13 @@ def predictLabelsForItemDetails( for cb in callbacks: cb.on_predict_begin(logs=logs) + # ds also needs to have information about the indices so that we can shuffle the data but still link it to an index ds: torch.utils.data.TensorDataset = torch.utils.data.TensorDataset( ( torch.from_numpy(np.array(ds_h5).transpose((0, 3, 2, 1))) if self.feature_is_image else torch.from_numpy(np.array(ds_h5)) - ), + ), torch.from_numpy(indices), ) if batchSize < 1: batchSize = self.findOptimalBatchSize(model, ds, training=False) @@ -545,6 +547,7 @@ def predictLabelsForItemDetails( dl: torch.utils.data.DataLoader = torch.utils.data.DataLoader(ds, batch_size=batchSize) predictions: NDArray[np.float_] = np.zeros((num_superpixels, bayesian_samples, num_classes)) catWeights: NDArray[np.float_] = np.zeros((num_superpixels, bayesian_samples, num_classes)) + outIndices: NDArray[np.int64] = np.zeros(num_superpixels, dtype=np.int64) with torch.no_grad(): model.eval() # Tell torch that we will be doing predictions row: int = 0 @@ -567,6 +570,8 @@ def predictLabelsForItemDetails( catWeights_raw = torch.nn.functional.softmax(predictions_raw, dim=-1) predictions[row:new_row, :, :] = predictions_raw.detach().cpu().numpy() catWeights[row:new_row, :, :] = catWeights_raw.detach().cpu().numpy() + outIndices[row:new_row] = data[1].detach().cpu().numpy().astype(np.int64)[:] + row = new_row for cb in callbacks: cb.on_predict_batch_end(i) @@ -574,7 +579,7 @@ def predictLabelsForItemDetails( 
cb.on_predict_end({'outputs': predictions}) prog.item_progress(item, 0.4) # scale to units - return catWeights, predictions + return catWeights, predictions, outIndices def findOptimalBatchSize( self, model: torch.nn.Module, ds: torch.utils.data.TensorDataset, training: bool, @@ -651,9 +656,14 @@ def add_safe_globals(self): def loadModel(self, modelPath): self.add_safe_globals() - model = torch.load(modelPath) - model.eval() - return model + try: + model = torch.load(modelPath, weights_only=False) + model.eval() + return model + except Exception as e: + print(f"Unable to load {modelPath}") + raise + def saveModel(self, model, modelPath): self.add_safe_globals() diff --git a/superpixel_classification/SuperpixelClassification/benchmarks/benchmark_torch.py b/superpixel_classification/SuperpixelClassification/benchmarks/benchmark_torch.py new file mode 100644 index 0000000..617ae86 --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/benchmarks/benchmark_torch.py @@ -0,0 +1,193 @@ +''' Benchmark script for the SuperpixelClassificationTorch class +Originally written by feeding "tests/test_torch.py" to ChatGPT and asking for a benchmarking using timeit. +''' +import shutil +import numpy as np +import h5py +import os +import tempfile +import timeit +from unittest.mock import MagicMock +import csv +import matplotlib.pyplot as plt +from datetime import datetime + +from IPython.utils.path import ensure_dir_exists +from more_itertools.more import side_effect +from superpixel_classification.SuperpixelClassification.SuperpixelClassificationBase import SuperpixelClassificationBase +from superpixel_classification.SuperpixelClassification.SuperpixelClassificationTorch import SuperpixelClassificationTorch +from superpixel_classification.SuperpixelClassification.progress_helper import ProgressHelper + +import argparse + +def parse_args(): + parser = argparse.ArgumentParser(description="Benchmark SuperpixelClassificationTorch.") + parser.add_argument('--mnist-image-size', type=int, default=100, help='patchsize of individual images') + parser.add_argument('--color-dim', type=int, default=3, help='Number of color channels') + parser.add_argument('--image-sizes', default=list(map(int, [1e3, 1e4])), help='Output path for the pyramidal TIF file') + parser.add_argument('--epochs', default=3, type=int, help='Number of epochs to train') + parser.add_argument('--out-dir', default='benchmark_results', type=str, help='default output directory for benchmark results') + + return parser.parse_args() + + +def create_sample_data(num_images, tmpdir, image_size, color_dim): + h5_path = os.path.join(tmpdir, "test_data.h5") + images = np.random.randint(0, 255, size=(num_images, image_size, image_size, color_dim), dtype=np.uint8) + + with h5py.File(h5_path, 'w') as f: + f.create_dataset('images', data=images) + f.create_dataset('used_indices', data=np.arange(num_images - 2)) + + return h5_path + +def train_model(num_images, num_epochs, h5_path): + base: SuperpixelClassificationBase = SuperpixelClassificationTorch() + base.feature_is_image = True + base.certainty = 'batchbald' + + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + return shutil.copy(h5_path, dst) + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + def mv_to_src(_, src): + dst = os.path.dirname(os.path.dirname(h5_path)) + return shutil.copy(src, dst) + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True) + + labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + elem = { + 'girderId': 
'test_girder_id', + 'categories': [ + {"label": c} for c in labels + ], + 'values': + [] \ + + np.random.randint(1, len(labels) - 1, size=(num_images - 2), dtype=np.uint8).tolist() + + [0, 0], # last two images unlabeled + 'transform': {'matrix': [[1.0]]} + } + + item = {'_id': 'test_h5_file', 'name': 'test'} + annotrec = {'_id': '1', '_version': 0, 'annotation': {'name': 'TorchTest'}} + items = [(item, annotrec, elem)] + + with ProgressHelper('Superpixel Classification', 'Test training', True) as prog: + prog.progress(0) + prog.items(items) + modelFile, modelTrainingFile = base.trainModel( + gc=gc, + annotationName="TorchTest", + itemsAndAnnot=items, + features={'test_h5_file': {'_id': 'feature_id', 'name': 'test_h5_file'}}, + modelFolderId="test_folder_id", + batchSize=4, + epochs=1, + trainingSplit=0.5, + randomInput=False, + labelList='', + excludeLabelList=[], + prog=prog, + use_cuda=True, + ) + + return modelFile, modelTrainingFile + +def create_benchmark_plot(results, out_dir): + plt.figure(figsize=(12, 6)) + + # Number of image sizes and runs + n_sizes = len(results) + n_runs = len(results[0]['times']) + + # Create positions for bars + ind = np.arange(n_sizes) + width = 0.25 # Width of bars + + # Plot bars for each run + for i in range(n_runs): + times = [result['times'][i] for result in results] + plt.bar(ind + i*width, times, width, label=f'Run {i+1}') + + plt.xlabel('Number of Images') + plt.ylabel('Time (seconds)') + plt.title('Model Training Benchmark Times') + + # Set x-axis labels + plt.xticks(ind + width, [str(result['num_images']) for result in results]) + + plt.legend() + plt.tight_layout() + + # Save plot + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + dst_pth = os.path.join(out_dir, f'benchmark_results_{timestamp}.png') + plt.savefig(dst_pth) + plt.close() + + return dst_pth + +def main(): + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + args = parse_args() + ensure_dir_exists(args.out_dir) + csv_filename = os.path.join(args.out_dir, f'benchmark_results_{timestamp}.csv') + results = [] + + # Write CSV header + with open(csv_filename, 'w', newline='') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['Num Images', 'Run 1', 'Run 2', 'Run 3', 'Average', 'Best']) + + for num_images in args.image_sizes: + print(f"\nBenchmarking with NUM_IMAGES = {num_images}") + with tempfile.TemporaryDirectory() as tmpdir: + h5_path = create_sample_data(num_images, tmpdir, args.mnist_image_size, args.color_dim) + timer = timeit.Timer(lambda: train_model(num_images, args.epochs, h5_path)) + + try: + times = timer.repeat(repeat=3, number=1) + avg_time = sum(times) / len(times) + best_time = min(times) + + # Store results for plotting + results.append({ + 'num_images': num_images, + 'times': times, + 'average': avg_time, + 'best': best_time + }) + + # Write results to CSV + with open(csv_filename, 'a', newline='') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([ + num_images, + round(times[0], 3), + round(times[1], 3), + round(times[2], 3), + round(avg_time, 3), + round(best_time, 3) + ]) + + print(f"Times for each run (seconds): {[round(t, 3) for t in times]}") + print(f"Average time (seconds): {round(avg_time, 3)}") + print(f"Best time (seconds): {round(best_time, 3)}") + + except Exception as e: + print(f"Error during benchmark: {str(e)}") + # Write error to CSV + with open(csv_filename, 'a', newline='') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([num_images, f"Error: {str(e)}", "", "", "", ""]) + finally: + shutil.rmtree(tmpdir) 
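+    # `results` accumulates one entry per image count with the three timeit runs; it feeds
+    # the CSV rows written above and the grouped bar chart rendered below.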
+ + # Create and save the plot + out_file = create_benchmark_plot(results, args.out_dir) + print(f"\nResults saved to {csv_filename}") + print(f"Plot saved as {out_file}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/superpixel_classification/SuperpixelClassification/tests/generate_MNIST_image.py b/superpixel_classification/SuperpixelClassification/tests/generate_MNIST_image.py new file mode 100644 index 0000000..9d7e121 --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/generate_MNIST_image.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python +''' +Generate a .tiff with numbers from MNIST +''' + +import os +import argparse +import random + +import numpy as np +import pandas as pd +import tifffile +from PIL import Image +from torchvision.datasets import MNIST + +def parse_args(): + # Parse arguments + parser = argparse.ArgumentParser(description="Generate a pyramidal MNIST image.") + parser.add_argument('--root_dataset_path', type=str, default="/data/aza4423_anders/mnist", help='Path to download and store MNIST dataset') + #parser.add_argument('--num_images', type=int, default=244 * 244, help='Number of random MNIST images to use') + parser.add_argument('--num_images', type=int, default=4, help='Number of random MNIST images to use') + parser.add_argument('--output_path', type=str, default="/data/aza4423_anders/aml-dsa/mnist_pyramid.tif", help='Output path for the pyramidal TIF file') + parser.add_argument('--test', default=False, type=bool, action=argparse.BooleanOptionalAction, + metavar='T', + help='whether to use test MNIST or train' + ) + + args = parser.parse_args() + + return args + +def d_to_rgb(d): + r = d & 0xFF + g = (d >> 8) & 0xFF + b = (d >> 16) & 0xFF + return [r, g, b] + + +def create_mnist_image(root_dataset_path=".", num_images=100, output_path="./out", test=False, start_value=0): + # verify that num_images has a square root; otherwise we'd have to insert blank tiles for the uneven grid + assert num_images % np.sqrt(num_images) == 0 + + # Download MNIST (if not already downloaded) + dataset = MNIST(root=root_dataset_path, train=not test, download=True) + + # Select N random MNIST images (each image is PIL.Image in mode "L") + # (Make the number square-rootable) + num_images = num_images # Number of images from argument + # oversample if we want more images than the length of MNIST + if num_images > len(dataset): + indices = random.choices(range(len(dataset)), k=num_images) + else: + indices = list(range(num_images)) + random.shuffle(indices) + + #indices = random.sample(range(len(dataset)), num_images) + mnist_images = [np.array(dataset[i][0]) for i in indices] # each is 28x28, uint8 + mnist_labels = [np.array(dataset[i][1]) for i in indices] + + # Arrange the images in a grid (so num_images should be a number with an integer root) + tile_rows, tile_cols = int(np.sqrt(num_images)), int(np.sqrt(num_images)) + tile_h, tile_w = mnist_images[0].shape # typically 28x28 + grid_h, grid_w = tile_rows * tile_h, tile_cols * tile_w + base_image = np.zeros((grid_h, grid_w, 3), dtype=np.uint8) + pm_image = np.zeros((grid_h, grid_w, 3), dtype=np.uint8) + + for idx, img in enumerate(mnist_images): + r = idx // tile_cols + c = idx % tile_cols + # convert img to RGB + rgb_img = np.stack([img, img, img], axis=-1) + base_image[r*tile_h:(r+1)*tile_h, c*tile_w:(c+1)*tile_w, :] = rgb_img + + value_img = np.zeros((tile_h, tile_w, 3), dtype=np.uint8) + i = (idx + 1) * 2 + rgb = d_to_rgb(i + start_value) + value_img[1:-1, 1:-1] = rgb + rgb = 
d_to_rgb(i + start_value + 1) + value_img[0, :] = rgb + value_img[-1, :] = rgb + value_img[:, 0] = rgb + value_img[:, -1] = rgb + + pm_image[r*tile_h:(r+1)*tile_h, c*tile_w:(c+1)*tile_w, :] = value_img + + + # Note: We assume that the base level corresponds to 40x magnification. + # Now, build a pyramid (list of downsampled images). + pyramid_pm = [pm_image] + pm_current = pm_image.copy() + + pyramid = [base_image] + current = base_image.copy() + # Continue downsampling by a factor of 2 until one dimension becomes very small. + while min(current.shape) >= 64: + # Use Pillow to resize (ANTIALIAS gives good quality downsampling) + im = Image.fromarray(current) + new_w, new_h = current.shape[1] // 2, current.shape[0] // 2 + if new_w < 1 or new_h < 1: + break + im_resized = im.resize((new_w, new_h)) + current = np.array(im_resized) + pyramid.append(current) + + im = Image.fromarray(pm_image) + new_w, new_h = pm_current.shape[1] // 2, pm_current.shape[0] // 2 + if new_w < 1 or new_h < 1: + break + im_resized = im.resize((new_w, new_h)) + pm_current = np.array(im_resized) + pyramid_pm.append(current) + + # Save the image as a pyramidal TIFF. + # The base image is the main image and the pyramid list (excluding the base) is saved as subIFDs. + output_filename = output_path # Use the output path from argument + if os.path.dirname(output_filename): + os.makedirs(os.path.dirname(output_filename), exist_ok=True) + if os.path.exists(output_filename): + os.remove(output_filename) + + with tifffile.TiffWriter(output_filename, bigtiff=False) as tif: + tif.write(pyramid[0], + tile=(tile_w * 4, tile_h * 4), + photometric='RGB', + description='Whole-slide MNIST image at 40x magnification', + subifds=pyramid[1:]) + print(f"Pyramidal TIFF saved as {output_filename}") + + output_filename_pm = output_filename + ".pixelmap.tiff" # Use the output path from argument + if os.path.dirname(output_filename_pm): + os.makedirs(os.path.dirname(output_filename_pm), exist_ok=True) + if os.path.exists(output_filename_pm): + os.remove(output_filename_pm) + with tifffile.TiffWriter(output_filename_pm, bigtiff=False) as tif: + tif.write(pyramid_pm[0], + tile=(tile_w * 4, tile_h * 4), + photometric='RGB', + description='Pixelmap for Whole-slide MNIST image at 40x magnification', + subifds=pyramid_pm[1:]) + print(f"Pyramidal TIFF saved as {output_filename_pm}") + + # generate a corresponding CSV "cells" file + # with headers "x,y,w,h" for each image + csv_filename = output_filename + "_cells.csv" + with open(csv_filename, 'w') as f: + f.write("x,y,w,h,value\n") + i = 0 + for r in range(tile_rows): + for c in range(tile_cols): + x, y = c * tile_w, r * tile_h + f.write(f"{x},{y},{tile_w},{tile_h},{mnist_labels[i]}\n") + i += 1 + df = pd.read_csv(csv_filename, header=0) + print(f"Annotation file saved as {csv_filename}") + return output_filename, output_filename_pm, df + +if __name__ == "__main__": + _args = parse_args() + create_mnist_image(_args.root_dataset_path, _args.num_images, _args.output_path, _args.test) diff --git a/superpixel_classification/SuperpixelClassification/tests/test_feature_extract.py b/superpixel_classification/SuperpixelClassification/tests/test_feature_extract.py new file mode 100644 index 0000000..2c17864 --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/test_feature_extract.py @@ -0,0 +1,218 @@ +import os +import shutil +import sys +import tempfile +from unittest.mock import MagicMock + +import h5py +import large_image +import numpy as np +import pytest + +# make pythonpath work 
out of the box - although your editor may complain +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.dirname(SCRIPT_DIR)) + +from SuperpixelClassificationBase import SuperpixelClassificationBase +from progress_helper import ProgressHelper +from tests.generate_MNIST_image import create_mnist_image + +from xdg_base_dirs import ( xdg_cache_home, ) + +NUM_IMAGES = 64 + +@pytest.fixture(scope="session") +def create_sample_data(): + global NUM_IMAGES + with tempfile.TemporaryDirectory() as tmpdirname: + tiff_path = os.path.join(tmpdirname, "test_mnist.tiff") + #tiff_path_pm = os.path.join(tmpdirname, "test_mnist.tiff.pixelmap.tiff") + + tiff_path, tiff_path_pm, labels = create_mnist_image( + root_dataset_path=xdg_cache_home(), + num_images=NUM_IMAGES, + output_path=tiff_path, + test=False, + ) + # 0 is background + labels['value'] = labels['value'] + 1 + + # we use yield so that the temporarydirectory is still open in the tests + yield tiff_path, tiff_path_pm, NUM_IMAGES, labels + +MNIST_IMAGE_SIZE=28 +COLOR_DIM = 3 + +def test_cutoff(create_sample_data): + global MNIST_IMAGE_SIZE, COLOR_DIM + test_image_pth, test_image_pth_pm, num_images, labels = create_sample_data + base = SuperpixelClassificationBase() + + # Create test data + item = { + 'name': test_image_pth, + 'largeImage': {'fileId': 'test_image_id'} + } + + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + if "pixelmap" in dst: + if not os.path.exists(dst): + return shutil.copy(test_image_pth_pm, dst) + else: + if not os.path.exists(dst): + return shutil.copy(test_image_pth, dst) + return None + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + gc.getItem = MagicMock(return_value={'name': test_image_pth_pm, 'largeImage': {'fileId': 'foobar'}}) + def mv_to_src(_, src): + dst = os.path.dirname(test_image_pth) + return shutil.copy(src, dst) + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value={'_id': 'test_file_id'}) + #gc.uploadFileToFolder = MagicMock(return_value={'_id': 'test_file_id'}) + + bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[['x', 'y', 'w', 'h']].iterrows()] + + elem = { + 'girderId': 'test_girder_id', + 'values': + [] \ + + list(labels['value'])[:-2] + + [0, 0], # last two images unlabeled + 'user': { + 'bbox': [item for sublist in bboxes for item in sublist] + }, + 'transform': {'matrix': [[1.0]]} + } + + filename = 'test_features.h5' + h5_file = os.path.join(os.path.dirname(test_image_pth), filename) + if os.path.exists(h5_file): + os.remove(h5_file) + + assert not os.path.exists(h5_file) + + cutoff = 1 + with ProgressHelper( 'Superpixel Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items([item]) + result = base.createFeaturesForItem( + gc=gc, + item=item, + elem=elem, + featureFolderId='test_folder_id', + fileName=filename, + patchSize=MNIST_IMAGE_SIZE, + prog=prog, + cutoff=cutoff, + ) + + assert os.path.exists(h5_file), f"Output file {h5_file} does not exist" + with h5py.File(h5_file, 'r') as ffptr: + assert 'images' in ffptr + assert ffptr['images'].shape == (NUM_IMAGES - cutoff, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM) + assert len(ffptr['used_indices']) == NUM_IMAGES - cutoff # number of labeled - cutoff + +def test_create_features_for_item(create_sample_data): + global MNIST_IMAGE_SIZE, COLOR_DIM + test_image_pth, test_image_pth_pm, num_images, labels = create_sample_data + base = SuperpixelClassificationBase() + + # Create test data + item = { + 'name': test_image_pth, + 'largeImage': 
{'fileId': 'test_image_id'} + } + + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + if "pixelmap" in dst: + if not os.path.exists(dst): + return shutil.copy(test_image_pth_pm, dst) + else: + if not os.path.exists(dst): + return shutil.copy(test_image_pth, dst) + return None + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + gc.getItem = MagicMock(return_value={'name': test_image_pth_pm, 'largeImage': {'fileId': 'foobar'}}) + def mv_to_src(_, src): + dst = os.path.dirname(test_image_pth) + return shutil.copy(src, dst) + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value={'_id': 'test_file_id'}) + #gc.uploadFileToFolder = MagicMock(return_value={'_id': 'test_file_id'}) + + bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[['x', 'y', 'w', 'h']].iterrows()] + + elem = { + 'girderId': 'test_girder_id', + 'values': + [] \ + + list(labels['value'])[:-2] + + [0, 0], # last two images unlabeled + 'user': { + 'bbox': [item for sublist in bboxes for item in sublist] + }, + 'transform': {'matrix': [[1.0]]} + } + + filename = 'test_features.h5' + h5_file = os.path.join(os.path.dirname(test_image_pth), filename) + if os.path.exists(h5_file): + os.remove(h5_file) + + assert not os.path.exists(h5_file) + + with ProgressHelper( 'Superpixel Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items([item]) + result = base.createFeaturesForItem( + gc=gc, + item=item, + elem=elem, + featureFolderId='test_folder_id', + fileName=filename, + patchSize=MNIST_IMAGE_SIZE, + prog=prog, + cutoff=9999 + ) + + assert os.path.exists(h5_file), f"Output file {h5_file} does not exist" + with h5py.File(h5_file, 'r') as ffptr: + assert 'images' in ffptr + assert ffptr['images'].shape == (num_images, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM) + feature_img = ffptr['images'][0] + # open test_image_pth using coordinates [x,y,w,h] from elem['user']['bbox'][:4] and make sure it's pixel-equal with first_img + x, y, x2, y2 = elem['user']['bbox'][:4] + ts = large_image.getTileSource(test_image_pth) + orig_image = ts.getRegion( + region=dict(left=x, top=y, right=x2, bottom=y2), + format=large_image.tilesource.TILE_FORMAT_NUMPY + )[0] + orig_image = orig_image.astype(feature_img.dtype) + print(orig_image.dtype) + np.testing.assert_array_equal(orig_image, feature_img) + + # also check that the last image matches + feature_img = ffptr['images'][-1] + x, y, x2, y2 = elem['user']['bbox'][-4:] + ts = large_image.getTileSource(test_image_pth) + orig_image = ts.getRegion( + region=dict(left=x, top=y, right=x2, bottom=y2), + format=large_image.tilesource.TILE_FORMAT_NUMPY + )[0] + orig_image = orig_image.astype(feature_img.dtype) + print(orig_image.dtype) + np.testing.assert_array_equal(orig_image, feature_img) + + assert 'used_indices' in ffptr + assert len(ffptr['used_indices']) == num_images + + # Assertions + assert result == h5_file + assert gc.downloadFile.call_count == 2 # Called for both image and mask + assert gc.getItem.call_count == 1 + assert gc.uploadFileToFolder.call_count == 1 diff --git a/superpixel_classification/SuperpixelClassification/tests/test_full_training_cycle.py b/superpixel_classification/SuperpixelClassification/tests/test_full_training_cycle.py new file mode 100644 index 0000000..03c6b8a --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/test_full_training_cycle.py @@ -0,0 +1,524 @@ +''' +This file contains tests for a full training cycle: extracting superpixels, training and evaluation. +The "cycle" is: + 1. 
generate NUM_WSIS different whole slide images using numbers from MNIST. + 2. extract features from said images. + 3. train a model on the features. + 4. evaluate the model on the features. +We expect an accuracy of at least 90%. + +This test is to verify that the training cycle works as expected. +Since there is batching involved, we want to use a larger number of samples instead of just a quick mini-test, as found in the other files. +''' +import argparse +import glob +import json +import os +import re +import shutil +import sys +import tempfile +from unittest.mock import MagicMock + +import numpy as np +import pytest +from xdg_base_dirs import (xdg_cache_home, ) + +# make pythonpath work out of the box - although your editor may complain +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.dirname(SCRIPT_DIR)) + +from SuperpixelClassificationBase import SuperpixelClassificationBase +from SuperpixelClassificationTensorflow import SuperpixelClassificationTensorflow +from SuperpixelClassificationTorch import SuperpixelClassificationTorch +from tests.generate_MNIST_image import create_mnist_image + +NUM_WSIS = 2 +MNIST_IMAGE_SIZE = 28 +NUM_IMAGES_PER_WSI = 10 ** 2 +COLOR_DIM = 3 +PATCH_SIZE = 100 # only size compatible with pytorch model for the time being (since there are hardcoded sizes in the definition of the model) +NUM_EPOCHS = 5 + +@pytest.fixture(scope="function") +def create_sample_data(request): + global NUM_WSIS, NUM_IMAGES_PER_WSI + wsi_paths, pm_paths, list_labels = [], [], [] + with tempfile.TemporaryDirectory() as tmpdirname: + for i in range(NUM_WSIS): + tiff_path = os.path.join(tmpdirname, f"test_mnist_{i}.tiff") + + tiff_path, tiff_path_pm, labels = create_mnist_image( + root_dataset_path=xdg_cache_home(), + num_images=NUM_IMAGES_PER_WSI, + output_path=tiff_path, + test=False, + start_value = request.param + ) + # where labels['value'] == 0, put 10 instead, since 0 will be reserved for unlabeled + labels.loc[labels['value'] == 0, 'value'] = 10 + + wsi_paths.append(tiff_path) + pm_paths.append(tiff_path_pm) + list_labels.append(labels) + + # we use yield so that the temporarydirectory is still open in the tests + yield wsi_paths, pm_paths, NUM_WSIS, list_labels + +@pytest.mark.skipif("RUNALL" not in os.environ, reason="this is a slow test (~5-10 min), run only if you want to") +@pytest.mark.parametrize('create_sample_data', [0], indirect=True) +def test_main_pytorch(create_sample_data): + global NUM_WSIS, PATCH_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM, NUM_EPOCHS + tiff_paths, tiff_path_pms, num_images, labels = create_sample_data + base: SuperpixelClassificationBase = SuperpixelClassificationTorch() + + annotation_name = 'torchMNISTtest' + config = dict( + annotationDir = 'annotationdir', + annotationName = annotation_name, + batchSize = int(np.sqrt(NUM_IMAGES_PER_WSI)), # one row of the wsi at a time + certainty = 'batchbald', + cutoff = 600000, # plenty of space to allow all training samples + epochs = NUM_EPOCHS, + exclude = [], + feature = 'patch', + features = 'featuredir', + gensuperpixels = False, + girderApiUrl = 'http://localhost:8080/api/v1', + girderToken = '', + heatmaps = False, + images = 'imagedir', + labels = '', + magnification = 40.0, + modeldir = '', + numWorkers = 1, + patchSize = PATCH_SIZE, + radius = MNIST_IMAGE_SIZE, + randominput = False, + split = 0.7, + train = True, + useCuda = True, + progress = True, + ) + args = argparse.Namespace(**config) + + mnist_labels = ['default', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'] 
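+    # category index 0 ('default') stands in for unlabeled superpixels; the fixture remapped
+    # MNIST digit 0 to value 10 above, so it maps onto the final '0' entry of this list.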
+ + items = [] + for i in range(NUM_WSIS): + bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[i][['x', 'y', 'w', 'h']].iterrows()] + elem = { + 'girderId': f'test_girder_id{i}', + 'categories': [ + {"label": c} for c in mnist_labels + ], + 'values': labels[i]['value'].tolist(), + 'user': { + 'bbox': [item for sublist in bboxes for item in sublist] + }, + 'transform': {'matrix': [[1.0]]} + } + item = { + '_id': f'test_file{i}', + 'name': os.path.basename(tiff_paths[i]), + 'largeImage': {'fileId': f'test_image_id{i}'}, + } + mask_item = { + '_id': f'test_file{i}', + 'name': '.tiff'.join(os.path.basename(tiff_path_pms[i]).split('.tiff')[:-1]), + 'largeImage': {'fileId': f'test_mask_id{i}'}, + } + annotrec = { + '_id': f'test_file{i}', + '_version': 0, + 'annotation': {'name': 'TorchTest'}, + } + items.append((item, annotrec, elem)) + + + gc = MagicMock() + base.getItemsAndAnnotations = MagicMock(return_value=items) + + with tempfile.TemporaryDirectory() as tmpdirname: + def mv_to_dst(req_pth : str, dst : str): + if req_pth.startswith("test_"): + for f in tiff_paths + tiff_path_pms: + dpath = os.path.join(dst, os.path.basename(f)) + if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst): + shutil.copy(f, dst) + print(f"Copied {f} to {dst}") + elif req_pth.startswith("feature"): + feature_files = glob.glob(os.path.join(tmpdirname, "*feature.h5")) + for f in feature_files: + dpath = os.path.join(dst, os.path.basename(f)) + if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst): + shutil.copy(f, dst) + print(f"Copied {f} to {dst}") + elif req_pth.endswith("model"): + model_file = glob.glob(os.path.join(tmpdirname, f"*Model *{0}.pth"))[0] + shutil.copy(model_file, dst) + elif "modtraining" in req_pth: + model_file = glob.glob(os.path.join(tmpdirname, f"*ModTraining *{0}.h5"))[0] + shutil.copy(model_file, dst) + else: + print(f"Received unknown request path '{req_pth}'") + return {} + + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + def mv_to_src(req, src, reference=None): + shutil.copy(src, tmpdirname) + print(f"Copied {src} to {tmpdirname}") + # each WSI gets two separate .anot files. 
The below if statement gives them unique filenames so we can reference later + if src.endswith(".anot"): + # extract the number at the end of req, which can look like "testfile1" or "testfile1000" + m = re.search(r'(\d+)$', req) + num = int(m.group(1)) + s = os.path.basename(src).replace(".anot", f"_{num}.myanot") + shutil.copy(src, os.path.join(tmpdirname, s)) + print(f"Also copied {s} to {tmpdirname}") + return {'_id': 'feature', 'name': os.path.basename(src)} + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True) + + gc.getItem = MagicMock(return_value=mask_item) + + gc.listResource = MagicMock(return_value=[dict(name=f"{annotation_name}model", _id = 'model'), dict(name=f"{annotation_name}modtraining", _id = 'modtraining')]) + gc.uploadFileToItem = MagicMock(side_effect=mv_to_src, return_value=True) + gc.getFolder = MagicMock(return_value=dict(name='test_folder', creatorId='creatorId', _id='test_folder_id')) + + def list_file(req: str, limit: int = 0) -> iter: + if "modtraining" in req: + return iter([dict(name=req, _id = 'modtraining')]) + else: + return iter([dict(name=req, _id='model')]) + gc.listFile = MagicMock(side_effect=list_file) + + base.main(args, gc) + + for file in sorted(glob.glob(os.path.join(tmpdirname, f"*Predictions*.myanot"))): + assert os.path.exists(file) + with open(file, 'r') as f: + pred_json = json.load(f) + e = pred_json['elements'][0] + assert len(e['values']) == NUM_IMAGES_PER_WSI + + assert len(e['user']['bbox']) == NUM_IMAGES_PER_WSI * 4 # 4 is for x,y,w,h + + assert len(e['categories']) == len(mnist_labels) - 1 # -1 because we don't have a default category + assert len(e['user']['confidence']) == NUM_IMAGES_PER_WSI + + # compare e['values'] to labels['values'], to make sure we've trained a valid model + # the order of the values is shuffled in the annotation file, the ordering is in e['categories'] + file_num = int(file.split('Predictions_')[-1].split('.myanot')[0]) + predicted_labels = np.array([e['categories'][c]['label'] for c in e['values']]) + matches = (predicted_labels == np.array(list(map(str, labels[file_num]['value'])))) + similarity = matches.sum() / len(matches) + expected_min_accuracy = 0.75 + assert similarity > expected_min_accuracy, f"File {file}: Similarity between predicted values and GT is {similarity}, expected > {expected_min_accuracy}" + print(f"Similarity between predicted values and GT is {similarity}") + +@pytest.mark.skipif("RUNALL" not in os.environ, reason="this is a slow test (~1-10 min), run only if you want to") +@pytest.mark.parametrize('create_sample_data', [0], indirect=True) +def test_main_tf(create_sample_data): + global NUM_WSIS, PATCH_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM, NUM_EPOCHS + tiff_paths, tiff_path_pms, num_images, labels = create_sample_data + base: SuperpixelClassificationBase = SuperpixelClassificationTensorflow() + + annotation_name = 'tensorflowMNISTtest' + config = dict( + annotationDir = 'annotationdir', + annotationName = annotation_name, + batchSize = int(np.sqrt(NUM_IMAGES_PER_WSI)), # one row of the wsi at a time + certainty = 'confidence', + cutoff = 600000, # plenty of space to allow all training samples + epochs = NUM_EPOCHS, + exclude = [], + feature = 'patch', + features = 'featuredir', + gensuperpixels = False, + girderApiUrl = 'http://localhost:8080/api/v1', + girderToken = '', + heatmaps = False, + images = 'imagedir', + labels = '', + magnification = 40.0, + modeldir = 'modeldir', + numWorkers = 1, + patchSize = PATCH_SIZE, + radius = MNIST_IMAGE_SIZE, + randominput = 
False, + split = 0.7, + train = True, + useCuda = False, + progress = True, + ) + args = argparse.Namespace(**config) + + mnist_labels = ['default', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'] + + items = [] + for i in range(NUM_WSIS): + bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[i][['x', 'y', 'w', 'h']].iterrows()] + elem = { + 'girderId': f'test_girder_id{i}', + 'categories': [ + {"label": c} for c in mnist_labels + ], + 'values': labels[i]['value'].tolist(), + 'user': { + 'bbox': [item for sublist in bboxes for item in sublist] + }, + 'transform': {'matrix': [[1.0]]} + } + item = { + '_id': f'test_file{i}', + 'name': os.path.basename(tiff_paths[i]), + 'largeImage': {'fileId': f'test_image_id{i}'}, + } + mask_item = { + '_id': f'test_file{i}', + 'name': '.tiff'.join(os.path.basename(tiff_path_pms[i]).split('.tiff')[:-1]), + 'largeImage': {'fileId': f'test_mask_id{i}'}, + } + annotrec = { + '_id': f'test_file{i}', + '_version': 0, + 'annotation': {'name': 'TorchTest'}, + } + items.append((item, annotrec, elem)) + + + gc = MagicMock() + base.getItemsAndAnnotations = MagicMock(return_value=items) + + with tempfile.TemporaryDirectory() as tmpdirname: + def mv_to_dst(req_pth : str, dst : str): + if req_pth.startswith("test_"): + for f in tiff_paths + tiff_path_pms: + dpath = os.path.join(dst, os.path.basename(f)) + if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst): + shutil.copy(f, dst) + print(f"MockDownload: Copied {f} to {dst}") + elif req_pth.startswith("feature"): + feature_files = glob.glob(os.path.join(tmpdirname, "*feature.h5")) + for f in feature_files: + dpath = os.path.join(dst, os.path.basename(f)) + if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst): + shutil.copy(f, dst) + print(f"MockDownload: Copied {f} to {dst}") + elif req_pth.endswith("model"): + model_file = glob.glob(os.path.join(tmpdirname, f"*Model *{0}.h5"))[0] + shutil.copy(model_file, dst) + elif "modtraining" in req_pth: + model_file = glob.glob(os.path.join(tmpdirname, f"*ModTraining *{0}.h5"))[0] + shutil.copy(model_file, dst) + else: + raise RuntimeError(f"Received unknown request path '{req_pth}'") + return {} + + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + def mv_to_src(req, src, reference=None): + shutil.copy(src, tmpdirname) + print(f"MockUpload: Copied {src} to {tmpdirname}") + # each WSI gets two separate .anot files. 
The below if statement gives them unique filenames so we can reference later + if src.endswith(".anot"): + # extract the number at the end of req, which can look like "testfile1" or "testfile1000" + m = re.search(r'(\d+)$', req) + num = int(m.group(1)) + s = os.path.basename(src).replace(".anot", f"_{num}.myanot") + shutil.copy(src, os.path.join(tmpdirname, s)) + print(f"Also copied {s} to {tmpdirname}") + return {'_id': 'feature', 'name': os.path.basename(src)} + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True) + + gc.getItem = MagicMock(return_value=mask_item) + + modelName = f"{annotation_name} Model Epoch 0.h5" + modTrainingName = f"{annotation_name} ModTraining Epoch 0.h5" + gc.listResource = MagicMock(return_value=[dict(name=modelName, _id = 'model'), dict(name=modTrainingName, _id = 'modtraining')]) + gc.uploadFileToItem = MagicMock(side_effect=mv_to_src, return_value=True) + gc.getFolder = MagicMock(return_value=dict(name='test_folder', creatorId='creatorId', _id='test_folder_id')) + + def list_file(req: str, limit: int = 0) -> iter: + if "modtraining" in req: + return iter([dict(name=modTrainingName, _id = 'modtraining')]) + else: + return iter([dict(name=modelName, _id='model')]) + gc.listFile = MagicMock(side_effect=list_file) + + base.main(args, gc) + + for file in sorted(glob.glob(os.path.join(tmpdirname, f"*Predictions*.myanot"))): + assert os.path.exists(file) + with open(file, 'r') as f: + pred_json = json.load(f) + e = pred_json['elements'][0] + assert len(e['values']) == NUM_IMAGES_PER_WSI + + assert len(e['user']['bbox']) == NUM_IMAGES_PER_WSI * 4 # 4 is for x,y,w,h + + assert len(e['categories']) == len(mnist_labels) - 1 # exclude the default category + assert len(e['user']['confidence']) == NUM_IMAGES_PER_WSI + + # compare e['values'] to labels['values'], to make sure we've trained a valid model + # the order of the values is shuffled in the annotation file, the ordering is in e['categories'] + file_num = int(file.split('Predictions_')[-1].split('.myanot')[0]) + predicted_labels = np.array([e['categories'][c]['label'] for c in e['values']]) + matches = (predicted_labels == np.array(list(map(str, labels[file_num]['value'])))) + similarity = matches.sum() / len(matches) + expected_min_accuracy = 0.75 + assert similarity > expected_min_accuracy, f"File {file}: Similarity between predicted values and GT is {similarity}, expected > {expected_min_accuracy}" + print(f"Similarity between predicted values and GT is {similarity}") + +@pytest.mark.skipif("RUNALL" not in os.environ, reason="this is a slow test (~1-10 min), run only if you want to") +@pytest.mark.parametrize('create_sample_data', [2], indirect=True) +def test_main_tf_with_background(create_sample_data): + global NUM_WSIS, PATCH_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM, NUM_EPOCHS + tiff_paths, tiff_path_pms, num_images, labels = create_sample_data + base: SuperpixelClassificationBase = SuperpixelClassificationTensorflow() + + annotation_name = 'tensorflowMNISTtest' + config = dict( + annotationDir = 'annotationdir', + annotationName = annotation_name, + batchSize = int(np.sqrt(NUM_IMAGES_PER_WSI)), # one row of the wsi at a time + certainty = 'confidence', + cutoff = 600000, # plenty of space to allow all training samples + epochs = NUM_EPOCHS, + exclude = [], + feature = 'patch', + features = 'featuredir', + gensuperpixels = False, + girderApiUrl = 'http://localhost:8080/api/v1', + girderToken = '', + heatmaps = False, + images = 'imagedir', + labels = '', + magnification = 40.0, + modeldir = 
'modeldir', + numWorkers = 1, + patchSize = PATCH_SIZE, + radius = MNIST_IMAGE_SIZE, + randominput = False, + split = 0.7, + train = True, + useCuda = False, + progress = True, + ) + args = argparse.Namespace(**config) + + mnist_labels = ['default', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'] + + items = [] + for i in range(NUM_WSIS): + bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[i][['x', 'y', 'w', 'h']].iterrows()] + elem = { + 'girderId': f'test_girder_id{i}', + 'categories': [ + {"label": c} for c in mnist_labels + ], + 'values': [0] + labels[i]['value'].tolist(), + 'user': { + 'bbox': [0,0,1,1] + [item for sublist in bboxes for item in sublist] + }, + 'transform': {'matrix': [[1.0]]} + } + item = { + '_id': f'test_file{i}', + 'name': os.path.basename(tiff_paths[i]), + 'largeImage': {'fileId': f'test_image_id{i}'}, + } + mask_item = { + '_id': f'test_file{i}', + 'name': '.tiff'.join(os.path.basename(tiff_path_pms[i]).split('.tiff')[:-1]), + 'largeImage': {'fileId': f'test_mask_id{i}'}, + } + annotrec = { + '_id': f'test_file{i}', + '_version': 0, + 'annotation': {'name': 'TorchTest'}, + } + items.append((item, annotrec, elem)) + + + gc = MagicMock() + base.getItemsAndAnnotations = MagicMock(return_value=items) + + with tempfile.TemporaryDirectory() as tmpdirname: + def mv_to_dst(req_pth : str, dst : str): + if req_pth.startswith("test_"): + for f in tiff_paths + tiff_path_pms: + dpath = os.path.join(dst, os.path.basename(f)) + if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst): + shutil.copy(f, dst) + print(f"MockDownload: Copied {f} to {dst}") + elif req_pth.startswith("feature"): + feature_files = glob.glob(os.path.join(tmpdirname, "*feature.h5")) + for f in feature_files: + dpath = os.path.join(dst, os.path.basename(f)) + if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst): + shutil.copy(f, dst) + print(f"MockDownload: Copied {f} to {dst}") + elif req_pth.endswith("model"): + model_file = glob.glob(os.path.join(tmpdirname, f"*Model *{0}.h5"))[0] + shutil.copy(model_file, dst) + elif "modtraining" in req_pth: + model_file = glob.glob(os.path.join(tmpdirname, f"*ModTraining *{0}.h5"))[0] + shutil.copy(model_file, dst) + else: + raise RuntimeError(f"Received unknown request path '{req_pth}'") + return {} + + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + def mv_to_src(req, src, reference=None): + shutil.copy(src, tmpdirname) + print(f"MockUpload: Copied {src} to {tmpdirname}") + # each WSI gets two separate .anot files. 
The below if statement gives them unique filenames so we can reference later + if src.endswith(".anot"): + # extract the number at the end of req, which can look like "testfile1" or "testfile1000" + m = re.search(r'(\d+)$', req) + num = int(m.group(1)) + s = os.path.basename(src).replace(".anot", f"_{num}.myanot") + shutil.copy(src, os.path.join(tmpdirname, s)) + print(f"Also copied {s} to {tmpdirname}") + return {'_id': 'feature', 'name': os.path.basename(src)} + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True) + + gc.getItem = MagicMock(return_value=mask_item) + + modelName = f"{annotation_name} Model Epoch 0.h5" + modTrainingName = f"{annotation_name} ModTraining Epoch 0.h5" + gc.listResource = MagicMock(return_value=[dict(name=modelName, _id = 'model'), dict(name=modTrainingName, _id = 'modtraining')]) + gc.uploadFileToItem = MagicMock(side_effect=mv_to_src, return_value=True) + gc.getFolder = MagicMock(return_value=dict(name='test_folder', creatorId='creatorId', _id='test_folder_id')) + + def list_file(req: str, limit: int = 0) -> iter: + if "modtraining" in req: + return iter([dict(name=modTrainingName, _id = 'modtraining')]) + else: + return iter([dict(name=modelName, _id='model')]) + gc.listFile = MagicMock(side_effect=list_file) + + base.main(args, gc) + + for file in sorted(glob.glob(os.path.join(tmpdirname, f"*Predictions*.myanot"))): + assert os.path.exists(file) + with open(file, 'r') as f: + pred_json = json.load(f) + e = pred_json['elements'][0] + assert len(e['values']) == NUM_IMAGES_PER_WSI + 1 + + assert len(e['user']['bbox']) == (NUM_IMAGES_PER_WSI + 1) * 4 # 4 is for x,y,w,h + + assert len(e['categories']) == len(mnist_labels) - 1 # exclude the default category + assert len(e['user']['confidence']) == (NUM_IMAGES_PER_WSI + 1) + + # compare e['values'] to labels['values'], to make sure we've trained a valid model + # the order of the values is shuffled in the annotation file, the ordering is in e['categories'] + file_num = int(file.split('Predictions_')[-1].split('.myanot')[0]) + predicted_labels = np.array([e['categories'][c]['label'] for c in e['values']]) + assert e['values'][0] == 0, "Background should have prediction 0" + matches = (predicted_labels == np.array([e['values'][0]] + list(map(str, labels[file_num]['value'])))) + similarity = matches.sum() / len(matches) + expected_min_accuracy = 0.75 + assert similarity > expected_min_accuracy, f"File {file}: Similarity between predicted values and GT is {similarity}, expected > {expected_min_accuracy}" + print(f"Similarity between predicted values and GT is {similarity}") diff --git a/superpixel_classification/SuperpixelClassification/tests/test_gen_superpixels.py b/superpixel_classification/SuperpixelClassification/tests/test_gen_superpixels.py new file mode 100644 index 0000000..5fc814f --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/test_gen_superpixels.py @@ -0,0 +1,164 @@ +import os +import shutil +import sys +import tempfile +from unittest.mock import MagicMock + +import h5py +import large_image +import numpy as np +import pytest +from PIL.Image import Image +from tifffile import tifffile + +# make pythonpath work out of the box - although your editor may complain +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.dirname(SCRIPT_DIR)) + +from SuperpixelClassificationBase import SuperpixelClassificationBase +from progress_helper import ProgressHelper +from tests.generate_MNIST_image import create_mnist_image + +from 
xdg_base_dirs import ( xdg_cache_home, ) + +NUM_IMAGES : int = 9 +IMAGE_SIZE : int = 16 # 16 is the smallest tile size for .TIFFs, although we could operate within a single tile, too. +COLOR_DIM = 3 + + +def d_to_rgb(d): + r = d & 0xFF + g = (d >> 8) & 0xFF + b = (d >> 16) & 0xFF + return [r, g, b] + +@pytest.fixture(scope="session") +def create_sample_data(): + ''' + Create a sample WSI for testing. + ''' + global NUM_IMAGES, IMAGE_SIZE + num_images = NUM_IMAGES + with tempfile.TemporaryDirectory() as tmpdirname: + output_filename = os.path.join(tmpdirname, "test.tiff") + + if os.path.dirname(output_filename): + os.makedirs(os.path.dirname(output_filename), exist_ok=True) + if os.path.exists(output_filename): + os.remove(output_filename) + + # Arrange the images in a grid (so num_images should be a number with an integer root) + tile_rows, tile_cols = int(np.sqrt(num_images)), int(np.sqrt(num_images)) + tile_h, tile_w = 16, 16 + grid_h, grid_w = tile_rows * tile_h, tile_cols * tile_w + base_image = np.zeros((grid_h, grid_w, 3), dtype=np.uint8) + + vals = np.array([0, 127, 255], dtype=np.uint8) + colors = np.stack(np.meshgrid(vals, vals, vals), axis=-1).reshape(-1, 3)[:NUM_IMAGES] + images = np.tile(colors[:, None, None, :], (1, IMAGE_SIZE, IMAGE_SIZE, 1)) + + for idx, img in enumerate(images): + r = idx // tile_cols + c = idx % tile_cols + base_image[r*tile_h:(r+1)*tile_h, c*tile_w:(c+1)*tile_w, :] = img + + pyramid = [base_image] + current = base_image.copy() + while min(current.shape) >= 64: + # Use Pillow to resize (ANTIALIAS gives good quality downsampling) + im = Image.fromarray(current) + new_w, new_h = current.shape[1] // 2, current.shape[0] // 2 + if new_w < 1 or new_h < 1: + break + im_resized = im.resize((new_w, new_h)) + current = np.array(im_resized) + pyramid.append(current) + + # Save the image as a pyramidal TIFF. + # The base image is the main image and the pyramid list (excluding the base) is saved as subIFDs. 
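+        # (writing the reduced-resolution levels as subIFDs is what lets tile readers
+        # such as large_image treat the file as a multi-resolution whole-slide image)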
+ if os.path.dirname(output_filename): + os.makedirs(os.path.dirname(output_filename), exist_ok=True) + if os.path.exists(output_filename): + os.remove(output_filename) + + with tifffile.TiffWriter(output_filename, bigtiff=False) as tif: + tif.write(pyramid[0], + tile=(tile_w * 4, tile_h * 4), + photometric='RGB', + description='Whole-slide MNIST image at 40x magnification', + subifds=pyramid[1:]) + print(f"Pyramidal TIFF saved as {output_filename}") + + # we use yield so that the temporarydirectory is still open in the tests + yield output_filename, images + +def test_gen_superpixel(create_sample_data): + global IMAGE_SIZE, COLOR_DIM + test_image_pth, test_images = create_sample_data + base = SuperpixelClassificationBase() + + # Create test data + item = { + "_id": "test_item_id", + 'largeImage': {'fileId': 'test_image_id'}, + 'name': test_image_pth, + } + + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + if not os.path.exists(os.path.join(dst, test_image_pth)): + shutil.copy(test_image_pth, dst) + print(">>> Copied file from", test_image_pth, "to", dst) + return None + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + gc.getItem = MagicMock(return_value={'name': test_image_pth, 'largeImage': {'fileId': 'foobar'}}) + def mv_to_src(_, src): + dst = os.path.dirname(test_image_pth) + if not os.path.exists(os.path.join(dst, src)): + shutil.copy(src, dst) + print(">>> Copied file from", src, "to", dst) + return {'itemId': 'uploaded_item_id'} + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value={'_id': 'test_file_id'}) + #gc.uploadFileToFolder = MagicMock(return_value={'_id': 'test_file_id'}) + + #bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[['x', 'y', 'w', 'h']].iterrows()] + bboxes = [[x, x, x + IMAGE_SIZE, x + IMAGE_SIZE] for x in range(0, NUM_IMAGES, IMAGE_SIZE)] + + with ProgressHelper( 'Superpixel Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items([item]) + result = base.createSuperpixelsForItem( + gc=gc, + annotationName="TorchTest", + item=item, + radius=IMAGE_SIZE, + magnification=40, + annotationFolderId='annotation_folder_id', + userId="user_id", + prog=prog, + ) + + out_pixelmap_file = os.path.join(os.path.dirname(test_image_pth), '%s.pixelmap.tiff' % item['name']) + assert os.path.exists(out_pixelmap_file), f"Output file {out_pixelmap_file} does not exist" + x, y, x2, y2 = 0, 0, IMAGE_SIZE, IMAGE_SIZE + ts = large_image.getTileSource(test_image_pth) + orig_image = ts.getRegion( + region=dict(left=x, top=y, right=x2, bottom=y2), + format=large_image.tilesource.TILE_FORMAT_NUMPY + )[0] + # test that all values in orig_image is equal to 1 + # TODO: waiting for another PR: want this to be 1 + assert np.all(orig_image == 0) + + feature_img = test_images[-1] + x, y, x2, y2 = IMAGE_SIZE * (IMAGE_SIZE - 1), IMAGE_SIZE * (IMAGE_SIZE - 1), IMAGE_SIZE * IMAGE_SIZE, IMAGE_SIZE * IMAGE_SIZE + ts = large_image.getTileSource(test_image_pth) + orig_image = ts.getRegion( + region=dict(left=x, top=y, right=x2, bottom=y2), + format=large_image.tilesource.TILE_FORMAT_NUMPY + )[0] + orig_image = orig_image.astype(feature_img.dtype) + # TODO: same as TODO above + assert np.all(orig_image == NUM_IMAGES - 1) \ No newline at end of file diff --git a/superpixel_classification/SuperpixelClassification/tests/test_predict.py b/superpixel_classification/SuperpixelClassification/tests/test_predict.py new file mode 100644 index 0000000..9341a90 --- /dev/null +++ 
b/superpixel_classification/SuperpixelClassification/tests/test_predict.py
@@ -0,0 +1,254 @@
+import json
+import os
+import shutil
+import tempfile
+from unittest.mock import MagicMock
+
+import h5py
+import numpy as np
+import pytest
+import torch
+
+# make pythonpath work out of the box - although your editor may complain
+import sys
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.dirname(SCRIPT_DIR))
+
+from SuperpixelClassificationBase import SuperpixelClassificationBase
+from SuperpixelClassificationTorch import SuperpixelClassificationTorch, _BayesianPatchTorchModel
+from progress_helper import ProgressHelper
+from tests.validate_json_annotation import validate_json_file
+
+# currently, torch model only supports 100x100
+MNIST_IMAGE_SIZE = 100
+COLOR_DIM = 3
+NUM_IMAGES = 64
+CUTOFF_IMAGES = 2
+
+@pytest.fixture(scope="session")
+def create_sample_data():
+    global NUM_IMAGES, CUTOFF_IMAGES
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        h5_path = os.path.join(tmpdirname, "test_data.h5")
+
+        images = np.random.randint(0, 255, size=(NUM_IMAGES - CUTOFF_IMAGES, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM), dtype=np.uint8)
+        indices = np.arange(NUM_IMAGES - CUTOFF_IMAGES)
+        assert images.shape[0] == indices.shape[0]
+
+        with h5py.File(h5_path, 'w') as f:
+            f.create_dataset('images', data=images)
+            f.create_dataset('used_indices', data=indices, dtype='i')
+
+        # we use yield so that the TemporaryDirectory is still open in the tests
+        yield h5_path
+
+'''
+This test checks predictions on a dataset that is labeled with only two of the ten categories.
+'''
+def test_subset_labels(create_sample_data):
+    global NUM_IMAGES, CUTOFF_IMAGES
+    h5_path = create_sample_data
+    base: SuperpixelClassificationBase = SuperpixelClassificationTorch()
+    base.certainty = 'batchbald'
+    base.feature_is_image = True
+    # Mock girder client
+    gc = MagicMock()
+    def mv_to_dst(_, dst):
+        return shutil.copy(h5_path, dst)
+    gc.downloadFile = MagicMock(side_effect=mv_to_dst)
+    gc.uploadFileToItem = MagicMock()
+
+    feature = {
+        '_id': '0',
+        'name': 'my_test_feature'
+    }
+    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
+    annotrec = {
+        'annotation': {
+            'attributes': {},
+            'name': 'TorchTest',
+        },
+    }
+
+    # make a list alternating the values 1 and 3, with NUM_IMAGES entries
+    value_list = [1, 3] * (NUM_IMAGES // 2)
+
+    elem = {
+        "type": "pixelmap",
+        "girderId": "6838aab654f0ca783ff03871",
+        "transform": {"matrix": [[1.0, 0], [0, 1.0]]},
+        'values': value_list,
+        'categories': [{"label": k, "fillColor": "rgba(0,0,0,0)"} for k in labels],
+        "boundaries": True,
+        "id": "myid",
+        'user': { },
+    }
+
+    groups = { k: {"label": k, "fillColor": "rgba(0,0,0,0)", "strokeColor": "rgba(0,0,0,0)" } for k in labels }
+
+    device = torch.device("cpu")
+    model = _BayesianPatchTorchModel(len(labels), device)
+    model.device = device
+
+    items = [(feature, annotrec, elem)]
+    item = {'_id': 0, 'name': 'my_item', 'largeImage': {'fileId': 'test_image_id'}}
+    with ProgressHelper('Superpixel Classification',
+                        'Test feature', False) as prog:
+        prog.progress(0)
+        prog.items(items)
+
+        annotation_name = 'testannotation'
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            base.predictLabelsForItem(
+                gc=gc,
+                annotationName=annotation_name,
+                tempdir=tmpdirname,
+                model=model,
+                item=item,
+                annotrec=annotrec,
+                elem=elem,
+                feature=feature,
+                curEpoch=0,
+                userId='user_id',
+                labels=labels,
+                groups=groups,
+                makeHeatmaps=False,
+                radius=-1,
magnification=40.0, + certainty='batchbald', + batchSize=NUM_IMAGES, + use_cuda = False, + prog=prog, + ) + out_pth = os.path.join(tmpdirname, '%s Epoch 0 Predictions.anot' % annotation_name) + assert os.path.exists(out_pth), "Output file %s does not exist" % out_pth + with open(out_pth, 'r') as f: + pred_json = json.load(f) + e = pred_json['elements'][0] + assert len(e['values']) == NUM_IMAGES + for i in range(1, CUTOFF_IMAGES): + assert e['values'][-i] == 0, "Expected unknown/none label for cutoff images" + assert len(e['categories']) == len(labels) + assert len(e['user']['confidence']) == NUM_IMAGES + assert len(e['user']['categoryConfidence']) == NUM_IMAGES + assert len(e['user']['categoryConfidence'][0]) == len(labels) + assert len(e['user']['certainty']) == NUM_IMAGES + for i in range(1, CUTOFF_IMAGES): + assert e['user']['certainty'][-i] > 10000, "Expected certainty to be very high for unlabeled samples to ensure they occur last in the AL filmstrip (DSA)" + assert 'percentiles' in e['user']['certainty_info'] + assert 'cdf' in e['user']['certainty_info'] + + validate_json_file(out_pth) + + out_pth = os.path.join(tmpdirname, '%s Epoch 1.anot' % annotation_name) + assert os.path.exists(out_pth), "Output file %s does not exist" % out_pth + with open(out_pth, 'r') as f: + annotation_file = json.load(f) + e = annotation_file['elements'][0] + assert len(e['values']) == NUM_IMAGES + assert len(e['categories']) == len(labels) + + validate_json_file(out_pth) + +def test_predict_unlabeled_with_cutoff(create_sample_data): + global NUM_IMAGES, CUTOFF_IMAGES + h5_path = create_sample_data + base: SuperpixelClassificationBase = SuperpixelClassificationTorch() + base.certainty = 'batchbald' + base.feature_is_image = True + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + return shutil.copy(h5_path, dst) + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + gc.uploadFileToItem = MagicMock() + + feature = { + '_id': '0', + 'name': 'my_test_feature' + } + labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + annotrec = { + 'annotation': { + 'attributes': {}, + 'name': 'TorchTest', + }, + } + + elem = { + "type": "pixelmap", + "girderId": "6838aab654f0ca783ff03871", + "transform": {"matrix": [[1.0, 0], [0, 1.0]]}, + 'values': [0] * NUM_IMAGES, + 'categories' : [{"label": k, "fillColor": "rgba(0,0,0,0)"} for k in labels], + "boundaries": True, + "id": "myid", + 'user': { }, + } + + groups = { k: {"label": k, "fillColor": "rgba(0,0,0,0)", "strokeColor": "rgba(0,0,0,0)" } for k in labels } + + device = torch.device("cpu") + model = _BayesianPatchTorchModel(len(labels), device) + model.device = device + + items = [(feature, annotrec, elem)] + item = {'_id': 0, 'name': 'my_item', 'largeImage': {'fileId': 'test_image_id'}} + with ProgressHelper( 'Superpixel Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items(items) + + annotation_name = 'testannotation' + with tempfile.TemporaryDirectory() as tmpdirname: + base.predictLabelsForItem( + gc=gc, + annotationName=annotation_name, + tempdir=tmpdirname, + model=model, + item=item, + annotrec=annotrec, + elem=elem, + feature=feature, + curEpoch=0, + userId='user_id', + labels=labels, + groups=groups, + makeHeatmaps=False, + radius=-1, + magnification=40.0, + certainty='batchbald', + batchSize=NUM_IMAGES, + use_cuda = False, + prog=prog, + ) + out_pth = os.path.join(tmpdirname, '%s Epoch 0 Predictions.anot' % annotation_name) + assert os.path.exists(out_pth), "Output file %s does not exist" % out_pth + with 
open(out_pth, 'r') as f: + pred_json = json.load(f) + e = pred_json['elements'][0] + assert len(e['values']) == NUM_IMAGES + for i in range(1, CUTOFF_IMAGES): + assert e['values'][-i] == 0, "Expected unknown/none label for cutoff images" + assert len(e['categories']) == len(labels) + assert len(e['user']['confidence']) == NUM_IMAGES + assert len(e['user']['categoryConfidence']) == NUM_IMAGES + assert len(e['user']['categoryConfidence'][0]) == len(labels) + assert len(e['user']['certainty']) == NUM_IMAGES + for i in range(1, CUTOFF_IMAGES): + assert e['user']['certainty'][-i] > 10000, "Expected certainty to be very high for unlabeled samples to ensure they occur last in the AL filmstrip (DSA)" + assert 'percentiles' in e['user']['certainty_info'] + assert 'cdf' in e['user']['certainty_info'] + + validate_json_file(out_pth) + + out_pth = os.path.join(tmpdirname, '%s Epoch 1.anot' % annotation_name) + assert os.path.exists(out_pth), "Output file %s does not exist" % out_pth + with open(out_pth, 'r') as f: + annotation_file = json.load(f) + e = annotation_file['elements'][0] + assert len(e['values']) == NUM_IMAGES + assert len(e['categories']) == len(labels) + + validate_json_file(out_pth) diff --git a/superpixel_classification/SuperpixelClassification/tests/test_tensorflow.py b/superpixel_classification/SuperpixelClassification/tests/test_tensorflow.py new file mode 100644 index 0000000..1a40365 --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/test_tensorflow.py @@ -0,0 +1,93 @@ +import os +import shutil +import tempfile +from unittest.mock import MagicMock + +import h5py +import numpy as np +import pytest + +# make pythonpath work out of the box - although your editor may complain +import sys +import os +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.dirname(SCRIPT_DIR)) + +from SuperpixelClassificationBase import SuperpixelClassificationBase +from SuperpixelClassificationTensorflow import SuperpixelClassificationTensorflow +from progress_helper import ProgressHelper + +MNIST_IMAGE_SIZE=28 +COLOR_DIM = 3 +NUM_IMAGES = 64 + +@pytest.fixture(scope="session") +def create_sample_data(): + global NUM_IMAGES + with tempfile.TemporaryDirectory() as tmpdirname: + h5_path = os.path.join(tmpdirname, "test_data.h5") + images = np.random.randint(0, 255, size=(NUM_IMAGES, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM), dtype=np.uint8) + + with h5py.File(h5_path, 'w') as f: + f.create_dataset('images', data=images) + f.create_dataset('used_indices', data=np.arange(NUM_IMAGES - 2)) + + # we use yield so that that the temporarydirectory is still open in the tests + yield h5_path + +def test_train_model(create_sample_data): + global NUM_IMAGES + h5_path = create_sample_data + base: SuperpixelClassificationBase + base = SuperpixelClassificationTensorflow() + base.feature_is_image = True + base.certainty = 'not batchbald' # same as using tensorflow + + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + return shutil.copy(h5_path, dst) + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + def mv_to_src(_, src): + dst = os.path.dirname(os.path.dirname(h5_path)) + return shutil.copy(src, dst) + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True) + + labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + elem = { + 'girderId': 'test_girder_id', + 'categories': [ + {"label": c} for c in labels + ], + 'values': + [] \ + + np.random.randint(1, len(labels) - 1, size=(NUM_IMAGES - 2), dtype=np.uint8).tolist() 
+ + [0, 0], # last two images unlabeled + 'transform': {'matrix': [[1.0]]} + } + + item = {'_id': 'test_h5_file', 'name': 'test'} + annotrec = {'_id': '1', '_version': 0, 'annotation': {'name': 'TorchTest'}} + items = [(item, annotrec, elem)] + with ProgressHelper( 'Superpixel Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items(items) + modelFile, modelTrainingFile = base.trainModel( + annotationName="TorchTest", + batchSize = 4, + epochs = 1, + excludeLabelList = [], + features={'test_h5_file': {'_id': 'feature_id', 'name': 'test_h5_file'}}, + gc=gc, + itemsAndAnnot=items, + labelList = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], + modelFolderId="test_folder_id", + prog=prog, + randomInput = False, + trainingSplit = 0.5, + use_cuda = False, + ) + + assert os.path.exists(modelFile) + assert os.path.exists(modelTrainingFile) \ No newline at end of file diff --git a/superpixel_classification/SuperpixelClassification/tests/test_torch.py b/superpixel_classification/SuperpixelClassification/tests/test_torch.py new file mode 100644 index 0000000..edb7dbc --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/test_torch.py @@ -0,0 +1,94 @@ +import os +import shutil +import tempfile +from unittest.mock import MagicMock + +import h5py +import numpy as np +import pytest + +# make pythonpath work out of the box - although your editor may complain +import sys +import os +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.dirname(SCRIPT_DIR)) + +from SuperpixelClassificationBase import SuperpixelClassificationBase +from SuperpixelClassificationTorch import SuperpixelClassificationTorch +from progress_helper import ProgressHelper + +# currently, torch model only supports 100x100 +MNIST_IMAGE_SIZE=100 +COLOR_DIM = 3 +NUM_IMAGES = 64 + +@pytest.fixture(scope="session") +def create_sample_data(): + global NUM_IMAGES + with tempfile.TemporaryDirectory() as tmpdirname: + h5_path = os.path.join(tmpdirname, "test_data.h5") + images = np.random.randint(0, 255, size=(NUM_IMAGES, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM), dtype=np.uint8) + + with h5py.File(h5_path, 'w') as f: + f.create_dataset('images', data=images) + f.create_dataset('used_indices', data=np.arange(NUM_IMAGES - 2)) + + # we use yield so that that the temporarydirectory is still open in the tests + yield h5_path + +def test_train_model(create_sample_data): + global NUM_IMAGES + h5_path = create_sample_data + base: SuperpixelClassificationBase + base = SuperpixelClassificationTorch() + base.feature_is_image = True + base.certainty = 'batchbald' # same as using torch + + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + return shutil.copy(h5_path, dst) + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + def mv_to_src(_, src): + dst = os.path.dirname(os.path.dirname(h5_path)) + return shutil.copy(src, dst) + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True) + + labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + elem = { + 'girderId': 'test_girder_id', + 'categories': [ + {"label": c} for c in labels + ], + 'values': + [] \ + + np.random.randint(1, len(labels) - 1, size=(NUM_IMAGES - 2), dtype=np.uint8).tolist() + + [0, 0], # last two images unlabeled + 'transform': {'matrix': [[1.0]]} + } + + item = {'_id': 'test_h5_file', 'name': 'test'} + annotrec = {'_id': '1', '_version': 0, 'annotation': {'name': 'TorchTest'}} + items = [(item, annotrec, elem)] + with ProgressHelper( 'Superpixel 
Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items(items) + modelFile, modelTrainingFile = base.trainModel( + annotationName="TorchTest", + batchSize = 4, + epochs = 1, + excludeLabelList = [], + features={'test_h5_file': {'_id': 'feature_id', 'name': 'test_h5_file'}}, + gc=gc, + itemsAndAnnot=items, + labelList = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], + modelFolderId="test_folder_id", + prog=prog, + randomInput = False, + trainingSplit = 0.5, + use_cuda = True, + ) + + assert os.path.exists(modelFile) + assert os.path.exists(modelTrainingFile) diff --git a/superpixel_classification/SuperpixelClassification/tests/validate_json_annotation.py b/superpixel_classification/SuperpixelClassification/tests/validate_json_annotation.py new file mode 100644 index 0000000..5f209ce --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/validate_json_annotation.py @@ -0,0 +1,588 @@ +#!/usr/bin/env python +''' +This code is similar to girder_annotation/girder_large_image_annotation/models/annotation.py +The meaning is to validate the json annotation file without having to use girder or large_image +''' +import argparse +import json +import logging +import os +import sys +import jsonschema +from tqdm import tqdm + +import copy + +def extendSchema(base, add): + extend = copy.deepcopy(base) + for key in add: + if key == 'required' and 'required' in base: + extend[key] = sorted(set(extend[key]) | set(add[key])) + elif key != 'properties' and 'properties' in base: + extend[key] = add[key] + if 'properties' in add: + extend['properties'].update(add['properties']) + return extend + + +colorSchema = { + 'type': 'string', + # We accept colors of the form + # #rrggbb six digit RRGGBB hex + # #rgb three digit RGB hex + # #rrggbbaa eight digit RRGGBBAA hex + # #rgba four digit RGBA hex + # rgb(255, 255, 255) rgb decimal triplet + # rgba(255, 255, 255, 1) rgba quad with RGB in the range [0-255] and + # alpha [0-1] + 'pattern': r'^(#([0-9a-fA-F]{3,4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})|' + r'rgb\(\d+,\s*\d+,\s*\d+\)|' + r'rgba\(\d+,\s*\d+,\s*\d+,\s*(\d?\.|)\d+\))$', +} + +transformArray = { + 'type': 'array', + 'items': { + 'type': 'array', + 'minItems': 2, + 'maxItems': 2, + }, + 'minItems': 2, + 'maxItems': 2, + 'description': 'A 2D matrix representing the transform of an ' + 'image overlay.', +} + + +colorRangeSchema = { + 'type': 'array', + 'items': colorSchema, + 'description': 'A list of colors', +} + +rangeValueSchema = { + 'type': 'array', + 'items': {'type': 'number'}, + 'description': 'A weakly monotonic list of range values', +} + +userSchema = { + 'type': 'object', + 'additionalProperties': True, +} + +labelSchema = { + 'type': 'object', + 'properties': { + 'value': {'type': 'string'}, + 'visibility': { + 'type': 'string', + # TODO: change to True, False, None? 
+ 'enum': ['hidden', 'always', 'onhover'], + }, + 'fontSize': { + 'type': 'number', + 'exclusiveMinimum': 0, + }, + 'color': colorSchema, + }, + 'required': ['value'], + 'additionalProperties': False, +} + +groupSchema = {'type': 'string'} + +baseElementSchema = { + 'type': 'object', + 'properties': { + 'id': { + 'type': 'string', + 'pattern': '^[0-9a-f]{24}$', + }, + 'type': {'type': 'string'}, + # schema free field for users to extend annotations + 'user': userSchema, + 'label': labelSchema, + 'group': groupSchema, + }, + 'required': ['type'], + 'additionalProperties': True, +} +baseShapeSchema = extendSchema(baseElementSchema, { + 'properties': { + 'lineColor': colorSchema, + 'lineWidth': { + 'type': 'number', + 'minimum': 0, + }, + }, +}) + + +pixelmapCategorySchema = { + 'type': 'object', + 'properties': { + 'fillColor': colorSchema, + 'strokeColor': colorSchema, + 'label': { + 'type': 'string', + 'description': 'A string representing the semantic ' + 'meaning of regions of the map with ' + 'the corresponding color.', + }, + 'description': { + 'type': 'string', + 'description': 'A more detailed explanation of the ' + 'meaining of this category.', + }, + }, + 'required': ['fillColor'], + 'additionalProperties': False, +} + +_annotationSchema = { + 'type': 'object', + 'properties': { + 'value': colorSchema, + 'id': colorSchema, + 'label': { + 'type': 'string', + 'description': 'A string representing the semantic ' + 'meaning of regions of the map with ' + 'the corresponding color.', + }, + 'description': { + 'type': 'string', + 'description': 'A more detailed explanation of the ' + 'meaining of this category.', + }, + }, + 'required': ['fillColor'], + 'additionalProperties': False, +} + + +overlaySchema = extendSchema(baseElementSchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['image'], + }, + 'girderId': { + 'type': 'string', + 'pattern': '^[0-9a-f]{24}$', + 'description': 'Girder item ID containing the image to ' + 'overlay.', + }, + 'opacity': { + 'type': 'number', + 'minimum': 0, + 'maximum': 1, + 'description': 'Default opacity for this image overlay. Must ' + 'be between 0 and 1. Defaults to 1.', + }, + 'hasAlpha': { + 'type': 'boolean', + 'description': + 'If true, the image is treated assuming it has an alpha ' + 'channel.', + }, + 'transform': { + 'type': 'object', + 'description': 'Specification for an affine transform of the ' + 'image overlay. Includes a 2D transform matrix, ' + 'an X offset and a Y offset.', + 'properties': { + 'xoffset': { + 'type': 'number', + }, + 'yoffset': { + 'type': 'number', + }, + 'matrix': transformArray, + }, + }, + }, + 'required': ['girderId', 'type'], + 'additionalProperties': False, + 'description': 'An image overlay on top of the base resource.', +}) + + +pixelmapSchema = extendSchema(overlaySchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['pixelmap'], + }, + 'values': { + 'type': 'array', + 'items': {'type': 'integer'}, + 'description': 'An array where the indices ' + 'correspond to pixel values in the ' + 'pixel map image and the values are ' + 'used to look up the appropriate ' + 'color in the categories property.', + }, + 'categories': { + 'type': 'array', + 'items': pixelmapCategorySchema, + 'description': 'An array used to map between the ' + 'values array and color values. 
' + 'Can also contain semantic ' + 'information for color values.', + }, + 'boundaries': { + 'type': 'boolean', + 'description': 'True if the pixelmap doubles pixel ' + 'values such that even values are the ' + 'fill and odd values the are stroke ' + 'of each superpixel. If true, the ' + 'length of the values array should be ' + 'half of the maximum value in the ' + 'pixelmap.', + + }, + }, + 'required': ['values', 'categories', 'boundaries'], + 'additionalProperties': False, + 'description': 'A tiled pixelmap to overlay onto a base resource.', +}) + +bboxSchema = extendSchema(overlaySchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['bboxmap'], + }, + 'categories': { + 'type': 'array', + 'items': pixelmapCategorySchema, + 'description': 'An array used to map between the ' + 'values array and color values. ' + 'Can also contain semantic ' + 'information for color values.', + }, + 'annotations': { + 'type': 'array', + 'description': 'Value, id, and bounding box for each annotation', + 'items': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'value': { + 'type': 'integer', + }, + 'id': { + 'type': 'integer', + }, + 'bbox': { + 'type': 'array', + 'items': {'type': 'number'}, + 'minItems': 4, + 'maxItems': 4, + 'description': 'Bounding box in the form ' + '[left, top, right, bottom].', + }, + } + } + }, + 'boundaries': { + 'type': 'boolean', + 'description': 'True if the pixelmap doubles pixel ' + 'values such that even values are the ' + 'fill and odd values the are stroke ' + 'of each superpixel. If true, the ' + 'length of the values array should be ' + 'half of the maximum value in the ' + 'pixelmap.', + + }, + }, + 'required': ['categories', 'boundaries', 'annotations'], + 'additionalProperties': True, + 'description': 'A tiled pixelmap to overlay onto a base resource.', +}) + +annotationElementSchema = { + # Shape subtypes are mutually exclusive, so for efficiency, don't use + # 'oneOf' + 'anyOf': [ + pixelmapSchema, + bboxSchema, + ], +} + + +class AnnotationSchema: + annotationSchema = { + '$schema': 'http://json-schema.org/schema#', + 'type': 'object', + 'properties': { + 'name': { + 'type': 'string', + # TODO: Disallow empty? + 'minLength': 1, + }, + 'description': {'type': 'string'}, + 'display': { + 'type': 'object', + 'properties': { + 'visible': { + 'type': ['boolean', 'string'], + 'enum': ['new', True, False], + 'description': 'This advises viewers on when the ' + 'annotation should be shown. If "new" (the default), ' + 'show the annotation when it is first added to the ' + "system. If false, don't show the annotation by " + 'default. If true, show the annotation when the item ' + 'is displayed.', + }, + }, + }, + 'attributes': { + 'type': 'object', + 'additionalProperties': True, + 'title': 'Image Attributes', + 'description': 'Subjective things that apply to the entire ' + 'image.', + }, + 'elements': { + 'type': 'array', + 'items': annotationElementSchema, + # We want to ensure unique element IDs, if they are set. If + # they are not set, we assign them from Mongo. 
+ 'title': 'Image Markup', + 'description': 'Subjective things that apply to a ' + 'spatial region.', + }, + }, + 'additionalProperties': False, + } + + + + coordSchema = { + 'type': 'array', + # TODO: validate that z==0 for now + 'items': { + 'type': 'number', + }, + 'minItems': 3, + 'maxItems': 3, + 'name': 'Coordinate', + # TODO: define origin for 3D images + 'description': 'An X, Y, Z coordinate tuple, in base layer pixel ' + 'coordinates, where the origin is the upper-left.', + } + coordValueSchema = { + 'type': 'array', + 'items': { + 'type': 'number', + }, + 'minItems': 4, + 'maxItems': 4, + 'name': 'CoordinateWithValue', + 'description': 'An X, Y, Z, value coordinate tuple, in base layer ' + 'pixel coordinates, where the origin is the upper-left.', + } + + colorSchema = { + 'type': 'string', + # We accept colors of the form + # #rrggbb six digit RRGGBB hex + # #rgb three digit RGB hex + # #rrggbbaa eight digit RRGGBBAA hex + # #rgba four digit RGBA hex + # rgb(255, 255, 255) rgb decimal triplet + # rgba(255, 255, 255, 1) rgba quad with RGB in the range [0-255] and + # alpha [0-1] + # TODO: make rgb and rgba spec validate that rgb is [0-255] and a is + # [0-1], rather than just checking if they are digits and such. + 'pattern': r'^(#([0-9a-fA-F]{3,4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})|' + r'rgb\(\d+,\s*\d+,\s*\d+\)|' + r'rgba\(\d+,\s*\d+,\s*\d+,\s*(\d?\.|)\d+\))$', + } + + colorRangeSchema = { + 'type': 'array', + 'items': colorSchema, + 'description': 'A list of colors', + } + + rangeValueSchema = { + 'type': 'array', + 'items': {'type': 'number'}, + 'description': 'A weakly monotonic list of range values', + } + + userSchema = { + 'type': 'object', + 'additionalProperties': True, + } + + labelSchema = { + 'type': 'object', + 'properties': { + 'value': {'type': 'string'}, + 'visibility': { + 'type': 'string', + # TODO: change to True, False, None? 
+ 'enum': ['hidden', 'always', 'onhover'], + }, + 'fontSize': { + 'type': 'number', + 'exclusiveMinimum': 0, + }, + 'color': colorSchema, + }, + 'required': ['value'], + 'additionalProperties': False, + } + + groupSchema = {'type': 'string'} + + baseElementSchema = { + 'type': 'object', + 'properties': { + 'id': { + 'type': 'string', + 'pattern': '^[0-9a-f]{24}$', + }, + 'type': {'type': 'string'}, + # schema free field for users to extend annotations + 'user': userSchema, + 'label': labelSchema, + 'group': groupSchema, + }, + 'required': ['type'], + 'additionalProperties': True, + } + baseShapeSchema = extendSchema(baseElementSchema, { + 'properties': { + 'lineColor': colorSchema, + 'lineWidth': { + 'type': 'number', + 'minimum': 0, + }, + }, + }) + + pointShapeSchema = extendSchema(baseShapeSchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['point'], + }, + 'center': coordSchema, + 'fillColor': colorSchema, + }, + 'required': ['type', 'center'], + 'additionalProperties': False, + }) + + arrowShapeSchema = extendSchema(baseShapeSchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['arrow'], + }, + 'points': { + 'type': 'array', + 'items': coordSchema, + 'minItems': 2, + 'maxItems': 2, + }, + 'fillColor': colorSchema, + }, + 'description': 'The first point is the head of the arrow', + 'required': ['type', 'points'], + 'additionalProperties': False, + }) + + circleShapeSchema = extendSchema(baseShapeSchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['circle'], + }, + 'center': coordSchema, + 'radius': { + 'type': 'number', + 'minimum': 0, + }, + 'fillColor': colorSchema, + }, + 'required': ['type', 'center', 'radius'], + 'additionalProperties': False, + }) + + polylineShapeSchema = extendSchema(baseShapeSchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['polyline'], + }, + 'points': { + 'type': 'array', + 'items': coordSchema, + 'minItems': 2, + }, + 'fillColor': colorSchema, + 'closed': { + 'type': 'boolean', + 'description': 'polyline is open if closed flag is ' + 'not specified', + }, + 'holes': { + 'type': 'array', + 'description': + 'If closed is true, this is a list of polylines that are ' + 'treated as holes in the base polygon. 
These should not '
+                'cross each other and should be contained within the base '
+                'polygon.',
+            'items': {
+                'type': 'array',
+                'items': coordSchema,
+                'minItems': 3,
+            },
+        },
+    },
+    'required': ['type', 'points'],
+    'additionalProperties': False,
+})
+
+
+def validate_annotation(annotation_dict):
+    validator = jsonschema.Draft6Validator(AnnotationSchema.annotationSchema)
+    validatorElement = jsonschema.Draft6Validator(AnnotationSchema.baseElementSchema)
+
+    validator.validate(annotation_dict)
+    for element in tqdm(annotation_dict['elements']):
+        validatorElement.validate(element)
+
+def validate_json_file(json_dst):
+    with open(json_dst, 'r') as f:
+        data = json.load(f)
+    validate_annotation(data)
+    # num_elem = len(data['elements'][0]['annotations'])
+    # if num_elem % 4 != 0:
+    #     raise ValueError(f"Number of elements ({num_elem}) is not a multiple of 4")
+    # num_values = len(data['elements'][0]['annotations'])
+    # if int(num_elem / 4) != num_values:
+    #     raise ValueError(f"Number of elements ({num_elem / 4}) does not match values ({num_values})")
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Validate a json annotation file')
+    parser.add_argument('--input', default=os.path.join("out", "superpixel.anot"), type=str,
+                        help='Name of input json file with a pixelmap annotation')
+    args = parser.parse_args()
+    # Validate the given annotation file
+    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+    if not os.path.exists(args.input):
+        logging.error(f"Annotation path {args.input} does not exist")
+        sys.exit(1)
+
+    validate_json_file(args.input)
+    logging.info("Done validating annotation ['%s']", args.input)
diff --git a/tools/inspect_image_feature_file.py b/tools/inspect_image_feature_file.py
new file mode 100644
index 0000000..a93d911
--- /dev/null
+++ b/tools/inspect_image_feature_file.py
@@ -0,0 +1,37 @@
+'''
+This script will open a feature file (.h5) and show a 3x3 grid of images.
+This tool is useful if you suspect that features are not extracted properly, for example due to erroneous mask values/indexing.
+'''
+
+import sys
+
+import h5py
+import matplotlib.pyplot as plt
+
+if len(sys.argv) > 1:
+    feature_file = sys.argv[1]
+else:
+    feature_file = "features.h5"
+
+# open the file and read the first 9 images from the 'images' dataset
+with h5py.File(feature_file, "r") as f:
+    images = f["images"][:9]
+
+# plot the images in a 3x3 grid
+for i in range(len(images)):
+    plt.subplot(3, 3, i + 1)
+    plt.imshow(images[i])
+    # hide the axes so only the patches are visible
+    plt.axis('off')
+    print(f"Image {i+1} is {images[i].shape}")
+plt.show()
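+
+# Example usage (the .h5 path here is just illustrative; pass your own feature file):
+#   python tools/inspect_image_feature_file.py features.h5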