From 566ff5fce9a66a809e78a2f103c18f3e297962b9 Mon Sep 17 00:00:00 2001
From: Julia Volmer <julia.volmer@arangodb.com>
Date: Mon, 10 Nov 2025 13:07:52 +0100
Subject: [PATCH] Add limited-traversal query on a binary tree with one
 supernode

---
 simple/{BIGvertices.js => binaryTrees.js} | 113 ++++++++++++++--------
 simple/test.js                            |  42 ++++++--
 2 files changed, 103 insertions(+), 52 deletions(-)
 rename simple/{BIGvertices.js => binaryTrees.js} (53%)

diff --git a/simple/BIGvertices.js b/simple/binaryTrees.js
similarity index 53%
rename from simple/BIGvertices.js
rename to simple/binaryTrees.js
index 159f8d0..6c60201 100644
--- a/simple/BIGvertices.js
+++ b/simple/binaryTrees.js
@@ -1,39 +1,3 @@
-// This script can create a binary tree in ArangoDB with relatively large
-// vertex documents (approx. 1 MB each). You can give the depth and you can
-// choose what type of graph to create.
-//
-// Usage:
-//
-// makeGraph("G", "V", "E")      - creates a general graph with name G, vertex
-//                                 collection V and edge collection E
-// makeTree(6, "V", "E") - creates the actual tree after the graph was created
-//                         6 is the depth and one has to name the vertex and
-//                         edge collections
-//
-// The following AQL query and its performance could be of interest:
-//
-// FOR v IN 0..6 OUTBOUND "V/S1:K1" GRAPH "G"
-//   RETURN v.data
-//
-// This traverses the whole graph starting from the root but retrieves only
-// a tiny part of the vertex data. This tests the 3.10 feature of
-// traversal projections. You can see that it does this from this explain
-// output for the above query:
-//
-// Query String (58 chars, cacheable: true):
-//  FOR v IN 0..6 OUTBOUND "V/S1:K1" GRAPH "G"
-//    RETURN v.data
-//
-// Execution plan:
-//  Id   NodeType          Site  Est.   Comment
-//   1   SingletonNode     COOR     1   * ROOT
-//   2   TraversalNode     COOR    64     - FOR v  /* vertex (projections: `data`) */ IN 0..6  /* min..maxPathDepth */ OUTBOUND 'V/S1:K1' /* startnode */  GRAPH 'G'
-//   3   CalculationNode   COOR    64       - LET #3 = v.`data`   /* attribute expression */
-//   4   ReturnNode        COOR    64       - RETURN #3
-//
-// In the line with Id 2 you can see that the TraversalNode uses a projection to the field `data`.
-//
-
 rand = require("internal").rand;
 time = require("internal").time;
 
@@ -48,7 +12,7 @@ function makeRandomString(l) {
   return s;
 }
 
-function makeGraph(graphName, vertexCollName, edgeCollName) {
+function createGraph(graphName, vertexCollName, edgeCollName) {
   let graph = require("@arangodb/general-graph");
   try {
     graph._drop(graphName, true);
@@ -62,25 +26,50 @@ function makeKey(i) {
   return "S" + (i % 3) + ":K" + i;
 }
 
-function makeTree(depth, vertexCollName, edgeCollName) {
+// creates a binary tree where every vertex includes one megabyte of data
+//
+// The following AQL query and its performance could be of interest:
+//
+// FOR v IN 0..6 OUTBOUND "V/S1:K1" GRAPH "G"
+//   RETURN v.smallData
+//
+// This traverses the whole graph starting from the root but retrieves only
+// a tiny part of the vertex data. This tests the 3.10 feature of
+// traversal projections. You can see that it does this from this explain
+// output for the above query:
+//
+// Query String (58 chars, cacheable: true):
+//  FOR v IN 0..6 OUTBOUND "V/S1:K1" GRAPH "G"
+//    RETURN v.smallData
+//
+// Execution plan:
+//  Id   NodeType          Site  Est.   Comment
+//   1   SingletonNode     COOR     1   * ROOT
+//   2   TraversalNode     COOR    64     - FOR v  /* vertex (projections: `smallData`) */ IN 0..6  /* min..maxPathDepth */ OUTBOUND 'V/S1:K1' /* startnode */  GRAPH 'G'
+//   3   CalculationNode   COOR    64       - LET #3 = v.`smallData`   /* attribute expression */
+//   4   ReturnNode        COOR    64       - RETURN #3
+//
+// In the line with Id 2 you can see that the TraversalNode uses a projection to the field `smallData`.
+function makeTreeWithLargeData(graphName, vertexCollName, edgeCollName, depth) {
+  createGraph(graphName, vertexCollName, edgeCollName);
   let V = db._collection(vertexCollName);
   let E = db._collection(edgeCollName);
+
+  // create vertices
   let klumpen = {};
   for (let i = 0; i < 1000; ++i) {
     klumpen["K"+i] = makeRandomString(1024);
   }
   for (let i = 1; i <= 2 ** depth - 1; ++i) {
     let v = klumpen;
-    v.data = "D"+i;
+    v.smallData = "D"+i;
     v.smart = "S"+(i % 3);
     v._key = makeKey(i);
     V.insert(v);
     print("Have created", i, "vertices out of", 2 ** depth - 1);
   }
 
-  // This is now a gigabyte of data, one megabyte per vertex.
-
-  // Make a binary tree:
+  // make a binary tree from these vertices
   for (let i = 1; i <= 2 ** (depth - 1) - 1; ++i) {
     let e = { _from: vertexCollName + "/" + makeKey(i), 
               _to: vertexCollName + "/" + makeKey(2 * i)};
@@ -89,5 +78,45 @@ function makeTree(depth, vertexCollName, edgeCollName) {
           _to: vertexCollName + "/" + makeKey(2 * i + 1)};
     E.insert(e);
   }
+}
+
+// Creates a binary tree of the given depth in which vertex 2 is a supernode:
+//        1
+//     /     \
+//    3       2  with additional superNodeSize neighbours
+//   / \     / \
+//  7   6   5   4
+//       ...
+// (Re)creates the graph via createGraph, then inserts 2^depth - 1 tree
+// vertices plus superNodeSize extra vertices that are attached to vertex 2.
+function makeTreeWithSupernode(graphName, vertexCollName, edgeCollName, depth, superNodeSize) {
+  createGraph(graphName, vertexCollName, edgeCollName);
+  let V = db._collection(vertexCollName);
+  let E = db._collection(edgeCollName);
+  const treeSize = 2 ** depth - 1;
+  const toId = (i) => vertexCollName + "/" + makeKey(i);
+  // add treeSize vertices for the tree and additionally superNodeSize vertices
+  let docs = [];
+  for (let i = 1; i <= treeSize + superNodeSize; ++i) {
+    docs.push({data: "D" + i, smart: "S" + (i % 3), _key: makeKey(i)});
+  }
+  V.insert(docs);
+
+  // make a binary tree from the first treeSize vertices
+  docs = [];
+  for (let i = 1; i <= 2 ** (depth - 1) - 1; ++i) {
+    docs.push({_from: toId(i), _to: toId(2 * i)});
+    docs.push({_from: toId(i), _to: toId(2 * i + 1)});
+  }
+  E.insert(docs);
 
+  // make vertex 2 a supernode (for depth 1, vertex 2 is not part of the tree)
+  if (depth > 1) {
+    docs = [];
+    for (let j = 1; j <= superNodeSize; j++) {
+      // _to must go through makeKey: the extra vertices were inserted with makeKey(i) keys
+      docs.push({_from: toId(2), _to: toId(treeSize + j)});
+    }
+    E.insert(docs);
+  }
 }
diff --git a/simple/test.js b/simple/test.js
index f1277c1..0d61135 100644
--- a/simple/test.js
+++ b/simple/test.js
@@ -1,6 +1,6 @@
 "use strict";
 /* jshint globalstrict:false, strict:false, maxlen: 500 */
-/* global GLOBAL, makeGraph, makeTree */
+/* global GLOBAL, makeTreeWithLargeData, makeTreeWithSupernode */
 
 const internal = require("internal");
 const arango = internal.arango;
@@ -9,10 +9,10 @@ const fs = require("fs");
 const semver = require("semver");
 const _ = require("lodash");
 const db = require("org/arangodb").db;
-require("internal").load("simple/BIGvertices.js");// makeGraph, makeTree
+require("internal").load("simple/binaryTrees.js");// makeTreeWithLargeData, makeTreeWithSupernode
 
 GLOBAL.returnValue = 0;
-
+var supernodeTreeDepth = 0; // required for supernode_limit test
 
 function sum (values) {
   if (values.length > 1) {
@@ -513,16 +513,19 @@ exports.test = function (testParams) {
         createEdges(1000);
       } else if (testParams.small) {
         createEdges(10000);
-        makeGraph("Tree", "TreeV", "TreeE");
-        makeTree(6, "TreeV", "TreeE");
+        makeTreeWithLargeData("Tree", "TreeV", "TreeE", 6);
+        supernodeTreeDepth = 4;
+        makeTreeWithSupernode("Supernode", "SupernodeV", "SupernodeE", supernodeTreeDepth, 5000);
       } else if (testParams.medium) {
         createEdges(100000);
-        makeGraph("Tree", "TreeV", "TreeE");
-        makeTree(7, "TreeV", "TreeE");
+        makeTreeWithLargeData("Tree", "TreeV", "TreeE", 7);
+        supernodeTreeDepth = 5;
+        makeTreeWithSupernode("Supernode", "SupernodeV", "SupernodeE", supernodeTreeDepth, 10000);
       } else if (testParams.big) {
         createEdges(1000000);
-        makeGraph("Tree", "TreeV", "TreeE");
-        makeTree(8, "TreeV", "TreeE");
+        makeTreeWithLargeData("Tree", "TreeV", "TreeE", 8);
+        supernodeTreeDepth = 6;
+        makeTreeWithSupernode("Supernode", "SupernodeV", "SupernodeE", supernodeTreeDepth, 100000);
       }
 
       internal.wal.flush(true, true);
@@ -1034,7 +1037,7 @@ exports.test = function (testParams) {
     traversalProjections = function (params) {
       // Note that depth 8 is good for all three sizes small (6), medium (7)
       // and big (8). Depending on the size, we create a different tree.
-      db._query(`FOR v IN 0..8 OUTBOUND "TreeV/S1:K1" GRAPH "Tree" RETURN v.data`, {}, {}, {silent});
+      db._query(`FOR v IN 0..8 OUTBOUND "TreeV/S1:K1" GRAPH "Tree" RETURN v.smallData`, {}, {}, {silent});
     },
 
     outbound = function (params) {
@@ -1153,6 +1156,21 @@ exports.test = function (testParams) {
         { silent }
       );
     },
+    supernode_limit = function (params) {
+      // Limit the output so that at least one, but not all, of the supernode's neighbours is in the result.
+      // DFS first enumerates all vertices in one half-tree, then all vertices in the other half-tree.
+      // If the supernode is in the first half-tree, the limit must be smaller than the number of supernode neighbours (already ensured by the tree creation).
+      // If the supernode is in the second half-tree, the limit must be at least the root plus the first half-tree (2**(depth-1) vertices) plus 2, to additionally enumerate the supernode and one of its neighbours.
+      let limit = 2**(supernodeTreeDepth-1)+2; 
+      db._query(`FOR v IN 0..8 OUTBOUND "SupernodeV/S1:K1" GRAPH "Supernode" LIMIT @limit RETURN v.data`,
+        {
+          limit: limit
+        },
+        {},
+        {silent}
+      );
+    },
+
 
     // /////////////////////////////////////////////////////////////////////////////
     // documentTests
@@ -2619,6 +2637,10 @@ exports.test = function (testParams) {
           name: "k-shortest-any",
           params: { func: kShortestAny }
         },
+        {
+          name: "supernode-traversal-limit",
+          params: { func: supernode_limit }
+        },
         {
           name: "subquery-exists-path",
           params: { func: subqueryExistsPath }