From 566ff5fce9a66a809e78a2f103c18f3e297962b9 Mon Sep 17 00:00:00 2001 From: Julia Volmer Date: Mon, 10 Nov 2025 13:07:52 +0100 Subject: [PATCH] Add limited-traversal query on a binary tree with one supernode --- simple/{BIGvertices.js => binaryTrees.js} | 113 ++++++++++++++-------- simple/test.js | 42 ++++++-- 2 files changed, 103 insertions(+), 52 deletions(-) rename simple/{BIGvertices.js => binaryTrees.js} (53%) diff --git a/simple/BIGvertices.js b/simple/binaryTrees.js similarity index 53% rename from simple/BIGvertices.js rename to simple/binaryTrees.js index 159f8d0..6c60201 100644 --- a/simple/BIGvertices.js +++ b/simple/binaryTrees.js @@ -1,39 +1,3 @@ -// This script can create a binary tree in ArangoDB with relatively large -// vertex documents (approx. 1 MB each). You can give the depth and you can -// choose what type of graph to create. -// -// Usage: -// -// makeGraph("G", "V", "E") - creates a general graph with name G, vertex -// collection V and edge collection E -// makeTree(6, "V", "E") - creates the actual tree after the graph was created -// 6 is the depth and one has to name the vertex and -// edge collections -// -// The following AQL query and its performance could be of interest: -// -// FOR v IN 0..6 OUTBOUND "V/S1:K1" GRAPH "G" -// RETURN v.data -// -// This traverses the whole graph starting from the root but retrieves only -// a tiny part of the vertex data. This tests the 3.10 feature of -// traversal projections. You can see that it does this from this explain -// output for the above query: -// -// Query String (58 chars, cacheable: true): -// FOR v IN 0..6 OUTBOUND "V/S1:K1" GRAPH "G" -// RETURN v.data -// -// Execution plan: -// Id NodeType Site Est. 
Comment -// 1 SingletonNode COOR 1 * ROOT -// 2 TraversalNode COOR 64 - FOR v /* vertex (projections: `data`) */ IN 0..6 /* min..maxPathDepth */ OUTBOUND 'V/S1:K1' /* startnode */ GRAPH 'G' -// 3 CalculationNode COOR 64 - LET #3 = v.`data` /* attribute expression */ -// 4 ReturnNode COOR 64 - RETURN #3 -// -// In the line with Id 2 you can see that the TraversalNode uses a projection to the field `data`. -// - rand = require("internal").rand; time = require("internal").time; @@ -48,7 +12,7 @@ function makeRandomString(l) { return s; } -function makeGraph(graphName, vertexCollName, edgeCollName) { +function createGraph(graphName, vertexCollName, edgeCollName) { let graph = require("@arangodb/general-graph"); try { graph._drop(graphName, true); @@ -62,25 +26,50 @@ function makeKey(i) { return "S" + (i % 3) + ":K" + i; } -function makeTree(depth, vertexCollName, edgeCollName) { +// creates a binary tree where every vertex includes one megabyte of data +// +// The following AQL query and its performance could be of interest: +// +// FOR v IN 0..6 OUTBOUND "V/S1:K1" GRAPH "G" +// RETURN v.smallData +// +// This traverses the whole graph starting from the root but retrieves only +// a tiny part of the vertex data. This tests the 3.10 feature of +// traversal projections. You can see that it does this from this explain +// output for the above query: +// +// Query String (63 chars, cacheable: true): +// FOR v IN 0..6 OUTBOUND "V/S1:K1" GRAPH "G" +// RETURN v.smallData +// +// Execution plan: +// Id NodeType Site Est. Comment +// 1 SingletonNode COOR 1 * ROOT +// 2 TraversalNode COOR 64 - FOR v /* vertex (projections: `smallData`) */ IN 0..6 /* min..maxPathDepth */ OUTBOUND 'V/S1:K1' /* startnode */ GRAPH 'G' +// 3 CalculationNode COOR 64 - LET #3 = v.`smallData` /* attribute expression */ +// 4 ReturnNode COOR 64 - RETURN #3 +// +// In the line with Id 2 you can see that the TraversalNode uses a projection to the field `smallData`. 
+function makeTreeWithLargeData(graphName, vertexCollName, edgeCollName, depth) { + createGraph(graphName, vertexCollName, edgeCollName); let V = db._collection(vertexCollName); let E = db._collection(edgeCollName); + + // create vertices: klumpen is one shared payload object (1000 attributes from makeRandomString(1024)) that is re-labelled and inserted once per vertex let klumpen = {}; for (let i = 0; i < 1000; ++i) { klumpen["K"+i] = makeRandomString(1024); } for (let i = 1; i <= 2 ** depth - 1; ++i) { let v = klumpen; - v.data = "D"+i; + v.smallData = "D"+i; v.smart = "S"+(i % 3); v._key = makeKey(i); V.insert(v); print("Have created", i, "vertices out of", 2 ** depth - 1); } - // This is now a gigabyte of data, one megabyte per vertex. - - // Make a binary tree: + // make a binary tree from these vertices: vertex i gets the children 2*i and 2*i+1 for (let i = 1; i <= 2 ** (depth - 1) - 1; ++i) { let e = { _from: vertexCollName + "/" + makeKey(i), _to: vertexCollName + "/" + makeKey(2 * i)}; @@ -89,5 +78,45 @@ function makeTree(depth, vertexCollName, edgeCollName) { _to: vertexCollName + "/" + makeKey(2 * i + 1)}; E.insert(e); } +} + +// creates a binary tree with vertex 2 being a supernode +// 1 +// / \ +// 3 2 with additional superNodeSize neighbours +// / \ / \ +// 7 6 5 4 +// ... 
+function makeTreeWithSupernode(graphName, vertexCollName, edgeCollName, depth, superNodeSize) { + createGraph(graphName, vertexCollName, edgeCollName); + let V = db._collection(vertexCollName); + let E = db._collection(edgeCollName); + + // Add 2^depth - 1 vertices for the tree plus superNodeSize extra vertices (indices 2^depth .. 2^depth - 1 + superNodeSize) in one batch insert + let docs = []; + for (let i = 1; i <= 2**depth-1+superNodeSize; ++i) { + docs.push({data: "D"+i, smart: "S"+(i%3), _key: makeKey(i)}); + } + V.insert(docs); + + // make a binary tree from the first 2^depth - 1 vertices: vertex i gets the children 2*i and 2*i+1 + docs = []; + for (let i = 1; i <= 2 ** (depth - 1) - 1; ++i) { + docs.push({ _from: vertexCollName + "/" + makeKey(i), + _to: vertexCollName + "/" + makeKey(2 * i)}); + docs.push({ _from: vertexCollName + "/" + makeKey(i), + _to: vertexCollName + "/" + makeKey(2 * i + 1)}); + } + E.insert(docs); + + // make vertex 2 a supernode by linking it to every extra vertex; _to must go through makeKey so it matches the _key of the inserted documents + if (depth > 1) { + docs = []; + let key = makeKey(2); + for (let j=1; j <= superNodeSize; j++) { + docs.push({_from: vertexCollName + "/" + key, _to: vertexCollName + "/" + makeKey(2 ** depth - 1 + j)}); + } + E.insert(docs); + } } diff --git a/simple/test.js b/simple/test.js index f1277c1..0d61135 100644 --- a/simple/test.js +++ b/simple/test.js @@ -1,6 +1,6 @@ "use strict"; /* jshint globalstrict:false, strict:false, maxlen: 500 */ -/* global GLOBAL, makeGraph, makeTree */ +/* global GLOBAL, makeTreeWithLargeData, makeTreeWithSupernode */ const internal = require("internal"); const arango = internal.arango; @@ -9,10 +9,10 @@ const fs = require("fs"); const semver = require("semver"); const _ = require("lodash"); const db = require("org/arangodb").db; -require("internal").load("simple/BIGvertices.js");// makeGraph, makeTree +require("internal").load("simple/binaryTrees.js");// makeTreeWithLargeData, makeTreeWithSupernode GLOBAL.returnValue = 0; - +var supernodeTreeDepth = 0; // required for supernode_limit test function sum (values) { if (values.length > 1) { @@ -513,16 +513,19 @@ exports.test = function (testParams) { 
createEdges(1000); } else if (testParams.small) { createEdges(10000); - makeGraph("Tree", "TreeV", "TreeE"); - makeTree(6, "TreeV", "TreeE"); + makeTreeWithLargeData("Tree", "TreeV", "TreeE", 6); + supernodeTreeDepth = 4; + makeTreeWithSupernode("Supernode", "SupernodeV", "SupernodeE", supernodeTreeDepth, 5000); } else if (testParams.medium) { createEdges(100000); - makeGraph("Tree", "TreeV", "TreeE"); - makeTree(7, "TreeV", "TreeE"); + makeTreeWithLargeData("Tree", "TreeV", "TreeE", 7); + supernodeTreeDepth = 5; + makeTreeWithSupernode("Supernode", "SupernodeV", "SupernodeE", supernodeTreeDepth, 10000); } else if (testParams.big) { createEdges(1000000); - makeGraph("Tree", "TreeV", "TreeE"); - makeTree(8, "TreeV", "TreeE"); + makeTreeWithLargeData("Tree", "TreeV", "TreeE", 8); + supernodeTreeDepth = 6; + makeTreeWithSupernode("Supernode", "SupernodeV", "SupernodeE", supernodeTreeDepth, 100000); } internal.wal.flush(true, true); @@ -1034,7 +1037,7 @@ exports.test = function (testParams) { traversalProjections = function (params) { // Note that depth 8 is good for all three sizes small (6), medium (7) // and big (8). Depending on the size, we create a different tree. 
- db._query(`FOR v IN 0..8 OUTBOUND "TreeV/S1:K1" GRAPH "Tree" RETURN v.data`, {}, {}, {silent}); + db._query(`FOR v IN 0..8 OUTBOUND "TreeV/S1:K1" GRAPH "Tree" RETURN v.smallData`, {}, {}, {silent}); }, outbound = function (params) { @@ -1153,6 +1156,21 @@ exports.test = function (testParams) { { silent } ); }, + supernode_limit = function (params) { + // limit the output vertices so that at least one of the supernode's neighbours is in the result, but not all of them + // dfs first enumerates all vertices in one half-tree, then all vertices in the other half-tree + // if the supernode is in the first half-tree, limit should be smaller than the number of supernode neighbours (is already assured by tree creation) + // if the supernode is in the second half-tree, limit should be at least the root plus the first half-tree (2**(depth-1) vertices in total) plus 2 to additionally enumerate the supernode and one of its neighbours + let limit = 2**(supernodeTreeDepth-1)+2; + db._query(`FOR v IN 0..8 OUTBOUND "SupernodeV/S1:K1" GRAPH "Supernode" LIMIT @limit RETURN v.data`, + { + limit: limit + }, + {}, + {silent} + ); + }, + // ///////////////////////////////////////////////////////////////////////////// // documentTests @@ -2619,6 +2637,10 @@ exports.test = function (testParams) { name: "k-shortest-any", params: { func: kShortestAny } }, + { + name: "supernode-traversal-limit", + params: { func: supernode_limit } + }, { name: "subquery-exists-path", params: { func: subqueryExistsPath }