Skip to content
This repository was archived by the owner on Apr 22, 2020. It is now read-only.

Commit f10151f

Browse files
authored
WIP: Personalized PageRank (#669)
* test showing just normal PageRank
* PPR test WIP
* wip
* more wip
* add calculation to huge as well
* this factory doesn't seem to listen to my undirected suggestion
* Fixing boxing
* fix imports
* fixing a bug in where we set alpha. Offset should be based on our startNode
* filter unmapped nodes
* updating docs to mention PPR
* oops we don't need this
1 parent da626ba commit f10151f

File tree

11 files changed

+615
-89
lines changed

11 files changed

+615
-89
lines changed

algo/src/main/java/org/neo4j/graphalgo/PageRankProc.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.neo4j.graphalgo.impl.PageRankAlgorithm;
3434
import org.neo4j.graphalgo.results.PageRankScore;
3535
import org.neo4j.graphdb.Direction;
36+
import org.neo4j.graphdb.Node;
3637
import org.neo4j.kernel.api.KernelTransaction;
3738
import org.neo4j.kernel.internal.GraphDatabaseAPI;
3839
import org.neo4j.logging.Log;
@@ -42,6 +43,8 @@
4243
import org.neo4j.procedure.Name;
4344
import org.neo4j.procedure.Procedure;
4445

46+
import java.util.ArrayList;
47+
import java.util.List;
4548
import java.util.Map;
4649
import java.util.stream.IntStream;
4750
import java.util.stream.LongStream;
@@ -104,7 +107,7 @@ public Stream<PageRankScore> pageRankStream(
104107
@Name(value = "relationship", defaultValue = "") String relationship,
105108
@Name(value = "config", defaultValue = "{}") Map<String, Object> config) {
106109

107-
ProcedureConfiguration configuration = ProcedureConfiguration.create(config);
110+
ProcedureConfiguration configuration = ProcedureConfiguration.create(config);
108111

109112
PageRankScore.Stats.Builder statsBuilder = new PageRankScore.Stats.Builder();
110113
AllocationTracker tracker = AllocationTracker.create();
@@ -150,13 +153,19 @@ private Graph load(
150153
AllocationTracker tracker,
151154
Class<? extends GraphFactory> graphFactory,
152155
PageRankScore.Stats.Builder statsBuilder, ProcedureConfiguration configuration) {
153-
154156
GraphLoader graphLoader = new GraphLoader(api, Pools.DEFAULT)
155157
.init(log, label, relationship, configuration)
156158
.withAllocationTracker(tracker)
157-
.withDirection(Direction.OUTGOING)
158159
.withoutRelationshipWeights();
159160

161+
Direction direction = configuration.getDirection(Direction.OUTGOING);
162+
if (direction == Direction.BOTH) {
163+
graphLoader.asUndirected(true);
164+
} else {
165+
graphLoader.withDirection(direction);
166+
}
167+
168+
160169
try (ProgressTimer timer = statsBuilder.timeLoad()) {
161170
Graph graph = graphLoader.load(graphFactory);
162171
statsBuilder.withNodes(graph.nodeCount());
@@ -177,10 +186,14 @@ private PageRankResult evaluate(
177186
final int concurrency = configuration.getConcurrency(Pools.getNoThreadsInDefaultPool());
178187
log.debug("Computing page rank with damping of " + dampingFactor + " and " + iterations + " iterations.");
179188

189+
190+
List<Node> sourceNodes = configuration.get("sourceNodes", new ArrayList<>());
191+
LongStream sourceNodeIds = sourceNodes.stream().mapToLong(Node::getId);
180192
PageRankAlgorithm prAlgo = PageRankAlgorithm.of(
181193
tracker,
182194
graph,
183195
dampingFactor,
196+
sourceNodeIds,
184197
Pools.DEFAULT,
185198
concurrency,
186199
batchSize);
@@ -189,6 +202,7 @@ private PageRankResult evaluate(
189202
.withLog(log)
190203
.withTerminationFlag(terminationFlag);
191204

205+
192206
statsBuilder.timeEval(() -> prAlgo.compute(iterations));
193207

194208
statsBuilder

algo/src/main/java/org/neo4j/graphalgo/impl/HugePageRank.java

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,7 @@
2121
import com.carrotsearch.hppc.IntArrayList;
2222
import com.carrotsearch.hppc.LongArrayList;
2323
import org.neo4j.collection.primitive.PrimitiveLongIterator;
24-
import org.neo4j.graphalgo.api.HugeDegrees;
25-
import org.neo4j.graphalgo.api.HugeIdMapping;
26-
import org.neo4j.graphalgo.api.HugeNodeIterator;
27-
import org.neo4j.graphalgo.api.HugeRelationshipConsumer;
28-
import org.neo4j.graphalgo.api.HugeRelationshipIterator;
24+
import org.neo4j.graphalgo.api.*;
2925
import org.neo4j.graphalgo.core.utils.ParallelUtil;
3026
import org.neo4j.graphalgo.core.utils.paged.AllocationTracker;
3127
import org.neo4j.graphalgo.core.write.Exporter;
@@ -39,14 +35,11 @@
3935
import java.util.Iterator;
4036
import java.util.List;
4137
import java.util.concurrent.ExecutorService;
38+
import java.util.stream.LongStream;
4239

4340
import static org.neo4j.graphalgo.core.utils.ArrayUtil.binaryLookup;
4441
import static org.neo4j.graphalgo.core.utils.paged.AllocationTracker.humanReadable;
45-
import static org.neo4j.graphalgo.core.utils.paged.MemoryUsage.shallowSizeOfInstance;
46-
import static org.neo4j.graphalgo.core.utils.paged.MemoryUsage.sizeOfDoubleArray;
47-
import static org.neo4j.graphalgo.core.utils.paged.MemoryUsage.sizeOfIntArray;
48-
import static org.neo4j.graphalgo.core.utils.paged.MemoryUsage.sizeOfLongArray;
49-
import static org.neo4j.graphalgo.core.utils.paged.MemoryUsage.sizeOfObjectArray;
42+
import static org.neo4j.graphalgo.core.utils.paged.MemoryUsage.*;
5043

5144

5245
/**
@@ -111,6 +104,8 @@ public class HugePageRank extends Algorithm<HugePageRank> implements PageRankAlg
111104
private final HugeRelationshipIterator relationshipIterator;
112105
private final HugeDegrees degrees;
113106
private final double dampingFactor;
107+
private final HugeGraph graph;
108+
private LongStream sourceNodeIds;
114109

115110
private Log log;
116111
private ComputeSteps computeSteps;
@@ -121,21 +116,17 @@ public class HugePageRank extends Algorithm<HugePageRank> implements PageRankAlg
121116
*/
122117
HugePageRank(
123118
AllocationTracker tracker,
124-
HugeIdMapping idMapping,
125-
HugeNodeIterator nodeIterator,
126-
HugeRelationshipIterator relationshipIterator,
127-
HugeDegrees degrees,
128-
double dampingFactor) {
119+
HugeGraph graph,
120+
double dampingFactor,
121+
LongStream sourceNodeIds) {
129122
this(
130123
null,
131124
-1,
132125
ParallelUtil.DEFAULT_BATCH_SIZE,
133126
tracker,
134-
idMapping,
135-
nodeIterator,
136-
relationshipIterator,
137-
degrees,
138-
dampingFactor);
127+
graph,
128+
dampingFactor,
129+
sourceNodeIds);
139130
}
140131

141132
/**
@@ -148,20 +139,20 @@ public class HugePageRank extends Algorithm<HugePageRank> implements PageRankAlg
148139
int concurrency,
149140
int batchSize,
150141
AllocationTracker tracker,
151-
HugeIdMapping idMapping,
152-
HugeNodeIterator nodeIterator,
153-
HugeRelationshipIterator relationshipIterator,
154-
HugeDegrees degrees,
155-
double dampingFactor) {
142+
HugeGraph graph,
143+
double dampingFactor,
144+
LongStream sourceNodeIds) {
156145
this.executor = executor;
157146
this.concurrency = concurrency;
158147
this.batchSize = batchSize;
159148
this.tracker = tracker;
160-
this.idMapping = idMapping;
161-
this.nodeIterator = nodeIterator;
162-
this.relationshipIterator = relationshipIterator;
163-
this.degrees = degrees;
149+
this.idMapping = graph;
150+
this.nodeIterator = graph;
151+
this.relationshipIterator = graph;
152+
this.degrees = graph;
153+
this.graph = graph;
164154
this.dampingFactor = dampingFactor;
155+
this.sourceNodeIds = sourceNodeIds;
165156
}
166157

167158
/**
@@ -209,6 +200,7 @@ private void initializeSteps() {
209200
concurrency,
210201
idMapping.nodeCount(),
211202
dampingFactor,
203+
sourceNodeIds.map(graph::toHugeMappedNodeId).filter(mappedId -> mappedId != -1L).toArray(),
212204
relationshipIterator,
213205
degrees,
214206
partitions,
@@ -246,6 +238,7 @@ private ComputeSteps createComputeSteps(
246238
int concurrency,
247239
long nodeCount,
248240
double dampingFactor,
241+
long[] sourceNodeIds,
249242
HugeRelationshipIterator relationshipIterator,
250243
HugeDegrees degrees,
251244
List<Partition> partitions,
@@ -281,6 +274,7 @@ private ComputeSteps createComputeSteps(
281274

282275
computeSteps.add(new ComputeStep(
283276
dampingFactor,
277+
sourceNodeIds,
284278
relationshipIterator,
285279
degrees,
286280
tracker,
@@ -542,6 +536,7 @@ private static final class ComputeStep implements Runnable, HugeRelationshipCons
542536

543537
private long[] starts;
544538
private int[] lengths;
539+
private long[] sourceNodeIds;
545540
private final HugeRelationshipIterator relationshipIterator;
546541
private final HugeDegrees degrees;
547542
private final AllocationTracker tracker;
@@ -562,13 +557,15 @@ private static final class ComputeStep implements Runnable, HugeRelationshipCons
562557

563558
ComputeStep(
564559
double dampingFactor,
560+
long[] sourceNodeIds,
565561
HugeRelationshipIterator relationshipIterator,
566562
HugeDegrees degrees,
567563
AllocationTracker tracker,
568564
int partitionSize,
569565
long startNode) {
570566
this.dampingFactor = dampingFactor;
571567
this.alpha = 1.0 - dampingFactor;
568+
this.sourceNodeIds = sourceNodeIds;
572569
this.relationshipIterator = relationshipIterator.concurrentCopy();
573570
this.degrees = degrees;
574571
this.tracker = tracker;
@@ -606,8 +603,21 @@ private void initialize() {
606603
});
607604

608605
tracker.add(sizeOfDoubleArray(partitionSize) << 1);
606+
609607
double[] partitionRank = new double[partitionSize];
610-
Arrays.fill(partitionRank, alpha);
608+
if(sourceNodeIds.length == 0) {
609+
Arrays.fill(partitionRank, alpha);
610+
} else {
611+
Arrays.fill(partitionRank,0);
612+
613+
long[] partitionSourceNodeIds = LongStream.of(sourceNodeIds)
614+
.filter(sourceNodeId -> sourceNodeId >= startNode && sourceNodeId <= endNode)
615+
.toArray();
616+
617+
for (long sourceNodeId : partitionSourceNodeIds) {
618+
partitionRank[Math.toIntExact(sourceNodeId - this.startNode)] = alpha;
619+
}
620+
}
611621

612622
this.pageRank = partitionRank;
613623
this.deltas = Arrays.copyOf(partitionRank, partitionSize);

0 commit comments

Comments
 (0)