Skip to content
This repository was archived by the owner on Apr 22, 2020. It is now read-only.

Commit 205877b

Browse files
knutwalker authored and jexp committed
Improve Label Propagation (#432)
Following [Near linear time algorithm to detect community structures in large-scale networks][1], change LPA as follows: - Randomize the order in which we process the nodes - Since we are using iterators and want to avoid the allocation of an extra random-order array, we randomly skip and reinsert elements while iterating. This does not truly shuffle the order but just mixes it up a bit. - Run in asynchronous mode - We write to and read from the result array directly during iteration, which achieves asynchronous execution __per-thread__ - Across threads, we may read older data during a single iteration, but this is ok - We implicitly interpret a stale read as "not yet having been processed in this iteration", which further simulates shuffling of the input. It is as if this node would have been processed afterwards - This differs from the stale read issue in #270 in that it only happens during a single iteration, not across multiple iterations - Terminate early if no changes have happened during an iteration - A change only occurs if a node does not have the label that most of its neighbours have - The paper suggests confirming this situation after every iteration instead of using the 'label-has-changed' semantics, but this would require 2 passes per iteration, effectively doubling the runtime - For the sake of performance, we sacrifice situations in which we could converge earlier, or at all, in exchange for completing the algorithm faster Note that the algorithm does not guarantee a deterministic result; different invocations could lead to different clusterings, depending on the actual graph. This is as designed by the authors of the aforementioned paper. Fixes #270 [1]: https://arxiv.org/pdf/0709.2938.pdf * Sometimes the cluster converges in just 2 iterations
1 parent af61d7a commit 205877b

File tree

7 files changed

+437
-69
lines changed

7 files changed

+437
-69
lines changed

algo/src/main/java/org/neo4j/graphalgo/LabelPropagationProc.java

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.neo4j.graphalgo;
22

3-
import com.carrotsearch.hppc.IntDoubleMap;
43
import org.neo4j.graphalgo.core.GraphLoader;
54
import org.neo4j.graphalgo.core.ProcedureConfiguration;
65
import org.neo4j.graphalgo.core.heavyweight.HeavyGraph;
@@ -10,7 +9,7 @@
109
import org.neo4j.graphalgo.core.utils.ProgressTimer;
1110
import org.neo4j.graphalgo.core.utils.TerminationFlag;
1211
import org.neo4j.graphalgo.core.write.Exporter;
13-
import org.neo4j.graphalgo.core.write.OptionalIntDoubleMapTranslator;
12+
import org.neo4j.graphalgo.core.write.IntArrayTranslator;
1413
import org.neo4j.graphalgo.impl.LabelPropagation;
1514
import org.neo4j.graphalgo.results.LabelPropagationStats;
1615
import org.neo4j.graphdb.Direction;
@@ -52,7 +51,7 @@ public final class LabelPropagationProc {
5251
@Description("CALL algo.labelPropagation(" +
5352
"label:String, relationship:String, direction:String, " +
5453
"{iterations:1, weightProperty:'weight', partitionProperty:'partition', write:true, concurrency:4}) " +
55-
"YIELD nodes, iterations, loadMillis, computeMillis, writeMillis, write, weightProperty, partitionProperty - " +
54+
"YIELD nodes, iterations, didConverge, loadMillis, computeMillis, writeMillis, write, weightProperty, partitionProperty - " +
5655
"simple label propagation kernel")
5756
public Stream<LabelPropagationStats> labelPropagation(
5857
@Name(value = "label", defaultValue = "") String label,
@@ -86,10 +85,7 @@ public Stream<LabelPropagationStats> labelPropagation(
8685
concurrency,
8786
stats);
8887

89-
IntDoubleMap labels = compute(direction, iterations, batchSize, concurrency, graph, stats);
90-
91-
stats.nodes(labels.size());
92-
88+
int[] labels = compute(direction, iterations, batchSize, concurrency, graph, stats);
9389
if (configuration.isWriteFlag(DEFAULT_WRITE) && partitionProperty != null) {
9490
write(concurrency, partitionProperty, graph, labels, stats);
9591
}
@@ -122,7 +118,7 @@ private HeavyGraph load(
122118
}
123119
}
124120

125-
private IntDoubleMap compute(
121+
private int[] compute(
126122
Direction direction,
127123
int iterations,
128124
int batchSize,
@@ -133,10 +129,18 @@ private IntDoubleMap compute(
133129
ExecutorService pool = batchSize > 0 ? Pools.DEFAULT : null;
134130
batchSize = Math.max(1, batchSize);
135131
final LabelPropagation labelPropagation = new LabelPropagation(graph, batchSize, concurrency, pool);
136-
final IntDoubleMap result = labelPropagation
137-
.withProgressLogger(ProgressLogger.wrap(log, "LabelPropagation"))
132+
labelPropagation
133+
.withProgressLogger(ProgressLogger.wrap(
134+
log,
135+
"LabelPropagation"))
138136
.withTerminationFlag(TerminationFlag.wrap(transaction))
139137
.compute(direction, iterations);
138+
final int[] result = labelPropagation.labels();
139+
140+
stats.iterations(labelPropagation.ranIterations());
141+
stats.didConverge(labelPropagation.didConverge());
142+
stats.nodes(result.length);
143+
140144
labelPropagation.release();
141145
graph.release();
142146
return result;
@@ -147,7 +151,7 @@ private void write(
147151
int concurrency,
148152
String partitionKey,
149153
HeavyGraph graph,
150-
IntDoubleMap labels,
154+
int[] labels,
151155
LabelPropagationStats.Builder stats) {
152156
stats.write(true);
153157
try (ProgressTimer timer = stats.timeWrite()) {
@@ -158,7 +162,7 @@ private void write(
158162
.write(
159163
partitionKey,
160164
labels,
161-
OptionalIntDoubleMapTranslator.INSTANCE
165+
IntArrayTranslator.INSTANCE
162166
);
163167
}
164168
}

0 commit comments

Comments
 (0)