Skip to content
This repository was archived by the owner on Apr 22, 2020. It is now read-only.

Commit 034acb1

Browse files
authored
Adamic Adar and Resource Allocation Link Prediction Algorithms (#779)
* adamic adar algorithm * resource allocation * fix link * Rework adamic adar and source allocation * adamic adar docs * update docs * double check for no reason * docs for resource alloc * fix clash of ids * update svg * update svg * move link prediction algos into their own package * move package * update all packages * update description * no need for graph db in Similarities now * move everything into linkprediction package * don't need this test * put link prediction algos into content map * typo * better explanation * Add Resource Allocation to readme * Refactoring Adamic Adar to remove 'similarity' * Refactoring Resource Allocation to remove 'similarity' * Michael Resource Allocation feedback * Michael Adamic Adar feedback * unnecessary test now
1 parent a45d04e commit 034acb1

File tree

16 files changed

+806
-3
lines changed

16 files changed

+806
-3
lines changed
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/**
2+
* Copyright (c) 2017 "Neo4j, Inc." <http://neo4j.com>
3+
* <p>
4+
* This file is part of Neo4j Graph Algorithms <http://github.com/neo4j-contrib/neo4j-graph-algorithms>.
5+
* <p>
6+
* Neo4j Graph Algorithms is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU General Public License as published by
8+
* the Free Software Foundation, either version 3 of the License, or
9+
* (at your option) any later version.
10+
* <p>
11+
* This program is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU General Public License for more details.
15+
* <p>
16+
* You should have received a copy of the GNU General Public License
17+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
*/
19+
package org.neo4j.graphalgo.linkprediction;
20+
21+
import org.neo4j.graphalgo.core.ProcedureConfiguration;
22+
import org.neo4j.graphdb.Direction;
23+
import org.neo4j.graphdb.Node;
24+
import org.neo4j.graphdb.Relationship;
25+
import org.neo4j.graphdb.RelationshipType;
26+
import org.neo4j.procedure.Description;
27+
import org.neo4j.procedure.Name;
28+
import org.neo4j.procedure.UserFunction;
29+
30+
import java.util.*;
31+
32+
public class LinkPrediction {
33+
34+
@UserFunction("algo.linkprediction.adamicAdar")
35+
@Description("algo.linkprediction.adamicAdar(node1:Node, node2:Node, {relationshipQuery:'relationshipName', direction:'BOTH'}) " +
36+
"given two nodes, calculate Adamic Adar similarity")
37+
public double adamicAdarSimilarity(@Name("node1") Node node1, @Name("node2") Node node2,
38+
@Name(value = "config", defaultValue = "{}") Map<String, Object> config) {
39+
// https://en.wikipedia.org/wiki/Adamic/Adar_index
40+
41+
if (node1 == null || node2 == null) {
42+
throw new RuntimeException("Nodes must not be null");
43+
}
44+
45+
ProcedureConfiguration configuration = ProcedureConfiguration.create(config);
46+
RelationshipType relationshipType = configuration.getRelationship();
47+
Direction direction = configuration.getDirection(Direction.BOTH);
48+
49+
Set<Node> neighbors = findPotentialNeighbors(node1, relationshipType, direction);
50+
neighbors.removeIf(node -> noCommonNeighbors(node, relationshipType, direction, node2));
51+
return neighbors.stream().mapToDouble(nb -> 1.0 / Math.log(degree(relationshipType, direction, nb))).sum();
52+
}
53+
54+
@UserFunction("algo.linkprediction.resourceAllocation")
55+
@Description("algo.linkprediction.resourceAllocation(node1:Node, node2:Node, {relationshipQuery:'relationshipName', direction:'BOTH'}) " +
56+
"given two nodes, calculate Resource Allocation similarity")
57+
public double resourceAllocationSimilarity(@Name("node1") Node node1, @Name("node2") Node node2,
58+
@Name(value = "config", defaultValue = "{}") Map<String, Object> config) {
59+
// https://arxiv.org/pdf/0901.0553.pdf
60+
61+
if (node1 == null || node2 == null) {
62+
throw new RuntimeException("Nodes must not be null");
63+
}
64+
65+
ProcedureConfiguration configuration = ProcedureConfiguration.create(config);
66+
RelationshipType relationshipType = configuration.getRelationship();
67+
Direction direction = configuration.getDirection(Direction.BOTH);
68+
69+
Set<Node> neighbors = findPotentialNeighbors(node1, relationshipType, direction);
70+
neighbors.removeIf(node -> noCommonNeighbors(node, relationshipType, direction, node2));
71+
return neighbors.stream().mapToDouble(nb -> 1.0 / degree(relationshipType, direction, nb)).sum();
72+
}
73+
74+
private Set<Node> findPotentialNeighbors(@Name("node1") Node node1, RelationshipType relationshipType, Direction direction) {
75+
Set<Node> neighbors = new HashSet<>();
76+
77+
for (Relationship rel : loadRelationships(node1, relationshipType, direction)) {
78+
Node endNode = rel.getEndNode();
79+
neighbors.add(endNode);
80+
}
81+
return neighbors;
82+
}
83+
84+
private int degree(RelationshipType relationshipType, Direction direction, Node node) {
85+
return relationshipType == null ? node.getDegree(direction) : node.getDegree(relationshipType, direction);
86+
}
87+
88+
private Iterable<Relationship> loadRelationships(Node node, RelationshipType relationshipType, Direction direction) {
89+
return relationshipType == null ? node.getRelationships(direction) : node.getRelationships(relationshipType, direction);
90+
}
91+
92+
private boolean noCommonNeighbors(Node node, RelationshipType relationshipType, Direction direction, Node node2) {
93+
for (Relationship rel : loadRelationships(node, relationshipType, direction)) {
94+
if (rel.getOtherNode(node).equals(node2)) {
95+
return false;
96+
}
97+
}
98+
return true;
99+
}
100+
101+
102+
}

algo/src/main/java/org/neo4j/graphalgo/similarity/Similarities.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,13 @@
2222
import com.carrotsearch.hppc.LongDoubleMap;
2323
import com.carrotsearch.hppc.LongHashSet;
2424
import com.carrotsearch.hppc.LongSet;
25-
import com.carrotsearch.hppc.cursors.LongCursor;
2625
import org.neo4j.graphalgo.core.ProcedureConfiguration;
2726
import org.neo4j.graphalgo.core.utils.Intersections;
28-
import org.neo4j.procedure.*;
27+
import org.neo4j.procedure.Description;
28+
import org.neo4j.procedure.Name;
29+
import org.neo4j.procedure.UserAggregationFunction;
30+
import org.neo4j.procedure.UserFunction;
2931

30-
import java.util.HashMap;
3132
import java.util.HashSet;
3233
import java.util.List;
3334
import java.util.Map;
@@ -178,4 +179,5 @@ public double overlapSimilarity(@Name("vector1") List<Number> vector1, @Name("ve
178179
long denominator = Math.min(vector1.size(), vector2.size());
179180
return denominator == 0 ? 0 : (double) intersection / denominator;
180181
}
182+
181183
}

core/src/main/java/org/neo4j/graphalgo/core/ProcedureConfiguration.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.neo4j.graphalgo.core.utils.ParallelUtil;
3333
import org.neo4j.graphalgo.core.utils.Pools;
3434
import org.neo4j.graphdb.Direction;
35+
import org.neo4j.graphdb.RelationshipType;
3536

3637
import java.util.Collections;
3738
import java.util.HashMap;
@@ -276,6 +277,10 @@ public Direction getDirection(Direction defaultDirection) {
276277
return Directions.fromString(getDirectionName(defaultDirection.name()));
277278
}
278279

280+
public RelationshipType getRelationship() {
281+
return getRelationshipOrQuery() == null ? null : RelationshipType.withName(getRelationshipOrQuery());
282+
}
283+
279284
/**
 * Reads the graph name from this configuration.
 *
 * @param defaultValue passed to {@code getString} as its second argument;
 *                     presumably returned when the key is absent (confirm against getString)
 * @return the result of {@code getString(ProcedureConstants.GRAPH_IMPL_PARAM, defaultValue)}
 */
public String getGraphName(String defaultValue) {
280285
return getString(ProcedureConstants.GRAPH_IMPL_PARAM, defaultValue);
281286
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[[algorithms-linkprediction]]
2+
= Link Prediction algorithms
3+
4+
ifdef::env-docs[]
5+
[abstract]
6+
--
7+
This chapter provides explanations and examples for each of the link prediction algorithms in the Neo4j Graph Algorithms library.
8+
--
9+
endif::env-docs[]
10+
11+
These algorithms help determine the closeness of a pair of nodes.
12+
We would then use the computed scores as part of a link prediction solution:
13+
14+
* <<algorithms-linkprediction-adamic-adar, Adamic Adar Similarity>> (`algo.linkprediction.adamicAdar`)
15+
* <<algorithms-linkprediction-resource-allocation, Resource Allocation Similarity>> (`algo.linkprediction.resourceAllocation`)
16+
17+
include::linkprediction-adamic-adar.adoc[leveloffset=2]
18+
include::linkprediction-resource-allocation.adoc[leveloffset=2]

doc/asciidoc/index.adoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ The guide covers the following areas:
3232
* <<algorithms-community>> -- A detailed guide to each of the community detection algorithms, including use-cases and examples.
3333
* <<algorithms-path-finding>> -- A detailed guide to each of the path finding algorithms, including use-cases and examples.
3434
* <<algorithms-similarity>> -- A detailed guide to each of the similarity algorithms, including use-cases and examples.
35+
* <<algorithms-linkprediction>> -- A detailed guide to each of the link prediction algorithms, including use-cases and examples.
3536
* <<algorithms-preprocessing>> -- A detailed guide to each of the preprocessing functions and procedures.
3637
3738
@@ -76,6 +77,7 @@ include::algorithms-centrality.adoc[leveloffset=1]
7677
include::algorithms-community.adoc[leveloffset=1]
7778
include::algorithms-path-finding.adoc[leveloffset=1]
7879
include::algorithms-similarity.adoc[leveloffset=1]
80+
include::algorithms-link-prediction.adoc[leveloffset=1]
7981
include::algorithms-preprocessing.adoc[leveloffset=1]
8082

8183

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
[[algorithms-linkprediction-adamic-adar]]
2+
= The Adamic Adar algorithm
3+
4+
[abstract]
5+
--
6+
This section describes the Adamic Adar algorithm in the Neo4j Graph Algorithms library.
7+
--
8+
9+
// tag::introduction[]
10+
link:https://en.wikipedia.org/wiki/Adamic/Adar_index[Adamic Adar] is a measure used to compute the closeness of nodes based on their shared neighbors.
11+
// end::introduction[]
12+
13+
14+
[[algorithms-linkprediction-adamic-adar-context]]
15+
== History and explanation
16+
17+
// tag::explanation[]
18+
19+
The Adamic Adar algorithm was introduced in 2003 by Lada Adamic and Eytan Adar to https://www.semanticscholar.org/paper/Friends-and-neighbors-on-the-Web-Adamic-Adar/39348c10c90be968357e2a6b65d5e0e479307735[predict links in a social network^].
20+
It is computed using the following formula:
21+
22+
image::adamic-adar.svg[role="middle"]
23+
24+
where `N(u)` is the set of nodes adjacent to `u`.
25+
26+
A value of 0 indicates that two nodes are not close, while higher values indicate nodes are closer.
27+
28+
The library contains a function to calculate closeness between two nodes.
29+
30+
// end::explanation[]
31+
32+
33+
[[algorithms-linkprediction-adamic-adar-sample]]
34+
== Adamic Adar algorithm sample
35+
36+
.The following will create a sample graph:
37+
[source, cypher]
38+
----
39+
include::scripts/linkprediction-adamic-adar.cypher[tag=create-sample-graph]
40+
----
41+
42+
.The following will return the Adamic Adar score for Michael and Karin:
43+
[source, cypher]
44+
----
45+
include::scripts/linkprediction-adamic-adar.cypher[tag=all-rels]
46+
----
47+
48+
// tag::function[]
49+
.Results
50+
[opts="header",cols="1"]
51+
|===
52+
| `score`
53+
| 1.4426950408889634
54+
|===
55+
// end::function[]
56+
57+
58+
We can also compute the score of a pair of nodes based on a specific relationship type.
59+
60+
.The following will return the Adamic Adar score for Michael and Karin based only on the `FRIENDS` relationships:
61+
[source, cypher]
62+
----
63+
include::scripts/linkprediction-adamic-adar.cypher[tag=only-friends]
64+
----
65+
66+
// tag::function[]
67+
.Results
68+
[opts="header",cols="1"]
69+
|===
70+
| `score`
71+
| 0.0
72+
|===
73+
// end::function[]
74+
75+
76+
[[algorithms-linkprediction-adamic-adar-syntax]]
77+
== Syntax
78+
79+
.The following will run the algorithm and return the result:
80+
[source, cypher]
81+
----
82+
RETURN algo.linkprediction.adamicAdar(node1:Node, node2:Node, {
83+
relationshipQuery: null,
84+
direction: "BOTH"
85+
})
86+
----
87+
88+
89+
.Parameters
90+
[opts="header",cols="1,1,1,1,4"]
91+
|===
92+
| Name | Type | Default | Optional | Description
93+
| `node1` | Node | null | no | A node
94+
| `node2` | Node | null | no | Another node
95+
| `relationshipQuery` | String | null | yes | The relationship type used to compute similarity between `node1` and `node2`
96+
| `direction` | String | BOTH | yes | The direction of the relationships used to compute similarity between `node1` and `node2`
97+
|===
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
[[algorithms-linkprediction-resource-allocation]]
2+
= The Resource Allocation algorithm
3+
4+
[abstract]
5+
--
6+
This section describes the Resource Allocation algorithm in the Neo4j Graph Algorithms library.
7+
--
8+
9+
// tag::introduction[]
10+
link:https://arxiv.org/pdf/0901.0553.pdf[Resource Allocation] is a measure used to compute the closeness of nodes based on their shared neighbors.
11+
// end::introduction[]
12+
13+
14+
[[algorithms-linkprediction-resource-allocation-context]]
15+
== History and explanation
16+
17+
// tag::explanation[]
18+
19+
The Resource Allocation algorithm was introduced in 2009 by Tao Zhou, Linyuan Lü, and Yi-Cheng Zhang as part of a study to predict links in various networks.
20+
It is computed using the following formula:
21+
22+
image::resource-allocation.svg[role="middle"]
23+
24+
where `N(u)` is the set of nodes adjacent to `u`.
25+
26+
A value of 0 indicates that two nodes are not close, while higher values indicate nodes are closer.
27+
28+
The library contains a function to calculate closeness between two nodes.
29+
30+
// end::explanation[]
31+
32+
33+
[[algorithms-linkprediction-resource-allocation-sample]]
34+
== Resource Allocation algorithm sample
35+
36+
.The following will create a sample graph:
37+
[source, cypher]
38+
----
39+
include::scripts/linkprediction-resource-allocation.cypher[tag=create-sample-graph]
40+
----
41+
42+
.The following will return the Resource Allocation score for Michael and Karin:
43+
[source, cypher]
44+
----
45+
include::scripts/linkprediction-resource-allocation.cypher[tag=all-rels]
46+
----
47+
48+
// tag::function[]
49+
.Results
50+
[opts="header",cols="1"]
51+
|===
52+
| `score`
53+
| 0.5
54+
|===
55+
// end::function[]
56+
57+
58+
We can also compute the score of a pair of nodes based on a specific relationship type.
59+
60+
.The following will return the Resource Allocation score for Michael and Karin based only on the `FRIENDS` relationships:
61+
[source, cypher]
62+
----
63+
include::scripts/linkprediction-resource-allocation.cypher[tag=only-friends]
64+
----
65+
66+
// tag::function[]
67+
.Results
68+
[opts="header",cols="1"]
69+
|===
70+
| `score`
71+
| 0.0
72+
|===
73+
// end::function[]
74+
75+
76+
[[algorithms-linkprediction-resource-allocation-syntax]]
77+
== Syntax
78+
79+
.The following will run the algorithm and return the result:
80+
[source, cypher]
81+
----
82+
RETURN algo.linkprediction.resourceAllocation(node1:Node, node2:Node, {
83+
relationshipQuery: null,
84+
direction: "BOTH"
85+
})
86+
----
87+
88+
89+
.Parameters
90+
[opts="header",cols="1,1,1,1,4"]
91+
|===
92+
| Name | Type | Default | Optional | Description
93+
| `node1` | Node | null | no | A node
94+
| `node2` | Node | null | no | Another node
95+
| `relationshipQuery` | String | null | yes | The relationship type to use to compute similarity between `node1` and `node2`
96+
| `direction` | String | BOTH | yes | The direction of the relationships used to compute similarity between `node1` and `node2`
97+
|===

0 commit comments

Comments
 (0)