@@ -242,6 +242,70 @@ detectCommunitiesWithKCoreDecomposition() {
242242 calculateCommunityMetrics " ${@ } " " ${writePropertyName} "
243243}
244244
# Node Embeddings using Fast Random Projection (FastRP), prepared for HDBSCAN
#
# Runs the FastRP "mutate" query so that the embeddings are written into the
# in-memory projection. Two query parameters are fixed by this function and
# appended after the caller's parameters:
# - dependencies_projection_write_property=embeddingsFastRandomProjection
# - dependencies_projection_embedding_dimension=2
#
# Globals:
#   CYPHER_DIR (read) - base directory of the Cypher query files
#
# Required Parameters:
# - dependencies_projection=...
#   Name prefix for the in-memory projection name for dependencies. Example: "package"
# - dependencies_projection_node=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - dependencies_projection_weight_property=...
#   Name of the property that contains the dependency weight. Example: "weight"
nodeEmbeddingsWithFastRandomProjectionForHDBSCAN() {
  local NODE_EMBEDDINGS_CYPHER_DIR="${CYPHER_DIR}/Node_Embeddings"
  local mutatePropertyName="dependencies_projection_write_property=embeddingsFastRandomProjection"
  local embeddingsDimension="dependencies_projection_embedding_dimension=2"

  # Statistics (currently disabled)
  # execute_cypher "${NODE_EMBEDDINGS_CYPHER_DIR}/Node_Embeddings_1a_Fast_Random_Projection_Estimate.cypher" "${@}" "${mutatePropertyName}" "${embeddingsDimension}"
  # execute_cypher "${NODE_EMBEDDINGS_CYPHER_DIR}/Node_Embeddings_1b_Fast_Random_Projection_Statistics.cypher" "${@}" "${embeddingsDimension}"

  # Run the algorithm and write the result into the in-memory projection ("mutate").
  # Every argument is quoted so that each "key=value" pair reaches execute_cypher
  # as exactly one word, independent of the current IFS.
  execute_cypher "${NODE_EMBEDDINGS_CYPHER_DIR}/Node_Embeddings_1c_Fast_Random_Projection_Mutate.cypher" "${@}" "${mutatePropertyName}" "${embeddingsDimension}"
}
266+
# Community Detection using Hierarchical Density-Based Spatial Clustering (HDBSCAN) Algorithm
#
# Steps, in order:
#   1. Run the HDBSCAN estimate and statistics queries.
#   2. Run the HDBSCAN "mutate" query (result goes into the in-memory projection).
#   3. Stream the mutated, grouped result into
#      "${FULL_REPORT_DIRECTORY}/<node label>_Communities_HDBSCAN.csv".
#   4. Write the mutated property to the graph, then delete and re-add the
#      label given by "dependencies_projection_write_label=HDBSCAN".
#   5. Call calculateCommunityMetrics for the written property.
#
# The write property is fixed to "communityHdbscanLabel" and is appended to the
# caller's parameters as "dependencies_projection_write_property=...".
#
# Globals:
#   CYPHER_DIR (read)            - base directory of the Cypher query files
#   FULL_REPORT_DIRECTORY (read) - directory that receives the CSV report
#
# Required Parameters:
# - dependencies_projection=...
#   Name prefix for the in-memory projection name for dependencies. Example: "package"
# - dependencies_projection_node=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - dependencies_projection_weight_property=...
#   Name of the property that contains the dependency weight. Example: "weight"
# - dependencies_projection_node_embeddings_property=...
#   Name of the node property that contains node embeddings. Example: "embeddingsFastRandomProjection"
#
# Special Requirements:
# - This algorithm needs a node property with an array of floats to compute clusters.
#   One possible way is to use node embeddings for that (like FastRP).
detectCommunitiesWithHDBSCAN() {
  local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection"
  local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection"

  local writePropertyName="dependencies_projection_write_property=communityHdbscanLabel"
  local writeLabelName="dependencies_projection_write_label=HDBSCAN"

  # Statistics
  execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_11a_HDBSCAN_Estimate.cypher" "${@}" "${writePropertyName}"
  execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_11b_HDBSCAN_Statistics.cypher" "${@}"

  # Run the algorithm and write the result into the in-memory projection ("mutate")
  execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_11c_HDBSCAN_Mutate.cypher" "${@}" "${writePropertyName}"

  # Stream to CSV. The node label becomes the prefix of the report file name.
  # Declaration and assignment are kept separate so the exit status of
  # extractQueryParameter is not masked by "local".
  local nodeLabel
  nodeLabel=$(extractQueryParameter "dependencies_projection_node" "${@}")
  execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated_Grouped.cypher" "${@}" "${writePropertyName}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_HDBSCAN.csv"

  # Update Graph (node properties and labels) using the already mutated property projection
  execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}"
  execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"
  execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"

  calculateCommunityMetrics "${@}" "${writePropertyName}"
}
308+
245309# Community Detection using the Approximate Maximum k-cut Algorithm
246310#
247311# Required Parameters:
@@ -402,6 +466,13 @@ detectCommunities() {
402466 time detectCommunitiesWithKCoreDecomposition " ${@ } "
403467 time detectCommunitiesWithApproximateMaximumKCut " ${@ } "
404468 time calculateLocalClusteringCoefficient " ${@ } "
469+
470+ # TODO Hard-wire built-in dependencies_projection_node_embeddings_property
471+ nodeEmbeddingsProperty=$( extractQueryParameter " dependencies_projection_node_embeddings_property" " ${@ } " )
472+ if [ -n " ${nodeEmbeddingsProperty} " ]; then
473+ time nodeEmbeddingsWithFastRandomProjectionForHDBSCAN " ${@ } "
474+ time detectCommunitiesWithHDBSCAN " ${@ } "
475+ fi
405476 compareCommunityDetectionResults " ${@ } "
406477 listAllResults " ${@ } "
407478}
@@ -415,7 +486,7 @@ ARTIFACT_GAMMA="dependencies_leiden_gamma=1.11" # default = 1.00
415486ARTIFACT_KCUT=" dependencies_maxkcut=5" # default = 2
416487
417488if createUndirectedDependencyProjection " ${ARTIFACT_PROJECTION} " " ${ARTIFACT_NODE} " " ${ARTIFACT_WEIGHT} " ; then
418- detectCommunities " ${ARTIFACT_PROJECTION} " " ${ARTIFACT_NODE} " " ${ARTIFACT_WEIGHT} " " ${ARTIFACT_GAMMA} " " ${ARTIFACT_KCUT} "
489+ detectCommunities " ${ARTIFACT_PROJECTION} " " ${ARTIFACT_NODE} " " ${ARTIFACT_WEIGHT} " " ${ARTIFACT_GAMMA} " " ${ARTIFACT_KCUT} " # "${ARTIFACT_NODE_EMBEDDINGS}"
419490 writeLeidenModularity " ${ARTIFACT_PROJECTION} " " ${ARTIFACT_NODE} " " ${ARTIFACT_WEIGHT} "
420491fi
421492
@@ -426,9 +497,10 @@ PACKAGE_NODE="dependencies_projection_node=Package"
426497PACKAGE_WEIGHT=" dependencies_projection_weight_property=weight25PercentInterfaces"
427498PACKAGE_GAMMA=" dependencies_leiden_gamma=1.14" # default = 1.00
428499PACKAGE_KCUT=" dependencies_maxkcut=20" # default = 2
500+ PACKAGE_NODE_EMBEDDINGS=" dependencies_projection_node_embeddings_property=embeddingsFastRandomProjection" # default = none
429501
430502if createUndirectedDependencyProjection " ${PACKAGE_PROJECTION} " " ${PACKAGE_NODE} " " ${PACKAGE_WEIGHT} " ; then
431- detectCommunities " ${PACKAGE_PROJECTION} " " ${PACKAGE_NODE} " " ${PACKAGE_WEIGHT} " " ${PACKAGE_GAMMA} " " ${PACKAGE_KCUT} "
503+ detectCommunities " ${PACKAGE_PROJECTION} " " ${PACKAGE_NODE} " " ${PACKAGE_WEIGHT} " " ${PACKAGE_GAMMA} " " ${PACKAGE_KCUT} " " ${PACKAGE_NODE_EMBEDDINGS} "
432504 writeLeidenModularity " ${PACKAGE_PROJECTION} " " ${PACKAGE_NODE} " " ${PACKAGE_WEIGHT} "
433505
434506 # Package Community Detection - Special CSV Queries after update
@@ -444,8 +516,7 @@ TYPE_GAMMA="dependencies_leiden_gamma=5.00" # default = 1.00
444516TYPE_KCUT=" dependencies_maxkcut=100" # default = 2
445517
446518if createUndirectedJavaTypeDependencyProjection " ${TYPE_PROJECTION} " ; then
447- detectCommunities " ${TYPE_PROJECTION} " " ${TYPE_NODE} " " ${TYPE_WEIGHT} " " ${TYPE_GAMMA} " " ${TYPE_KCUT} "
448-
519+ detectCommunities " ${TYPE_PROJECTION} " " ${TYPE_NODE} " " ${TYPE_WEIGHT} " " ${TYPE_GAMMA} " " ${TYPE_KCUT} " " ${TYPE_NODE_EMBEDDINGS} "
449520 # Type Community Detection - Special CSV Queries after update
450521 execute_cypher " ${CYPHER_DIR} /Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher" > " ${FULL_REPORT_DIRECTORY} /Type_Communities_Leiden_That_Span_Multiple_Artifacts.csv"
451522 execute_cypher " ${CYPHER_DIR} /Community_Detection/Type_communities_with_few_members_in_foreign_packages.cypher" > " ${FULL_REPORT_DIRECTORY} /Type_communities_with_few_members_in_foreign_packages.csv"
0 commit comments