Skip to content

Commit e3ca660

Browse files
authored
add bfs algorithm (#24)
1 parent 553d76e commit e3ca660

File tree

6 files changed

+129
-1
lines changed

6 files changed

+129
-1
lines changed

nebula-algorithm/src/main/resources/application.conf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,12 @@
141141
# ClosenessAlgo parameter
142142
closeness:{}
143143

144+
# BFS parameter
145+
bfs:{
146+
maxIter:5
147+
root:"10"
148+
}
149+
144150
# HanpAlgo parameter
145151
hanp:{
146152
hopAttenuation:0.1

nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/Main.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import com.vesoft.nebula.algorithm.config.Configs.Argument
99
import com.vesoft.nebula.algorithm.config.{
1010
AlgoConfig,
1111
BetweennessConfig,
12+
BfsConfig,
1213
CcConfig,
1314
CoefficientConfig,
1415
Configs,
@@ -23,8 +24,9 @@ import com.vesoft.nebula.algorithm.config.{
2324
}
2425
import com.vesoft.nebula.algorithm.lib.{
2526
BetweennessCentralityAlgo,
26-
ClusteringCoefficientAlgo,
27+
BfsAlgo,
2728
ClosenessAlgo,
29+
ClusteringCoefficientAlgo,
2830
ConnectedComponentsAlgo,
2931
DegreeStaticAlgo,
3032
GraphTriangleCountAlgo,
@@ -185,6 +187,10 @@ object Main {
185187
val node2vecConfig = Node2vecConfig.getNode2vecConfig(configs)
186188
Node2vecAlgo(spark, dataSet, node2vecConfig, hasWeight)
187189
}
190+
case "bfs" => {
191+
val bfsConfig = BfsConfig.getBfsConfig(configs)
192+
BfsAlgo(spark, dataSet, bfsConfig)
193+
}
188194
case _ => throw new UnknownParameterException("unknown executeAlgo name.")
189195
}
190196
}

nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/config/AlgoConfig.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,22 @@ object CoefficientConfig {
167167
}
168168
}
169169

170+
/**
 * Configuration for the BFS algorithm.
 *
 * @param maxIter maximum number of pregel supersteps the traversal may run
 * @param root    id of the vertex the breadth-first search starts from
 */
case class BfsConfig(maxIter: Int, root: Long)

object BfsConfig {

  /**
   * Read the BFS parameters from the application config.
   *
   * Expects `algorithm.bfs.maxIter` and `algorithm.bfs.root` to be present in
   * `configs.algorithmConfig.map`; throws NoSuchElementException when missing
   * and NumberFormatException when not numeric.
   *
   * NOTE(review): earlier revision stashed the parsed values in object-level
   * `var`s, which is not thread-safe and leaks state between calls; plain
   * local vals keep this method side-effect free.
   */
  def getBfsConfig(configs: Configs): BfsConfig = {
    val bfsConfig = configs.algorithmConfig.map
    val maxIter   = bfsConfig("algorithm.bfs.maxIter").toInt
    val root      = bfsConfig("algorithm.bfs.root").toLong
    BfsConfig(maxIter, root)
  }
}
185+
170186
/**
171187
* Hanp
172188
*/

nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/config/Configs.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,4 +365,5 @@ object AlgoConstants {
365365
val CLOSENESS_RESULT_COL: String = "closeness"
366366
val HANP_RESULT_COL: String = "hanp"
367367
val NODE2VEC_RESULT_COL: String = "node2vec"
368+
val BFS_RESULT_COL: String = "bfs"
368369
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/* Copyright (c) 2021 vesoft inc. All rights reserved.
2+
*
3+
* This source code is licensed under Apache 2.0 License.
4+
*/
5+
6+
package com.vesoft.nebula.algorithm.lib
7+
8+
import com.vesoft.nebula.algorithm.config.{AlgoConstants, BfsConfig}
9+
import com.vesoft.nebula.algorithm.utils.NebulaUtil
10+
import org.apache.log4j.Logger
11+
import org.apache.spark.graphx.{EdgeTriplet, Graph, VertexId}
12+
import org.apache.spark.sql.functions.col
13+
import org.apache.spark.sql.types.{DoubleType, IntegerType, LongType, StructField, StructType}
14+
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
15+
16+
/**
 * Breadth-First Search for an un-weighted graph.
 *
 * Each visited vertex is labelled with its hop distance (as a Double) from the
 * root vertex; unreached vertices are dropped from the result.
 */
object BfsAlgo {
  private val LOGGER = Logger.getLogger(this.getClass)

  val ALGORITHM: String = "BFS"

  /**
   * Run the BFS algorithm for nebula graph.
   *
   * (Fixed doc comment: the earlier revision said "louvain", a copy-paste
   * error from another algorithm.)
   *
   * @param spark     active spark session used to build the result DataFrame
   * @param dataset   edge data loaded from nebula, passed to NebulaUtil.loadInitGraph
   * @param bfsConfig carries maxIter (pregel superstep limit) and root vertex id
   * @return DataFrame with columns (vertex id, bfs depth), ordered by depth;
   *         only vertices actually reached from the root are included
   */
  def apply(spark: SparkSession, dataset: Dataset[Row], bfsConfig: BfsConfig): DataFrame = {
    // hasWeight = false: BFS treats every edge as unit cost
    val graph: Graph[None.type, Double] = NebulaUtil.loadInitGraph(dataset, false)
    val bfsGraph = execute(graph, bfsConfig.maxIter, bfsConfig.root)

    // drop vertices the traversal never reached (still at +Infinity)
    val visitedVertices = bfsGraph.vertices.filter(v => v._2 != Double.PositiveInfinity)

    val schema = StructType(
      List(
        StructField(AlgoConstants.ALGO_ID_COL, LongType, nullable = false),
        StructField(AlgoConstants.BFS_RESULT_COL, DoubleType, nullable = true)
      ))
    val resultRDD = visitedVertices.map(vertex => Row(vertex._1, vertex._2))
    val algoResult = spark.sqlContext
      .createDataFrame(resultRDD, schema)
      .orderBy(col(AlgoConstants.BFS_RESULT_COL))
    algoResult
  }

  /**
   * Execute BFS as a pregel computation.
   *
   * The root vertex starts at depth 0.0, every other vertex at +Infinity;
   * each superstep propagates depth + 1 to unvisited neighbours.
   *
   * @param graph   input graph; vertex attributes are ignored
   * @param maxIter upper bound on pregel supersteps (bounds the BFS depth)
   * @param root    id of the start vertex
   * @return graph whose vertex attribute is the hop distance from root
   *         (+Infinity for unreached vertices)
   */
  def execute(graph: Graph[None.type, Double], maxIter: Int, root: Long): Graph[Double, Double] = {
    val initialGraph = graph.mapVertices(
      (id, _) =>
        if (id == root) 0.0
        else Double.PositiveInfinity)

    // vertex program: keep the smallest depth seen so far
    val vprog = { (id: VertexId, attr: Double, msg: Double) =>
      math.min(attr, msg)
    }

    // Send depth + 1 across an edge only when exactly one endpoint has been
    // visited; messages flow in both directions, so the traversal effectively
    // treats edges as undirected.
    val sendMsg = { (triplet: EdgeTriplet[Double, Double]) =>
      var iter: Iterator[(VertexId, Double)] = Iterator.empty
      val isSrcMarked = triplet.srcAttr != Double.PositiveInfinity
      val isDstMarked = triplet.dstAttr != Double.PositiveInfinity
      if (!(isSrcMarked && isDstMarked)) {
        if (isSrcMarked) {
          iter = Iterator((triplet.dstId, triplet.srcAttr + 1))
        } else {
          iter = Iterator((triplet.srcId, triplet.dstAttr + 1))
        }
      }
      iter
    }

    // merge concurrent messages by taking the shortest depth
    val mergeMsg = { (a: Double, b: Double) =>
      math.min(a, b)
    }

    initialGraph.pregel(Double.PositiveInfinity, maxIter)(vprog, sendMsg, mergeMsg)
  }
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/* Copyright (c) 2021 vesoft inc. All rights reserved.
2+
*
3+
* This source code is licensed under Apache 2.0 License.
4+
*/
5+
6+
package com.vesoft.nebula.algorithm.lib
7+
8+
import com.vesoft.nebula.algorithm.config.{BfsConfig, CcConfig}
9+
import org.apache.spark.sql.SparkSession
10+
import org.junit.Test
11+
12+
class BfsAlgoSuite {

  /**
   * BFS over the test edge file rooted at vertex "1" with maxIter 5 is
   * expected to reach exactly 4 vertices.
   */
  @Test
  def bfsAlgoSuite(): Unit = {
    val spark = SparkSession.builder().master("local").getOrCreate()
    try {
      val data = spark.read.option("header", true).csv("src/test/resources/edge.csv")
      // BfsConfig is a case class; `new` is unnecessary
      val bfsAlgoConfig = BfsConfig(5, 1)
      val result = BfsAlgo.apply(spark, data, bfsAlgoConfig)
      result.show()
      assert(result.count() == 4)
    } finally {
      // stop the session so repeated test runs don't leak local Spark contexts
      spark.stop()
    }
  }
}

0 commit comments

Comments
 (0)