首先自己造了一份簡單的社交關係圖。
第一份是人物數據,每行包含 id 和姓名,保存在 person.txt 中:
1 孫儷
2 鄧超
3 佟大爲
4 馮紹峯
5 黃曉明
6 angelababy
7 李冰冰
8 范冰冰
第二份是社交關係數據,每行包含兩個人的 id 和他們之間的社交關係(格式爲:起點 id、關係、終點 id),保存在 social.txt 中:
1 丈夫 2
2 妻子 1
1 搭檔 3
3 同窗 4
3 好友 5
5 好友 3
5 妻子 6
5 好友 7
7 好友 8
使用 Spark GraphX 和 GraphStream 來處理數據:
package graphx

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.graphstream.graph.implementations.{AbstractEdge, SingleGraph, SingleNode}

/**
  * Created by common on 18-1-22.
  *
  * Builds a small social graph with Spark GraphX from two text files and
  * renders it with GraphStream.
  *
  *  - `input/graphx/person.txt`: one person per line, `<id> <name>`
  *  - `input/graphx/social.txt`: one relationship per line,
  *    `<srcId> <relation> <dstId>`
  */
object GraphxLearning {

  /** Parses a `<id> <name>` line into a GraphX vertex; the name is stored in both attribute slots. */
  private def parseUser(line: String): (VertexId, (String, String)) = {
    // Split once and reuse the fields instead of re-splitting per field.
    val fields = line.split(" ")
    val vertexName = fields(1)
    (fields(0).toLong, (vertexName, vertexName))
  }

  /** Parses a `<srcId> <relation> <dstId>` line into a GraphX edge labelled with the relation. */
  private def parseRelationship(line: String): Edge[String] = {
    val fields = line.split(" ")
    Edge(fields(0).toLong, fields(2).toLong, fields(1))
  }

  /**
    * Entry point: loads the two input files, builds the GraphX graph, copies
    * its vertices and edges into a GraphStream `SingleGraph` and displays it.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("GraphX").setMaster("local")
    val sc = new SparkContext(conf)

    // Everything Spark-related happens inside the try so the context is
    // always stopped, even when parsing or collecting fails.
    val (vertices, edges) =
      try {
        val path1 = "input/graphx/person.txt"
        val path2 = "input/graphx/social.txt"

        // Vertex RDD: (vertex id, vertex attribute). Blank lines are skipped
        // so they do not fail with an index error while parsing.
        val users: RDD[(VertexId, (String, String))] =
          sc.textFile(path1).filter(_.trim.nonEmpty).map(parseUser)

        // Edge RDD: (source id, destination id, edge attribute such as a label or weight).
        val relationships: RDD[Edge[String]] =
          sc.textFile(path2).filter(_.trim.nonEmpty).map(parseRelationship)

        // Default user, in case a relationship refers to a missing user.
        val defaultUser = ("John Doe", "Missing")

        // Build the graph from the two RDDs.
        val srcGraph = Graph(users, relationships, defaultUser)

        (srcGraph.vertices.collect(), srcGraph.edges.collect())
      } finally {
        sc.stop()
      }

    val graph: SingleGraph = new SingleGraph("graphDemo")

    // Load the GraphX vertices into GraphStream nodes, labelled with the person's name.
    for ((id, (name, _)) <- vertices) {
      val node = graph.addNode(id.toString).asInstanceOf[SingleNode]
      node.addAttribute("ui.label", name)
    }

    // Load the GraphX edges into GraphStream edges.
    for (Edge(x, y, relation) <- edges) {
      // A separator keeps edge ids unambiguous: plain concatenation would
      // give 1 -> 12 and 11 -> 2 the same id "112".
      val edgeId = s"$x->$y"
      val edge = graph
        .addEdge(edgeId, x.toString, y.toString, true)
        .asInstanceOf[AbstractEdge]
      edge.addAttribute("ui.label", relation)
    }

    graph.setAttribute("ui.quality")
    graph.setAttribute("ui.antialias")

    graph.display()
  }
}
可視化的結果:該圖數據的節點數不多。原本想嘗試一份百萬節點的數據,結果遇到了爆內存的問題。
後來發現爆內存是必然的,而且顯示的點太多也不利於 debug;解決方法是使用 subgraph() 方法對圖進行裁剪,以減少節點和邊的數量。