val rawFile = sc.textFile("README.md")
val words = rawFile.flatMap(line => line.split(" "))
val wordNumber = words.map(w => (w, 1))
val wordCounts = wordNumber.reduceByKey(_ + _)
wordCounts.foreach(println)

// The same word count written as a single chained expression:
sc.textFile("README.md").flatMap(line => line.split(" ")).map(w => (w, 1)).reduceByKey(_ + _).foreach(println)

// An equivalent version that uses groupByKey instead of reduceByKey and sums the groups:
sc.textFile("README.md").flatMap(line => line.split(" ")).map(w => (w, 1)).groupByKey().map((p: (String, Iterable[Int])) => (p._1, p._2.sum)).collect
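On a real cluster, foreach(println) prints on the executors rather than in the driver console, so it is often more convenient to bring a small sample back to the driver. A minimal sketch, run in the same spark-shell session as above; the sort step and the take(10) cutoff are illustrative choices, not part of the original example:

// Collect the ten most frequent words to the driver and print them there.
val topWords = wordCounts.sortBy(_._2, ascending = false).take(10)
topWords.foreach { case (word, count) => println(s"$word: $count") }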
SimpleApp

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf

object SimpleApp {
  def main(args: Array[String]) {
    val logFile = "README.md"
    val conf = new SparkConf().setAppName("Simple Application")
    val sc = new SparkContext(conf)
    val logData = sc.textFile(logFile, 2).cache()
    val numAs = logData.filter(line => line.contains("a")).count()
    val numBs = logData.filter(line => line.contains("b")).count()
    println("Lines with a: %s, Lines with b: %s".format(numAs, numBs))
  }
}
simple.sbt

name := "Simple Project"

version := "1.0"

scalaVersion := "2.11.4"

libraryDependencies += "org.apache.spark" %% "spark-core" % "1.4.0"
resolvers += "Akka Repository"code