1、自定義排序規則 - 封裝類
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Custom sorting, variant 1: wrap each record in a `Girl` class that
 * implements `Ordered[Girl]`, so `sortBy(identity)` uses its ordering.
 */
object MySort1 {
  def main(args: Array[String]): Unit = {
    // 1. Spark entry point (local mode, 2 threads).
    val conf: SparkConf = new SparkConf().setAppName("MySort1").setMaster("local[2]")
    val sc: SparkContext = new SparkContext(conf)

    // 2. Raw records in "name,age,weight" form.
    val girl: Array[String] = Array("Mary,18,80", "Jenny,22,100", "Joe,30,80", "Tom,18,78")

    // 3. Turn the local array into an RDD.
    val grdd1: RDD[String] = sc.parallelize(girl)

    // 4. Split each line and build one Girl per record.
    val grdd2: RDD[Girl] = grdd1.map(line => {
      val fields: Array[String] = line.split(",")
      val name = fields(0)
      val age = fields(1).toInt
      val weight = fields(2).toInt
      new Girl(name, age, weight)
    })

    // 5. Sort by the element itself; Girl's compare() defines the order.
    val sorted: RDD[Girl] = grdd2.sortBy(s => s)
    val r = sorted.collect()
    println(r.toBuffer)
    sc.stop()
  }
}

/**
 * Comparable record: younger girls sort first; on an age tie the heavier
 * girl sorts first. Serializable so Spark can ship instances to executors.
 */
class Girl(val name: String, val age: Int, val weight: Int) extends Ordered[Girl] with Serializable {
  override def compare(that: Girl): Int = {
    if (this.age == that.age) {
      // Age tie: heavier first (descending weight).
      // FIX: use Integer.compare instead of subtraction — `a - b` as a
      // comparator silently overflows for large Int values.
      -Integer.compare(this.weight, that.weight)
    } else {
      // Ascending age.
      Integer.compare(this.age, that.age)
    }
  }

  override def toString: String = s"名字:$name,年齡:$age,體重:$weight"
}
結果:
2、自定義排序規則 - 模式匹配
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
 * Custom sorting, variant 2: records stay as plain tuples; a `Girl2`
 * case class is constructed only as the sort key inside `sortBy`.
 */
object MySort2 {
  def main(args: Array[String]): Unit = {
    // Spark entry point (local mode, 2 worker threads).
    val conf: SparkConf = new SparkConf().setAppName("MySort2").setMaster("local[2]")
    val sc: SparkContext = new SparkContext(conf)

    // Raw records in "name,age,weight" form, parallelized into an RDD.
    val rawRecords: Array[String] = Array("Mary,18,80", "Jenny,22,100", "Joe,30,80", "Tom,18,78")
    val lineRdd: RDD[String] = sc.parallelize(rawRecords)

    // Parse every line into a (name, age, weight) tuple.
    val tupleRdd: RDD[(String, Int, Int)] = lineRdd.map { line =>
      val parts: Array[String] = line.split(",")
      (parts(0), parts(1).toInt, parts(2).toInt)
    }

    // Sort by wrapping each tuple in Girl2, whose compare() defines the order.
    val ordered = tupleRdd.sortBy { case (name, age, weight) => Girl2(name, age, weight) }
    val collected = ordered.collect()
    println(collected.toBuffer)
    sc.stop()
  }
}

/**
 * Sort key: ascending age; on an age tie the heavier girl sorts first.
 */
case class Girl2(val name: String, val age: Int, val weight: Int) extends Ordered[Girl2] {
  override def compare(that: Girl2): Int =
    if (this.age == that.age) that.weight - this.weight // tie: descending weight
    else this.age - that.age                            // ascending age

  override def toString: String = s"名字:$name,年齡:$age,體重:$weight"
}
結果:
3、自定義排序規則 - 隱式轉換
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
 * Holder for implicit Ordering instances, imported where sorting happens.
 */
object ImplicitRules {
  // Ordering for Girl1: ascending age; on an age tie, descending weight.
  implicit object OrderingGirl extends Ordering[Girl1] {
    override def compare(x: Girl1, y: Girl1): Int =
      if (x.age == y.age) y.weight - x.weight // tie: heavier first
      else x.age - y.age                      // younger first
  }
}

/**
 * Custom sorting, variant 3: Girl1 is a plain case class; the ordering is
 * supplied as an implicit Ordering[Girl1] brought into scope before sortBy.
 */
object MySort3 {
  def main(args: Array[String]): Unit = {
    // Spark entry point (local mode, 2 worker threads).
    val conf: SparkConf = new SparkConf().setAppName("MySort3").setMaster("local[2]")
    val sc: SparkContext = new SparkContext(conf)

    // Raw records in "name,age,weight" form, parallelized into an RDD.
    val rawRecords: Array[String] = Array("Mary,18,80", "Jenny,22,100", "Joe,30,80", "Tom,18,78")
    val lineRdd: RDD[String] = sc.parallelize(rawRecords)

    // Parse every line into a (name, age, weight) tuple.
    val tupleRdd = lineRdd.map { line =>
      val parts: Array[String] = line.split(",")
      (parts(0), parts(1).toInt, parts(2).toInt)
    }

    // Bring the implicit Ordering[Girl1] into scope, then sort by Girl1 keys.
    import ImplicitRules.OrderingGirl
    val ordered = tupleRdd.sortBy { case (name, age, weight) => Girl1(name, age, weight) }
    val collected = ordered.collect()
    println(collected.toBuffer)
    sc.stop()
  }
}

// Plain data carrier; its ordering lives in ImplicitRules, not here.
case class Girl1(val name: String, val age: Int, val weight: Int)
結果: