The cartesian operator.
The GitHub project already contains demos of all Spark operators.
Java version:
package com.huangyueran.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;

/**
 * @category The two RDDs are merged via a Cartesian product
 * @author huangyueran
 * @time 2019-7-21 16:38:20
 */
public class Cartesian {

    public static void main(String[] args) {
        /**
         * SparkConf: the first step is to create a SparkConf. On this object you
         * can set the run mode (Local, Standalone, YARN) and the AppName (visible
         * in the Web UI), as well as Spark's runtime resource requirements.
         */
        SparkConf conf = new SparkConf().setAppName("Cartesian").setMaster("local");
        /**
         * A SparkContext (the Spark context) is created from the SparkConf object.
         * The SparkContext is the only gateway to the cluster; creating it also
         * creates the task scheduler.
         */
        JavaSparkContext sc = new JavaSparkContext(conf);
        cartesian(sc);
    }

    private static void cartesian(JavaSparkContext sc) {
        List<String> names = Arrays.asList("張三", "李四", "王五");
        List<Integer> scores = Arrays.asList(60, 70, 80);
        JavaRDD<String> namesRDD = sc.parallelize(names);
        JavaRDD<Integer> scoreRDD = sc.parallelize(scores);

        /**
         * ==============================================
         * | The two RDDs are merged via a Cartesian    |
         * | product                                    |
         * ==============================================
         */
        JavaPairRDD<String, Integer> cartesianRDD = namesRDD.cartesian(scoreRDD);
        cartesianRDD.foreach(new VoidFunction<Tuple2<String, Integer>>() {
            public void call(Tuple2<String, Integer> t) throws Exception {
                System.out.println(t._1 + "\t" + t._2);
            }
        });
    }
}
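Running the Java version locally prints all 3 × 3 = 9 name/score combinations. The line order below is illustrative only, since foreach runs in parallel across partitions and its output order is not guaranteed:

張三	60
張三	70
張三	80
李四	60
李四	70
李四	80
王五	60
王五	70
王五	80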
Scala version:
package com.hyr.spark.operator

import org.apache.spark.{SparkConf, SparkContext}

/** *****************************************************************************
  * @date 2019-08-07 17:04
  * @author: <a href=mailto:huangyr>黃躍然</a>
  * @Description: The two RDDs are merged via a Cartesian product
  * ****************************************************************************/
object Cartesian {

  def cartesian(sparkContext: SparkContext): Unit = {
    val names = List("張三", "李四", "王五")
    val scores = List(60, 70, 90)
    val namesRDD = sparkContext.parallelize(names)
    val scoresRDD = sparkContext.parallelize(scores)
    val cartesianRDD = namesRDD.cartesian(scoresRDD)
    cartesianRDD.foreach(tuple => {
      println("key:" + tuple._1 + "\tvalue:" + tuple._2)
    })
  }

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("Cartesian").setMaster("local")
    val sparkContext = new SparkContext(sparkConf)
    cartesian(sparkContext)
  }
}
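A full Cartesian product materializes |left| × |right| pairs, so in practice cartesian is usually restricted to small RDDs or immediately narrowed with a filter. The fragment below is a minimal sketch of that pattern, not part of the repository; it assumes the JavaSparkContext sc from the Java example above plus an extra import of org.apache.spark.api.java.function.Function:

// A minimal sketch (not from the SparkDemo repository): pair every name with
// every score via cartesian, then keep only passing scores with a filter.
// Assumes the JavaSparkContext `sc` from the Java example above and
// import org.apache.spark.api.java.function.Function;
JavaRDD<String> namesRDD = sc.parallelize(Arrays.asList("張三", "李四", "王五"));
JavaRDD<Integer> scoreRDD = sc.parallelize(Arrays.asList(55, 70, 80));
JavaPairRDD<String, Integer> passingPairs = namesRDD.cartesian(scoreRDD)
        .filter(new Function<Tuple2<String, Integer>, Boolean>() {
            public Boolean call(Tuple2<String, Integer> t) throws Exception {
                return t._2 >= 60; // drop pairs with a failing score
            }
        });
System.out.println(passingPairs.collect()); // 6 of the 9 pairs remain

For anything beyond small lookup tables, a keyed join is usually preferable, since the Cartesian product grows quadratically with the input sizes.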
GitHub repository: https://github.com/huangyueranbbc/SparkDemo