Spark算子彙總-Cartesian

Cartesian算子操作。

GitHub項目上已包含Spark全部操作DEMO。

Java版本:

package com.huangyueran.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;

/** * @category 兩個RDD進行笛卡爾積合併--The two RDD are Cartesian product merging * @author huangyueran * @time 2019-7-21 16:38:20 */
public class Cartesian {

	public static void main(String[] args) {
		/** * SparkConf:第一步建立一個SparkConf,在這個對象裏面能夠設置容許模式Local Standalone yarn * AppName(能夠在Web UI中看到) 還能夠設置Spark運行時的資源要求 */
		SparkConf conf = new SparkConf().setAppName("Cartesian").setMaster("local");

		/** * 基於SparkConf的對象能夠建立出來一個SparkContext Spark上下文 * SparkContext是通往集羣的惟一通道,SparkContext在建立的時候還會建立任務調度器 */
		JavaSparkContext sc = new JavaSparkContext(conf);

		cartesian(sc);
	}

	private static void cartesian(JavaSparkContext sc) {
	    List<String> names = Arrays.asList("張三", "李四", "王五");
	    List<Integer> scores = Arrays.asList(60, 70, 80);

	    JavaRDD<String> namesRDD = sc.parallelize(names);
	    JavaRDD<Integer> scoreRDD = sc.parallelize(scores);

	    /** * ===================================== * | 兩個RDD進行笛卡爾積合併 | * | The two RDD are Cartesian product merging | | * ===================================== */
	    JavaPairRDD<String, Integer> cartesianRDD = namesRDD.cartesian(scoreRDD);
	    
	    cartesianRDD.foreach(new VoidFunction<Tuple2<String, Integer>>() {
	        public void call(Tuple2<String, Integer> t) throws Exception {
	            System.out.println(t._1 + "\t" + t._2());
	        }
	    });
	}
	
}

 

Scala版本:

package com.hyr.spark.operator

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demonstrates the Spark `cartesian` operator: merging two RDDs into their
  * Cartesian product.
  *
  * Date: 2019-08-07 17:04
  *
  * @author <a href=mailto:huangyr>Huang Yueran</a>
  */
object Cartesian {

  /**
    * Builds an RDD of names and an RDD of scores, computes their Cartesian
    * product, and prints every pair as `key:<name><TAB>value:<score>`.
    *
    * @param sparkContext the Spark context used to parallelize the sample lists
    */
  def cartesian(sparkContext: SparkContext): Unit = {
    val names = List("張三", "李四", "王五")
    val scores = List(60, 70, 90)

    val namesRDD = sparkContext.parallelize(names)
    val scoresRDD = sparkContext.parallelize(scores)

    // The Cartesian product pairs every name with every score.
    val cartesianRDD = namesRDD.cartesian(scoresRDD)

    cartesianRDD.foreach { case (name, score) =>
      println(s"key:$name\tvalue:$score")
    }
  }

  /**
    * Entry point: creates a Spark context named "Cartesian" with master
    * "local", runs the demo, and stops the context afterwards.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("Cartesian").setMaster("local")
    val sparkContext = new SparkContext(sparkConf)

    // Always stop the context, even when the job throws, so it is not left
    // running after the demo ends.
    try cartesian(sparkContext)
    finally sparkContext.stop()
  }

}

GitHub地址:https://github.com/huangyueranbbc/SparkDemo

相關文章
相關標籤/搜索