Spark Streaming 獲取 Kafka 數據(Java 版本)

環境

spark-2.2.0

kafka_2.11-0.10.0.1

jdk1.8

配置好 JDK,創建項目並將 Kafka 和 Spark 的 jar 包添加到項目中。除此之外還需要添加 spark-streaming-kafka-*.jar;筆者這裏用的是 spark-streaming-kafka-0-10_2.11-2.2.0.jar(即 Kafka 0.10 集成模塊、Scala 2.11、Spark 2.2.0),可在 Spark 官網上自行下載。

 1 import java.util.Arrays;
 2 import java.util.Collection;
 3 import java.util.HashMap;
 4 import java.util.Map;
 5 
 6 import org.apache.kafka.clients.consumer.ConsumerRecord;
 7 import org.apache.kafka.common.serialization.StringDeserializer;
 8 import org.apache.spark.SparkConf;
 9 import org.apache.spark.api.java.JavaPairRDD;
10 import org.apache.spark.api.java.JavaRDD;
11 import org.apache.spark.api.java.function.Function;
12 import org.apache.spark.api.java.function.PairFunction;
13 import org.apache.spark.api.java.function.VoidFunction;
14 import org.apache.spark.rdd.RDD;
15 import org.apache.spark.streaming.Durations;
16 import org.apache.spark.streaming.api.java.JavaDStream;
17 import org.apache.spark.streaming.api.java.JavaInputDStream;
18 import org.apache.spark.streaming.api.java.JavaPairDStream;
19 import org.apache.spark.streaming.api.java.JavaStreamingContext;
20 import org.apache.spark.streaming.kafka010.ConsumerStrategies;
21 import org.apache.spark.streaming.kafka010.KafkaUtils;
22 import org.apache.spark.streaming.kafka010.LocationStrategies;
23 
24 import scala.Tuple2;
25 
26 public class SparkStreamingFromkafka {
27 
28     public static void main(String[] args) throws Exception {
29         // TODO Auto-generated method stub
30         SparkConf sparkConf  = new SparkConf().setMaster("local[*]").setAppName("SparkStreamingFromkafka");
31         JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf , Durations.seconds(1));
32         
33         Map<String, Object> kafkaParams = new HashMap<>();
34         kafkaParams.put("bootstrap.servers", "192.168.246.134:9092");//多個可用ip可用","隔開
35         kafkaParams.put("key.deserializer", StringDeserializer.class);
36         kafkaParams.put("value.deserializer", StringDeserializer.class);
37         kafkaParams.put("group.id", "sparkStreaming");
38         Collection<String> topics = Arrays.asList("video");//配置topic,可以是數組
39 
40         JavaInputDStream<ConsumerRecord<String, String>> javaInputDStream =KafkaUtils.createDirectStream(
41                 streamingContext, 
42                 LocationStrategies.PreferConsistent(), 
43                 ConsumerStrategies.Subscribe(topics, kafkaParams));
44     
45         JavaPairDStream<String, String> javaPairDStream = javaInputDStream.mapToPair(new PairFunction<ConsumerRecord<String, String>, String, String>(){
46             private static final long serialVersionUID = 1L;
47             @Override
48             public Tuple2<String, String> call(ConsumerRecord<String, String> consumerRecord) throws Exception {
49                 return new Tuple2<>(consumerRecord.key(), consumerRecord.value());
50             }
51         });
52         javaPairDStream.foreachRDD(new VoidFunction<JavaPairRDD<String,String>>() {
53             @Override
54             public void call(JavaPairRDD<String, String> javaPairRDD) throws Exception {
55                 // TODO Auto-generated method stub
56                 javaPairRDD.foreach(new VoidFunction<Tuple2<String,String>>() {
57                     @Override
58                     public void call(Tuple2<String, String> tuple2)
59                             throws Exception {
60                         // TODO Auto-generated method stub
61                         System.out.println(tuple2._2);
62                     }
63                 });
64             }
65         });
66         streamingContext.start();
67         streamingContext.awaitTermination();
68     }
69 
70 }