一、安裝logstash,直接解壓即可
測試logstash是否能夠正常運行:
bin/logstash -e 'input { stdin { } } output { stdout {codec => rubydebug } }'
只獲取消息內容:
bin/logstash -e 'input { stdin { } } output { stdout {codec => plain { format => "%{message}" } } }'
二、編寫logstash配置文件
2.1 在logstash目錄下建立conf目錄
2.2 在conf目錄下建立文件logstash.conf,內容如下:
# Pipeline: read local log files and forward each line to a Kafka topic.
input {
  file {
    # Glob of the log files to read.
    path => "/home/hadoop/logs/*.log"
    # Tag attached to every event produced by this input.
    type => "logs"
    # Read files from their start rather than only new lines.
    # NOTE(review): presumably applies only to files not seen before - confirm in the plugin docs.
    start_position => "beginning"
    # How often the path glob is re-expanded to find new files.
    # NOTE(review): unit (seconds vs. multiple of stat_interval) depends on the plugin version - confirm.
    discover_interval => 10
  }
}

output {
  kafka {
    # Destination topic; the Spark job in this tutorial consumes the same topic name.
    topic_id => "spark"
    # Emit only the raw message text instead of the whole event.
    codec => plain {
      format => "%{message}"
    }
  }
}
logstash input: https://www.elastic.co/guide/en/logstash/current/input-plugins.html
logstash output: https://www.elastic.co/guide/en/logstash/current/output-plugins.html
三、啓動logstash採集數據
bin/logstash -f conf/logstash.conf
四、代碼測試
package bigdata.spark

import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkContext, SparkConf}

/**
 * Running word count over messages read from the Kafka topic "spark".
 *
 * Every 5-second batch is split on single spaces, each word is paired with 1,
 * and the per-word totals are accumulated across batches with `updateStateByKey`.
 * The accumulated counts are printed for each batch.
 *
 * Created by Administrator on 2017/4/28.
 */
object SparkStreamDemo {

  /**
   * Entry point: builds the contexts, wires the stream and blocks until termination.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("spark_streaming")
      .setMaster("local[*]")

    val sc = new SparkContext(conf)
    // updateStateByKey needs a checkpoint directory; here it is set on the SparkContext.
    // NOTE(review): StreamingContext.checkpoint(dir) is the documented way to set it - confirm
    // whether that should be used here instead.
    sc.setCheckpointDir("D:/checkpoints")
    sc.setLogLevel("ERROR")

    val ssc = new StreamingContext(sc, Seconds(5))

    // topic name -> value passed to the receiver-based Kafka stream
    // (per the KafkaUtils.createStream API this is the number of consumer threads for the topic).
    val topics = Map("spark" -> 2)

    // createStream yields (key, message) pairs; only the message text is kept.
    val lines = KafkaUtils
      .createStream(ssc, "m1:2181,m2:2181,m3:2181", "spark", topics)
      .map(_._2)

    val wordOnes = lines
      .flatMap(_.split(" "))
      .map(word => (word, 1))

    // Adds the counts seen in the current batch to the total carried over from earlier batches.
    val addToRunningTotal: (Seq[Int], Option[Int]) => Option[Int] =
      (batchCounts, previousTotal) => Some(batchCounts.sum + previousTotal.getOrElse(0))

    val runningCounts = wordOnes.updateStateByKey[Int](addToRunningTotal)
    runningCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}