第一步:編寫具體處理消息的類
import java.io.UnsupportedEncodingException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import kafka.consumer.ConsumerIterator; import kafka.consumer.KafkaStream; import kafka.message.MessageAndMetadata; public class Consumerwork implements Runnable { private static Logger LOG = LoggerFactory.getLogger(Consumerwork.class); @SuppressWarnings("rawtypes") private KafkaStream m_stream; private int m_threadNumber; @SuppressWarnings("rawtypes") public Consumerwork(KafkaStream a_stream,int a_threadNumber) { // TODO Auto-generated constructor stub m_threadNumber = a_threadNumber; m_stream = a_stream; } @SuppressWarnings("unchecked") @Override public void run() { // TODO Auto-generated method stub ConsumerIterator<byte[], byte[]> it = m_stream.iterator(); while (it.hasNext()) try { MessageAndMetadata<byte[], byte[]> thisMetadata=it.next(); String jsonStr = new String(thisMetadata.message(),"utf-8") ; LOG.info("Thread " + m_threadNumber + ": " +jsonStr); LOG.info("partion"+thisMetadata.partition()+",offset:"+thisMetadata.offset()); try { Thread.sleep(1000); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
第二步:編寫啓動Consumer的主類
import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Scanner; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import kafka.consumer.ConsumerConfig; import kafka.consumer.KafkaStream; import kafka.javaapi.consumer.ConsumerConnector; public class ConsumerGroup { private final ConsumerConnector consumer; private final String topic; private ExecutorService executor; private static Logger LOG = LoggerFactory.getLogger(ConsumerGroup.class); public ConsumerGroup(String a_zookeeper, String a_groupId, String a_topic) { consumer = kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig(a_zookeeper, a_groupId)); this.topic = a_topic; } public static void main(String[] args) { Scanner sc = new Scanner(System.in); System.out.println("請輸入zookeeper集羣地址(如zk1:2181,zk2:2181,zk3:2181):"); String zooKeeper = sc.nextLine(); System.out.println("請輸入指定的消費group名稱:"); String groupId = sc.nextLine(); System.out.println("請輸入指定的消費topic名稱:"); String topic = sc.nextLine(); System.out.println("請輸入指定的消費處理線程數:"); int threads = sc.nextInt(); LOG.info("Starting consumer kafka messages with zk:" + zooKeeper + " and the topic is " + topic); ConsumerGroup example = new ConsumerGroup(zooKeeper, groupId, topic); example.run(threads); try { Thread.sleep(1000); } catch (InterruptedException ie) { } // example.shutdown(); } private void shutdown() { // TODO Auto-generated method stub if (consumer != null) consumer.shutdown(); if (executor != null) executor.shutdown(); try { if (!executor.awaitTermination(5000, TimeUnit.MILLISECONDS)) { LOG.info("Timed out waiting for consumer threads to shut down, exiting uncleanly"); } } catch (InterruptedException e) { LOG.info("Interrupted during shutdown, exiting uncleanly"); } } private void run(int a_numThreads) { // TODO Auto-generated method stub Map<String, 
Integer> topicCountMap = new HashMap<String, Integer>(); topicCountMap.put(topic, new Integer(a_numThreads)); Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap); List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic); // now launch all the threads // executor = Executors.newFixedThreadPool(a_numThreads); // now create an object to consume the messages // int threadNumber = 0; LOG.info("the streams size is "+streams.size()); for (final KafkaStream stream : streams) { executor.submit(new com.goodix.kafka.oldconsumer.Consumerwork(stream, threadNumber)); // consumer.commitOffsets(); threadNumber++; } } private ConsumerConfig createConsumerConfig(String a_zookeeper, String a_groupId) { // TODO Auto-generated method stub Properties props = new Properties(); props.put("zookeeper.connect", a_zookeeper); props.put("group.id", a_groupId); props.put("zookeeper.session.timeout.ms", "60000"); props.put("zookeeper.sync.time.ms", "200"); props.put("auto.commit.interval.ms", "1000"); props.put("auto.offset.reset", "smallest"); // props.put("rebalance.max.retries", "5"); // props.put("rebalance.backoff.ms", "15000"); return new ConsumerConfig(props); } }
1. topicCountMap.put(topic, <線程數>) 是告訴Kafka我有多少個線程來處理該topic的消息。
(1). 這個線程數應小於等於topic的partition分區數(多出的線程分配不到分區,不會收到任何消息);可以通過
./kafka-topics.sh --describe --zookeeper "172.16.49.173:2181" --topic "producer_test"
命令來查看分區的狀況
(2). kafka會根據partition.assignment.strategy指定的分配策略來決定各線程消費哪些分區的消息;這裏沒有單獨配置該項,即採用默認的range策略(按連續區段平均分配)。比如分區有10個、線程數有3個,則線程1消費0,1,2,3,線程2消費4,5,6,線程3消費7,8,9。另外一種是roundrobin(循環分配策略),官方文檔中寫明使用該策略有兩個前提條件,所以一般不要去設定。
(3). 通過測試:consumerMap.get(topic).size(),應該是得到的目前該topic有數據的分區數
(4). stream即指的是來自一個或多個服務器上的一個或者多個partition的消息。每個stream都對應一個單線程處理。所以,client可以設置滿足自己需求的stream數目。總之,一個stream也許代表了多個服務器partition的消息的聚合,但是每個partition都只能到一個stream。
2. Executors.newFixedThreadPool(a_numThreads)是創建一個固定容量大小的線程池:每次提交一個任務就創建一個線程,直到線程數達到線程池的最大大小。線程池的大小一旦達到最大值就會保持不變,如果某個線程由於執行異常而結束,那麼線程池會補充一個新線程。
3. props.put("auto.offset.reset", "smallest") 是指定當該group沒有已提交的offset(或offset已越界)時,從最小的offset開始消費;如果沒有指定該項則默認爲largest,這樣的話該consumer就得不到生產者先前產生的消息。
4. 要使用old consumer API需要引用kafka_2.11以及kafka-clients。
<dependency> <groupId>org.apache.kafka</groupId> <artifactId>kafka_2.11</artifactId> <version>0.10.0.0</version> </dependency> <dependency> <groupId>org.apache.kafka</groupId> <artifactId>kafka-clients</artifactId> <version>0.10.0.0</version> </dependency>
Low Level(Simple)Consumer API是一個更加底層和複雜的API,因爲使用該API需要自己控制的項比較多,也比較複雜,官方給出了一些合適的適用場景,也可以理解爲這些場景是High Level Consumer API不能夠做到的:
1. 針對一個消息讀取多次
2. 在一個process中,僅僅處理一個topic中的部分partition
3. 使用事務,確保每一個消息只被處理一次
1. 必須在程序中跟蹤offset值
2. 必須找出指定Topic Partition中的lead broker
3. 必須處理broker的變更
首先,你必須知道讀哪一個topic的哪一個partition;然後,找到負責該partition的broker leader,從而找到存有該partition副本的那些broker;
再者,自己去寫request並fetch數據;最後,還需要識別和處理broker leader的改變。
package com.goodix.kafka.oldconsumer;

import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.common.ErrorMapping;
import kafka.common.TopicAndPartition;
import kafka.javaapi.*;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.message.MessageAndOffset;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reads messages from one partition of one topic with {@link SimpleConsumer}.
 *
 * <p>The class locates the partition leader itself, tracks the read offset itself,
 * and looks for a new leader when a fetch fails.
 */
public class SimpleExample {
    private static final Logger LOG = LoggerFactory.getLogger(SimpleExample.class);

    /** Socket timeout passed to every {@link SimpleConsumer}, in milliseconds. */
    private static final int SO_TIMEOUT_MS = 100000;
    /** Socket buffer size passed to every {@link SimpleConsumer}, in bytes. */
    private static final int BUFFER_SIZE_BYTES = 64 * 1024;
    /** Fetch size per request; may need raising if large batches are written to Kafka. */
    private static final int FETCH_SIZE_BYTES = 100000;
    /** Consecutive fetch errors tolerated before {@link #run} gives up. */
    private static final int MAX_FETCH_ERRORS = 5;
    /** Attempts made by {@link #findNewLeader} before it fails. */
    private static final int LEADER_LOOKUP_ATTEMPTS = 3;
    /** Pause between idle polls and between leader lookups, in milliseconds. */
    private static final long RETRY_PAUSE_MS = 1000L;

    /** Replica hosts of the partition, refreshed by every successful leader lookup. */
    private final List<String> m_replicaBrokers = new ArrayList<String>();

    /**
     * Prompts on standard input for broker, port, topic, partition and the maximum
     * number of messages, then reads that partition.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        SimpleExample example = new SimpleExample();
        // The Scanner wraps System.in and is deliberately left open.
        Scanner sc = new Scanner(System.in);
        System.out.println("請輸入broker節點的ip地址(如172.16.49.173)");
        String brokerIp = sc.nextLine();
        List<String> seeds = new ArrayList<String>();
        seeds.add(brokerIp);
        System.out.println("請輸入broker節點端口號(如9092)");
        int port = Integer.parseInt(sc.nextLine());
        System.out.println("請輸入要訂閱的topic名稱(如test)");
        String topic = sc.nextLine();
        System.out.println("請輸入要訂閱要查找的分區(如0)");
        int partition = Integer.parseInt(sc.nextLine());
        System.out.println("請輸入最大讀取消息數量(如10000)");
        long maxReads = Long.parseLong(sc.nextLine());
        try {
            example.run(maxReads, topic, partition, seeds, port);
        } catch (Exception e) {
            // Passing the exception lets the logger record the stack trace.
            LOG.error("Oops:" + e, e);
        }
    }

    /** Creates an example with an empty replica list. */
    public SimpleExample() {
    }

    /**
     * Reads up to {@code a_maxReads} messages from the partition, starting at the
     * earliest available offset, and logs each one.
     *
     * @param a_maxReads    maximum number of messages to read
     * @param a_topic       topic name
     * @param a_partition   partition id
     * @param a_seedBrokers broker hosts used to look up the partition metadata
     * @param a_port        broker port, used for every broker contacted
     * @throws Exception if no new leader can be found after a fetch failure, or if a
     *                   broker call fails
     */
    public void run(long a_maxReads, String a_topic, int a_partition, List<String> a_seedBrokers, int a_port) throws Exception {
        // Find the metadata for the topic and partition we are interested in.
        PartitionMetadata metadata = findLeader(a_seedBrokers, a_port, a_topic, a_partition);
        if (metadata == null) {
            LOG.error("Can't find metadata for Topic and Partition. Exiting");
            return;
        }
        if (metadata.leader() == null) {
            LOG.error("Can't find Leader for Topic and Partition. Exiting");
            return;
        }
        String leadBroker = metadata.leader().host();
        String clientName = "Client_" + a_topic + "_" + a_partition;

        SimpleConsumer consumer = new SimpleConsumer(leadBroker, a_port, SO_TIMEOUT_MS, BUFFER_SIZE_BYTES, clientName);
        try {
            long readOffset = getLastOffset(consumer, a_topic, a_partition,
                    kafka.api.OffsetRequest.EarliestTime(), clientName);
            int numErrors = 0;
            while (a_maxReads > 0) {
                if (consumer == null) {
                    // Closed after a fetch error; reconnect to the (possibly new) leader.
                    consumer = new SimpleConsumer(leadBroker, a_port, SO_TIMEOUT_MS, BUFFER_SIZE_BYTES, clientName);
                }
                FetchRequest req = new FetchRequestBuilder()
                        .clientId(clientName)
                        .addFetch(a_topic, a_partition, readOffset, FETCH_SIZE_BYTES)
                        .build();
                FetchResponse fetchResponse = consumer.fetch(req);

                if (fetchResponse.hasError()) {
                    numErrors++;
                    short code = fetchResponse.errorCode(a_topic, a_partition);
                    LOG.error("Error fetching data from the Broker:" + leadBroker + " Reason: " + code);
                    if (numErrors > MAX_FETCH_ERRORS) {
                        break;
                    }
                    if (code == ErrorMapping.OffsetOutOfRangeCode()) {
                        // We asked for an invalid offset. For the simple case, reset to the latest offset.
                        readOffset = getLastOffset(consumer, a_topic, a_partition,
                                kafka.api.OffsetRequest.LatestTime(), clientName);
                        continue;
                    }
                    consumer.close();
                    consumer = null;
                    leadBroker = findNewLeader(leadBroker, a_topic, a_partition, a_port);
                    continue;
                }
                numErrors = 0;

                long numRead = 0;
                for (MessageAndOffset messageAndOffset : fetchResponse.messageSet(a_topic, a_partition)) {
                    if (a_maxReads <= 0) {
                        // One fetch can return more messages than are still wanted.
                        break;
                    }
                    long currentOffset = messageAndOffset.offset();
                    if (currentOffset < readOffset) {
                        LOG.error("Found an old offset: " + currentOffset + " Expecting: " + readOffset);
                        continue;
                    }
                    readOffset = messageAndOffset.nextOffset();
                    ByteBuffer payload = messageAndOffset.message().payload();
                    // remaining() is the number of readable bytes even when position is not 0.
                    byte[] bytes = new byte[payload.remaining()];
                    payload.get(bytes);
                    LOG.info("the messag's offset is :" + String.valueOf(messageAndOffset.offset())
                            + " and the value is :" + new String(bytes, StandardCharsets.UTF_8));
                    numRead++;
                    a_maxReads--;
                }

                if (numRead == 0) {
                    try {
                        Thread.sleep(RETRY_PAUSE_MS);
                    } catch (InterruptedException ie) {
                        // Stop polling: with the flag restored every later sleep would
                        // throw at once and this loop would spin.
                        Thread.currentThread().interrupt();
                        break;
                    }
                }
            }
        } finally {
            // Also reached when fetch or a leader lookup throws, so the socket is not leaked.
            if (consumer != null) {
                consumer.close();
            }
        }
    }

    /**
     * Asks the broker for one offset of the partition relative to {@code whichTime}.
     *
     * @param consumer   consumer connected to the broker to ask
     * @param topic      topic name
     * @param partition  partition id
     * @param whichTime  time selector, e.g. {@code kafka.api.OffsetRequest.EarliestTime()}
     *                   or {@code LatestTime()}
     * @param clientName client id sent with the request
     * @return the first offset returned, or 0 if the broker reports an error or
     *         returns no offsets
     */
    public static long getLastOffset(SimpleConsumer consumer, String topic, int partition, long whichTime, String clientName) {
        TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo =
                new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
        requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 1));
        kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
                requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientName);
        OffsetResponse response = consumer.getOffsetsBefore(request);

        if (response.hasError()) {
            LOG.error("Error fetching data Offset Data the Broker. Reason: " + response.errorCode(topic, partition));
            return 0;
        }
        long[] offsets = response.offsets(topic, partition);
        if (offsets.length == 0) {
            // Same fallback as the error path instead of an ArrayIndexOutOfBoundsException.
            LOG.error("Broker returned no offsets for [" + topic + ", " + partition + "]");
            return 0;
        }
        return offsets[0];
    }

    /**
     * Looks up the partition leader again after a fetch failure, querying the replica
     * brokers recorded by the last successful lookup.
     *
     * @param a_oldLeader host of the leader that just failed
     * @param a_topic     topic name
     * @param a_partition partition id
     * @param a_port      broker port
     * @return host of the leader to use next
     * @throws Exception if no leader is found after {@link #LEADER_LOOKUP_ATTEMPTS} attempts
     */
    private String findNewLeader(String a_oldLeader, String a_topic, int a_partition, int a_port) throws Exception {
        for (int i = 0; i < LEADER_LOOKUP_ATTEMPTS; i++) {
            boolean goToSleep = false;
            PartitionMetadata metadata = findLeader(m_replicaBrokers, a_port, a_topic, a_partition);
            if (metadata == null) {
                goToSleep = true;
            } else if (metadata.leader() == null) {
                goToSleep = true;
            } else if (a_oldLeader.equalsIgnoreCase(metadata.leader().host()) && i == 0) {
                // First time through, if the leader hasn't changed, give ZooKeeper a second to recover.
                // Second time, assume the broker did recover before failover, or it was a non-broker issue.
                goToSleep = true;
            } else {
                return metadata.leader().host();
            }
            if (goToSleep) {
                try {
                    Thread.sleep(RETRY_PAUSE_MS);
                } catch (InterruptedException ie) {
                    // Keep the interrupt visible; the attempt count still bounds this loop.
                    Thread.currentThread().interrupt();
                }
            }
        }
        LOG.error("Unable to find new leader after Broker failure. Exiting");
        throw new Exception("Unable to find new leader after Broker failure. Exiting");
    }

    /**
     * Finds the metadata of one partition by asking each seed broker in turn for the
     * topic metadata and scanning it for the partition; the first match wins. On
     * success the recorded replica list is replaced with that partition's replicas.
     * The leader is available from {@code PartitionMetadata.leader().host()}.
     *
     * @param a_seedBrokers broker hosts to ask
     * @param a_port        broker port
     * @param a_topic       topic name
     * @param a_partition   partition id
     * @return the partition metadata, or {@code null} if no broker returned it
     */
    private PartitionMetadata findLeader(List<String> a_seedBrokers, int a_port, String a_topic, int a_partition) {
        PartitionMetadata returnMetaData = null;
        loop:
        for (String seed : a_seedBrokers) {
            SimpleConsumer consumer = null;
            try {
                consumer = new SimpleConsumer(seed, a_port, SO_TIMEOUT_MS, BUFFER_SIZE_BYTES, "leaderLookup");
                List<String> topics = Collections.singletonList(a_topic);
                TopicMetadataRequest req = new TopicMetadataRequest(topics);
                kafka.javaapi.TopicMetadataResponse resp = consumer.send(req);

                List<TopicMetadata> metaData = resp.topicsMetadata();
                for (TopicMetadata item : metaData) {
                    for (PartitionMetadata part : item.partitionsMetadata()) {
                        if (part.partitionId() == a_partition) {
                            returnMetaData = part;
                            // Found it; the finally block still closes this consumer.
                            break loop;
                        }
                    }
                }
            } catch (Exception e) {
                // Best effort: a broker that cannot be reached is skipped.
                LOG.info("Error communicating with Broker [" + seed + "] to find Leader for [" + a_topic
                        + ", " + a_partition + "] Reason: " + e);
            } finally {
                if (consumer != null) {
                    consumer.close();
                }
            }
        }
        if (returnMetaData != null) {
            // Iteration over a_seedBrokers has ended, so clearing is safe even when it
            // is this same list.
            m_replicaBrokers.clear();
            for (kafka.cluster.BrokerEndPoint replica : returnMetaData.replicas()) {
                m_replicaBrokers.add(replica.host());
            }
        }
        return returnMetaData;
    }
}