Environment requirements (pre-install)
# 0. Set up cluster hosts entries to make later configuration easier
vim /etc/hosts
172.1.1.1 Data_Center_ZK_1
172.1.1.2 Data_Center_ZK_2
172.1.1.3 Data_Center_ZK_3
# 1. unpack and cd to the root
tar xzf zookeeper-3.4.10.tar.gz && cd zookeeper-3.4.10
# 2. Standalone ZooKeeper config, shown for reference only
# cp conf/zoo_sample.cfg conf/zoo.cfg
# vim conf/zoo.cfg
# tickTime=2000
# initLimit=10
# syncLimit=5
# dataDir=/opt/data/zookeeper
# clientPort=2181
# maxClientCnxns=60
# autopurge.snapRetainCount=3
# autopurge.purgeInterval=24
# 2. Cluster ZooKeeper config
# Note: each server in the ZooKeeper ensemble must have a distinct server id
vim /opt/data/zookeeper/myid # set each ZooKeeper server's id, e.g. 1, 2, 3
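# A non-interactive sketch of the same step (run the matching line on each host):
# echo 1 > /opt/data/zookeeper/myid   # on Data_Center_ZK_1
# echo 2 > /opt/data/zookeeper/myid   # on Data_Center_ZK_2
# echo 3 > /opt/data/zookeeper/myid   # on Data_Center_ZK_3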
cp conf/zoo_sample.cfg conf/zoo.cfg
vim conf/zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/data/zookeeper
clientPort=2181
maxClientCnxns=60
autopurge.purgeInterval=24
server.1=Data_Center_ZK_1:2888:3888
server.2=Data_Center_ZK_2:2888:3888
server.3=Data_Center_ZK_3:2888:3888
# 3. Configure the Java heap size (2G/4G)
# Note: keep ZooKeeper off swap as much as possible; swapping degrades performance badly
# Here, on a host with 4G of total memory, set the initial JVM heap to 512M and the max to 2G
vim conf/java.env
export JVMFLAGS="-Xmx2048m -Xms512m"
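# Optional, host-level (an assumption about your environment): lower the kernel's
# tendency to swap so the ZooKeeper JVM stays resident in memory
# sysctl -w vm.swappiness=1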
# 4. Start the service
bin/zkServer.sh start
# bin/zkServer.sh stop
bin/zkServer.sh status
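# Optional sanity check (a sketch, assuming nc is installed): the srvr four-letter
# command reports this node's role in the ensemble
echo srvr | nc localhost 2181 | grep Mode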
# The steps above must be repeated on each of the three servers
# 5. Connect a client to test cluster availability
bin/zkCli.sh -server Data_Center_ZK_1:2181
help
ls /
create /test "hello world!"
get /test
bin/zkCli.sh -server Data_Center_ZK_3:2181
help
ls /
get /test
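# Optional cleanup once the round-trip works
delete /test
quit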
# 0. Set up cluster hosts entries to make later configuration easier
vim /etc/hosts
172.1.1.1 Data_Center_Kafka_1
172.1.1.2 Data_Center_Kafka_2
172.1.1.3 Data_Center_Kafka_3
# 1. unpack
tar xzf kafka_2.11-1.0.0.tgz
cd kafka_2.11-1.0.0
# 2. Cluster configuration
# Kafka uses ZooKeeper. Make sure the ZooKeeper step above is complete and the service is running
vim config/server.properties
# The id of the broker. Configure a different broker id on each of the 3 servers
broker.id=1
# Zookeeper connection string
zookeeper.connect=Data_Center_ZK_1:2181,Data_Center_ZK_2:2181,Data_Center_ZK_3:2181
# Socket server config
advertised.host.name=Data_Center_Kafka_1
advertised.port=9092
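# Note: advertised.host.name / advertised.port are deprecated as of Kafka 1.0;
# the equivalent newer setting would be:
# advertised.listeners=PLAINTEXT://Data_Center_Kafka_1:9092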
# 3. Start the service
bin/kafka-server-start.sh config/server.properties
# bin/kafka-server-stop.sh
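# The start script also accepts a -daemon flag to run the broker in the background:
# bin/kafka-server-start.sh -daemon config/server.properties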
# The steps above must be repeated on each of the three servers
# 4. Client connection test
# List all topics
bin/kafka-topics.sh --list --zookeeper Data_Center_ZK_1:2181
# Create a topic
bin/kafka-topics.sh --create --zookeeper Data_Center_ZK_1:2181 --replication-factor 3 --partitions 1 --topic my-replicated-topic
# Run the producer and then type a few messages into the console to send to the server.
bin/kafka-console-producer.sh --broker-list Data_Center_Kafka_1:9092 --topic my-replicated-topic
# Start a consumer
bin/kafka-console-consumer.sh --bootstrap-server Data_Center_Kafka_2:9092 --topic my-replicated-topic --from-beginning
# Check the state of the replicated topic
bin/kafka-topics.sh --describe --zookeeper Data_Center_ZK_1:2181 --topic my-replicated-topic
# Output showing the corresponding cluster state
# Topic: my-replicated-topic Partition: 0 Leader: 1 Replicas: 1,2,0 Isr: 0,2,1
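# Leader: the broker id currently serving reads/writes for the partition;
# Replicas: all brokers holding a copy; Isr: the replicas currently in sync with the leader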
# 5. Inspect Kafka's state stored in ZooKeeper
/opt/tools/zookeeper-3.4.10/bin/zkCli.sh -server Data_Center_ZK_1:2181
help
ls /
ls /brokers
ls /consumers
ls /config
# Add configs: ZooKeeper and Kafka daemons
vim /etc/supervisord.conf
[program:zookeeper]
;command=/opt/tools/zookeeper-3.4.10/bin/zkServer.sh start
command=/opt/tools/zookeeper-3.4.10/bin/zkServer.sh start-foreground
[program:kafka]
;command=/opt/tools/kafka_2.11-1.0.0/bin/kafka-server-start.sh
command=/opt/tools/kafka_2.11-1.0.0/bin/kafka-server-start.sh /opt/tools/kafka_2.11-1.0.0/config/server.properties
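; Optional per-program settings (a sketch; the log path is an assumption):
;autostart=true
;autorestart=true
;stdout_logfile=/var/log/supervisor/%(program_name)s.log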
# Start supervisord
supervisord -c /etc/supervisord.conf
# Check status
supervisorctl status all
Environment requirements (pre-install)
For the ZooKeeper cluster setup, see the first part of this chapter
python --version # the storm command-line script requires Python
# 0. Set up cluster hosts entries to make later configuration easier
vim /etc/hosts
172.1.1.1 Data_Center_Storm_1
172.1.1.2 Data_Center_Storm_2
172.1.1.3 Data_Center_Storm_3
mkdir -p /opt/data/storm
# 1. Download and extract a Storm release to Nimbus and worker machines
tar xzf apache-storm-1.2.1.tar.gz -C /opt/tools/
cd /opt/tools/apache-storm-1.2.1
# 2. Fill in mandatory configurations into storm.yaml
vim conf/storm.yaml
storm.local.dir: "/opt/data/storm"
storm.zookeeper.servers:
- "Data_Center_ZK_1"
- "Data_Center_ZK_2"
- "Data_Center_ZK_3 nimbus.seeds : ["Data_Center_Storm_1"] drpc.servers: - "Data_Center_Storm_1" #- "Data_Center_Storm_2" #- "Data_Center_Storm_3" drpc.port: 3772 # 其它配置都默認 # 3. Launch daemons under supervision using "storm" script and a supervisor of your choice # 在Storm-1上啓動 nimbus、supervisor、ui nohup storm nimbus & nohup storm supervisor & nohup storm ui & nohup storm drpc & # 在Storm-二、Storm-3上啓動 supervisor nohup storm supervisor & # 啓動成功後能夠經過 # http://Data_Center_Storm_1:8080 # 來查看storm集羣狀態 複製代碼
# Add configs: Storm-Nimbus | Storm-Supervisor | Storm-UI | Storm-DRPC
# Note: the UI and Nimbus are configured only on node 1
vim /etc/supervisord.conf
[program:storm_nimbus]
;nohup storm nimbus &
command=/opt/tools/apache-storm-1.2.1/bin/storm nimbus
[program:storm_supervisor]
;nohup storm supervisor &
command=/opt/tools/apache-storm-1.2.1/bin/storm supervisor
[program:storm_ui]
;nohup storm ui &
command=/opt/tools/apache-storm-1.2.1/bin/storm ui
[program:storm_drpc]
;nohup storm drpc &
command=/opt/tools/apache-storm-1.2.1/bin/storm drpc
# Start supervisord
supervisord -c /etc/supervisord.conf
# Check status
supervisorctl status all
# 0. Extract the Storm release
tar xzf software/apache-storm-1.2.1.tar.gz -C tools/
# 1. Configure the environment
vim /etc/profile.d/global_ops_cmd.sh
export JAVA_HOME="/usr/java/jdk1.8.0_161"
export MVN_HOME="/opt/tools/apache-maven-3.5.2"
export STORM_HOME="/opt/tools/apache-storm-1.2.1"
export PATH="$PATH:$STORM_HOME/bin:$MVN_HOME/bin"
. /etc/profile.d/global_ops_cmd.sh
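# Sanity check that the storm client is now on the PATH
storm version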
# 2. Configure remote cluster information and point the client at the nimbus node
# The local Storm configs are the ones in ~/.storm/storm.yaml merged in with the configs in defaults.yaml
vim ~/.storm/storm.yaml
nimbus.seeds: ["Data_Center_Storm_1"]
# 3. Check the cluster's topology status
storm list
# If it is not configured locally, it can be passed as a command-line option
# storm list -c nimbus.host=Data_Center_Storm_1
# 4. Other common Storm client commands
storm kill topology-name [-w wait-time-secs]
storm activate topology-name
storm deactivate topology-name
storm jar topology-jar-path class ...
# 5. Fetch the latest code: git clone the repo
# (the code for each release lives under a tag)
cd /opt/apps
git clone git://github.com/apache/storm.git
# Root dir
cd /opt/apps/storm/
# 1. Check out the code version you need, to avoid problems caused by version mismatches between the examples and the cluster
# Our Storm cluster runs 1.2.1, so we switch to the matching tag
git tag
git checkout tags/v1.2.1
cd /opt/apps/storm/examples/storm-starter
mvn clean package
# Run the WordCountTopology in remote/cluster mode,
storm jar target/storm-starter-*.jar org.apache.storm.starter.WordCountTopology WordCountProduction remote
# Run the RollingTopWords in remote/cluster mode,
# under the name "production-topw-1"
storm jar target/storm-starter-*.jar org.apache.storm.starter.RollingTopWords production-topw-1 remote
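# Confirm the submitted topologies are up (same client command as in the previous section)
storm list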
# 1. Check out the code version you need, to avoid problems caused by version mismatches between the examples and the cluster
# Our Storm cluster runs 1.2.1, so we switch to the matching tag
git tag
git checkout tags/v1.2.1
cd examples/storm-kafka-client-examples/
# 2. Adjust the project dependencies
# The scope must be explicitly set to compile; otherwise these classes are not packaged
# into the topology jar and you may hit errors like NoClassDefFoundError at runtime
vim ./pom.xml
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-kafka-client</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>${storm.kafka.artifact.id}</artifactId>
<version>${storm.kafka.client.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${storm.kafka.client.version}</version>
<scope>compile</scope>
</dependency>
# 3. Modify the example code for this version
vim src/main/java/org/apache/storm/kafka/trident/TridentKafkaClientWordCountNamedTopics.java
// Update the parameter list of the existing newKafkaTridentSpoutOpaque method as follows:
private KafkaTridentSpoutOpaque<String, String> newKafkaTridentSpoutOpaque(String broker, String topic1, String topic2) { //...
// Update the parameter list of the existing newKafkaSpoutConfig method as follows:
protected KafkaSpoutConfig<String,String> newKafkaSpoutConfig(String broker, String topic1, String topic2) { //...
// The original code defaults to local mode:
// DrpcResultsPrinter.remoteClient().printResults(60, 1, TimeUnit.SECONDS);
// To run in remote mode, replace it with the following:
Thread.sleep(2000);
Config drpc = new Config();
drpc.setDebug(false);
drpc.put("storm.thrift.transport", "org.apache.storm.security.auth.SimpleTransportPlugin");//"backtype.storm.security.auth.SimpleTransportPlugin");
drpc.put(Config.STORM_NIMBUS_RETRY_TIMES, 3);
drpc.put(Config.STORM_NIMBUS_RETRY_INTERVAL, 10);
drpc.put(Config.STORM_NIMBUS_RETRY_INTERVAL_CEILING, 20);
drpc.put(Config.DRPC_MAX_BUFFER_SIZE, 1048576);
System.out.printf("drpc config: %s \n", drpc);
try {
    DrpcResultsPrinter client = DrpcResultsPrinter.remoteClient(drpc, "Data_Center_Storm_1", 3772);
    System.out.printf("client: %s \n", client);
    client.printResults(60, 1, TimeUnit.SECONDS);
} catch (Exception e) {
    e.printStackTrace();
} finally {
    System.out.printf("finally \n");
}
# 4. Package with maven
# Pass two properties matching your Kafka version: kafka_artifact_id and kafka_broker_version
# mvn clean package -Dstorm.kafka.artifact.id=<kafka_artifact_id> -Dstorm.kafka.client.version=<kafka_broker_version>
# The version installed here is kafka_2.11-1.0.0
mvn clean package -Dstorm.kafka.artifact.id=kafka_2.11 -Dstorm.kafka.client.version=1.0.0
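# Optional check (a sketch): confirm the kafka client classes were bundled into the jar
jar tf target/storm-kafka-client-examples-1.2.1.jar | grep -m 5 'org/apache/kafka'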
# 5. Submit the storm topology
# Note the last 4 arguments, which are respectively:
# the kafka broker; the name of topology 1 (produces msg data); the name of topology 2 (produces msg data); remote execution (not local mode)
storm jar target/storm-kafka-client-examples-1.2.1.jar org.apache.storm.kafka.trident.TridentKafkaClientWordCountNamedTopics Data_Center_Kafka_2:9092 kafka-prod-1 kafka-prod-2 remote
# storm -c nimbus.host=Data_Center_Storm_1 jar target/storm-kafka-client-examples-1.2.1.jar org.apache.storm.kafka.trident.TridentKafkaClientWordCountNamedTopics
# Troubleshooting: common issues
# 1. Dependency problem: some dependencies cannot be found
# Error: A JNI error has occurred, please check your installation and try again
# Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/storm/kafka/...
# Fix: see the dependency-scope settings in step 2 above
# 2. Kafka producer fails to write
# org.apache.kafka.common.errors.TimeoutException: Failed to update metadata after 60000 ms.
# org.apache.kafka.common.errors.TimeoutException: Timeout expired while fetching topic metadata
# Fix: make sure the Kafka socket-server settings below are configured
vim config/server.properties
# Socket server config
advertised.host.name=Data_Center_Kafka_1
advertised.port=9092
# 3. Kafka consumer fails to connect over DRPC
# java.lang.RuntimeException:
# No DRPC servers configured for topology at org.apache.storm.drpc.DRPCSpout.open(DRPCSpout.java:149) at org.apache.storm.trident.spout.RichSpoutBatchTriggerer.open(RichSpo
1. Start the DRPC server
vim /opt/tools/apache-storm-1.2.1/conf/storm.yaml
drpc.servers:
- "Juliye_Data_Center_Storm_1"
#- "Juliye_Data_Center_Storm_2"
#- "Juliye_Data_Center_Storm_3"
drpc.port: 3772
2. Configure the connection in the code
vim src/main/java/org/apache/storm/kafka/trident/TridentKafkaClientWordCountNamedTopics.java
# Apply the same remote DrpcResultsPrinter.remoteClient(...) change shown in step 3 of the packaging section above