閱讀背景:您須要對Zk,Kafka有基礎的瞭解
本章主題:詳盡的梳理ZkCoordinator的過程
package com.mixbox.storm.kafka;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.mixbox.storm.kafka.trident.GlobalPartitionInformation;

import java.util.*;

import static com.mixbox.storm.kafka.KafkaUtils.taskId;

/**
 * Partition coordinator that keeps one {@link PartitionManager} per partition
 * assigned to this task, and periodically recomputes that assignment from the
 * broker information returned by a {@link DynamicBrokersReader}.
 *
 * @author Yin Shuai
 */
public class ZkCoordinator implements PartitionCoordinator {

    public static final Logger LOG = LoggerFactory
            .getLogger(ZkCoordinator.class);

    SpoutConfig _spoutConfig;

    int _taskIndex;

    int _totalTasks;

    String _topologyInstanceId;

    // One partition manager per partition currently owned by this task.
    Map<Partition, PartitionManager> _managers = new HashMap<Partition, PartitionManager>();

    // Snapshot of _managers.values() taken at the end of the last successful refresh.
    List<PartitionManager> _cachedList;

    // Wall-clock time (ms) of the last refresh; null until the first refresh has run.
    Long _lastRefreshTime = null;

    // Minimum interval between two refreshes, in milliseconds.
    int _refreshFreqMs;

    // Dynamic partition connections handed to every PartitionManager created here.
    DynamicPartitionConnections _connections;

    // Source of the global partition information used by refresh().
    DynamicBrokersReader _reader;

    ZkState _state;

    Map _stormConf;

    /**
     * Creates a coordinator whose {@link DynamicBrokersReader} is built from
     * the given Storm configuration and the {@link ZkHosts} of the spout
     * configuration.
     *
     * @param connections
     *            dynamic partition connections
     * @param stormConf
     *            Storm configuration
     * @param spoutConfig
     *            spout configuration; its {@code hosts} must be a {@link ZkHosts}
     * @param state
     *            ZkState handle passed on to each partition manager
     * @param taskIndex
     *            index of this task
     * @param totalTasks
     *            total number of tasks
     * @param topologyInstanceId
     *            id of the topology instance
     */
    public ZkCoordinator(DynamicPartitionConnections connections,
            Map stormConf, SpoutConfig spoutConfig, ZkState state,
            int taskIndex, int totalTasks, String topologyInstanceId) {
        this(connections, stormConf, spoutConfig, state, taskIndex,
                totalTasks, topologyInstanceId, buildReader(stormConf,
                        spoutConfig));
    }

    /**
     * Creates a coordinator that uses the supplied reader instead of building
     * one.
     *
     * @param connections
     *            dynamic partition connections
     * @param stormConf
     *            Storm configuration
     * @param spoutConfig
     *            spout configuration; its {@code hosts} must be a {@link ZkHosts}
     * @param state
     *            ZkState handle passed on to each partition manager
     * @param taskIndex
     *            index of this task
     * @param totalTasks
     *            total number of tasks
     * @param topologyInstanceId
     *            id of the topology instance
     * @param reader
     *            reader used to obtain the global partition information
     */
    public ZkCoordinator(DynamicPartitionConnections connections,
            Map stormConf, SpoutConfig spoutConfig, ZkState state,
            int taskIndex, int totalTasks, String topologyInstanceId,
            DynamicBrokersReader reader) {
        _spoutConfig = spoutConfig;
        _connections = connections;
        _taskIndex = taskIndex;
        _totalTasks = totalTasks;
        _topologyInstanceId = topologyInstanceId;
        _stormConf = stormConf;
        _state = state;
        ZkHosts brokerConf = (ZkHosts) spoutConfig.hosts;
        // The configured refresh frequency is in seconds; the comparison in
        // getMyManagedPartitions() works in milliseconds.
        _refreshFreqMs = brokerConf.refreshFreqSecs * 1000;
        _reader = reader;
    }

    /**
     * Builds the default reader from the {@link ZkHosts} settings and topic
     * of the spout configuration.
     *
     * @param stormConf
     *            Storm configuration
     * @param spoutConfig
     *            spout configuration; its {@code hosts} must be a {@link ZkHosts}
     * @return a new reader for the configured topic
     */
    private static DynamicBrokersReader buildReader(Map stormConf,
            SpoutConfig spoutConfig) {
        ZkHosts hosts = (ZkHosts) spoutConfig.hosts;
        return new DynamicBrokersReader(stormConf, hosts.brokerZkStr,
                hosts.brokerZkPath, spoutConfig.topic);
    }

    /**
     * Returns the partition managers owned by this task, refreshing them
     * first when no refresh has happened yet or when more than
     * {@code _refreshFreqMs} milliseconds have passed since the last one.
     *
     * @return the list cached by the most recent refresh
     */
    @Override
    public List<PartitionManager> getMyManagedPartitions() {
        if (_lastRefreshTime == null
                || (System.currentTimeMillis() - _lastRefreshTime) > _refreshFreqMs) {
            refresh();
            _lastRefreshTime = System.currentTimeMillis();
        }
        return _cachedList;
    }

    /**
     * Recomputes the partitions assigned to this task, closes the managers of
     * partitions that are no longer assigned, creates managers for newly
     * assigned ones and rebuilds the cached list.
     *
     * @throws RuntimeException
     *             wrapping any exception raised while refreshing; the cached
     *             list is left untouched in that case
     */
    void refresh() {
        try {
            LOG.info(taskId(_taskIndex, _totalTasks)
                    + "Refreshing partition manager connections");

            // Global partition information for the topic.
            GlobalPartitionInformation brokerInfo = _reader.getBrokerInfo();

            // Partitions that belong to this task.
            List<Partition> mine = KafkaUtils.calculatePartitionsForTask(
                    brokerInfo, _totalTasks, _taskIndex);

            // Partitions this task currently manages (live view of the map keys).
            Set<Partition> curr = _managers.keySet();

            // Newly assigned partitions: mine minus the ones already managed.
            Set<Partition> newPartitions = new HashSet<Partition>(mine);
            newPartitions.removeAll(curr);

            // Partitions to drop: currently managed minus the ones still assigned.
            // The copy is required because curr is a view of _managers, which is
            // modified in the loop below.
            Set<Partition> deletedPartitions = new HashSet<Partition>(curr);
            deletedPartitions.removeAll(mine);

            LOG.info(taskId(_taskIndex, _totalTasks)
                    + "Deleted partition managers: "
                    + deletedPartitions.toString());

            for (Partition id : deletedPartitions) {
                PartitionManager man = _managers.remove(id);
                man.close();
            }

            LOG.info(taskId(_taskIndex, _totalTasks)
                    + "New partition managers: " + newPartitions.toString());

            for (Partition id : newPartitions) {
                PartitionManager man = new PartitionManager(_connections,
                        _topologyInstanceId, _state, _stormConf,
                        _spoutConfig, id);
                _managers.put(id, man);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        _cachedList = new ArrayList<PartitionManager>(_managers.values());
        LOG.info(taskId(_taskIndex, _totalTasks) + "Finished refreshing");
    }

    /**
     * Looks up the manager for a partition.
     *
     * @param partition
     *            partition to look up
     * @return the manager registered for {@code partition}, or {@code null}
     *         if this task does not currently manage it
     */
    @Override
    public PartitionManager getManager(Partition partition) {
        return _managers.get(partition);
    }
}
1 : 首先 ZkCoordinator 實現 PartitionCoordinator 的接口
package com.mixbox.storm.kafka;

import java.util.List;

/**
 * Contract for objects that hand out the {@link PartitionManager}s a task is
 * responsible for.
 *
 * @author Yin Shuai
 */
public interface PartitionCoordinator {

    /**
     * Gets the list of partition managers managed by the caller.
     *
     * @return the managed {@link PartitionManager}s
     */
    List<PartitionManager> getMyManagedPartitions();

    /**
     * Gets the manager for a specific partition.
     *
     * @param partition
     *            the partition whose manager is wanted
     * @return the {@link PartitionManager} for {@code partition}
     */
    PartitionManager getManager(Partition partition);
}
第一個方法拿到全部的 PartitionManager
第二個方法依據特定的 Partition去獲得一個分區管理器
對於PartitionManager 請參看本空間的另一篇博文:
Storm-kafka【接口實現】4-2:PartitionManager: 分區管理器