介紹:URLInjector 是我封裝的一個簡單客戶端,用來把 URL 放入分片隊列(sharded queue);只有放入分片隊列的數據纔會被 Storm 的管線處理。
package com.digitalpebble.storm.crawler;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;

import com.digitalpebble.storm.crawler.util.Configuration;
import com.digitalpebble.storm.fetchqueue.ShardedQueue;

/**
 * Small command-line client that reads URLs from a text file, one per line,
 * and adds each of them to a {@link ShardedQueue}.
 */
public class URLInjector {

    /** Queue the URLs are added to; obtained once in the constructor. */
    private final ShardedQueue queue;

    /**
     * Creates the injector around the queue returned by
     * {@code ShardedQueue.getInstance(config)}, where {@code config} comes
     * from {@code StormConfiguration.create()}.
     *
     * @throws Exception if creating the configuration or obtaining the queue
     *                   fails
     */
    URLInjector() throws Exception {
        Configuration config = StormConfiguration.create();
        queue = ShardedQueue.getInstance(config);
    }

    /**
     * Adds a single URL to the queue.
     *
     * <p>This is best effort: a failure is reported on standard error
     * (together with the offending URL) and is not propagated, so one bad
     * entry does not abort the injection of the remaining ones.
     *
     * @param url the URL to add
     */
    public void add(String url) {
        try {
            queue.add(url);
        } catch (Exception e) {
            // Name the URL so the stack trace can be tied to an input line.
            System.err.println("Failed to add URL to the queue: " + url);
            e.printStackTrace();
        }
    }

    /** Closes the underlying queue. */
    public void close() {
        queue.close();
    }

    /**
     * Reads the file named by {@code args[0]} line by line and adds every
     * non-blank, trimmed line to the queue.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the
     *             file holding one URL per line
     * @throws Exception if the injector cannot be created or the file cannot
     *                   be read
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: URLInjector <file with one URL per line>");
            System.exit(1);
        }
        String messages = args[0];

        URLInjector client = new URLInjector();
        try {
            // FileReader decodes the file with the JVM's default charset.
            BufferedReader reader = new BufferedReader(new FileReader(new File(
                    messages)));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    String url = line.trim();
                    // Blank lines carry no URL; do not enqueue empty strings.
                    if (url.length() == 0) {
                        continue;
                    }
                    client.add(url);
                }
            } finally {
                // Release the file handle even if reading or adding fails.
                reader.close();
            }
        } finally {
            // Always close the queue, including when the file cannot be opened.
            client.close();
        }
    }
}
此類僅用來做測試。有關 ShardedQueue,請參看本 ID 的另一篇博文:《Storm【實踐系列-如何寫一個爬蟲-】6.1 ShardedQueue》。
simple tips~
測試