在語義的世界裏,能夠近似地說:萬事萬物都是特徵提取。你只要找到特徵,事情就好辦。…………你指望畢其功於一役嗎?自然語言處理的真實應用裏是很難有什麼場景找到一個通吃特徵的。都是一層一層特徵疊加的。一層特徵去掉一部分垃圾數據。如此反覆,終成正果。注意方法論。
統計粗且糙,乃大錘。規則細而精,乃小錘。先大場後細棋。
KafkaSink.java
|
import kafka.javaapi.producer.Producer;
……
public class KafkaSink extends AbstractSink implements Configurable {
……
private Producer<String, byte[]> producer;
……
@Override
public Status process() throws EventDeliveryException {
Channel channel = getChannel();
Transaction tx = channel.getTransaction();
try {
tx.begin();
Event e = channel.take();
if (e == null) {
tx.rollback();
return Status.BACKOFF;
}
producer.send(new KeyedMessage<String, byte[]>(topic, e.getBody()));
tx.commit();
return Status.READY;
}
catch (Exception e) {
|
KafkaSpout.java
|
public abstract class KafkaSpout implements IRichSpout {
……
@Override
public void activate() {
……
for (final KafkaStream stream : streamList) {
executor.submit(new Runnable() {
@Override
public void run() {
ConsumerIterator<byte[], byte[]> iterator = stream.iterator();
while (iterator.hasNext()) {
if (spoutPending.get() <= 0) {
sleep(1000);
continue;
}
MessageAndMetadata<byte[], byte[]> next = iterator.next();
byte[] message = next.message();
List<Object> tuple = null;
try{
tuple = generateTuple(message);
}
catch(Exception e) {
e.printStackTrace();
}
if (tuple == null || tuple.size() != outputFieldsLength) {
continue;
}
collector.emit(tuple);
spoutPending.decrementAndGet();
}
}
|
EvaluateBolt.java
|
public class EvaluateBolt extends BaseBasicBolt {
……
@Override
public void execute(Tuple input, BasicOutputCollector collector) {
……
if (LogWebsiteSpout.PAGE_EVENT_BROWSE.equals(event)) {
if (LogWebsiteSpout.PAGE_TYPE_GOODS.equals(pageType)) {
incrBaseStatistics(baseKeyMap, BROWSE_ALL, 1);
}
else if (LogWebsiteSpout.PAGE_TYPE_PAY1.equals(pageType)) {
incrBaseStatistics(baseKeyMap, ORDER_ALL, 1);
}
String recDisplay = input.getStringByField(LogWebsiteSpout.FIELD_REC_DISPLAY);
recDisplayStatistics(recDisplay, time, pageType, baseKeyMap);
}
else if (LogWebsiteSpout.PAGE_EVENT_CLICK.equals(event)) {
String recType = input.getStringByField(LogWebsiteSpout.FIELD_REC_TYPE);
|
窩窩的解決方案介紹列表:
#研發解決方案#基於StatsD+Graphite的智能監控解決方案
#研發中間件介紹#定時任務調度與管理JobCenter
#研發解決方案介紹#Recsys-Evaluate(推薦評測)
#研發解決方案介紹#Tracing(鷹眼)
#研發解決方案介紹#IdCenter(內部統一認證系統)
#研發解決方案介紹#基於ES的搜索+篩選+排序解決方案
#數據技術選型#即席查詢Shib+Presto,集羣任務調度HUE+Oozie