package NaviveBayesClassify;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;
public class ChineseSpliter {get
/**it
* 對給定的文本進行中文分詞io
*
* @param text
* 給定的文本
* @param splitToken
* 用於分割的標記,如"|"
* @return 分詞完畢的文本
* @throws IOException
*/
public static String split(String text, String splitToken) throws IOException {
String result = null;
// 建立分詞對象
@SuppressWarnings("resource")
Analyzer analyzer = new IKAnalyzer(true);
StringReader reader = new StringReader(text);
// 分詞
TokenStream ts = analyzer.tokenStream("", reader);
CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
// 遍歷分詞數據
while (ts.incrementToken()) {
System.out.print(term.toString() + splitToken);
result = term.toString() + splitToken + result;
}
reader.close();
return result;
}
}