Solr插件開發

場景介紹:

在處理輸入的文本時,須要將 http://bit.ly/3ynriE 等短鏈接轉換爲真實鏈接 lucene.apache.org/solr。


1,實現TokenFilter

package com.url.plugin;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import java.io.IOException;
import java.util.regex.Pattern;


public class ResolveUrlTokenFilter extends TokenFilter {

    private final CharTermAttribute charTermAttribute=addAttribute(CharTermAttribute.class);
    private final Pattern patternToMatchShortenedUrls;

    public ResolveUrlTokenFilter(TokenStream input, Pattern patternToMatchShortenedUrls) {
        super(input);
        this.patternToMatchShortenedUrls = patternToMatchShortenedUrls;
    }

    @Override
    public boolean incrementToken() throws IOException {
        if (!input.incrementToken())
            return false;

        //charTermAttribute會保存讀取char
        char[] term=charTermAttribute.buffer();
        int len=term.length;
        //構造字符串
        String token=new String(term,0,len);
        //匹配token中是否出現咱們須要重構的場景
        if(patternToMatchShortenedUrls.matcher(token).matches()){
            charTermAttribute.setEmpty().append(resolveUrlToken(token));
        }

        return true;
    }

    private String resolveUrlToken(String token) {
        //TODO 根據實際需求處理token
        try {
            if ("http://bit.ly/3ynriE".equals(token)) {
                return "lucene.apache.org/solr";
            } else if ("http://bit.ly/15tzw".equals(token)) {
                return "manning.com";
            }
        } catch (Exception exc) {
            // rather than failing analysis if you can't resolve the URL,
            // you should log the error and return the un-resolved value
            exc.printStackTrace();
        }
        return token;
    }
}

2,實現TokenFilterFactory

package com.url.plugin;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;

import java.util.Map;
import java.util.regex.Pattern;


public class ResolveUrlTokenFilterFactory extends TokenFilterFactory {

    private Pattern patternToMatchShortenedUrls;

    public ResolveUrlTokenFilterFactory(Map<String, String> args) {
        super(args);
        assureMatchVersion();
        //從solr讀取的配置文件信息中獲取正則表達式信息
        String shortenedUrls=require(args,"shortenedUrlPattern");
        patternToMatchShortenedUrls=Pattern.compile(shortenedUrls);
    }

    @Override
    public TokenFilter create(TokenStream tokenStream) {
        //建立ResolveUrlTokenFilter實例對象
        return new ResolveUrlTokenFilter(tokenStream,patternToMatchShortenedUrls);
    }
}

3,將其打成jar包

4,在solr的schema文件中添加以下內容:

<!--
  Text field whose index-time analysis resolves shortened URLs through the
  custom ResolveUrlTokenFilterFactory.

  The tokenizer is UAX29URLEmailTokenizerFactory rather than
  StandardTokenizerFactory: the standard tokenizer breaks a URL apart at ':'
  and '/', so the URL filter would never receive a whole short URL and its
  pattern could never match. The URL-aware tokenizer emits each URL as a
  single token. The query analyzer uses the same tokenizer so that a resolved
  URL indexed as one token can be matched by a query for that URL.
-->
<fieldType name="text_plugin" class="solr.TextField" positionIncrementGap="100">
	<analyzer type="index">
	<tokenizer class="solr.UAX29URLEmailTokenizerFactory"/>
	<!-- Must run before lower-casing: the short-URL codes are case-sensitive. -->
	<filter class="com.url.plugin.ResolveUrlTokenFilterFactory" shortenedUrlPattern="http:\/\/bit.ly\/[\w\-]+" />
	<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
	<filter class="solr.LowerCaseFilterFactory"/>
	</analyzer>
	<analyzer type="query">
	<tokenizer class="solr.UAX29URLEmailTokenizerFactory"/>
	<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
	<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
	<filter class="solr.LowerCaseFilterFactory"/>
	</analyzer>
</fieldType>

5,在solr的根目錄下建立plugins文件夾(位置同dist、contrib文件夾),並將第3步生成的jar放入其中

6,在solrconfig.xml中添加:

<lib dir="../../../plugins/" regex=".*\.jar" />

7,java -jar start.jar

相關文章
相關標籤/搜索