Maven 依赖（groupId : artifactId : version）：
- org.apache.lucene : lucene-core : 5.3.1
- org.apache.lucene : lucene-analyzers-common : 5.3.1
- org.apache.lucene : lucene-analyzers-smartcn : 5.3.1
- org.apache.lucene : lucene-queryparser : 5.3.1
- org.apache.lucene : lucene-highlighter : 5.3.1
- com.janeluo : ikanalyzer : 2012_u6
IKAnalyzer 不兼容 Lucene 5.x（`createComponents` 的签名已变化）。添加下面两个适配类即可：
import org.apache.lucene.analysis.Analyzer;public class IKAnalyzer5x extends Analyzer { private boolean useSmart; public boolean useSmart() { return this.useSmart; } public void setUseSmart(boolean useSmart) { this.useSmart = useSmart; } public IKAnalyzer5x() { this(false); } public IKAnalyzer5x(boolean useSmart) { this.useSmart = useSmart; } @Override protected TokenStreamComponents createComponents(String fieldName) { IKTokenizer5x _IKTokenizer = new IKTokenizer5x(this.useSmart); return new TokenStreamComponents(_IKTokenizer); }}
import org.apache.lucene.analysis.Tokenizer;import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;import org.apache.lucene.analysis.tokenattributes.TypeAttribute;import org.apache.lucene.util.AttributeFactory;import org.wltea.analyzer.core.IKSegmenter;import org.wltea.analyzer.core.Lexeme;import java.io.IOException;public class IKTokenizer5x extends Tokenizer { private IKSegmenter _IKImplement; private final CharTermAttribute termAtt = (CharTermAttribute)this.addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAtt = (OffsetAttribute)this.addAttribute(OffsetAttribute.class); private final TypeAttribute typeAtt = (TypeAttribute)this.addAttribute(TypeAttribute.class); private int endPosition; public IKTokenizer5x() { this._IKImplement = new IKSegmenter(this.input, true); } public IKTokenizer5x(boolean useSmart) { this._IKImplement = new IKSegmenter(this.input, useSmart); } public IKTokenizer5x(AttributeFactory factory) { super(factory); this._IKImplement = new IKSegmenter(this.input, true); } public boolean incrementToken() throws IOException { this.clearAttributes(); Lexeme nextLexeme = this._IKImplement.next(); if(nextLexeme != null) { this.termAtt.append(nextLexeme.getLexemeText()); this.termAtt.setLength(nextLexeme.getLength()); this.offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition()); this.endPosition = nextLexeme.getEndPosition(); this.typeAtt.setType(nextLexeme.getLexemeTypeString()); return true; } else { return false; } } public void reset() throws IOException { super.reset(); this._IKImplement.reset(this.input); } public final void end() { int finalOffset = this.correctOffset(this.endPosition); this.offsetAtt.setOffset(finalOffset, finalOffset); }}
调用测试：对一段商品标题分词，并打印每个词元的偏移量。
Analyzer analyzer = new IKAnalyzer5x(true);TokenStream tokenStream = analyzer.tokenStream("test", "一个新款韩版长袖羊驼绒羊毛皮草外套女海宁皮草");OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);tokenStream.reset();while (tokenStream.incrementToken()) { System.out.println(offsetAttribute.toString());}