import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class Sugesstion {
private static final String GRAMMED_WORDS_FIELD = "words";
private static final String SOURCE_WORD_FIELD = "sourceWord";
private static final String COUNT_FIELD = "count";
private static final String[] ENGLISH_STOP_WORDS = {
"a", "an", "and", "are", "as", "at", "be", "but", "by",
"for", "i", "if", "in", "into", "is",
"no", "not", "of", "on", "or", "s", "such",
"t", "that", "the", "their", "then", "there", "these",
"they", "this", "to", "was", "will", "with"
};
private final Directory autoCompleteDirectory;
private IndexReader autoCompleteReader;
private IndexSearcher autoCompleteSearcher;
public Sugesstion(String autoCompleteDir) throws IOException {
this.autoCompleteDirectory = FSDirectory.getDirectory(autoCompleteDir,
null);
reOpenReader();
}
public List<String> suggestTermsFor(String term) throws IOException {
// get the top 5 terms for query
Query query = new TermQuery(new Term(GRAMMED_WORDS_FIELD, term));
Sort sort = new Sort(COUNT_FIELD, true);
TopDocs docs = autoCompleteSearcher.search(query, null, 5, sort);
List<String> suggestions = new ArrayList<String>();
for (ScoreDoc doc : docs.scoreDocs) {
suggestions.add(autoCompleteReader.document(doc.doc).get(
SOURCE_WORD_FIELD));
}
return suggestions;
}
@SuppressWarnings("unchecked")
public void reIndex(Directory sourceDirectory, String fieldToAutocomplete)
throws CorruptIndexException, IOException {
// build a dictionary (from the spell package)
IndexReader sourceReader = IndexReader.open(sourceDirectory);
LuceneDictionary dict = new LuceneDictionary(sourceReader,
fieldToAutocomplete);
// code from
// org.apache.lucene.search.spell.SpellChecker.indexDictionary(
// Dictionary)
IndexReader.unlock(autoCompleteDirectory);
// use a custom analyzer so we can do EdgeNGramFiltering
IndexWriter writer = new IndexWriter(autoCompleteDirectory,
new Analyzer() {
public TokenStream tokenStream(String fieldName,
Reader reader) {
TokenStream result = new StandardTokenizer(reader);
result = new StandardFilter(result);
result = new LowerCaseFilter(result);
result = new ISOLatin1AccentFilter(result);
result = new StopFilter(result,
ENGLISH_STOP_WORDS);
result = new EdgeNGramTokenFilter(
result, Side.FRONT,1, 20);
return result;
}
}, true);
writer.setMergeFactor(300);
writer.setMaxBufferedDocs(150);
// go through every word, storing the original word (incl. n-grams)
// and the number of times it occurs
Map<String, Integer> wordsMap = new HashMap<String, Integer>();
Iterator<String> iter = (Iterator<String>) dict.getWordsIterator();
while (iter.hasNext()) {
String word = iter.next();
int len = word.length();
if (len < 3) {
continue; // too short we bail but "too long" is fine...
}
if (wordsMap.containsKey(word)) {
throw new IllegalStateException(
"This should never happen in Lucene 2.3.2");
// wordsMap.put(word, wordsMap.get(word) + 1);
} else {
// use the number of documents this word appears in
wordsMap.put(word, sourceReader.docFreq(new Term(
fieldToAutocomplete, word)));
}
}
for (String word : wordsMap.keySet()) {
// ok index the word
Document doc = new Document();
doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES,
Field.Index.UN_TOKENIZED)); // orig term
doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES,
Field.Index.TOKENIZED)); // grammed
doc.add(new Field(COUNT_FIELD,
Integer.toString(wordsMap.get(word)), Field.Store.NO,
Field.Index.UN_TOKENIZED)); // count
writer.addDocument(doc);
}
sourceReader.close();
// close writer
writer.optimize();
writer.close();
// re-open our reader
reOpenReader();
}
private void reOpenReader() throws CorruptIndexException, IOException {
if (autoCompleteReader == null) {
autoCompleteReader = IndexReader.open(autoCompleteDirectory);
} else {
autoCompleteReader.reopen();
}
autoCompleteSearcher = new IndexSearcher(autoCompleteReader);
}
public static void main(String[] args) throws Exception {
Sugesstion autocomplete = new Sugesstion("/index/autocomplete");
// run this to re-index from the current index, shouldn't need to do
// this very often
// autocomplete.reIndex(FSDirectory.getDirectory("/index/live", null),
// "content");
String term = "steve";
System.out.println(autocomplete.suggestTermsFor(term));
// prints [steve, steven, stevens, stevenson, stevenage]
}
}
相关推荐
实现输入提示,自动补全功能,仿照百度,谷歌样式设计。请大家参考,有bug或者问题给我留言。
servlet+jsp实现百度搜索自动补全功能.http://blog.csdn.net/lai13835601355/article/details/52302410这是ssh版本的。
通过Ajax跨域调用百度首页的自动补全功能,百度的自动完成可以使用jsonp完成跨域 ,jsonp 调用百度实现自动补全
利用Ajax实现类似百度的自动补全功能,代码详细,代码正确。
java ajax实现自动补全功能,代码完整,经过调试运行不会出错,有需要的朋友下载
实现搜索的自动补全功能.rar,太多无法一一验证是否可用,程序如果跑不起来需要自调,部分代码功能进行参考学习。
这个源码实现了类似于百度自动补全的功能,通过简单的异步操作进行实现 ajax+mysql模糊搜索,一个简单小demo。
改进版 jquery 仿百度谷歌自动补全输入(支持中文) 本资源原版本是在本论坛下载的,但只支持英文输入的自动补全,后经改进,现版本可支持中文和数字输入的自动补全,特来分享,感谢原版作者
ajax文本框自动补全功能,类似百度自动索引功能。
Ajax实现自动补全,案例详细。
可以实现百度搜索的功能
孤独的狼----jQuery实现自动补全功能
jQuery百度搜索自动补全插件是一款高级的自动补全jQuery插件typeahead.js。
UltraEdit实现自动补全 可以添加 Java PHP自动补全
c#TextBox输入框自动提示、自动完成、自动补全功能
自动补全 | 像百度那样的自动提示 用.net做的 数据从数据库中读取出来 性能好
利用jquery.autocomplete.js插件实现了文本框自动补全的功能, 附件包含两个实例代码,均以txt文档显示,都有前台和后台代码。一个是传一个参数,一个是传两个参数,后台以一般处理程序ashx文件编写。大家只需要修改...
用于eclipse开发中代码自动补全,包括类自动补全和xml文件自动补全
sublime的微信小程序代码自动补全sublime的微信小程序代码自动补全sublime的微信小程序代码自动补全sublime的微信小程序代码自动补全sublime的微信小程序代码自动补全sublime的微信小程序代码自动补全sublime的微信...
使用过Source Insight的人一定对它的自动补全功能印象深刻,...这里,我将自己用的配置文件发给大家,大家可以移步(待会会补上地址),只要简单的两步,就能实现vim的程序自动补全功能,大大增加了在vim上的开发效率。