package org.apache.lucene.search.spell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.search.suggest.InputIterator;

/**
 * Minimal contract for a dictionary: a list of entries in which each entry
 * is made up of a term, a weight and a payload.
 */
public interface Dictionary {

  /**
   * Provides an iterator that walks over every entry of this dictionary.
   *
   * @return an iterator over all entries
   * @throws IOException if the implementation fails with an I/O error
   */
  InputIterator getEntryIterator() throws IOException;
}
package com.tianditu.com.search; import java.io.File; import java.io.IOException; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.search.spell.LuceneDictionary; import org.apache.lucene.search.spell.SpellChecker; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.util.Version; public class GlobalSuggest { //拼写检查构建的索引 private final String SPELL_CHECK_FOLDER = "c:\\spellcheck\\"; //根据已有的索引 private final String GLOBAL_PINYIN_SUGGEST = "O:\\searchwork_custom\\data_index\\pinyin2008\\"; //构建索引 public void testIndexPinyin2008() throws IOException{ long start = System.currentTimeMillis(); //北京吉威时代软件股份有限公司 //String indexDir ="O:\\searchwork_custom\\data_index\\GlobalIndex\\"; Directory direct = new MMapDirectory(new File(GLOBAL_PINYIN_SUGGEST)); LuceneDictionary ld = new LuceneDictionary(DirectoryReader.open(direct), "name"); ld.getEntryIterator(); Directory spd = FSDirectory.open(new File(SPELL_CHECK_FOLDER)); SpellChecker sc = new SpellChecker(spd); //sc.in IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_30,null); //往spellcheck目录下写索引-------------- sc.indexDictionary(ld, iwc, true); sc.close(); long end = System.currentTimeMillis(); System.out.println("索引完毕,耗时:"+(end-start)+"ms"); } public void testIndex() throws IOException{ long start = System.currentTimeMillis(); //北京吉威时代软件股份有限公司 String indexDir ="O:\\searchwork_custom\\data_index\\GlobalIndex\\"; Directory direct = new MMapDirectory(new File(indexDir)); LuceneDictionary ld = new LuceneDictionary(DirectoryReader.open(direct), "name"); ld.getEntryIterator(); Directory spd = FSDirectory.open(new File(SPELL_CHECK_FOLDER)); SpellChecker sc = new SpellChecker(spd); //sc.in IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_30,null); sc.indexDictionary(ld, iwc, true); sc.close(); long end = System.currentTimeMillis(); 
System.out.println("索引完毕,耗时:"+(end-start)+"ms"); } public void testSearch(String wd) throws IOException{ //构建Directory Directory spd = FSDirectory.open(new File(SPELL_CHECK_FOLDER)); //实例化 spellcheck组件 SpellChecker sc = new SpellChecker(spd); //根据输入关键字 获得N条最相近的几率 第三个鄙视精确度 越大越匹配 安装实际需要调整 String[] suggests = sc.suggestSimilar(wd, 10,0.6f); if(suggests!=null){ for(String word:suggests){ System.out.println("Dou you mean:"+word); } } } /** * @param args * @throws IOException */ public static void main(String[] args) throws IOException { GlobalSuggest spellcheck = new GlobalSuggest(); //spellcheck.testIndexPinyin2008(); spellcheck.testSearch("beijing京鸭"); //spellcheck.testSearch("beijng"); } }
//构建索引 public void testIndexPinyin2008() throws IOException{ long start = System.currentTimeMillis(); //北京吉威时代软件股份有限公司 //String indexDir ="O:\\searchwork_custom\\data_index\\GlobalIndex\\"; Directory direct = new MMapDirectory(new File(GLOBAL_PINYIN_SUGGEST)); LuceneDictionary ld = new LuceneDictionary(DirectoryReader.open(direct), "name"); ld.getEntryIterator(); Directory spd = FSDirectory.open(new File(SPELL_CHECK_FOLDER)); SpellChecker sc = new SpellChecker(spd); //sc.in IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_30,null); //往spellcheck目录下写索引-------------- sc.indexDictionary(ld, iwc, true); sc.close(); long end = System.currentTimeMillis(); System.out.println("索引完毕,耗时:"+(end-start)+"ms"); }
//构建Directory Directory spd = FSDirectory.open(new File(SPELL_CHECK_FOLDER)); //实例化 spellcheck组件 SpellChecker sc = new SpellChecker(spd); //根据输入关键字 获得N条最相近的几率 第三个鄙视精确度 越大越匹配 安装实际需要调整 String[] suggests = sc.suggestSimilar(wd, 10,0.6f); if(suggests!=null){ for(String word:suggests){ System.out.println("Dou you mean:"+word); } }
相关算法：SpellChecker 默认使用 LevensteinDistance（编辑距离）来计算相似度。