码迷,mamicode.com
首页 > Web开发 > 详细

lucene.net 3.0.3、结合盘古分词进行搜索的小例子(转)

时间:2014-10-17 02:44:13      阅读:402      评论:0      收藏:0      [点我收藏+]

标签:Lucene   style   blog   http   color   io   os   使用   ar   

 

添加:2013-12-25

更新:2013-12-26 新增分页功能。

更新:2013-12-27 新增按分类查询功能,调整索引行新增记录的图片字段。


//封装类

 

  1. using System;  
  2. using System.Collections.Generic;  
  3. using System.Linq;  
  4. using System.Web;  
  5. using Lucene.Net.Analysis;  
  6. using Lucene.Net.Index;  
  7. using Lucene.Net.Documents;  
  8. using System.Reflection;  
  9. using Lucene.Net.QueryParsers;  
  10. using Lucene.Net.Search;  
  11. namespace SearchTest  
  12. {  
  13.     /// <summary>  
  14.     /// 盘古分词在lucene.net中的使用帮助类  
  15.     /// 调用PanGuLuceneHelper.instance  
  16.     /// </summary>  
  17.     public class PanGuLuceneHelper  
  18.     {  
  19.         private PanGuLuceneHelper() { }  
  20.  
  21.         #region 单一实例  
  22.         private static PanGuLuceneHelper _instance = null;  
  23.         /// <summary>  
  24.         /// 单一实例  
  25.         /// </summary>  
  26.         public static PanGuLuceneHelper instance  
  27.         {  
  28.             get  
  29.             {  
  30.                 if (_instance == null) _instance = new PanGuLuceneHelper();  
  31.                 return _instance;  
  32.             }  
  33.         }  
  34.         #endregion  
  35.  
  36.         #region 分词测试  
  37.         /// <summary>  
  38.         /// 分词测试  
  39.         /// </summary>  
  40.         /// <param name="keyword"></param>  
  41.         /// <returns></returns>  
  42.         public string Token(string keyword)  
  43.         {  
  44.             string ret = "";  
  45.             System.IO.StringReader reader = new System.IO.StringReader(keyword);  
  46.             Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(keyword, reader);  
  47.             bool hasNext = ts.IncrementToken();  
  48.             Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita;  
  49.             while (hasNext)  
  50.             {  
  51.                 ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();  
  52.                 ret += ita.Term + "|";  
  53.                 hasNext = ts.IncrementToken();  
  54.             }  
  55.             ts.CloneAttributes();  
  56.             reader.Close();  
  57.             analyzer.Close();  
  58.             return ret;  
  59.         }  
  60.         #endregion  
  61.  
  62.         #region 创建索引  
  63.         /// <summary>  
  64.         /// 创建索引  
  65.         /// </summary>  
  66.         /// <param name="datalist"></param>  
  67.         /// <returns></returns>  
  68.         public bool CreateIndex(List<MySearchUnit> datalist)  
  69.         {  
  70.             IndexWriter writer = null;  
  71.             try  
  72.             {  
  73.                 writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);//false表示追加(true表示删除之前的重新写入)  
  74.             }  
  75.             catch  
  76.             {  
  77.                 writer = new IndexWriter(directory_luce, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);//false表示追加(true表示删除之前的重新写入)  
  78.             }  
  79.             foreach (MySearchUnit data in datalist)  
  80.             {  
  81.                 CreateIndex(writer, data);  
  82.             }  
  83.             writer.Optimize();  
  84.             writer.Dispose();  
  85.             return true;  
  86.         }  
  87.   
  88.         public bool CreateIndex(IndexWriter writer, MySearchUnit data)  
  89.         {  
  90.             try  
  91.             {  
  92.   
  93.                 if (data == null) return false;  
  94.                 Document doc = new Document();  
  95.                 Type type = data.GetType();//assembly.GetType("Reflect_test.PurchaseOrderHeadManageModel", true, true); //命名空间名称 + 类名      
  96.   
  97.                 //创建类的实例      
  98.                 //object obj = Activator.CreateInstance(type, true);    
  99.                 //获取公共属性      
  100.                 PropertyInfo[] Propertys = type.GetProperties();  
  101.                 for (int i = 0; i < Propertys.Length; i++)  
  102.                 {  
  103.                     //Propertys[i].SetValue(Propertys[i], i, null); //设置值  
  104.                     PropertyInfo pi = Propertys[i];  
  105.                     string name=pi.Name;  
  106.                     object objval = pi.GetValue(data, null);  
  107.                     string value = objval == null ? "" : objval.ToString(); //值  
  108.                     if (name == "id" || name=="flag" )//id在写入索引时必是不分词,否则是模糊搜索和删除,会出现混乱  
  109.                     {  
  110.                         doc.Add(new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED));//id不分词  
  111.                     }  
  112.                     else  
  113.                     {  
  114.                         doc.Add(new Field(name, value, Field.Store.YES, Field.Index.ANALYZED));  
  115.                     }  
  116.                 }  
  117.                 writer.AddDocument(doc);  
  118.             }  
  119.             catch (System.IO.FileNotFoundException fnfe)  
  120.             {  
  121.                 throw fnfe;  
  122.             }  
  123.             return true;  
  124.         }  
  125.         #endregion  
  126.  
  127.         #region 在title和content字段中查询数据  
  128.         /// <summary>  
  129.         /// 在title和content字段中查询数据  
  130.         /// </summary>  
  131.         /// <param name="keyword"></param>  
  132.         /// <returns></returns>  
  133.         public List<MySearchUnit> Search(string keyword)  
  134.         {  
  135.   
  136.             string[] fileds = { "title", "content" };//查询字段  
  137.             //Stopwatch st = new Stopwatch();  
  138.             //st.Start();  
  139.             QueryParser parser = null;// new QueryParser(Lucene.Net.Util.Version.LUCENE_30, field, analyzer);//一个字段查询  
  140.             parser = new MultiFieldQueryParser(version, fileds, analyzer);//多个字段查询  
  141.             Query query = parser.Parse(keyword);  
  142.             int n = 1000;  
  143.             IndexSearcher searcher = new IndexSearcher(directory_luce, true);//true-表示只读  
  144.             TopDocs docs = searcher.Search(query, (Filter)null, n);  
  145.             if (docs == null || docs.TotalHits == 0)  
  146.             {  
  147.                 return null;  
  148.             }  
  149.             else  
  150.             {  
  151.                 List<MySearchUnit> list = new List<MySearchUnit>();  
  152.                 int counter = 1;  
  153.                 foreach (ScoreDoc sd in docs.ScoreDocs)//遍历搜索到的结果  
  154.                 {  
  155.                     try  
  156.                     {  
  157.                         Document doc = searcher.Doc(sd.Doc);  
  158.                         string id = doc.Get("id");  
  159.                         string title = doc.Get("title");  
  160.                         string content = doc.Get("content");  
  161.                         string flag = doc.Get("flag");  
  162.                         string imageurl = doc.Get("imageurl");  
  163.                         string updatetime = doc.Get("updatetime");  
  164.   
  165.                         string createdate = doc.Get("createdate");  
  166.                         PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");  
  167.                         PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());  
  168.                         highlighter.FragmentSize = 50;  
  169.                         content = highlighter.GetBestFragment(keyword, content);  
  170.                         string titlehighlight = highlighter.GetBestFragment(keyword, title);  
  171.                         if (titlehighlight != "") title = titlehighlight;  
  172.                         list.Add(new MySearchUnit(id, title, content, flag,imageurl, updatetime));  
  173.                     }  
  174.                     catch (Exception ex)  
  175.                     {  
  176.                         Console.WriteLine(ex.Message);  
  177.                     }  
  178.                     counter++;  
  179.                 }  
  180.                 return list;  
  181.             }  
  182.             //st.Stop();  
  183.             //Response.Write("查询时间:" + st.ElapsedMilliseconds + " 毫秒<br/>");  
  184.   
  185.         }  
  186.         #endregion  
  187.  
  188.         #region 在不同的分类下再根据title和content字段中查询数据(分页)  
  189.         /// <summary>  
  190.         /// 在不同的类型下再根据title和content字段中查询数据(分页)  
  191.         /// </summary>  
  192.         /// <param name="_flag">分类,传空值查询全部</param>  
  193.         /// <param name="keyword"></param>  
  194.         /// <param name="PageIndex"></param>  
  195.         /// <param name="PageSize"></param>  
  196.         /// <param name="TotalCount"></param>  
  197.         /// <returns></returns>  
  198.         public List<MySearchUnit> Search(string _flag,string keyword, int PageIndex, int PageSize, out int TotalCount)  
  199.         {  
  200.             if (PageIndex < 1) PageIndex = 1;  
  201.             //Stopwatch st = new Stopwatch();  
  202.             //st.Start();  
  203.             BooleanQuery bq = new BooleanQuery();  
  204.             if (_flag != "")  
  205.             {  
  206.                 QueryParser qpflag = new QueryParser(version, "flag", analyzer);  
  207.                 Query qflag = qpflag.Parse(_flag);  
  208.                 bq.Add(qflag, Occur.MUST);//与运算  
  209.             }  
  210.             if (keyword != "")  
  211.             {  
  212.                 string[] fileds = { "title", "content" };//查询字段  
  213.                 QueryParser parser = null;// new QueryParser(version, field, analyzer);//一个字段查询  
  214.                 parser = new MultiFieldQueryParser(version, fileds, analyzer);//多个字段查询  
  215.                 Query queryKeyword = parser.Parse(keyword);  
  216.                 bq.Add(queryKeyword, Occur.MUST);//与运算  
  217.             }  
  218.               
  219.             TopScoreDocCollector collector = TopScoreDocCollector.Create(PageIndex * PageSize, false);  
  220.             IndexSearcher searcher = new IndexSearcher(directory_luce, true);//true-表示只读  
  221.             searcher.Search(bq, collector);  
  222.             if (collector == null || collector.TotalHits == 0)  
  223.             {  
  224.                 TotalCount = 0;  
  225.                 return null;  
  226.             }  
  227.             else  
  228.             {  
  229.                 int start = PageSize * (PageIndex - 1);  
  230.                 //结束数  
  231.                 int limit = PageSize;  
  232.                 ScoreDoc[] hits = collector.TopDocs(start, limit).ScoreDocs;  
  233.                 List<MySearchUnit> list = new List<MySearchUnit>();  
  234.                 int counter = 1;  
  235.                 TotalCount = collector.TotalHits;  
  236.                 foreach (ScoreDoc sd in hits)//遍历搜索到的结果  
  237.                 {  
  238.                     try  
  239.                     {  
  240.                         Document doc = searcher.Doc(sd.Doc);  
  241.                         string id = doc.Get("id");  
  242.                         string title = doc.Get("title");  
  243.                         string content = doc.Get("content");  
  244.                         string flag = doc.Get("flag");  
  245.                         string imageurl = doc.Get("imageurl");  
  246.                         string updatetime = doc.Get("updatetime");  
  247.   
  248.                         PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");  
  249.                         PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());  
  250.                         highlighter.FragmentSize = 50;  
  251.                         content = highlighter.GetBestFragment(keyword, content);  
  252.                         string titlehighlight = highlighter.GetBestFragment(keyword, title);  
  253.                         if (titlehighlight != "") title = titlehighlight;  
  254.                         list.Add(new MySearchUnit(id, title, content, flag,imageurl, updatetime));  
  255.                     }  
  256.                     catch (Exception ex)  
  257.                     {  
  258.                         Console.WriteLine(ex.Message);  
  259.                     }  
  260.                     counter++;  
  261.                 }  
  262.                 return list;  
  263.             }  
  264.             //st.Stop();  
  265.             //Response.Write("查询时间:" + st.ElapsedMilliseconds + " 毫秒<br/>");  
  266.   
  267.         }  
  268.         #endregion  
  269.  
  270.         #region 删除索引数据(根据id)  
  271.         /// <summary>  
  272.         /// 删除索引数据(根据id)  
  273.         /// </summary>  
  274.         /// <param name="id"></param>  
  275.         /// <returns></returns>  
  276.         public bool Delete(string id)  
  277.         {  
  278.             bool IsSuccess = false;  
  279.             Term term = new Term("id", id);  
  280.             //Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);  
  281.             //Version version = new Version();  
  282.             //MultiFieldQueryParser parser = new MultiFieldQueryParser(version, new string[] { "name", "job" }, analyzer);//多个字段查询  
  283.             //Query query = parser.Parse("小王");  
  284.   
  285.             //IndexReader reader = IndexReader.Open(directory_luce, false);  
  286.             //reader.DeleteDocuments(term);  
  287.             //Response.Write("删除记录结果: " + reader.HasDeletions + "<br/>");  
  288.             //reader.Dispose();  
  289.   
  290.             IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);  
  291.             writer.DeleteDocuments(term); // writer.DeleteDocuments(term)或者writer.DeleteDocuments(query);  
  292.             ////writer.DeleteAll();  
  293.             writer.Commit();  
  294.             //writer.Optimize();//  
  295.             IsSuccess = writer.HasDeletions();  
  296.             writer.Dispose();  
  297.             return IsSuccess;  
  298.         }  
  299.         #endregion  
  300.  
  301.         #region 删除全部索引数据  
  302.         /// <summary>  
  303.         /// 删除全部索引数据  
  304.         /// </summary>  
  305.         /// <returns></returns>  
  306.         public bool DeleteAll()  
  307.         {  
  308.             bool IsSuccess = true;  
  309.             try  
  310.             {  
  311.                 IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);  
  312.                 writer.DeleteAll();  
  313.                 writer.Commit();  
  314.                 //writer.Optimize();//  
  315.                 IsSuccess = writer.HasDeletions();  
  316.                 writer.Dispose();  
  317.             }  
  318.             catch  
  319.             {  
  320.                 IsSuccess = false;  
  321.             }  
  322.             return IsSuccess;  
  323.         }  
  324.         #endregion  
  325.  
  326.         #region directory_luce  
  327.         private Lucene.Net.Store.Directory _directory_luce = null;  
  328.         /// <summary>  
  329.         /// Lucene.Net的目录-参数  
  330.         /// </summary>  
  331.         public Lucene.Net.Store.Directory directory_luce  
  332.         {  
  333.             get  
  334.             {  
  335.                 if (_directory_luce == null) _directory_luce = Lucene.Net.Store.FSDirectory.Open(directory);  
  336.                 return _directory_luce;  
  337.             }  
  338.         }   
  339.         #endregion  
  340.  
  341.         #region directory  
  342.         private System.IO.DirectoryInfo _directory = null;  
  343.         /// <summary>  
  344.         /// 索引在硬盘上的目录  
  345.         /// </summary>  
  346.         public System.IO.DirectoryInfo directory  
  347.         {  
  348.             get  
  349.             {  
  350.                 if (_directory == null)  
  351.                 {  
  352.                     string dirPath = AppDomain.CurrentDomain.BaseDirectory + "SearchIndex";  
  353.                     if (System.IO.Directory.Exists(dirPath) == false) _directory = System.IO.Directory.CreateDirectory(dirPath);  
  354.                     else _directory = new System.IO.DirectoryInfo(dirPath);  
  355.                 }  
  356.                 return _directory;  
  357.             }  
  358.         }   
  359.         #endregion  
  360.  
  361.         #region analyzer  
  362.         private Analyzer _analyzer = null;  
  363.         /// <summary>  
  364.         /// 分析器  
  365.         /// </summary>  
  366.         public Analyzer analyzer  
  367.         {  
  368.             get  
  369.             {  
  370.                 //if (_analyzer == null)  
  371.                 {  
  372.                     _analyzer = new Lucene.Net.Analysis.PanGu.PanGuAnalyzer();//盘古分词分析器  
  373.                     //_analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);//标准分析器  
  374.                 }  
  375.                 return _analyzer;  
  376.             }  
  377.         }   
  378.         #endregion  
  379.  
  380.         #region version  
  381.         private static Lucene.Net.Util.Version _version = Lucene.Net.Util.Version.LUCENE_30;  
  382.         /// <summary>  
  383.         /// 版本号枚举类  
  384.         /// </summary>  
  385.         public Lucene.Net.Util.Version version  
  386.         {  
  387.             get  
  388.             {  
  389.                 return _version;  
  390.             }  
  391.         }  
  392.         #endregion  
  393.     }  
  394.  
  395.     #region 索引的一个行单元,相当于数据库中的一行数据  
  396.     /// <summary>  
  397.     /// 索引的一个行单元,相当于数据库中的一行数据  
  398.     /// </summary>  
  399.     public class MySearchUnit  
  400.     {  
  401.         public MySearchUnit(string _id, string _title, string _content, string _flag, string _imageurl, string _updatetime)  
  402.         {  
  403.             this.id = _id;  
  404.             this.title = _title;  
  405.             this.content = _content;  
  406.             this.flag = _flag;  
  407.             this.imageurl = _imageurl;  
  408.             this.updatetime = _updatetime;  
  409.         }  
  410.         /// <summary>  
  411.         /// 唯一的id号  
  412.         /// </summary>  
  413.         public string id { get; set; }  
  414.         /// <summary>  
  415.         /// 标题  
  416.         /// </summary>  
  417.         public string title { get; set; }  
  418.         /// <summary>  
  419.         /// 内容  
  420.         /// </summary>  
  421.         public string content { get; set; }  
  422.         /// <summary>  
  423.         /// 其他信息  
  424.         /// </summary>  
  425.         public string flag { get; set; }  
  426.         /// <summary>  
  427.         /// 图片路径  
  428.         /// </summary>  
  429.         public string imageurl { get; set; }  
  430.         /// <summary>  
  431.         /// 时间  
  432.         /// </summary>  
  433.         public string updatetime { get; set; }  
  434.     }   
  435.     #endregion  
  436. }  
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using System.Reflection;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
namespace SearchTest
{
    /// <summary>
    /// 盘古分词在lucene.net中的使用帮助类
    /// 调用PanGuLuceneHelper.instance
    /// </summary>
    public class PanGuLuceneHelper
    {
        // Private constructor: the only way to obtain a helper is the static
        // instance property below.
        private PanGuLuceneHelper()
        {
        }

        #region 单一实例
        // Backing field for the shared helper; stays null until first requested.
        private static PanGuLuceneHelper _instance = null;

        /// <summary>
        /// Gets the shared helper, creating it on first access.
        /// No locking is performed around the creation.
        /// </summary>
        public static PanGuLuceneHelper instance
        {
            get
            {
                if (_instance != null)
                {
                    return _instance;
                }

                _instance = new PanGuLuceneHelper();
                return _instance;
            }
        }
        #endregion

        #region 分词测试
        /// <summary>
        /// Segmentation test: runs the text through the analyzer and returns every
        /// produced term followed by a '|' separator.
        /// </summary>
        /// <param name="keyword">Text to segment; null or empty yields an empty string.</param>
        /// <returns>The terms in the form "term1|term2|", or "" when there are none.</returns>
        public string Token(string keyword)
        {
            if (string.IsNullOrEmpty(keyword))
            {
                return "";
            }

            // The analyzer property builds a new instance on every read, so hold a
            // single reference: the analyzer that produced the stream is the one
            // that gets closed afterwards.
            Analyzer localAnalyzer = analyzer;
            System.Text.StringBuilder ret = new System.Text.StringBuilder();
            try
            {
                using (System.IO.StringReader reader = new System.IO.StringReader(keyword))
                using (Lucene.Net.Analysis.TokenStream ts = localAnalyzer.TokenStream(keyword, reader))
                {
                    // Fetch the term attribute once; it is refreshed in place by
                    // each successful IncrementToken call.
                    Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
                        ts.AddAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                    while (ts.IncrementToken())
                    {
                        ret.Append(ita.Term).Append("|");
                    }
                }
            }
            finally
            {
                localAnalyzer.Close();
            }
            return ret.ToString();
        }
        #endregion

        #region 创建索引
        /// <summary>
        /// Adds every unit in the list to the index, creating the index when none
        /// exists in the directory yet and appending to it otherwise, then optimizes.
        /// </summary>
        /// <param name="datalist">Rows to index; null entries are skipped.</param>
        /// <returns>True once the rows are written; false when the list itself is null.</returns>
        public bool CreateIndex(List<MySearchUnit> datalist)
        {
            if (datalist == null)
            {
                return false;
            }

            Lucene.Net.Store.Directory dir = directory_luce;

            // Decide create-vs-append from the directory contents instead of retrying
            // with create=true after any failure: an unrelated error (for example a
            // held write lock) must not turn into "overwrite the existing index".
            bool create = !IndexReader.IndexExists(dir);

            // using guarantees the writer (and its write lock) is released even when
            // adding a document throws.
            using (IndexWriter writer = new IndexWriter(dir, analyzer, create, IndexWriter.MaxFieldLength.LIMITED))
            {
                foreach (MySearchUnit data in datalist)
                {
                    CreateIndex(writer, data);
                }
                writer.Optimize();
            }
            return true;
        }

        /// <summary>
        /// Builds one Lucene document from the public properties of the unit and adds
        /// it through the given writer. Every property becomes a stored field named
        /// after the property; null values are stored as "".
        /// </summary>
        /// <param name="writer">Open index writer that receives the document.</param>
        /// <param name="data">Row to index.</param>
        /// <returns>True when the document was added; false when data is null.</returns>
        public bool CreateIndex(IndexWriter writer, MySearchUnit data)
        {
            if (data == null)
            {
                return false;
            }

            Document doc = new Document();
            foreach (PropertyInfo pi in data.GetType().GetProperties())
            {
                string name = pi.Name;
                object objval = pi.GetValue(data, null);
                string value = objval == null ? "" : objval.ToString();

                // id and flag are indexed as single untokenized terms so that exact
                // lookups and deletes by term hit precisely one value; all other
                // fields go through the analyzer for full-text search.
                bool exactMatch = name == "id" || name == "flag";
                Field.Index indexMode = exactMatch ? Field.Index.NOT_ANALYZED : Field.Index.ANALYZED;
                doc.Add(new Field(name, value, Field.Store.YES, indexMode));
            }

            // Exceptions propagate unchanged; the former catch block rethrew with
            // "throw fnfe;", which only discarded the original stack trace.
            writer.AddDocument(doc);
            return true;
        }
        #endregion

        #region 在title和content字段中查询数据
        /// <summary>
        /// Searches the title and content fields for the keyword and returns at most
        /// 1000 hits, with highlighted fragments wrapped in red font tags.
        /// </summary>
        /// <param name="keyword">Query text handed to the multi-field query parser.</param>
        /// <returns>
        /// The matching rows, or null when the keyword is null/empty or nothing matches.
        /// </returns>
        public List<MySearchUnit> Search(string keyword)
        {
            // Nothing to parse: report it the same way as "no hits".
            if (string.IsNullOrEmpty(keyword))
            {
                return null;
            }

            string[] fields = { "title", "content" };
            QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);
            Query query = parser.Parse(keyword);
            int n = 1000;

            // The searcher is opened per call, so it is disposed per call as well.
            using (IndexSearcher searcher = new IndexSearcher(directory_luce, true))
            {
                TopDocs docs = searcher.Search(query, (Filter)null, n);
                if (docs == null || docs.TotalHits == 0)
                {
                    return null;
                }

                // Formatter and highlighter do not depend on the hit, so build them once.
                PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());
                highlighter.FragmentSize = 50;

                List<MySearchUnit> list = new List<MySearchUnit>();
                foreach (ScoreDoc sd in docs.ScoreDocs)
                {
                    try
                    {
                        Document doc = searcher.Doc(sd.Doc);
                        string id = doc.Get("id");
                        string title = doc.Get("title");
                        string content = doc.Get("content");
                        string flag = doc.Get("flag");
                        string imageurl = doc.Get("imageurl");
                        string updatetime = doc.Get("updatetime");

                        content = highlighter.GetBestFragment(keyword, content);
                        // Keep the stored title when the highlighter produced no fragment for it.
                        string titlehighlight = highlighter.GetBestFragment(keyword, title);
                        if (titlehighlight != "") title = titlehighlight;
                        list.Add(new MySearchUnit(id, title, content, flag, imageurl, updatetime));
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a hit that cannot be loaded or highlighted is
                        // left out of the result instead of failing the whole search.
                        Console.WriteLine(ex.Message);
                    }
                }
                return list;
            }
        }
        #endregion

        #region 在不同的分类下再根据title和content字段中查询数据(分页)
        /// <summary>
        /// Paged search: optionally restricts to one category (flag) and optionally
        /// matches the keyword against the title and content fields; both conditions
        /// must hold when both are given.
        /// </summary>
        /// <param name="_flag">Category value; null or empty applies no category restriction.</param>
        /// <param name="keyword">Query text for title/content; null or empty adds no keyword clause.</param>
        /// <param name="PageIndex">1-based page number; values below 1 are treated as 1.</param>
        /// <param name="PageSize">Number of rows per page.</param>
        /// <param name="TotalCount">Receives the total number of hits across all pages.</param>
        /// <returns>The rows of the requested page, or null when there are no hits.</returns>
        public List<MySearchUnit> Search(string _flag,string keyword, int PageIndex, int PageSize, out int TotalCount)
        {
            if (PageIndex < 1) PageIndex = 1;
            bool hasKeyword = !string.IsNullOrEmpty(keyword);

            BooleanQuery bq = new BooleanQuery();
            if (!string.IsNullOrEmpty(_flag))
            {
                // flag is written to the index as a single untokenized term, so it is
                // matched as an exact term rather than being run through the analyzer.
                bq.Add(new TermQuery(new Term("flag", _flag)), Occur.MUST);
            }
            if (hasKeyword)
            {
                string[] fields = { "title", "content" };
                QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);
                bq.Add(parser.Parse(keyword), Occur.MUST);
            }

            // Collect enough top hits to reach the end of the requested page.
            TopScoreDocCollector collector = TopScoreDocCollector.Create(PageIndex * PageSize, false);

            // The searcher is opened per call, so it is disposed per call as well.
            using (IndexSearcher searcher = new IndexSearcher(directory_luce, true))
            {
                searcher.Search(bq, collector);
                TotalCount = collector.TotalHits;
                if (TotalCount == 0)
                {
                    return null;
                }

                int start = PageSize * (PageIndex - 1);
                ScoreDoc[] hits = collector.TopDocs(start, PageSize).ScoreDocs;

                // Formatter and highlighter do not depend on the hit, so build them once.
                PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());
                highlighter.FragmentSize = 50;

                List<MySearchUnit> list = new List<MySearchUnit>();
                foreach (ScoreDoc sd in hits)
                {
                    try
                    {
                        Document doc = searcher.Doc(sd.Doc);
                        string id = doc.Get("id");
                        string title = doc.Get("title");
                        string content = doc.Get("content");
                        string flag = doc.Get("flag");
                        string imageurl = doc.Get("imageurl");
                        string updatetime = doc.Get("updatetime");

                        // Without a keyword there is nothing to highlight; the stored
                        // title and content are returned as they are.
                        if (hasKeyword)
                        {
                            content = highlighter.GetBestFragment(keyword, content);
                            // Keep the stored title when no fragment was produced for it.
                            string titlehighlight = highlighter.GetBestFragment(keyword, title);
                            if (titlehighlight != "") title = titlehighlight;
                        }
                        list.Add(new MySearchUnit(id, title, content, flag, imageurl, updatetime));
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a hit that cannot be loaded or highlighted is
                        // left out of the page instead of failing the whole search.
                        Console.WriteLine(ex.Message);
                    }
                }
                return list;
            }
        }
        #endregion

        #region 删除索引数据(根据id)
        /// <summary>
        /// Deletes the documents whose id field equals the given value and commits.
        /// </summary>
        /// <param name="id">Exact id term to delete; null deletes nothing.</param>
        /// <returns>
        /// False when id is null; otherwise the writer's HasDeletions() value read
        /// after the commit.
        /// NOTE(review): HasDeletions appears to describe the index as a whole rather
        /// than this particular delete - confirm against the Lucene.Net documentation
        /// before treating the result as "this id was found".
        /// </returns>
        public bool Delete(string id)
        {
            if (id == null)
            {
                return false;
            }

            Term term = new Term("id", id);

            // using guarantees the writer (and its write lock) is released even when
            // the delete or the commit throws.
            using (IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED))
            {
                writer.DeleteDocuments(term);
                writer.Commit();
                return writer.HasDeletions();
            }
        }
        #endregion

        #region 删除全部索引数据
        /// <summary>
        /// Removes every document from the index and commits the change.
        /// </summary>
        /// <returns>True when the delete and commit completed; false when any step threw.</returns>
        public bool DeleteAll()
        {
            try
            {
                // using guarantees the writer (and its write lock) is released even
                // when a step throws and the exception is absorbed below.
                using (IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED))
                {
                    writer.DeleteAll();
                    writer.Commit();
                }

                // Success means "the commit went through". The earlier result came
                // from HasDeletions(), which does not report a completed DeleteAll.
                return true;
            }
            catch
            {
                // Deliberate best effort: failures are reported through the return value.
                return false;
            }
        }
        #endregion

        #region directory_luce
        // Cached Lucene directory handle; opened on first use.
        private Lucene.Net.Store.Directory _directory_luce = null;

        /// <summary>
        /// Gets the Lucene directory opened over the on-disk index folder.
        /// The handle is opened once and reused on later reads.
        /// </summary>
        public Lucene.Net.Store.Directory directory_luce
        {
            get
            {
                if (_directory_luce != null)
                {
                    return _directory_luce;
                }

                _directory_luce = Lucene.Net.Store.FSDirectory.Open(directory);
                return _directory_luce;
            }
        }
        #endregion

        #region directory
        // Cached info object for the index folder; resolved on first use.
        private System.IO.DirectoryInfo _directory = null;

        /// <summary>
        /// Gets the folder on disk that holds the index: "SearchIndex" appended to
        /// the application base directory. The folder is created when it is missing.
        /// </summary>
        public System.IO.DirectoryInfo directory
        {
            get
            {
                if (_directory != null)
                {
                    return _directory;
                }

                string dirPath = AppDomain.CurrentDomain.BaseDirectory + "SearchIndex";
                _directory = System.IO.Directory.Exists(dirPath)
                    ? new System.IO.DirectoryInfo(dirPath)
                    : System.IO.Directory.CreateDirectory(dirPath);
                return _directory;
            }
        }
        #endregion

        #region analyzer
        // Holds the analyzer most recently handed out by the property below.
        private Analyzer _analyzer = null;

        /// <summary>
        /// Gets a PanGu analyzer. Caching is switched off: every read constructs a
        /// new analyzer and stores it in the backing field before returning it.
        /// </summary>
        public Analyzer analyzer
        {
            get
            {
                // A StandardAnalyzer(LUCENE_30) could be substituted here to use the
                // standard analyzer instead of PanGu segmentation.
                Analyzer fresh = new Lucene.Net.Analysis.PanGu.PanGuAnalyzer();
                _analyzer = fresh;
                return _analyzer;
            }
        }
        #endregion

        #region version
        // Lucene compatibility level used by this helper.
        private static Lucene.Net.Util.Version _version = Lucene.Net.Util.Version.LUCENE_30;

        /// <summary>
        /// Gets the Lucene version constant (LUCENE_30).
        /// </summary>
        public Lucene.Net.Util.Version version
        {
            get { return _version; }
        }
        #endregion
    }

    #region 索引的一个行单元,相当于数据库中的一行数据
    /// <summary>
    /// One row of the index, comparable to a single record in a database table.
    /// </summary>
    public class MySearchUnit
    {
        /// <summary>
        /// Creates a row with all six values supplied up front.
        /// </summary>
        public MySearchUnit(string _id, string _title, string _content, string _flag, string _imageurl, string _updatetime)
        {
            id = _id;
            title = _title;
            content = _content;
            flag = _flag;
            imageurl = _imageurl;
            updatetime = _updatetime;
        }

        /// <summary>Unique identifier of the row.</summary>
        public string id { get; set; }

        /// <summary>Title text.</summary>
        public string title { get; set; }

        /// <summary>Body text.</summary>
        public string content { get; set; }

        /// <summary>Additional information.</summary>
        public string flag { get; set; }

        /// <summary>Image path.</summary>
        public string imageurl { get; set; }

        /// <summary>Time value, kept as text.</summary>
        public string updatetime { get; set; }
    }
    #endregion
}





 

//调用测试


  1. protected void Page_Load(object sender, EventArgs e)  
  2.  {  
  3.      //PanGuLuceneHelper.instance.DeleteAll();//删除全部  
  4.   
  5.      //PanGuLuceneHelper.instance.Delete("1d");//根据id删除  
  6.      bool exec = false;  
  7.      if (exec)  
  8.      {  
  9.          List<MySearchUnit> list = new List<MySearchUnit>();  
  10.          list.Add(new MySearchUnit("1a", "标题小王", "今天是小王的生日,大家都很高兴去他家喝酒,玩了一整天。", new Random().Next(1, 10).ToString(), "", ""));  
  11.          list.Add(new MySearchUnit("1b", "标题小张", "今天是小张的生日,大家都很高兴去他家喝酒,玩了几天。", new Random().Next(1, 10).ToString(), "", ""));  
  12.          list.Add(new MySearchUnit("1c", "标题小王", "今天是小王的生日,大家都很高兴去他家喝酒,玩了一整天。", new Random().Next(1, 10).ToString(), "", ""));  
  13.          list.Add(new MySearchUnit("1d", "标题小张", "今天是小张的生日,大家都很高兴去他家喝酒,玩了几天。", new Random().Next(1, 10).ToString(), "", ""));  
  14.          PanGuLuceneHelper.instance.CreateIndex(list);//添加索引  
  15.      }  
  16.      int count = 0;  
  17.      int PageIndex=2;  
  18.      int PageSize=4;  
  19.      string html_content = "";  
  20.      List<MySearchUnit> searchlist = PanGuLuceneHelper.instance.Search("3","小王 生日",PageIndex,PageSize,out count);  
  21.      html_content+=("查询结果:" + count + "条数据<br/>");  
  22.      if (searchlist == null || searchlist.Count==0)  
  23.      {  
  24.          html_content += ("未查询到数据。<br/>");  
  25.      }  
  26.      else  
  27.      {  
  28.          foreach (MySearchUnit data in searchlist)  
  29.          {  
  30.              html_content += (string.Format("id:{0},title:{1},content:{2},flag:{3},updatetime:{4}<br/>", data.id, data.title, data.content, data.flag, data.updatetime));  
  31.          }  
  32.      }  
  33.      html_content += (PanGuLuceneHelper.instance.version);  
  34.      div_content.InnerHtml = html_content;  
  35.  }  
       /// <summary>
       /// Demo page handler: optionally (re)builds a four-row sample index, then
       /// runs a paged search through PanGuLuceneHelper and writes the hits plus
       /// the helper's Lucene version into the div_content element as HTML.
       /// </summary>
       /// <param name="sender">Event source (unused).</param>
       /// <param name="e">Event data (unused).</param>
       protected void Page_Load(object sender, EventArgs e)
        {
            //PanGuLuceneHelper.instance.DeleteAll(); // delete everything

            //PanGuLuceneHelper.instance.Delete("1d"); // delete by id

            // Manual switch: set to true to add the sample rows to the index.
            bool exec = false;
            if (exec)
            {
                // Use ONE generator for all rows. Separate `new Random()` instances
                // created back-to-back take a time-based default seed on .NET Framework
                // and would therefore hand every row the same "random" flag.
                Random rng = new Random();
                List<MySearchUnit> list = new List<MySearchUnit>
                {
                    new MySearchUnit("1a", "标题小王", "今天是小王的生日,大家都很高兴去他家喝酒,玩了一整天。", rng.Next(1, 10).ToString(), "", ""),
                    new MySearchUnit("1b", "标题小张", "今天是小张的生日,大家都很高兴去他家喝酒,玩了几天。", rng.Next(1, 10).ToString(), "", ""),
                    new MySearchUnit("1c", "标题小王", "今天是小王的生日,大家都很高兴去他家喝酒,玩了一整天。", rng.Next(1, 10).ToString(), "", ""),
                    new MySearchUnit("1d", "标题小张", "今天是小张的生日,大家都很高兴去他家喝酒,玩了几天。", rng.Next(1, 10).ToString(), "", "")
                };
                PanGuLuceneHelper.instance.CreateIndex(list); // add to the index
            }

            int count = 0;
            int pageIndex = 2;
            int pageSize = 4;

            // First argument is presumably the category/flag filter and the second
            // the keywords - TODO confirm against PanGuLuceneHelper.Search.
            List<MySearchUnit> searchlist = PanGuLuceneHelper.instance.Search("3", "小王 生日", pageIndex, pageSize, out count);

            // StringBuilder avoids re-allocating the whole string for every hit.
            System.Text.StringBuilder html = new System.Text.StringBuilder();
            html.Append("查询结果:").Append(count).Append("条数据<br/>");

            if (searchlist == null || searchlist.Count == 0)
            {
                html.Append("未查询到数据。<br/>");
            }
            else
            {
                // NOTE(review): field values are written into InnerHtml without HTML
                // encoding; encode them if the index can contain untrusted text.
                foreach (MySearchUnit data in searchlist)
                {
                    html.AppendFormat("id:{0},title:{1},content:{2},flag:{3},updatetime:{4}<br/>", data.id, data.title, data.content, data.flag, data.updatetime);
                }
            }

            html.Append(PanGuLuceneHelper.instance.version);
            div_content.InnerHtml = html.ToString();
        }







 

 

//效果:

bubuko.com,布布扣

 

 

 

第一版源码示例下载:http://download.csdn.net/detail/pukuimin1226/6768179

  最新源码示例下载:http://download.csdn.net/detail/pukuimin1226/6776049

 

百度云盘下载链接:http://pan.baidu.com/s/1o69cCD8

Lucene.Net没有判断数据重复性,同一条数据插入多少遍它就有多少条相同的数据,所以,我们人为地用id区分,在数据量大,全部重新创建索引时间长的情况下(数据量到几万以上就耗资源了,从数据库中查询出来,再写入索引,使得数据库和程序本身都增加负担),增量建立索引是很有必要的。

新增一条数据,就直接添加一条索引;

修改一条数据,先删除同一个id的索引(不管有多少个id相同的,都会一次性删除),再添加一条。

 

数据库中的id建议大家都用guid去掉“-”,还可以加日期“yyyyMMddHHmmss”这样组合,长度一致看起来美观,也充分保证唯一。

lucene.net 教程(转载)
Lucene(.net)学习
Lucene的缺点
web.config 学习之 httpHandlers
LUCENE 3.6 学习笔记
Lucene小练九——各种搜索(精确,范围,数字)
Lucene小练三——索引删除,恢复,更新
在 Asp.NET MVC 中使用 SignalR 实现推送功能
FieldCache在lucene中使用的代码解析,使用场景个人分析
Lucene3.0.1 学习笔记
 

lucene.net 3.0.3、结合盘古分词进行搜索的小例子(转)

标签:Lucene   style   blog   http   color   io   os   使用   ar   

原文地址:http://www.cnblogs.com/sumg/p/4030036.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!