标签:
1.站内搜索
1.1Lucene.Net建立信息索引
// Builds the Lucene.Net index (or updates an existing one) for the book pages
// with ids 4939..6087. Each page is downloaded over HTTP, parsed with the mshtml
// document class, and stored as one document with "url", "title" and "content" fields.
// The writer and the directory are released in finally blocks so that an unexpected
// exception cannot leave the index locked or the pending changes unflushed.
string indexPath = @"E:\xxx\xxx"; // directory where the index is stored
FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
try
{
    bool isUpdate = IndexReader.IndexExists(directory);

    // If the index directory is still locked (for example because a previous
    // indexing run terminated abnormally), release the lock before writing.
    if (isUpdate && IndexWriter.IsLocked(directory))
    {
        IndexWriter.Unlock(directory);
    }

    // The third argument is "create": a new index is created only when none exists yet.
    IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
    try
    {
        for (int i = 4939; i <= 6087; i++)
        {
            // Downloading a page may fail; the expected failure is WebException.
            // It is logged and the loop moves on so one bad page does not abort the run.
            try
            {
                string url = "http://localhost:3448/Book.aspx?id=" + i;
                string html;
                using (WebClient wc = new WebClient())
                {
                    wc.Encoding = Encoding.UTF8;
                    html = wc.DownloadString(url);
                }

                HTMLDocumentClass htmlDoc = new HTMLDocumentClass();
                htmlDoc.designMode = "on"; // keep the parsing engine from trying to run javascript
                htmlDoc.IHTMLDocument2_write(html);
                htmlDoc.close();

                // Field values must not be null, so fall back to empty strings.
                string title = htmlDoc.title ?? "";
                string content = "";
                IHTMLElement contentElement = htmlDoc.getElementById("ctl00_ContentPlaceHolder1_DetailsView1_txtContent");
                if (contentElement != null && contentElement.innerText != null)
                {
                    content = contentElement.innerText;
                }

                // To avoid indexing the same page twice, delete any document whose
                // "url" term equals this url before adding the fresh one.
                writer.DeleteDocuments(new Term("url", url));

                Document document = new Document();
                document.Add(new Field("url", url, Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                writer.AddDocument(document);

                logger.Debug("索引" + i + "完毕");
            }
            catch (WebException webe)
            {
                logger.Error(webe.Message);
            }
        }
    }
    finally
    {
        // The writer must be closed, otherwise the indexed documents cannot be found by searches.
        writer.Close();
    }
}
finally
{
    directory.Close();
}
logger.Debug("全部索引完毕");
1.2盘古分词并高亮
/// <summary>
/// Searches the "content" field of the Lucene.Net index for the given keyword and
/// returns one page of results with the matching text highlighted.
/// </summary>
/// <param name="kw">Keyword entered by the user; it is lower-cased and segmented before searching.</param>
/// <param name="startIndex">Zero-based offset of the first hit to return.</param>
/// <param name="pageSize">Maximum number of hits to return.</param>
/// <param name="count">Receives the total number of hits reported by the collector.</param>
/// <returns>The requested page of results; empty when nothing matches or the keyword is null or empty.</returns>
public List<SearchContentResult> GetSearchContentResult(string kw, int startIndex, int pageSize, out int count)
{
    count = 0;
    List<SearchContentResult> results = new List<SearchContentResult>();

    // A null or empty keyword yields no query terms, so there is nothing to search for.
    if (string.IsNullOrEmpty(kw))
    {
        return results;
    }

    // Placeholder path; it must point at the directory the indexer writes to.
    string indexPath = @"E:\xxx\xxx\index";
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    try
    {
        IndexReader reader = IndexReader.Open(directory, true); // read-only reader
        try
        {
            IndexSearcher searcher = new IndexSearcher(reader);
            try
            {
                // Segment the user's keyword and require the resulting terms to
                // occur in "content" within a slop of 100 positions.
                string[] terms = CommonHelper.FenCi(kw.ToLower());
                PhraseQuery query = new PhraseQuery();
                foreach (string term in terms)
                {
                    query.Add(new Term("content", term));
                }
                query.SetSlop(100);

                // Only the 2000 best hits are collected, so pages beyond that are empty
                // even though count may report more hits.
                TopScoreDocCollector collector = TopScoreDocCollector.create(2000, true);
                searcher.Search(query, null, collector);
                count = collector.GetTotalHits();

                ScoreDoc[] docs = collector.TopDocs(startIndex, pageSize).scoreDocs;
                foreach (ScoreDoc hit in docs)
                {
                    Document doc = searcher.Doc(hit.doc);
                    SearchContentResult item = new SearchContentResult();
                    item.Url = doc.Get("url");
                    item.Title = doc.Get("title");
                    item.Body = highLight(kw, doc.Get("content"));
                    results.Add(item);
                }
            }
            finally
            {
                searcher.Close();
            }
        }
        finally
        {
            // A searcher built from an existing reader does not own it, so close the reader here.
            reader.Close();
        }
    }
    finally
    {
        directory.Close();
    }

    return results;
}

/// <summary>
/// Returns the best fragment of <paramref name="content"/> with occurrences of
/// <paramref name="keyword"/> wrapped in a red font tag, or the original content
/// when the highlighter produces no fragment.
/// </summary>
private static String highLight(string keyword, String content)
{
    // Plain ASCII quotes are required for the colour attribute to be valid HTML.
    PanGu.HighLight.SimpleHTMLFormatter formatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color='red'>", "</font>");
    PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(formatter, new Segment());
    highlighter.FragmentSize = 500;

    string fragment = highlighter.GetBestFragment(keyword, content);
    return string.IsNullOrEmpty(fragment) ? content : fragment;
}
2.Quartz.Net定时任务
在Global类中声明一个静态变量
// Scheduler held in a static field of the Global class so that a single instance is shared by the whole application.
static IScheduler sched;
保证其在系统中是唯一的
// Set up the Quartz scheduler and register the indexing job with a daily trigger.
ISchedulerFactory schedulerFactory = new StdSchedulerFactory();
sched = schedulerFactory.GetScheduler();

// IndexJob is the class that implements the IJob interface.
JobDetail indexJob = new JobDetail("job1", "group1", typeof(IndexJob));

// Daily trigger built from the values 10 and 46 (presumably hour and minute),
// tied to the job above through its name and group.
Trigger dailyTrigger = TriggerUtils.MakeDailyTrigger("trigger", 10, 46);
dailyTrigger.JobName = "job1";
dailyTrigger.JobGroup = "group1";

sched.AddJob(indexJob, true);
sched.ScheduleJob(dailyTrigger);
sched.Start();
添加任务类,并实现IJob接口
/// <summary>
/// Job class implementing <c>IJob</c>; it is the type passed to the JobDetail
/// when the job is registered with the scheduler.
/// </summary>
public class IndexJob : IJob
{
    private static readonly ILog logger = LogManager.GetLogger(typeof(IndexJob));

    /// <summary>
    /// Method required by <c>IJob</c>; the body is intentionally left as a placeholder.
    /// </summary>
    /// <param name="context">Execution context supplied by the caller.</param>
    public void Execute(JobExecutionContext context)
    {
        // Put the code the job should run here.
    }
}
标签:
原文地址:http://www.cnblogs.com/sunniest/p/4396544.html