码迷,mamicode.com
首页 > 其他好文 > 详细

Lucene入门示例

时间:2017-02-28 18:01:09      阅读:226      评论:0      收藏:0      [点我收藏+]

标签:oca   .class   public   logs   color   array   文件   sea   nts   

  注意:本示例使用的 Lucene 5.0.0 需要在 JDK 7 及以上版本运行。

一、pom.xml

<!-- Maven build descriptor for the Lucene demo: declares the project coordinates and the
     three Lucene 5.0.0 modules the demo code imports from. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- Project coordinates; the display name simply mirrors the artifactId. -->
    <groupId>cd.jeryzhi</groupId>
    <artifactId>luceneDemo</artifactId>
    <version>1.0</version>
    <name>${project.artifactId}</name>
    
    <!-- All Lucene modules are pinned to the same version (5.0.0). -->
    <dependencies>
        <!-- Core indexing/search API (org.apache.lucene.index, .search, .store, .document). -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>5.0.0</version>
        </dependency>
        <!-- Classic query parser (org.apache.lucene.queryparser.classic.QueryParser). -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>5.0.0</version>
        </dependency>
        <!-- Common analyzers; the demo uses StandardAnalyzer from this module. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>5.0.0</version>
        </dependency>
    </dependencies>
    
</project>

二、代码:

  

package luceneDemo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

/**
 * Minimal Lucene demo: indexes the files of one directory into an in-memory
 * index and prints the path of every file whose content matches a query.
 *
 * <p>Files are read as plain text line by line, so only text-based files
 * produce meaningful index content.
 */
public class FindTxtManager {

    /** Upper bound on the number of hits requested from the searcher. */
    private static final int MAX_HITS = 1000;

    public static void main(String[] args) {

        find("C:\\Users\\Administrator\\Desktop\\新建文件夹", "Application Strategy and Integration", "pdf");

    }

    /**
     * Indexes every regular file directly inside {@code dirPath} whose name ends
     * with {@code "." + fileType}, searches the "content" field for
     * {@code findStr}, and prints the hit count followed by the path of each hit
     * to standard output.
     *
     * <p>Failures while indexing or searching are reported via
     * {@code printStackTrace()} and do not propagate to the caller.
     *
     * @param dirPath  directory whose direct children are indexed (not recursive)
     * @param findStr  query string, parsed with the classic Lucene query syntax
     * @param fileType file extension without the leading dot, e.g. {@code "pdf"}
     */
    public static void find(String dirPath, String findStr, String fileType) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] files = new File(dirPath).listFiles();
        if (files == null) {
            System.err.println("Not a readable directory: " + dirPath);
            return;
        }
        String suffix = "." + fileType;

        // try-with-resources guarantees the analyzer and directory are released
        // even when indexing or query parsing throws.
        try (Analyzer analyzer = new StandardAnalyzer();
                Directory directory = new RAMDirectory()) {

            // The writer must be closed before a reader is opened on the directory,
            // hence the separate nested scope.
            try (IndexWriter iwriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
                for (File file : files) {
                    if (!file.isFile() || !hasSuffix(file.getName(), suffix)) {
                        continue;
                    }
                    Document document = new Document();
                    document.add(new TextField("filename", file.getName(), Store.YES));
                    document.add(new TextField("content", txt2String(file), Store.YES));
                    document.add(new TextField("path", file.getPath(), Store.YES));
                    iwriter.addDocument(document);
                }
            }

            try (DirectoryReader ireader = DirectoryReader.open(directory)) {
                IndexSearcher isearcher = new IndexSearcher(ireader);
                Query query = new QueryParser("content", analyzer).parse(findStr);
                ScoreDoc[] hits = isearcher.search(query, MAX_HITS).scoreDocs;

                System.out.println(hits.length);
                for (ScoreDoc hit : hits) {
                    Document hitDoc = isearcher.doc(hit.doc);
                    System.out.println("____________________________");
                    System.out.println(hitDoc.get("path"));
                    System.out.println("____________________________");
                }
            }

        } catch (Exception e) {
            // Demo-level boundary: report the failure and return normally.
            e.printStackTrace();
        }
    }

    /**
     * Returns whether {@code name} ends with {@code suffix} and has at least one
     * character before it, so a file named exactly ".pdf" is not treated as a
     * PDF and "a.pdf.bak" no longer matches.
     */
    private static boolean hasSuffix(String name, String suffix) {
        return name.length() > suffix.length() && name.endsWith(suffix);
    }

    /**
     * Reads a file as text using the platform default charset.
     *
     * <p>Every line is emitted with a preceding {@code '\n'}, so a non-empty
     * result always starts with a newline. If reading fails, the stack trace is
     * printed and whatever was read up to that point is returned (possibly the
     * empty string).
     *
     * @param file the file to read
     * @return the file content, one {@code "\n" + line} per line read
     */
    public static String txt2String(File file) {
        StringBuilder sb = new StringBuilder();
        // try-with-resources closes the reader even if readLine() throws.
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String s;
            while ((s = br.readLine()) != null) { // read one line at a time
                sb.append("\n").append(s);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return sb.toString();
    }
}

输出:

 

1
____________________________
C:\Users\Administrator\Desktop\新建文件夹\java.pdf
____________________________

 

Lucene入门示例

标签:oca   .class   public   logs   color   array   文件   sea   nts   

原文地址:http://www.cnblogs.com/shoubianxingchen/p/6479544.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!