`
diaolanshan
  • 浏览: 173130 次
  • 性别: Icon_minigender_1
  • 来自: 苏州
社区版块
存档分类
最新评论

Lucene检索文件(txt,jsp,html)

    博客分类:
  • JAVA
阅读更多

Lucene检索文件包括txt,jsp,html格式(如果是word和pdf格式需要先进行格式转换)

建立索引文件的代码如下:

import org.apache.lucene.index.*;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.document.*;
import java.io.*;
/**
 * @author Eric Zhang
 */
public class IndexFiles {
 /**
  * Indexes every regular file found directly inside the "mydoc" directory
  * into a Lucene index stored under "myindex".
  *
  * For each file a document with three fields is added: "path", "modified"
  * and "contents". Progress and any failure are reported on standard output.
  *
  * @param args command-line arguments; not used
  */
 public static void main(String[] args) {
  try{
   File files = new File("mydoc");
   String[] Fnamelist = files.list();
   if (Fnamelist == null) {
    // File.list() returns null when the path is not a directory or cannot
    // be read. Bail out before the writer is opened so that an existing
    // index is not replaced by an empty one.
    System.out.println("Cannot list directory: " + files.getPath());
    return;
   }

   // create == true: a new index is built, replacing whatever is in "myindex".
   IndexWriter writer = new IndexWriter("myindex", new StandardAnalyzer(), true);
   int added = 0;
   try {
    for (int i = 0; i < Fnamelist.length; i++){
     File file = new File(files,Fnamelist[i]);
     if (!file.isFile()) {
      // Sub-directories cannot be opened as a stream; skip them instead of
      // aborting the whole run.
      continue;
     }
     indexFile(writer, file);
     added++;
    }
    writer.optimize();
   } finally {
    // Always release the writer (and its index lock), even after a failure.
    writer.close();
   }
   System.out.println("Has Added Total: " + added);
  }catch(Exception e){
   System.out.println(e);
  }
 }

 /**
  * Adds one file to the index as a single document and closes the file
  * afterwards.
  *
  * @param writer open index writer that receives the document
  * @param file   regular file to index
  * @throws IOException if the file cannot be opened or the document cannot be added
  */
 private static void indexFile(IndexWriter writer, File file) throws IOException {
  Document doc = new Document();
  doc.add(Field.Text("path", file.getPath()));
  doc.add(Field.Keyword("modified", DateField.timeToString(file.lastModified())));

  // NOTE(review): InputStreamReader uses the platform default charset here;
  // confirm that this matches the encoding of the files being indexed.
  Reader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
  try {
   doc.add(Field.Text("contents", reader));
   // The reader-backed field is consumed while the document is added, so the
   // stream must stay open until addDocument returns.
   writer.addDocument(doc);
  } finally {
   reader.close();
  }
  System.out.println("Added : " + doc.get("path"));
 }
}

 检索索引的java代码如下:

import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.search.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.document.*;
//import com.augmentum.hrms.*;
import java.util.Date;
/**
 * @author Eric Zhang
 */
public class SearchFile {
 /**
  * Runs the fixed query "数据库" against the "contents" field of the index
  * stored under "myindex" and prints, for every hit, its position, its
  * "path" field and its "modified" field, followed by the total hit count.
  *
  * Any failure is reported on standard output.
  *
  * @param args command-line arguments; not used
  */
 public static void main(String[] args) {
  Analyzer anlzr = new StandardAnalyzer();
  // Declared outside the try so the finally block can release it.
  Searcher serch = null;
  try{
   Query q = QueryParser.parse("数据库", "contents", anlzr);
   System.out.println("Searching for : " + q.toString("contents"));

   serch = new IndexSearcher("myindex");
   Hits hts = serch.search(q);
   for(int i=0; i<hts.length(); i++){
    Document doc = hts.doc(i);
    String path = doc.get("path");
    System.out.println("Find: " +i+": "+ path);
    System.out.println("Find: " + doc.get("modified"));
    System.out.println("Find: " + doc.get("path"));
   }
   System.out.println("Find Total: " + hts.length());
  }catch(Exception e){
   System.out.println(e);
  }finally{
   // Release the index files held by the searcher, whether or not the
   // search succeeded.
   if (serch != null) {
    try{
     serch.close();
    }catch(Exception e){
     System.out.println(e);
    }
   }
  }
 }
}

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics