您的位置：首页 > 数据库

用lucene3.6搜索数据库和txt文件内容

2012-01-20 14:51 190 查看

我们以前经常需要搜索数据库的内容，通常用带 like '%…%' 的sql语句；如果数据量大而且是多表查询，速度实在让人难以忍受。如果用lucene3.6，就可以把这个恼人的问题解决了。

用lucene3.6搜索photo表的title、user_name、tag_name、descr字段的内容；

用一个例题来说明更直观；此例题能搜索中文分词；

（需要mysql5的jdbc包和lucene3.6的包，代码中使用了Version.LUCENE_36）：

1、数据库我用mysql5；建一个photo表；数据库名是test。

--

-- 表的结构 photo

--

-- Sample table read by Photo.loadPhotos (columns photo_id, title, descr, user_name, tag_name).
-- Identifiers are quoted with backticks: in MySQL a single-quoted token is a
-- string literal, so 'photo' in identifier position is a syntax error.
CREATE TABLE IF NOT EXISTS `photo` (
  `photo_id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `title` varchar(11) DEFAULT NULL,
  `descr` text,
  `user_name` varchar(11) DEFAULT NULL,
  `tag_name` varchar(11) DEFAULT NULL,
  PRIMARY KEY (`photo_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=REDUNDANT AUTO_INCREMENT=5 ;

--

-- 导出表中的数据 'photo'

--

-- Seed rows for the photo table. Table and column names use backticks
-- (identifiers); only the values are single-quoted string literals.
INSERT INTO `photo` (`photo_id`, `title`, `descr`, `user_name`, `tag_name`) VALUES
(1, 'z美女j', 'h美女h', 't好人5', 'g美女femal'),
(2, 'l美女k', '5美女', '6美女', 'd美女female'),
(3, 'hagh', '4说的就是我的是f', '', NULL),
(4, 'hagh', '2说的就是我的是g', ' ', NULL);

2、java文件有4个：

文件Photo.java是数据库的photo表的操作文件；内容如下：

package test;

import java.sql.Connection;

import java.util.ArrayList;

import java.sql.PreparedStatement;

import java.sql.ResultSet;

import java.sql.SQLException;

/**
 * One row of the {@code photo} table, plus a JDBC loader that reads every row.
 *
 * <p>Field to column mapping used by {@link #loadPhotos(Connection)}:
 * {@code photoId <- photo_id}, {@code title <- title}, {@code description <- descr},
 * {@code userName <- user_name}, {@code tag <- tag_name}.
 */
public class Photo {

    private long photoId;

    private String title;

    private String description;

    private String userName;

    private String tag;

    /** @return the value read from the {@code descr} column, possibly {@code null} */
    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    /** @return the value read from the {@code photo_id} column */
    public long getPhotoId() {
        return photoId;
    }

    public void setPhotoId(long photoId) {
        this.photoId = photoId;
    }

    /** @return the value read from the {@code tag_name} column, possibly {@code null} */
    public String getTag() {
        return tag;
    }

    public void setTag(String tag) {
        this.tag = tag;
    }

    /** @return the value read from the {@code title} column, possibly {@code null} */
    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the value read from the {@code user_name} column, possibly {@code null} */
    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }

    /**
     * Reads every row of the {@code photo} table into an array.
     *
     * <p>A database failure is propagated to the caller instead of being printed and
     * swallowed: returning a partially filled array would let the caller rebuild the
     * search index from incomplete data.
     *
     * @param con an open JDBC connection; it is not closed by this method
     * @return one {@code Photo} per row, in the order the database returned them
     * @throws Exception if preparing, executing, reading or closing the query fails
     */
    public static Photo[] loadPhotos(Connection con) throws Exception {
        ArrayList<Photo> list = new ArrayList<Photo>();
        String sql = "select photo_id,title,descr,user_name,tag_name from photo";

        PreparedStatement pstm = con.prepareStatement(sql);
        try {
            ResultSet rs = pstm.executeQuery();
            try {
                while (rs.next()) {
                    Photo photo = new Photo();
                    photo.setPhotoId(rs.getLong(1));
                    photo.setTitle(rs.getString(2));
                    photo.setDescription(rs.getString(3));
                    photo.setUserName(rs.getString(4));
                    photo.setTag(rs.getString(5));
                    list.add(photo);
                }
            } finally {
                rs.close();
            }
        } finally {
            // Runs even when rs.close() throws, so the statement is never leaked.
            pstm.close();
        }

        return list.toArray(new Photo[list.size()]);
    }
}

文件IndexerFile.java是把数据库的内容备份成索引文件到磁盘中去；

内容如下：

package test;

import java.io.File;

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.index.IndexWriterConfig.OpenMode;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

import org.apache.lucene.document.Field;

/**
 * Writes {@link Photo} rows into an on-disk Lucene index.
 */
public class IndexerFile {

    /**
     * Rebuilds the index in {@code indexDir} from the given photos.
     *
     * <p>The writer is opened with {@code OpenMode.CREATE}, so the index is built
     * from scratch on every call. If anything fails before the writer has been closed,
     * the writer is rolled back so that the index write lock is released and no
     * half-written index is committed.
     *
     * @param indexDir path of the index directory
     * @param list     photos to index; one Lucene document is added per element
     * @return {@code writer.maxDoc()} as observed after all documents were added
     * @throws IOException if the index cannot be opened, written or closed
     */
    public static int indexFile(String indexDir, Photo[] list) throws IOException {
        IndexWriterConfig conf =
                new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
        conf.setOpenMode(OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(FSDirectory.open(new File(indexDir)), conf);

        boolean closed = false;
        try {
            for (int i = 0; i < list.length; i++) {
                writer.addDocument(toDocument(list[i]));
            }
            int numIndexed = writer.maxDoc();
            writer.forceMerge(1);
            writer.close();
            closed = true;
            return numIndexed;
        } finally {
            if (!closed) {
                try {
                    writer.rollback();
                } catch (IOException ignored) {
                    // Keep the original failure; the rollback error is secondary.
                }
            }
        }
    }

    /** Builds the document for one photo: stored-only id plus analyzed text fields. */
    private static Document toDocument(Photo photo) {
        Document doc = new Document();
        // The id is only needed to map a hit back to the row, so it is stored but not indexed.
        doc.add(new Field("photoId", String.valueOf(photo.getPhotoId()), Field.Store.YES, Field.Index.NO));
        addTextField(doc, "title", photo.getTitle());
        addTextField(doc, "description", photo.getDescription());
        addTextField(doc, "userName", photo.getUserName());
        addTextField(doc, "tag", photo.getTag());
        return doc;
    }

    /** Adds a stored, analyzed field unless the value is null or empty. */
    private static void addTextField(Document doc, String name, String value) {
        if (value != null && value.length() > 0) {
            doc.add(new Field(name, value, Field.Store.YES, Field.Index.ANALYZED));
        }
    }
}

文件SearcherFile.java是搜索磁盘索引文件内容的；

内容如下：

package test;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.queryParser.MultiFieldQueryParser;

import org.apache.lucene.queryParser.ParseException;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.util.Version;

/**
 * Runs a multi-field query against an already opened index and prints the hits.
 */
public class SearcherFile {

    /**
     * Searches the fields {@code title}, {@code description}, {@code tag} and
     * {@code userName} and prints the stored {@code photoId} of up to 100 hits.
     *
     * <p>NOTE(review): the static {@code MultiFieldQueryParser.parse} overload used here
     * appears to pair {@code q[i]} with {@code fields[i]} positionally, so {@code q}
     * would have to contain exactly four entries - confirm against the Lucene javadoc.
     *
     * @param searcher searcher over the index; it is not closed by this method
     * @param q        query strings handed to the parser together with the field list
     * @throws IOException    if reading the index fails
     * @throws ParseException if a query string cannot be parsed
     */
    public static void search(IndexSearcher searcher, String[] q) throws IOException, ParseException {
        final String[] fields = {"title", "description", "tag", "userName"};
        final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        final Query query = MultiFieldQueryParser.parse(Version.LUCENE_36, q, fields, analyzer);

        // 100 is the maximum number of hits requested from the searcher.
        final ScoreDoc[] hits = searcher.search(query, 100).scoreDocs;
        System.out.println("共有" + searcher.maxDoc() + "条索引，命中" + hits.length + "条");

        for (ScoreDoc hit : hits) {
            Document document = searcher.doc(hit.doc);
            System.out.println("photoId===" + document.get("photoId"));
        }
    }
}

文件TestDb.java是操作的主文件；

内容如下：

package test;

import java.io.File;

import java.io.IOException;

import java.sql.Connection;

import java.sql.SQLException;

import java.util.Date;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.queryParser.ParseException;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.store.FSDirectory;

/**
 * Entry point of the database example: rebuilds the index from the {@code photo}
 * table and then runs a sample search against it.
 */
public class TestDb {

    /** Directory that holds the on-disk Lucene index. */
    public final static String indexDir = "E:\\TestLucene";

    /**
     * Opens a connection to the local {@code test} MySQL database.
     *
     * @return the connection, or {@code null} when the driver cannot be loaded or the
     *         connection cannot be established (the error is printed)
     */
    private static Connection getConnection() {
        Connection conn = null;
        String url = "jdbc:mysql://localhost:3306/test";
        String userName = "root";
        String password = "root";
        try {
            Class.forName("com.mysql.jdbc.Driver");
            conn = java.sql.DriverManager.getConnection(url, userName, password);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("Error Trace in getConnection() : "
                    + e.getMessage());
        }
        return conn;
    }

    /**
     * Rebuilds the index, then opens it and runs the sample search.
     *
     * <p>The {@code IndexReader} is closed explicitly: a searcher built from a
     * caller-supplied reader does not close that reader in its own {@code close()}.
     */
    public static void main(String[] args) throws IOException, ParseException, SQLException {
        index(); // build the index

        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(FSDirectory.open(new File(indexDir)));
            searcher = new IndexSearcher(reader);
            search(searcher); // run the query
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (searcher != null) {
                    searcher.close();
                }
            } finally {
                if (reader != null) {
                    reader.close();
                }
            }
        }
    }

    /**
     * Runs the hard-coded sample keywords through {@link SearcherFile#search} and
     * prints the elapsed time in seconds.
     *
     * @param searcher searcher over the index in {@link #indexDir}
     */
    public static void search(IndexSearcher searcher) throws IOException, ParseException {
        // Search keywords.
        String[] q = {"美女1", "美女2", "好人3", "好人5"};
        long start = System.currentTimeMillis();
        SearcherFile.search(searcher, q);
        long end = System.currentTimeMillis();
        System.out.println("花费时间：" + (double) (end - start) / 1000 + "秒");
    }

    /**
     * Loads all photos from the database and writes them to the index in
     * {@link #indexDir}. Failures while loading or indexing are printed, not rethrown.
     *
     * @throws SQLException if closing the connection fails
     */
    public static void index() throws SQLException {
        Connection conn = null;
        try {
            conn = getConnection();
            Photo[] list = Photo.loadPhotos(conn);
            IndexerFile.indexFile(indexDir, list);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }
}

二、下面是用lucene3.6搜索txt文本文件

建一个E:\TestLucene\fileS的文件夹，放需要搜索的文件。

在该文件夹里面随便建三个txt文件，"1.txt","2.txt"和"3.txt"

其中1.txt的内容如下：

老周

北京人民

2009年

2.txt和3.txt也随便写些。

再建一个E:\TestLucene\fileIndex的文件夹，放索引文件。

java文件TestQueryFile：内容如下

package test;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStreamReader;

import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.index.IndexWriterConfig.OpenMode;

import org.apache.lucene.queryParser.ParseException;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

import org.apache.lucene.document.Field;

/**
 * Text-file example: indexes every {@code .txt} file of a folder and then searches
 * the {@code body} field of the resulting index.
 */
public class TestQueryFile {

    /**
     * Rebuilds the file index, searches it for a fixed keyword and prints the hit count.
     *
     * <p>A query that cannot be parsed is reported through the thrown exception rather
     * than being ignored, which previously led to a search with a {@code null} query.
     * The reader and searcher are closed when the search is done.
     */
    public static void main(String[] args) throws Exception {
        indexF();

        String queryString = "北京";
        String fields = "body";
        QueryParser qp = new QueryParser(Version.LUCENE_36, fields, new StandardAnalyzer(Version.LUCENE_36));
        Query query = qp.parse(queryString);

        IndexReader reader = IndexReader.open(FSDirectory.open(new File("E:\\TestLucene\\fileIndex")));
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                // 100 is the maximum number of hits requested from the searcher.
                TopDocs topDocs = searcher.search(query, 100);
                ScoreDoc[] hits = topDocs.scoreDocs;
                System.out.println("共有" + searcher.maxDoc() + "条索引，命中" + hits.length + "条");
            } finally {
                searcher.close();
            }
        } finally {
            // The searcher was built from this reader, so the reader is closed here.
            reader.close();
        }
    }

    /**
     * Indexes every regular {@code .txt} file directly inside the source folder.
     *
     * <p>Each file becomes one document with a stored, unindexed {@code path} field and
     * a stored, analyzed {@code body} field. Files are read as GBK.
     *
     * @throws Exception if the source folder cannot be listed, a file cannot be read,
     *                   or the index cannot be written
     */
    private static void indexF() throws Exception {
        File fileDir = new File("E:\\TestLucene\\fileS");
        File indexDir = new File("E:\\TestLucene\\fileIndex");

        // listFiles() returns null when the path is not a readable directory.
        File[] textFiles = fileDir.listFiles();
        if (textFiles == null) {
            throw new IOException("Cannot list files in " + fileDir.getPath());
        }

        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
        conf.setOpenMode(OpenMode.CREATE);
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexDir), conf);

        long startTime = new Date().getTime();
        boolean closed = false;
        try {
            // Add one document per text file.
            for (int i = 0; i < textFiles.length; i++) {
                if (textFiles[i].isFile()
                        && textFiles[i].getName().endsWith(".txt")) {
                    System.out.println("File " + textFiles[i].getCanonicalPath()
                            + "正在被索引....");
                    String temp = FileReaderAll(textFiles[i].getCanonicalPath(),
                            "GBK");
                    System.out.println(temp);

                    Document document = new Document();
                    Field fieldPath = new Field("path", textFiles[i].getPath(),
                            Field.Store.YES, Field.Index.NO);
                    Field fieldBody = new Field("body", temp, Field.Store.YES,
                            Field.Index.ANALYZED,
                            Field.TermVector.WITH_POSITIONS_OFFSETS);
                    document.add(fieldPath);
                    document.add(fieldBody);
                    indexWriter.addDocument(document);
                }
            }

            // Merge the index down to a single segment.
            indexWriter.forceMerge(1);
            indexWriter.close();
            closed = true;
        } finally {
            if (!closed) {
                try {
                    // Releases the writer (and its lock) without committing a partial index.
                    indexWriter.rollback();
                } catch (IOException ignored) {
                    // Keep the original failure; the rollback error is secondary.
                }
            }
        }

        // Report how long indexing took.
        long endTime = new Date().getTime();
        System.out
                .println("这花费了"
                        + (endTime - startTime)
                        + " 毫秒来把文档增加到索引里面去!"
                        + fileDir.getPath());
    }

    /**
     * Reads a whole text file into a string.
     *
     * <p>Lines are joined with {@code '\n'}. Joining them with nothing would make the
     * last character of one line adjacent to the first character of the next, which
     * can produce matches that span a line break.
     *
     * @param FileName path of the file to read
     * @param charset  name of the charset used to decode the file
     * @return the file content with line terminators normalized to {@code '\n'} and no
     *         trailing terminator
     * @throws IOException if the file cannot be opened or read, or the charset is unsupported
     */
    private static String FileReaderAll(String FileName, String charset)
            throws IOException {
        FileInputStream in = new FileInputStream(FileName);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset));
            StringBuilder temp = new StringBuilder();
            boolean first = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (!first) {
                    temp.append('\n');
                }
                temp.append(line);
                first = false;
            }
            return temp.toString();
        } finally {
            // Closing the underlying stream also covers a failure while building the reader.
            in.close();
        }
    }
}

一执行就知道结果了

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航