您的位置:首页 > 其它

Lucene创建索引与搜索索引试手

2015-10-19 16:02 399 查看
由于仿写的源码基于Lucene 2.1.0,而我用的Lucene已经是4.5.0了,所以源码里创建IndexWriter、IndexSearcher的写法已经不能用了,只好自己查API摸索。要是有个老师在旁边指导该多好。

首先我创建的是中文的索引。

CJKAnalyzer是:对中文汉字,每两个字作为一个词条

StandardAnalyzer是:单个汉字作为一个词条

所以如果要查询像“大禹”这样两个字的词条,要用CJKAnalyzer;查询像“水”这样单个字的词条,则需要改用StandardAnalyzer。我在这里纠结了很久,一直不知道哪里错了。

还有就是StringField和TextField的区别。api的解释分别是:

TextField:A field that is indexed and tokenized, without term vectors. For example this would be
used on a 'body' field, that contains the bulk of a document's text.

StringField:A field that is indexed but not tokenized: the entire String value is indexed as a single token.
For example this might be used for a 'country' field or an 'id' field, or any field that you intend to use for sorting or access through the field cache.

现在看看也没很多错的地方,但是写了仨小时。期间各种查api啊,还是那句话,有个老师指点一下的话,我就能少走很多弯路,节省很多时间了。唉。。。

package org.apache.lucene;

import java.awt.BorderLayout;
import java.awt.Container;
import java.awt.GridLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.Date;

import javax.swing.JButton;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.wb.tool.FileList;
import com.wb.tool.FileText;

/**
 * Small Swing front end that builds a Lucene index over the HTML files found
 * under a user-selected directory.
 *
 * <p>The window has two directory pickers (documents to index, and where the
 * index is written), a button that starts indexing, and a text area that
 * lists every file added to the index.
 */
public class LuceneIndexer {

    private JTextField jtfa;
    private JButton jba;
    private JTextField jtfb;
    private JButton jbb;
    private JButton jbc;
    private static JTextArea jta;

    /**
     * Builds the window and shows it. Must be called on the event dispatch
     * thread (see {@link #main(String[])}).
     */
    private void createAndShowGUI() {
        // Use the Java look-and-feel window decorations.
        JFrame.setDefaultLookAndFeelDecorated(true);

        JFrame frame = new JFrame("TEST");
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        final JFileChooser fc = new JFileChooser();
        fc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);

        Container con = frame.getContentPane();
        con.setLayout(new BorderLayout());

        JPanel jpup = new JPanel();
        jpup.setLayout(new GridLayout(3, 2));

        jtfa = new JTextField(30);
        jba = new JButton("选择被索引的文件存放路径");
        jba.addActionListener(directoryPicker(fc, jtfa));

        jtfb = new JTextField(30);
        // Assign the field; the original declared a local that shadowed it
        // and left the field null.
        jbb = new JButton("选择索引的存放路径");
        jbb.addActionListener(directoryPicker(fc, jtfb));

        // Empty label that fills the grid cell to the left of the index button.
        JLabel jl = new JLabel("");
        jbc = new JButton("建立索引");
        jbc.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                // NOTE: indexing runs on the event dispatch thread, so the
                // window does not repaint until index() returns.
                try {
                    LuceneIndexerTool.index(jtfa.getText(), jtfb.getText());
                } catch (Exception ee) {
                    ee.printStackTrace();
                    jbc.setEnabled(true);
                    JOptionPane.showMessageDialog(null, "索引创建失败!");
                }
            }
        });

        jpup.add(jtfa);
        jpup.add(jba);
        jpup.add(jtfb);
        jpup.add(jbb);
        jpup.add(jl);
        jpup.add(jbc);

        jta = new JTextArea(10, 60);
        JScrollPane jsp = new JScrollPane(jta);

        con.add(jpup, BorderLayout.NORTH);
        con.add(jsp, BorderLayout.CENTER);

        // pack() sizes the frame from its contents, so no explicit setSize().
        frame.pack();
        frame.setVisible(true);
    }

    /**
     * Creates a listener that opens {@code fc} and, when the user confirms a
     * selection, writes the chosen path into {@code target} and enables the
     * index button.
     */
    private ActionListener directoryPicker(final JFileChooser fc, final JTextField target) {
        return new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                if (fc.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
                    target.setText(fc.getSelectedFile().getPath());
                    jbc.setEnabled(true);
                }
            }
        };
    }

    public static void main(String[] args) {
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                new LuceneIndexer().createAndShowGUI();
            }
        });
    }

    /** Indexing logic used by the GUI. */
    static class LuceneIndexerTool {

        /** Maximum number of characters of body text stored in the "digest" field. */
        private static final int DIGEST_LENGTH = 200;

        /** Number of bytes per unit used for the "K" suffix in the "size" field. */
        private static final long BYTES_PER_K = 1024;

        /**
         * Adds one document per {@code .htm}/{@code .html} file returned by
         * {@code FileList.getFiles(filePath)} to the index at {@code indexPath}.
         *
         * <p>Each document carries the fields {@code fileName}, {@code uri},
         * {@code date} (last-modified, {@code yyyy-MM-dd}), {@code size},
         * {@code text} and {@code digest} (first 200 characters of the text).
         *
         * @param filePath  directory passed to {@code FileList.getFiles}
         * @param indexPath directory in which the index is opened
         * @throws IOException              if the index cannot be opened or written
         * @throws IllegalArgumentException if either path is null or blank
         */
        public static void index(String filePath, String indexPath) throws IOException {
            if (filePath == null || filePath.trim().isEmpty()) {
                throw new IllegalArgumentException("filePath must not be empty");
            }
            if (indexPath == null || indexPath.trim().isEmpty()) {
                throw new IllegalArgumentException("indexPath must not be empty");
            }

            // Collect the candidates before opening the writer so a failure
            // here cannot leave an open writer behind.
            String[] candidates = FileList.getFiles(filePath);
            if (candidates == null) {
                // NOTE(review): FileList is not visible here; treat a null
                // result as "nothing to index" rather than failing.
                candidates = new String[0];
            }

            Path path = Paths.get(indexPath);
            // try-with-resources closes the writer (and releases the index
            // write lock) even when adding a document fails.
            try (Directory dir = FSDirectory.open(path);
                 Analyzer analyzer = new StandardAnalyzer();
                 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
                for (String candidate : candidates) {
                    File file = new File(candidate);
                    String ext = getExt(file);
                    if (ext.equalsIgnoreCase("htm") || ext.equalsIgnoreCase("html")) {
                        writer.addDocument(buildDocument(file));
                        report("已经加入索引:" + file + "\n");
                    }
                }
            }
        }

        /** Builds the Lucene document describing a single file. */
        private static Document buildDocument(File file) throws IOException {
            Document doc = new Document();

            doc.add(new TextField("fileName", file.getName(), Field.Store.YES));
            doc.add(new TextField("uri", file.getPath(), Field.Store.YES));

            // "MM" is month-of-year; lower-case "mm" would format minutes.
            String date = new SimpleDateFormat("yyyy-MM-dd").format(new Date(file.lastModified()));
            doc.add(new TextField("date", date, Field.Store.YES));

            doc.add(new TextField("size", formatSize(file.length()), Field.Store.YES));

            String text = FileText.getText(file);
            if (text == null) {
                // NOTE(review): FileText is not visible here; guard against a
                // null result, which a field value must not be.
                text = "";
            }
            doc.add(new TextField("text", text, Field.Store.YES));

            String digest = text.length() > DIGEST_LENGTH ? text.substring(0, DIGEST_LENGTH) : text;
            doc.add(new TextField("digest", digest, Field.Store.YES));

            return doc;
        }

        /** Renders a byte count as whole kilobytes ("3K") or as bytes ("512Bytes"). */
        private static String formatSize(long bytes) {
            if (bytes >= BYTES_PER_K) {
                return (bytes / BYTES_PER_K) + "K";
            }
            return bytes + "Bytes";
        }

        /** Appends a progress line to the GUI text area when one exists. */
        private static void report(String line) {
            if (jta != null) {
                jta.append(line);
            }
        }

        /**
         * Returns the text after the last '.' in the file's name, or an empty
         * string when the name contains no '.'.
         *
         * @param file file whose name is inspected
         * @return the extension without the dot, never null
         */
        public static String getExt(File file) {
            String name = file.getName();
            int dot = name.lastIndexOf('.');
            return dot < 0 ? "" : name.substring(dot + 1);
        }

    }

}
package org.apache.lucene;

import java.awt.BorderLayout;
import java.awt.Container;
import java.awt.FlowLayout;
import java.awt.GridLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Path;
import java.nio.file.Paths;

import javax.swing.JButton;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Small Swing front end that runs a query against a Lucene index and shows
 * the matching documents.
 *
 * <p>The window has an index-directory picker, a query field with a search
 * button, a text area holding the results, and a row for exporting the
 * contents of that text area to a file.
 */
public class LuceneSearcher {

    private JTextField jtfa;
    private JButton jba;
    private JTextField jtfb;
    private JButton jbb;
    private JButton jbc;
    private static JTextArea jta;
    private JTextField jtfc;
    private JButton jbd;
    private JButton jbe;

    /**
     * Builds the window and shows it. Must be called on the event dispatch
     * thread (see {@link #main(String[])}).
     */
    private void createAndShowGUI() {
        // Use the Java look-and-feel window decorations.
        JFrame.setDefaultLookAndFeelDecorated(true);

        JFrame frame = new JFrame("Tianen Searcher! yutianen@163.com");
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        // One chooser is shared by both pickers; each listener sets the
        // selection mode it needs right before showing the dialog. Setting
        // the mode once during construction would apply to every picker.
        final JFileChooser fc = new JFileChooser();

        Container con = frame.getContentPane();
        con.setLayout(new BorderLayout());

        JPanel jpup = new JPanel();
        jpup.setLayout(new GridLayout(2, 2));

        jtfa = new JTextField(30);
        jba = new JButton("选择索引的存放路径");
        jba.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                fc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
                if (fc.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
                    jtfa.setText(fc.getSelectedFile().getPath());
                }
            }
        });

        jtfb = new JTextField(30);
        // Assign the field; the original declared a local that shadowed it
        // and left the field null.
        jbb = new JButton("搜索");
        jbb.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                try {
                    String indexPath = jtfa.getText();
                    String phrase = jtfb.getText();
                    new LuceneSearcherTool().search(phrase, indexPath);
                } catch (Exception ex) {
                    // Keep the cause visible instead of discarding it.
                    ex.printStackTrace();
                    JOptionPane.showMessageDialog(null, "搜索失败!", "提示", JOptionPane.ERROR_MESSAGE);
                }
            }
        });

        jpup.add(jtfa);
        jpup.add(jba);
        jpup.add(jtfb);
        jpup.add(jbb);

        jta = new JTextArea(10, 30);
        JScrollPane jsp = new JScrollPane(jta);

        JPanel jpdown = new JPanel();
        jpdown.setLayout(new FlowLayout());

        jtfc = new JTextField(35);
        jbd = new JButton("设定导出路径");
        jbd.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                fc.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
                // A save dialog, because the chosen path is written to.
                if (fc.showSaveDialog(null) == JFileChooser.APPROVE_OPTION) {
                    jtfc.setText(fc.getSelectedFile().getPath());
                }
            }
        });

        jbe = new JButton("导出搜索结果");
        jbe.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                File target = new File(jtfc.getText());
                // Write through FileWriter directly: PrintWriter suppresses
                // I/O errors, which would report success after a failed write.
                try (FileWriter out = new FileWriter(target)) {
                    out.write(jta.getText());
                    out.flush();
                } catch (IOException ioe) {
                    ioe.printStackTrace();
                    JOptionPane.showMessageDialog(null, "写入文件失败!", "提示", JOptionPane.ERROR_MESSAGE);
                    return;
                }
                JOptionPane.showMessageDialog(null, "写入文件成功!", "提示", JOptionPane.INFORMATION_MESSAGE);
            }
        });

        jpdown.add(jtfc);
        jpdown.add(jbd);
        jpdown.add(jbe);

        con.add(jpup, BorderLayout.NORTH);
        con.add(jsp, BorderLayout.CENTER);
        con.add(jpdown, BorderLayout.SOUTH);

        // pack() sizes the frame from its contents, so no explicit setSize().
        frame.pack();
        frame.setVisible(true);
    }

    public static void main(String[] args) {
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                new LuceneSearcher().createAndShowGUI();
            }
        });
    }

    /** Search logic used by the GUI. */
    static class LuceneSearcherTool {

        /** Field the query parser searches when the query names no field. */
        private static final String DEFAULT_FIELD = "text";

        /** Maximum number of hits requested from the searcher. */
        private static final int MAX_HITS = 10;

        /**
         * Parses {@code phrase} against the "text" field, runs it on the index
         * at {@code indexPath}, and appends up to ten hits (uri, file name,
         * date and digest of each) to the result text area.
         *
         * @param phrase    query in Lucene classic query-parser syntax
         * @param indexPath directory containing the index
         * @throws IOException    if the index cannot be opened or read
         * @throws ParseException if {@code phrase} is not a valid query
         */
        public void search(String phrase, String indexPath) throws IOException, ParseException {
            Path path = Paths.get(indexPath);
            // try-with-resources closes the reader and directory on every
            // path, including a failed parse or search.
            try (Directory dir = FSDirectory.open(path);
                 IndexReader ir = DirectoryReader.open(dir);
                 Analyzer analyzer = new StandardAnalyzer()) {
                IndexSearcher is = new IndexSearcher(ir);
                Query query = new QueryParser(DEFAULT_FIELD, analyzer).parse(phrase);
                TopDocs hits = is.search(query, MAX_HITS);

                StringBuilder sb = new StringBuilder();
                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    if (doc == null) {
                        continue;
                    }
                    sb.append("URI:").append(stored(doc, "uri")).append("\n");
                    sb.append("filename:").append(stored(doc, "fileName")).append("\n");
                    sb.append("date:").append(stored(doc, "date")).append("\n");
                    sb.append("digest:").append(stored(doc, "digest")).append("\n");
                    sb.append("------------------------------------\n");
                }

                // Results are appended after whatever the area already shows.
                if (jta != null) {
                    jta.append(sb.toString());
                }
            }
        }

        /**
         * Returns the stored string value of {@code name}, or an empty string
         * when the document has no such stored field.
         */
        private static String stored(Document doc, String name) {
            String value = doc.get(name);
            return value == null ? "" : value;
        }

    }

}



                                            
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: