首页 > 技术文章 > Lucene创建索引与搜索索引试手

StevenL 2015-10-19 16:02 原文

由于仿写的源码的版本是Lucene2.1.0,而我用的Lucene已经是4.5.0了,所以像创建IndexWriter、IndexSearcher这些地方,源码里的写法已经不能用了,只好自己查api摸索。要是有个老师在旁边指导该多好。


首先我创建的是中文的索引。

CJKAnalyzer是:对中文汉字,每两个字作为一个词条

StandardAnalyzer是:单个汉字作为一个词条

所以如果要查询像“大禹”这样两个字的词条时,用CJKAnalyzer;查询像“水”这样单个字的词条时,需要改用StandardAnalyzer。我在这里纠结了很久,不知道哪里错了。


还有就是StringField和TextField的区别。api的解释分别是:

TextField:A field that is indexed and tokenized, without term vectors. For example this would be used on a 'body' field, that contains the bulk of a document's text.

StringField:A field that is indexed but not tokenized: the entire String value is indexed as a single token. For example this might be used for a 'country' field or an 'id' field, or any field that you intend to use for sorting or access through the field cache.

现在看看也没很多错的地方,但是写了仨小时。期间各种查api啊,还是那句话,有个老师指点一下的话,我就能少走很多弯路,节省很多时间了。唉。。。

package org.apache.lucene;

import java.awt.BorderLayout;
import java.awt.Container;
import java.awt.GridLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.Date;

import javax.swing.JButton;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.wb.tool.FileList;
import com.wb.tool.FileText;

/**
 * Small Swing front end that builds a Lucene index over the HTML files found
 * under a user-selected directory.
 *
 * The window has one row for the directory to index, one row for the directory
 * the index is written to, a button that starts indexing, and a text area that
 * lists every file as it is added to the index.
 */
public class LuceneIndexer {
	
	/** Shows the path of the directory whose files are indexed. */
	private JTextField jtfa;
	/** Opens the chooser that fills {@link #jtfa}. */
	private JButton jba;
	/** Shows the path of the directory the index is written to. */
	private JTextField jtfb;
	/** Opens the chooser that fills {@link #jtfb}. */
	private JButton jbb;
	/** Starts indexing. */
	private JButton jbc;
	/** Progress log; static because the nested {@link LuceneIndexerTool} writes to it. */
	private static JTextArea jta;
	
	/**
	 * Builds the window and shows it. Called on the event dispatch thread
	 * (see {@link #main(String[])}).
	 */
	private void createAndShowGUI()
	{
		// Use the Java look and feel's own window decorations. A different
		// look and feel (system, GTK, Windows, Metal) could be installed with
		// UIManager.setLookAndFeel(...) before the frame is created.
		JFrame.setDefaultLookAndFeelDecorated(true);
		
		JFrame frame=new JFrame("TEST");
		frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
		
		// One chooser is shared by both "select directory" buttons.
		final JFileChooser fc=new JFileChooser();
		fc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
		
		Container con=frame.getContentPane();
		con.setLayout(new BorderLayout());
		
		JPanel jpup=new JPanel();
		jpup.setLayout(new GridLayout(3,2));
		
		jtfa=new JTextField(30);
		jba=new JButton("选择被索引的文件存放路径");
		jba.addActionListener(directoryChooserListener(fc,jtfa));
		
		jtfb=new JTextField(30);
		// Assign the field; the original declared a local here that shadowed it.
		jbb=new JButton("选择索引的存放路径");
		jbb.addActionListener(directoryChooserListener(fc,jtfb));
		
		// Empty label that only fills the left cell of the third grid row.
		JLabel jl=new JLabel("");
		jbc=new JButton("建立索引");
		jbc.addActionListener
		(
			new ActionListener()
			{
				@Override
				public void actionPerformed(ActionEvent e)
				{
					try
					{
						LuceneIndexerTool.index(jtfa.getText(),jtfb.getText());
					}
					catch(Exception ee)
					{
						// UI boundary: report any failure instead of letting it
						// escape into the event dispatch thread.
						ee.printStackTrace();
						jbc.setEnabled(true);
						JOptionPane.showMessageDialog(null,"索引创建失败!");
					}
				}
			}
		);
		
		jpup.add(jtfa);
		jpup.add(jba);
		jpup.add(jtfb);
		jpup.add(jbb);
		jpup.add(jl);
		jpup.add(jbc);
		
		jta=new JTextArea(10,60);
		JScrollPane jsp=new JScrollPane(jta);
		
		con.add(jpup,BorderLayout.NORTH);
		con.add(jsp,BorderLayout.CENTER);
		
		// pack() sizes the frame from its contents, so no explicit setSize is needed.
		frame.pack();
		frame.setVisible(true);
	}
	
	/**
	 * Creates a listener that shows the shared chooser and, when the user
	 * approves a selection, copies the selected path into {@code target} and
	 * enables the "build index" button.
	 *
	 * @param fc     chooser to show
	 * @param target text field that receives the selected path
	 * @return the listener to attach to a "select directory" button
	 */
	private ActionListener directoryChooserListener(final JFileChooser fc, final JTextField target)
	{
		return new ActionListener()
		{
			@Override
			public void actionPerformed(ActionEvent e)
			{
				int r=fc.showOpenDialog(null);
				if(r==JFileChooser.APPROVE_OPTION)
				{
					target.setText(fc.getSelectedFile().getPath());
					jbc.setEnabled(true);
				}
			}
		};
	}
	
	/**
	 * Starts the application by building the GUI on the event dispatch thread.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		SwingUtilities.invokeLater(
				new Runnable() {
					@Override
					public void run() {
						new LuceneIndexer().createAndShowGUI();
					}
				}
		);
	}
	
	/** Indexing logic used by the GUI. */
	static class LuceneIndexerTool {
		
		/** Number of leading characters of the text stored in the "digest" field. */
		private static final int DIGEST_LENGTH = 200;
		
		/**
		 * Indexes every {@code .htm}/{@code .html} file returned by
		 * {@code FileList.getFiles(filePath)} into the index at {@code indexPath}.
		 *
		 * Each document gets the stored, tokenized fields {@code fileName},
		 * {@code uri}, {@code date} (last-modified, yyyy-MM-dd), {@code size},
		 * {@code text} and {@code digest} (first 200 characters of the text).
		 * Every indexed file is also reported in the GUI log area.
		 *
		 * @param filePath  directory (or path) handed to {@code FileList.getFiles}
		 * @param indexPath directory the index is written to
		 * @throws IOException if the index cannot be opened or written
		 */
		public static void index(String filePath, String indexPath) throws IOException {
			Path path = Paths.get(indexPath);
			Analyzer analyzer = new StandardAnalyzer();
			IndexWriterConfig config = new IndexWriterConfig(analyzer);
			// "MM" is month; the original "mm" formatted the minute instead.
			SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
			
			// try-with-resources closes the writer (committing pending
			// documents) and then the directory, even when indexing fails midway.
			try (Directory dir = FSDirectory.open(path);
					IndexWriter writer = new IndexWriter(dir, config)) {
				for (String name : FileList.getFiles(filePath)) {
					File file = new File(name);
					String ext = getExt(file);
					if (!ext.equalsIgnoreCase("htm") && !ext.equalsIgnoreCase("html")) {
						continue;
					}
					
					// NOTE(review): every field is a tokenized TextField, as in the
					// original; StringField may suit "uri"/"date" better - confirm
					// against how the index is queried before changing.
					Document doc = new Document();
					doc.add(new TextField("fileName", file.getName(), Field.Store.YES));
					doc.add(new TextField("uri", file.getPath(), Field.Store.YES));
					doc.add(new TextField("date", sdf.format(new Date(file.lastModified())), Field.Store.YES));
					doc.add(new TextField("size", formatSize(file.length()), Field.Store.YES));
					
					String text = FileText.getText(file);
					doc.add(new TextField("text", text, Field.Store.YES));
					
					String digest = text.length() > DIGEST_LENGTH ? text.substring(0, DIGEST_LENGTH) : text;
					doc.add(new TextField("digest", digest, Field.Store.YES));
					
					writer.addDocument(doc);
					
					// append() avoids re-copying the whole log for every file.
					jta.append("已经加入索引:" + file + "\n");
				}
			}
		}
		
		/**
		 * Formats a file length for the "size" field: whole kilobytes followed
		 * by "K" when the length exceeds 1024 bytes, otherwise the byte count
		 * followed by "Bytes".
		 *
		 * @param length file length in bytes
		 * @return the formatted size
		 */
		private static String formatSize(long length) {
			if (length > 1024) {
				// Integer division floors, matching the original Math.floor intent
				// without the trailing ".0" a double would print.
				return (length / 1024) + "K";
			}
			return length + "Bytes";
		}
		
		/**
		 * Returns the extension of a file name: the text after its last dot.
		 *
		 * @param file file whose name is inspected
		 * @return the extension without the dot, or an empty string when the
		 *         name contains no dot or ends with one
		 */
		public static String getExt(File file) {
			String s = file.getName();
			int dot = s.lastIndexOf('.');
			// Without this guard a dot-less name such as "html" would be
			// returned whole and treated as an extension.
			if (dot < 0) {
				return "";
			}
			return s.substring(dot + 1);
		}
		
	}

}

package org.apache.lucene;

import java.awt.BorderLayout;
import java.awt.Container;
import java.awt.FlowLayout;
import java.awt.GridLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Path;
import java.nio.file.Paths;

import javax.swing.JButton;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Small Swing front end that searches a Lucene index and shows the hits.
 *
 * The top panel selects the index directory and takes the query, the centre
 * text area accumulates the results, and the bottom panel exports the text
 * area's content to a file.
 */
public class LuceneSearcher {

	/** Shows the path of the index directory. */
	private JTextField jtfa;
	/** Opens the chooser that fills {@link #jtfa}. */
	private JButton jba;
	/** Query text. */
	private JTextField jtfb;
	/** Runs the search. */
	private JButton jbb;
	private JButton jbc;
	/** Result area; static because the nested {@link LuceneSearcherTool} writes to it. */
	private static JTextArea jta;
	/** Shows the path the results are exported to. */
	private JTextField jtfc;
	/** Opens the chooser that fills {@link #jtfc}. */
	private JButton jbd;
	/** Writes the result area to the export path. */
	private JButton jbe;
	
	/**
	 * Builds the window and shows it. Called on the event dispatch thread
	 * (see {@link #main(String[])}).
	 */
	private void createAndShowGUI()
	{
		// Use the Java look and feel's own window decorations. A different
		// look and feel (system, GTK, Windows, Metal) could be installed with
		// UIManager.setLookAndFeel(...) before the frame is created.
		JFrame.setDefaultLookAndFeelDecorated(true);
		
		JFrame frame=new JFrame("Tianen Searcher! yutianen@163.com");
		frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
		
		// One chooser is shared; each listener sets the selection mode it
		// needs right before showing the dialog. The original set the mode
		// twice while building the GUI, so the second call silently overrode
		// DIRECTORIES_ONLY for the index-directory button as well.
		final JFileChooser fc=new JFileChooser();
		
		Container con=frame.getContentPane();
		con.setLayout(new BorderLayout());
		
		JPanel jpup=new JPanel();
		jpup.setLayout(new GridLayout(2,2));
		jtfa=new JTextField(30);
		jba=new JButton("选择索引的存放路径");
		jba.addActionListener(pathChooserListener(fc,JFileChooser.DIRECTORIES_ONLY,jtfa));
		
		jtfb=new JTextField(30);
		// Assign the field; the original declared a local here that shadowed it.
		jbb=new JButton("搜索");
		jbb.addActionListener
		(
			new ActionListener()
			{
				@Override
				public void actionPerformed(ActionEvent e)
				{
					try
					{
						String indexPath=jtfa.getText();
						String phrase=jtfb.getText();
						new LuceneSearcherTool().search(phrase,indexPath);
					}
					catch(Exception ex)
					{
						// UI boundary: keep the cause visible on the console
						// in addition to the dialog.
						ex.printStackTrace();
						JOptionPane.showMessageDialog(null,"搜索失败!","提示",JOptionPane.ERROR_MESSAGE);
					}
				}
			}
		);
		jpup.add(jtfa);
		jpup.add(jba);
		jpup.add(jtfb);
		jpup.add(jbb);
		
		jta=new JTextArea(10,30);
		JScrollPane jsp=new JScrollPane(jta);
		
		JPanel jpdown=new JPanel();
		jpdown.setLayout(new FlowLayout());
		jtfc=new JTextField(35);
		jbd=new JButton("设定导出路径");
		jbd.addActionListener(pathChooserListener(fc,JFileChooser.FILES_AND_DIRECTORIES,jtfc));
		jbe=new JButton("导出搜索结果");
		jbe.addActionListener
		(
			new ActionListener()
			{
				@Override
				public void actionPerformed(ActionEvent e)
				{
					try
					{
						// FileWriter is used directly: PrintWriter swallows
						// write errors, which would let a failed export be
						// reported as a success. The inner try closes the
						// file even when the write throws.
						try(FileWriter fw=new FileWriter(new File(jtfc.getText())))
						{
							fw.write(jta.getText());
						}
						JOptionPane.showMessageDialog(null,"写入文件成功!","提示",JOptionPane.INFORMATION_MESSAGE);
					}
					catch(IOException ioe)
					{
						ioe.printStackTrace();
						JOptionPane.showMessageDialog(null,"写入文件失败!","提示",JOptionPane.ERROR_MESSAGE);
					}
				}
			}
		);
		jpdown.add(jtfc);
		jpdown.add(jbd);
		jpdown.add(jbe);
		
		con.add(jpup,BorderLayout.NORTH);
		con.add(jsp,BorderLayout.CENTER);
		con.add(jpdown,BorderLayout.SOUTH);
		
		// pack() sizes the frame from its contents, so no explicit setSize is needed.
		frame.pack();
		frame.setVisible(true);
	}
	
	/**
	 * Creates a listener that shows the shared chooser in the given selection
	 * mode and, when the user approves, copies the selected path into
	 * {@code target}.
	 *
	 * @param fc            chooser to show
	 * @param selectionMode one of the {@link JFileChooser} selection-mode constants
	 * @param target        text field that receives the selected path
	 * @return the listener to attach to a "select path" button
	 */
	private static ActionListener pathChooserListener(final JFileChooser fc, final int selectionMode, final JTextField target)
	{
		return new ActionListener()
		{
			@Override
			public void actionPerformed(ActionEvent e)
			{
				fc.setFileSelectionMode(selectionMode);
				int r=fc.showOpenDialog(null);
				if(r==JFileChooser.APPROVE_OPTION)
				{
					target.setText(fc.getSelectedFile().getPath());
				}
			}
		};
	}
	
	/**
	 * Starts the application by building the GUI on the event dispatch thread.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		SwingUtilities.invokeLater(
			new Runnable() {
				@Override
				public void run() {
					new LuceneSearcher().createAndShowGUI();
				}
			}
		);
	}

	/** Search logic used by the GUI. */
	static class LuceneSearcherTool {
		
		/** Maximum number of hits requested from the index per search. */
		private static final int MAX_HITS = 10;
		
		/**
		 * Parses {@code phrase} against the {@code text} field, runs it on the
		 * index at {@code indexPath}, and appends the stored {@code uri},
		 * {@code fileName}, {@code date} and {@code digest} of up to 10 hits
		 * to the GUI result area. A stored field that is missing from a hit is
		 * shown as {@code null}.
		 *
		 * @param phrase    query in Lucene's classic query-parser syntax
		 * @param indexPath directory containing the index
		 * @throws IOException    if the index cannot be opened or read
		 * @throws ParseException if {@code phrase} is not a valid query
		 */
		public void search(String phrase, String indexPath) throws IOException, ParseException {
			// Parse first so an invalid query fails before any index file is opened.
			Analyzer analyzer = new StandardAnalyzer();
			QueryParser parser = new QueryParser("text", analyzer);
			Query query = parser.parse(phrase);
			
			Path path = Paths.get(indexPath);
			StringBuilder sb = new StringBuilder();
			
			// try-with-resources closes the reader and then the directory,
			// even when the search or a document load fails.
			try (Directory dir = FSDirectory.open(path);
					IndexReader ir = DirectoryReader.open(dir)) {
				IndexSearcher is = new IndexSearcher(ir);
				TopDocs hits = is.search(query, MAX_HITS);
				
				for (ScoreDoc scoreDoc : hits.scoreDocs) {
					Document doc = is.doc(scoreDoc.doc);
					
					// Document.get returns the stored string value (or null)
					// directly, avoiding the cast to Field and the
					// NullPointerException a missing field used to cause.
					sb.append("URI:").append(doc.get("uri")).append("\n");
					sb.append("filename:").append(doc.get("fileName")).append("\n");
					sb.append("date:").append(doc.get("date")).append("\n");
					sb.append("digest:").append(doc.get("digest")).append("\n");
					sb.append("------------------------------------\n");
				}
			}
			
			// Results are appended, so earlier searches stay visible.
			jta.append(sb.toString());
		}
		
	}
	
}





推荐阅读