lucene工作总结_lucene学习总结
lucene工作总结由刀豆文库小编整理,希望给你工作、学习、生活带来方便,猜你可能喜欢“lucene学习总结”。
Lucene工作总结
关键字: lucene总结
公司项目:portal中期刊文章的内容作为大字段存储在Oracle的CLOB字段中。首页有一个搜索功能,要求把所有包含搜索关键字的文章的标题列出来,也就是要用Lucene实现对数据库大字段的索引(索引由计划任务定时建立)和搜索。
==================定时建立索引文件:===============
Main方法: Java代码
1.package zxt.lucene.index;2.3.import java.util.Timer;4.public cla IndexerServer { 5.6.7./** 8.* 定时调用建立索引任务 9.* @author wulihai 10.* @create 2009-06-02 11.*/ 12.public static void main(String[] args){ 13.String propFile = “directory.properties”;14.Config.setConfigFileName(propFile);15.Timer timer = new Timer();16.LuceneDBIndexerTask luceneTask=LuceneDBIndexerTask.getInstance();17.timer.scheduleAtFixedRate(luceneTask, 0,DataTypeUtil.toLong(Constant.CREATE_INDEX_SLEEP_TIME));18.} 19.20.}
定时调用建立索引任务: Java代码
1.package zxt.lucene.index;2.3.import java.util.Timer;4.public cla IndexerServer { 5.6.7./** 8.* 定时调用建立索引任务 9.* @author wulihai 10.* @create 2009-06-02 11.*/ 12.public static void main(String[] args){ 13.String propFile = “directory.properties”;14.Config.setConfigFileName(propFile);15.Timer timer = new Timer();16.LuceneDBIndexerTask luceneTask=LuceneDBIndexerTask.getInstance();17.timer.scheduleAtFixedRate(luceneTask, 0,DataTypeUtil.toLong(Constant.CREATE_INDEX_SLEEP_TIME));18.} 19.20.}
建立索引的核心实现: Java代码(注:下面的代码清单不完整——LuceneDBIndexerTask 的 run() 方法在清单第124行之后的内容缺失,清单第131行起是搜索类 LuceneDBQuery 的后半部分。)
1.package zxt.lucene.index;2.import java.io.BufferedReader;3.import java.io.File;4.import java.io.IOException;5.import java.io.StringWriter;6.import java.sql.Connection;7.import java.sql.DriverManager;8.import java.sql.ResultSet;9.import java.sql.SQLException;10.import java.sql.Statement;11.import java.text.SimpleDateFormat;12.import java.util.Arrays;13.import java.util.Date;14.import java.util.TimerTask;15.16.import oracle.sql.CLOB;17.18.import org.apache.lucene.analysis.standard.StandardAnalyzer;19.import org.apache.lucene.document.Document;20.import org.apache.lucene.document.Field;21.import org.apache.lucene.index.IndexWriter;22.23./** 24.* 建立索引的任务类 25.* @author wulihai 26.* @create 2009-06-02 27.*/ 28.public cla LuceneDBIndexerTask extends TimerTask { 29.//缺省索引目录
30.private static String DEFAULT_INDEX_DIR=“C:IndexDB”;31.//临时索引目录的父目录
32.private File parentDir=null;33.//被搜索的索引文件
34.private static LuceneDBIndexerTask index=new LuceneDBIndexerTask();35.36.//构造方法
37.private LuceneDBIndexerTask(){ 38.String dirStr=Constant.INDEX_STORE_DIRECTORY;39.if(dirStr!=null&&!“”.equals(dirStr)){ 40.this.parentDir=new File(dirStr);41.42.}else{ 43.this.parentDir=new File(DEFAULT_INDEX_DIR);44.} 45.46.if(!this.parentDir.exists()){ 47.this.parentDir.mkdir();48.} 49.} 50.51./** 52.* 单实例访问接口 53.* @return 54.*/ 55.public static LuceneDBIndexerTask getInstance(){ 56.return index;57.} 58.59./** 60.* 锁定目录以及文件 61.* 只允许单线程访问 62.* 63.*/ 64./*public synchronized void singleRunning(){ 65.if(flag==false){ 66.flag=true;67.run(parentDir);68.} 69.}*/ 70.71./** 72.* 为数据库字段建立索引 73.*/ 74.public void run(){ 75.System.out.println(“====LuceneDBIndexerTask$run()===============”);76.77.System.out.println(“~~~开始建立索引文件~~~~~~~~~~~~~~~”);78.Connection conn=null;79.Statement stmt=null;80.ResultSet rs=null;81.try { 82.Cla.forName(Constant.DB_DRIVER_STRING);83.conn = DriverManager.getConnection(Constant.DB_URI_STRING, Constant.DB_USERNAME, Constant.DB_PWD);84.stmt = conn.createStatement();85.rs = stmt.executeQuery(Constant.DB_QUERY_STRING);
86.File file=new File(parentDir+File.separator+new SimpleDateFormat(“yyyyMMddHHmm”).format(new Date())+File.separator);87.if(!file.exists()){ 88.file.mkdir();89.} 90.IndexWriter writer = new IndexWriter(file,new StandardAnalyzer(), true);91.long startTime = new Date().getTime();92.while(rs.next()){ 93.Document doc = new Document();94.doc.add(new Field(“ARTICLEID”, rs.getString(“ARTICLEID”), Field.Store.YES,Field.Index.TOKENIZED));95.doc.add(new Field(“TITLE”, rs.getString(“TITLE”), Field.Store.YES,Field.Index.TOKENIZED));96.doc.add(new Field(“USERNAME”, rs.getString(“USERNAME”), Field.Store.YES,Field.Index.TOKENIZED));97.doc.add(new Field(“USERID”, rs.getString(“USERID”), Field.Store.YES,Field.Index.TOKENIZED));98.//对日期建立索引
99.String createdate=new SimpleDateFormat(“yyyy-MM-dd”).format(rs.getTimestamp(“CREATEDATE”));100.doc.add(new Field(“CREATEDATE”, createdate, Field.Store.YES,Field.Index.TOKENIZED));101.//对大字段建立索引
102.BufferedReader in=null;103.String content=“”;104.CLOB clob =(CLOB)rs.getClob(“CONTENT”);105.if(clob!= null){ 106.//得到一个读入流
107.in=new BufferedReader(clob.getCharacterStream());108.StringWriter out=new StringWriter();109.int c;110.while((c=in.read())!=-1){ 111.out.write(c);112.} 113.content=out.toString();114.} 115.doc.add(new Field(“CONTENT”, content, Field.Store.YES, Field.Index.TOKENIZED));116.writer.addDocument(doc);117.} 118.writer.optimize();119.writer.close();120.121.//测试一下索引的时间
122.long endTime = new Date().getTime();123.System.out.println(“索引文件”+file.getPath()+“建立成功...”);124.System.out.println(“这花费了” +(endTimestartTime)+ “ 毫秒!”);131.} else { 132.System.out.println(“0个结果!”);133.} 134.} 135.136.return results;137.138.} 139.140./** 141.* 确定搜索索引所在目录目录 142.*/ 143.private File getTargetDir(File dir){ 144.int length = dir.listFiles().length;145.File searchFile = null;146.147.// length=3的时候最多
148.// 同时搜索和同时建索引的时候会出现length=4 149.if(length >= 2){ 150.// 找到次最新建立的索引文件 151.String[] names = dir.list();152.Arrays.sort(names);153.searchFile = new File(dir + File.separator + names[length-2]);154.} 155.if(length == 1){ 156.File files[] = dir.listFiles();157.searchFile = files[0];158.} 159.if(length == 0){ 160.// 如果没有索引文件则,建立第一个索引
161.// TestDBIndexer.getInstance().isInstanceRunning();162.// search();163.} 164.165.return searchFile;166.} 167.// 168.// public static void main(String[] args)throws Exception { 169.// new LuceneDBQuery().search(“纳税人”);170.// } 171.172.}
配置文件管理类: Java代码
1.2.package com.liferay.portal.util;3.4.import java.io.IOException;5.6.import org.jdom.Document;7.import org.jdom.Element;8.import org.jdom.JDOMException;9.import org.jdom.input.SAXBuilder;10.11.public cla LuceneDBQueryUtil { 12.13.public static String getIndexPath(){ 14.15.String filePath = “zxt_index.xml”;16.String indexPath=“”;17.SAXBuilder builder = new SAXBuilder(false);18.try { 19.Document doc = builder.build(Thread.currentThread().getContextClaLoader().getResource(filePath));20.Element rootElement = doc.getRootElement();21.Element index=rootElement.getChild(“index”);22.indexPath=index.getText();23.System.out.println(indexPath);24.} catch(JDOMException e){ 25.e.printStackTrace();26.} catch(IOException e){ 27.e.printStackTrace();28.} 29.return indexPath;30.31.32.} 33.}
配置文件:zxt_index.xml Xml代码
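<?xml version="1.0" encoding="UTF-8"?>
<!-- 原文中的XML标签和反斜杠在转载时丢失;根元素名称无法还原,此处用 root 代替(代码只读取根元素下的 index 子元素)。 -->
<root>
    <index>D:\index\IndexDB</index>
</root>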