lucene中FSDirectory、RAMDirectory的用法-白红宇

lucene中FSDirectory、RAMDirectory的用法

阅读量：4574 次

发布时间：2019-06-08

本文共 5141 字，大约阅读时间需要 17 分钟。

package com.ljq.one;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.InputStreamReader;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.NumberTools;

import org.apache.lucene.document.Field.Index;

import org.apache.lucene.document.Field.Store;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriter.MaxFieldLength;

import org.apache.lucene.queryParser.MultiFieldQueryParser;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.Filter;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.store.RAMDirectory;

import org.junit.Test;

public class DirectoryTest {

// 数据源路径

String dspath = "E:/workspace/mylucene/lucenes/IndexWriter addDocument's a javadoc .txt";

//存放索引文件的位置，即索引库

String indexpath = "E:/workspace/mylucene/luceneIndex";

//分词器

Analyzer analyzer = new StandardAnalyzer();

/**

* 创建索引，会抛异常，因为没对索引库进行保存

* IndexWriter 用来操作（增、删、改）索引库的

@Test

public void createIndex() throws Exception {

//Directory dir=FSDirectory.getDirectory(indexpath);

//内存存储：优点速度快，缺点程序退出数据就没了，所以记得程序退出时保存索引库，已FSDirectory结合使用

//由于此处只暂时保存在内存中，程序退出时没进行索引库保存，因此在搜索时程序会报错

Directory dir=new RAMDirectory();

File file = new File(dspath);

//Document存放经过组织后的数据源，只有转换为Document对象才可以被索引和搜索到

Document doc = new Document();

//文件名称

doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));

//检索到的内容

doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));

//文件大小

doc.add(new Field("size", NumberTools.longToString(file.length()),

Store.YES, Index.NOT_ANALYZED));

//检索到的文件位置

doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));

// 建立索引

//第一种方式

//IndexWriter indexWriter = new IndexWriter(indexpath, analyzer, MaxFieldLength.LIMITED);

//第二种方式

IndexWriter indexWriter = new IndexWriter(dir, analyzer, MaxFieldLength.LIMITED);

indexWriter.addDocument(doc);

indexWriter.close();

}

/**

* 创建索引(推荐)

* IndexWriter 用来操作（增、删、改）索引库的

@Test

public void createIndex2() throws Exception {

Directory fsDir = FSDirectory.getDirectory(indexpath);

//1、启动时读取

Directory ramDir = new RAMDirectory(fsDir);

// 运行程序时操作ramDir

IndexWriter ramIndexWriter = new IndexWriter(ramDir, analyzer, MaxFieldLength.LIMITED);

//数据源

File file = new File(dspath);

// 添加 Document

Document doc = new Document();

//文件名称

doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));

//检索到的内容

doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));

//文件大小

doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));

//检索到的文件位置

doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));

ramIndexWriter.addDocument(doc);

ramIndexWriter.close();

//2、退出时保存

IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, true, MaxFieldLength.LIMITED);

fsIndexWriter.addIndexesNoOptimize(new Directory[]{ramDir});

// 优化操作

fsIndexWriter.commit();

fsIndexWriter.optimize();

fsIndexWriter.close();

}

/**

* 优化操作

* @throws Exception

@Test

public void createIndex3() throws Exception{

Directory fsDir = FSDirectory.getDirectory(indexpath);

IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, MaxFieldLength.LIMITED);

fsIndexWriter.optimize();

fsIndexWriter.close();

}

/**

* 搜索

* IndexSearcher 用来在索引库中进行查询

@Test

public void search() throws Exception {

//请求字段

//String queryString = "document";

String queryString = "adddocument";

// 1，把要搜索的文本解析为 Query

String[] fields = { "name", "content" };

QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);

Query query = queryParser.parse(queryString);

// 2，进行查询，从索引库中查找

IndexSearcher indexSearcher = new IndexSearcher(indexpath);

Filter filter = null;

TopDocs topDocs = indexSearcher.search(query, filter, 10000);

System.out.println("总共有【" + topDocs.totalHits + "】条匹配结果");

// 3，打印结果

for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

// 文档内部编号

int index = scoreDoc.doc;

// 根据编号取出相应的文档

Document doc = indexSearcher.doc(index);

System.out.println("------------------------------");

System.out.println("name = " + doc.get("name"));

System.out.println("content = " + doc.get("content"));

System.out.println("size = " + NumberTools.stringToLong(doc.get("size")));

System.out.println("path = " + doc.get("path"));

}

/**

* 读取文件内容

public static String readFileContent(File file) {

try {

BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));

StringBuffer content = new StringBuffer();

for (String line = null; (line = reader.readLine()) != null;) {

content.append(line).append("\n");

}

reader.close();

return content.toString();

} catch (Exception e) {

throw new RuntimeException(e);

}

转载于:https://www.cnblogs.com/adrianlamo/p/4305777.html

你可能感兴趣的文章

为什么要配置sdk-tools/platform-tools?

查看>>

自己动手开发更好用的markdown编辑器-07(扩展语法)

查看>>

maven dependency:tree中反斜杠的含义

一个python的计算熵(entropy)的函数

查看>>

spring源码学习——spring整体架构和设计理念

为什么要进行需求分析？通常对软件系统有哪些需求？

放大的X--HDOJ-201307292012

查看>>