Simple Lucene Indexing practice

This is a very simple Lucene indexing exercise, based on the tutorial written by Thomas Paul.



import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;





public class ArticleIndexer {

            public static final String INDEX_DIRECTORY = "lucene-index";


            private Document createDocument(String article, String author,

                                    String title, String topic, String url, Date dateWritten) {


                        // The Document class represents a document in Lucene. We index Document

                        // objects and get Document objects back when we do a search

                        Document document = new Document();

                        document.add(Field.Text("author", author));

                        document.add(Field.Text("title", title));

                        document.add(Field.Text("topic", topic));

                        // The data is stored but not indexed or tokenized. This is used with

                        // data that you want returned with the results of a search but you

                        // won’t actually be searching on this data. In our example, since we

                        // won’t allow searching for the URL there is no reason to index it but

                        // we want it returned to us when a search result is found.

                        document.add(Field.UnIndexed("url", url));

                        // The data is stored and indexed but not tokenized. This is most useful

                        // for data that should be stored unchanged such as a date. In fact, the

                        // Field.Keyword can take a Date object as input

                        document.add(Field.Keyword("date", dateWritten.toString()));

                        // The data is not stored but it is indexed and tokenized. Large amounts

                        // of data such as the text of the article should be placed in the index

                        // unstored.

                        document.add(Field.UnStored("article", article));

                        return document;



            private void indexDocument(Document document) throws Exception {

                        // The Analyzer class is an abstract class that used to provide an

                        // interface that will take a Document and turn it into tokens that can

                        // be indexed. There are several useful implementations of this class

                        // but the most commonly used is the StandardAnalyzer class.

                        Analyzer analyzer = new StandardAnalyzer();


                        // The IndexWriter class is used to create and maintain indexes, thread

                        // safe. false is to set to append to the existing index only,

                        // but if the index is not existing, if will cause Exception

                        // of Index locked for write

                        IndexWriter writter = new IndexWriter(INDEX_DIRECTORY, analyzer, true);


                        // To optimize an index, one has to call optimize() on an IndexWriter

                        // instance. When this happens, all in-memory documents are flushed to

                        // the disk and all index segments are merged into a single segment,

                        // reducing the number of files that make up the index. However,

                        // optimizing an index does not help improve indexing performance. As a

                        // matter of fact, optimizing an index during the indexing process will

                        // only slow things down. Despite this, optimizing may sometimes be

                        // necessary in order to keep the number of open files under control.





            public void indexArticle(String article, String author, String title,

                                    String topic, String url, Date dateWritten) throws Exception {

                        Document document = createDocument(article, author, title, topic, url,





            public String searchDocument(String indexDirectory, String field,

                                    String criteria) throws Exception {

                        String result = "";

                        // The IndexSearcher class is used to search through an index

                        IndexSearcher is = new IndexSearcher(indexDirectory);

                        Analyzer analyzer = new StandardAnalyzer();

                        // The QueryParser class is used to build a parser that can search

                        // through an index.

                        QueryParser parser = new QueryParser(field, analyzer);

                        Query query = parser.parse(criteria);

                        // The Hits class contains the Document objects that are returned by

                        // running the Query object against the index

                        Hits hits =;


                        StringWriter sw = new StringWriter();

                        if (hits.length() > 0) {

                                    for (int i = 0; i < hits.length(); i++) {

                                                Document doc = hits.doc(i);

                                                String author = doc.get("author");


                                                sw.append(", ");



                                    result = sw.toString();

                        } else {

                                    result = "No item found!";



                        return result;



            public static void main(String args[]) {

                        ArticleIndexer indexer = new ArticleIndexer();


                        try {

                                    String article = "First time write lucene is just like….";





                                                            "Hongliang Li",

                                                            "First Lucene",

                                                            "No relevant to football",

                                                            "file:///C:/Documents and Settings/hongliang/My Documents/study/Lucene Tutorial.doc",

                                                            new Date());


                                    String criteria = "football";

                                    String searchResult = indexer.searchDocument(INDEX_DIRECTORY,

                                                            "topic", criteria);


                        } catch (Exception e) {





Other reference


This entry was posted in Java Utilities. Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in: Logo

You are commenting using your account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s