Published

Thu 30 October 2014

←Home

Integrating Lucene with Android

If you want to integrate Lucene with your Android application, this post will get you started. Lucene provides a wide range of search options, such as fuzzy search and wildcard search, so you can use it in your Android application whenever you want to offer search over your own custom data model.

In the code shown below, searches are near real time because the SearcherManager is created from the IndexWriter instance, so its IndexReader is opened directly from that writer. Also, because creating an IndexWriter and a SearcherManager is expensive, the best place to initialize them is the application class.

Initialization: The application class which initializes the IndexWriter and SearcherManager.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
public class FeedReaderApplication extends Application {

public static final SearcherManager getSearcherManager(){return searcherManager;}
public static final IndexWriter getIndexWriter(){return indexWriter;}
private static SearcherManager searcherManager=null;
private static IndexWriter indexWriter=null;

@Override
    public void onCreate() {
   //pick the properties from user preferences
   SharedPreferences preferences = PreferenceManager.getDefaultSharedPreferences(getApplicationContext());
    Analyzer analyzer= new SimpleAnalyzer(Version.LUCENE_41);

        IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_41,analyzer);
        //pick the buffer size from property
        String memorySize=preferences.getString("lucene_memory_size","5.0");
        config.setRAMBufferSizeMB(Double.valueOf(memorySize));
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        //create index on external directory under lucene folder
        File path= new File(getApplicationContext().getExternalFilesDir(null),"lucene");
        try {
            Directory directory= FSDirectory.open(path);
            indexWriter=new IndexWriter(directory,config);
            boolean applyAllDeletes = true;
            //no need to warm the search
            searcherManager = new SearcherManager(indexWriter, applyAllDeletes, null);
        } catch (IOException e) {
            Log.e(tag,"Error occurred while opening indexWriter/SearcherManager"+ e.getMessage(),e);
        }
    }

}

In the example application, I use an SQLite database to store the feed data, but the titles are analyzed and stored in the Lucene index. I also use SimpleAnalyzer rather than StandardAnalyzer, because StandardAnalyzer filters out stop words before storing the terms. That does not work for us: the user might search with stop words and find no matches.


 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/**
 * Static helpers that add, search and delete article documents in the Lucene
 * index owned by {@link FeedReaderApplication}.
 *
 * <p>Every method logs and returns quietly when the shared IndexWriter or
 * SearcherManager is {@code null} (that is, when index initialization failed).
 */
public class LuceneSearchUtil {
    private static final String tag = LuceneSearchUtil.class.getName();

    public LuceneSearchUtil() {
    }

    /**
     * Adds one article document holding the id, title and feed id columns.
     *
     * <p>NOTE(review): this method does not commit; in this class only
     * {@link #deleteArticlesByFeedId(String)} calls {@code commit()} — confirm
     * that durability of inserted documents is handled elsewhere.
     *
     * @param contentValues row values; the id, title and feed id entries are
     *                      read and converted with {@code String.valueOf}
     */
    public static void insertArticleDocument(ContentValues contentValues) {
        IndexWriter writer = FeedReaderApplication.getIndexWriter();
        if (writer == null) {
            Log.e(tag, "Unable to add document as the IndexWriter is not initialized");
            return;
        }
        try {
            Document document = new Document();
            // don't analyze id field, store as such
            Field idField = new StringField(FeedSQLLiteHelper.COLUMN_ID,
                    String.valueOf(contentValues.get(FeedSQLLiteHelper.COLUMN_ID)), Field.Store.YES);
            document.add(idField);
            // analyze the title field, hence a TextField
            Field titleField = new TextField(FeedSQLLiteHelper.COLUMN_ARTICLE_TITLE,
                    String.valueOf(contentValues.get(FeedSQLLiteHelper.COLUMN_ARTICLE_TITLE)), Field.Store.YES);
            document.add(titleField);
            // feed id is matched exactly on delete, so it is not analyzed either
            Field feedId = new StringField(FeedSQLLiteHelper.COLUMN_ARTICLE_FEED_ID,
                    String.valueOf(contentValues.get(FeedSQLLiteHelper.COLUMN_ARTICLE_FEED_ID)), Field.Store.YES);
            document.add(feedId);
            writer.addDocument(document);
        } catch (IOException e) {
            Log.e(tag, "Unable to add document as " + e.getMessage(), e);
        }
    }

    /**
     * Searches article titles for the given text and returns the stored ids of
     * the top matches, in the order Lucene returned them.
     *
     * <p>The text is split on whitespace; each piece becomes a wildcard query on
     * the title field and the pieces must appear in order with no words between
     * them (span slop 0).
     *
     * @param searchTerm text typed by the user; {@code null} or blank yields an
     *                   empty result
     * @return ids of matching articles, at most {@code ProjectConstants.LUCENE_TOP_N};
     *         empty (never {@code null}) when nothing matches or the search fails
     */
    public static ArrayList<String> searchAndGetMatchingIds(String searchTerm) {
        ArrayList<String> result = new ArrayList<String>();
        if (searchTerm == null) {
            return result;
        }
        // Trim first: leading whitespace would otherwise make split() emit an
        // empty first term, and an empty wildcard clause matches nothing.
        String trimmed = searchTerm.trim();
        if (trimmed.length() == 0) {
            return result;
        }

        // get the searchermanager
        SearcherManager searcherManager = FeedReaderApplication.getSearcherManager();
        if (searcherManager == null) {
            Log.e(tag, "Unable to search as the SearcherManager is not initialized");
            return result;
        }

        // split on whitespace; multiple terms are to be searched
        String[] terms = trimmed.split("[\\s]+");
        SpanQuery[] spanQueryArticleTitle = new SpanQuery[terms.length];
        for (int i = 0; i < terms.length; i++) {
            // Lower-case independently of the device locale so the term matches
            // what the analyzer put into the index.
            String normalized = terms[i].toLowerCase(java.util.Locale.ROOT);
            WildcardQuery wildcardQuery =
                    new WildcardQuery(new Term(FeedSQLLiteHelper.COLUMN_ARTICLE_TITLE, normalized));
            spanQueryArticleTitle[i] = new SpanMultiTermQueryWrapper<WildcardQuery>(wildcardQuery);
        }
        // no words between the typed text; you could increase this but then performance will be lowered
        SpanNearQuery spanNearQuery = new SpanNearQuery(spanQueryArticleTitle, 0, true);

        IndexSearcher indexSearcher = null;
        try {
            // Without a refresh the manager keeps handing out the searcher it
            // opened at start-up, so newly added documents would never be found.
            searcherManager.maybeRefresh();
            // acquire() throws IOException, so it has to live inside the try
            indexSearcher = searcherManager.acquire();
            // execute topN query
            TopDocs topDocs = indexSearcher.search(spanNearQuery, ProjectConstants.LUCENE_TOP_N);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                result.add(document.get(FeedSQLLiteHelper.COLUMN_ID));
            }
        } catch (IOException e) {
            Log.e(tag, "Exception while searching the index " + e.getMessage(), e);
        } finally {
            // only release what was actually acquired
            if (indexSearcher != null) {
                try {
                    searcherManager.release(indexSearcher);
                } catch (IOException e) {
                    Log.e(tag, "Exception while releasing Index Searcher " + e.getMessage(), e);
                }
            }
        }

        return result;
    }

    /**
     * Sample delete method: removes every document whose feed id field equals
     * {@code feedId}, then commits the writer.
     *
     * <p>The commit is attempted even if the delete call fails, as in the
     * original flow.
     *
     * @param feedId exact (un-analyzed) feed id to match
     */
    public static void deleteArticlesByFeedId(String feedId) {
        IndexWriter indexWriter = FeedReaderApplication.getIndexWriter();
        if (indexWriter == null) {
            Log.e(tag, "Unable to delete document as the IndexWriter is not initialized");
            return;
        }
        TermQuery query = new TermQuery(new Term(FeedSQLLiteHelper.COLUMN_ARTICLE_FEED_ID, feedId));
        try {
            indexWriter.deleteDocuments(query);
        } catch (IOException e) {
            Log.e(tag, "Unable to delete document as " + e.getMessage(), e);
        }
        try {
            indexWriter.commit();
        } catch (IOException e) {
            Log.e(tag, "Unable to commit changes " + e.getMessage(), e);
        }
    }
}

Note the search method in the code above. It splits the query the user entered into individual terms and then searches for those terms using a SpanNearQuery with a word distance of 0, which means that whatever the user has typed must match without any word in between. For example, if the user types “Sweet Orange”, those two terms will match only if there is no word between them in the article title. Also note that Lucene returns the top matching results in ranked order, so when you pass these IDs to your database to retrieve the actual data, you must make sure the returned data is kept in that order. Here is the relevant snippet from the AsyncTaskLoader.


 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
/**
 * Runs the Lucene search for {@code searchTerm} and loads the matching
 * articles from the database, keeping them in the order Lucene ranked them.
 *
 * @return the matching article summaries in Lucene's result order; empty
 *         (never {@code null}) when there are no matches
 */
@Override
   public List<FeedArticleSummary> loadInBackground() {
       List<FeedArticleSummary> results = new ArrayList<FeedArticleSummary>();
       // query the searchterm
       ArrayList<String> ids = LuceneSearchUtil.searchAndGetMatchingIds(searchTerm);
       if (ids == null || ids.isEmpty()) {
           // no ids means there is nothing to look up or order
           return results;
       }
       // returns all the articles that match
       HashMap<String, FeedArticleSummary> map = fdb.getEntriesForFeedByIds(ids);
       if (map == null) {
           return results;
       }
       // order them: walk the ids in Lucene's order, skipping ids with no row
       for (String id : ids) {
           FeedArticleSummary summary = map.get(id);
           if (summary != null) {
               results.add(summary);
           }
       }
       return results;
   }

Now all you need to do is invoke the loader to query and load the data when the user submits a query through the SearchView in your application. Here are the implemented methods of SearchView.OnQueryTextListener.


 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
 /**
  * Starts (or restarts) the article search loader when the submitted text,
  * after trimming, is longer than five characters, and shows the progress
  * dialog if it is not already visible. Shorter or null queries are ignored.
  *
  * @param queryText the text submitted in the SearchView; may be {@code null}
  * @return always {@code true}
  */
 @Override
    public boolean onQueryTextSubmit(String queryText) {
        // threshold: ignore anything that is not longer than 5 characters
        if (queryText == null || queryText.trim().length() <= 5) {
            return true;
        }

        Loader existing = getActivity().getSupportLoaderManager()
                .getLoader(ProjectConstants.LOADER_ARTICLES_SEARCH);
        if (existing == null || existing.isReset()) {
            // nothing usable yet: create the loader
            getActivity().getSupportLoaderManager()
                    .initLoader(ProjectConstants.LOADER_ARTICLES_SEARCH, args, _self);
        } else {
            // a live loader exists: rerun it
            getActivity().getSupportLoaderManager()
                    .restartLoader(ProjectConstants.LOADER_ARTICLES_SEARCH, args, _self);
        }

        // show the progressdialog unless it is already on screen
        if (!pd.isShowing()) {
            pd.show();
        }
        return true;
    }
/**
 * Text changes are deliberately not handled here; only submitted queries
 * trigger a search.
 *
 * @param s the current text of the query field (unused)
 * @return always {@code false}
 */
@Override
    public boolean onQueryTextChange(String s) {
        // intentionally a no-op
        return false;
    }

Now, in the onLoadFinished method, just replace the data in your ArrayAdapter and you are set. That's it: you have now integrated Lucene into your application.


device-search

Go Top
comments powered by Disqus