I've read some documents and built a Lucene index which looks like this:
id 1
keyword foo bar
keyword john
id 2
keyword foo
id 3
keyword john doe
keyword bar foo
keyword what the hell
I want to query Lucene in a way that lets me combine single terms and phrases.
Let's say my query is
foo bar
should give back the doc ids 1, 2 and 3
The query
"foo bar"
should give back the doc ids 1
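The query [query text missing here; presumably a single term such as john or bar, each of which occurs only in docs 1 and 3]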
should give back the doc ids 1 and 3
The query
john "foo bar"
should give back the doc ids 1
My implementation in Java is not working, and reading tons of documentation didn't help.
When I query my index with
"foo bar"
I get 0 hits
When I query my index with
foo "john doe"
I get back the doc ids 1, 2 and 3 (I would expect only doc id 3, since the query is meant as foo AND "john doe"). The problem is that "john doe" gives back 0 hits, while foo gives back 3 hits.
My goal is to combine single terms and phrases in one query. What am I doing wrong? I've also played around with the analyzers, with no luck.
My implementation looks like this:
import ...
// Builds a Lucene index from TestCaseDescription objects, using an IndexWriter
// that is created lazily on the first call to index().
// NOTE(review): this listing appears to have lost its braces, its `try` lines and
// some statements (e.g. no line here adds the created fields to the document);
// read it as an excerpt, not as compilable code.
public class Indexer
private static final Logger LOG = LoggerFactory.getLogger(Indexer.class);
private final File indexDir;
private IndexWriter writer;
// Remembers the index directory; the writer stays null until index() needs it.
public Indexer(File indexDir)
this.indexDir = indexDir;
this.writer = null;
// Opens indexDir as an FSDirectory and returns a new IndexWriter configured with
// a StandardAnalyzer (Version.LUCENE_34). An IOException is rethrown as a
// RuntimeException that carries the original exception as its cause.
private IndexWriter createIndexWriter()
Directory dir = FSDirectory.open(indexDir);
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_34, analyzer);
IndexWriter idx = new IndexWriter(dir, iwc);
return idx;
} catch (IOException e)
// NOTE(review): the message probably should read "Could not create indexer ..." - confirm.
throw new RuntimeException(String.format("Could create indexer on directory [%s]", indexDir.getAbsolutePath()), e);
// Indexes one test case: creates the writer if there is none yet, builds a
// Document from the description's path, last-modified value, id and keywords,
// and hands it to updateIndex().
public void index(TestCaseDescription desc)
if (writer == null)
writer = createIndexWriter();
Document doc = new Document();
addPathToDoc(desc, doc);
addLastModifiedToDoc(desc, doc);
addIdToDoc(desc, doc);
// One call per keyword string; presumably each becomes a separate value of the
// keywords field - the line adding the field to the document is not visible here.
for (String keyword : desc.getKeywords())
addKeywordToDoc(doc, keyword);
updateIndex(doc, desc);
// Creates the id field from desc.getId(): stored and analyzed.
private void addIdToDoc(TestCaseDescription desc, Document doc)
Field idField = new Field(LuceneConstants.FIELD_ID, desc.getId(), Field.Store.YES, Field.Index.ANALYZED);
// Creates a keywords field for a single keyword string: stored and analyzed.
private void addKeywordToDoc(Document doc, String keyword)
Field keywordField = new Field(LuceneConstants.FIELD_KEYWORDS, keyword, Field.Store.YES, Field.Index.ANALYZED);
// Creates the numeric last-modified field.
// NOTE(review): the line that sets its value is not visible in this listing.
private void addLastModifiedToDoc(TestCaseDescription desc, Document doc)
NumericField modifiedField = new NumericField(LuceneConstants.FIELD_LAST_MODIFIED);
// Creates the path field from desc.getPath(): stored, indexed without analysis
// and without norms. updateIndex() uses this field's term to find the old copy.
private void addPathToDoc(TestCaseDescription desc, Document doc)
Field pathField = new Field(LuceneConstants.FIELD_PATH, desc.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
// Writes the document to the index, choosing between add and update based on
// the writer's open mode. An IOException is rethrown as a RuntimeException
// that names the test case and carries the original exception as its cause.
private void updateIndex(Document doc, TestCaseDescription desc)
if (writer.getConfig().getOpenMode() == OpenMode.CREATE)
// New index, so we just add the document (no old document can be there):
// NOTE(review): the add call itself is not visible in this listing.
LOG.debug(String.format("Adding testcase [%s] (%s)", desc.getId(), desc.getPath()));
} else
// Existing index (an old copy of this document may have been indexed) so
// we use updateDocument instead to replace the old one matching the exact
// path, if present:
LOG.debug(String.format("Updating testcase [%s] (%s)", desc.getId(), desc.getPath()));
writer.updateDocument(new Term(LuceneConstants.FIELD_PATH, desc.getPath()), doc);
} catch (IOException e)
throw new RuntimeException(String.format("Could not create or update index for testcase [%s] (%s)", desc.getId(),
desc.getPath()), e);
// Finishes writing the index and resets the writer field to null, so the next
// index() call creates a fresh writer.
// NOTE(review): the statement guarded by the catch below is not visible here
// (presumably it closes or commits the writer - confirm).
public void store()
} catch (IOException e)
// NOTE(review): unlike the other handlers, this one does not pass `e` as the cause.
throw new RuntimeException(String.format("Could not write index [%s]", writer.getDirectory().toString()));
writer = null;
import ...
// Runs query-parser searches against the index in indexDir and reports the
// matching test case ids.
// NOTE(review): this listing appears to have lost its braces, its `try` lines and
// some statements; read it as an excerpt, not as compilable code.
public class Searcher
private static final Logger LOG = LoggerFactory.getLogger(Searcher.class);
private final Analyzer analyzer;
private final QueryParser parser;
private final File indexDir;
// Sets up a QueryParser whose default field is the keywords field and whose
// analyzer is a StandardAnalyzer (Version.LUCENE_34), the same analyzer class
// and version that Indexer.createIndexWriter() uses.
public Searcher(File indexDir)
this.indexDir = indexDir;
analyzer = new StandardAnalyzer(Version.LUCENE_34);
parser = new QueryParser(Version.LUCENE_34, LuceneConstants.FIELD_KEYWORDS, analyzer);
// Parses searchString with the query parser, runs the query against a newly
// opened IndexSearcher, and reads the stored id field of every collected hit.
// Any exception is rethrown as a RuntimeException that names the index
// directory and carries the original exception as its cause.
public List<String> search(String searchString)
List<String> testCaseIds = new ArrayList<String>();
IndexSearcher searcher = getIndexSearcher(indexDir);
Query query = parser.parse(searchString);
// Logs the parsed query, rendered relative to the parser's default field.
LOG.info("Searching for: " + query.toString(parser.getField()));
// AllDocCollector is defined elsewhere; its hits are read back via getHits().
AllDocCollector results = new AllDocCollector();
searcher.search(query, results);
LOG.info("Found [{}] hit", results.getHits().size());
for (ScoreDoc scoreDoc : results.getHits())
Document doc = searcher.doc(scoreDoc.doc);
// NOTE(review): the line that adds `id` to testCaseIds is not visible in this listing.
String id = doc.get(LuceneConstants.FIELD_ID);
return testCaseIds;
} catch (Exception e)
throw new RuntimeException(String.format("Could not search index [%s]", indexDir.getAbsolutePath()), e);
// Opens indexDir as an FSDirectory and wraps it in a new IndexSearcher.
// An IOException is logged and then rethrown wrapped in a RuntimeException.
private IndexSearcher getIndexSearcher(File indexDir)
FSDirectory dir = FSDirectory.open(indexDir);
return new IndexSearcher(dir);
} catch (IOException e)
LOG.error(String.format("Could not open index directory [%s]", indexDir.getAbsolutePath()), e);
throw new RuntimeException(e);
Why are you using DOCS_ONLY? If you index only doc ids, you have just a basic inverted index with term->document mappings and no proximity (position) information. That's why your phrase queries don't work.