/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;


import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ThreadInterruptedException;

Implements search over a single IndexReader.

Applications usually need only call the inherited search(Query, int) method. For performance reasons, if your index is unchanging, you should share a single IndexSearcher instance across multiple searches instead of creating a new one per-search. If your index has changed and you wish to see the changes reflected in searching, you should use DirectoryReader.openIfChanged(DirectoryReader) to obtain a new reader and then create a new IndexSearcher from that. Also, for low-latency turnaround it's best to use a near-real-time reader (DirectoryReader.open(IndexWriter)). Once you have a new IndexReader, it's relatively cheap to create a new IndexSearcher from it.

NOTE: The search and searchAfter methods are configured to only count top hits accurately up to 1,000 and may return a lower bound of the hit count if the hit count is greater than or equal to 1,000. On queries that match lots of documents, counting the number of hits may take much longer than computing the top hits so this trade-off allows to get some minimal information about the hit count without slowing down search too much. The TopDocs.scoreDocs array is always accurate however. If this behavior doesn't suit your needs, you should create collectors manually with either TopScoreDocCollector.create or TopFieldCollector.create and call search(Query, Collector).

NOTE: IndexSearcher instances are completely thread safe, meaning multiple threads can call any of its methods, concurrently. If your application requires external synchronization, you should not synchronize on the IndexSearcher instance; use your own (non-Lucene) objects instead.

/** Implements search over a single IndexReader.
 *
 * <p>Applications usually need only call the inherited
 * {@link #search(Query,int)} method. For performance reasons, if your index is
 * unchanging, you should share a single IndexSearcher instance across multiple
 * searches instead of creating a new one per-search. If your index has changed
 * and you wish to see the changes reflected in searching, you should use
 * {@link DirectoryReader#openIfChanged(DirectoryReader)} to obtain a new reader
 * and then create a new IndexSearcher from that. Also, for low-latency
 * turnaround it's best to use a near-real-time reader
 * ({@link DirectoryReader#open(IndexWriter)}). Once you have a new
 * {@link IndexReader}, it's relatively cheap to create a new IndexSearcher
 * from it.
 *
 * <p><b>NOTE</b>: The {@link #search} and {@link #searchAfter} methods are
 * configured to only count top hits accurately up to {@code 1,000} and may
 * return a {@link TotalHits.Relation lower bound} of the hit count if the
 * hit count is greater than or equal to {@code 1,000}. On queries that match
 * lots of documents, counting the number of hits may take much longer than
 * computing the top hits so this trade-off allows to get some minimal
 * information about the hit count without slowing down search too much. The
 * {@link TopDocs#scoreDocs} array is always accurate however. If this behavior
 * doesn't suit your needs, you should create collectors manually with either
 * {@link TopScoreDocCollector#create} or {@link TopFieldCollector#create} and
 * call {@link #search(Query, Collector)}.
 *
 * <a name="thread-safety"></a><p><b>NOTE</b>: <code>{@link
 * IndexSearcher}</code> instances are completely thread safe, meaning multiple
 * threads can call any of its methods, concurrently. If your application
 * requires external synchronization, you should <b>not</b> synchronize on the
 * <code>IndexSearcher</code> instance; use your own (non-Lucene) objects
 * instead.</p>
 */
public class IndexSearcher {

  // Process-wide defaults, shared by all searchers unless overridden per
  // instance via setQueryCache/setQueryCachingPolicy.
  private static QueryCache DEFAULT_QUERY_CACHE;
  private static QueryCachingPolicy DEFAULT_CACHING_POLICY = new UsageTrackingQueryCachingPolicy();
  static {
    final int maxCachedQueries = 1000;
    // min of 32MB or 5% of the heap size
    final long maxRamBytesUsed = Math.min(1L << 25, Runtime.getRuntime().maxMemory() / 20);
    DEFAULT_QUERY_CACHE = new LRUQueryCache(maxCachedQueries, maxRamBytesUsed);
  }
By default we count hits accurately up to 1000. This makes sure that we don't spend most time on computing hit counts
/**
 * By default we count hits accurately up to 1000. This makes sure that we
 * don't spend most time on computing hit counts
 */
private static final int TOTAL_HITS_THRESHOLD = 1000;

final IndexReader reader; // package private for testing!

// NOTE: these members might change in incompatible ways
// in the next release
protected final IndexReaderContext readerContext;
protected final List<LeafReaderContext> leafContexts;
used with executor - each slice holds a set of leafs executed within one thread
/** used with executor - each slice holds a set of leafs executed within one thread */
private final LeafSlice[] leafSlices;

// These are only used for multi-threaded search
private final Executor executor;

// the default Similarity
private static final Similarity defaultSimilarity = new BM25Similarity();

// Per-instance cache/policy, initialized from the process-wide defaults.
private QueryCache queryCache = DEFAULT_QUERY_CACHE;
private QueryCachingPolicy queryCachingPolicy = DEFAULT_CACHING_POLICY;
Expert: returns a default Similarity instance. In general, this method is only called to initialize searchers and writers. User code and query implementations should respect getSimilarity().
@lucene.internal
/**
 * Expert: returns a default Similarity instance.
 * In general, this method is only called to initialize searchers and writers.
 * User code and query implementations should respect
 * {@link IndexSearcher#getSimilarity()}.
 * @lucene.internal
 */
public static Similarity getDefaultSimilarity() {
  return defaultSimilarity;
}
Expert: Get the default QueryCache or null if the cache is disabled.
@lucene.internal
/**
 * Expert: Get the default {@link QueryCache} or {@code null} if the cache is
 * disabled.
 * @lucene.internal
 */
public static QueryCache getDefaultQueryCache() {
  return DEFAULT_QUERY_CACHE;
}
Expert: set the default QueryCache instance.
@lucene.internal
/**
 * Expert: set the default {@link QueryCache} instance.
 * @lucene.internal
 */
public static void setDefaultQueryCache(QueryCache defaultQueryCache) {
  DEFAULT_QUERY_CACHE = defaultQueryCache;
}
Expert: Get the default QueryCachingPolicy.
@lucene.internal
/**
 * Expert: Get the default {@link QueryCachingPolicy}.
 * @lucene.internal
 */
public static QueryCachingPolicy getDefaultQueryCachingPolicy() {
  return DEFAULT_CACHING_POLICY;
}
Expert: set the default QueryCachingPolicy instance.
@lucene.internal
/**
 * Expert: set the default {@link QueryCachingPolicy} instance.
 * @lucene.internal
 */
public static void setDefaultQueryCachingPolicy(QueryCachingPolicy defaultQueryCachingPolicy) {
  DEFAULT_CACHING_POLICY = defaultQueryCachingPolicy;
}
The Similarity implementation used by this searcher.
/** The Similarity implementation used by this searcher. */
private Similarity similarity = defaultSimilarity;
Creates a searcher searching the provided index.
/** Creates a searcher searching the provided index. */
public IndexSearcher(IndexReader r) {
  // Delegate to the Executor-aware constructor with no executor,
  // i.e. single-threaded search.
  this(r, null);
}
Runs searches for each segment separately, using the provided Executor. NOTE: if you are using NIOFSDirectory, do not use the shutdownNow method of ExecutorService as this uses Thread.interrupt under-the-hood which can silently close file descriptors (see LUCENE-2239).
@lucene.experimental
/**
 * Runs searches for each segment separately, using the provided Executor.
 * NOTE: if you are using {@link NIOFSDirectory}, do not use the shutdownNow
 * method of ExecutorService as this uses Thread.interrupt under-the-hood
 * which can silently close file descriptors (see
 * <a href="https://issues.apache.org/jira/browse/LUCENE-2239">LUCENE-2239</a>).
 *
 * @lucene.experimental
 */
public IndexSearcher(IndexReader r, Executor executor) {
  // Obtain the reader's top-level context and delegate.
  this(r.getContext(), executor);
}
Creates a searcher searching the provided top-level IndexReaderContext.

Given a non-null Executor this method runs searches for each segment separately, using the provided Executor. NOTE: if you are using NIOFSDirectory, do not use the shutdownNow method of ExecutorService as this uses Thread.interrupt under-the-hood which can silently close file descriptors (see LUCENE-2239).

See Also:
@lucene.experimental
/**
 * Creates a searcher searching the provided top-level {@link IndexReaderContext}.
 * <p>
 * Given a non-<code>null</code> {@link Executor} this method runs searches
 * for each segment separately, using the provided Executor.
 * NOTE: if you are using {@link NIOFSDirectory}, do not use the shutdownNow
 * method of ExecutorService as this uses Thread.interrupt under-the-hood
 * which can silently close file descriptors (see
 * <a href="https://issues.apache.org/jira/browse/LUCENE-2239">LUCENE-2239</a>).
 *
 * @see IndexReaderContext
 * @see IndexReader#getContext()
 * @lucene.experimental
 */
public IndexSearcher(IndexReaderContext context, Executor executor) {
  assert context.isTopLevel: "IndexSearcher's ReaderContext must be topLevel for reader" + context.reader();
  this.reader = context.reader();
  this.executor = executor;
  this.readerContext = context;
  this.leafContexts = context.leaves();
  // Slices are only needed for concurrent search; without an executor
  // leave them null so the single-threaded path is taken.
  this.leafSlices = executor == null ? null : slices(leafContexts);
}
Creates a searcher searching the provided top-level IndexReaderContext.
See Also:
@lucene.experimental
/**
 * Creates a searcher searching the provided top-level {@link IndexReaderContext}.
 *
 * @see IndexReaderContext
 * @see IndexReader#getContext()
 * @lucene.experimental
 */
public IndexSearcher(IndexReaderContext context) {
  // No executor: searches run on the calling thread.
  this(context, null);
}
Set the QueryCache to use when scores are not needed. A value of null indicates that query matches should never be cached. This method should be called before starting using this IndexSearcher.

NOTE: When using a query cache, queries should not be modified after they have been passed to IndexSearcher.

See Also:
@lucene.experimental
/**
 * Set the {@link QueryCache} to use when scores are not needed.
 * A value of {@code null} indicates that query matches should never be
 * cached. This method should be called <b>before</b> starting using this
 * {@link IndexSearcher}.
 * <p>NOTE: When using a query cache, queries should not be modified after
 * they have been passed to IndexSearcher.
 * @see QueryCache
 * @lucene.experimental
 */
public void setQueryCache(QueryCache queryCache) {
  this.queryCache = queryCache;
}
Return the query cache of this IndexSearcher. This will be either the default query cache or the query cache that was last set through setQueryCache(QueryCache). A return value of null indicates that caching is disabled.
@lucene.experimental
/**
 * Return the query cache of this {@link IndexSearcher}. This will be either
 * the {@link #getDefaultQueryCache() default query cache} or the query cache
 * that was last set through {@link #setQueryCache(QueryCache)}. A return
 * value of {@code null} indicates that caching is disabled.
 * @lucene.experimental
 */
public QueryCache getQueryCache() {
  return queryCache;
}
Set the QueryCachingPolicy to use for query caching. This method should be called before starting using this IndexSearcher.
See Also:
@lucene.experimental
/**
 * Set the {@link QueryCachingPolicy} to use for query caching.
 * This method should be called <b>before</b> starting using this
 * {@link IndexSearcher}.
 * @see QueryCachingPolicy
 * @lucene.experimental
 */
public void setQueryCachingPolicy(QueryCachingPolicy queryCachingPolicy) {
  // null is not a valid policy (unlike the cache itself, which may be null)
  this.queryCachingPolicy = Objects.requireNonNull(queryCachingPolicy);
}
Return the query cache of this IndexSearcher. This will be either the default policy or the policy that was last set through setQueryCachingPolicy(QueryCachingPolicy).
@lucene.experimental
/**
 * Return the query caching policy of this {@link IndexSearcher}. This will be
 * either the {@link #getDefaultQueryCachingPolicy() default policy} or the
 * policy that was last set through
 * {@link #setQueryCachingPolicy(QueryCachingPolicy)}.
 * @lucene.experimental
 */
public QueryCachingPolicy getQueryCachingPolicy() {
  return queryCachingPolicy;
}
Expert: Creates an array of leaf slices each holding a subset of the given leaves. Each LeafSlice is executed in a single thread. By default there will be one LeafSlice per leaf (LeafReaderContext).
/**
 * Expert: Creates an array of leaf slices each holding a subset of the given
 * leaves. Each {@link LeafSlice} is executed in a single thread. By default
 * there will be one {@link LeafSlice} per leaf
 * ({@link org.apache.lucene.index.LeafReaderContext}).
 */
protected LeafSlice[] slices(List<LeafReaderContext> leaves) {
  final int numLeaves = leaves.size();
  final LeafSlice[] result = new LeafSlice[numLeaves];
  for (int idx = 0; idx < numLeaves; idx++) {
    result[idx] = new LeafSlice(leaves.get(idx));
  }
  return result;
}
Return the IndexReader this searches.
/** Return the {@link IndexReader} this searches. */
public IndexReader getIndexReader() {
  return reader;
}
Sugar for .getIndexReader().document(docID)
See Also:
  • IndexReader.document(int)
/**
 * Sugar for <code>.getIndexReader().document(docID)</code>
 * @see IndexReader#document(int)
 */
public Document doc(int docID) throws IOException {
  return reader.document(docID);
}
Sugar for .getIndexReader().document(docID, fieldVisitor)
See Also:
  • IndexReader.document(int, StoredFieldVisitor)
/**
 * Sugar for <code>.getIndexReader().document(docID, fieldVisitor)</code>
 * @see IndexReader#document(int, StoredFieldVisitor)
 */
public void doc(int docID, StoredFieldVisitor fieldVisitor) throws IOException {
  reader.document(docID, fieldVisitor);
}
Sugar for .getIndexReader().document(docID, fieldsToLoad)
See Also:
  • IndexReader.document(int, Set)
/**
 * Sugar for <code>.getIndexReader().document(docID, fieldsToLoad)</code>
 * @see IndexReader#document(int, Set)
 */
public Document doc(int docID, Set<String> fieldsToLoad) throws IOException {
  return reader.document(docID, fieldsToLoad);
}
Expert: Set the Similarity implementation used by this IndexSearcher.
/** Expert: Set the Similarity implementation used by this IndexSearcher. */
public void setSimilarity(Similarity similarity) {
  this.similarity = similarity;
}
Expert: Get the Similarity to use to compute scores. This returns the Similarity that has been set through setSimilarity(Similarity) or the default Similarity if none has been set explicitly.
/**
 * Expert: Get the {@link Similarity} to use to compute scores. This returns
 * the {@link Similarity} that has been set through
 * {@link #setSimilarity(Similarity)} or the default {@link Similarity} if
 * none has been set explicitly.
 */
public Similarity getSimilarity() {
  return similarity;
}
Count how many documents match the given query.
/** * Count how many documents match the given query. */
public int count(Query query) throws IOException { query = rewrite(query); while (true) { // remove wrappers that don't matter for counts if (query instanceof ConstantScoreQuery) { query = ((ConstantScoreQuery) query).getQuery(); } else { break; } } // some counts can be computed in constant time if (query instanceof MatchAllDocsQuery) { return reader.numDocs(); } else if (query instanceof TermQuery && reader.hasDeletions() == false) { Term term = ((TermQuery) query).getTerm(); int count = 0; for (LeafReaderContext leaf : reader.leaves()) { count += leaf.reader().docFreq(term); } return count; } // general case: create a collecor and count matches final CollectorManager<TotalHitCountCollector, Integer> collectorManager = new CollectorManager<TotalHitCountCollector, Integer>() { @Override public TotalHitCountCollector newCollector() throws IOException { return new TotalHitCountCollector(); } @Override public Integer reduce(Collection<TotalHitCountCollector> collectors) throws IOException { int total = 0; for (TotalHitCountCollector collector : collectors) { total += collector.getTotalHits(); } return total; } }; return search(query, collectorManager); }
Returns the leaf slices used for concurrent searching, or null if no Executor was passed to the constructor.
@lucene.experimental
/**
 * Returns the leaf slices used for concurrent searching, or null if no
 * {@code Executor} was passed to the constructor.
 *
 * @lucene.experimental
 */
public LeafSlice[] getSlices() {
  return leafSlices;
}
Finds the top n hits for query where all results are after a previous result (after).

By passing the bottom result from a previous page as after, this method can be used for efficient 'deep-paging' across potentially large result sets.

Throws:
/**
 * Finds the top <code>n</code> hits for <code>query</code> where all results
 * are after a previous result (<code>after</code>).
 * <p>
 * By passing the bottom result from a previous page as <code>after</code>,
 * this method can be used for efficient 'deep-paging' across potentially
 * large result sets.
 *
 * @throws BooleanQuery.TooManyClauses If a query would exceed
 *         {@link BooleanQuery#getMaxClauseCount()} clauses.
 */
public TopDocs searchAfter(ScoreDoc after, Query query, int numHits) throws IOException {
  final int limit = Math.max(1, reader.maxDoc());
  if (after != null && after.doc >= limit) {
    throw new IllegalArgumentException("after.doc exceeds the number of documents in the reader: after.doc=" + after.doc + " limit=" + limit);
  }

  // Never ask for more hits than there are documents.
  final int cappedNumHits = Math.min(numHits, limit);

  final CollectorManager<TopScoreDocCollector, TopDocs> collectorManager =
      new CollectorManager<TopScoreDocCollector, TopDocs>() {

    @Override
    public TopScoreDocCollector newCollector() throws IOException {
      return TopScoreDocCollector.create(cappedNumHits, after, TOTAL_HITS_THRESHOLD);
    }

    @Override
    public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException {
      final TopDocs[] perSlice = new TopDocs[collectors.size()];
      int slot = 0;
      for (TopScoreDocCollector c : collectors) {
        perSlice[slot++] = c.topDocs();
      }
      return TopDocs.merge(0, cappedNumHits, perSlice, true);
    }

  };

  return search(query, collectorManager);
}
Finds the top n hits for query.
Throws:
/**
 * Finds the top <code>n</code> hits for <code>query</code>.
 *
 * @throws BooleanQuery.TooManyClauses If a query would exceed
 *         {@link BooleanQuery#getMaxClauseCount()} clauses.
 */
public TopDocs search(Query query, int n) throws IOException {
  // Equivalent to paging from the very beginning.
  return searchAfter(null, query, n);
}
Lower-level search API.

LeafCollector.collect(int) is called for every matching document.

Throws:
/**
 * Lower-level search API.
 *
 * <p>{@link LeafCollector#collect(int)} is called for every matching document.
 *
 * @throws BooleanQuery.TooManyClauses If a query would exceed
 *         {@link BooleanQuery#getMaxClauseCount()} clauses.
 */
public void search(Query query, Collector results) throws IOException {
  final Query rewritten = rewrite(query);
  final Weight weight = createWeight(rewritten, results.scoreMode(), 1);
  search(leafContexts, weight, results);
}
Search implementation with arbitrary sorting, plus control over whether hit scores and max score should be computed. Finds the top n hits for query, and sorting the hits by the criteria in sort. If doDocScores is true then the score of each hit will be computed and returned. If doMaxScore is true then the maximum score over all collected hits will be computed.
Throws:
/**
 * Search implementation with arbitrary sorting, plus control over whether hit
 * scores should be computed. Finds the top <code>n</code> hits for
 * <code>query</code>, sorting the hits by the criteria in <code>sort</code>.
 * If <code>doDocScores</code> is <code>true</code> then the score of each hit
 * will be computed and returned.
 *
 * @throws BooleanQuery.TooManyClauses If a query would exceed
 *         {@link BooleanQuery#getMaxClauseCount()} clauses.
 */
public TopFieldDocs search(Query query, int n, Sort sort, boolean doDocScores) throws IOException {
  return searchAfter(null, query, n, sort, doDocScores);
}
Search implementation with arbitrary sorting.
Params:
  • query – The query to search for
  • n – Return only the top n results
  • sort – The Sort object
Throws:
Returns:The top docs, sorted according to the supplied Sort instance
/**
 * Search implementation with arbitrary sorting.
 * @param query The query to search for
 * @param n Return only the top n results
 * @param sort The {@link org.apache.lucene.search.Sort} object
 * @return The top docs, sorted according to the supplied
 *         {@link org.apache.lucene.search.Sort} instance
 * @throws IOException if there is a low-level I/O error
 */
public TopFieldDocs search(Query query, int n, Sort sort) throws IOException {
  // Scores are not computed by default.
  return searchAfter(null, query, n, sort, false);
}
Finds the top n hits for query where all results are after a previous result (after).

By passing the bottom result from a previous page as after, this method can be used for efficient 'deep-paging' across potentially large result sets.

Throws:
/**
 * Finds the top <code>n</code> hits for <code>query</code> where all results
 * are after a previous result (<code>after</code>).
 * <p>
 * By passing the bottom result from a previous page as <code>after</code>,
 * this method can be used for efficient 'deep-paging' across potentially
 * large result sets.
 *
 * @throws BooleanQuery.TooManyClauses If a query would exceed
 *         {@link BooleanQuery#getMaxClauseCount()} clauses.
 */
public TopDocs searchAfter(ScoreDoc after, Query query, int n, Sort sort) throws IOException {
  // Scores are not computed by default.
  return searchAfter(after, query, n, sort, false);
}
Finds the top n hits for query where all results are after a previous result (after), allowing control over whether hit scores and max score should be computed.

By passing the bottom result from a previous page as after, this method can be used for efficient 'deep-paging' across potentially large result sets. If doDocScores is true then the score of each hit will be computed and returned. If doMaxScore is true then the maximum score over all collected hits will be computed.

Throws:
/**
 * Finds the top <code>n</code> hits for <code>query</code> where all results
 * are after a previous result (<code>after</code>), allowing control over
 * whether hit scores should be computed.
 * <p>
 * By passing the bottom result from a previous page as <code>after</code>,
 * this method can be used for efficient 'deep-paging' across potentially
 * large result sets. If <code>doDocScores</code> is <code>true</code>
 * then the score of each hit will be computed and returned.
 *
 * @throws BooleanQuery.TooManyClauses If a query would exceed
 *         {@link BooleanQuery#getMaxClauseCount()} clauses.
 */
public TopFieldDocs searchAfter(ScoreDoc after, Query query, int numHits, Sort sort, boolean doDocScores) throws IOException {
  if (after != null && !(after instanceof FieldDoc)) {
    // TODO: if we fix type safety of TopFieldDocs we can
    // remove this
    throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
  }
  return searchAfter((FieldDoc) after, query, numHits, sort, doDocScores);
}

// Private overload that does the actual work once `after` has been
// narrowed to a FieldDoc.
private TopFieldDocs searchAfter(FieldDoc after, Query query, int numHits, Sort sort, boolean doDocScores) throws IOException {
  final int limit = Math.max(1, reader.maxDoc());
  if (after != null && after.doc >= limit) {
    throw new IllegalArgumentException("after.doc exceeds the number of documents in the reader: after.doc=" + after.doc + " limit=" + limit);
  }
  // Never ask for more hits than there are documents.
  final int cappedNumHits = Math.min(numHits, limit);
  // The sort may reference expressions that need rewriting against this searcher.
  final Sort rewrittenSort = sort.rewrite(this);

  final CollectorManager<TopFieldCollector, TopFieldDocs> manager = new CollectorManager<TopFieldCollector, TopFieldDocs>() {

    @Override
    public TopFieldCollector newCollector() throws IOException {
      // TODO: don't pay the price for accurate hit counts by default
      return TopFieldCollector.create(rewrittenSort, cappedNumHits, after, TOTAL_HITS_THRESHOLD);
    }

    @Override
    public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException {
      // Merge the per-slice top docs into a single sorted result.
      final TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()];
      int i = 0;
      for (TopFieldCollector collector : collectors) {
        topDocs[i++] = collector.topDocs();
      }
      return TopDocs.merge(rewrittenSort, 0, cappedNumHits, topDocs, true);
    }

  };

  TopFieldDocs topDocs = search(query, manager);
  if (doDocScores) {
    // Scores are not collected when sorting by field; compute them on demand.
    TopFieldCollector.populateScores(topDocs.scoreDocs, this, query);
  }
  return topDocs;
}
Lower-level search API. Search all leaves using the given CollectorManager. In contrast to search(Query, Collector), this method will use the searcher's Executor in order to parallelize execution of the collection on the configured leafSlices.
See Also:
@lucene.experimental
/** * Lower-level search API. * Search all leaves using the given {@link CollectorManager}. In contrast * to {@link #search(Query, Collector)}, this method will use the searcher's * {@link Executor} in order to parallelize execution of the collection * on the configured {@link #leafSlices}. * @see CollectorManager * @lucene.experimental */
public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager) throws IOException { if (executor == null || leafSlices.length <= 1) { final C collector = collectorManager.newCollector(); search(query, collector); return collectorManager.reduce(Collections.singletonList(collector)); } else { final List<C> collectors = new ArrayList<>(leafSlices.length); ScoreMode scoreMode = null; for (int i = 0; i < leafSlices.length; ++i) { final C collector = collectorManager.newCollector(); collectors.add(collector); if (scoreMode == null) { scoreMode = collector.scoreMode(); } else if (scoreMode != collector.scoreMode()) { throw new IllegalStateException("CollectorManager does not always produce collectors with the same score mode"); } } if (scoreMode == null) { // no segments scoreMode = ScoreMode.COMPLETE; } query = rewrite(query); final Weight weight = createWeight(query, scoreMode, 1); final List<Future<C>> topDocsFutures = new ArrayList<>(leafSlices.length); for (int i = 0; i < leafSlices.length - 1; ++i) { final LeafReaderContext[] leaves = leafSlices[i].leaves; final C collector = collectors.get(i); FutureTask<C> task = new FutureTask<>(() -> { search(Arrays.asList(leaves), weight, collector); return collector; }); executor.execute(task); topDocsFutures.add(task); } final LeafReaderContext[] leaves = leafSlices[leafSlices.length - 1].leaves; final C collector = collectors.get(leafSlices.length - 1); // execute the last on the caller thread search(Arrays.asList(leaves), weight, collector); topDocsFutures.add(CompletableFuture.completedFuture(collector)); final List<C> collectedCollectors = new ArrayList<>(); for (Future<C> future : topDocsFutures) { try { collectedCollectors.add(future.get()); } catch (InterruptedException e) { throw new ThreadInterruptedException(e); } catch (ExecutionException e) { throw new RuntimeException(e); } } return collectorManager.reduce(collectors); } }
Lower-level search API.

LeafCollector.collect(int) is called for every document.

NOTE: this method executes the searches on all given leaves exclusively. To search across all the searchers leaves use leafContexts.

Params:
  • leaves – the searchers leaves to execute the searches on
  • weight – to match documents
  • collector – to receive hits
Throws:
/**
 * Lower-level search API.
 *
 * <p>
 * {@link LeafCollector#collect(int)} is called for every document. <br>
 *
 * <p>
 * NOTE: this method executes the searches on all given leaves exclusively.
 * To search across all the searchers leaves use {@link #leafContexts}.
 *
 * @param leaves
 *          the searchers leaves to execute the searches on
 * @param weight
 *          to match documents
 * @param collector
 *          to receive hits
 * @throws BooleanQuery.TooManyClauses If a query would exceed
 *         {@link BooleanQuery#getMaxClauseCount()} clauses.
 */
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {

  // TODO: should we make this
  // threaded...?  the Collector could be sync'd?
  // always use single thread:
  for (LeafReaderContext ctx : leaves) { // search each subreader
    final LeafCollector leafCollector;
    try {
      leafCollector = collector.getLeafCollector(ctx);
    } catch (CollectionTerminatedException e) {
      // there is no doc of interest in this reader context
      // continue with the following leaf
      continue;
    }
    BulkScorer scorer = weight.bulkScorer(ctx);
    if (scorer != null) {
      try {
        // live docs filter deleted documents out of the matches
        scorer.score(leafCollector, ctx.reader().getLiveDocs());
      } catch (CollectionTerminatedException e) {
        // collection was terminated prematurely
        // continue with the following leaf
      }
    }
  }
}
Expert: called to re-write queries into primitive queries.
Throws:
/**
 * Expert: called to re-write queries into primitive queries.
 * @throws BooleanQuery.TooManyClauses If a query would exceed
 *         {@link BooleanQuery#getMaxClauseCount()} clauses.
 */
public Query rewrite(Query original) throws IOException {
  // Rewrite repeatedly until a fixpoint is reached: rewrite() returns the
  // same instance once the query is fully primitive.
  Query current = original;
  Query next = current.rewrite(reader);
  while (next != current) {
    current = next;
    next = current.rewrite(reader);
  }
  return current;
}
Returns an Explanation that describes how doc scored against query.

This is intended to be used in developing Similarity implementations, and, for good performance, should not be displayed with every hit. Computing an explanation is as expensive as executing the query over the entire index.

/**
 * Returns an Explanation that describes how <code>doc</code> scored against
 * <code>query</code>.
 *
 * <p>This is intended to be used in developing Similarity implementations,
 * and, for good performance, should not be displayed with every hit.
 * Computing an explanation is as expensive as executing the query over the
 * entire index.
 */
public Explanation explain(Query query, int doc) throws IOException {
  final Query rewritten = rewrite(query);
  final Weight weight = createWeight(rewritten, ScoreMode.COMPLETE, 1);
  return explain(weight, doc);
}
// Expert: low-level implementation method. Returns an Explanation that describes how
// doc scored against weight.
//
// This is intended to be used in developing Similarity implementations and, for good
// performance, should not be displayed with every hit. Computing an explanation is as
// expensive as executing the query over the entire index.
//
// Applications should call explain(Query, int) instead.
//
// Throws: BooleanQuery.TooManyClauses if a query would exceed the maximum clause count.
/**
 * Expert: low-level implementation method.
 * Returns an Explanation that describes how <code>doc</code> scored against
 * <code>weight</code>.
 *
 * <p>This is intended to be used in developing Similarity implementations,
 * and, for good performance, should not be displayed with every hit.
 * Computing an explanation is as expensive as executing the query over the
 * entire index.
 *
 * <p>Applications should call {@link IndexSearcher#explain(Query, int)}.
 *
 * @throws BooleanQuery.TooManyClauses If a query would exceed
 *         {@link BooleanQuery#getMaxClauseCount()} clauses.
 */
protected Explanation explain(Weight weight, int doc) throws IOException {
  // Locate the leaf that contains the global doc id, then translate the id
  // into the leaf's own doc-id space.
  final int leafIndex = ReaderUtil.subIndex(doc, leafContexts);
  final LeafReaderContext leaf = leafContexts.get(leafIndex);
  final int segmentLocalDoc = doc - leaf.docBase;
  final Bits liveDocs = leaf.reader().getLiveDocs();
  final boolean deleted = liveDocs != null && liveDocs.get(segmentLocalDoc) == false;
  if (deleted) {
    return Explanation.noMatch("Document " + doc + " is deleted");
  }
  return weight.explain(leaf, segmentLocalDoc);
}
// Creates a Weight for the given query, potentially adding caching if possible
// and configured.
// @lucene.experimental
/**
 * Creates a {@link Weight} for the given query, potentially adding caching
 * if possible and configured.
 *
 * @lucene.experimental
 */
public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException {
  // Snapshot the field into a local so the null check below and the use
  // operate on the same reference.
  final QueryCache cache = this.queryCache;
  final Weight uncached = query.createWeight(this, scoreMode, boost);
  if (cache == null || scoreMode.needsScores()) {
    return uncached;
  }
  return cache.doCache(uncached, queryCachingPolicy);
}
// Returns the top-level IndexReaderContext for this searcher.
// See also: IndexReader.getContext()
/**
 * Returns the top-level {@link IndexReaderContext} for this searcher's reader.
 * @see IndexReader#getContext()
 */
/* sugar for #getReader().getTopReaderContext() */
public IndexReaderContext getTopReaderContext() {
  return readerContext;
}
// A class holding a subset of the IndexSearcher's leaf contexts to be executed
// within a single thread.
// @lucene.experimental
/**
 * A class holding a subset of the {@link IndexSearcher}s leaf contexts to be
 * executed within a single thread.
 *
 * @lucene.experimental
 */
public static class LeafSlice {

  /** The leaves that make up this slice.
   *
   * @lucene.experimental */
  public final LeafReaderContext[] leaves;

  public LeafSlice(LeafReaderContext... leaves) {
    this.leaves = leaves;
  }
}

// Identifies the wrapped reader and the (possibly null) executor.
@Override
public String toString() {
  return "IndexSearcher(" + reader + "; executor=" + executor + ")";
}
// Returns TermStatistics for a term, or null if the term does not exist.
// This can be overridden, for example, to return a term's statistics across a
// distributed collection.
// @lucene.experimental
/**
 * Returns {@link TermStatistics} for a term, or {@code null} if
 * the term does not exist.
 *
 * This can be overridden for example, to return a term's statistics
 * across a distributed collection.
 * @lucene.experimental
 */
public TermStatistics termStatistics(Term term, TermStates context) throws IOException {
  if (context.docFreq() != 0) {
    return new TermStatistics(term.bytes(), context.docFreq(), context.totalTermFreq());
  }
  // Term occurs in no document: no statistics to report.
  return null;
}
// Returns CollectionStatistics for a field, or null if the field does not exist
// (has no indexed terms). This can be overridden, for example, to return a
// field's statistics across a distributed collection.
// @lucene.experimental
/**
 * Returns {@link CollectionStatistics} for a field, or {@code null} if
 * the field does not exist (has no indexed terms)
 *
 * This can be overridden for example, to return a field's statistics
 * across a distributed collection.
 * @lucene.experimental
 */
public CollectionStatistics collectionStatistics(String field) throws IOException {
  assert field != null;
  // Aggregate per-leaf statistics for the field across every segment.
  long docCount = 0;
  long sumTotalTermFreq = 0;
  long sumDocFreq = 0;
  for (LeafReaderContext leaf : reader.leaves()) {
    final Terms fieldTerms = leaf.reader().terms(field);
    if (fieldTerms != null) {
      docCount += fieldTerms.getDocCount();
      sumTotalTermFreq += fieldTerms.getSumTotalTermFreq();
      sumDocFreq += fieldTerms.getSumDocFreq();
    }
  }
  // A field with no documents is indistinguishable from a missing field.
  return docCount == 0
      ? null
      : new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
}
// Returns this searcher's executor, or null if no executor was provided.
/**
 * Returns this searcher's executor, or <code>null</code> if no executor was provided.
 */
public Executor getExecutor() {
  return executor;
}
}