/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
Expert: a FieldComparator compares hits so as to determine their sort order when collecting the top results with TopFieldCollector
. The concrete public FieldComparator classes here correspond to the SortField types. The document IDs passed to these methods must only
move forwards, since they are using doc values iterators
to retrieve sort values.
This API is designed to achieve high performance sorting, by exposing a tight interaction with FieldValueHitQueue
as it visits hits. Whenever a hit is competitive, it's enrolled into a virtual slot, which is an int ranging from 0 to numHits-1. Segment transitions are handled by creating a dedicated per-segment LeafFieldComparator
which also needs to interact with the FieldValueHitQueue
but can optimize based on the segment to collect.
The following functions need to be implemented
-
compare
Compare a hit at 'slot a' with hit 'slot b'. -
setTopValue
This method is called by TopFieldCollector
to notify the FieldComparator of the top most value, which is used by future calls to LeafFieldComparator.compareTop
. -
getLeafComparator(LeafReaderContext)
Invoked when the search is switching to the next segment. You may need to update internal state of the comparator, for example retrieving new values from DocValues. -
value
Return the sort value stored in the specified slot. This is only called at the end of the search, in order to populate FieldDoc.fields
when returning the top results.
See Also: @lucene.experimental
/**
* Expert: a FieldComparator compares hits so as to determine their
* sort order when collecting the top results with {@link
* TopFieldCollector}. The concrete public FieldComparator
* classes here correspond to the SortField types.
*
* <p>The document IDs passed to these methods must only
* move forwards, since they are using doc values iterators
* to retrieve sort values.</p>
*
* <p>This API is designed to achieve high performance
* sorting, by exposing a tight interaction with {@link
* FieldValueHitQueue} as it visits hits. Whenever a hit is
* competitive, it's enrolled into a virtual slot, which is
* an int ranging from 0 to numHits-1. Segment transitions are
* handled by creating a dedicated per-segment
* {@link LeafFieldComparator} which also needs to interact
* with the {@link FieldValueHitQueue} but can optimize based
* on the segment to collect.</p>
*
* <p>The following functions need to be implemented</p>
* <ul>
* <li> {@link #compare} Compare a hit at 'slot a'
* with hit 'slot b'.
*
* <li> {@link #setTopValue} This method is called by
* {@link TopFieldCollector} to notify the
* FieldComparator of the top most value, which is
* used by future calls to
* {@link LeafFieldComparator#compareTop}.
*
* <li> {@link #getLeafComparator(org.apache.lucene.index.LeafReaderContext)} Invoked
* when the search is switching to the next segment.
* You may need to update internal state of the
* comparator, for example retrieving new values from
* DocValues.
*
* <li> {@link #value} Return the sort value stored in
* the specified slot. This is only called at the end
* of the search, in order to populate {@link
* FieldDoc#fields} when returning the top results.
* </ul>
*
* @see LeafFieldComparator
* @lucene.experimental
*/
public abstract class FieldComparator<T> {
Compare hit at slot1 with hit at slot2.
Params: - slot1 – first slot to compare
- slot2 – second slot to compare
Returns: any N < 0
if slot2's value is sorted after slot1, any N > 0
if the slot2's value is sorted before slot1 and 0
if they are equal
/**
* Compare hit at slot1 with hit at slot2.
*
* @param slot1 first slot to compare
* @param slot2 second slot to compare
* @return any {@code N < 0} if slot2's value is sorted after
* slot1, any {@code N > 0} if the slot2's value is sorted before
* slot1 and {@code 0} if they are equal
*/
public abstract int compare(int slot1, int slot2);
Record the top value, for future calls to LeafFieldComparator.compareTop
. This is only called for searches that use searchAfter (deep paging), and is called before any calls to getLeafComparator(LeafReaderContext)
. /**
* Record the top value, for future calls to {@link
* LeafFieldComparator#compareTop}. This is only called for searches that
* use searchAfter (deep paging), and is called before any
* calls to {@link #getLeafComparator(LeafReaderContext)}.
*/
public abstract void setTopValue(T value);
Return the actual value in the slot.
Params: - slot – the value
Returns: value in this slot
/**
* Return the actual value in the slot.
*
* @param slot the value
* @return value in this slot
*/
public abstract T value(int slot);
Get a per-segment LeafFieldComparator
to collect the given LeafReaderContext
. All docIDs supplied to this LeafFieldComparator
are relative to the current reader (you must add docBase if you need to map it to a top-level docID). Params: - context – current reader context
Throws: - IOException – if there is a low-level IO error
Returns: the comparator to use for this segment
/**
* Get a per-segment {@link LeafFieldComparator} to collect the given
* {@link org.apache.lucene.index.LeafReaderContext}. All docIDs supplied to
* this {@link LeafFieldComparator} are relative to the current reader (you
* must add docBase if you need to map it to a top-level docID).
*
* @param context current reader context
* @return the comparator to use for this segment
* @throws IOException if there is a low-level IO error
*/
public abstract LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException;
Returns a negative integer if first is less than second,
0 if they are equal and a positive integer otherwise. Default
impl to assume the type implements Comparable and
invoke .compareTo; be sure to override this method if
your FieldComparator's type isn't a Comparable or
if your values may sometimes be null /** Returns a negative integer if first is less than second,
* 0 if they are equal and a positive integer otherwise. Default
* impl to assume the type implements Comparable and
* invoke .compareTo; be sure to override this method if
* your FieldComparator's type isn't a Comparable or
* if your values may sometimes be null */
@SuppressWarnings("unchecked")
public int compareValues(T first, T second) {
if (first == null) {
if (second == null) {
return 0;
} else {
return -1;
}
} else if (second == null) {
return 1;
} else {
return ((Comparable<T>) first).compareTo(second);
}
}
Informs the comparator that sort is done on this single field.
This is useful to enable some optimizations for skipping non-competitive documents.
/**
* Informs the comparator that sort is done on this single field.
* This is useful to enable some optimizations for skipping non-competitive documents.
*/
public void setSingleSort() {
}
For numeric comparators, setting this value, indicates that
the same numeric data has been indexed with two fields: doc values and points and
that these fields have the same name.
This allows to use sort optimization and skip non-competitive documents.
Not applicable for other comparators.
/**
* For numeric comparators, setting this value, indicates that
* the same numeric data has been indexed with two fields: doc values and points and
* that these fields have the same name.
* This allows to use sort optimization and skip non-competitive documents.
* Not applicable for other comparators.
*/
public void setCanUsePoints() {
}
Sorts by descending relevance. NOTE: if you are sorting only by descending relevance and then secondarily by ascending docID, performance is faster using TopScoreDocCollector
directly (which IndexSearcher.search
uses when no Sort
is specified). /** Sorts by descending relevance. NOTE: if you are
* sorting only by descending relevance and then
* secondarily by ascending docID, performance is faster
* using {@link TopScoreDocCollector} directly (which {@link
* IndexSearcher#search} uses when no {@link Sort} is
* specified). */
public static final class RelevanceComparator extends FieldComparator<Float> implements LeafFieldComparator {
private final float[] scores;
private float bottom;
private Scorable scorer;
private float topValue;
Creates a new comparator based on relevance for numHits
. /** Creates a new comparator based on relevance for {@code numHits}. */
public RelevanceComparator(int numHits) {
scores = new float[numHits];
}
@Override
public int compare(int slot1, int slot2) {
return Float.compare(scores[slot2], scores[slot1]);
}
@Override
public int compareBottom(int doc) throws IOException {
float score = scorer.score();
assert !Float.isNaN(score);
return Float.compare(score, bottom);
}
@Override
public void copy(int slot, int doc) throws IOException {
scores[slot] = scorer.score();
assert !Float.isNaN(scores[slot]);
}
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
return this;
}
@Override
public void setBottom(final int bottom) {
this.bottom = scores[bottom];
}
@Override
public void setTopValue(Float value) {
topValue = value;
}
@Override
public void setScorer(Scorable scorer) {
// wrap with a ScoreCachingWrappingScorer so that successive calls to
// score() will not incur score computation over and
// over again.
if (!(scorer instanceof ScoreCachingWrappingScorer)) {
this.scorer = new ScoreCachingWrappingScorer(scorer);
} else {
this.scorer = scorer;
}
}
@Override
public Float value(int slot) {
return Float.valueOf(scores[slot]);
}
// Override because we sort reverse of natural Float order:
@Override
public int compareValues(Float first, Float second) {
// Reversed intentionally because relevance by default
// sorts descending:
return second.compareTo(first);
}
@Override
public int compareTop(int doc) throws IOException {
float docValue = scorer.score();
assert !Float.isNaN(docValue);
return Float.compare(docValue, topValue);
}
}
Sorts by field's natural Term sort order, using ordinals. This is functionally equivalent to TermValComparator
, but it first resolves the string to their relative ordinal positions (using the index returned by LeafReader.getSortedDocValues(String)
), and does most comparisons using the ordinals. For medium to large results, this comparator will be much faster than TermValComparator
. For very small result sets it may be slower. /** Sorts by field's natural Term sort order, using
* ordinals. This is functionally equivalent to {@link
* org.apache.lucene.search.FieldComparator.TermValComparator}, but it first resolves the string
* to their relative ordinal positions (using the index
* returned by {@link org.apache.lucene.index.LeafReader#getSortedDocValues(String)}), and
* does most comparisons using the ordinals. For medium
* to large results, this comparator will be much faster
* than {@link org.apache.lucene.search.FieldComparator.TermValComparator}. For very small
* result sets it may be slower. */
public static class TermOrdValComparator extends FieldComparator<BytesRef> implements LeafFieldComparator {
/* Ords for each slot.
@lucene.internal */
final int[] ords;
/* Values for each slot.
@lucene.internal */
final BytesRef[] values;
private final BytesRefBuilder[] tempBRs;
/* Which reader last copied a value into the slot. When
we compare two slots, we just compare-by-ord if the
readerGen is the same; else we must compare the
values (slower).
@lucene.internal */
final int[] readerGen;
/* Gen of current reader we are on.
@lucene.internal */
int currentReaderGen = -1;
/* Current reader's doc ord/values.
@lucene.internal */
SortedDocValues termsIndex;
private final String field;
/* Bottom slot, or -1 if queue isn't full yet
@lucene.internal */
int bottomSlot = -1;
/* Bottom ord (same as ords[bottomSlot] once bottomSlot
is set). Cached for faster compares.
@lucene.internal */
int bottomOrd;
/* True if current bottom slot matches the current
reader.
@lucene.internal */
boolean bottomSameReader;
/* Bottom value (same as values[bottomSlot] once
bottomSlot is set). Cached for faster compares.
@lucene.internal */
BytesRef bottomValue;
Set by setTopValue. /** Set by setTopValue. */
BytesRef topValue;
boolean topSameReader;
int topOrd;
-1 if missing values are sorted first, 1 if they are
sorted last /** -1 if missing values are sorted first, 1 if they are
* sorted last */
final int missingSortCmp;
Which ordinal to use for a missing value. /** Which ordinal to use for a missing value. */
final int missingOrd;
Creates this, sorting missing values first. /** Creates this, sorting missing values first. */
public TermOrdValComparator(int numHits, String field) {
this(numHits, field, false);
}
Creates this, with control over how missing values
are sorted. Pass sortMissingLast=true to put
missing values at the end. /** Creates this, with control over how missing values
* are sorted. Pass sortMissingLast=true to put
* missing values at the end. */
public TermOrdValComparator(int numHits, String field, boolean sortMissingLast) {
ords = new int[numHits];
values = new BytesRef[numHits];
tempBRs = new BytesRefBuilder[numHits];
readerGen = new int[numHits];
this.field = field;
if (sortMissingLast) {
missingSortCmp = 1;
missingOrd = Integer.MAX_VALUE;
} else {
missingSortCmp = -1;
missingOrd = -1;
}
}
private int getOrdForDoc(int doc) throws IOException {
if (termsIndex.advanceExact(doc)) {
return termsIndex.ordValue();
} else {
return -1;
}
}
@Override
public int compare(int slot1, int slot2) {
if (readerGen[slot1] == readerGen[slot2]) {
return ords[slot1] - ords[slot2];
}
final BytesRef val1 = values[slot1];
final BytesRef val2 = values[slot2];
if (val1 == null) {
if (val2 == null) {
return 0;
}
return missingSortCmp;
} else if (val2 == null) {
return -missingSortCmp;
}
return val1.compareTo(val2);
}
@Override
public int compareBottom(int doc) throws IOException {
assert bottomSlot != -1;
int docOrd = getOrdForDoc(doc);
if (docOrd == -1) {
docOrd = missingOrd;
}
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
return -1;
}
}
@Override
public void copy(int slot, int doc) throws IOException {
int ord = getOrdForDoc(doc);
if (ord == -1) {
ord = missingOrd;
values[slot] = null;
} else {
assert ord >= 0;
if (tempBRs[slot] == null) {
tempBRs[slot] = new BytesRefBuilder();
}
tempBRs[slot].copyBytes(termsIndex.lookupOrd(ord));
values[slot] = tempBRs[slot].get();
}
ords[slot] = ord;
readerGen[slot] = currentReaderGen;
}
Retrieves the SortedDocValues for the field in this segment /** Retrieves the SortedDocValues for the field in this segment */
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field) throws IOException {
return DocValues.getSorted(context.reader(), field);
}
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
termsIndex = getSortedDocValues(context, field);
currentReaderGen++;
if (topValue != null) {
// Recompute topOrd/SameReader
int ord = termsIndex.lookupTerm(topValue);
if (ord >= 0) {
topSameReader = true;
topOrd = ord;
} else {
topSameReader = false;
topOrd = -ord-2;
}
} else {
topOrd = missingOrd;
topSameReader = true;
}
//System.out.println(" getLeafComparator topOrd=" + topOrd + " topSameReader=" + topSameReader);
if (bottomSlot != -1) {
// Recompute bottomOrd/SameReader
setBottom(bottomSlot);
}
return this;
}
@Override
public void setBottom(final int bottom) throws IOException {
bottomSlot = bottom;
bottomValue = values[bottomSlot];
if (currentReaderGen == readerGen[bottomSlot]) {
bottomOrd = ords[bottomSlot];
bottomSameReader = true;
} else {
if (bottomValue == null) {
// missingOrd is null for all segments
assert ords[bottomSlot] == missingOrd;
bottomOrd = missingOrd;
bottomSameReader = true;
readerGen[bottomSlot] = currentReaderGen;
} else {
final int ord = termsIndex.lookupTerm(bottomValue);
if (ord < 0) {
bottomOrd = -ord - 2;
bottomSameReader = false;
} else {
bottomOrd = ord;
// exact value match
bottomSameReader = true;
readerGen[bottomSlot] = currentReaderGen;
ords[bottomSlot] = bottomOrd;
}
}
}
}
@Override
public void setTopValue(BytesRef value) {
// null is fine: it means the last doc of the prior
// search was missing this value
topValue = value;
//System.out.println("setTopValue " + topValue);
}
@Override
public BytesRef value(int slot) {
return values[slot];
}
@Override
public int compareTop(int doc) throws IOException {
int ord = getOrdForDoc(doc);
if (ord == -1) {
ord = missingOrd;
}
if (topSameReader) {
// ord is precisely comparable, even in the equal
// case
//System.out.println("compareTop doc=" + doc + " ord=" + ord + " ret=" + (topOrd-ord));
return topOrd - ord;
} else if (ord <= topOrd) {
// the equals case always means doc is < value
// (because we set lastOrd to the lower bound)
return 1;
} else {
return -1;
}
}
@Override
public int compareValues(BytesRef val1, BytesRef val2) {
if (val1 == null) {
if (val2 == null) {
return 0;
}
return missingSortCmp;
} else if (val2 == null) {
return -missingSortCmp;
}
return val1.compareTo(val2);
}
@Override
public void setScorer(Scorable scorer) {}
}
Sorts by field's natural Term sort order. All
comparisons are done using BytesRef.compareTo, which is
slow for medium to large result sets but possibly
very fast for very small results sets. /** Sorts by field's natural Term sort order. All
* comparisons are done using BytesRef.compareTo, which is
* slow for medium to large result sets but possibly
* very fast for very small results sets. */
public static class TermValComparator extends FieldComparator<BytesRef> implements LeafFieldComparator {
private final BytesRef[] values;
private final BytesRefBuilder[] tempBRs;
private BinaryDocValues docTerms;
private final String field;
private BytesRef bottom;
private BytesRef topValue;
private final int missingSortCmp;
Sole constructor. /** Sole constructor. */
public TermValComparator(int numHits, String field, boolean sortMissingLast) {
values = new BytesRef[numHits];
tempBRs = new BytesRefBuilder[numHits];
this.field = field;
missingSortCmp = sortMissingLast ? 1 : -1;
}
private BytesRef getValueForDoc(int doc) throws IOException {
if (docTerms.advanceExact(doc)) {
return docTerms.binaryValue();
} else {
return null;
}
}
@Override
public int compare(int slot1, int slot2) {
final BytesRef val1 = values[slot1];
final BytesRef val2 = values[slot2];
return compareValues(val1, val2);
}
@Override
public int compareBottom(int doc) throws IOException {
final BytesRef comparableBytes = getValueForDoc(doc);
return compareValues(bottom, comparableBytes);
}
@Override
public void copy(int slot, int doc) throws IOException {
final BytesRef comparableBytes = getValueForDoc(doc);
if (comparableBytes == null) {
values[slot] = null;
} else {
if (tempBRs[slot] == null) {
tempBRs[slot] = new BytesRefBuilder();
}
tempBRs[slot].copyBytes(comparableBytes);
values[slot] = tempBRs[slot].get();
}
}
Retrieves the BinaryDocValues for the field in this segment /** Retrieves the BinaryDocValues for the field in this segment */
protected BinaryDocValues getBinaryDocValues(LeafReaderContext context, String field) throws IOException {
return DocValues.getBinary(context.reader(), field);
}
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
docTerms = getBinaryDocValues(context, field);
return this;
}
@Override
public void setBottom(final int bottom) {
this.bottom = values[bottom];
}
@Override
public void setTopValue(BytesRef value) {
// null is fine: it means the last doc of the prior
// search was missing this value
topValue = value;
}
@Override
public BytesRef value(int slot) {
return values[slot];
}
@Override
public int compareValues(BytesRef val1, BytesRef val2) {
// missing always sorts first:
if (val1 == null) {
if (val2 == null) {
return 0;
}
return missingSortCmp;
} else if (val2 == null) {
return -missingSortCmp;
}
return val1.compareTo(val2);
}
@Override
public int compareTop(int doc) throws IOException {
return compareValues(topValue, getValueForDoc(doc));
}
@Override
public void setScorer(Scorable scorer) {}
}
}