/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;

A helper to propagate block boundaries for disjunctions. Because a disjunction matches if any of its sub clauses matches, it is tempting to return the minimum block boundary across all clauses. The problem is that it might then make the query slow when the minimum competitive score is high and low-scoring clauses don't drive iteration anymore. So this class computes block boundaries only across clauses whose maximum score is greater than or equal to the minimum competitive score, or the maximum scoring clause if there is no such clause.
/** * A helper to propagate block boundaries for disjunctions. * Because a disjunction matches if any of its sub clauses matches, it is * tempting to return the minimum block boundary across all clauses. The problem * is that it might then make the query slow when the minimum competitive score * is high and low-scoring clauses don't drive iteration anymore. So this class * computes block boundaries only across clauses whose maximum score is greater * than or equal to the minimum competitive score, or the maximum scoring clause * if there is no such clause. */
final class DisjunctionScoreBlockBoundaryPropagator { private static final Comparator<Scorer> MAX_SCORE_COMPARATOR = Comparator.comparing((Scorer s) -> { try { return s.getMaxScore(DocIdSetIterator.NO_MORE_DOCS); } catch (IOException e) { throw new RuntimeException(e); } }).thenComparing(Comparator.comparing(s -> s.iterator().cost())); private final Scorer[] scorers; private final float[] maxScores; private int leadIndex = 0; DisjunctionScoreBlockBoundaryPropagator(Collection<Scorer> scorers) throws IOException { this.scorers = scorers.toArray(new Scorer[0]); for (Scorer scorer : this.scorers) { scorer.advanceShallow(0); } Arrays.sort(this.scorers, MAX_SCORE_COMPARATOR); maxScores = new float[this.scorers.length]; for (int i = 0; i < this.scorers.length; ++i) { maxScores[i] = this.scorers[i].getMaxScore(DocIdSetIterator.NO_MORE_DOCS); } } /** * See {@link Scorer#advanceShallow(int)}. */ int advanceShallow(int target) throws IOException { // For scorers that are below the lead index, just propagate. for (int i = 0; i < leadIndex; ++i) { Scorer s = scorers[i]; if (s.docID() < target) { s.advanceShallow(target); } } // For scorers above the lead index, we take the minimum // boundary. Scorer leadScorer = scorers[leadIndex]; int upTo = leadScorer.advanceShallow(Math.max(leadScorer.docID(), target)); for (int i = leadIndex + 1; i < scorers.length; ++i) { Scorer scorer = scorers[i]; if (scorer.docID() <= target) { upTo = Math.min(scorer.advanceShallow(target), upTo); } } // If the maximum scoring clauses are beyond `target`, then we use their // docID as a boundary. It helps not consider them when computing the // maximum score and get a lower score upper bound. for (int i = scorers.length - 1; i > leadIndex; --i) { Scorer scorer = scorers[i]; if (scorer.docID() > target) { upTo = Math.min(upTo, scorer.docID() - 1); } else { break; } } return upTo; }
Set the minimum competitive score to filter out clauses that score less than this threshold.
See Also:
  • setMinCompetitiveScore.setMinCompetitiveScore
/** * Set the minimum competitive score to filter out clauses that score less * than this threshold. * @see Scorer#setMinCompetitiveScore */
void setMinCompetitiveScore(float minScore) throws IOException { // Update the lead index if necessary while (leadIndex < maxScores.length - 1 && minScore > maxScores[leadIndex]) { leadIndex++; } } }