/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.RamUsageEstimator;

Base rewrite method that translates each term into a query, and keeps the scores as computed by the query.

@lucene.internalOnly public to be accessible by spans package.
/** * Base rewrite method that translates each term into a query, and keeps * the scores as computed by the query. * <p> * @lucene.internal Only public to be accessible by spans package. */
public abstract class ScoringRewrite<B> extends TermCollectingRewrite<B> {
A rewrite method that first translates each term into Occur.SHOULD clause in a BooleanQuery, and keeps the scores as computed by the query. Note that typically such scores are meaningless to the user, and require non-trivial CPU to compute, so it's almost always better to use MultiTermQuery.CONSTANT_SCORE_REWRITE instead.

NOTE: This rewrite method will hit TooManyClauses if the number of terms exceeds BooleanQuery.getMaxClauseCount. @see MultiTermQuery#setRewriteMethod

/** A rewrite method that first translates each term into * {@link BooleanClause.Occur#SHOULD} clause in a * BooleanQuery, and keeps the scores as computed by the * query. Note that typically such scores are * meaningless to the user, and require non-trivial CPU * to compute, so it's almost always better to use {@link * MultiTermQuery#CONSTANT_SCORE_REWRITE} instead. * * <p><b>NOTE</b>: This rewrite method will hit {@link * BooleanQuery.TooManyClauses} if the number of terms * exceeds {@link BooleanQuery#getMaxClauseCount}. * * @see MultiTermQuery#setRewriteMethod */
public final static ScoringRewrite<BooleanQuery.Builder> SCORING_BOOLEAN_REWRITE = new ScoringRewrite<BooleanQuery.Builder>() { @Override protected BooleanQuery.Builder getTopLevelBuilder() { return new BooleanQuery.Builder(); } @Override protected Query build(BooleanQuery.Builder builder) { return builder.build(); } @Override protected void addClause(BooleanQuery.Builder topLevel, Term term, int docCount, float boost, TermStates states) { final TermQuery tq = new TermQuery(term, states); topLevel.add(new BoostQuery(tq, boost), BooleanClause.Occur.SHOULD); } @Override protected void checkMaxClauseCount(int count) { if (count > BooleanQuery.getMaxClauseCount()) throw new BooleanQuery.TooManyClauses(); } };
Like ScoringRewrite<B>.SCORING_BOOLEAN_REWRITE except scores are not computed. Instead, each matching document receives a constant score equal to the query's boost.

NOTE: This rewrite method will hit TooManyClauses if the number of terms exceeds BooleanQuery.getMaxClauseCount. @see MultiTermQuery#setRewriteMethod

/** Like {@link #SCORING_BOOLEAN_REWRITE} except * scores are not computed. Instead, each matching * document receives a constant score equal to the * query's boost. * * <p><b>NOTE</b>: This rewrite method will hit {@link * BooleanQuery.TooManyClauses} if the number of terms * exceeds {@link BooleanQuery#getMaxClauseCount}. * * @see MultiTermQuery#setRewriteMethod */
public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_REWRITE = new RewriteMethod() { @Override public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { final Query bq = SCORING_BOOLEAN_REWRITE.rewrite(reader, query); // strip the scores off return new ConstantScoreQuery(bq); } };
This method is called after every new term to check if the number of max clauses (e.g. in BooleanQuery) is not exceeded. Throws the corresponding RuntimeException.
/** This method is called after every new term to check if the number of max clauses * (e.g. in BooleanQuery) is not exceeded. Throws the corresponding {@link RuntimeException}. */
protected abstract void checkMaxClauseCount(int count) throws IOException; @Override public final Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { final B builder = getTopLevelBuilder(); final ParallelArraysTermCollector col = new ParallelArraysTermCollector(); collectTerms(reader, query, col); final int size = col.terms.size(); if (size > 0) { final int sort[] = col.terms.sort(); final float[] boost = col.array.boost; final TermStates[] termStates = col.array.termState; for (int i = 0; i < size; i++) { final int pos = sort[i]; final Term term = new Term(query.getField(), col.terms.get(pos, new BytesRef())); assert reader.docFreq(term) == termStates[pos].docFreq(); addClause(builder, term, termStates[pos].docFreq(), boost[pos], termStates[pos]); } } return build(builder); } final class ParallelArraysTermCollector extends TermCollector { final TermFreqBoostByteStart array = new TermFreqBoostByteStart(16); final BytesRefHash terms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array); TermsEnum termsEnum; private BoostAttribute boostAtt; @Override public void setNextEnum(TermsEnum termsEnum) { this.termsEnum = termsEnum; this.boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class); } @Override public boolean collect(BytesRef bytes) throws IOException { final int e = terms.add(bytes); final TermState state = termsEnum.termState(); assert state != null; if (e < 0) { // duplicate term: update docFreq final int pos = (-e)-1; array.termState[pos].register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums"; } else { // new entry: we populate the entry initially array.boost[e] = boostAtt.getBoost(); array.termState[e] = new TermStates(topReaderContext, state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); ScoringRewrite.this.checkMaxClauseCount(terms.size()); } return true; } }
Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq
/** Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq */
static final class TermFreqBoostByteStart extends DirectBytesStartArray { float[] boost; TermStates[] termState; public TermFreqBoostByteStart(int initSize) { super(initSize); } @Override public int[] init() { final int[] ord = super.init(); boost = new float[ArrayUtil.oversize(ord.length, Float.BYTES)]; termState = new TermStates[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; assert termState.length >= ord.length && boost.length >= ord.length; return ord; } @Override public int[] grow() { final int[] ord = super.grow(); boost = ArrayUtil.grow(boost, ord.length); if (termState.length < ord.length) { TermStates[] tmpTermState = new TermStates[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; System.arraycopy(termState, 0, tmpTermState, 0, termState.length); termState = tmpTermState; } assert termState.length >= ord.length && boost.length >= ord.length; return ord; } @Override public int[] clear() { boost = null; termState = null; return super.clear(); } } }