/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;


import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause.Occur;

A Query that matches documents matching boolean combinations of other queries, e.g. TermQuerys, PhraseQuerys or other BooleanQuerys.
/** A Query that matches documents matching boolean combinations of other * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other * BooleanQuerys. */
public class BooleanQuery extends Query implements Iterable<BooleanClause> { private static int maxClauseCount = 1024;
Thrown when an attempt is made to add more than BooleanQuery.getMaxClauseCount() clauses. This typically happens if a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery is expanded to many terms during search.
/** Thrown when an attempt is made to add more than {@link * #getMaxClauseCount()} clauses. This typically happens if * a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery * is expanded to many terms during search. */
public static class TooManyClauses extends RuntimeException { public TooManyClauses() { super("maxClauseCount is set to " + maxClauseCount); } }
Return the maximum number of clauses permitted, 1024 by default. Attempts to add more than the permitted number of clauses cause TooManyClauses to be thrown.
See Also:
/** Return the maximum number of clauses permitted, 1024 by default. * Attempts to add more than the permitted number of clauses cause {@link * TooManyClauses} to be thrown. * @see #setMaxClauseCount(int) */
public static int getMaxClauseCount() { return maxClauseCount; }
Set the maximum number of clauses permitted per BooleanQuery. Default value is 1024.
/** * Set the maximum number of clauses permitted per BooleanQuery. * Default value is 1024. */
public static void setMaxClauseCount(int maxClauseCount) { if (maxClauseCount < 1) { throw new IllegalArgumentException("maxClauseCount must be >= 1"); } BooleanQuery.maxClauseCount = maxClauseCount; }
A builder for boolean queries.
/** A builder for boolean queries. */
public static class Builder { private int minimumNumberShouldMatch; private final List<BooleanClause> clauses = new ArrayList<>();
Sole constructor.
/** Sole constructor. */
public Builder() {}
Specifies a minimum number of the optional BooleanClauses which must be satisfied.

By default no optional clauses are necessary for a match (unless there are no required clauses). If this method is used, then the specified number of clauses is required.

Use of this method is totally independent of specifying that any specific clauses are required (or prohibited). This number will only be compared against the number of matching optional clauses.

Params:
  • min – the number of optional clauses that must match
/** * Specifies a minimum number of the optional BooleanClauses * which must be satisfied. * * <p> * By default no optional clauses are necessary for a match * (unless there are no required clauses). If this method is used, * then the specified number of clauses is required. * </p> * <p> * Use of this method is totally independent of specifying that * any specific clauses are required (or prohibited). This number will * only be compared against the number of matching optional clauses. * </p> * * @param min the number of optional clauses that must match */
public Builder setMinimumNumberShouldMatch(int min) { this.minimumNumberShouldMatch = min; return this; }
Add a new clause to this Builder. Note that the order in which clauses are added does not have any impact on matching documents or query performance.
Throws:
  • TooManyClauses – if the new number of clauses exceeds the maximum clause number
/** * Add a new clause to this {@link Builder}. Note that the order in which * clauses are added does not have any impact on matching documents or query * performance. * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number */
public Builder add(BooleanClause clause) { if (clauses.size() >= maxClauseCount) { throw new TooManyClauses(); } clauses.add(clause); return this; }
Add a new clause to this Builder. Note that the order in which clauses are added does not have any impact on matching documents or query performance.
Throws:
  • TooManyClauses – if the new number of clauses exceeds the maximum clause number
/** * Add a new clause to this {@link Builder}. Note that the order in which * clauses are added does not have any impact on matching documents or query * performance. * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number */
public Builder add(Query query, Occur occur) { return add(new BooleanClause(query, occur)); }
Create a new BooleanQuery based on the parameters that have been set on this builder.
/** Create a new {@link BooleanQuery} based on the parameters that have * been set on this builder. */
public BooleanQuery build() { return new BooleanQuery(minimumNumberShouldMatch, clauses.toArray(new BooleanClause[0])); } } private final int minimumNumberShouldMatch; private final List<BooleanClause> clauses; // used for toString() and getClauses() private final Map<Occur, Collection<Query>> clauseSets; // used for equals/hashcode private BooleanQuery(int minimumNumberShouldMatch, BooleanClause[] clauses) { this.minimumNumberShouldMatch = minimumNumberShouldMatch; this.clauses = Collections.unmodifiableList(Arrays.asList(clauses)); clauseSets = new EnumMap<>(Occur.class); // duplicates matter for SHOULD and MUST clauseSets.put(Occur.SHOULD, new Multiset<>()); clauseSets.put(Occur.MUST, new Multiset<>()); // but not for FILTER and MUST_NOT clauseSets.put(Occur.FILTER, new HashSet<>()); clauseSets.put(Occur.MUST_NOT, new HashSet<>()); for (BooleanClause clause : clauses) { clauseSets.get(clause.getOccur()).add(clause.getQuery()); } }
Gets the minimum number of the optional BooleanClauses which must be satisfied.
/** * Gets the minimum number of the optional BooleanClauses * which must be satisfied. */
public int getMinimumNumberShouldMatch() { return minimumNumberShouldMatch; }
Return a list of the clauses of this BooleanQuery.
/** Return a list of the clauses of this {@link BooleanQuery}. */
public List<BooleanClause> clauses() { return clauses; }
Return the collection of queries for the given Occur.
/** Return the collection of queries for the given {@link Occur}. */
Collection<Query> getClauses(Occur occur) { return clauseSets.get(occur); }
Whether this query is a pure disjunction, ie. it only has SHOULD clauses and it is enough for a single clause to match for this boolean query to match.
/** * Whether this query is a pure disjunction, ie. it only has SHOULD clauses * and it is enough for a single clause to match for this boolean query to match. */
boolean isPureDisjunction() { return clauses.size() == getClauses(Occur.SHOULD).size() && minimumNumberShouldMatch <= 1; }
Returns an iterator on the clauses in this query. It implements the Iterable interface to make it possible to do:
for (BooleanClause clause : booleanQuery) {}
/** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to * make it possible to do: * <pre class="prettyprint">for (BooleanClause clause : booleanQuery) {}</pre> */
@Override public final Iterator<BooleanClause> iterator() { return clauses.iterator(); } private BooleanQuery rewriteNoScoring() { boolean keepShould = getMinimumNumberShouldMatch() > 0 || (clauseSets.get(Occur.MUST).size() + clauseSets.get(Occur.FILTER).size() == 0); if (clauseSets.get(Occur.MUST).size() == 0 && keepShould) { return this; } BooleanQuery.Builder newQuery = new BooleanQuery.Builder(); newQuery.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()); for (BooleanClause clause : clauses) { switch (clause.getOccur()) { case MUST: { newQuery.add(clause.getQuery(), Occur.FILTER); break; } case SHOULD: { if (keepShould) { newQuery.add(clause); } break; } default: { newQuery.add(clause); } } } return newQuery.build(); } @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { BooleanQuery query = this; if (scoreMode.needsScores() == false) { query = rewriteNoScoring(); } return new BooleanWeight(query, searcher, scoreMode, boost); } @Override public Query rewrite(IndexReader reader) throws IOException { if (clauses.size() == 0) { return new MatchNoDocsQuery("empty BooleanQuery"); } // optimize 1-clause queries if (clauses.size() == 1) { BooleanClause c = clauses.get(0); Query query = c.getQuery(); if (minimumNumberShouldMatch == 1 && c.getOccur() == Occur.SHOULD) { return query; } else if (minimumNumberShouldMatch == 0) { switch (c.getOccur()) { case SHOULD: case MUST: return query; case FILTER: // no scoring clauses, so return a score of 0 return new BoostQuery(new ConstantScoreQuery(query), 0); case MUST_NOT: // no positive clauses return new MatchNoDocsQuery("pure negative BooleanQuery"); default: throw new AssertionError(); } } } // recursively rewrite { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()); boolean actuallyRewritten = false; for (BooleanClause clause : this) { Query query = clause.getQuery(); Query rewritten = query.rewrite(reader); if (rewritten != query) { // rewrite clause actuallyRewritten = true; builder.add(rewritten, clause.getOccur()); } else { // leave as-is builder.add(clause); } } if (actuallyRewritten) { return builder.build(); } } // remove duplicate FILTER and MUST_NOT clauses { int clauseCount = 0; for (Collection<Query> queries : clauseSets.values()) { clauseCount += queries.size(); } if (clauseCount != clauses.size()) { // since clauseSets implicitly deduplicates FILTER and MUST_NOT // clauses, this means there were duplicates BooleanQuery.Builder rewritten = new BooleanQuery.Builder(); rewritten.setMinimumNumberShouldMatch(minimumNumberShouldMatch); for (Map.Entry<Occur, Collection<Query>> entry : clauseSets.entrySet()) { final Occur occur = entry.getKey(); for (Query query : entry.getValue()) { rewritten.add(query, occur); } } return rewritten.build(); } } // Check whether some clauses are both required and excluded final Collection<Query> mustNotClauses = clauseSets.get(Occur.MUST_NOT); if (!mustNotClauses.isEmpty()) { final Predicate<Query> p = clauseSets.get(Occur.MUST)::contains; if (mustNotClauses.stream().anyMatch(p.or(clauseSets.get(Occur.FILTER)::contains))) { return new MatchNoDocsQuery("FILTER or MUST clause also in MUST_NOT"); } if (mustNotClauses.contains(new MatchAllDocsQuery())) { return new MatchNoDocsQuery("MUST_NOT clause is MatchAllDocsQuery"); } } // remove FILTER clauses that are also MUST clauses or that match all documents if (clauseSets.get(Occur.FILTER).size() > 0) { final Set<Query> filters = new HashSet<>(clauseSets.get(Occur.FILTER)); boolean modified = false; if (filters.size() > 1 || clauseSets.get(Occur.MUST).isEmpty() == false) { modified = filters.remove(new MatchAllDocsQuery()); } modified |= filters.removeAll(clauseSets.get(Occur.MUST)); if (modified) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()); for (BooleanClause clause : clauses) { if (clause.getOccur() != Occur.FILTER) { builder.add(clause); } } for (Query filter : filters) { builder.add(filter, Occur.FILTER); } return builder.build(); } } // convert FILTER clauses that are also SHOULD clauses to MUST clauses if (clauseSets.get(Occur.SHOULD).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) { final Collection<Query> filters = clauseSets.get(Occur.FILTER); final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD); Set<Query> intersection = new HashSet<>(filters); intersection.retainAll(shoulds); if (intersection.isEmpty() == false) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); int minShouldMatch = getMinimumNumberShouldMatch(); for (BooleanClause clause : clauses) { if (intersection.contains(clause.getQuery())) { if (clause.getOccur() == Occur.SHOULD) { builder.add(new BooleanClause(clause.getQuery(), Occur.MUST)); minShouldMatch--; } } else { builder.add(clause); } } builder.setMinimumNumberShouldMatch(Math.max(0, minShouldMatch)); return builder.build(); } } // Deduplicate SHOULD clauses by summing up their boosts if (clauseSets.get(Occur.SHOULD).size() > 0 && minimumNumberShouldMatch <= 1) { Map<Query, Double> shouldClauses = new HashMap<>(); for (Query query : clauseSets.get(Occur.SHOULD)) { double boost = 1; while (query instanceof BoostQuery) { BoostQuery bq = (BoostQuery) query; boost *= bq.getBoost(); query = bq.getQuery(); } shouldClauses.put(query, shouldClauses.getOrDefault(query, 0d) + boost); } if (shouldClauses.size() != clauseSets.get(Occur.SHOULD).size()) { BooleanQuery.Builder builder = new BooleanQuery.Builder() .setMinimumNumberShouldMatch(minimumNumberShouldMatch); for (Map.Entry<Query,Double> entry : shouldClauses.entrySet()) { Query query = entry.getKey(); float boost = entry.getValue().floatValue(); if (boost != 1f) { query = new BoostQuery(query, boost); } builder.add(query, Occur.SHOULD); } for (BooleanClause clause : clauses) { if (clause.getOccur() != Occur.SHOULD) { builder.add(clause); } } return builder.build(); } } // Deduplicate MUST clauses by summing up their boosts if (clauseSets.get(Occur.MUST).size() > 0) { Map<Query, Double> mustClauses = new HashMap<>(); for (Query query : clauseSets.get(Occur.MUST)) { double boost = 1; while (query instanceof BoostQuery) { BoostQuery bq = (BoostQuery) query; boost *= bq.getBoost(); query = bq.getQuery(); } mustClauses.put(query, mustClauses.getOrDefault(query, 0d) + boost); } if (mustClauses.size() != clauseSets.get(Occur.MUST).size()) { BooleanQuery.Builder builder = new BooleanQuery.Builder() .setMinimumNumberShouldMatch(minimumNumberShouldMatch); for (Map.Entry<Query,Double> entry : mustClauses.entrySet()) { Query query = entry.getKey(); float boost = entry.getValue().floatValue(); if (boost != 1f) { query = new BoostQuery(query, boost); } builder.add(query, Occur.MUST); } for (BooleanClause clause : clauses) { if (clause.getOccur() != Occur.MUST) { builder.add(clause); } } return builder.build(); } } // Rewrite queries whose single scoring clause is a MUST clause on a // MatchAllDocsQuery to a ConstantScoreQuery { final Collection<Query> musts = clauseSets.get(Occur.MUST); final Collection<Query> filters = clauseSets.get(Occur.FILTER); if (musts.size() == 1 && filters.size() > 0) { Query must = musts.iterator().next(); float boost = 1f; if (must instanceof BoostQuery) { BoostQuery boostQuery = (BoostQuery) must; must = boostQuery.getQuery(); boost = boostQuery.getBoost(); } if (must.getClass() == MatchAllDocsQuery.class) { // our single scoring clause matches everything: rewrite to a CSQ on the filter // ignore SHOULD clause for now BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (BooleanClause clause : clauses) { switch (clause.getOccur()) { case FILTER: case MUST_NOT: builder.add(clause); break; default: // ignore break; } } Query rewritten = builder.build(); rewritten = new ConstantScoreQuery(rewritten); if (boost != 1f) { rewritten = new BoostQuery(rewritten, boost); } // now add back the SHOULD clauses builder = new BooleanQuery.Builder() .setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()) .add(rewritten, Occur.MUST); for (Query query : clauseSets.get(Occur.SHOULD)) { builder.add(query, Occur.SHOULD); } rewritten = builder.build(); return rewritten; } } } // Flatten nested disjunctions, this is important for block-max WAND to perform well if (minimumNumberShouldMatch <= 1) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch); boolean actuallyRewritten = false; try { for (BooleanClause clause : clauses) { if (clause.getOccur() == Occur.SHOULD && clause.getQuery() instanceof BooleanQuery) { BooleanQuery innerQuery = (BooleanQuery) clause.getQuery(); if (innerQuery.isPureDisjunction()) { actuallyRewritten = true; for (BooleanClause innerClause : innerQuery.clauses()) { builder.add(innerClause); } } else { builder.add(clause); } } else { builder.add(clause); } } if (actuallyRewritten) { return builder.build(); } } catch (TooManyClauses exception) { // No-op : Do not flatten when the new query will violate max clause count } } return super.rewrite(reader); } @Override public void visit(QueryVisitor visitor) { QueryVisitor sub = visitor.getSubVisitor(Occur.MUST, this); for (BooleanClause.Occur occur : clauseSets.keySet()) { if (clauseSets.get(occur).size() > 0) { if (occur == Occur.MUST) { for (Query q : clauseSets.get(occur)) { q.visit(sub); } } else { QueryVisitor v = sub.getSubVisitor(occur, this); for (Query q : clauseSets.get(occur)) { q.visit(v); } } } } }
Prints a user-readable version of this query.
/** Prints a user-readable version of this query. */
@Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); boolean needParens = getMinimumNumberShouldMatch() > 0; if (needParens) { buffer.append("("); } int i = 0; for (BooleanClause c : this) { buffer.append(c.getOccur().toString()); Query subQuery = c.getQuery(); if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens buffer.append("("); buffer.append(subQuery.toString(field)); buffer.append(")"); } else { buffer.append(subQuery.toString(field)); } if (i != clauses.size() - 1) { buffer.append(" "); } i += 1; } if (needParens) { buffer.append(")"); } if (getMinimumNumberShouldMatch()>0) { buffer.append('~'); buffer.append(getMinimumNumberShouldMatch()); } return buffer.toString(); }
Compares the specified object with this boolean query for equality. Returns true if and only if the provided object
/** * Compares the specified object with this boolean query for equality. * Returns true if and only if the provided object<ul> * <li>is also a {@link BooleanQuery},</li> * <li>has the same value of {@link #getMinimumNumberShouldMatch()}</li> * <li>has the same {@link Occur#SHOULD} clauses, regardless of the order</li> * <li>has the same {@link Occur#MUST} clauses, regardless of the order</li> * <li>has the same set of {@link Occur#FILTER} clauses, regardless of the * order and regardless of duplicates</li> * <li>has the same set of {@link Occur#MUST_NOT} clauses, regardless of * the order and regardless of duplicates</li></ul> */
@Override public boolean equals(Object o) { return sameClassAs(o) && equalsTo(getClass().cast(o)); } private boolean equalsTo(BooleanQuery other) { return getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch() && clauseSets.equals(other.clauseSets); } private int computeHashCode() { int hashCode = Objects.hash(minimumNumberShouldMatch, clauseSets); if (hashCode == 0) { hashCode = 1; } return hashCode; } // cached hash code is ok since boolean queries are immutable private int hashCode; @Override public int hashCode() { // no need for synchronization, in the worst case we would just compute the hash several times. if (hashCode == 0) { hashCode = computeHashCode(); assert hashCode != 0; } assert hashCode == computeHashCode(); return hashCode; } }