/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;


import java.io.IOException;

import org.apache.lucene.util.FrequencyTrackingRingBuffer;

A QueryCachingPolicy that tracks usage statistics of recently-used filters in order to decide on which filters are worth caching.
@lucene.experimental
/** * A {@link QueryCachingPolicy} that tracks usage statistics of recently-used * filters in order to decide on which filters are worth caching. * * @lucene.experimental */
public class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy { // the hash code that we use as a sentinel in the ring buffer. private static final int SENTINEL = Integer.MIN_VALUE; private static boolean isPointQuery(Query query) { // we need to check for super classes because we occasionally use anonymous // sub classes of eg. PointRangeQuery for (Class<?> clazz = query.getClass(); clazz != Query.class; clazz = clazz.getSuperclass()) { final String simpleName = clazz.getSimpleName(); if (simpleName.startsWith("Point") && simpleName.endsWith("Query")) { return true; } } return false; } static boolean isCostly(Query query) { // This does not measure the cost of iterating over the filter (for this we // already have the DocIdSetIterator#cost API) but the cost to build the // DocIdSet in the first place return query instanceof MultiTermQuery || query instanceof MultiTermQueryConstantScoreWrapper || query instanceof TermInSetQuery || isPointQuery(query); } private static boolean shouldNeverCache(Query query) { if (query instanceof TermQuery) { // We do not bother caching term queries since they are already plenty fast. return true; } if (query instanceof DocValuesFieldExistsQuery) { // We do not bother caching DocValuesFieldExistsQuery queries since they are already plenty fast. return true; } if (query instanceof MatchAllDocsQuery) { // MatchAllDocsQuery has an iterator that is faster than what a bit set could do. return true; } // For the below queries, it's cheap to notice they cannot match any docs so // we do not bother caching them. if (query instanceof MatchNoDocsQuery) { return true; } if (query instanceof BooleanQuery) { BooleanQuery bq = (BooleanQuery) query; if (bq.clauses().isEmpty()) { return true; } } if (query instanceof DisjunctionMaxQuery) { DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query; if (dmq.getDisjuncts().isEmpty()) { return true; } } return false; } private final FrequencyTrackingRingBuffer recentlyUsedFilters;
Expert: Create a new instance with a configurable history size. Beware of passing too large values for the size of the history, either minFrequencyToCache returns low values and this means some filters that are rarely used will be cached, which would hurt performance. Or minFrequencyToCache returns high values that are function of the size of the history but then filters will be slow to make it to the cache.
Params:
  • historySize – the number of recently used filters to track
/** * Expert: Create a new instance with a configurable history size. Beware of * passing too large values for the size of the history, either * {@link #minFrequencyToCache} returns low values and this means some filters * that are rarely used will be cached, which would hurt performance. Or * {@link #minFrequencyToCache} returns high values that are function of the * size of the history but then filters will be slow to make it to the cache. * * @param historySize the number of recently used filters to track */
public UsageTrackingQueryCachingPolicy(int historySize) { this.recentlyUsedFilters = new FrequencyTrackingRingBuffer(historySize, SENTINEL); }
Create a new instance with an history size of 256. This should be a good default for most cases.
/** Create a new instance with an history size of 256. This should be a good * default for most cases. */
public UsageTrackingQueryCachingPolicy() { this(256); }
For a given filter, return how many times it should appear in the history before being cached. The default implementation returns 2 for filters that need to evaluate against the entire index to build a DocIdSetIterator, like MultiTermQuery, point-based queries or TermInSetQuery, and 5 for other filters.
/** * For a given filter, return how many times it should appear in the history * before being cached. The default implementation returns 2 for filters that * need to evaluate against the entire index to build a {@link DocIdSetIterator}, * like {@link MultiTermQuery}, point-based queries or {@link TermInSetQuery}, * and 5 for other filters. */
protected int minFrequencyToCache(Query query) { if (isCostly(query)) { return 2; } else { // default: cache after the filter has been seen 5 times int minFrequency = 5; if (query instanceof BooleanQuery || query instanceof DisjunctionMaxQuery) { // Say you keep reusing a boolean query that looks like "A OR B" and // never use the A and B queries out of that context. 5 times after it // has been used, we would cache both A, B and A OR B, which is // wasteful. So instead we cache compound queries a bit earlier so that // we would only cache "A OR B" in that case. minFrequency--; } return minFrequency; } } @Override public void onUse(Query query) { assert query instanceof BoostQuery == false; assert query instanceof ConstantScoreQuery == false; if (shouldNeverCache(query)) { return; } // call hashCode outside of sync block // in case it's somewhat expensive: int hashCode = query.hashCode(); // we only track hash codes to avoid holding references to possible // large queries; this may cause rare false positives, but at worse // this just means we cache a query that was not in fact used enough: synchronized (this) { recentlyUsedFilters.add(hashCode); } } int frequency(Query query) { assert query instanceof BoostQuery == false; assert query instanceof ConstantScoreQuery == false; // call hashCode outside of sync block // in case it's somewhat expensive: int hashCode = query.hashCode(); synchronized (this) { return recentlyUsedFilters.frequency(hashCode); } } @Override public boolean shouldCache(Query query) throws IOException { if (shouldNeverCache(query)) { return false; } final int frequency = frequency(query); final int minFrequency = minFrequencyToCache(query); return frequency >= minFrequency; } }