/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;


import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

A ring buffer that tracks the frequency of the integers that it contains. This is typically useful to track the hash codes of popular recently-used items. This data-structure requires 22 bytes per entry on average (between 16 and 28).
@lucene.internal
/** * A ring buffer that tracks the frequency of the integers that it contains. * This is typically useful to track the hash codes of popular recently-used * items. * * This data-structure requires 22 bytes per entry on average (between 16 and * 28). * * @lucene.internal */
public final class FrequencyTrackingRingBuffer implements Accountable { private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FrequencyTrackingRingBuffer.class); private final int maxSize; private final int[] buffer; private int position; private final IntBag frequencies;
Create a new ring buffer that will contain at most maxSize items. This buffer will initially contain maxSize times the sentinel value.
/** Create a new ring buffer that will contain at most <code>maxSize</code> items. * This buffer will initially contain <code>maxSize</code> times the * <code>sentinel</code> value. */
public FrequencyTrackingRingBuffer(int maxSize, int sentinel) { if (maxSize < 2) { throw new IllegalArgumentException("maxSize must be at least 2"); } this.maxSize = maxSize; buffer = new int[maxSize]; position = 0; frequencies = new IntBag(maxSize); Arrays.fill(buffer, sentinel); for (int i = 0; i < maxSize; ++i) { frequencies.add(sentinel); } assert frequencies.frequency(sentinel) == maxSize; } @Override public long ramBytesUsed() { return BASE_RAM_BYTES_USED + frequencies.ramBytesUsed() + RamUsageEstimator.sizeOf(buffer); }
Add a new item to this ring buffer, potentially removing the oldest entry from this buffer if it is already full.
/** * Add a new item to this ring buffer, potentially removing the oldest * entry from this buffer if it is already full. */
public void add(int i) { // remove the previous value final int removed = buffer[position]; final boolean removedFromBag = frequencies.remove(removed); assert removedFromBag; // add the new value buffer[position] = i; frequencies.add(i); // increment the position position += 1; if (position == maxSize) { position = 0; } }
Returns the frequency of the provided key in the ring buffer.
/** * Returns the frequency of the provided key in the ring buffer. */
public int frequency(int key) { return frequencies.frequency(key); } // pkg-private for testing Map<Integer, Integer> asFrequencyMap() { return frequencies.asMap(); }
A bag of integers. Since in the context of the ring buffer the maximum size is known up-front there is no need to worry about resizing the underlying storage.
/** * A bag of integers. * Since in the context of the ring buffer the maximum size is known up-front * there is no need to worry about resizing the underlying storage. */
private static class IntBag implements Accountable { private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(IntBag.class); private final int[] keys; private final int[] freqs; private final int mask; IntBag(int maxSize) { // load factor of 2/3 int capacity = Math.max(2, maxSize * 3 / 2); // round up to the next power of two capacity = Integer.highestOneBit(capacity - 1) << 1; assert capacity > maxSize; keys = new int[capacity]; freqs = new int[capacity]; mask = capacity - 1; } @Override public long ramBytesUsed() { return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys) + RamUsageEstimator.sizeOf(freqs); }
Return the frequency of the give key in the bag.
/** Return the frequency of the give key in the bag. */
int frequency(int key) { for (int slot = key & mask; ; slot = (slot + 1) & mask) { if (keys[slot] == key) { return freqs[slot]; } else if (freqs[slot] == 0) { return 0; } } }
Increment the frequency of the given key by 1 and return its new frequency.
/** Increment the frequency of the given key by 1 and return its new frequency. */
int add(int key) { for (int slot = key & mask; ; slot = (slot + 1) & mask) { if (freqs[slot] == 0) { keys[slot] = key; return freqs[slot] = 1; } else if (keys[slot] == key) { return ++freqs[slot]; } } }
Decrement the frequency of the given key by one, or do nothing if the key is not present in the bag. Returns true iff the key was contained in the bag.
/** Decrement the frequency of the given key by one, or do nothing if the * key is not present in the bag. Returns true iff the key was contained * in the bag. */
boolean remove(int key) { for (int slot = key & mask; ; slot = (slot + 1) & mask) { if (freqs[slot] == 0) { // no such key in the bag return false; } else if (keys[slot] == key) { final int newFreq = --freqs[slot]; if (newFreq == 0) { // removed relocateAdjacentKeys(slot); } return true; } } } private void relocateAdjacentKeys(int freeSlot) { for (int slot = (freeSlot + 1) & mask; ; slot = (slot + 1) & mask) { final int freq = freqs[slot]; if (freq == 0) { // end of the collision chain, we're done break; } final int key = keys[slot]; // the slot where <code>key</code> should be if there were no collisions final int expectedSlot = key & mask; // if the free slot is between the expected slot and the slot where the // key is, then we can relocate there if (between(expectedSlot, slot, freeSlot)) { keys[freeSlot] = key; freqs[freeSlot] = freq; // slot is the new free slot freqs[slot] = 0; freeSlot = slot; } } }
Given a chain of occupied slots between chainStart and chainEnd, return whether slot is between the start and end of the chain.
/** Given a chain of occupied slots between <code>chainStart</code> * and <code>chainEnd</code>, return whether <code>slot</code> is * between the start and end of the chain. */
private static boolean between(int chainStart, int chainEnd, int slot) { if (chainStart <= chainEnd) { return chainStart <= slot && slot <= chainEnd; } else { // the chain is across the end of the array return slot >= chainStart || slot <= chainEnd; } } Map<Integer, Integer> asMap() { Map<Integer, Integer> map = new HashMap<>(); for (int i = 0; i < keys.length; ++i) { if (freqs[i] > 0) { map.put(keys[i], freqs[i]); } } return map; } } }