/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
A native int hash-based set where one value is reserved to mean "EMPTY" internally. The space overhead is fairly low as there is only one power-of-two sized int[] to hold the values. The set is re-hashed when adding a value that would make it >= 75% full. Consider extending and over-riding hash(int)
if the values might be poor hash keys; Lucene docids should be fine. The internal fields are exposed publicly to enable more efficient use at the expense of better O-O principles.
To iterate over the integers held in this set, simply use code like this:
SentinelIntSet set = ...
for (int v : set.keys) {
if (v == set.emptyVal)
continue;
//use v...
}
@lucene.internal
/**
* A native int hash-based set where one value is reserved to mean "EMPTY" internally. The space overhead is fairly low
* as there is only one power-of-two sized int[] to hold the values. The set is re-hashed when adding a value that
* would make it >= 75% full. Consider extending and over-riding {@link #hash(int)} if the values might be poor
* hash keys; Lucene docids should be fine.
* The internal fields are exposed publicly to enable more efficient use at the expense of better O-O principles.
* <p>
* To iterate over the integers held in this set, simply use code like this:
* <pre class="prettyprint">
* SentinelIntSet set = ...
* for (int v : set.keys) {
* if (v == set.emptyVal)
* continue;
* //use v...
* }</pre>
*
* @lucene.internal
*/
public class SentinelIntSet {
A power-of-2 over-sized array holding the integers in the set along with empty values. /** A power-of-2 over-sized array holding the integers in the set along with empty values. */
public int[] keys;
public int count;
public final int emptyVal;
the count at which a rehash should be done /** the count at which a rehash should be done */
public int rehashCount;
Params: - size – The minimum number of elements this set should be able to hold without rehashing
(i.e. the slots are guaranteed not to change)
- emptyVal – The integer value to use for EMPTY
/**
*
* @param size The minimum number of elements this set should be able to hold without rehashing
* (i.e. the slots are guaranteed not to change)
* @param emptyVal The integer value to use for EMPTY
*/
public SentinelIntSet(int size, int emptyVal) {
this.emptyVal = emptyVal;
int tsize = Math.max(org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo(size), 1);
rehashCount = tsize - (tsize>>2);
if (size >= rehashCount) { // should be able to hold "size" w/o re-hashing
tsize <<= 1;
rehashCount = tsize - (tsize>>2);
}
keys = new int[tsize];
if (emptyVal != 0)
clear();
}
public void clear() {
Arrays.fill(keys, emptyVal);
count = 0;
}
(internal) Return the hash for the key. The default implementation just returns the key,
which is not appropriate for general purpose use.
/** (internal) Return the hash for the key. The default implementation just returns the key,
* which is not appropriate for general purpose use.
*/
public int hash(int key) {
return key;
}
The number of integers in this set. /** The number of integers in this set. */
public int size() { return count; }
(internal) Returns the slot for this key /** (internal) Returns the slot for this key */
public int getSlot(int key) {
assert key != emptyVal;
int h = hash(key);
int s = h & (keys.length-1);
if (keys[s] == key || keys[s]== emptyVal) return s;
int increment = (h>>7)|1;
do {
s = (s + increment) & (keys.length-1);
} while (keys[s] != key && keys[s] != emptyVal);
return s;
}
(internal) Returns the slot for this key, or -slot-1 if not found /** (internal) Returns the slot for this key, or -slot-1 if not found */
public int find(int key) {
assert key != emptyVal;
int h = hash(key);
int s = h & (keys.length-1);
if (keys[s] == key) return s;
if (keys[s] == emptyVal) return -s-1;
int increment = (h>>7)|1;
for(;;) {
s = (s + increment) & (keys.length-1);
if (keys[s] == key) return s;
if (keys[s] == emptyVal) return -s-1;
}
}
Does this set contain the specified integer? /** Does this set contain the specified integer? */
public boolean exists(int key) {
return find(key) >= 0;
}
Puts this integer (key) in the set, and returns the slot index it was added to.
It rehashes if adding it would make the set more than 75% full. /** Puts this integer (key) in the set, and returns the slot index it was added to.
* It rehashes if adding it would make the set more than 75% full. */
public int put(int key) {
int s = find(key);
if (s < 0) {
count++;
if (count >= rehashCount) {
rehash();
s = getSlot(key);
} else {
s = -s-1;
}
keys[s] = key;
}
return s;
}
(internal) Rehashes by doubling int[] key
and filling with the old values. /** (internal) Rehashes by doubling {@code int[] key} and filling with the old values. */
public void rehash() {
int newSize = keys.length << 1;
int[] oldKeys = keys;
keys = new int[newSize];
if (emptyVal != 0) Arrays.fill(keys, emptyVal);
for (int key : oldKeys) {
if (key == emptyVal) continue;
int newSlot = getSlot(key);
keys[newSlot] = key;
}
rehashCount = newSize - (newSize>>2);
}
Return the memory footprint of this class in bytes. /** Return the memory footprint of this class in bytes. */
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
Integer.BYTES * 3
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF)
+ RamUsageEstimator.sizeOf(keys);
}
}