package org.apache.lucene.util;
import java.io.IOException;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
public class RoaringDocIdSet extends DocIdSet {
private static final int BLOCK_SIZE = 1 << 16;
private static final int MAX_ARRAY_LENGTH = 1 << 12;
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(RoaringDocIdSet.class);
public static class Builder {
private final int maxDoc;
private final DocIdSet[] sets;
private int cardinality;
private int lastDocId;
private int currentBlock;
private int currentBlockCardinality;
private final short[] buffer;
private FixedBitSet denseBuffer;
public Builder(int maxDoc) {
this.maxDoc = maxDoc;
sets = new DocIdSet[(maxDoc + (1 << 16) - 1) >>> 16];
lastDocId = -1;
currentBlock = -1;
buffer = new short[MAX_ARRAY_LENGTH];
}
private void flush() {
assert currentBlockCardinality <= BLOCK_SIZE;
if (currentBlockCardinality <= MAX_ARRAY_LENGTH) {
assert denseBuffer == null;
if (currentBlockCardinality > 0) {
sets[currentBlock] = new ShortArrayDocIdSet(ArrayUtil.copyOfSubArray(buffer, 0, currentBlockCardinality));
}
} else {
assert denseBuffer != null;
assert denseBuffer.cardinality() == currentBlockCardinality;
if (denseBuffer.length() == BLOCK_SIZE && BLOCK_SIZE - currentBlockCardinality < MAX_ARRAY_LENGTH) {
final short[] excludedDocs = new short[BLOCK_SIZE - currentBlockCardinality];
denseBuffer.flip(0, denseBuffer.length());
int excludedDoc = -1;
for (int i = 0; i < excludedDocs.length; ++i) {
excludedDoc = denseBuffer.nextSetBit(excludedDoc + 1);
assert excludedDoc != DocIdSetIterator.NO_MORE_DOCS;
excludedDocs[i] = (short) excludedDoc;
}
assert excludedDoc + 1 == denseBuffer.length() || denseBuffer.nextSetBit(excludedDoc + 1) == DocIdSetIterator.NO_MORE_DOCS;
sets[currentBlock] = new NotDocIdSet(BLOCK_SIZE, new ShortArrayDocIdSet(excludedDocs));
} else {
sets[currentBlock] = new BitDocIdSet(denseBuffer, currentBlockCardinality);
}
denseBuffer = null;
}
cardinality += currentBlockCardinality;
denseBuffer = null;
currentBlockCardinality = 0;
}
public Builder add(int docId) {
if (docId <= lastDocId) {
throw new IllegalArgumentException("Doc ids must be added in-order, got " + docId + " which is <= lastDocID=" + lastDocId);
}
final int block = docId >>> 16;
if (block != currentBlock) {
flush();
currentBlock = block;
}
if (currentBlockCardinality < MAX_ARRAY_LENGTH) {
buffer[currentBlockCardinality] = (short) docId;
} else {
if (denseBuffer == null) {
final int numBits = Math.min(1 << 16, maxDoc - (block << 16));
denseBuffer = new FixedBitSet(numBits);
for (short doc : buffer) {
denseBuffer.set(doc & 0xFFFF);
}
}
denseBuffer.set(docId & 0xFFFF);
}
lastDocId = docId;
currentBlockCardinality += 1;
return this;
}
public Builder add(DocIdSetIterator disi) throws IOException {
for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
add(doc);
}
return this;
}
public RoaringDocIdSet build() {
flush();
return new RoaringDocIdSet(sets, cardinality);
}
}
private static class ShortArrayDocIdSet extends DocIdSet {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ShortArrayDocIdSet.class);
private final short[] docIDs;
private ShortArrayDocIdSet(short[] docIDs) {
this.docIDs = docIDs;
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(docIDs);
}
@Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
int i = -1;
int doc = -1;
private int docId(int i) {
return docIDs[i] & 0xFFFF;
}
@Override
public int nextDoc() throws IOException {
if (++i >= docIDs.length) {
return doc = NO_MORE_DOCS;
}
return doc = docId(i);
}
@Override
public int docID() {
return doc;
}
@Override
public long cost() {
return docIDs.length;
}
@Override
public int advance(int target) throws IOException {
int lo = i + 1;
int hi = docIDs.length - 1;
while (lo <= hi) {
final int mid = (lo + hi) >>> 1;
final int midDoc = docId(mid);
if (midDoc < target) {
lo = mid + 1;
} else {
hi = mid - 1;
}
}
if (lo == docIDs.length) {
i = docIDs.length;
return doc = NO_MORE_DOCS;
} else {
i = lo;
return doc = docId(i);
}
}
};
}
}
private final DocIdSet[] docIdSets;
private final int cardinality;
private final long ramBytesUsed;
private RoaringDocIdSet(DocIdSet[] docIdSets, int cardinality) {
this.docIdSets = docIdSets;
long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(docIdSets);
for (DocIdSet set : this.docIdSets) {
if (set != null) {
ramBytesUsed += set.ramBytesUsed();
}
}
this.ramBytesUsed = ramBytesUsed;
this.cardinality = cardinality;
}
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
@Override
public DocIdSetIterator iterator() throws IOException {
if (cardinality == 0) {
return null;
}
return new Iterator();
}
private class Iterator extends DocIdSetIterator {
int block;
DocIdSetIterator sub = null;
int doc;
Iterator() throws IOException {
doc = -1;
block = -1;
sub = DocIdSetIterator.empty();
}
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
final int subNext = sub.nextDoc();
if (subNext == NO_MORE_DOCS) {
return firstDocFromNextBlock();
}
return doc = (block << 16) | subNext;
}
@Override
public int advance(int target) throws IOException {
final int targetBlock = target >>> 16;
if (targetBlock != block) {
block = targetBlock;
if (block >= docIdSets.length) {
sub = null;
return doc = NO_MORE_DOCS;
}
if (docIdSets[block] == null) {
return firstDocFromNextBlock();
}
sub = docIdSets[block].iterator();
}
final int subNext = sub.advance(target & 0xFFFF);
if (subNext == NO_MORE_DOCS) {
return firstDocFromNextBlock();
}
return doc = (block << 16) | subNext;
}
private int firstDocFromNextBlock() throws IOException {
while (true) {
block += 1;
if (block >= docIdSets.length) {
sub = null;
return doc = NO_MORE_DOCS;
} else if (docIdSets[block] != null) {
sub = docIdSets[block].iterator();
final int subNext = sub.nextDoc();
assert subNext != NO_MORE_DOCS;
return doc = (block << 16) | subNext;
}
}
}
@Override
public long cost() {
return cardinality;
}
}
public int cardinality() {
return cardinality;
}
@Override
public String toString() {
return "RoaringDocIdSet(cardinality=" + cardinality + ")";
}
}