package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
public class OrdinalMap implements Accountable {
private static class TermsEnumIndex {
public final static TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
final int subIndex;
final TermsEnum termsEnum;
BytesRef currentTerm;
public TermsEnumIndex(TermsEnum termsEnum, int subIndex) {
this.termsEnum = termsEnum;
this.subIndex = subIndex;
}
public BytesRef next() throws IOException {
currentTerm = termsEnum.next();
return currentTerm;
}
}
private static class SegmentMap implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);
private static int[] map(final long[] weights) {
final int[] newToOld = new int[weights.length];
for (int i = 0; i < weights.length; ++i) {
newToOld[i] = i;
}
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
final int tmp = newToOld[i];
newToOld[i] = newToOld[j];
newToOld[j] = tmp;
}
@Override
protected int compare(int i, int j) {
return Long.compare(weights[newToOld[j]], weights[newToOld[i]]);
}
}.sort(0, weights.length);
return newToOld;
}
private static int[] inverse(int[] map) {
final int[] inverse = new int[map.length];
for (int i = 0; i < map.length; ++i) {
inverse[map[i]] = i;
}
return inverse;
}
private final int[] newToOld, oldToNew;
SegmentMap(long[] weights) {
newToOld = map(weights);
oldToNew = inverse(newToOld);
assert Arrays.equals(newToOld, inverse(oldToNew));
}
int newToOld(int segment) {
return newToOld[segment];
}
int oldToNew(int segment) {
return oldToNew[segment];
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew);
}
}
public static OrdinalMap build(IndexReader.CacheKey owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
public static OrdinalMap build(IndexReader.CacheKey owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
public static OrdinalMap build(IndexReader.CacheKey owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException {
if (subs.length != weights.length) {
throw new IllegalArgumentException("subs and weights must have the same length");
}
final SegmentMap segmentMap = new SegmentMap(weights);
return new OrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio);
}
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class);
public final IndexReader.CacheKey owner;
final PackedLongValues globalOrdDeltas;
final PackedLongValues firstSegments;
final LongValues segmentToGlobalOrds[];
final SegmentMap segmentMap;
final long ramBytesUsed;
OrdinalMap(IndexReader.CacheKey owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException {
this.owner = owner;
this.segmentMap = segmentMap;
PackedLongValues.Builder globalOrdDeltas = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
PackedLongValues.Builder firstSegments = PackedLongValues.packedBuilder(PackedInts.COMPACT);
final PackedLongValues.Builder[] ordDeltas = new PackedLongValues.Builder[subs.length];
for (int i = 0; i < ordDeltas.length; i++) {
ordDeltas[i] = PackedLongValues.monotonicBuilder(acceptableOverheadRatio);
}
long[] ordDeltaBits = new long[subs.length];
long[] segmentOrds = new long[subs.length];
PriorityQueue<TermsEnumIndex> queue = new PriorityQueue<TermsEnumIndex>(subs.length) {
@Override
protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) {
return a.currentTerm.compareTo(b.currentTerm) < 0;
}
};
for (int i = 0; i < subs.length; i++) {
TermsEnumIndex sub = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
if (sub.next() != null) {
queue.add(sub);
}
}
BytesRefBuilder scratch = new BytesRefBuilder();
long globalOrd = 0;
while (queue.size() != 0) {
TermsEnumIndex top = queue.top();
scratch.copyBytes(top.currentTerm);
int firstSegmentIndex = Integer.MAX_VALUE;
long globalOrdDelta = Long.MAX_VALUE;
while (true) {
top = queue.top();
long segmentOrd = top.termsEnum.ord();
long delta = globalOrd - segmentOrd;
int segmentIndex = top.subIndex;
if (segmentIndex < firstSegmentIndex) {
firstSegmentIndex = segmentIndex;
globalOrdDelta = delta;
}
ordDeltaBits[segmentIndex] |= delta;
assert segmentOrds[segmentIndex] <= segmentOrd;
do {
ordDeltas[segmentIndex].add(delta);
segmentOrds[segmentIndex]++;
} while (segmentOrds[segmentIndex] <= segmentOrd);
if (top.next() == null) {
queue.pop();
if (queue.size() == 0) {
break;
}
} else {
queue.updateTop();
}
if (queue.top().currentTerm.equals(scratch.get()) == false) {
break;
}
}
firstSegments.add(firstSegmentIndex);
globalOrdDeltas.add(globalOrdDelta);
globalOrd++;
}
this.firstSegments = firstSegments.build();
this.globalOrdDeltas = globalOrdDeltas.build();
segmentToGlobalOrds = new LongValues[subs.length];
long ramBytesUsed = BASE_RAM_BYTES_USED + this.globalOrdDeltas.ramBytesUsed()
+ this.firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
+ segmentMap.ramBytesUsed();
for (int i = 0; i < ordDeltas.length; ++i) {
final PackedLongValues deltas = ordDeltas[i].build();
if (ordDeltaBits[i] == 0L) {
segmentToGlobalOrds[i] = LongValues.IDENTITY;
} else {
final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
final long monotonicBits = deltas.ramBytesUsed() * 8;
final long packedBits = bitsRequired * deltas.size();
if (deltas.size() <= Integer.MAX_VALUE
&& packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
final int size = (int) deltas.size();
final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
final PackedLongValues.Iterator it = deltas.iterator();
for (int ord = 0; ord < size; ++ord) {
newDeltas.set(ord, it.next());
}
assert it.hasNext() == false;
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + newDeltas.get((int) ord);
}
};
ramBytesUsed += newDeltas.ramBytesUsed();
} else {
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + deltas.get(ord);
}
};
ramBytesUsed += deltas.ramBytesUsed();
}
ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
}
}
this.ramBytesUsed = ramBytesUsed;
}
public LongValues getGlobalOrds(int segmentIndex) {
return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)];
}
public long getFirstSegmentOrd(long globalOrd) {
return globalOrd - globalOrdDeltas.get(globalOrd);
}
public int getFirstSegmentNumber(long globalOrd) {
return segmentMap.newToOld((int) firstSegments.get(globalOrd));
}
public long getValueCount() {
return globalOrdDeltas.size();
}
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
resources.add(Accountables.namedAccountable("global ord deltas", globalOrdDeltas));
resources.add(Accountables.namedAccountable("first segments", firstSegments));
resources.add(Accountables.namedAccountable("segment map", segmentMap));
return resources;
}
}