package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRefHash.BytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IntBlockPool;
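/** Gathers the unique terms of one field in a {@link BytesRefHash} and,
 * for each term, writes {@code streamCount} parallel byte streams (e.g.
 * doc+freq in one stream, prox in another) into shared block pools as
 * linked lists of slices that a {@link ByteSliceReader} can later replay.
 * Instances can be chained via {@code nextPerField}, e.g. the freq/prox
 * writer followed by the term vectors writer. */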
abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
private static final int HASH_INIT_SIZE = 4;
final TermsHash termsHash;
final TermsHashPerField nextPerField;
protected final DocumentsWriterPerThread.DocState docState;
protected final FieldInvertState fieldState;
TermToBytesRefAttribute termAtt;
protected TermFrequencyAttribute termFreqAtt;
final IntBlockPool intPool;
final ByteBlockPool bytePool;
final ByteBlockPool termBytePool;
final int streamCount;
final int numPostingInt;
protected final FieldInfo fieldInfo;
final BytesRefHash bytesHash;
ParallelPostingsArray postingsArray;
private final Counter bytesUsed;
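/** streamCount: how many streams this field stores per term, e.g.
 * doc(+freq) is one stream and prox+offsets is a second. */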
public TermsHashPerField(int streamCount, FieldInvertState fieldState, TermsHash termsHash, TermsHashPerField nextPerField, FieldInfo fieldInfo) {
intPool = termsHash.intPool;
bytePool = termsHash.bytePool;
termBytePool = termsHash.termBytePool;
docState = termsHash.docState;
this.termsHash = termsHash;
bytesUsed = termsHash.bytesUsed;
this.fieldState = fieldState;
this.streamCount = streamCount;
numPostingInt = 2*streamCount;
this.fieldInfo = fieldInfo;
this.nextPerField = nextPerField;
PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed);
bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
}
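/** Forget all terms seen so far so this field can be reused; chained
 * per-fields are reset as well. */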
void reset() {
bytesHash.clear(false);
if (nextPerField != null) {
nextPerField.reset();
}
}
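/** Positions the given {@link ByteSliceReader} over one stream of a
 * term's postings: from the start of that stream's first slice up to the
 * stream's current write position. */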
public void initReader(ByteSliceReader reader, int termID, int stream) {
assert stream < streamCount;
int intStart = postingsArray.intStarts[termID];
final int[] ints = intPool.buffers[intStart >> IntBlockPool.INT_BLOCK_SHIFT];
final int upto = intStart & IntBlockPool.INT_BLOCK_MASK;
reader.init(bytePool,
postingsArray.byteStarts[termID]+stream*ByteBlockPool.FIRST_LEVEL_SIZE,
ints[upto+stream]);
}
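/** Term IDs in sorted order, filled in by {@link #sortPostings}. */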
int[] sortedTermIDs;
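/** Collapses the hash table and sorts the collected term IDs by term
 * bytes, so postings can be iterated in term order at flush time. */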
public int[] sortPostings() {
sortedTermIDs = bytesHash.sort();
return sortedTermIDs;
}
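// Set by start(): true if the chained per-field accepted this field and
// should receive each token via add(int textStart).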
private boolean doNextCall;
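/** Secondary entry point (for the second and subsequent fields in the
 * {@link TermsHash} chain): the token's text has already been interned
 * into the shared term byte pool, so we hash by its pool offset
 * {@code textStart}. Term vectors use this API. */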
public void add(int textStart) throws IOException {
int termID = bytesHash.addByPoolOffset(textStart);
if (termID >= 0) {
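// First time this term is seen in this field: make sure the int and byte
// pools have room, then allocate streamCount int slots plus a
// first-level byte slice per stream.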
if (numPostingInt + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
intPool.nextBuffer();
}
if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) {
bytePool.nextBuffer();
}
intUptos = intPool.buffer;
intUptoStart = intPool.intUpto;
intPool.intUpto += streamCount;
postingsArray.intStarts[termID] = intUptoStart + intPool.intOffset;
for(int i=0;i<streamCount;i++) {
final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
intUptos[intUptoStart+i] = upto + bytePool.byteOffset;
}
postingsArray.byteStarts[termID] = intUptos[intUptoStart];
newTerm(termID);
} else {
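// Term already seen in this field: recover its int-stream pointers so
// subsequent writes append to the existing slices.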
termID = (-termID)-1;
int intStart = postingsArray.intStarts[termID];
intUptos = intPool.buffers[intStart >> IntBlockPool.INT_BLOCK_SHIFT];
intUptoStart = intStart & IntBlockPool.INT_BLOCK_MASK;
addTerm(termID);
}
}
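/** Called once per inverted token; this is the primary entry point, used
 * by the first {@link TermsHash} in the chain. */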
void add() throws IOException {
int termID = bytesHash.add(termAtt.getBytesRef());
if (termID >= 0) {
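// New term for this field: initialize its int slots and byte slices,
// mirroring add(int textStart) above.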
bytesHash.byteStart(termID);
if (numPostingInt + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
intPool.nextBuffer();
}
if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) {
bytePool.nextBuffer();
}
intUptos = intPool.buffer;
intUptoStart = intPool.intUpto;
intPool.intUpto += streamCount;
postingsArray.intStarts[termID] = intUptoStart + intPool.intOffset;
for(int i=0;i<streamCount;i++) {
final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
intUptos[intUptoStart+i] = upto + bytePool.byteOffset;
}
postingsArray.byteStarts[termID] = intUptos[intUptoStart];
newTerm(termID);
} else {
termID = (-termID)-1;
int intStart = postingsArray.intStarts[termID];
intUptos = intPool.buffers[intStart >> IntBlockPool.INT_BLOCK_SHIFT];
intUptoStart = intStart & IntBlockPool.INT_BLOCK_MASK;
addTerm(termID);
}
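// Forward the interned term (by its shared pool offset) to the chained
// per-field, e.g. the term vectors writer.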
if (doNextCall) {
nextPerField.add(postingsArray.textStarts[termID]);
}
}
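// Pointers into the int pool for the term currently being written: one
// slot per stream, each holding that stream's next byte-pool write
// address.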
int[] intUptos;
int intUptoStart;
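/** Appends one byte to the given stream, growing the stream via
 * {@link ByteBlockPool#allocSlice} when the current slice is full: a
 * non-zero byte at the write position is the sentinel marking the end of
 * the slice. */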
void writeByte(int stream, byte b) {
int upto = intUptos[intUptoStart+stream];
byte[] bytes = bytePool.buffers[upto >> ByteBlockPool.BYTE_BLOCK_SHIFT];
assert bytes != null;
int offset = upto & ByteBlockPool.BYTE_BLOCK_MASK;
if (bytes[offset] != 0) {
offset = bytePool.allocSlice(bytes, offset);
bytes = bytePool.buffer;
intUptos[intUptoStart+stream] = offset + bytePool.byteOffset;
}
bytes[offset] = b;
(intUptos[intUptoStart+stream])++;
}
/** Appends a range of bytes to the given stream, one byte at a time. */
public void writeBytes(int stream, byte[] b, int offset, int len) {
final int end = offset + len;
for(int i=offset;i<end;i++) {
writeByte(stream, b[i]);
}
}
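/** Writes an int to the given stream in the standard variable-length
 * format: 7 bits per byte, high bit set on all but the final byte. */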
void writeVInt(int stream, int i) {
assert stream < streamCount;
while ((i & ~0x7F) != 0) {
writeByte(stream, (byte)((i & 0x7f) | 0x80));
i >>>= 7;
}
writeByte(stream, (byte) i);
}
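/** {@link BytesStartArray} implementation backed by the field's
 * {@link ParallelPostingsArray}: it exposes textStarts to the
 * {@link BytesRefHash} and tracks the array's memory in
 * {@code bytesUsed}. */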
private static final class PostingsBytesStartArray extends BytesStartArray {
private final TermsHashPerField perField;
private final Counter bytesUsed;
private PostingsBytesStartArray(TermsHashPerField perField, Counter bytesUsed) {
this.perField = perField;
this.bytesUsed = bytesUsed;
}
@Override
public int[] init() {
if (perField.postingsArray == null) {
perField.postingsArray = perField.createPostingsArray(2);
perField.newPostingsArray();
bytesUsed.addAndGet(perField.postingsArray.size * perField.postingsArray.bytesPerPosting());
}
return perField.postingsArray.textStarts;
}
@Override
public int[] grow() {
ParallelPostingsArray postingsArray = perField.postingsArray;
final int oldSize = perField.postingsArray.size;
postingsArray = perField.postingsArray = postingsArray.grow();
perField.newPostingsArray();
bytesUsed.addAndGet((postingsArray.bytesPerPosting() * (postingsArray.size - oldSize)));
return postingsArray.textStarts;
}
@Override
public int[] clear() {
if (perField.postingsArray != null) {
bytesUsed.addAndGet(-(perField.postingsArray.size * perField.postingsArray.bytesPerPosting()));
perField.postingsArray = null;
perField.newPostingsArray();
}
return null;
}
@Override
public Counter bytesUsed() {
return bytesUsed;
}
}
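/** Orders per-fields by field name so they can be processed in a
 * deterministic order at flush time. */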
@Override
public int compareTo(TermsHashPerField other) {
return fieldInfo.name.compareTo(other.fieldInfo.name);
}
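/** Called after the last token of a field instance has been processed;
 * propagates down the chain. */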
void finish() throws IOException {
if (nextPerField != null) {
nextPerField.finish();
}
}
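/** Start adding a new field instance; {@code first} is true if this is
 * the first time this field name was seen in the document. Returns true
 * if this consumer should receive the field's tokens. */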
boolean start(IndexableField field, boolean first) {
termAtt = fieldState.termAttribute;
termFreqAtt = fieldState.termFreqAttribute;
if (nextPerField != null) {
doNextCall = nextPerField.start(field, first);
}
return true;
}
/** Called when {@code termID} is seen for the first time in this field. */
abstract void newTerm(int termID) throws IOException;
/** Called when a previously seen {@code termID} occurs again. */
abstract void addTerm(int termID) throws IOException;
/** Called whenever the postings array is allocated or grown, so
 * subclasses can re-acquire references to their parallel arrays. */
abstract void newPostingsArray();
/** Creates this field's parallel postings array with the given size. */
abstract ParallelPostingsArray createPostingsArray(int size);
}