/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
import java.util.List;
import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
/**
 * Class that Posting and PostingVector use to write byte
 * streams into shared fixed-size byte[] arrays. The idea
 * is to allocate slices of increasing lengths. For
 * example, the first slice is 5 bytes, the next slice is
 * 14, etc. We start by writing our bytes into the first
 * 5 bytes. When we hit the end of the slice, we allocate
 * the next slice and then write the address of the new
 * slice into the last 4 bytes of the previous slice (the
 * "forwarding address").
 *
 * <p>Each slice is filled with 0's initially, and we mark
 * the end with a non-zero byte. This way the methods
 * that are writing into the slice don't need to record
 * its length and instead allocate a new slice once they
 * hit a non-zero byte.
 *
 * @lucene.internal
 */
public final class ByteBlockPool implements Accountable {
/** Shallow size of a pool instance; the starting value of {@code ramBytesUsed()}. */
private static final long BASE_RAM_BYTES =
    RamUsageEstimator.shallowSizeOfInstance(ByteBlockPool.class);

/** Number of bits to shift a pool offset right by to obtain the index of its block. */
public static final int BYTE_BLOCK_SHIFT = 15;

/** Size of one block in bytes, i.e. {@code 1 << BYTE_BLOCK_SHIFT}. */
public static final int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT;

/** Mask applied to a pool offset to obtain the position inside its block. */
public static final int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1;
/** Abstract class for allocating and freeing byte blocks. */
public abstract static class Allocator {
  /** Length in bytes of every block returned by {@link #getByteBlock()}. */
  protected final int blockSize;

  /** Creates an allocator whose blocks are {@code blockSize} bytes long. */
  public Allocator(int blockSize) {
    this.blockSize = blockSize;
  }

  /**
   * Hands the blocks at indices {@code start} (inclusive) to {@code end} (exclusive) of
   * {@code blocks} back to this allocator.
   */
  public abstract void recycleByteBlocks(byte[][] blocks, int start, int end);

  /** Convenience overload: recycles every block contained in the given list. */
  public void recycleByteBlocks(List<byte[]> blocks) {
    final int count = blocks.size();
    final byte[][] asArray = blocks.toArray(new byte[count][]);
    // Delegate to the array variant, covering the whole array.
    recycleByteBlocks(asArray, 0, asArray.length);
  }

  /** Returns a newly created block of {@code blockSize} bytes. */
  public byte[] getByteBlock() {
    return new byte[blockSize];
  }
}
/** A simple {@link Allocator} that never recycles. */
public static final class DirectAllocator extends Allocator {

  /** Creates an allocator handing out blocks of {@code blockSize} bytes. */
  public DirectAllocator(int blockSize) {
    super(blockSize);
  }

  /** Creates an allocator handing out blocks of {@link ByteBlockPool#BYTE_BLOCK_SIZE} bytes. */
  public DirectAllocator() {
    this(BYTE_BLOCK_SIZE);
  }

  @Override
  public void recycleByteBlocks(byte[][] blocks, int start, int end) {
    // Deliberately empty: this allocator does not keep or reuse returned blocks.
  }
}
/**
 * A simple {@link Allocator} that never recycles, but
 * tracks how much total RAM is in use.
 */
public static class DirectTrackingAllocator extends Allocator {
  /** Counter raised by {@code blockSize} for each allocated block and lowered on recycle. */
  private final Counter bytesUsed;

  /**
   * Creates a tracking allocator with blocks of {@link ByteBlockPool#BYTE_BLOCK_SIZE} bytes.
   *
   * @param bytesUsed counter that receives the allocation and recycle deltas
   */
  public DirectTrackingAllocator(Counter bytesUsed) {
    this(BYTE_BLOCK_SIZE, bytesUsed);
  }

  /**
   * Creates a tracking allocator with blocks of {@code blockSize} bytes.
   *
   * @param blockSize length of each block in bytes
   * @param bytesUsed counter that receives the allocation and recycle deltas
   */
  public DirectTrackingAllocator(int blockSize, Counter bytesUsed) {
    super(blockSize);
    this.bytesUsed = bytesUsed;
  }

  /** Adds {@code blockSize} to the counter and returns a newly created block. */
  @Override
  public byte[] getByteBlock() {
    bytesUsed.addAndGet(blockSize);
    return new byte[blockSize];
  }

  /**
   * Subtracts the size of the blocks in {@code [start, end)} from the counter and nulls
   * out those entries of {@code blocks}; the blocks themselves are not reused.
   */
  @Override
  public void recycleByteBlocks(byte[][] blocks, int start, int end) {
    // Widen to long before multiplying: with many blocks or a large blockSize the
    // product does not fit in an int and would corrupt the counter.
    bytesUsed.addAndGet(-((long) (end - start) * blockSize));
    for (int i = start; i < end; i++) {
      blocks[i] = null;
    }
  }
}
/**
 * Array of buffers currently used in the pool. Buffers are allocated if
 * needed; don't modify this outside of this class.
 */
public byte[][] buffers = new byte[10][];

/** Index into the buffers array pointing to the current buffer used as the head. */
private int bufferUpto = -1; // Which buffer we are upto

/** Where we are in the head buffer. */
public int byteUpto = BYTE_BLOCK_SIZE;

/** Current head buffer. */
public byte[] buffer;

/** Current head offset. */
public int byteOffset = -BYTE_BLOCK_SIZE;

/** Provides new blocks and receives the blocks released by a reset. */
private final Allocator allocator;

/**
 * Creates a pool that obtains its blocks from the given allocator. No block is
 * allocated here; the head buffer stays {@code null} until {@code nextBuffer()} is called.
 *
 * @param allocator the allocator used to obtain and recycle blocks
 */
public ByteBlockPool(Allocator allocator) {
  this.allocator = allocator;
}
/**
 * Resets the pool to its initial state reusing the first buffer and fills all
 * buffers with {@code 0} bytes before they are reused or passed to
 * {@link Allocator#recycleByteBlocks(byte[][], int, int)}. Calling
 * {@link ByteBlockPool#nextBuffer()} is not needed after reset.
 */
public void reset() {
  // Default behaviour: wipe the used bytes and keep the first buffer as the new head.
  final boolean zeroFill = true;
  final boolean keepFirstBuffer = true;
  reset(zeroFill, keepFirstBuffer);
}
/**
 * Expert: Resets the pool to its initial state reusing the first buffer. Calling
 * {@link ByteBlockPool#nextBuffer()} is not needed after reset.
 *
 * @param zeroFillBuffers if {@code true} the buffers are filled with {@code 0}.
 *        This should be set to {@code true} if this pool is used with slices.
 * @param reuseFirst if {@code true} the first buffer will be reused and calling
 *        {@link ByteBlockPool#nextBuffer()} is not needed after reset iff the
 *        block pool was used before, i.e. {@link ByteBlockPool#nextBuffer()} was called before.
 */
public void reset(boolean zeroFillBuffers, boolean reuseFirst) {
  if (bufferUpto == -1) {
    // No buffer was ever allocated, so there is nothing to clear or recycle.
    return;
  }

  if (zeroFillBuffers) {
    // Every buffer before the head was used completely: wipe it entirely.
    for (int idx = 0; idx < bufferUpto; idx++) {
      Arrays.fill(buffers[idx], (byte) 0);
    }
    // The head buffer has only been written up to byteUpto.
    Arrays.fill(buffers[bufferUpto], 0, byteUpto, (byte) 0);
  }

  final boolean somethingToRecycle = !reuseFirst || bufferUpto > 0;
  if (somethingToRecycle) {
    // Keep buffer 0 out of the recycled range when it is going to be reused.
    final int from = reuseFirst ? 1 : 0;
    final int to = bufferUpto + 1;
    allocator.recycleByteBlocks(buffers, from, to);
    Arrays.fill(buffers, from, to, null);
  }

  if (!reuseFirst) {
    // Back to the state of a freshly constructed pool.
    bufferUpto = -1;
    byteUpto = BYTE_BLOCK_SIZE;
    byteOffset = -BYTE_BLOCK_SIZE;
    buffer = null;
  } else {
    // The first buffer becomes the head again, positioned at its start.
    bufferUpto = 0;
    byteUpto = 0;
    byteOffset = 0;
    buffer = buffers[0];
  }
}
/**
 * Advances the pool to its next buffer. This method should be called once
 * after the constructor to initialize the pool. In contrast to the
 * constructor a {@link ByteBlockPool#reset()} call will advance the pool to
 * its first buffer immediately.
 */
public void nextBuffer() {
  // Compute the new head offset first: if the pool would grow past what an int offset
  // can address, Math.addExact throws ArithmeticException before any state is touched,
  // instead of byteOffset silently wrapping around to a negative value.
  final int nextByteOffset = Math.addExact(byteOffset, BYTE_BLOCK_SIZE);

  final int nextIndex = bufferUpto + 1;
  if (nextIndex == buffers.length) {
    // The buffers array is full: grow it while keeping the existing blocks.
    buffers =
        Arrays.copyOf(buffers, ArrayUtil.oversize(buffers.length + 1, NUM_BYTES_OBJECT_REF));
  }

  // Obtain a block from the allocator and make it the head buffer.
  buffer = buffers[nextIndex] = allocator.getByteBlock();
  bufferUpto = nextIndex;
  byteUpto = 0;
  byteOffset = nextByteOffset;
}
/**
 * Allocates a new slice with the given size.
 *
 * @see ByteBlockPool#FIRST_LEVEL_SIZE
 */
public int newSlice(final int size) {
  // The slice must fit entirely into the head buffer; otherwise move to a new buffer.
  final int lastStartThatFits = BYTE_BLOCK_SIZE - size;
  if (byteUpto > lastStartThatFits) {
    nextBuffer();
  }

  final int sliceStart = byteUpto;
  final int sliceEnd = sliceStart + size;
  byteUpto = sliceEnd;
  // Non-zero end marker in the last byte of the slice (16 = level 0 with the marker bit set).
  buffer[sliceEnd - 1] = 16;
  return sliceStart;
}
// Size of each slice. These arrays should be at most 16
// elements (index is encoded with 4 bits). First array
// is just a compact way to encode X+1 with a max. Second
// array is the length of each slice, ie first slice is 5
// bytes, next slice is 14 bytes, etc.
An array holding the offset into the LEVEL_SIZE_ARRAY
to quickly navigate to the next slice level. /**
* An array holding the offset into the {@link ByteBlockPool#LEVEL_SIZE_ARRAY}
* to quickly navigate to the next slice level.
*/
public final static int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
An array holding the level sizes for byte slices.
/**
* An array holding the level sizes for byte slices.
*/
public final static int[] LEVEL_SIZE_ARRAY = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200};
The first level size for new slices
See Also: - newSlice.newSlice(int)
/**
* The first level size for new slices
* @see ByteBlockPool#newSlice(int)
*/
public final static int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
/**
 * Creates a new byte slice with the given starting size and
 * returns the slice's offset in the pool.
 */
public int allocSlice(final byte[] slice, final int upto) {
// slice[upto] is the end marker of the slice that just filled up; its low 4 bits
// hold that slice's level.
final int level = slice[upto] & 15;
// Look up the following level and the number of bytes a slice of that level occupies.
final int newLevel = NEXT_LEVEL_ARRAY[level];
final int newSize = LEVEL_SIZE_ARRAY[newLevel];
// Maybe allocate another block
if (byteUpto > BYTE_BLOCK_SIZE-newSize) {
nextBuffer();
}
// Start of the new slice inside the head buffer, and the same position expressed as a
// pool-wide offset (head buffer offset + position). Both are read after a possible
// nextBuffer() call above.
final int newUpto = byteUpto;
final int offset = newUpto + byteOffset;
byteUpto += newSize;
// Copy forward the past 3 bytes (which we are about
// to overwrite with the forwarding address):
buffer[newUpto] = slice[upto-3];
buffer[newUpto+1] = slice[upto-2];
buffer[newUpto+2] = slice[upto-1];
// Write forwarding address at end of last slice:
// (most significant byte first; the last byte replaces the old end marker)
slice[upto-3] = (byte) (offset >>> 24);
slice[upto-2] = (byte) (offset >>> 16);
slice[upto-1] = (byte) (offset >>> 8);
slice[upto] = (byte) offset;
// Write new level:
// (OR-ing with 16 keeps the marker byte non-zero; the low 4 bits carry the level)
buffer[byteUpto-1] = (byte) (16|newLevel);
// Position in the head buffer just past the 3 copied bytes -- presumably where the
// caller resumes writing; confirm against callers.
return newUpto+3;
}
/**
 * Fill the provided {@link BytesRef} with the bytes at the specified offset/length slice.
 * This will avoid copying the bytes, if the slice fits into a single block; otherwise, it uses
 * the provided {@link BytesRefBuilder} to copy bytes over.
 */
void setBytesRef(BytesRefBuilder builder, BytesRef result, long offset, int length) {
  result.length = length;

  // Split the pool offset into the block index and the position inside that block.
  final int blockIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
  final byte[] block = buffers[blockIndex];
  final int posInBlock = (int) (offset & BYTE_BLOCK_MASK);

  final boolean crossesBlockBoundary = posInBlock + length > BYTE_BLOCK_SIZE;
  if (crossesBlockBoundary) {
    // Uncommon case: the bytes span at least two blocks, so gather them into the
    // builder's backing array.
    builder.grow(length);
    result.bytes = builder.get().bytes;
    result.offset = 0;
    readBytes(offset, result.bytes, 0, length);
    return;
  }

  // Common case: everything lives in one block, so point at it without copying.
  result.bytes = block;
  result.offset = posInBlock;
}
/**
 * Fills in a {@link BytesRef} from a term whose length and bytes are encoded in a
 * byte block. The length prefix at {@code textStart} is one byte when its high bit
 * is clear and two bytes otherwise; {@code term} ends up pointing into the block
 * right after that prefix.
 */
public void setBytesRef(BytesRef term, int textStart) {
  final byte[] block = buffers[textStart >> BYTE_BLOCK_SHIFT];
  term.bytes = block;
  final int pos = textStart & BYTE_BLOCK_MASK;

  final byte first = block[pos];
  if ((first & 0x80) != 0) {
    // Two-byte length: low 7 bits come from the first byte, the rest from the second.
    term.length = (first & 0x7f) + ((block[pos + 1] & 0xff) << 7);
    term.offset = pos + 2;
  } else {
    // One-byte length.
    term.length = first;
    term.offset = pos + 1;
  }
  assert term.length >= 0;
}
/**
 * Appends the bytes in the provided {@link BytesRef} at
 * the current position.
 */
public void append(final BytesRef bytes) {
  int remaining = bytes.length;
  int srcPos = bytes.offset;

  while (remaining > 0) {
    final int room = BYTE_BLOCK_SIZE - byteUpto;

    if (remaining >= room) {
      // The rest does not fit with room to spare: fill the head buffer to its end
      // (if it has any room) and continue in a new buffer.
      if (room > 0) {
        System.arraycopy(bytes.bytes, srcPos, buffer, byteUpto, room);
      }
      nextBuffer();
      remaining -= room;
      srcPos += room;
      continue;
    }

    // Everything that is left fits into the head buffer.
    System.arraycopy(bytes.bytes, srcPos, buffer, byteUpto, remaining);
    byteUpto += remaining;
    return;
  }
}
/**
 * Reads bytes out of the pool starting at the given offset with the given
 * length into the given byte array at offset {@code bytesOffset}.
 * <p>Note: this method allows copying across block boundaries.</p>
 */
public void readBytes(final long offset, final byte bytes[], int bytesOffset, int bytesLength) {
  // Split the pool offset into the first block to read and the position inside it.
  int blockIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
  int posInBlock = (int) (offset & BYTE_BLOCK_MASK);

  for (int remaining = bytesLength; remaining > 0; ) {
    final byte[] source = buffers[blockIndex];
    // Copy at most up to the end of the current block.
    final int count = Math.min(remaining, BYTE_BLOCK_SIZE - posInBlock);
    System.arraycopy(source, posInBlock, bytes, bytesOffset, count);

    bytesOffset += count;
    remaining -= count;
    // Any further data starts at the beginning of the following block.
    blockIndex++;
    posInBlock = 0;
  }
}
/**
 * Set the given {@link BytesRef} so that its content is equal to the
 * {@code ref.length} bytes starting at {@code offset}. Most of the time this
 * method will set pointers to internal data-structures. However, in case a
 * value crosses a boundary, a fresh copy will be returned.
 * In contrast to {@link #setBytesRef(BytesRef, int)}, this does not
 * expect the length to be encoded with the data.
 */
public void setRawBytesRef(BytesRef ref, final long offset) {
  final int blockIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
  final int posInBlock = (int) (offset & BYTE_BLOCK_MASK);

  if (posInBlock + ref.length > BYTE_BLOCK_SIZE) {
    // The value crosses a block boundary: gather it into a newly allocated array.
    ref.bytes = new byte[ref.length];
    ref.offset = 0;
    readBytes(offset, ref.bytes, 0, ref.length);
    return;
  }

  // The value lies within one block: reference the pool's own array.
  ref.bytes = buffers[blockIndex];
  ref.offset = posInBlock;
}
/** Read a single byte at the given {@code offset}. */
public byte readByte(long offset) {
  // High bits of the offset select the block, low bits the position inside it.
  return buffers[(int) (offset >> BYTE_BLOCK_SHIFT)][(int) (offset & BYTE_BLOCK_MASK)];
}
@Override
public long ramBytesUsed() {
  // Start with the instance itself, the head buffer and the array holding the blocks.
  long total = BASE_RAM_BYTES;
  total += RamUsageEstimator.sizeOfObject(buffer);
  total += RamUsageEstimator.shallowSizeOf(buffers);

  // Add every other block; entries identical to the head buffer were counted above.
  for (byte[] block : buffers) {
    if (block != buffer) {
      total += RamUsageEstimator.sizeOfObject(block);
    }
  }
  return total;
}
}