/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util.packed;


import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.RamUsageEstimator;

Simplistic compression for array of unsigned long values. Each value is >= 0 and <= a specified maximum value. The values are stored as packed ints, with each value consuming a fixed number of bits.
@lucene.internal
/** * Simplistic compression for array of unsigned long values. * Each value is {@code >= 0} and {@code <=} a specified maximum value. The * values are stored as packed ints, with each value * consuming a fixed number of bits. * * @lucene.internal */
public class PackedInts {
At most 700% memory overhead, always select a direct implementation.
/** * At most 700% memory overhead, always select a direct implementation. */
public static final float FASTEST = 7f;
At most 50% memory overhead, always select a reasonably fast implementation.
/** * At most 50% memory overhead, always select a reasonably fast implementation. */
public static final float FAST = 0.5f;
At most 25% memory overhead.
/** * At most 25% memory overhead. */
public static final float DEFAULT = 0.25f;
No memory overhead at all, but the returned implementation may be slow.
/** * No memory overhead at all, but the returned implementation may be slow. */
public static final float COMPACT = 0f;
Default amount of memory to use for bulk operations.
/** * Default amount of memory to use for bulk operations. */
public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K public final static String CODEC_NAME = "PackedInts"; public static final int VERSION_MONOTONIC_WITHOUT_ZIGZAG = 2; public final static int VERSION_START = VERSION_MONOTONIC_WITHOUT_ZIGZAG; public final static int VERSION_CURRENT = VERSION_MONOTONIC_WITHOUT_ZIGZAG;
Check the validity of a version number.
/** * Check the validity of a version number. */
public static void checkVersion(int version) { if (version < VERSION_START) { throw new IllegalArgumentException("Version is too old, should be at least " + VERSION_START + " (got " + version + ")"); } else if (version > VERSION_CURRENT) { throw new IllegalArgumentException("Version is too new, should be at most " + VERSION_CURRENT + " (got " + version + ")"); } }
A format to write packed ints.
@lucene.internal
/** * A format to write packed ints. * * @lucene.internal */
public enum Format {
Compact format, all bits are written contiguously.
/** * Compact format, all bits are written contiguously. */
PACKED(0) { @Override public long byteCount(int packedIntsVersion, int valueCount, int bitsPerValue) { return (long) Math.ceil((double) valueCount * bitsPerValue / 8); } },
A format that may insert padding bits to improve encoding and decoding speed. Since this format doesn't support all possible bits per value, you should never use it directly, but rather use PackedInts.fastestFormatAndBits(int, int, float) to find the format that best suits your needs.
/** * A format that may insert padding bits to improve encoding and decoding * speed. Since this format doesn't support all possible bits per value, you * should never use it directly, but rather use * {@link PackedInts#fastestFormatAndBits(int, int, float)} to find the * format that best suits your needs. */
PACKED_SINGLE_BLOCK(1) { @Override public int longCount(int packedIntsVersion, int valueCount, int bitsPerValue) { final int valuesPerBlock = 64 / bitsPerValue; return (int) Math.ceil((double) valueCount / valuesPerBlock); } @Override public boolean isSupported(int bitsPerValue) { return Packed64SingleBlock.isSupported(bitsPerValue); } @Override public float overheadPerValue(int bitsPerValue) { assert isSupported(bitsPerValue); final int valuesPerBlock = 64 / bitsPerValue; final int overhead = 64 % bitsPerValue; return (float) overhead / valuesPerBlock; } };
Get a format according to its ID.
/** * Get a format according to its ID. */
public static Format byId(int id) { for (Format format : Format.values()) { if (format.getId() == id) { return format; } } throw new IllegalArgumentException("Unknown format id: " + id); } private Format(int id) { this.id = id; } public int id;
Returns the ID of the format.
/** * Returns the ID of the format. */
public int getId() { return id; }
Computes how many byte blocks are needed to store values values of size bitsPerValue.
/** * Computes how many byte blocks are needed to store <code>values</code> * values of size <code>bitsPerValue</code>. */
public long byteCount(int packedIntsVersion, int valueCount, int bitsPerValue) { assert bitsPerValue >= 0 && bitsPerValue <= 64 : bitsPerValue; // assume long-aligned return 8L * longCount(packedIntsVersion, valueCount, bitsPerValue); }
Computes how many long blocks are needed to store values values of size bitsPerValue.
/** * Computes how many long blocks are needed to store <code>values</code> * values of size <code>bitsPerValue</code>. */
public int longCount(int packedIntsVersion, int valueCount, int bitsPerValue) { assert bitsPerValue >= 0 && bitsPerValue <= 64 : bitsPerValue; final long byteCount = byteCount(packedIntsVersion, valueCount, bitsPerValue); assert byteCount < 8L * Integer.MAX_VALUE; if ((byteCount % 8) == 0) { return (int) (byteCount / 8); } else { return (int) (byteCount / 8 + 1); } }
Tests whether the provided number of bits per value is supported by the format.
/** * Tests whether the provided number of bits per value is supported by the * format. */
public boolean isSupported(int bitsPerValue) { return bitsPerValue >= 1 && bitsPerValue <= 64; }
Returns the overhead per value, in bits.
/** * Returns the overhead per value, in bits. */
public float overheadPerValue(int bitsPerValue) { assert isSupported(bitsPerValue); return 0f; }
Returns the overhead ratio (overhead per value / bits per value).
/** * Returns the overhead ratio (<code>overhead per value / bits per value</code>). */
public final float overheadRatio(int bitsPerValue) { assert isSupported(bitsPerValue); return overheadPerValue(bitsPerValue) / bitsPerValue; } }
Simple class that holds a format and a number of bits per value.
/** * Simple class that holds a format and a number of bits per value. */
public static class FormatAndBits { public final Format format; public final int bitsPerValue; public FormatAndBits(Format format, int bitsPerValue) { this.format = format; this.bitsPerValue = bitsPerValue; } @Override public String toString() { return "FormatAndBits(format=" + format + " bitsPerValue=" + bitsPerValue + ")"; } }
Try to find the Format and number of bits per value that would restore from disk the fastest reader whose overhead is less than acceptableOverheadRatio.

The acceptableOverheadRatio parameter makes sense for random-access Readers. In case you only plan to perform sequential access on this stream later on, you should probably use COMPACT.

If you don't know how many values you are going to write, use valueCount = -1.

/** * Try to find the {@link Format} and number of bits per value that would * restore from disk the fastest reader whose overhead is less than * <code>acceptableOverheadRatio</code>. * <p> * The <code>acceptableOverheadRatio</code> parameter makes sense for * random-access {@link Reader}s. In case you only plan to perform * sequential access on this stream later on, you should probably use * {@link PackedInts#COMPACT}. * <p> * If you don't know how many values you are going to write, use * <code>valueCount = -1</code>. */
public static FormatAndBits fastestFormatAndBits(int valueCount, int bitsPerValue, float acceptableOverheadRatio) { if (valueCount == -1) { valueCount = Integer.MAX_VALUE; } acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio); acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio); float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue; int actualBitsPerValue = -1; Format format = Format.PACKED; if (bitsPerValue <= 8 && maxBitsPerValue >= 8) { actualBitsPerValue = 8; } else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) { actualBitsPerValue = 16; } else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) { actualBitsPerValue = 32; } else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) { actualBitsPerValue = 64; } else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) { actualBitsPerValue = 24; } else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) { actualBitsPerValue = 48; } else { for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) { if (Format.PACKED_SINGLE_BLOCK.isSupported(bpv)) { float overhead = Format.PACKED_SINGLE_BLOCK.overheadPerValue(bpv); float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv; if (overhead <= acceptableOverhead) { actualBitsPerValue = bpv; format = Format.PACKED_SINGLE_BLOCK; break; } } } if (actualBitsPerValue < 0) { actualBitsPerValue = bitsPerValue; } } return new FormatAndBits(format, actualBitsPerValue); }
A decoder for packed integers.
/** * A decoder for packed integers. */
public static interface Decoder {
The minimum number of long blocks to encode in a single iteration, when using long encoding.
/** * The minimum number of long blocks to encode in a single iteration, when * using long encoding. */
int longBlockCount();
The number of values that can be stored in longBlockCount() long blocks.
/** * The number of values that can be stored in {@link #longBlockCount()} long * blocks. */
int longValueCount();
The minimum number of byte blocks to encode in a single iteration, when using byte encoding.
/** * The minimum number of byte blocks to encode in a single iteration, when * using byte encoding. */
int byteBlockCount();
The number of values that can be stored in byteBlockCount() byte blocks.
/** * The number of values that can be stored in {@link #byteBlockCount()} byte * blocks. */
int byteValueCount();
Read iterations * blockCount() blocks from blocks, decode them and write iterations * valueCount() values into values.
Params:
  • blocks – the long blocks that hold packed integer values
  • blocksOffset – the offset where to start reading blocks
  • values – the values buffer
  • valuesOffset – the offset where to start writing values
  • iterations – controls how much data to decode
/** * Read <code>iterations * blockCount()</code> blocks from <code>blocks</code>, * decode them and write <code>iterations * valueCount()</code> values into * <code>values</code>. * * @param blocks the long blocks that hold packed integer values * @param blocksOffset the offset where to start reading blocks * @param values the values buffer * @param valuesOffset the offset where to start writing values * @param iterations controls how much data to decode */
void decode(long[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations);
Read 8 * iterations * blockCount() blocks from blocks, decode them and write iterations * valueCount() values into values.
Params:
  • blocks – the long blocks that hold packed integer values
  • blocksOffset – the offset where to start reading blocks
  • values – the values buffer
  • valuesOffset – the offset where to start writing values
  • iterations – controls how much data to decode
/** * Read <code>8 * iterations * blockCount()</code> blocks from <code>blocks</code>, * decode them and write <code>iterations * valueCount()</code> values into * <code>values</code>. * * @param blocks the long blocks that hold packed integer values * @param blocksOffset the offset where to start reading blocks * @param values the values buffer * @param valuesOffset the offset where to start writing values * @param iterations controls how much data to decode */
void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations);
Read iterations * blockCount() blocks from blocks, decode them and write iterations * valueCount() values into values.
Params:
  • blocks – the long blocks that hold packed integer values
  • blocksOffset – the offset where to start reading blocks
  • values – the values buffer
  • valuesOffset – the offset where to start writing values
  • iterations – controls how much data to decode
/** * Read <code>iterations * blockCount()</code> blocks from <code>blocks</code>, * decode them and write <code>iterations * valueCount()</code> values into * <code>values</code>. * * @param blocks the long blocks that hold packed integer values * @param blocksOffset the offset where to start reading blocks * @param values the values buffer * @param valuesOffset the offset where to start writing values * @param iterations controls how much data to decode */
void decode(long[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations);
Read 8 * iterations * blockCount() blocks from blocks, decode them and write iterations * valueCount() values into values.
Params:
  • blocks – the long blocks that hold packed integer values
  • blocksOffset – the offset where to start reading blocks
  • values – the values buffer
  • valuesOffset – the offset where to start writing values
  • iterations – controls how much data to decode
/** * Read <code>8 * iterations * blockCount()</code> blocks from <code>blocks</code>, * decode them and write <code>iterations * valueCount()</code> values into * <code>values</code>. * * @param blocks the long blocks that hold packed integer values * @param blocksOffset the offset where to start reading blocks * @param values the values buffer * @param valuesOffset the offset where to start writing values * @param iterations controls how much data to decode */
void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations); }
An encoder for packed integers.
/** * An encoder for packed integers. */
public static interface Encoder {
The minimum number of long blocks to encode in a single iteration, when using long encoding.
/** * The minimum number of long blocks to encode in a single iteration, when * using long encoding. */
int longBlockCount();
The number of values that can be stored in longBlockCount() long blocks.
/** * The number of values that can be stored in {@link #longBlockCount()} long * blocks. */
int longValueCount();
The minimum number of byte blocks to encode in a single iteration, when using byte encoding.
/** * The minimum number of byte blocks to encode in a single iteration, when * using byte encoding. */
int byteBlockCount();
The number of values that can be stored in byteBlockCount() byte blocks.
/** * The number of values that can be stored in {@link #byteBlockCount()} byte * blocks. */
int byteValueCount();
Read iterations * valueCount() values from values, encode them and write iterations * blockCount() blocks into blocks.
Params:
  • blocks – the long blocks that hold packed integer values
  • blocksOffset – the offset where to start writing blocks
  • values – the values buffer
  • valuesOffset – the offset where to start reading values
  • iterations – controls how much data to encode
/** * Read <code>iterations * valueCount()</code> values from <code>values</code>, * encode them and write <code>iterations * blockCount()</code> blocks into * <code>blocks</code>. * * @param blocks the long blocks that hold packed integer values * @param blocksOffset the offset where to start writing blocks * @param values the values buffer * @param valuesOffset the offset where to start reading values * @param iterations controls how much data to encode */
void encode(long[] values, int valuesOffset, long[] blocks, int blocksOffset, int iterations);
Read iterations * valueCount() values from values, encode them and write 8 * iterations * blockCount() blocks into blocks.
Params:
  • blocks – the long blocks that hold packed integer values
  • blocksOffset – the offset where to start writing blocks
  • values – the values buffer
  • valuesOffset – the offset where to start reading values
  • iterations – controls how much data to encode
/** * Read <code>iterations * valueCount()</code> values from <code>values</code>, * encode them and write <code>8 * iterations * blockCount()</code> blocks into * <code>blocks</code>. * * @param blocks the long blocks that hold packed integer values * @param blocksOffset the offset where to start writing blocks * @param values the values buffer * @param valuesOffset the offset where to start reading values * @param iterations controls how much data to encode */
void encode(long[] values, int valuesOffset, byte[] blocks, int blocksOffset, int iterations);
Read iterations * valueCount() values from values, encode them and write iterations * blockCount() blocks into blocks.
Params:
  • blocks – the long blocks that hold packed integer values
  • blocksOffset – the offset where to start writing blocks
  • values – the values buffer
  • valuesOffset – the offset where to start reading values
  • iterations – controls how much data to encode
/** * Read <code>iterations * valueCount()</code> values from <code>values</code>, * encode them and write <code>iterations * blockCount()</code> blocks into * <code>blocks</code>. * * @param blocks the long blocks that hold packed integer values * @param blocksOffset the offset where to start writing blocks * @param values the values buffer * @param valuesOffset the offset where to start reading values * @param iterations controls how much data to encode */
void encode(int[] values, int valuesOffset, long[] blocks, int blocksOffset, int iterations);
Read iterations * valueCount() values from values, encode them and write 8 * iterations * blockCount() blocks into blocks.
Params:
  • blocks – the long blocks that hold packed integer values
  • blocksOffset – the offset where to start writing blocks
  • values – the values buffer
  • valuesOffset – the offset where to start reading values
  • iterations – controls how much data to encode
/** * Read <code>iterations * valueCount()</code> values from <code>values</code>, * encode them and write <code>8 * iterations * blockCount()</code> blocks into * <code>blocks</code>. * * @param blocks the long blocks that hold packed integer values * @param blocksOffset the offset where to start writing blocks * @param values the values buffer * @param valuesOffset the offset where to start reading values * @param iterations controls how much data to encode */
void encode(int[] values, int valuesOffset, byte[] blocks, int blocksOffset, int iterations); }
A read-only random access array of positive integers.
@lucene.internal
/** * A read-only random access array of positive integers. * @lucene.internal */
public static abstract class Reader implements Accountable {
Get the long at the given index. Behavior is undefined for out-of-range indices.
/** Get the long at the given index. Behavior is undefined for out-of-range indices. */
public abstract long get(int index);
Bulk get: read at least one and at most len longs starting from index into arr[off:off+len] and return the actual number of values that have been read.
/** * Bulk get: read at least one and at most <code>len</code> longs starting * from <code>index</code> into <code>arr[off:off+len]</code> and return * the actual number of values that have been read. */
public int get(int index, long[] arr, int off, int len) { assert len > 0 : "len must be > 0 (got " + len + ")"; assert index >= 0 && index < size(); assert off + len <= arr.length; final int gets = Math.min(size() - index, len); for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) { arr[o] = get(i); } return gets; }
Returns:the number of values.
/** * @return the number of values. */
public abstract int size(); }
Run-once iterator interface, to decode previously saved PackedInts.
/** * Run-once iterator interface, to decode previously saved PackedInts. */
public static interface ReaderIterator {
Returns next value
/** Returns next value */
long next() throws IOException;
Returns at least 1 and at most count next values, the returned ref MUST NOT be modified
/** Returns at least 1 and at most <code>count</code> next values, * the returned ref MUST NOT be modified */
LongsRef next(int count) throws IOException;
Returns number of bits per value
/** Returns number of bits per value */
int getBitsPerValue();
Returns number of values
/** Returns number of values */
int size();
Returns the current position
/** Returns the current position */
int ord(); } static abstract class ReaderIteratorImpl implements ReaderIterator { protected final DataInput in; protected final int bitsPerValue; protected final int valueCount; protected ReaderIteratorImpl(int valueCount, int bitsPerValue, DataInput in) { this.in = in; this.bitsPerValue = bitsPerValue; this.valueCount = valueCount; } @Override public long next() throws IOException { LongsRef nextValues = next(1); assert nextValues.length > 0; final long result = nextValues.longs[nextValues.offset]; ++nextValues.offset; --nextValues.length; return result; } @Override public int getBitsPerValue() { return bitsPerValue; } @Override public int size() { return valueCount; } }
A packed integer array that can be modified.
@lucene.internal
/** * A packed integer array that can be modified. * @lucene.internal */
public static abstract class Mutable extends Reader {
Returns:the number of bits used to store any given value. Note: This does not imply that memory usage is bitsPerValue * #values as implementations are free to use non-space-optimal packing of bits.
/** * @return the number of bits used to store any given value. * Note: This does not imply that memory usage is * {@code bitsPerValue * #values} as implementations are free to * use non-space-optimal packing of bits. */
public abstract int getBitsPerValue();
Set the value at the given index in the array.
Params:
  • index – where the value should be positioned.
  • value – a value conforming to the constraints set by the array.
/** * Set the value at the given index in the array. * @param index where the value should be positioned. * @param value a value conforming to the constraints set by the array. */
public abstract void set(int index, long value);
Bulk set: set at least one and at most len longs starting at off in arr into this mutable, starting at index. Returns the actual number of values that have been set.
/** * Bulk set: set at least one and at most <code>len</code> longs starting * at <code>off</code> in <code>arr</code> into this mutable, starting at * <code>index</code>. Returns the actual number of values that have been * set. */
public int set(int index, long[] arr, int off, int len) { assert len > 0 : "len must be > 0 (got " + len + ")"; assert index >= 0 && index < size(); len = Math.min(len, size() - index); assert off + len <= arr.length; for (int i = index, o = off, end = index + len; i < end; ++i, ++o) { set(i, arr[o]); } return len; }
Fill the mutable from fromIndex (inclusive) to toIndex (exclusive) with val.
/** * Fill the mutable from <code>fromIndex</code> (inclusive) to * <code>toIndex</code> (exclusive) with <code>val</code>. */
public void fill(int fromIndex, int toIndex, long val) { assert val <= maxValue(getBitsPerValue()); assert fromIndex <= toIndex; for (int i = fromIndex; i < toIndex; ++i) { set(i, val); } }
Sets all values to 0.
/** * Sets all values to 0. */
public void clear() { fill(0, size(), 0); }
Save this mutable into out. Instantiating a reader from the generated data will return a reader with the same number of bits per value.
/** * Save this mutable into <code>out</code>. Instantiating a reader from * the generated data will return a reader with the same number of bits * per value. */
public void save(DataOutput out) throws IOException { Writer writer = getWriterNoHeader(out, getFormat(), size(), getBitsPerValue(), DEFAULT_BUFFER_SIZE); writer.writeHeader(); for (int i = 0; i < size(); ++i) { writer.add(get(i)); } writer.finish(); }
The underlying format.
/** The underlying format. */
Format getFormat() { return Format.PACKED; } }
A simple base for Readers that keeps track of valueCount and bitsPerValue.
@lucene.internal
/** * A simple base for Readers that keeps track of valueCount and bitsPerValue. * @lucene.internal */
static abstract class ReaderImpl extends Reader { protected final int valueCount; protected ReaderImpl(int valueCount) { this.valueCount = valueCount; } @Override public abstract long get(int index); @Override public final int size() { return valueCount; } } static abstract class MutableImpl extends Mutable { protected final int valueCount; protected final int bitsPerValue; protected MutableImpl(int valueCount, int bitsPerValue) { this.valueCount = valueCount; assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue; this.bitsPerValue = bitsPerValue; } @Override public final int getBitsPerValue() { return bitsPerValue; } @Override public final int size() { return valueCount; } @Override public String toString() { return getClass().getSimpleName() + "(valueCount=" + valueCount + ",bitsPerValue=" + bitsPerValue + ")"; } }
A Reader which has all its values equal to 0 (bitsPerValue = 0).
/** A {@link Reader} which has all its values equal to 0 (bitsPerValue = 0). */
public static final class NullReader extends Reader { private final int valueCount;
Sole constructor.
/** Sole constructor. */
public NullReader(int valueCount) { this.valueCount = valueCount; } @Override public long get(int index) { return 0; } @Override public int get(int index, long[] arr, int off, int len) { assert len > 0 : "len must be > 0 (got " + len + ")"; assert index >= 0 && index < valueCount; len = Math.min(len, valueCount - index); Arrays.fill(arr, off, off + len, 0); return len; } @Override public int size() { return valueCount; } @Override public long ramBytesUsed() { return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + Integer.BYTES); } }
A write-once Writer.
@lucene.internal
/** A write-once Writer. * @lucene.internal */
public static abstract class Writer { protected final DataOutput out; protected final int valueCount; protected final int bitsPerValue; protected Writer(DataOutput out, int valueCount, int bitsPerValue) { assert bitsPerValue <= 64; assert valueCount >= 0 || valueCount == -1; this.out = out; this.valueCount = valueCount; this.bitsPerValue = bitsPerValue; } void writeHeader() throws IOException { assert valueCount != -1; CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); out.writeVInt(bitsPerValue); out.writeVInt(valueCount); out.writeVInt(getFormat().getId()); }
The format used to serialize values.
/** The format used to serialize values. */
protected abstract PackedInts.Format getFormat();
Add a value to the stream.
/** Add a value to the stream. */
public abstract void add(long v) throws IOException;
The number of bits per value.
/** The number of bits per value. */
public final int bitsPerValue() { return bitsPerValue; }
Perform end-of-stream operations.
/** Perform end-of-stream operations. */
public abstract void finish() throws IOException;
Returns the current ord in the stream (number of values that have been written so far minus one).
/** * Returns the current ord in the stream (number of values that have been * written so far minus one). */
public abstract int ord(); }
Get a Decoder.
Params:
  • format – the format used to store packed ints
  • version – the compatibility version
  • bitsPerValue – the number of bits per value
Returns:a decoder
/** * Get a {@link Decoder}. * * @param format the format used to store packed ints * @param version the compatibility version * @param bitsPerValue the number of bits per value * @return a decoder */
public static Decoder getDecoder(Format format, int version, int bitsPerValue) { checkVersion(version); return BulkOperation.of(format, bitsPerValue); }
Get an Encoder.
Params:
  • format – the format used to store packed ints
  • version – the compatibility version
  • bitsPerValue – the number of bits per value
Returns:an encoder
/** * Get an {@link Encoder}. * * @param format the format used to store packed ints * @param version the compatibility version * @param bitsPerValue the number of bits per value * @return an encoder */
public static Encoder getEncoder(Format format, int version, int bitsPerValue) { checkVersion(version); return BulkOperation.of(format, bitsPerValue); }
Expert: Restore a Reader from a stream without reading metadata at the beginning of the stream. This method is useful to restore data from streams which have been created using getWriterNoHeader(DataOutput, Format, int, int, int).
Params:
  • in – the stream to read data from, positioned at the beginning of the packed values
  • format – the format used to serialize
  • version – the version used to serialize the data
  • valueCount – how many values the stream holds
  • bitsPerValue – the number of bits per value
Throws:
See Also:
Returns: a Reader
@lucene.internal
/** * Expert: Restore a {@link Reader} from a stream without reading metadata at * the beginning of the stream. This method is useful to restore data from * streams which have been created using * {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}. * * @param in the stream to read data from, positioned at the beginning of the packed values * @param format the format used to serialize * @param version the version used to serialize the data * @param valueCount how many values the stream holds * @param bitsPerValue the number of bits per value * @return a Reader * @throws IOException If there is a low-level I/O error * @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int) * @lucene.internal */
public static Reader getReaderNoHeader(DataInput in, Format format, int version, int valueCount, int bitsPerValue) throws IOException { checkVersion(version); switch (format) { case PACKED_SINGLE_BLOCK: return Packed64SingleBlock.create(in, valueCount, bitsPerValue); case PACKED: switch (bitsPerValue) { case 8: return new Direct8(version, in, valueCount); case 16: return new Direct16(version, in, valueCount); case 32: return new Direct32(version, in, valueCount); case 64: return new Direct64(version, in, valueCount); case 24: if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) { return new Packed8ThreeBlocks(version, in, valueCount); } break; case 48: if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) { return new Packed16ThreeBlocks(version, in, valueCount); } break; } return new Packed64(version, in, valueCount, bitsPerValue); default: throw new AssertionError("Unknown Writer format: " + format); } }
Restore a Reader from a stream.
Params:
  • in – the stream to read data from
Throws:
Returns: a Reader
@lucene.internal
/** * Restore a {@link Reader} from a stream. * * @param in the stream to read data from * @return a Reader * @throws IOException If there is a low-level I/O error * @lucene.internal */
public static Reader getReader(DataInput in) throws IOException { final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT); final int bitsPerValue = in.readVInt(); assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue; final int valueCount = in.readVInt(); final Format format = Format.byId(in.readVInt()); return getReaderNoHeader(in, format, version, valueCount, bitsPerValue); }
Expert: Restore a ReaderIterator from a stream without reading metadata at the beginning of the stream. This method is useful to restore data from streams which have been created using getWriterNoHeader(DataOutput, Format, int, int, int).
Params:
  • in – the stream to read data from, positioned at the beginning of the packed values
  • format – the format used to serialize
  • version – the version used to serialize the data
  • valueCount – how many values the stream holds
  • bitsPerValue – the number of bits per value
  • mem – how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
See Also:
Returns: a ReaderIterator
@lucene.internal
/** * Expert: Restore a {@link ReaderIterator} from a stream without reading * metadata at the beginning of the stream. This method is useful to restore * data from streams which have been created using * {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}. * * @param in the stream to read data from, positioned at the beginning of the packed values * @param format the format used to serialize * @param version the version used to serialize the data * @param valueCount how many values the stream holds * @param bitsPerValue the number of bits per value * @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration) * @return a ReaderIterator * @see PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int) * @lucene.internal */
public static ReaderIterator getReaderIteratorNoHeader(DataInput in, Format format, int version, int valueCount, int bitsPerValue, int mem) { checkVersion(version); return new PackedReaderIterator(format, version, valueCount, bitsPerValue, in, mem); }
Retrieve PackedInts as a ReaderIterator
Params:
  • in – positioned at the beginning of a stored packed int structure.
  • mem – how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration)
Throws:
  • IOException – if the structure could not be retrieved.
Returns:an iterator to access the values
@lucene.internal
/** * Retrieve PackedInts as a {@link ReaderIterator} * @param in positioned at the beginning of a stored packed int structure. * @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up iteration) * @return an iterator to access the values * @throws IOException if the structure could not be retrieved. * @lucene.internal */
public static ReaderIterator getReaderIterator(DataInput in, int mem) throws IOException { final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT); final int bitsPerValue = in.readVInt(); assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue; final int valueCount = in.readVInt(); final Format format = Format.byId(in.readVInt()); return getReaderIteratorNoHeader(in, format, version, valueCount, bitsPerValue, mem); }
Expert: Construct a direct Reader from a stream without reading metadata at the beginning of the stream. This method is useful to restore data from streams which have been created using getWriterNoHeader(DataOutput, Format, int, int, int).

The returned reader will have very little memory overhead, but every call to Reader.get(int) is likely to perform a disk seek.

Params:
  • in – the stream to read data from
  • format – the format used to serialize
  • version – the version used to serialize the data
  • valueCount – how many values the stream holds
  • bitsPerValue – the number of bits per value
Returns:a direct Reader
@lucene.internal
/** * Expert: Construct a direct {@link Reader} from a stream without reading * metadata at the beginning of the stream. This method is useful to restore * data from streams which have been created using * {@link PackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}. * <p> * The returned reader will have very little memory overhead, but every call * to {@link Reader#get(int)} is likely to perform a disk seek. * * @param in the stream to read data from * @param format the format used to serialize * @param version the version used to serialize the data * @param valueCount how many values the stream holds * @param bitsPerValue the number of bits per value * @return a direct Reader * @lucene.internal */
public static Reader getDirectReaderNoHeader(final IndexInput in, Format format, int version, int valueCount, int bitsPerValue) { checkVersion(version); switch (format) { case PACKED: return new DirectPackedReader(bitsPerValue, valueCount, in); case PACKED_SINGLE_BLOCK: return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in); default: throw new AssertionError("Unknwown format: " + format); } }
Construct a direct Reader from an IndexInput. This method is useful to restore data from streams which have been created using getWriter(DataOutput, int, int, float).

The returned reader will have very little memory overhead, but every call to Reader.get(int) is likely to perform a disk seek.

Params:
  • in – the stream to read data from
Throws:
Returns:a direct Reader
@lucene.internal
/** * Construct a direct {@link Reader} from an {@link IndexInput}. This method * is useful to restore data from streams which have been created using * {@link PackedInts#getWriter(DataOutput, int, int, float)}. * <p> * The returned reader will have very little memory overhead, but every call * to {@link Reader#get(int)} is likely to perform a disk seek. * * @param in the stream to read data from * @return a direct Reader * @throws IOException If there is a low-level I/O error * @lucene.internal */
public static Reader getDirectReader(IndexInput in) throws IOException { final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT); final int bitsPerValue = in.readVInt(); assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue; final int valueCount = in.readVInt(); final Format format = Format.byId(in.readVInt()); return getDirectReaderNoHeader(in, format, version, valueCount, bitsPerValue); }
Create a packed integer array with the given amount of values initialized to 0. the valueCount and the bitsPerValue cannot be changed after creation. All Mutables known by this factory are kept fully in RAM.

Positive values of acceptableOverheadRatio will trade space for speed by selecting a faster but potentially less memory-efficient implementation. An acceptableOverheadRatio of COMPACT will make sure that the most memory-efficient implementation is selected whereas FASTEST will make sure that the fastest implementation is selected.

Params:
  • valueCount – the number of elements
  • bitsPerValue – the number of bits available for any given value
  • acceptableOverheadRatio – an acceptable overhead ratio per value
Returns:a mutable packed integer array
@lucene.internal
/** * Create a packed integer array with the given amount of values initialized * to 0. the valueCount and the bitsPerValue cannot be changed after creation. * All Mutables known by this factory are kept fully in RAM. * <p> * Positive values of <code>acceptableOverheadRatio</code> will trade space * for speed by selecting a faster but potentially less memory-efficient * implementation. An <code>acceptableOverheadRatio</code> of * {@link PackedInts#COMPACT} will make sure that the most memory-efficient * implementation is selected whereas {@link PackedInts#FASTEST} will make sure * that the fastest implementation is selected. * * @param valueCount the number of elements * @param bitsPerValue the number of bits available for any given value * @param acceptableOverheadRatio an acceptable overhead * ratio per value * @return a mutable packed integer array * @lucene.internal */
public static Mutable getMutable(int valueCount, int bitsPerValue, float acceptableOverheadRatio) { final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio); return getMutable(valueCount, formatAndBits.bitsPerValue, formatAndBits.format); }
Same as getMutable(int, int, float) with a pre-computed number of bits per value and format. @lucene.internal
/** Same as {@link #getMutable(int, int, float)} with a pre-computed number * of bits per value and format. * @lucene.internal */
public static Mutable getMutable(int valueCount, int bitsPerValue, PackedInts.Format format) { assert valueCount >= 0; switch (format) { case PACKED_SINGLE_BLOCK: return Packed64SingleBlock.create(valueCount, bitsPerValue); case PACKED: switch (bitsPerValue) { case 8: return new Direct8(valueCount); case 16: return new Direct16(valueCount); case 32: return new Direct32(valueCount); case 64: return new Direct64(valueCount); case 24: if (valueCount <= Packed8ThreeBlocks.MAX_SIZE) { return new Packed8ThreeBlocks(valueCount); } break; case 48: if (valueCount <= Packed16ThreeBlocks.MAX_SIZE) { return new Packed16ThreeBlocks(valueCount); } break; } return new Packed64(valueCount, bitsPerValue); default: throw new AssertionError(); } }
Expert: Create a packed integer array writer for the given output, format, value count, and number of bits per value.

The resulting stream will be long-aligned. This means that depending on the format which is used, up to 63 bits will be wasted. An easy way to make sure that no space is lost is to always use a valueCount that is a multiple of 64.

This method does not write any metadata to the stream, meaning that it is your responsibility to store it somewhere else in order to be able to recover data from the stream later on:

It is possible to start writing values without knowing how many of them you are actually going to write. To do this, just pass -1 as valueCount. On the other hand, for any positive value of valueCount, the returned writer will make sure that you don't write more values than expected and pad the end of stream with zeros in case you have written less than valueCount when calling Writer.finish().

The mem parameter lets you control how much memory can be used to buffer changes in memory before flushing to disk. High values of mem are likely to improve throughput. On the other hand, if speed is not that important to you, a value of 0 will use as little memory as possible and should already offer reasonable throughput.

Params:
  • out – the data output
  • format – the format to use to serialize the values
  • valueCount – the number of values
  • bitsPerValue – the number of bits per value
  • mem – how much memory (in bytes) can be used to speed up serialization
See Also:
Returns: a Writer
@lucene.internal
/** * Expert: Create a packed integer array writer for the given output, format, * value count, and number of bits per value. * <p> * The resulting stream will be long-aligned. This means that depending on * the format which is used, up to 63 bits will be wasted. An easy way to * make sure that no space is lost is to always use a <code>valueCount</code> * that is a multiple of 64. * <p> * This method does not write any metadata to the stream, meaning that it is * your responsibility to store it somewhere else in order to be able to * recover data from the stream later on: * <ul> * <li><code>format</code> (using {@link Format#getId()}),</li> * <li><code>valueCount</code>,</li> * <li><code>bitsPerValue</code>,</li> * <li>{@link #VERSION_CURRENT}.</li> * </ul> * <p> * It is possible to start writing values without knowing how many of them you * are actually going to write. To do this, just pass <code>-1</code> as * <code>valueCount</code>. On the other hand, for any positive value of * <code>valueCount</code>, the returned writer will make sure that you don't * write more values than expected and pad the end of stream with zeros in * case you have written less than <code>valueCount</code> when calling * {@link Writer#finish()}. * <p> * The <code>mem</code> parameter lets you control how much memory can be used * to buffer changes in memory before flushing to disk. High values of * <code>mem</code> are likely to improve throughput. On the other hand, if * speed is not that important to you, a value of <code>0</code> will use as * little memory as possible and should already offer reasonable throughput. * * @param out the data output * @param format the format to use to serialize the values * @param valueCount the number of values * @param bitsPerValue the number of bits per value * @param mem how much memory (in bytes) can be used to speed up serialization * @return a Writer * @see PackedInts#getReaderIteratorNoHeader(DataInput, Format, int, int, int, int) * @see PackedInts#getReaderNoHeader(DataInput, Format, int, int, int) * @lucene.internal */
public static Writer getWriterNoHeader( DataOutput out, Format format, int valueCount, int bitsPerValue, int mem) { return new PackedWriter(format, out, valueCount, bitsPerValue, mem); }
Create a packed integer array writer for the given output, format, value count, and number of bits per value.

The resulting stream will be long-aligned. This means that depending on the format which is used under the hoods, up to 63 bits will be wasted. An easy way to make sure that no space is lost is to always use a valueCount that is a multiple of 64.

This method writes metadata to the stream, so that the resulting stream is sufficient to restore a Reader from it. You don't need to track valueCount or bitsPerValue by yourself. In case this is a problem, you should probably look at getWriterNoHeader(DataOutput, Format, int, int, int).

The acceptableOverheadRatio parameter controls how readers that will be restored from this stream trade space for speed by selecting a faster but potentially less memory-efficient implementation. An acceptableOverheadRatio of COMPACT will make sure that the most memory-efficient implementation is selected whereas FASTEST will make sure that the fastest implementation is selected. In case you are only interested in reading this stream sequentially later on, you should probably use COMPACT.

Params:
  • out – the data output
  • valueCount – the number of values
  • bitsPerValue – the number of bits per value
  • acceptableOverheadRatio – an acceptable overhead ratio per value
Throws:
Returns: a Writer
@lucene.internal
/** * Create a packed integer array writer for the given output, format, value * count, and number of bits per value. * <p> * The resulting stream will be long-aligned. This means that depending on * the format which is used under the hoods, up to 63 bits will be wasted. * An easy way to make sure that no space is lost is to always use a * <code>valueCount</code> that is a multiple of 64. * <p> * This method writes metadata to the stream, so that the resulting stream is * sufficient to restore a {@link Reader} from it. You don't need to track * <code>valueCount</code> or <code>bitsPerValue</code> by yourself. In case * this is a problem, you should probably look at * {@link #getWriterNoHeader(DataOutput, Format, int, int, int)}. * <p> * The <code>acceptableOverheadRatio</code> parameter controls how * readers that will be restored from this stream trade space * for speed by selecting a faster but potentially less memory-efficient * implementation. An <code>acceptableOverheadRatio</code> of * {@link PackedInts#COMPACT} will make sure that the most memory-efficient * implementation is selected whereas {@link PackedInts#FASTEST} will make sure * that the fastest implementation is selected. In case you are only interested * in reading this stream sequentially later on, you should probably use * {@link PackedInts#COMPACT}. * * @param out the data output * @param valueCount the number of values * @param bitsPerValue the number of bits per value * @param acceptableOverheadRatio an acceptable overhead ratio per value * @return a Writer * @throws IOException If there is a low-level I/O error * @lucene.internal */
public static Writer getWriter(DataOutput out, int valueCount, int bitsPerValue, float acceptableOverheadRatio) throws IOException { assert valueCount >= 0; final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio); final Writer writer = getWriterNoHeader(out, formatAndBits.format, valueCount, formatAndBits.bitsPerValue, DEFAULT_BUFFER_SIZE); writer.writeHeader(); return writer; }
Returns how many bits are required to hold values up to and including maxValue NOTE: This method returns at least 1.
Params:
  • maxValue – the maximum value that should be representable.
Returns:the amount of bits needed to represent values from 0 to maxValue.
@lucene.internal
/** Returns how many bits are required to hold values up * to and including maxValue * NOTE: This method returns at least 1. * @param maxValue the maximum value that should be representable. * @return the amount of bits needed to represent values from 0 to maxValue. * @lucene.internal */
public static int bitsRequired(long maxValue) { if (maxValue < 0) { throw new IllegalArgumentException("maxValue must be non-negative (got: " + maxValue + ")"); } return unsignedBitsRequired(maxValue); }
Returns how many bits are required to store bits, interpreted as an unsigned value. NOTE: This method returns at least 1.
@lucene.internal
/** Returns how many bits are required to store <code>bits</code>, * interpreted as an unsigned value. * NOTE: This method returns at least 1. * @lucene.internal */
public static int unsignedBitsRequired(long bits) { return Math.max(1, 64 - Long.numberOfLeadingZeros(bits)); }
Calculates the maximum unsigned long that can be expressed with the given number of bits.
Params:
  • bitsPerValue – the number of bits available for any given value.
Returns:the maximum value for the given bits.
@lucene.internal
/** * Calculates the maximum unsigned long that can be expressed with the given * number of bits. * @param bitsPerValue the number of bits available for any given value. * @return the maximum value for the given bits. * @lucene.internal */
public static long maxValue(int bitsPerValue) { return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue); }
Copy src[srcPos:srcPos+len] into dest[destPos:destPos+len] using at most mem bytes.
/** * Copy <code>src[srcPos:srcPos+len]</code> into * <code>dest[destPos:destPos+len]</code> using at most <code>mem</code> * bytes. */
public static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, int mem) { assert srcPos + len <= src.size(); assert destPos + len <= dest.size(); final int capacity = mem >>> 3; if (capacity == 0) { for (int i = 0; i < len; ++i) { dest.set(destPos++, src.get(srcPos++)); } } else if (len > 0) { // use bulk operations final long[] buf = new long[Math.min(capacity, len)]; copy(src, srcPos, dest, destPos, len, buf); } }
Same as copy(Reader, int, Mutable, int, int, int) but using a pre-allocated buffer.
/** Same as {@link #copy(Reader, int, Mutable, int, int, int)} but using a pre-allocated buffer. */
static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) { assert buf.length > 0; int remaining = 0; while (len > 0) { final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining)); assert read > 0; srcPos += read; len -= read; remaining += read; final int written = dest.set(destPos, buf, 0, remaining); assert written > 0; destPos += written; if (written < remaining) { System.arraycopy(buf, written, buf, 0, remaining - written); } remaining -= written; } while (remaining > 0) { final int written = dest.set(destPos, buf, 0, remaining); destPos += written; remaining -= written; System.arraycopy(buf, written, buf, 0, remaining); } }
Check that the block size is a power of 2, in the right bounds, and return its log in base 2.
/** Check that the block size is a power of 2, in the right bounds, and return * its log in base 2. */
static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) { if (blockSize < minBlockSize || blockSize > maxBlockSize) { throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize); } if ((blockSize & (blockSize - 1)) != 0) { throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize); } return Integer.numberOfTrailingZeros(blockSize); }
Return the number of blocks required to store size values on blockSize.
/** Return the number of blocks required to store <code>size</code> values on * <code>blockSize</code>. */
static int numBlocks(long size, int blockSize) { final int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 0 : 1); if ((long) numBlocks * blockSize < size) { throw new IllegalArgumentException("size is too large for this block size"); } return numBlocks; } }