/*
 * Copyright 2014 The Netty Project
 *
 * The Netty Project licenses this file to you under the Apache License,
 * version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at:
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package io.netty.handler.codec.compression;

import static io.netty.handler.codec.compression.Bzip2Constants.*;

Reads and decompresses a single Bzip2 block.

Block decoding consists of the following stages:
1. Read block header
2. Read Huffman tables
3. Read and decode Huffman encoded data - decodeHuffmanData(Bzip2HuffmanStageDecoder)
4. Run-Length Decoding[2] - decodeHuffmanData(Bzip2HuffmanStageDecoder)
5. Inverse Move To Front Transform - decodeHuffmanData(Bzip2HuffmanStageDecoder)
6. Inverse Burrows Wheeler Transform - initialiseInverseBWT()
7. Run-Length Decoding[1] - read()
8. Optional Block De-Randomisation - read() (through decodeNextBWTByte())
/** * Reads and decompresses a single Bzip2 block.<br><br> * * Block decoding consists of the following stages:<br> * 1. Read block header<br> * 2. Read Huffman tables<br> * 3. Read and decode Huffman encoded data - {@link #decodeHuffmanData(Bzip2HuffmanStageDecoder)}<br> * 4. Run-Length Decoding[2] - {@link #decodeHuffmanData(Bzip2HuffmanStageDecoder)}<br> * 5. Inverse Move To Front Transform - {@link #decodeHuffmanData(Bzip2HuffmanStageDecoder)}<br> * 6. Inverse Burrows Wheeler Transform - {@link #initialiseInverseBWT()}<br> * 7. Run-Length Decoding[1] - {@link #read()}<br> * 8. Optional Block De-Randomisation - {@link #read()} (through {@link #decodeNextBWTByte()}) */
final class Bzip2BlockDecompressor {
A reader that provides bit-level reads.
/** * A reader that provides bit-level reads. */
private final Bzip2BitReader reader;
Calculates the block CRC from the fully decoded bytes of the block.
/** * Calculates the block CRC from the fully decoded bytes of the block. */
private final Crc32 crc = new Crc32();
The CRC of the current block as read from the block header.
/** * The CRC of the current block as read from the block header. */
private final int blockCRC;
true if the current block is randomised, otherwise false.
/** * {@code true} if the current block is randomised, otherwise {@code false}. */
private final boolean blockRandomised; /* Huffman Decoding stage */
The end-of-block Huffman symbol. Decoding of the block ends when this is encountered.
/** * The end-of-block Huffman symbol. Decoding of the block ends when this is encountered. */
int huffmanEndOfBlockSymbol;
Bitmap, of ranges of 16 bytes, present/not present.
/** * Bitmap, of ranges of 16 bytes, present/not present. */
int huffmanInUse16;
A map from Huffman symbol index to output character. Some types of data (e.g. ASCII text) may contain only a limited number of byte values; Huffman symbols are only allocated to those values that actually occur in the uncompressed data.
/** * A map from Huffman symbol index to output character. Some types of data (e.g. ASCII text) * may contain only a limited number of byte values; Huffman symbols are only allocated to * those values that actually occur in the uncompressed data. */
final byte[] huffmanSymbolMap = new byte[256]; /* Move To Front stage */
Counts of each byte value within the huffmanSymbolMap data. Collected at the Move To Front stage, consumed by the Inverse Burrows Wheeler Transform stage.
/** * Counts of each byte value within the {@link Bzip2BlockDecompressor#huffmanSymbolMap} data. * Collected at the Move To Front stage, consumed by the Inverse Burrows Wheeler Transform stage. */
private final int[] bwtByteCounts = new int[256];
The Burrows-Wheeler Transform processed data. Read at the Move To Front stage, consumed by the Inverse Burrows Wheeler Transform stage.
/** * The Burrows-Wheeler Transform processed data. Read at the Move To Front stage, consumed by the * Inverse Burrows Wheeler Transform stage. */
private final byte[] bwtBlock;
Starting pointer into BWT for after untransform.
/** * Starting pointer into BWT for after untransform. */
private final int bwtStartPointer; /* Inverse Burrows-Wheeler Transform stage */
At each position contains the union of :- An output character (8 bits) A pointer from each position to its successor (24 bits, left shifted 8 bits) As the pointer cannot exceed the maximum block size of 900k, 24 bits is more than enough to hold it; Folding the character data into the spare bits while performing the inverse BWT, when both pieces of information are available, saves a large number of memory accesses in the final decoding stages.
/** * At each position contains the union of :- * An output character (8 bits) * A pointer from each position to its successor (24 bits, left shifted 8 bits) * As the pointer cannot exceed the maximum block size of 900k, 24 bits is more than enough to * hold it; Folding the character data into the spare bits while performing the inverse BWT, * when both pieces of information are available, saves a large number of memory accesses in * the final decoding stages. */
private int[] bwtMergedPointers;
The current merged pointer into the Burrow-Wheeler Transform array.
/** * The current merged pointer into the Burrow-Wheeler Transform array. */
private int bwtCurrentMergedPointer;
The actual length in bytes of the current block at the Inverse Burrows Wheeler Transform stage (before final Run-Length Decoding).
/** * The actual length in bytes of the current block at the Inverse Burrows Wheeler Transform * stage (before final Run-Length Decoding). */
private int bwtBlockLength;
The number of output bytes that have been decoded up to the Inverse Burrows Wheeler Transform stage.
/** * The number of output bytes that have been decoded up to the Inverse Burrows Wheeler Transform stage. */
private int bwtBytesDecoded; /* Run-Length Encoding and Random Perturbation stage */
The most recently RLE decoded byte.
/** * The most recently RLE decoded byte. */
private int rleLastDecodedByte = -1;
The number of previous identical output bytes decoded. After 4 identical bytes, the next byte decoded is an RLE repeat count.
/** * The number of previous identical output bytes decoded. After 4 identical bytes, the next byte * decoded is an RLE repeat count. */
private int rleAccumulator;
The RLE repeat count of the current decoded byte. When this reaches zero, a new byte is decoded.
/** * The RLE repeat count of the current decoded byte. When this reaches zero, a new byte is decoded. */
private int rleRepeat;
If the current block is randomised, the position within the RNUMS randomisation array.
/** * If the current block is randomised, the position within the RNUMS randomisation array. */
private int randomIndex;
If the current block is randomised, the remaining count at the current RNUMS position.
/** * If the current block is randomised, the remaining count at the current RNUMS position. */
private int randomCount = Bzip2Rand.rNums(0) - 1;
Table for Move To Front transformations.
/** * Table for Move To Front transformations. */
private final Bzip2MoveToFrontTable symbolMTF = new Bzip2MoveToFrontTable(); // This variables is used to save current state if we haven't got enough readable bits private int repeatCount; private int repeatIncrement = 1; private int mtfValue; Bzip2BlockDecompressor(final int blockSize, final int blockCRC, final boolean blockRandomised, final int bwtStartPointer, final Bzip2BitReader reader) { bwtBlock = new byte[blockSize]; this.blockCRC = blockCRC; this.blockRandomised = blockRandomised; this.bwtStartPointer = bwtStartPointer; this.reader = reader; }
Reads the Huffman encoded data from the input stream, performs Run-Length Decoding and applies the Move To Front transform to reconstruct the Burrows-Wheeler Transform array.
/** * Reads the Huffman encoded data from the input stream, performs Run-Length Decoding and * applies the Move To Front transform to reconstruct the Burrows-Wheeler Transform array. */
boolean decodeHuffmanData(final Bzip2HuffmanStageDecoder huffmanDecoder) { final Bzip2BitReader reader = this.reader; final byte[] bwtBlock = this.bwtBlock; final byte[] huffmanSymbolMap = this.huffmanSymbolMap; final int streamBlockSize = this.bwtBlock.length; final int huffmanEndOfBlockSymbol = this.huffmanEndOfBlockSymbol; final int[] bwtByteCounts = this.bwtByteCounts; final Bzip2MoveToFrontTable symbolMTF = this.symbolMTF; int bwtBlockLength = this.bwtBlockLength; int repeatCount = this.repeatCount; int repeatIncrement = this.repeatIncrement; int mtfValue = this.mtfValue; for (;;) { if (!reader.hasReadableBits(HUFFMAN_DECODE_MAX_CODE_LENGTH)) { this.bwtBlockLength = bwtBlockLength; this.repeatCount = repeatCount; this.repeatIncrement = repeatIncrement; this.mtfValue = mtfValue; return false; } final int nextSymbol = huffmanDecoder.nextSymbol(); if (nextSymbol == HUFFMAN_SYMBOL_RUNA) { repeatCount += repeatIncrement; repeatIncrement <<= 1; } else if (nextSymbol == HUFFMAN_SYMBOL_RUNB) { repeatCount += repeatIncrement << 1; repeatIncrement <<= 1; } else { if (repeatCount > 0) { if (bwtBlockLength + repeatCount > streamBlockSize) { throw new DecompressionException("block exceeds declared block size"); } final byte nextByte = huffmanSymbolMap[mtfValue]; bwtByteCounts[nextByte & 0xff] += repeatCount; while (--repeatCount >= 0) { bwtBlock[bwtBlockLength++] = nextByte; } repeatCount = 0; repeatIncrement = 1; } if (nextSymbol == huffmanEndOfBlockSymbol) { break; } if (bwtBlockLength >= streamBlockSize) { throw new DecompressionException("block exceeds declared block size"); } mtfValue = symbolMTF.indexToFront(nextSymbol - 1) & 0xff; final byte nextByte = huffmanSymbolMap[mtfValue]; bwtByteCounts[nextByte & 0xff]++; bwtBlock[bwtBlockLength++] = nextByte; } } this.bwtBlockLength = bwtBlockLength; initialiseInverseBWT(); return true; }
Set up the Inverse Burrows-Wheeler Transform merged pointer array.
/** * Set up the Inverse Burrows-Wheeler Transform merged pointer array. */
private void initialiseInverseBWT() { final int bwtStartPointer = this.bwtStartPointer; final byte[] bwtBlock = this.bwtBlock; final int[] bwtMergedPointers = new int[bwtBlockLength]; final int[] characterBase = new int[256]; if (bwtStartPointer < 0 || bwtStartPointer >= bwtBlockLength) { throw new DecompressionException("start pointer invalid"); } // Cumulative character counts System.arraycopy(bwtByteCounts, 0, characterBase, 1, 255); for (int i = 2; i <= 255; i++) { characterBase[i] += characterBase[i - 1]; } // Merged-Array Inverse Burrows-Wheeler Transform // Combining the output characters and forward pointers into a single array here, where we // have already read both of the corresponding values, cuts down on memory accesses in the // final walk through the array for (int i = 0; i < bwtBlockLength; i++) { int value = bwtBlock[i] & 0xff; bwtMergedPointers[characterBase[value]++] = (i << 8) + value; } this.bwtMergedPointers = bwtMergedPointers; bwtCurrentMergedPointer = bwtMergedPointers[bwtStartPointer]; }
Decodes a byte from the final Run-Length Encoding stage, pulling a new byte from the Burrows-Wheeler Transform stage when required.
Returns:The decoded byte, or -1 if there are no more bytes
/** * Decodes a byte from the final Run-Length Encoding stage, pulling a new byte from the * Burrows-Wheeler Transform stage when required. * @return The decoded byte, or -1 if there are no more bytes */
public int read() { while (rleRepeat < 1) { if (bwtBytesDecoded == bwtBlockLength) { return -1; } int nextByte = decodeNextBWTByte(); if (nextByte != rleLastDecodedByte) { // New byte, restart accumulation rleLastDecodedByte = nextByte; rleRepeat = 1; rleAccumulator = 1; crc.updateCRC(nextByte); } else { if (++rleAccumulator == 4) { // Accumulation complete, start repetition int rleRepeat = decodeNextBWTByte() + 1; this.rleRepeat = rleRepeat; rleAccumulator = 0; crc.updateCRC(nextByte, rleRepeat); } else { rleRepeat = 1; crc.updateCRC(nextByte); } } } rleRepeat--; return rleLastDecodedByte; }
Decodes a byte from the Burrows-Wheeler Transform stage. If the block has randomisation applied, reverses the randomisation.
Returns:The decoded byte
/** * Decodes a byte from the Burrows-Wheeler Transform stage. If the block has randomisation * applied, reverses the randomisation. * @return The decoded byte */
private int decodeNextBWTByte() { int mergedPointer = bwtCurrentMergedPointer; int nextDecodedByte = mergedPointer & 0xff; bwtCurrentMergedPointer = bwtMergedPointers[mergedPointer >>> 8]; if (blockRandomised) { if (--randomCount == 0) { nextDecodedByte ^= 1; randomIndex = (randomIndex + 1) % 512; randomCount = Bzip2Rand.rNums(randomIndex); } } bwtBytesDecoded++; return nextDecodedByte; } public int blockLength() { return bwtBlockLength; }
Verify and return the block CRC. This method may only be called after all of the block's bytes have been read.
Returns:The block CRC
/** * Verify and return the block CRC. This method may only be called * after all of the block's bytes have been read. * @return The block CRC */
int checkCRC() { final int computedBlockCRC = crc.getCRC(); if (blockCRC != computedBlockCRC) { throw new DecompressionException("block CRC error"); } return computedBlockCRC; } }