org.apache.commons/commons-compress/1.18 : org/apache/commons/compress/compressors/lzw/LZWInputStream.java

LZWInputStream

https://commons.apache.org/proper/commons-compress/: Apache Commons Compress software defines an API for working with compression and archive formats. These include: bzip2, gzip, pack200, lzma, xz, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4, Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj. (The Apache Software Foundation)

Apache License, Version 2.0

Wolfgang Glas
Christian Kohlschütte
Bear Giles
Michael Kuss
Lasse Collin
John Kodis
BELUGA BEHR
Simon Spero
Michael Hausegger

Torsten Curdt
Stefan Bodewig
Sebastian Bazley
Christian Grobmeier
Julius Davies
Damjan Jovanovic
Emmanuel Bourg
Gary Gregory
Rob Tompkins

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.lzw;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteOrder;

import org.apache.commons.compress.MemoryLimitException;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.utils.BitInputStream;
import org.apache.commons.compress.utils.InputStreamStatistics;

Generic LZW implementation. It is used internally for
the Z decompressor and the Unshrinking Zip file compression method,
but may be useful for third-party projects in implementing their own LZW variations.
@NotThreadSafe 
Since: 1.10/**
 * <p>Generic LZW implementation. It is used internally for
 * the Z decompressor and the Unshrinking Zip file compression method,
 * but may be useful for third-party projects in implementing their own LZW variations.</p>
 *
 * @NotThreadSafe
 * @since 1.10
 */
public abstract class LZWInputStream extends CompressorInputStream implements InputStreamStatistics {
    protected static final int DEFAULT_CODE_SIZE = 9;
    protected static final int UNUSED_PREFIX = -1;

    private final byte[] oneByte = new byte[1];

    protected final BitInputStream in;
    private int clearCode = -1;
    private int codeSize = DEFAULT_CODE_SIZE;
    private byte previousCodeFirstChar;
    private int previousCode = UNUSED_PREFIX;
    private int tableSize;
    private int[] prefixes;
    private byte[] characters;
    private byte[] outputStack;
    private int outputStackLocation;

    protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
        this.in = new BitInputStream(inputStream, byteOrder);
    }

    @Override
    public void close() throws IOException {
        in.close();
    }

    @Override
    public int read() throws IOException {
        final int ret = read(oneByte);
        if (ret < 0) {
            return ret;
        }
        return 0xff & oneByte[0];
    }

    @Override
    public int read(final byte[] b, final int off, final int len) throws IOException {
        int bytesRead = readFromStack(b, off, len);
        while (len - bytesRead > 0) {
            final int result = decompressNextSymbol();
            if (result < 0) {
                if (bytesRead > 0) {
                    count(bytesRead);
                    return bytesRead;
                }
                return result;
            }
            bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
        }
        count(bytesRead);
        return bytesRead;
    }

    Since: 1.17/**
     * @since 1.17
     */
    @Override
    public long getCompressedCount() {
        return in.getBytesRead();
    }

    Read the next code and expand it.
Throws: IOException – on error
Returns: the expanded next code/**
     * Read the next code and expand it.
     * @return the expanded next code
     * @throws IOException on error
     */
    protected abstract int decompressNextSymbol() throws IOException;

    Add a new entry to the dictionary.
Params: previousCode – the previous code
character – the next character to append
Throws: IOException – on error
Returns: the new code/**
     * Add a new entry to the dictionary.
     * @param previousCode the previous code
     * @param character the next character to append
     * @return the new code
     * @throws IOException on error
     */
    protected abstract int addEntry(int previousCode, byte character)
        throws IOException;

    Sets the clear code based on the code size.
Params: codeSize – code size/**
     * Sets the clear code based on the code size.
     * @param codeSize code size
     */
    protected void setClearCode(final int codeSize) {
        clearCode = (1 << (codeSize - 1));
    }

    Initializes the arrays based on the maximum code size.
First checks that the estimated memory usage is below memoryLimitInKb
Params: maxCodeSize – maximum code size
memoryLimitInKb – maximum allowed estimated memory usage in Kb
Throws: MemoryLimitException – if estimated memory usage is greater than memoryLimitInKb/**
     * Initializes the arrays based on the maximum code size.
     * First checks that the estimated memory usage is below memoryLimitInKb
     *
     * @param maxCodeSize maximum code size
     * @param memoryLimitInKb maximum allowed estimated memory usage in Kb
     * @throws MemoryLimitException if estimated memory usage is greater than memoryLimitInKb
     */
    protected void initializeTables(final int maxCodeSize, final int memoryLimitInKb)
            throws MemoryLimitException {

        if (memoryLimitInKb > -1) {
            final int maxTableSize = 1 << maxCodeSize;
            //account for potential overflow
            long memoryUsageInBytes = (long) maxTableSize * 6;//(4 (prefixes) + 1 (characters) +1 (outputStack))
            long memoryUsageInKb = memoryUsageInBytes >> 10;

            if (memoryUsageInKb > memoryLimitInKb) {
                throw new MemoryLimitException(memoryUsageInKb, memoryLimitInKb);
            }
        }
        initializeTables(maxCodeSize);
    }

    Initializes the arrays based on the maximum code size.
Params: maxCodeSize – maximum code size/**
     * Initializes the arrays based on the maximum code size.
     * @param maxCodeSize maximum code size
     */
    protected void initializeTables(final int maxCodeSize) {
        final int maxTableSize = 1 << maxCodeSize;
        prefixes = new int[maxTableSize];
        characters = new byte[maxTableSize];
        outputStack = new byte[maxTableSize];
        outputStackLocation = maxTableSize;
        final int max = 1 << 8;
        for (int i = 0; i < max; i++) {
            prefixes[i] = -1;
            characters[i] = (byte) i;
        }
    }

    Reads the next code from the stream.
Throws: IOException – on error
Returns: the next code/**
     * Reads the next code from the stream.
     * @return the next code
     * @throws IOException on error
     */
    protected int readNextCode() throws IOException {
        if (codeSize > 31) {
            throw new IllegalArgumentException("code size must not be bigger than 31");
        }
        return (int) in.readBits(codeSize);
    }

    Adds a new entry if the maximum table size hasn't been exceeded
and returns the new index.
Params: previousCode – the previous code
character – the character to append
maxTableSize – the maximum table size
Returns: the new code/**
     * Adds a new entry if the maximum table size hasn't been exceeded
     * and returns the new index.
     * @param previousCode the previous code
     * @param character the character to append
     * @param maxTableSize the maximum table size
     * @return the new code
     */
    protected int addEntry(final int previousCode, final byte character, final int maxTableSize) {
        if (tableSize < maxTableSize) {
            prefixes[tableSize] = previousCode;
            characters[tableSize] = character;
            return tableSize++;
        }
        return -1;
    }

    Add entry for repeat of previousCode we haven't added, yet.
Throws: IOException – on error
Returns: new code for a repeat of the previous code/**
     * Add entry for repeat of previousCode we haven't added, yet.
     * @return new code for a repeat of the previous code
     * @throws IOException on error
     */
    protected int addRepeatOfPreviousCode() throws IOException {
        if (previousCode == -1) {
            // can't have a repeat for the very first code
            throw new IOException("The first code can't be a reference to its preceding code");
        }
        return addEntry(previousCode, previousCodeFirstChar);
    }

    Expands the entry with index code to the output stack and may
create a new entry
Params: code – the code
addedUnfinishedEntry – whether unfinished entries have been added
Throws: IOException – on error
Returns: the new location of the output stack/**
     * Expands the entry with index code to the output stack and may
     * create a new entry
     * @param code the code
     * @param addedUnfinishedEntry whether unfinished entries have been added
     * @return the new location of the output stack
     * @throws IOException on error
     */
    protected int expandCodeToOutputStack(final int code, final boolean addedUnfinishedEntry)
        throws IOException {
        for (int entry = code; entry >= 0; entry = prefixes[entry]) {
            outputStack[--outputStackLocation] = characters[entry];
        }
        if (previousCode != -1 && !addedUnfinishedEntry) {
            addEntry(previousCode, outputStack[outputStackLocation]);
        }
        previousCode = code;
        previousCodeFirstChar = outputStack[outputStackLocation];
        return outputStackLocation;
    }

    private int readFromStack(final byte[] b, final int off, final int len) {
        final int remainingInStack = outputStack.length - outputStackLocation;
        if (remainingInStack > 0) {
            final int maxLength = Math.min(remainingInStack, len);
            System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
            outputStackLocation += maxLength;
            return maxLength;
        }
        return 0;
    }

    protected int getCodeSize() {
        return codeSize;
    }

    protected void resetCodeSize() {
        setCodeSize(DEFAULT_CODE_SIZE);
    }

    protected void setCodeSize(final int cs) {
        this.codeSize = cs;
    }

    protected void incrementCodeSize() {
        codeSize++;
    }

    protected void resetPreviousCode() {
        this.previousCode = -1;
    }

    protected int getPrefix(final int offset) {
        return prefixes[offset];
    }

    protected void setPrefix(final int offset, final int value) {
        prefixes[offset] = value;
    }

    protected int getPrefixesLength() {
        return prefixes.length;
    }

    protected int getClearCode() {
        return clearCode;
    }

    protected int getTableSize() {
        return tableSize;
    }

    protected void setTableSize(final int newSize) {
        tableSize = newSize;
    }

}

Throws:	IOException – on error
Returns:	the expanded next code

Params:	previousCode – the previous code character – the next character to append
Throws:	IOException – on error
Returns:	the new code

Params:	maxCodeSize – maximum code size memoryLimitInKb – maximum allowed estimated memory usage in Kb
Throws:	MemoryLimitException – if estimated memory usage is greater than memoryLimitInKb

Params:	code – the code addedUnfinishedEntry – whether unfinished entries have been added
Throws:	IOException – on error
Returns:	the new location of the output stack

/

org.apache.commons/ commons-compress/ 1.18/ org/apache/commons/compress/compressors/lzw/LZWInputStream.java