org.apache.commons/commons-compress/1.18 : org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java

FramedSnappyCompressorInputStream
https://commons.apache.org/proper/commons-compress/: Apache Commons Compress software defines an API for working with compression and archive formats. These include: bzip2, gzip, pack200, lzma, xz, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4, Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj. (The Apache Software Foundation)
Apache License, Version 2.0
Wolfgang Glas
Christian Kohlschütte
Bear Giles
Michael Kuss
Lasse Collin
John Kodis
BELUGA BEHR
Simon Spero
Michael Hausegger
Torsten Curdt
Stefan Bodewig
Sebastian Bazley
Christian Grobmeier
Julius Davies
Damjan Jovanovic
Emmanuel Bourg
Gary Gregory
Rob Tompkins
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.snappy;

import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.Arrays;

import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.ByteUtils;
import org.apache.commons.compress.utils.CountingInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;

CompressorInputStream for the framing Snappy format.
Based on the "spec" in the version "Last revised: 2013-10-25"
See Also: Snappy framing format description
Since: 1.7/**
 * CompressorInputStream for the framing Snappy format.
 *
 * <p>Based on the "spec" in the version "Last revised: 2013-10-25"</p>
 *
 * @see <a href="https://github.com/google/snappy/blob/master/framing_format.txt">Snappy framing format description</a>
 * @since 1.7
 */
public class FramedSnappyCompressorInputStream extends CompressorInputStream
    implements InputStreamStatistics {

    package private for tests only.
/**
     * package private for tests only.
     */
    static final long MASK_OFFSET = 0xa282ead8L;

    private static final int STREAM_IDENTIFIER_TYPE = 0xff;
    static final int COMPRESSED_CHUNK_TYPE = 0;
    private static final int UNCOMPRESSED_CHUNK_TYPE = 1;
    private static final int PADDING_CHUNK_TYPE = 0xfe;
    private static final int MIN_UNSKIPPABLE_TYPE = 2;
    private static final int MAX_UNSKIPPABLE_TYPE = 0x7f;
    private static final int MAX_SKIPPABLE_TYPE = 0xfd;

    // used by FramedSnappyCompressorOutputStream as well
    static final byte[] SZ_SIGNATURE = new byte[] { //NOSONAR
        (byte) STREAM_IDENTIFIER_TYPE, // tag
        6, 0, 0, // length
        's', 'N', 'a', 'P', 'p', 'Y'
    };

    private long unreadBytes;
    private final CountingInputStream countingStream;

    The underlying stream to read compressed data from /** The underlying stream to read compressed data from */
    private final PushbackInputStream in;

    The dialect to expect /** The dialect to expect */
    private final FramedSnappyDialect dialect;

    private SnappyCompressorInputStream currentCompressedChunk;

    // used in no-arg read method
    private final byte[] oneByte = new byte[1];

    private boolean endReached, inUncompressedChunk;

    private int uncompressedBytesRemaining;
    private long expectedChecksum = -1;
    private final int blockSize;
    private final PureJavaCrc32C checksum = new PureJavaCrc32C();

    private final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() {
        @Override
        public int getAsByte() throws IOException {
            return readOneByte();
        }
    };

    Constructs a new input stream that decompresses snappy-framed-compressed data from the specified input stream using the FramedSnappyDialect.STANDARD dialect. 
Params: in –  the InputStream from which to read the compressed data
Throws: IOException – if reading fails/**
     * Constructs a new input stream that decompresses
     * snappy-framed-compressed data from the specified input stream
     * using the {@link FramedSnappyDialect#STANDARD} dialect.
     * @param in  the InputStream from which to read the compressed data
     * @throws IOException if reading fails
     */
    public FramedSnappyCompressorInputStream(final InputStream in) throws IOException {
        this(in, FramedSnappyDialect.STANDARD);
    }

    Constructs a new input stream that decompresses snappy-framed-compressed data
from the specified input stream.
Params: in –  the InputStream from which to read the compressed data
dialect – the dialect used by the compressed stream
Throws: IOException – if reading fails/**
     * Constructs a new input stream that decompresses snappy-framed-compressed data
     * from the specified input stream.
     * @param in  the InputStream from which to read the compressed data
     * @param dialect the dialect used by the compressed stream
     * @throws IOException if reading fails
     */
    public FramedSnappyCompressorInputStream(final InputStream in,
                                             final FramedSnappyDialect dialect)
        throws IOException {
        this(in, SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE, dialect);
    }

    Constructs a new input stream that decompresses snappy-framed-compressed data
from the specified input stream.
Params: in –  the InputStream from which to read the compressed data
blockSize – the block size to use for the compressed stream
dialect – the dialect used by the compressed stream
Throws: IOException – if reading fails
Since: 1.14/**
     * Constructs a new input stream that decompresses snappy-framed-compressed data
     * from the specified input stream.
     * @param in  the InputStream from which to read the compressed data
     * @param blockSize the block size to use for the compressed stream
     * @param dialect the dialect used by the compressed stream
     * @throws IOException if reading fails
     * @since 1.14
     */
    public FramedSnappyCompressorInputStream(final InputStream in,
                                             final int blockSize,
                                             final FramedSnappyDialect dialect)
        throws IOException {
        countingStream = new CountingInputStream(in);
        this.in = new PushbackInputStream(countingStream, 1);
        this.blockSize = blockSize;
        this.dialect = dialect;
        if (dialect.hasStreamIdentifier()) {
            readStreamIdentifier();
        }
    }

    {@inheritDoc} /** {@inheritDoc} */
    @Override
    public int read() throws IOException {
        return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
    }

    {@inheritDoc} /** {@inheritDoc} */
    @Override
    public void close() throws IOException {
        try {
            if (currentCompressedChunk != null) {
                currentCompressedChunk.close();
                currentCompressedChunk = null;
            }
        } finally {
            in.close();
        }
    }

    {@inheritDoc} /** {@inheritDoc} */
    @Override
    public int read(final byte[] b, final int off, final int len) throws IOException {
        int read = readOnce(b, off, len);
        if (read == -1) {
            readNextBlock();
            if (endReached) {
                return -1;
            }
            read = readOnce(b, off, len);
        }
        return read;
    }

    {@inheritDoc} /** {@inheritDoc} */
    @Override
    public int available() throws IOException {
        if (inUncompressedChunk) {
            return Math.min(uncompressedBytesRemaining,
                            in.available());
        } else if (currentCompressedChunk != null) {
            return currentCompressedChunk.available();
        }
        return 0;
    }

    Since: 1.17/**
     * @since 1.17
     */
    @Override
    public long getCompressedCount() {
        return countingStream.getBytesRead() - unreadBytes;
    }

    Read from the current chunk into the given array.
Returns: -1 if there is no current chunk or the number of bytes
read from the current chunk (which may be -1 if the end of the
chunk is reached)./**
     * Read from the current chunk into the given array.
     *
     * @return -1 if there is no current chunk or the number of bytes
     * read from the current chunk (which may be -1 if the end of the
     * chunk is reached).
     */
    private int readOnce(final byte[] b, final int off, final int len) throws IOException {
        int read = -1;
        if (inUncompressedChunk) {
            final int amount = Math.min(uncompressedBytesRemaining, len);
            if (amount == 0) {
                return -1;
            }
            read = in.read(b, off, amount);
            if (read != -1) {
                uncompressedBytesRemaining -= read;
                count(read);
            }
        } else if (currentCompressedChunk != null) {
            final long before = currentCompressedChunk.getBytesRead();
            read = currentCompressedChunk.read(b, off, len);
            if (read == -1) {
                currentCompressedChunk.close();
                currentCompressedChunk = null;
            } else {
                count(currentCompressedChunk.getBytesRead() - before);
            }
        }
        if (read > 0) {
            checksum.update(b, off, read);
        }
        return read;
    }

    private void readNextBlock() throws IOException {
        verifyLastChecksumAndReset();
        inUncompressedChunk = false;
        final int type = readOneByte();
        if (type == -1) {
            endReached = true;
        } else if (type == STREAM_IDENTIFIER_TYPE) {
            in.unread(type);
            unreadBytes++;
            pushedBackBytes(1);
            readStreamIdentifier();
            readNextBlock();
        } else if (type == PADDING_CHUNK_TYPE
                   || (type > MAX_UNSKIPPABLE_TYPE && type <= MAX_SKIPPABLE_TYPE)) {
            skipBlock();
            readNextBlock();
        } else if (type >= MIN_UNSKIPPABLE_TYPE && type <= MAX_UNSKIPPABLE_TYPE) {
            throw new IOException("unskippable chunk with type " + type
                                  + " (hex " + Integer.toHexString(type) + ")"
                                  + " detected.");
        } else if (type == UNCOMPRESSED_CHUNK_TYPE) {
            inUncompressedChunk = true;
            uncompressedBytesRemaining = readSize() - 4 /* CRC */;
            expectedChecksum = unmask(readCrc());
        } else if (type == COMPRESSED_CHUNK_TYPE) {
            final boolean expectChecksum = dialect.usesChecksumWithCompressedChunks();
            final long size = readSize() - (expectChecksum ? 4L : 0L);
            if (expectChecksum) {
                expectedChecksum = unmask(readCrc());
            } else {
                expectedChecksum = -1;
            }
            currentCompressedChunk =
                new SnappyCompressorInputStream(new BoundedInputStream(in, size), blockSize);
            // constructor reads uncompressed size
            count(currentCompressedChunk.getBytesRead());
        } else {
            // impossible as all potential byte values have been covered
            throw new IOException("unknown chunk type " + type
                                  + " detected.");
        }
    }

    private long readCrc() throws IOException {
        final byte[] b = new byte[4];
        final int read = IOUtils.readFully(in, b);
        count(read);
        if (read != 4) {
            throw new IOException("premature end of stream");
        }
        return ByteUtils.fromLittleEndian(b);
    }

    static long unmask(long x) {
        // ugly, maybe we should just have used ints and deal with the
        // overflow
        x -= MASK_OFFSET;
        x &= 0xffffFFFFL;
        return ((x >> 17) | (x << 15)) & 0xffffFFFFL;
    }

    private int readSize() throws IOException {
        return (int) ByteUtils.fromLittleEndian(supplier, 3);
    }

    private void skipBlock() throws IOException {
        final int size = readSize();
        final long read = IOUtils.skip(in, size);
        count(read);
        if (read != size) {
            throw new IOException("premature end of stream");
        }
    }

    private void readStreamIdentifier() throws IOException {
        final byte[] b = new byte[10];
        final int read = IOUtils.readFully(in, b);
        count(read);
        if (10 != read || !matches(b, 10)) {
            throw new IOException("Not a framed Snappy stream");
        }
    }

    private int readOneByte() throws IOException {
        final int b = in.read();
        if (b != -1) {
            count(1);
            return b & 0xFF;
        }
        return -1;
    }

    private void verifyLastChecksumAndReset() throws IOException {
        if (expectedChecksum >= 0 && expectedChecksum != checksum.getValue()) {
            throw new IOException("Checksum verification failed");
        }
        expectedChecksum = -1;
        checksum.reset();
    }

    Checks if the signature matches what is expected for a .sz file.
.sz files start with a chunk with tag 0xff and content sNaPpY.
Params: signature – the bytes to check
length –    the number of bytes to check
Returns:          true if this is a .sz stream, false otherwise/**
     * Checks if the signature matches what is expected for a .sz file.
     *
     * <p>.sz files start with a chunk with tag 0xff and content sNaPpY.</p>
     *
     * @param signature the bytes to check
     * @param length    the number of bytes to check
     * @return          true if this is a .sz stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {

        if (length < SZ_SIGNATURE.length) {
            return false;
        }

        byte[] shortenedSig = signature;
        if (signature.length > SZ_SIGNATURE.length) {
            shortenedSig = new byte[SZ_SIGNATURE.length];
            System.arraycopy(signature, 0, shortenedSig, 0, SZ_SIGNATURE.length);
        }

        return Arrays.equals(shortenedSig, SZ_SIGNATURE);
    }

}
Params:	in – the InputStream from which to read the compressed data
Throws:	IOException – if reading fails
Params:	signature – the bytes to check length – the number of bytes to check
Returns:	true if this is a .sz stream, false otherwise
/

org.apache.commons/ commons-compress/ 1.18/ org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java