/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.compressing;


import java.io.IOException;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.compress.LZ4;

/**
 * A compression mode. Tells how much effort should be spent on compression and
 * decompression of stored fields.
 * @lucene.experimental
 */
public abstract class CompressionMode {
/**
 * A compression mode that trades compression ratio for speed. Although the
 * compression ratio might remain high, compression and decompression are
 * very fast. Use this mode with indices that have a high update rate but
 * should be able to load documents from disk quickly.
 */
public static final CompressionMode FAST = new CompressionMode() {

  @Override
  public String toString() {
    return "FAST";
  }

  @Override
  public Compressor newCompressor() {
    // A fresh instance per call: the fast LZ4 compressor owns its own hash table.
    return new LZ4FastCompressor();
  }

  @Override
  public Decompressor newDecompressor() {
    // The LZ4 decompressor has no fields, so the single shared instance is handed out.
    return LZ4_DECOMPRESSOR;
  }

};
/**
 * A compression mode that trades speed for compression ratio. Although
 * compression and decompression might be slow, this compression mode should
 * provide a good compression ratio. This mode might be interesting if/when
 * your index size is much bigger than your OS cache.
 */
public static final CompressionMode HIGH_COMPRESSION = new CompressionMode() { @Override public Compressor newCompressor() { // notes: // 3 is the highest level that doesn't have lazy match evaluation // 6 is the default, higher than that is just a waste of cpu return new DeflateCompressor(6); } @Override public Decompressor newDecompressor() { return new DeflateDecompressor(); } @Override public String toString() { return "HIGH_COMPRESSION"; } };
/**
 * This compression mode is similar to {@link #FAST} but it spends more time
 * compressing in order to improve the compression ratio. This compression
 * mode is best used with indices that have a low update rate but should be
 * able to load documents from disk quickly.
 */
public static final CompressionMode FAST_DECOMPRESSION = new CompressionMode() {

  @Override
  public String toString() {
    return "FAST_DECOMPRESSION";
  }

  @Override
  public Compressor newCompressor() {
    // A fresh instance per call: the high-compression LZ4 compressor owns its own hash table.
    return new LZ4HighCompressor();
  }

  @Override
  public Decompressor newDecompressor() {
    // Same shared LZ4 decompressor instance that FAST hands out.
    return LZ4_DECOMPRESSOR;
  }

};
/** Sole constructor. */
protected CompressionMode() {
  // nothing to initialise: this class declares no instance fields
}
/**
 * Create a new {@link Compressor} instance.
 */
public abstract Compressor newCompressor(); // implemented by FAST, HIGH_COMPRESSION and FAST_DECOMPRESSION
/**
 * Create a new {@link Decompressor} instance.
 */
public abstract Decompressor newDecompressor(); private static final Decompressor LZ4_DECOMPRESSOR = new Decompressor() { @Override public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException { assert offset + length <= originalLength; // add 7 padding bytes, this is not necessary but can help decompression run faster if (bytes.bytes.length < originalLength + 7) { bytes.bytes = new byte[ArrayUtil.oversize(originalLength + 7, 1)]; } final int decompressedLength = LZ4.decompress(in, offset + length, bytes.bytes, 0); if (decompressedLength > originalLength) { throw new CorruptIndexException("Corrupted: lengths mismatch: " + decompressedLength + " > " + originalLength, in); } bytes.offset = offset; bytes.length = length; } @Override public Decompressor clone() { return this; } }; private static final class LZ4FastCompressor extends Compressor { private final LZ4.FastCompressionHashTable ht; LZ4FastCompressor() { ht = new LZ4.FastCompressionHashTable(); } @Override public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException { LZ4.compress(bytes, off, len, out, ht); } @Override public void close() throws IOException { // no-op } } private static final class LZ4HighCompressor extends Compressor { private final LZ4.HighCompressionHashTable ht; LZ4HighCompressor() { ht = new LZ4.HighCompressionHashTable(); } @Override public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException { LZ4.compress(bytes, off, len, out, ht); } @Override public void close() throws IOException { // no-op } } private static final class DeflateDecompressor extends Decompressor { byte[] compressed; DeflateDecompressor() { compressed = new byte[0]; } @Override public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException { assert offset + length <= originalLength; if (length == 0) { bytes.length = 0; return; } final int compressedLength = 
in.readVInt(); // pad with extra "dummy byte": see javadocs for using Inflater(true) // we do it for compliance, but it's unnecessary for years in zlib. final int paddedLength = compressedLength + 1; compressed = ArrayUtil.grow(compressed, paddedLength); in.readBytes(compressed, 0, compressedLength); compressed[compressedLength] = 0; // explicitly set dummy byte to 0 final Inflater decompressor = new Inflater(true); try { // extra "dummy byte" decompressor.setInput(compressed, 0, paddedLength); bytes.offset = bytes.length = 0; bytes.bytes = ArrayUtil.grow(bytes.bytes, originalLength); try { bytes.length = decompressor.inflate(bytes.bytes, bytes.length, originalLength); } catch (DataFormatException e) { throw new IOException(e); } if (!decompressor.finished()) { throw new CorruptIndexException("Invalid decoder state: needsInput=" + decompressor.needsInput() + ", needsDict=" + decompressor.needsDictionary(), in); } } finally { decompressor.end(); } if (bytes.length != originalLength) { throw new CorruptIndexException("Lengths mismatch: " + bytes.length + " != " + originalLength, in); } bytes.offset = offset; bytes.length = length; } @Override public Decompressor clone() { return new DeflateDecompressor(); } } private static class DeflateCompressor extends Compressor { final Deflater compressor; byte[] compressed; boolean closed; DeflateCompressor(int level) { compressor = new Deflater(level, true); compressed = new byte[64]; } @Override public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException { compressor.reset(); compressor.setInput(bytes, off, len); compressor.finish(); if (compressor.needsInput()) { // no output assert len == 0 : len; out.writeVInt(0); return; } int totalCount = 0; for (;;) { final int count = compressor.deflate(compressed, totalCount, compressed.length - totalCount); totalCount += count; assert totalCount <= compressed.length; if (compressor.finished()) { break; } else { compressed = ArrayUtil.grow(compressed); } } 
out.writeVInt(totalCount); out.writeBytes(compressed, totalCount); } @Override public void close() throws IOException { if (closed == false) { compressor.end(); closed = true; } } } }