/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.commons.compress.archivers.zip;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

A ZipEncoding, which uses a java.nio Charset to encode names.

The methods of this class are reentrant.

@Immutable
/** * A ZipEncoding, which uses a java.nio {@link * java.nio.charset.Charset Charset} to encode names. * <p>The methods of this class are reentrant.</p> * @Immutable */
class NioZipEncoding implements ZipEncoding, CharsetAccessor { private final Charset charset; private final boolean useReplacement; private static final char REPLACEMENT = '?'; private static final byte[] REPLACEMENT_BYTES = { (byte) REPLACEMENT }; private static final String REPLACEMENT_STRING = String.valueOf(REPLACEMENT); private static final char[] HEX_CHARS = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
Construct an NioZipEncoding using the given charset.
Params:
  • charset – The character set to use.
  • useReplacement – should invalid characters be replaced, or reported.
/** * Construct an NioZipEncoding using the given charset. * @param charset The character set to use. * @param useReplacement should invalid characters be replaced, or reported. */
NioZipEncoding(final Charset charset, boolean useReplacement) { this.charset = charset; this.useReplacement = useReplacement; } @Override public Charset getCharset() { return charset; }
See Also:
  • canEncode.canEncode(String)
/** * @see ZipEncoding#canEncode(java.lang.String) */
@Override public boolean canEncode(final String name) { final CharsetEncoder enc = newEncoder(); return enc.canEncode(name); }
See Also:
  • encode.encode(String)
/** * @see ZipEncoding#encode(java.lang.String) */
@Override public ByteBuffer encode(final String name) { final CharsetEncoder enc = newEncoder(); final CharBuffer cb = CharBuffer.wrap(name); CharBuffer tmp = null; ByteBuffer out = ByteBuffer.allocate(estimateInitialBufferSize(enc, cb.remaining())); while (cb.remaining() > 0) { final CoderResult res = enc.encode(cb, out, false); if (res.isUnmappable() || res.isMalformed()) { // write the unmappable characters in utf-16 // pseudo-URL encoding style to ByteBuffer. int spaceForSurrogate = estimateIncrementalEncodingSize(enc, 6 * res.length()); if (spaceForSurrogate > out.remaining()) { // if the destination buffer isn't over sized, assume that the presence of one // unmappable character makes it likely that there will be more. Find all the // un-encoded characters and allocate space based on those estimates. int charCount = 0; for (int i = cb.position() ; i < cb.limit(); i++) { charCount += !enc.canEncode(cb.get(i)) ? 6 : 1; } int totalExtraSpace = estimateIncrementalEncodingSize(enc, charCount); out = ZipEncodingHelper.growBufferBy(out, totalExtraSpace - out.remaining()); } if (tmp == null) { tmp = CharBuffer.allocate(6); } for (int i = 0; i < res.length(); ++i) { out = encodeFully(enc, encodeSurrogate(tmp, cb.get()), out); } } else if (res.isOverflow()) { int increment = estimateIncrementalEncodingSize(enc, cb.remaining()); out = ZipEncodingHelper.growBufferBy(out, increment); } } // tell the encoder we are done enc.encode(cb, out, true); // may have caused underflow, but that's been ignored traditionally out.limit(out.position()); out.rewind(); return out; }
See Also:
  • decode.decode(byte[])
/** * @see * ZipEncoding#decode(byte[]) */
@Override public String decode(final byte[] data) throws IOException { return newDecoder() .decode(ByteBuffer.wrap(data)).toString(); } private static ByteBuffer encodeFully(CharsetEncoder enc, CharBuffer cb, ByteBuffer out) { ByteBuffer o = out; while (cb.hasRemaining()) { CoderResult result = enc.encode(cb, o, false); if (result.isOverflow()) { int increment = estimateIncrementalEncodingSize(enc, cb.remaining()); o = ZipEncodingHelper.growBufferBy(o, increment); } } return o; } private static CharBuffer encodeSurrogate(CharBuffer cb, char c) { cb.position(0).limit(6); cb.put('%'); cb.put('U'); cb.put(HEX_CHARS[(c >> 12) & 0x0f]); cb.put(HEX_CHARS[(c >> 8) & 0x0f]); cb.put(HEX_CHARS[(c >> 4) & 0x0f]); cb.put(HEX_CHARS[c & 0x0f]); cb.flip(); return cb; } private CharsetEncoder newEncoder() { if (useReplacement) { return charset.newEncoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE) .replaceWith(REPLACEMENT_BYTES); } else { return charset.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); } } private CharsetDecoder newDecoder() { if (!useReplacement) { return this.charset.newDecoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); } else { return charset.newDecoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE) .replaceWith(REPLACEMENT_STRING); } }
Estimate the initial encoded size (in bytes) for a character buffer.

The estimate assumes that one character consumes uses the maximum length encoding, whilst the rest use an average size encoding. This accounts for any BOM for UTF-16, at the expense of a couple of extra bytes for UTF-8 encoded ASCII.

Params:
  • enc – encoder to use for estimates
  • charChount – number of characters in string
Returns:estimated size in bytes.
/** * Estimate the initial encoded size (in bytes) for a character buffer. * <p> * The estimate assumes that one character consumes uses the maximum length encoding, * whilst the rest use an average size encoding. This accounts for any BOM for UTF-16, at * the expense of a couple of extra bytes for UTF-8 encoded ASCII. * </p> * * @param enc encoder to use for estimates * @param charChount number of characters in string * @return estimated size in bytes. */
private static int estimateInitialBufferSize(CharsetEncoder enc, int charChount) { float first = enc.maxBytesPerChar(); float rest = (charChount - 1) * enc.averageBytesPerChar(); return (int) Math.ceil(first + rest); }
Estimate the size needed for remaining characters
Params:
  • enc – encoder to use for estimates
  • charCount – number of characters remaining
Returns:estimated size in bytes.
/** * Estimate the size needed for remaining characters * * @param enc encoder to use for estimates * @param charCount number of characters remaining * @return estimated size in bytes. */
private static int estimateIncrementalEncodingSize(CharsetEncoder enc, int charCount) { return (int) Math.ceil(charCount * enc.averageBytesPerChar()); } }