/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.tools.zip;

import java.io.IOException;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

This ZipEncoding implementation implements a simple 8 bit character set, which meets the following restrictions:
  • Characters 0x0000 to 0x007f are encoded as the corresponding byte values 0x00 to 0x7f.
  • All byte codes from 0x80 to 0xff are mapped to a unique Unicode character in the range 0x0080 to 0x7fff. (No support for UTF-16 surrogates)

These restrictions most notably apply to the most prominent omissions of Java 1.4 Charset implementation, Cp437 and Cp850.

The methods of this class are reentrant.

/** * This ZipEncoding implementation implements a simple 8 bit character * set, which meets the following restrictions: * * <ul> * <li>Characters 0x0000 to 0x007f are encoded as the corresponding * byte values 0x00 to 0x7f.</li> * <li>All byte codes from 0x80 to 0xff are mapped to a unique Unicode * character in the range 0x0080 to 0x7fff. (No support for * UTF-16 surrogates) * </ul> * * <p>These restrictions most notably apply to the most prominent * omissions of Java 1.4 {@link java.nio.charset.Charset Charset} * implementation, Cp437 and Cp850.</p> * * <p>The methods of this class are reentrant.</p> */
class Simple8BitZipEncoding implements ZipEncoding {
A character entity, which is put to the reverse mapping table of a simple encoding.
/** * A character entity, which is put to the reverse mapping table * of a simple encoding. */
private static final class Simple8BitChar implements Comparable<Simple8BitChar> { public final char unicode; public final byte code; Simple8BitChar(final byte code, final char unicode) { this.code = code; this.unicode = unicode; } public int compareTo(final Simple8BitChar a) { return this.unicode - a.unicode; } @Override public String toString() { return "0x" + Integer.toHexString(0xffff & unicode) + "->0x" + Integer.toHexString(0xff & code); } @Override public boolean equals(final Object o) { if (o instanceof Simple8BitChar) { final Simple8BitChar other = (Simple8BitChar) o; return unicode == other.unicode && code == other.code; } return false; } @Override public int hashCode() { return unicode; } }
The characters for byte values of 128 to 255 stored as an array of 128 chars.
/** * The characters for byte values of 128 to 255 stored as an array of * 128 chars. */
private final char[] highChars;
A list of Simple8BitChar objects sorted by the unicode field. This list is used to binary search reverse mapping of unicode characters with a character code greater than 127.
/** * A list of {@link Simple8BitChar} objects sorted by the unicode * field. This list is used to binary search reverse mapping of * unicode characters with a character code greater than 127. */
private final List<Simple8BitChar> reverseMapping;
Params:
  • highChars – The characters for byte values of 128 to 255 stored as an array of 128 chars.
/** * @param highChars The characters for byte values of 128 to 255 * stored as an array of 128 chars. */
public Simple8BitZipEncoding(final char[] highChars) { this.highChars = highChars.clone(); final List<Simple8BitChar> temp = new ArrayList<>(this.highChars.length); byte code = 127; for (char highChar : this.highChars) { temp.add(new Simple8BitChar(++code, highChar)); } Collections.sort(temp); this.reverseMapping = Collections.unmodifiableList(temp); }
Return the character code for a given encoded byte.
Params:
  • b – The byte to decode.
Returns:The associated character value.
/** * Return the character code for a given encoded byte. * * @param b The byte to decode. * @return The associated character value. */
public char decodeByte(final byte b) { // code 0-127 if (b >= 0) { return (char) b; } // byte is signed, so 128 == -128 and 255 == -1 return this.highChars[128 + b]; }
Params:
  • c – The character to encode.
Returns:Whether the given unicode character is covered by this encoding.
/** * @param c The character to encode. * @return Whether the given unicode character is covered by this encoding. */
public boolean canEncodeChar(final char c) { if (c >= 0 && c < 128) { return true; } final Simple8BitChar r = this.encodeHighChar(c); return r != null; }
Pushes the encoded form of the given character to the given byte buffer.
Params:
  • bb – The byte buffer to write to.
  • c – The character to encode.
Returns:Whether the given unicode character is covered by this encoding. If false is returned, nothing is pushed to the byte buffer.
/** * Pushes the encoded form of the given character to the given byte buffer. * * @param bb The byte buffer to write to. * @param c The character to encode. * @return Whether the given unicode character is covered by this encoding. * If {@code false} is returned, nothing is pushed to the * byte buffer. */
public boolean pushEncodedChar(final ByteBuffer bb, final char c) { if (c >= 0 && c < 128) { bb.put((byte) c); return true; } final Simple8BitChar r = this.encodeHighChar(c); if (r == null) { return false; } bb.put(r.code); return true; }
Params:
  • c – A unicode character in the range from 0x0080 to 0x7f00
Returns:A Simple8BitChar, if this character is covered by this encoding. A null value is returned, if this character is not covered by this encoding.
/** * @param c A unicode character in the range from 0x0080 to 0x7f00 * @return A Simple8BitChar, if this character is covered by this encoding. * A {@code null} value is returned, if this character is not * covered by this encoding. */
private Simple8BitChar encodeHighChar(final char c) { // for performance an simplicity, yet another reincarnation of // binary search... int i0 = 0; int i1 = this.reverseMapping.size(); while (i1 > i0) { final int i = i0 + (i1 - i0) / 2; final Simple8BitChar m = this.reverseMapping.get(i); if (m.unicode == c) { return m; } if (m.unicode < c) { i0 = i + 1; } else { i1 = i; } } if (i0 >= this.reverseMapping.size()) { return null; } final Simple8BitChar r = this.reverseMapping.get(i0); if (r.unicode != c) { return null; } return r; }
See Also:
  • canEncode.canEncode(String)
/** * @see org.apache.tools.zip.ZipEncoding#canEncode(java.lang.String) */
public boolean canEncode(final String name) { for (int i = 0; i < name.length(); ++i) { final char c = name.charAt(i); if (!this.canEncodeChar(c)) { return false; } } return true; }
See Also:
  • encode.encode(String)
/** * @see org.apache.tools.zip.ZipEncoding#encode(java.lang.String) */
public ByteBuffer encode(final String name) { ByteBuffer out = ByteBuffer.allocate(name.length() + 6 + (name.length() + 1) / 2); for (int i = 0; i < name.length(); ++i) { final char c = name.charAt(i); if (out.remaining() < 6) { out = ZipEncodingHelper.growBuffer(out, out.position() + 6); } if (!this.pushEncodedChar(out, c)) { ZipEncodingHelper.appendSurrogate(out, c); } } ZipEncodingHelper.prepareBufferForRead(out); return out; }
See Also:
  • decode.decode(byte[])
/** * @see org.apache.tools.zip.ZipEncoding#decode(byte[]) */
public String decode(final byte[] data) throws IOException { final char[] ret = new char[data.length]; for (int i = 0; i < data.length; ++i) { ret[i] = this.decodeByte(data[i]); } return new String(ret); } }