/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.net;

import java.nio.ByteBuffer;
import java.util.BitSet;
import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;

Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification. For extensibility, an array of special US-ASCII characters can be specified in order to perform proper URI encoding for the different parts of the URI.

This class is immutable. It is also thread-safe: it uses a BitSet, which is not thread-safe, but the bit set is only modified during construction and the public interface only reads from it.

See Also: Percent-Encoding (https://tools.ietf.org/html/rfc3986#section-2.1)
Since:1.12
/**
 * Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification. For extensibility, an array of
 * special US-ASCII characters can be specified in order to perform proper URI encoding for the different parts
 * of the URI.
 * <p>
 * This class is immutable. It is also thread-safe: it uses a {@code BitSet}, which is not thread-safe, but the
 * bit set is only modified during construction and the public interface only reads from it.
 * </p>
 *
 * @see <a href="https://tools.ietf.org/html/rfc3986#section-2.1">Percent-Encoding</a>
 * @since 1.12
 */
public class PercentCodec implements BinaryEncoder, BinaryDecoder {
The escape character used by the Percent-Encoding in order to introduce an encoded character.
/**
 * The escape character used by the Percent-Encoding in order to introduce an encoded character.
 * It is a compile-time constant, so it is declared {@code static} and shared by all codec instances
 * instead of being stored once per instance.
 */
private static final byte ESCAPE_CHAR = '%';
The bit set used to store the character that should be always encoded
/**
 * The bit set used to store the characters that should always be encoded. Bits are set only by
 * {@code insertAlwaysEncodeChar}, which is reached from the constructors; encoding only reads them.
 */
private final BitSet alwaysEncodeChars = new BitSet();
The flag defining if the space character should be encoded as '+'
/**
 * The flag defining if the space character should be encoded as '+'. When it is set, decoding
 * also turns every '+' back into a space.
 */
private final boolean plusForSpace;
The minimum and maximum codes of the bytes that are inserted in the bit set, used to avoid look-ups for bytes outside that range
/**
 * The smallest byte value inserted in the bit set; bytes below it never need a bit set look-up.
 * Starts at {@code Integer.MAX_VALUE} so that the first inserted byte always lowers it.
 */
private int alwaysEncodeCharsMin = Integer.MAX_VALUE;

/**
 * The largest byte value inserted in the bit set; bytes above it never need a bit set look-up.
 * Starts at {@code Integer.MIN_VALUE} so that the first inserted byte always raises it.
 */
private int alwaysEncodeCharsMax = Integer.MIN_VALUE;
Constructs a Percent codec that will encode all the non US-ASCII characters using the Percent-Encoding while it will not encode any of the US-ASCII characters, except for character '%' that is used as escape character for Percent-Encoding.
/**
 * Constructs a Percent codec that encodes all the non US-ASCII characters using the Percent-Encoding
 * and leaves the US-ASCII characters as they are, except for character '%' that is used as escape
 * character for Percent-Encoding. The space character is not replaced by '+'.
 */
public PercentCodec() {
    // No extra always-encoded characters and no '+' substitution; the two-argument
    // constructor still registers the escape character itself.
    this((byte[]) null, false);
}
Constructs a Percent codec by specifying the characters that belong to US-ASCII that should always be encoded. The rest US-ASCII characters will not be encoded, except for character '%' that is used as escape character for Percent-Encoding.
Params:
  • alwaysEncodeChars – the unsafe characters that should always be encoded
  • plusForSpace – the flag defining if the space character should be encoded as '+'
/**
 * Constructs a Percent codec by specifying the characters that belong to US-ASCII that should
 * always be encoded. The remaining US-ASCII characters will not be encoded, except for character
 * '%' that is used as escape character for Percent-Encoding and is therefore always registered.
 *
 * @param alwaysEncodeChars the unsafe characters that should always be encoded; {@code null} means
 *                          that only the escape character is registered
 * @param plusForSpace      the flag defining if the space character should be encoded as '+'
 */
public PercentCodec(final byte[] alwaysEncodeChars, final boolean plusForSpace) {
    this.plusForSpace = plusForSpace;
    insertAlwaysEncodeChars(alwaysEncodeChars);
}
Adds the byte array into a BitSet for faster lookup
Params:
  • alwaysEncodeCharsArray – the bytes that should always be encoded; may be null
/**
 * Adds the byte array into a BitSet for faster lookup. The escape character is added afterwards
 * in every case, so it is always encoded.
 *
 * @param alwaysEncodeCharsArray the bytes that should always be encoded; may be {@code null},
 *                               in which case only the escape character is added
 */
private void insertAlwaysEncodeChars(final byte[] alwaysEncodeCharsArray) {
    final int count = alwaysEncodeCharsArray == null ? 0 : alwaysEncodeCharsArray.length;
    for (int index = 0; index < count; index++) {
        insertAlwaysEncodeChar(alwaysEncodeCharsArray[index]);
    }
    insertAlwaysEncodeChar(ESCAPE_CHAR);
}
Inserts a single character into a BitSet and maintains the min and max of the characters of the BitSet alwaysEncodeChars in order to avoid look-ups when a byte is out of this range.
Params:
  • b – the byte that is candidate for min and max limit
/**
 * Inserts a single character into a BitSet and maintains the min and max of the characters of the
 * {@code BitSet alwaysEncodeChars} in order to avoid look-ups when a byte is out of this range.
 *
 * @param b the byte that is candidate for min and max limit; must not be negative
 * @throws IllegalArgumentException if {@code b} is negative, i.e. not a US-ASCII character
 */
private void insertAlwaysEncodeChar(final byte b) {
    if (b < 0) {
        // A negative index would make BitSet.set fail with a bare IndexOutOfBoundsException.
        // Report the invalid argument explicitly instead.
        throw new IllegalArgumentException("byte must be >= 0");
    }
    this.alwaysEncodeChars.set(b);
    alwaysEncodeCharsMin = Math.min(alwaysEncodeCharsMin, b);
    alwaysEncodeCharsMax = Math.max(alwaysEncodeCharsMax, b);
}
Percent-Encoding based on RFC 3986. The non US-ASCII characters are encoded, as well as the US-ASCII characters that are configured to be always encoded.
/**
 * Percent-Encoding based on RFC 3986. The non US-ASCII characters are encoded, as well as the
 * US-ASCII characters that are configured to be always encoded.
 *
 * @param bytes the bytes to encode; may be {@code null}
 * @return {@code null} for {@code null} input; the input array itself when no byte needs escaping
 *         and no space has to be replaced by '+'; otherwise a newly allocated array
 * @throws EncoderException declared for the overridden method; the code of this method does not throw it
 */
@Override
public byte[] encode(final byte[] bytes) throws EncoderException {
    if (bytes == null) {
        return null;
    }
    final int encodedLength = expectedEncodingBytes(bytes);
    final boolean needsEscaping = encodedLength != bytes.length;
    // Nothing to escape and no space to turn into '+': hand the input back untouched.
    if (!needsEscaping && !(plusForSpace && containsSpace(bytes))) {
        return bytes;
    }
    return doEncode(bytes, encodedLength, needsEscaping);
}

/**
 * Writes the encoded form of {@code bytes} into a buffer of {@code expectedLength} bytes.
 * Escaped bytes become '%' followed by two hex digits; spaces become '+' when
 * {@code plusForSpace} is set; every other byte is copied unchanged.
 */
private byte[] doEncode(final byte[] bytes, int expectedLength, boolean willEncode) {
    final ByteBuffer out = ByteBuffer.allocate(expectedLength);
    for (final byte current : bytes) {
        if (willEncode && canEncode(current)) {
            // A byte cannot hold an unsigned value, so the signed value is passed on as is.
            // NOTE(review): relies on Utils.hexDigit using only the low four bits - confirm.
            final char highDigit = Utils.hexDigit(current >> 4);
            final char lowDigit = Utils.hexDigit(current);
            out.put(ESCAPE_CHAR);
            out.put((byte) highDigit);
            out.put((byte) lowDigit);
        } else if (plusForSpace && current == ' ') {
            out.put((byte) '+');
        } else {
            out.put(current);
        }
    }
    return out.array();
}

/**
 * Computes the length of the encoded output: three bytes for each byte that gets escaped,
 * one byte for each byte that is copied.
 */
private int expectedEncodingBytes(final byte[] bytes) {
    int total = bytes.length;
    for (final byte current : bytes) {
        if (canEncode(current)) {
            // '%' plus two hex digits replace the single original byte.
            total += 2;
        }
    }
    return total;
}

/** Tells whether {@code bytes} holds at least one space character. */
private boolean containsSpace(final byte[] bytes) {
    int index = 0;
    while (index < bytes.length && bytes[index] != ' ') {
        index++;
    }
    return index < bytes.length;
}

/**
 * Tells whether {@code c} has to be escaped: every non US-ASCII byte, plus the US-ASCII bytes
 * registered in {@code alwaysEncodeChars}.
 */
private boolean canEncode(final byte c) {
    if (!isAsciiChar(c)) {
        return true;
    }
    return inAlwaysEncodeCharsRange(c) && alwaysEncodeChars.get(c);
}

/** Range pre-check that spares the bit set look-up for bytes outside the registered span. */
private boolean inAlwaysEncodeCharsRange(final byte c) {
    return !(c < alwaysEncodeCharsMin || c > alwaysEncodeCharsMax);
}

/** A byte is treated as US-ASCII when it is not negative. */
private boolean isAsciiChar(final byte c) {
    return c >= 0;
}
Decode bytes encoded with Percent-Encoding based on RFC 3986. The reverse process is performed in order to decode the encoded characters to Unicode.
/**
 * Decode bytes encoded with Percent-Encoding based on RFC 3986. Every '%' followed by two hex
 * digits is turned back into the single byte it stands for; when {@code plusForSpace} is set,
 * every '+' is turned back into a space; all other bytes are copied unchanged.
 *
 * @param bytes the bytes to decode; may be {@code null}
 * @return {@code null} for {@code null} input, otherwise a newly allocated array with the decoded bytes
 * @throws DecoderException if an escape character is followed by fewer than two bytes, or if
 *                          {@code Utils.digit16} throws it (presumably for a non-hex digit - confirm)
 */
@Override
public byte[] decode(final byte[] bytes) throws DecoderException {
    if (bytes == null) {
        return null;
    }
    final ByteBuffer out = ByteBuffer.allocate(expectedDecodingBytes(bytes));
    int pos = 0;
    while (pos < bytes.length) {
        final byte current = bytes[pos];
        if (current == ESCAPE_CHAR) {
            try {
                final int high = Utils.digit16(bytes[pos + 1]);
                final int low = Utils.digit16(bytes[pos + 2]);
                out.put((byte) ((high << 4) + low));
            } catch (final ArrayIndexOutOfBoundsException e) {
                // The input ended in the middle of an escape sequence.
                throw new DecoderException("Invalid percent decoding: ", e);
            }
            pos += 3;
        } else {
            out.put(plusForSpace && current == '+' ? (byte) ' ' : current);
            pos++;
        }
    }
    return out.array();
}

/**
 * Computes the length of the decoded output: each escape sequence (three input bytes) and each
 * plain byte yields exactly one output byte.
 */
private int expectedDecodingBytes(final byte[] bytes) {
    int decoded = 0;
    int pos = 0;
    while (pos < bytes.length) {
        if (bytes[pos] == ESCAPE_CHAR) {
            pos += 3;
        } else {
            pos += 1;
        }
        decoded++;
    }
    return decoded;
}
Encodes an object using the Percent-Encoding. Only byte[] objects are accepted.
Params:
  • obj – the object to encode
Throws:
  • EncoderException – if the object is not a byte array
Returns:the encoding result byte[] as Object
/**
 * Encodes an object using the Percent-Encoding. Only byte[] objects are accepted.
 *
 * @param obj the object to encode; may be {@code null}
 * @return the encoding result byte[] as Object, or {@code null} if {@code obj} is {@code null}
 * @throws EncoderException if the object is not a byte array
 */
@Override
public Object encode(final Object obj) throws EncoderException {
    if (obj == null) {
        return null;
    }
    if (!(obj instanceof byte[])) {
        final String typeName = obj.getClass().getName();
        throw new EncoderException("Objects of type " + typeName + " cannot be Percent encoded");
    }
    return encode((byte[]) obj);
}
Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding.
Params:
  • obj – the object to decode
Throws:
  • DecoderException – if the object is not a byte array
Returns:the decoding result byte[] as Object
/**
 * Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding.
 *
 * @param obj the object to decode; may be {@code null}
 * @return the decoding result byte[] as Object, or {@code null} if {@code obj} is {@code null}
 * @throws DecoderException if the object is not a byte array
 */
@Override
public Object decode(final Object obj) throws DecoderException {
    if (obj == null) {
        return null;
    }
    if (!(obj instanceof byte[])) {
        final String typeName = obj.getClass().getName();
        throw new DecoderException("Objects of type " + typeName + " cannot be Percent decoded");
    }
    return decode((byte[]) obj);
}
}