/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.vfs2.util;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.util.BitSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.vfs2.provider.GenericURLFileName;

// URI escaping and character encoding/decoding utility.
//
// A few required methods, such as encodePath(...), were forked from org.apache.commons.httpclient.util.URIUtil so that generating and handling GenericURLFileName instances does not depend on the HttpClient v3 API; the utility is intended to work with any HTTP backend provider implementation.

/** * The URI escape and character encoding and decoding utility. * <p> * This was forked from some needed methods such as <code>#encodePath(...)</code> in <code>org.apache.commons.httpclient.util.URIUtil</code>, * in order to not be dependent on HttpClient v3 API, when generating and handling {@link GenericURLFileName}s, * but it should work with any different HTTP backend provider implementations. * </p> */
public class URIUtils { private static final Log LOG = LogFactory.getLog(URIUtils.class);
The default charset of the protocol. RFC 2277, 2396
/** * The default charset of the protocol. RFC 2277, 2396 */
private static final String DEFAULT_PROTOCOL_CHARSET = "UTF-8"; private URIUtils() { }
Escape and encode a string regarded as the path component of an URI with the default protocol charset.
Params:
  • unescaped – an unescaped string
Throws:
Returns:the escaped string
/** * Escape and encode a string regarded as the path component of an URI with * the default protocol charset. * * @param unescaped an unescaped string * @return the escaped string * * @throws URISyntaxException if the default protocol charset is not supported */
public static String encodePath(final String unescaped) throws URISyntaxException { return encodePath(unescaped, DEFAULT_PROTOCOL_CHARSET); }
Escape and encode a string regarded as the path component of an URI with a given charset.
Params:
  • unescaped – an unescaped string
  • charset – the charset
Throws:
Returns:the escaped string
/** * Escape and encode a string regarded as the path component of an URI with * a given charset. * * @param unescaped an unescaped string * @param charset the charset * @return the escaped string * * @throws URISyntaxException if the charset is not supported */
public static String encodePath(final String unescaped, final String charset) throws URISyntaxException { if (unescaped == null) { throw new IllegalArgumentException("The string to encode may not be null."); } return encode(unescaped, URIBitSets.allowed_abs_path, charset); } private static String encode(final String unescaped, final BitSet allowed, final String charset) throws URISyntaxException { final byte[] rawdata = URLCodecUtils.encodeUrl(allowed, EncodingUtils.getBytes(unescaped, charset)); return EncodingUtils.getAsciiString(rawdata, 0, rawdata.length); }
Internal URL codec utilities.

This was forked from some needed methods such as #encodeUrl(...) and #hexDigit(int) in org.apache.commons.codec.net.URLCodec, as commons-codec library cannot be pulled in transitively via Http Client v3 library any more.

/** * Internal URL codec utilities. * <p> * This was forked from some needed methods such as <code>#encodeUrl(...)</code> and <code>#hexDigit(int)</code> * in <code>org.apache.commons.codec.net.URLCodec</code>, as commons-codec library cannot be pulled in transitively * via Http Client v3 library any more. * </p> */
private static class URLCodecUtils { private static final byte ESCAPE_CHAR = '%'; private static final BitSet WWW_FORM_URL_SAFE = new BitSet(256); // Static initializer for www_form_url static { // alpha characters for (int i = 'a'; i <= 'z'; i++) { WWW_FORM_URL_SAFE.set(i); } for (int i = 'A'; i <= 'Z'; i++) { WWW_FORM_URL_SAFE.set(i); } // numeric characters for (int i = '0'; i <= '9'; i++) { WWW_FORM_URL_SAFE.set(i); } // special chars WWW_FORM_URL_SAFE.set('-'); WWW_FORM_URL_SAFE.set('_'); WWW_FORM_URL_SAFE.set('.'); WWW_FORM_URL_SAFE.set('*'); // blank to be replaced with + WWW_FORM_URL_SAFE.set(' '); }
Radix used in encoding and decoding.
/** * Radix used in encoding and decoding. */
private static final int RADIX = 16; private URLCodecUtils() { } static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) { if (bytes == null) { return null; } if (urlsafe == null) { urlsafe = WWW_FORM_URL_SAFE; } final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); for (final byte c : bytes) { int b = c; if (b < 0) { b = 256 + b; } if (urlsafe.get(b)) { if (b == ' ') { b = '+'; } buffer.write(b); } else { buffer.write(ESCAPE_CHAR); final char hex1 = hexDigit(b >> 4); final char hex2 = hexDigit(b); buffer.write(hex1); buffer.write(hex2); } } return buffer.toByteArray(); } private static char hexDigit(final int b) { return Character.toUpperCase(Character.forDigit(b & 0xF, RADIX)); } }
Internal character encoding utilities.

This was forked from some needed methods such as #getBytes(...) and #getAsciiString(...) in org.apache.commons.httpclient.util.EncodingUtil, in order to not be dependent on HttpClient v3 API, when generating and handling GenericURLFileNames, but it should work with any different HTTP backend provider implementations.

/** * Internal character encoding utilities. * <p> * This was forked from some needed methods such as <code>#getBytes(...)</code> and <code>#getAsciiString(...)</code> * in <code>org.apache.commons.httpclient.util.EncodingUtil</code>, * in order to not be dependent on HttpClient v3 API, when generating and handling {@link GenericURLFileName}s, * but it should work with any different HTTP backend provider implementations. * </p> */
private static class EncodingUtils { private EncodingUtils() { }
Converts the specified string to a byte array. If the charset is not supported the default system charset is used.
Params:
  • data – the string to be encoded
  • charset – the desired character encoding
Returns:The resulting byte array.
/** * Converts the specified string to a byte array. If the charset is not supported the * default system charset is used. * * @param data the string to be encoded * @param charset the desired character encoding * @return The resulting byte array. */
static byte[] getBytes(final String data, final String charset) { if (data == null) { throw new IllegalArgumentException("data may not be null"); } if (charset == null || charset.length() == 0) { throw new IllegalArgumentException("charset may not be null or empty"); } try { return data.getBytes(charset); } catch (final UnsupportedEncodingException e) { if (LOG.isWarnEnabled()) { LOG.warn("Unsupported encoding: " + charset + ". System encoding used."); } return data.getBytes(); } }
Converts the byte array of ASCII characters to a string. This method is to be used when decoding content of HTTP elements (such as response headers)
Params:
  • data – the byte array to be encoded
  • offset – the index of the first byte to encode
  • length – the number of bytes to encode
Returns:The string representation of the byte array
/** * Converts the byte array of ASCII characters to a string. This method is * to be used when decoding content of HTTP elements (such as response * headers) * * @param data the byte array to be encoded * @param offset the index of the first byte to encode * @param length the number of bytes to encode * @return The string representation of the byte array */
static String getAsciiString(final byte[] data, final int offset, final int length) { try { return new String(data, offset, length, "US-ASCII"); } catch (final UnsupportedEncodingException e) { throw new RuntimeException("US-ASCII charset is not supported."); } } } }