/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.io.ByteArrayOutputStream;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.apache.tomcat.util.res.StringManager;
/**
 * All URL decoding happens here. This way we can reuse, review and optimize
 * the decoding without adding complexity to the buffers.
 *
 * The conversion will modify the original buffer.
 *
 * @author Costin Manolache
 */
public final class UDecoder {
/** String manager from which this class obtains its exception messages via {@code sm.getString(key)}. */
private static final StringManager sm = StringManager.getManager(UDecoder.class);
/**
 * Read once, at class initialization, from the system property
 * {@code org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH}
 * (default {@code "false"}). When {@code false}, the non-query decoding
 * paths of this class reject a %-encoded {@code '/'}.
 */
public static final boolean ALLOW_ENCODED_SLASH =
Boolean.parseBoolean(System.getProperty("org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH", "false"));
/**
 * Conversion exception raised for decoding failures. Because
 * {@link #fillInStackTrace()} is overridden to do nothing, instances carry
 * no stack trace.
 */
private static class DecodeException extends CharConversionException {
private static final long serialVersionUID = 1L;
/**
 * @param s the detail message, passed straight to the superclass
 */
public DecodeException(String s) {
super(s);
}
/**
 * Skips stack trace capture.
 *
 * @return this exception, unchanged
 */
@Override
public synchronized Throwable fillInStackTrace() {
// This class does not provide a stack trace
return this;
}
}
Unexpected end of data. /** Unexpected end of data. */
private static final IOException EXCEPTION_EOF = new DecodeException(sm.getString("uDecoder.eof"));
%xx with not-hex digit /** %xx with not-hex digit */
private static final IOException EXCEPTION_NOT_HEX_DIGIT = new DecodeException(
"isHexDigit");
%-encoded slash is forbidden in resource path /** %-encoded slash is forbidden in resource path */
private static final IOException EXCEPTION_SLASH = new DecodeException(
"noSlash");
/**
 * Creates a decoder. The constructor body is empty.
 */
public UDecoder()
{
}
/**
 * URL-decodes the given bytes in place; the source chunk is modified.
 *
 * @param mb The URL encoded bytes
 * @param query {@code true} if this is a query string
 * @throws IOException Invalid %xx URL encoding
 */
public void convert(ByteChunk mb, boolean query) throws IOException {
    final int from = mb.getOffset();
    final byte[] data = mb.getBytes();
    final int limit = mb.getEnd();

    // Locate the first '%', and (query strings only) the first '+' before it.
    final int firstPercent = ByteChunk.findByte(data, from, limit, (byte) '%');
    int firstPlus = -1;
    if (query) {
        final int plusLimit = (firstPercent >= 0) ? firstPercent : limit;
        firstPlus = ByteChunk.findByte(data, from, plusLimit, (byte) '+');
    }

    // Neither metacharacter present: the chunk is left untouched.
    if (firstPercent < 0 && firstPlus < 0) {
        return;
    }

    // Decoding starts at whichever metacharacter comes first.
    int write;
    if (firstPercent < 0 || (firstPlus >= 0 && firstPlus < firstPercent)) {
        write = firstPlus;
    } else {
        write = firstPercent;
    }

    final boolean rejectSlash = !ALLOW_ENCODED_SLASH && !query;

    // 'read' scans the encoded input, 'write' trails it and receives the
    // decoded output in the same array.
    int read = write;
    while (read < limit) {
        final byte current = data[read];
        if (query && current == '+') {
            data[write] = (byte) ' ';
        } else if (current != '%') {
            data[write] = current;
        } else {
            // A '%' must be followed by two more bytes inside the chunk.
            if (read + 2 >= limit) {
                throw EXCEPTION_EOF;
            }
            final byte high = data[read + 1];
            final byte low = data[read + 2];
            if (!(isHexDigit(high) && isHexDigit(low))) {
                throw EXCEPTION_NOT_HEX_DIGIT;
            }
            read += 2;
            final int decoded = x2c(high, low);
            if (rejectSlash && decoded == '/') {
                throw EXCEPTION_SLASH;
            }
            data[write] = (byte) decoded;
        }
        read++;
        write++;
    }

    // The decoded content is never longer than the encoded one.
    mb.setEnd(write);
}
// -------------------- Additional methods --------------------
// XXX What do we do about charset ????
/**
 * In-buffer processing - the buffer will be modified.
 *
 * @param mb The URL encoded chars
 * @param query {@code true} if this is a query string
 * @throws IOException Invalid %xx URL encoding
 */
public void convert(CharChunk mb, boolean query) throws IOException {
    final int from = mb.getOffset();
    final char[] chars = mb.getBuffer();
    final int limit = mb.getEnd();

    // First '%' in the chunk, and for query strings the first '+' that
    // precedes it.
    final int percentAt = CharChunk.indexOf(chars, from, limit, '%');
    int plusAt = -1;
    if (query) {
        plusAt = CharChunk.indexOf(chars, from, (percentAt >= 0) ? percentAt : limit, '+');
    }

    // No metacharacter at all: nothing to rewrite.
    if (percentAt < 0 && plusAt < 0) {
        return;
    }

    // Output position begins at the earliest metacharacter.
    int out = percentAt;
    if (percentAt < 0 || (plusAt >= 0 && plusAt < percentAt)) {
        out = plusAt;
    }

    final boolean rejectSlash = !ALLOW_ENCODED_SLASH && !query;

    int in = out;
    while (in < limit) {
        final char current = chars[in];
        if (query && current == '+') {
            chars[out] = ' ';
        } else if (current != '%') {
            chars[out] = current;
        } else {
            // Two more chars are required after the '%'.
            if (in + 2 >= limit) {
                throw EXCEPTION_EOF;
            }
            final char high = chars[in + 1];
            final char low = chars[in + 2];
            if (!(isHexDigit(high) && isHexDigit(low))) {
                throw EXCEPTION_NOT_HEX_DIGIT;
            }
            in += 2;
            final int decoded = x2c(high, low);
            if (rejectSlash && decoded == '/') {
                throw EXCEPTION_SLASH;
            }
            chars[out] = (char) decoded;
        }
        in++;
        out++;
    }

    // Shrink the chunk to the decoded length.
    mb.setEnd(out);
}
/**
 * URL-decodes the given message; the source is modified.
 *
 * @param mb The URL encoded String, bytes or chars
 * @param query {@code true} if this is a query string
 * @throws IOException Invalid %xx URL encoding
 */
public void convert(MessageBytes mb, boolean query)
        throws IOException
{
    // Dispatch on the representation currently held by the message.
    switch (mb.getType()) {
        case MessageBytes.T_STR:
            String strValue = mb.toString();
            if (strValue == null) {
                return;
            }
            try {
                mb.setString(convert(strValue, query));
            } catch (RuntimeException ex) {
                // The String based decoder reports malformed input with
                // unchecked exceptions; translate them to the IOException
                // contract of this method, keeping the original as cause
                // so the underlying failure is not lost.
                DecodeException decodeFailure = new DecodeException(ex.getMessage());
                decodeFailure.initCause(ex);
                throw decodeFailure;
            }
            break;
        case MessageBytes.T_CHARS:
            CharChunk charC = mb.getCharChunk();
            convert(charC, query);
            break;
        case MessageBytes.T_BYTES:
            ByteChunk bytesC = mb.getByteChunk();
            convert(bytesC, query);
            break;
        default:
            // Any other type is left untouched (unchanged behaviour).
            break;
    }
}
/**
 * %xx decoding of a string. FIXME: this is inefficient.
 *
 * @param str The URL encoded string
 * @param query {@code true} if this is a query string
 * @return the decoded string
 */
public final String convert(String str, boolean query)
{
    // Decodes %xx escapes (and '+' as a space when query is true) into a
    // new string. Malformed input is reported with unchecked exceptions,
    // which convert(MessageBytes, boolean) translates for its callers.
    if (str == null) {
        return null;
    }
    // Nothing to decode: hand back the same instance.
    if ((!query || str.indexOf('+') < 0) && str.indexOf('%') < 0) {
        return str;
    }

    final boolean noSlash = !(ALLOW_ENCODED_SLASH || query);
    final int strLen = str.length();
    // The decoded form is never longer than the encoded one.
    final StringBuilder dec = new StringBuilder(strLen);

    int strPos = 0;
    while (strPos < strLen) {
        final char current = str.charAt(strPos);
        if (current == '+' && query) {
            dec.append(' ');
            strPos++;
        } else if (current == '%') {
            // substring() throws StringIndexOutOfBoundsException when fewer
            // than two characters follow the '%' (unchanged behaviour).
            final String hex = str.substring(strPos + 1, strPos + 3);
            // Integer.parseInt() on its own also accepts a leading sign and
            // non-ASCII digits, so "%+1" or "%-1" used to decode silently.
            // Only two ASCII hex digits form a valid escape.
            if (!isHexDigit(hex.charAt(0)) || !isHexDigit(hex.charAt(1))) {
                throw new NumberFormatException("For input string: \"" + hex + "\"");
            }
            final char res = (char) Integer.parseInt(hex, 16);
            if (noSlash && (res == '/')) {
                throw new IllegalArgumentException(sm.getString("uDecoder.noSlash"));
            }
            dec.append(res);
            strPos += 3;
        } else {
            // Copy the run of literal characters up to the next
            // metacharacter as one block, without a temporary substring.
            int laPos = strPos + 1;
            while (laPos < strLen) {
                final char laChar = str.charAt(laPos);
                if ((laChar == '+' && query) || (laChar == '%')) {
                    break;
                }
                laPos++;
            }
            dec.append(str, strPos, laPos);
            strPos = laPos;
        }
    }
    return dec.toString();
}
/**
 * Decode and return the specified URL-encoded String.
 * When the byte array is converted to a string, UTF-8 is used. This may
 * be different from some other servers. It is assumed the string is not a
 * query string.
 *
 * @param str The url-encoded string
 * @return the decoded string
 * @exception IllegalArgumentException if a '%' character is not followed
 * by a valid 2-digit hexadecimal number
 */
public static String URLDecode(String str) {
// Delegates to the charset-aware overload, fixing the charset to UTF-8.
return URLDecode(str, StandardCharsets.UTF_8);
}
/**
 * Decode and return the specified URL-encoded String. It is assumed the
 * string is not a query string.
 *
 * @param str The url-encoded string
 * @param charset The character encoding to use; if null, UTF-8 is used.
 * @return the decoded string
 * @exception IllegalArgumentException if a '%' character is not followed
 * by a valid 2-digit hexadecimal number
 */
public static String URLDecode(String str, Charset charset) {
    if (str == null) {
        return null;
    }
    // Without any '%' there is nothing to decode; return the same instance.
    if (str.indexOf('%') == -1) {
        return str;
    }
    final Charset effectiveCharset = (charset == null) ? StandardCharsets.UTF_8 : charset;

    /*
     * Why the work is split between a byte stream and a writer:
     * - the input may already be partially decoded, so it cannot be
     *   treated as pure ASCII;
     * - literal characters have to be encoded with the target charset,
     *   because '%' need not map to the same bytes in every charset;
     * - the number of %nn escapes per character depends on the charset.
     * Literal characters therefore go through the writer, decoded escapes
     * are written to the byte stream directly, and the combined bytes are
     * converted back to a String at the end.
     */
    // Initial capacity is only an estimate of the required size.
    final ByteArrayOutputStream decodedBytes = new ByteArrayOutputStream(str.length() * 2);
    final OutputStreamWriter literalWriter = new OutputStreamWriter(decodedBytes, effectiveCharset);

    final char[] source = str.toCharArray();
    final int length = source.length;
    try {
        int pos = 0;
        while (pos < length) {
            final char current = source[pos++];
            if (current != '%') {
                literalWriter.append(current);
                continue;
            }
            // Push pending literal characters out before writing raw
            // bytes, so that the byte order matches the input order.
            literalWriter.flush();
            if (pos + 2 > length) {
                throw new IllegalArgumentException(
                        sm.getString("uDecoder.urlDecode.missingDigit", str));
            }
            final char high = source[pos++];
            final char low = source[pos++];
            if (!isHexDigit(high) || !isHexDigit(low)) {
                throw new IllegalArgumentException(
                        sm.getString("uDecoder.urlDecode.missingDigit", str));
            }
            decodedBytes.write(x2c(high, low));
        }
        literalWriter.flush();
        return decodedBytes.toString(effectiveCharset.name());
    } catch (IOException ioe) {
        throw new IllegalArgumentException(
                sm.getString("uDecoder.urlDecode.conversionError", str, effectiveCharset.name()), ioe);
    }
}
/**
 * Tests whether the given code is an ASCII hexadecimal digit.
 *
 * @param c the character (or byte) value to test
 * @return {@code true} for '0'-'9', 'a'-'f' and 'A'-'F'
 */
private static boolean isHexDigit(int c) {
    if (c >= '0' && c <= '9') {
        return true;
    }
    if (c >= 'a' && c <= 'f') {
        return true;
    }
    return c >= 'A' && c <= 'F';
}
/**
 * Combines two hexadecimal digit bytes into the value they encode. The
 * call sites in this class validate both arguments with isHexDigit first.
 *
 * @param b1 the high-order hex digit
 * @param b2 the low-order hex digit
 * @return the combined value, high digit times 16 plus low digit
 */
private static int x2c(byte b1, byte b2) {
    // Values below 'A' are handled as decimal digits; for letters, masking
    // with 0xDF folds lower case onto upper case before subtracting 'A'.
    final int high = (b1 < 'A') ? (b1 - '0') : (((b1 & 0xDF) - 'A') + 10);
    final int low = (b2 < 'A') ? (b2 - '0') : (((b2 & 0xDF) - 'A') + 10);
    return high * 16 + low;
}
/**
 * Combines two hexadecimal digit chars into the value they encode. The
 * call sites in this class validate both arguments with isHexDigit first.
 *
 * @param b1 the high-order hex digit
 * @param b2 the low-order hex digit
 * @return the combined value, high digit times 16 plus low digit
 */
private static int x2c(char b1, char b2) {
    int high;
    if (b1 >= 'A') {
        // Masking with 0xDF folds lower case onto upper case.
        high = ((b1 & 0xDF) - 'A') + 10;
    } else {
        high = b1 - '0';
    }

    int low;
    if (b2 >= 'A') {
        low = ((b2 & 0xDF) - 'A') + 10;
    } else {
        low = b2 - '0';
    }

    return high * 16 + low;
}
}