/*
* Copyright (c) 2010, 2020 Oracle and/or its affiliates. All rights reserved.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License v. 2.0, which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* This Source Code may also be made available under the following Secondary
* Licenses when the conditions for such availability set forth in the
* Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
* version 2 with the GNU Classpath Exception, which is available at
* https://www.gnu.org/software/classpath/license.html.
*
* SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
*/
package org.glassfish.grizzly.http.util;
import static org.glassfish.grizzly.utils.Charsets.UTF8_CHARSET;
import java.io.CharConversionException;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.glassfish.grizzly.Buffer;
import org.glassfish.grizzly.Grizzly;
// Utility class that makes sure an HTTP URL defined inside a MessageBytes
// is normalized, converted and valid. It also makes sure there is no security hole. Typical usage:
// HttpRequestURIDecoder.decode(decodedURI, urlDecoder, encoding, b2cConverter);
// Author: Jeanfrancois Arcand
/**
* Utility class that makes sure an HTTP URL defined inside a {@link MessageBytes} is normalized, converted and valid.
* It also makes sure there is no security hole. Typical usage:
* <p>
*
* <pre>
* <code>
*
* HttpRequestURIDecoder.decode(decodedURI, urlDecoder, encoding, b2cConverter);
*
* </code>
* </pre>
*
* @author Jeanfrancois Arcand
*/
public class HttpRequestURIDecoder {
// When true, '\' is rewritten to '/' during normalization; when false (the current value), a URI
// containing '\' is rejected by the normalize* methods.
protected static final boolean ALLOW_BACKSLASH = false;
// Whether runs of adjacent '/' are collapsed into one; read once from the
// "com.sun.enterprise.web.collapseAdjacentSlashes" system property and enabled unless that property
// is set to something other than "true" (case-insensitive).
private static final boolean COLLAPSE_ADJACENT_SLASHES = Boolean.parseBoolean(System.getProperty("com.sun.enterprise.web.collapseAdjacentSlashes", "true"));
// Logger shared by every method of this class.
private static final Logger LOGGER = Grizzly.logger(HttpRequestURIDecoder.class);
// Decode the HTTP request represented by the bytes inside MessageBytes using a UDecoder.
// Params: - decodedURI – the bytes to decode
// - urlDecoder – the urlDecoder to use to decode
// Throws: java.lang.Exception
/**
 * Decode the HTTP request URI represented by the bytes inside {@link MessageBytes} using a {@link UDecoder}.
 * <p>
 * Convenience overload: delegates to {@link #decode(MessageBytes, UDecoder, String, B2CConverter)} passing
 * {@code null} for both the encoding and the converter.
 *
 * @param decodedURI the bytes to decode
 * @param urlDecoder the urlDecoder to use to decode
 * @throws java.lang.Exception propagated from the delegated {@code decode} call
 */
public static void decode(final MessageBytes decodedURI, final UDecoder urlDecoder) throws Exception {
decode(decodedURI, urlDecoder, null, null);
}
// Decode the HTTP request represented by the bytes inside MessageBytes using a UDecoder,
// using the specified encoding and the specified B2CConverter to decode the request.
// Params: - decodedURI – the bytes to decode
// - urlDecoder – the urlDecoder to use to decode
// - encoding – the encoding value, default is UTF-8
// - b2cConverter – the bytes-to-chars converter
// Throws: java.lang.Exception
/**
 * Decodes and validates the request URI held in a {@link MessageBytes}.
 * <p>
 * The URI is %xx-decoded with the supplied {@link UDecoder}, normalized, converted to characters using the given
 * encoding (and the given {@link B2CConverter}, if any) and finally re-checked to confirm that the URI is still
 * normalized after the character conversion.
 *
 * @param decodedURI the URI to decode
 * @param urlDecoder the decoder used for the %xx decoding step
 * @param encoding the name of the encoding used for the conversion to chars; {@code null} means "utf-8"
 * @param b2cConverter the bytes-to-chars converter to use; may be {@code null}
 * @throws java.lang.Exception if decoding fails; an {@link IOException} is thrown when the URI cannot be normalized
 * or is no longer normalized after the character conversion
 */
public static void decode(final MessageBytes decodedURI, final UDecoder urlDecoder, String encoding, final B2CConverter b2cConverter) throws Exception {
    // Step 1: %xx decoding of the URL
    urlDecoder.convert(decodedURI, false);

    // Step 2: normalization of the decoded URI
    final boolean normalized = normalize(decodedURI);
    if (!normalized) {
        throw new IOException("Invalid URI character encoding");
    }

    // Step 3: conversion to chars, falling back to UTF-8 when no encoding was given
    final String effectiveEncoding = encoding != null ? encoding : "utf-8";
    convertURI(decodedURI, effectiveEncoding, b2cConverter);

    // Step 4: make sure the char conversion did not leave the URI in a non-normalized form
    final CharChunk decodedChars = decodedURI.getCharChunk();
    if (!checkNormalize(decodedChars)) {
        throw new IOException("Invalid URI character encoding");
    }
}
// Decode the HTTP request represented by the bytes inside DataChunk.
// Params: - decodedURI – the bytes to decode
// Throws: java.io.CharConversionException
/**
 * Decode the HTTP request URI represented by the bytes inside {@link DataChunk}.
 * <p>
 * Convenience overload: delegates to {@link #decode(DataChunk, boolean, Charset)} with encoded slashes disallowed
 * and {@code UTF8_CHARSET} as the encoding.
 *
 * @param decodedURI the bytes to decode
 * @throws java.io.CharConversionException propagated from the delegated {@code decode} call
 */
public static void decode(final DataChunk decodedURI) throws CharConversionException {
decode(decodedURI, false, UTF8_CHARSET);
}
// Decode the HTTP request represented by the bytes inside DataChunk.
// Params: - decodedURI – the bytes to decode
// - isSlashAllowed – allow encoded slashes
// Throws: java.io.CharConversionException
/**
 * Decode the HTTP request URI represented by the bytes inside {@link DataChunk}.
 * <p>
 * Convenience overload: delegates to {@link #decode(DataChunk, boolean, Charset)} with {@code UTF8_CHARSET} as the
 * encoding.
 *
 * @param decodedURI the bytes to decode
 * @param isSlashAllowed allow encoded slashes
 * @throws java.io.CharConversionException propagated from the delegated {@code decode} call
 */
public static void decode(final DataChunk decodedURI, final boolean isSlashAllowed) throws CharConversionException {
decode(decodedURI, isSlashAllowed, UTF8_CHARSET);
}
// Decode the HTTP request represented by the bytes inside DataChunk.
// Params: - decodedURI – the bytes to decode
// - isSlashAllowed – allow encoded slashes
// - encoding – the encoding value, default is UTF-8
// Throws: java.io.CharConversionException
/**
 * Decode the HTTP request URI represented by the bytes inside {@link DataChunk}.
 * <p>
 * Delegates to {@link #decode(DataChunk, DataChunk, boolean, Charset)} using the same chunk as both the source and
 * the target, i.e. the chunk is decoded onto itself.
 *
 * @param decodedURI the bytes to decode; also receives the decoded result
 * @param isSlashAllowed allow encoded slashes
 * @param encoding the encoding value, default is UTF-8
 * @throws java.io.CharConversionException propagated from the delegated {@code decode} call
 */
public static void decode(final DataChunk decodedURI, final boolean isSlashAllowed, final Charset encoding) throws CharConversionException {
decode(decodedURI, decodedURI, isSlashAllowed, encoding);
}
// Decode the HTTP request represented by the bytes inside DataChunk.
// Params: - originalURI – the bytes to decode
// - targetDecodedURI – the target DataChunk the URI will be decoded to
// - isSlashAllowed – is '/' an allowable character
// - encoding – the encoding value, default is UTF-8
// Throws: java.io.CharConversionException
/**
 * Decodes the request URI held in {@code originalURI} into {@code targetDecodedURI}.
 * <p>
 * The URI is first %xx-decoded into the target chunk, the target is then normalized and finally converted to its
 * chars representation via {@link #convertToChars(DataChunk, Charset)}.
 *
 * @param originalURI the bytes to decode
 * @param targetDecodedURI the target {@link DataChunk} the URI will be decoded to
 * @param isSlashAllowed is '/' an allowable character
 * @param encoding the encoding value, default is UTF-8
 * @throws java.io.CharConversionException if the decoded URI cannot be normalized, or if the conversion to chars
 * fails
 */
public static void decode(final DataChunk originalURI, final DataChunk targetDecodedURI, final boolean isSlashAllowed, final Charset encoding)
        throws CharConversionException {
    // %xx decoding of the URL, written into the target chunk
    URLDecoder.decode(originalURI, targetDecodedURI, isSlashAllowed);

    final boolean normalized = normalize(targetDecodedURI);
    if (!normalized) {
        throw new CharConversionException("Invalid URI character encoding");
    }

    convertToChars(targetDecodedURI, encoding);
}
// Converts the normalized HTTP request represented by the bytes inside DataChunk
// to its chars representation, using the passed encoding.
// Params: - decodedURI – the bytes to decode
// - encoding – the encoding value, default is UTF-8
// Throws: java.io.CharConversionException
/**
 * Converts the already normalized request URI held in a {@link DataChunk} to its chars representation using the
 * passed encoding, then verifies that the resulting chars are still normalized.
 *
 * @param decodedURI the bytes to convert
 * @param encoding the encoding value; {@code null} selects UTF-8
 * @throws java.io.CharConversionException if the converted URI is not normalized, or if the conversion itself
 * fails
 */
public static void convertToChars(final DataChunk decodedURI, Charset encoding) throws CharConversionException {
    final Charset charset = encoding == null ? UTF8_CHARSET : encoding;
    decodedURI.toChars(charset);

    // Check that the URI is still normalized
    final boolean stillNormalized = checkNormalize(decodedURI.getCharChunk());
    if (!stillNormalized) {
        throw new CharConversionException("Invalid URI character encoding");
    }
}
// Convert a URI using the specified encoding, using the specified B2CConverter to decode the request.
// Params: - uri – the bytes to decode
// - encoding – the encoding value
// - b2cConverter – the bytes-to-chars converter
// Throws: java.lang.Exception
/**
 * Convert a URI to chars using the specified encoding and the specified {@link B2CConverter}.
 * <p>
 * When the encoding is non-blank and not ISO-8859-1, the conversion is attempted with the given converter (one is
 * created for the encoding if {@code null} was passed). If the converter cannot be created or the conversion fails,
 * the failure is logged together with its cause and the method falls back to the fast path, which maps every byte
 * to the char with the same unsigned value.
 *
 * @param uri the bytes to decode; receives the resulting chars
 * @param encoding the encoding value
 * @param b2cConverter the bytes-to-chars converter; may be {@code null}
 * @throws java.lang.Exception declared for compatibility; {@link IOException}s raised by the converter are handled
 * internally
 */
private static void convertURI(final MessageBytes uri, final String encoding, B2CConverter b2cConverter) throws Exception {
    final ByteChunk bc = uri.getByteChunk();
    final CharChunk cc = uri.getCharChunk();
    cc.allocate(bc.getLength(), -1);

    if (encoding != null && encoding.trim().length() != 0 && !"ISO-8859-1".equalsIgnoreCase(encoding)) {
        try {
            if (b2cConverter == null) {
                b2cConverter = new B2CConverter(encoding);
            }
        } catch (IOException e) {
            // Best effort: keep going with the default conversion, but do not lose the cause
            LOGGER.log(Level.SEVERE, "Invalid URI encoding; using HTTP default", e);
        }
        if (b2cConverter != null) {
            try {
                b2cConverter.convert(bc, cc);
                uri.setChars(cc.getBuffer(), cc.getStart(), cc.getLength());
                return;
            } catch (IOException e) {
                // Best effort: discard the partial output and fall through to the byte-per-char conversion
                LOGGER.log(Level.SEVERE, "Invalid URI character encoding; trying ascii", e);
                cc.recycle();
            }
        }
    }

    // Default encoding: fast conversion.
    // The chunk bounds are read only now, after any converter attempt, exactly as the original code did.
    final byte[] bbuf = bc.getBuffer();
    final char[] cbuf = cc.getBuffer();
    final int start = bc.getStart();
    final int length = bc.getLength();
    for (int i = 0; i < length; i++) {
        cbuf[i] = (char) (bbuf[i + start] & 0xff);
    }
    uri.setChars(cbuf, 0, length);
}
// Normalize URI.
// This method normalizes "\", "//", "/./" and "/../". This method will return false when trying to go above the root,
// or if the URI contains a null byte.
// Params: - uriMB – URI to be normalized
// Returns: true if normalization was successful, or false otherwise
/**
 * Normalize URI.
 * <p>
 * Dispatches on the current type of the {@link MessageBytes}: char content is handled by
 * {@link #normalizeChars(CharChunk)}, anything else by {@link #normalizeBytes(ByteChunk)}. Normalization covers
 * "\", "//", "/./" and "/../" and fails when trying to go above the root or when the URI contains a null byte.
 *
 * @param uriMB URI to be normalized
 * @return {@code true} if normalization was successful, or {@code false} otherwise
 */
public static boolean normalize(MessageBytes uriMB) {
    final boolean holdsChars = uriMB.getType() == MessageBytes.T_CHARS;
    return holdsChars ? normalizeChars(uriMB.getCharChunk()) : normalizeBytes(uriMB.getByteChunk());
}
// Normalize URI.
// This method normalizes "\", "//", "/./" and "/../". This method will return false when trying to go above the root,
// or if the URI contains a null byte.
// Params: - dataChunk – URI to be normalized
// Returns: true if normalization was successful, or false otherwise
/**
 * Normalize URI.
 * <p>
 * This method normalizes "\", "//", "/./" and "/../". This method will return false when trying to go above the root,
 * or if the URI contains a null byte.
 * <p>
 * The work is dispatched on the chunk's current type: byte content goes to {@link #normalizeBytes(ByteChunk)},
 * buffer content to {@link #normalizeBuffer(BufferChunk)} and char content to {@link #normalizeChars(CharChunk)}.
 * String content is first converted to chars and then handled like char content.
 *
 * @param dataChunk URI to be normalized
 * @return {@code true} if normalization was successful, or {@code false} otherwise
 * @throws NullPointerException if the chunk's type is none of Bytes, Buffer, String or Chars
 * @throws IllegalStateException if converting String content to chars unexpectedly fails
 */
public static boolean normalize(final DataChunk dataChunk) {
switch (dataChunk.getType()) {
case Bytes:
return normalizeBytes(dataChunk.getByteChunk());
case Buffer:
return normalizeBuffer(dataChunk.getBufferChunk());
case String:
try {
// Switch the chunk to its chars representation so the Chars case below can handle it
dataChunk.toChars(null);
} catch (CharConversionException unexpected) {
// should never occur
throw new IllegalStateException("Unexpected exception", unexpected);
}
// intentional fall-through: pass to Chars case
case Chars:
return normalizeChars(dataChunk.getCharChunk());
default:
// Any other chunk type is rejected with a NullPointerException
throw new NullPointerException();
}
}
// Check that the URI is normalized following character decoding.
// This method checks for "\", 0, "//", "/./" and "/../". This method will return false if sequences that are supposed
// to be normalized are still present in the URI.
// Params: - uriCC – URI to be checked (should be chars)
// Returns: true if the uriCC represents a normalized URI, or false otherwise
/**
 * Check that the URI is normalized following character decoding.
 * <p>
 * This method checks for "\", 0, "//" (only when adjacent slashes are collapsed), a trailing "/." or "/..", "/./"
 * and "/../". It returns false if any sequence that is supposed to be normalized is still present in the URI.
 *
 * @param uriCC URI to be checked (should be chars)
 * @return {@code true} if the uriCC represents a normalized URI, or {@code false} otherwise
 */
public static boolean checkNormalize(final CharChunk uriCC) {
    final char[] c = uriCC.getChars();
    final int start = uriCC.getStart();
    final int end = uriCC.getEnd();

    // Check for '\' and 0
    for (int pos = start; pos < end; pos++) {
        if (c[pos] == '\\' || c[pos] == 0) {
            return false;
        }
    }

    if (COLLAPSE_ADJACENT_SLASHES) {
        // Check for "//"
        for (int pos = start; pos < end - 1; pos++) {
            if (c[pos] == '/' && c[pos + 1] == '/') {
                return false;
            }
        }
    }

    // Check for ending with "/." or "/.."
    final int length = end - start;
    if (length >= 2 && c[end - 1] == '.') {
        final char previous = c[end - 2];
        // "/.." needs three chars; without the length guard a two-char URI such as ".." would read c[start - 1]
        if (previous == '/' || previous == '.' && length >= 3 && c[end - 3] == '/') {
            return false;
        }
    }

    // Check for "/./"
    if (uriCC.indexOf("/./", 0, 3, 0) >= 0) {
        return false;
    }

    // Check for "/../"
    return uriCC.indexOf("/../", 0, 4, 0) < 0;
}
/**
 * Normalizes, in place, the URI held in a {@link CharChunk}.
 * <p>
 * Adjacent slashes are collapsed (when that feature is enabled), a trailing "/." or "/.." gets a "/" appended, and
 * "/./" and "/../" segments are resolved. The single-char URI "*" is accepted unchanged.
 *
 * @param uriCC URI to be normalized
 * @return {@code true} if normalization was successful; {@code false} if the URI is empty, contains a null char,
 * contains a '\' while backslashes are not allowed, does not start with '/', or tries to go above the root
 */
public static boolean normalizeChars(final CharChunk uriCC) {
    char[] c = uriCC.getChars();
    int start = uriCC.getStart();
    int end = uriCC.getEnd();

    // An empty URL is not acceptable (same rule as the byte-based normalizers);
    // this also keeps the c[start] read below inside the chunk
    if (start == end) {
        return false;
    }

    // URL * is acceptable
    if (end - start == 1 && c[start] == '*') {
        return true;
    }

    int pos;
    int index;

    // Replace '\' with '/'
    // Check for null char
    for (pos = start; pos < end; pos++) {
        if (c[pos] == '\\') {
            if (ALLOW_BACKSLASH) {
                c[pos] = '/';
            } else {
                return false;
            }
        }
        if (c[pos] == (char) 0) {
            return false;
        }
    }

    // The URL must start with '/'
    if (c[start] != '/') {
        return false;
    }

    // Replace "//" with "/"
    if (COLLAPSE_ADJACENT_SLASHES) {
        for (pos = start; pos < end - 1; pos++) {
            if (c[pos] == '/') {
                while (pos + 1 < end && c[pos + 1] == '/') {
                    copyChars(c, pos, pos + 1, end - pos - 1);
                    end--;
                }
            }
        }
    }

    // If the URI ends with "/." or "/..", then we append an extra "/"
    // Note: It is possible to extend the URI by 1 without any side effect
    // as the next character is a non-significant WS.
    if (end - start > 2 && c[end - 1] == '.') {
        if (c[end - 2] == '/' || c[end - 2] == '.' && c[end - 3] == '/') {
            c[end] = '/';
            end++;
        }
    }

    uriCC.setEnd(end);

    index = 0;

    // Resolve occurrences of "/./" in the normalized path
    // (index is an offset relative to the chunk start; end is an absolute array position)
    while (true) {
        index = uriCC.indexOf("/./", 0, 3, index);
        if (index < 0) {
            break;
        }
        copyChars(c, start + index, start + index + 2, end - start - index - 2);
        end = end - 2;
        uriCC.setEnd(end);
    }

    index = 0;

    // Resolve occurrences of "/../" in the normalized path
    while (true) {
        index = uriCC.indexOf("/../", 0, 4, index);
        if (index < 0) {
            break;
        }
        // Prevent from going outside our context
        if (index == 0) {
            return false;
        }
        // Find the '/' that opens the segment being removed. index2 is kept relative to the chunk start,
        // like index, because every use below adds start to it; the search never leaves the chunk.
        int index2 = -1;
        for (pos = start + index - 1; pos >= start && index2 < 0; pos--) {
            if (c[pos] == '/') {
                index2 = pos - start;
            }
        }
        copyChars(c, start + index2, start + index + 3, end - start - index - 3);
        end = end + index2 - index - 3;
        uriCC.setEnd(end);
        index = index2;
    }

    // setChars takes (buffer, offset, length); end is an absolute position, so convert it to a length
    uriCC.setChars(c, start, end - start);
    return true;
}
// ------------------------------------------------------ Protected Methods
// Copy an array of bytes to a different position. Used during normalization.
/**
 * Copy a range of bytes to a different position inside the same array. Intended for use during normalization
 * (not called by the code visible in this class).
 *
 * @param b the array that is both source and destination
 * @param dest index the first byte is copied to
 * @param src index the first byte is copied from
 * @param len number of bytes to copy
 */
protected static void copyBytes(byte[] b, int dest, int src, int len) {
System.arraycopy(b, src, b, dest, len);
}
// Copy an array of chars to a different position. Used during normalization.
/**
 * Copy a range of chars to a different position inside the same array. Used by
 * {@link #normalizeChars(CharChunk)} to close the gap left by a removed sequence.
 *
 * @param c the array that is both source and destination
 * @param dest index the first char is copied to
 * @param src index the first char is copied from
 * @param len number of chars to copy
 */
private static void copyChars(char[] c, int dest, int src, int len) {
System.arraycopy(c, src, c, dest, len);
}
// Log a message on the Logger associated with our Container (if any)
// Params: - message – Message to be logged
/**
 * Log a message at INFO level on this class's logger.
 *
 * @param message Message to be logged
 */
protected void log(String message) {
LOGGER.info(message);
}
// Log a message on the Logger associated with our Container (if any)
// Params: - message – Message to be logged
// - throwable – Associated exception
/**
 * Log a message together with its associated exception at SEVERE level on this class's logger.
 *
 * @param message Message to be logged
 * @param throwable Associated exception
 */
protected void log(String message, Throwable throwable) {
LOGGER.log(Level.SEVERE, message, throwable);
}
// Character conversion of a US-ASCII MessageBytes.
/**
 * Character conversion of a US-ASCII {@link MessageBytes}.
 * <p>
 * Each byte is widened to the char with the same unsigned value. Nothing happens unless the message currently
 * holds bytes.
 *
 * @param mb the message whose bytes are converted to chars
 */
protected void convertMB(MessageBytes mb) {
    // This is of course only meaningful for bytes
    if (mb.getType() == MessageBytes.T_BYTES) {
        final ByteChunk source = mb.getByteChunk();
        final CharChunk target = mb.getCharChunk();
        final int length = source.getLength();
        target.allocate(length, -1);

        // Default encoding: fast conversion
        final byte[] bytes = source.getBuffer();
        final char[] chars = target.getBuffer();
        final int offset = source.getStart();
        int out = 0;
        while (out < length) {
            chars[out] = (char) (bytes[offset + out] & 0xff);
            out++;
        }
        mb.setChars(chars, 0, length);
    }
}
// States of the scanner used by normalizeBytes and normalizeBuffer.
// Initial state; also entered after any byte other than '/' or '.'.
private static final int STATE_CHAR = 0;
// The previous significant byte was '/'.
private static final int STATE_SLASH = 1;
// Not referenced by the code visible in this class.
private static final int STATE_PERCENT = 2;
// The scanner has just read "/.".
private static final int STATE_SLASHDOT = 3;
// The scanner has just read "/..".
private static final int STATE_SLASHDOTDOT = 4;
/**
 * Normalizes, in place, the URI held in a {@link ByteChunk}.
 * <p>
 * The bytes are scanned once with a small state machine that compacts the URI towards its start: adjacent slashes
 * are collapsed (when that feature is enabled), "/./" is removed and "/../" removes the preceding segment. A
 * trailing "/." or "/.." (on URIs longer than two bytes) gets a "/" appended first so that it is resolved like any
 * other occurrence. A segment made of three or more dots (e.g. "/.../") is an ordinary segment and is kept. The
 * single-byte URI "*" is accepted unchanged.
 *
 * @param bc URI to be normalized
 * @return {@code true} if normalization was successful; {@code false} if the URI is empty, contains a null byte,
 * contains a '\' while backslashes are not allowed, or tries to go above the root
 */
public static boolean normalizeBytes(final ByteChunk bc) {
    byte[] bs = bc.getBytes();
    int start = bc.getStart();
    int end = bc.getEnd();

    // An empty URL is not acceptable
    if (start == end) {
        return false;
    }

    // URL * is acceptable
    if (end - start == 1 && bs[start] == (byte) '*') {
        return true;
    }

    // If the URI ends with "/." or "/..", then we append an extra "/"
    // Note: It is possible to extend the URI by 1 without any side effect
    // as the next character is a non-significant WS.
    if (end - start > 2 && bs[end - 1] == (byte) '.') {
        if (bs[end - 2] == (byte) '/' || bs[end - 2] == (byte) '.' && bs[end - 3] == (byte) '/') {
            bs[end] = (byte) '/';
            end++;
        }
    }

    int state = STATE_CHAR;
    // Write position of the compacted output; never ahead of the read position
    int srcPos = start;
    // Output positions of the last '/' written and of the one before it
    int lastSlash = -1;
    int parentSlash = -1;

    for (int pos = start; pos < end; pos++) {
        if (bs[pos] == (byte) 0) {
            return false;
        }
        if (bs[pos] == (byte) '\\') {
            if (ALLOW_BACKSLASH) {
                bs[pos] = (byte) '/';
            } else {
                return false;
            }
        }

        if (bs[pos] == '/') {
            if (state == STATE_CHAR) {
                state = STATE_SLASH;
                bs[srcPos] = bs[pos];
                parentSlash = lastSlash;
                lastSlash = srcPos;
                srcPos++;
            } else if (state == STATE_SLASH) {
                // This is '//'. Ignore if COLLAPSE_ADJACENT_SLASHES is true.
                // What is the behavior for '/../' patterns if collapse is false.
                // Ignoring for now.
                if (!COLLAPSE_ADJACENT_SLASHES) {
                    // Write the slash explicitly: after earlier compaction srcPos may lag behind pos,
                    // in which case the byte already at srcPos is stale
                    bs[srcPos++] = bs[pos];
                }
            } else if (state == STATE_SLASHDOT) {
                // This is '/./' ==> move the srcPos one position back
                srcPos--;
                // We are now right behind the surviving '/'; without this reset a following '.' or '/'
                // would be interpreted as if the removed '.' were still pending
                state = STATE_SLASH;
            } else if (state == STATE_SLASHDOTDOT) {
                // This is '/../' ==> search backward to reset lastSlash and parentSlash
                if (parentSlash == -1) {
                    // This is an error: the URI tries to go above the root
                    return false;
                } else {
                    lastSlash = parentSlash;
                    srcPos = parentSlash;
                    // Find the parentSlash
                    parentSlash = -1;
                    for (int i = lastSlash - 1; i >= start; i--) {
                        if (bs[i] == '/') {
                            parentSlash = i;
                            break;
                        }
                    }
                }
                state = STATE_SLASH;
                bs[srcPos++] = bs[pos];
            }
        } else if (bs[pos] == '.') {
            if (state == STATE_CHAR) {
                bs[srcPos++] = bs[pos];
            } else if (state == STATE_SLASH) {
                state = STATE_SLASHDOT;
                bs[srcPos++] = bs[pos];
            } else if (state == STATE_SLASHDOT) {
                state = STATE_SLASHDOTDOT;
                bs[srcPos++] = bs[pos];
            } else if (state == STATE_SLASHDOTDOT) {
                // A third dot: "/..." starts an ordinary segment, not a parent reference
                state = STATE_CHAR;
                bs[srcPos++] = bs[pos];
            }
        } else {
            state = STATE_CHAR;
            bs[srcPos++] = bs[pos];
        }
    }

    bc.setEnd(srcPos);
    return true;
}
/**
 * Normalizes, in place, the URI held in a {@link BufferChunk}.
 * <p>
 * The bytes are scanned once with a small state machine that compacts the URI towards its start: adjacent slashes
 * are collapsed (when that feature is enabled), "/./" is removed and "/../" removes the preceding segment. A
 * trailing "/." or "/.." (on URIs longer than two bytes) gets a "/" appended first so that it is resolved like any
 * other occurrence. A segment made of three or more dots (e.g. "/.../") is an ordinary segment and is kept. The
 * single-byte URI "*" is accepted unchanged.
 *
 * @param bc URI to be normalized
 * @return {@code true} if normalization was successful; {@code false} if the URI is empty, contains a null byte,
 * contains a '\' while backslashes are not allowed, or tries to go above the root
 */
public static boolean normalizeBuffer(final BufferChunk bc) {
    final Buffer bs = bc.getBuffer();
    final int start = bc.getStart();
    int end = bc.getEnd();

    // An empty URL is not acceptable
    if (start == end) {
        return false;
    }

    // URL * is acceptable
    if (end - start == 1 && bs.get(start) == (byte) '*') {
        return true;
    }

    // If the URI ends with "/." or "/..", then we append an extra "/"
    // Note: It is possible to extend the URI by 1 without any side effect
    // as the next character is a non-significant WS.
    if (end - start > 2 && bs.get(end - 1) == (byte) '.') {
        final byte b = bs.get(end - 2);
        if (b == (byte) '/' || b == (byte) '.' && bs.get(end - 3) == (byte) '/') {
            bs.put(end, (byte) '/');
            end++;
        }
    }

    int state = STATE_CHAR;
    // Write position of the compacted output; never ahead of the read position
    int srcPos = start;
    // Output positions of the last '/' written and of the one before it
    int lastSlash = -1;
    int parentSlash = -1;

    for (int pos = start; pos < end; pos++) {
        byte b = bs.get(pos);
        if (b == (byte) 0) {
            return false;
        }
        if (b == (byte) '\\') {
            if (ALLOW_BACKSLASH) {
                // Keep the local copy in sync with the buffer so the byte is handled as a '/' below
                b = (byte) '/';
                bs.put(pos, b);
            } else {
                return false;
            }
        }

        if (b == '/') {
            if (state == STATE_CHAR) {
                state = STATE_SLASH;
                bs.put(srcPos, b);
                parentSlash = lastSlash;
                lastSlash = srcPos;
                srcPos++;
            } else if (state == STATE_SLASH) {
                // This is '//'. Ignore if COLLAPSE_ADJACENT_SLASHES is true.
                // What is the behavior for '/../' patterns if collapse is false.
                // Ignoring for now.
                if (!COLLAPSE_ADJACENT_SLASHES) {
                    // Write the slash explicitly: after earlier compaction srcPos may lag behind pos,
                    // in which case the byte already at srcPos is stale
                    bs.put(srcPos++, b);
                }
            } else if (state == STATE_SLASHDOT) {
                // This is '/./' ==> move the srcPos one position back
                srcPos--;
                // We are now right behind the surviving '/'; without this reset a following '.' or '/'
                // would be interpreted as if the removed '.' were still pending
                state = STATE_SLASH;
            } else if (state == STATE_SLASHDOTDOT) {
                // This is '/../' ==> search backward to reset lastSlash and parentSlash
                if (parentSlash == -1) {
                    // This is an error: the URI tries to go above the root
                    return false;
                } else {
                    lastSlash = parentSlash;
                    srcPos = parentSlash;
                    // Find the parentSlash
                    parentSlash = -1;
                    for (int i = lastSlash - 1; i >= start; i--) {
                        if (bs.get(i) == '/') {
                            parentSlash = i;
                            break;
                        }
                    }
                }
                state = STATE_SLASH;
                bs.put(srcPos++, b);
            }
        } else if (b == '.') {
            if (state == STATE_CHAR) {
                bs.put(srcPos++, b);
            } else if (state == STATE_SLASH) {
                state = STATE_SLASHDOT;
                bs.put(srcPos++, b);
            } else if (state == STATE_SLASHDOT) {
                state = STATE_SLASHDOTDOT;
                bs.put(srcPos++, b);
            } else if (state == STATE_SLASHDOTDOT) {
                // A third dot: "/..." starts an ordinary segment, not a parent reference
                state = STATE_CHAR;
                bs.put(srcPos++, b);
            }
        } else {
            state = STATE_CHAR;
            bs.put(srcPos++, b);
        }
    }

    bc.setEnd(srcPos);
    return true;
}
}