org.apache.tomcat/tomcat-util/10.0.0 : org/apache/tomcat/util/buf/UDecoder.java

UDecoder
https://tomcat.apache.org/: Common code shared by multiple Tomcat components
Apache License, Version 2.0
/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.apache.tomcat.util.buf;

import java.io.ByteArrayOutputStream;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import org.apache.tomcat.util.res.StringManager;

 All URL decoding happens here. This way we can reuse, review, optimize
 without adding complexity to the buffers.
 The conversion will modify the original buffer.
 @author Costin Manolache
/**
 *  All URL decoding happens here. This way we can reuse, review, optimize
 *  without adding complexity to the buffers.
 *
 *  The conversion will modify the original buffer.
 *
 *  @author Costin Manolache
 */
public final class UDecoder {

    private static final StringManager sm = StringManager.getManager(UDecoder.class);

    private static class DecodeException extends CharConversionException {
        private static final long serialVersionUID = 1L;
        public DecodeException(String s) {
            super(s);
        }

        @Override
        public synchronized Throwable fillInStackTrace() {
            // This class does not provide a stack trace
            return this;
        }
    }

    Unexpected end of data. /** Unexpected end of data. */
    private static final IOException EXCEPTION_EOF = new DecodeException(sm.getString("uDecoder.eof"));

    %xx with not-hex digit /** %xx with not-hex digit */
    private static final IOException EXCEPTION_NOT_HEX_DIGIT = new DecodeException(
            "isHexDigit");

    %-encoded slash is forbidden in resource path /** %-encoded slash is forbidden in resource path */
    private static final IOException EXCEPTION_SLASH = new DecodeException(
            "noSlash");


    URLDecode, will modify the source. Assumes source bytes are encoded using
a superset of US-ASCII as per RFC 7230. "%2f" will be rejected unless the
input is a query string.
Params: mb –    The URL encoded bytes
query – true if this is a query string. For a query string '+' will be decoded to ' '
Throws: IOException – Invalid %xx URL encoding/**
     * URLDecode, will modify the source. Assumes source bytes are encoded using
     * a superset of US-ASCII as per RFC 7230. "%2f" will be rejected unless the
     * input is a query string.
     *
     * @param mb    The URL encoded bytes
     * @param query {@code true} if this is a query string. For a query string
     *                  '+' will be decoded to ' '
     *
     * @throws IOException Invalid %xx URL encoding
     */
    public void convert(ByteChunk mb, boolean query) throws IOException {
        if (query) {
            convert(mb, true, EncodedSolidusHandling.DECODE);
        } else {
            convert(mb, false, EncodedSolidusHandling.REJECT);
        }
    }


    URLDecode, will modify the source. Assumes source bytes are encoded using
a superset of US-ASCII as per RFC 7230.
Params: mb –                        The URL encoded bytes
encodedSolidusHandling –    How should the %2f sequence handled by
                                     the decoder? For query strings this
                                     parameter will be ignored and the
                                     %2f sequence will be decoded
Throws: IOException – Invalid %xx URL encoding/**
     * URLDecode, will modify the source. Assumes source bytes are encoded using
     * a superset of US-ASCII as per RFC 7230.
     *
     * @param mb                        The URL encoded bytes
     * @param encodedSolidusHandling    How should the %2f sequence handled by
     *                                      the decoder? For query strings this
     *                                      parameter will be ignored and the
     *                                      %2f sequence will be decoded
     * @throws IOException Invalid %xx URL encoding
     */
    public void convert(ByteChunk mb, EncodedSolidusHandling encodedSolidusHandling) throws IOException {
        convert(mb, false, encodedSolidusHandling);
    }


    private void convert(ByteChunk mb, boolean query, EncodedSolidusHandling encodedSolidusHandling) throws IOException {

        int start=mb.getOffset();
        byte buff[]=mb.getBytes();
        int end=mb.getEnd();

        int idx= ByteChunk.findByte( buff, start, end, (byte) '%' );
        int idx2=-1;
        if( query ) {
            idx2= ByteChunk.findByte( buff, start, (idx >= 0 ? idx : end), (byte) '+' );
        }
        if( idx<0 && idx2<0 ) {
            return;
        }

        // idx will be the smallest positive index ( first % or + )
        if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) {
            idx=idx2;
        }

        for( int j=idx; j<end; j++, idx++ ) {
            if( buff[ j ] == '+' && query) {
                buff[idx]= (byte)' ' ;
            } else if( buff[ j ] != '%' ) {
                buff[idx]= buff[j];
            } else {
                // read next 2 digits
                if( j+2 >= end ) {
                    throw EXCEPTION_EOF;
                }
                byte b1= buff[j+1];
                byte b2=buff[j+2];
                if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) {
                    throw EXCEPTION_NOT_HEX_DIGIT;
                }

                j+=2;
                int res=x2c( b1, b2 );
                if (res == '/') {
                    switch (encodedSolidusHandling) {
                    case DECODE: {
                        buff[idx]=(byte)res;
                        break;
                    }
                    case REJECT: {
                        throw EXCEPTION_SLASH;
                    }
                    case PASS_THROUGH: {
                        buff[idx++] = buff[j-2];
                        buff[idx++] = buff[j-1];
                        buff[idx] = buff[j];
                    }
                    }
                } else {
                    buff[idx]=(byte)res;
                }
            }
        }

        mb.setEnd( idx );
    }

    // -------------------- Additional methods --------------------

    Decode and return the specified URL-encoded String. It is assumed the
string is not a query string.
Params: str – The url-encoded string
charset – The character encoding to use; if null, UTF-8 is used.
Throws: IllegalArgumentException – if a '%' character is not followed
by a valid 2-digit hexadecimal number
Returns: the decoded string/**
     * Decode and return the specified URL-encoded String. It is assumed the
     * string is not a query string.
     *
     * @param str The url-encoded string
     * @param charset The character encoding to use; if null, UTF-8 is used.
     * @return the decoded string
     * @exception IllegalArgumentException if a '%' character is not followed
     * by a valid 2-digit hexadecimal number
     */
    public static String URLDecode(String str, Charset charset) {
        if (str == null) {
            return null;
        }

        if (str.indexOf('%') == -1) {
            // No %nn sequences, so return string unchanged
            return str;
        }

        if (charset == null) {
            charset = StandardCharsets.UTF_8;
        }

        /*
         * Decoding is required.
         *
         * Potential complications:
         * - The source String may be partially decoded so it is not valid to
         *   assume that the source String is ASCII.
         * - Have to process as characters since there is no guarantee that the
         *   byte sequence for '%' is going to be the same in all character
         *   sets.
         * - We don't know how many '%nn' sequences are required for a single
         *   character. It varies between character sets and some use a variable
         *   length.
         */

        // This isn't perfect but it is a reasonable guess for the size of the
        // array required
        ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length() * 2);

        OutputStreamWriter osw = new OutputStreamWriter(baos, charset);

        char[] sourceChars = str.toCharArray();
        int len = sourceChars.length;
        int ix = 0;

        try {
            while (ix < len) {
                char c = sourceChars[ix++];
                if (c == '%') {
                    osw.flush();
                    if (ix + 2 > len) {
                        throw new IllegalArgumentException(
                                sm.getString("uDecoder.urlDecode.missingDigit", str));
                    }
                    char c1 = sourceChars[ix++];
                    char c2 = sourceChars[ix++];
                    if (isHexDigit(c1) && isHexDigit(c2)) {
                        baos.write(x2c(c1, c2));
                    } else {
                        throw new IllegalArgumentException(
                                sm.getString("uDecoder.urlDecode.missingDigit", str));
                    }
                } else {
                    osw.append(c);
                }
            }
            osw.flush();

            return baos.toString(charset.name());
        } catch (IOException ioe) {
            throw new IllegalArgumentException(
                    sm.getString("uDecoder.urlDecode.conversionError", str, charset.name()), ioe);
        }
    }


    private static boolean isHexDigit( int c ) {
        return ( ( c>='0' && c<='9' ) ||
                 ( c>='a' && c<='f' ) ||
                 ( c>='A' && c<='F' ));
    }


    private static int x2c( byte b1, byte b2 ) {
        int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 :
            (b1 -'0');
        digit*=16;
        digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 :
            (b2 -'0');
        return digit;
    }


    private static int x2c( char b1, char b2 ) {
        int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 :
            (b1 -'0');
        digit*=16;
        digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 :
            (b2 -'0');
        return digit;
    }
}
Params:	mb – The URL encoded bytes query – `true` if this is a query string. For a query string '+' will be decoded to ' '
Throws:	IOException – Invalid %xx URL encoding
Params:	mb – The URL encoded bytes encodedSolidusHandling – How should the %2f sequence handled by the decoder? For query strings this parameter will be ignored and the %2f sequence will be decoded
Throws:	IOException – Invalid %xx URL encoding
Params:	str – The url-encoded string charset – The character encoding to use; if null, UTF-8 is used.
Throws:	IllegalArgumentException – if a '%' character is not followed by a valid 2-digit hexadecimal number
Returns:	the decoded string
/

org.apache.tomcat/ tomcat-util/ 10.0.0/ org/apache/tomcat/util/buf/UDecoder.java