java/8 : sun/text/normalizer/Utility.java

Utility
https://openjdk.java.net/
GPLv2 + Classpath Exception
/*
 * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 *******************************************************************************
 * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
 * Agreement between IBM and Sun. This technology is protected by multiple     *
 * US and International patents. This notice and attribution to IBM may not    *
 * to removed.                                                                 *
 *******************************************************************************
 */

package sun.text.normalizer;

public final class Utility {

    Convenience utility to compare two Object[]s
Ought to be in System.
Params: len – the length to compare.
The start indices and start+len must be valid./**
     * Convenience utility to compare two Object[]s
     * Ought to be in System.
     * @param len the length to compare.
     * The start indices and start+len must be valid.
     */
    public final static boolean arrayRegionMatches(char[] source, int sourceStart,
                                            char[] target, int targetStart,
                                            int len)
    {
        int sourceEnd = sourceStart + len;
        int delta = targetStart - sourceStart;
        for (int i = sourceStart; i < sourceEnd; i++) {
            if (source[i]!=target[i + delta])
            return false;
        }
        return true;
    }

    Convert characters outside the range U+0020 to U+007F to
Unicode escapes, and convert backslash to a double backslash.
/**
     * Convert characters outside the range U+0020 to U+007F to
     * Unicode escapes, and convert backslash to a double backslash.
     */
    public static final String escape(String s) {
        StringBuffer buf = new StringBuffer();
        for (int i=0; i<s.length(); ) {
            int c = UTF16.charAt(s, i);
            i += UTF16.getCharCount(c);
            if (c >= ' ' && c <= 0x007F) {
                if (c == '\\') {
                    buf.append("\\\\"); // That is, "\\"
                } else {
                    buf.append((char)c);
                }
            } else {
                boolean four = c <= 0xFFFF;
                buf.append(four ? "\\u" : "\\U");
                hex(c, four ? 4 : 8, buf);
            }
        }
        return buf.toString();
    }

    /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
    static private final char[] UNESCAPE_MAP = {
        /*"   0x22, 0x22 */
        /*'   0x27, 0x27 */
        /*?   0x3F, 0x3F */
        /*\   0x5C, 0x5C */
        /*a*/ 0x61, 0x07,
        /*b*/ 0x62, 0x08,
        /*e*/ 0x65, 0x1b,
        /*f*/ 0x66, 0x0c,
        /*n*/ 0x6E, 0x0a,
        /*r*/ 0x72, 0x0d,
        /*t*/ 0x74, 0x09,
        /*v*/ 0x76, 0x0b
    };

    Convert an escape to a 32-bit code point value.  We attempt
to parallel the icu4c unescapeAt() function.
Params: offset16 – an array containing offset to the character
after the backslash.  Upon return offset16[0] will
be updated to point after the escape sequence.
Returns: character value from 0 to 10FFFF, or -1 on error./**
     * Convert an escape to a 32-bit code point value.  We attempt
     * to parallel the icu4c unescapeAt() function.
     * @param offset16 an array containing offset to the character
     * <em>after</em> the backslash.  Upon return offset16[0] will
     * be updated to point after the escape sequence.
     * @return character value from 0 to 10FFFF, or -1 on error.
     */
    public static int unescapeAt(String s, int[] offset16) {
        int c;
        int result = 0;
        int n = 0;
        int minDig = 0;
        int maxDig = 0;
        int bitsPerDigit = 4;
        int dig;
        int i;
        boolean braces = false;

        /* Check that offset is in range */
        int offset = offset16[0];
        int length = s.length();
        if (offset < 0 || offset >= length) {
            return -1;
        }

        /* Fetch first UChar after '\\' */
        c = UTF16.charAt(s, offset);
        offset += UTF16.getCharCount(c);

        /* Convert hexadecimal and octal escapes */
        switch (c) {
        case 'u':
            minDig = maxDig = 4;
            break;
        case 'U':
            minDig = maxDig = 8;
            break;
        case 'x':
            minDig = 1;
            if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
                ++offset;
                braces = true;
                maxDig = 8;
            } else {
                maxDig = 2;
            }
            break;
        default:
            dig = UCharacter.digit(c, 8);
            if (dig >= 0) {
                minDig = 1;
                maxDig = 3;
                n = 1; /* Already have first octal digit */
                bitsPerDigit = 3;
                result = dig;
            }
            break;
        }
        if (minDig != 0) {
            while (offset < length && n < maxDig) {
                c = UTF16.charAt(s, offset);
                dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
                if (dig < 0) {
                    break;
                }
                result = (result << bitsPerDigit) | dig;
                offset += UTF16.getCharCount(c);
                ++n;
            }
            if (n < minDig) {
                return -1;
            }
            if (braces) {
                if (c != 0x7D /*}*/) {
                    return -1;
                }
                ++offset;
            }
            if (result < 0 || result >= 0x110000) {
                return -1;
            }
            // If an escape sequence specifies a lead surrogate, see
            // if there is a trail surrogate after it, either as an
            // escape or as a literal.  If so, join them up into a
            // supplementary.
            if (offset < length &&
                UTF16.isLeadSurrogate((char) result)) {
                int ahead = offset+1;
                c = s.charAt(offset); // [sic] get 16-bit code unit
                if (c == '\\' && ahead < length) {
                    int o[] = new int[] { ahead };
                    c = unescapeAt(s, o);
                    ahead = o[0];
                }
                if (UTF16.isTrailSurrogate((char) c)) {
                    offset = ahead;
                result = UCharacterProperty.getRawSupplementary(
                                  (char) result, (char) c);
                }
            }
            offset16[0] = offset;
            return result;
        }

        /* Convert C-style escapes in table */
        for (i=0; i<UNESCAPE_MAP.length; i+=2) {
            if (c == UNESCAPE_MAP[i]) {
                offset16[0] = offset;
                return UNESCAPE_MAP[i+1];
            } else if (c < UNESCAPE_MAP[i]) {
                break;
            }
        }

        /* Map \cX to control-X: X & 0x1F */
        if (c == 'c' && offset < length) {
            c = UTF16.charAt(s, offset);
            offset16[0] = offset + UTF16.getCharCount(c);
            return 0x1F & c;
        }

        /* If no special forms are recognized, then consider
         * the backslash to generically escape the next character. */
        offset16[0] = offset;
        return c;
    }

    Convert a integer to size width hex uppercase digits.
E.g., hex('a', 4, str) => "0041".
Append the output to the given StringBuffer.
If width is too small to fit, nothing will be appended to output.
/**
     * Convert a integer to size width hex uppercase digits.
     * E.g., hex('a', 4, str) => "0041".
     * Append the output to the given StringBuffer.
     * If width is too small to fit, nothing will be appended to output.
     */
    public static StringBuffer hex(int ch, int width, StringBuffer output) {
        return appendNumber(output, ch, 16, width);
    }

    Convert a integer to size width (minimum) hex uppercase digits.
E.g., hex('a', 4, str) => "0041".  If the integer requires more
than width digits, more will be used.
/**
     * Convert a integer to size width (minimum) hex uppercase digits.
     * E.g., hex('a', 4, str) => "0041".  If the integer requires more
     * than width digits, more will be used.
     */
    public static String hex(int ch, int width) {
        StringBuffer buf = new StringBuffer();
        return appendNumber(buf, ch, 16, width).toString();
    }

    Skip over a sequence of zero or more white space characters
at pos.  Return the index of the first non-white-space character
at or after pos, or str.length(), if there is none.
/**
     * Skip over a sequence of zero or more white space characters
     * at pos.  Return the index of the first non-white-space character
     * at or after pos, or str.length(), if there is none.
     */
    public static int skipWhitespace(String str, int pos) {
        while (pos < str.length()) {
            int c = UTF16.charAt(str, pos);
            if (!UCharacterProperty.isRuleWhiteSpace(c)) {
                break;
            }
            pos += UTF16.getCharCount(c);
        }
        return pos;
    }

    static final char DIGITS[] = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
        'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
        'U', 'V', 'W', 'X', 'Y', 'Z'
    };

    Append the digits of a positive integer to the given
StringBuffer in the given radix. This is
done recursively since it is easiest to generate the low-
order digit first, but it must be appended last.
Params: result – is the StringBuffer to append to
n – is the positive integer
radix – is the radix, from 2 to 36 inclusive
minDigits – is the minimum number of digits to append./**
     * Append the digits of a positive integer to the given
     * <code>StringBuffer</code> in the given radix. This is
     * done recursively since it is easiest to generate the low-
     * order digit first, but it must be appended last.
     *
     * @param result is the <code>StringBuffer</code> to append to
     * @param n is the positive integer
     * @param radix is the radix, from 2 to 36 inclusive
     * @param minDigits is the minimum number of digits to append.
     */
    private static void recursiveAppendNumber(StringBuffer result, int n,
                                                int radix, int minDigits)
    {
        int digit = n % radix;

        if (n >= radix || minDigits > 1) {
            recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
        }

        result.append(DIGITS[digit]);
    }

    Append a number to the given StringBuffer in the given radix.
Standard digits '0'-'9' are used and letters 'A'-'Z' for
radices 11 through 36.
Params: result – the digits of the number are appended here
n – the number to be converted to digits; may be negative.
If negative, a '-' is prepended to the digits.
radix – a radix from 2 to 36 inclusive.
minDigits – the minimum number of digits, not including
any '-', to produce.  Values less than 2 have no effect.  One
digit is always emitted regardless of this parameter.
Returns: a reference to result/**
     * Append a number to the given StringBuffer in the given radix.
     * Standard digits '0'-'9' are used and letters 'A'-'Z' for
     * radices 11 through 36.
     * @param result the digits of the number are appended here
     * @param n the number to be converted to digits; may be negative.
     * If negative, a '-' is prepended to the digits.
     * @param radix a radix from 2 to 36 inclusive.
     * @param minDigits the minimum number of digits, not including
     * any '-', to produce.  Values less than 2 have no effect.  One
     * digit is always emitted regardless of this parameter.
     * @return a reference to result
     */
    public static StringBuffer appendNumber(StringBuffer result, int n,
                                             int radix, int minDigits)
        throws IllegalArgumentException
    {
        if (radix < 2 || radix > 36) {
            throw new IllegalArgumentException("Illegal radix " + radix);
        }


        int abs = n;

        if (n < 0) {
            abs = -n;
            result.append("-");
        }

        recursiveAppendNumber(result, abs, radix, minDigits);

        return result;
    }

    Return true if the character is NOT printable ASCII.  The tab,
newline and linefeed characters are considered unprintable.
/**
     * Return true if the character is NOT printable ASCII.  The tab,
     * newline and linefeed characters are considered unprintable.
     */
    public static boolean isUnprintable(int c) {
        return !(c >= 0x20 && c <= 0x7E);
    }

    Escape unprintable characters using uxxxx notation
for U+0000 to U+FFFF and Uxxxxxxxx for U+10000 and
above.  If the character is printable ASCII, then do nothing
and return FALSE.  Otherwise, append the escaped notation and
return TRUE.
/**
     * Escape unprintable characters using <backslash>uxxxx notation
     * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
     * above.  If the character is printable ASCII, then do nothing
     * and return FALSE.  Otherwise, append the escaped notation and
     * return TRUE.
     */
    public static boolean escapeUnprintable(StringBuffer result, int c) {
        if (isUnprintable(c)) {
            result.append('\\');
            if ((c & ~0xFFFF) != 0) {
                result.append('U');
                result.append(DIGITS[0xF&(c>>28)]);
                result.append(DIGITS[0xF&(c>>24)]);
                result.append(DIGITS[0xF&(c>>20)]);
                result.append(DIGITS[0xF&(c>>16)]);
            } else {
                result.append('u');
            }
            result.append(DIGITS[0xF&(c>>12)]);
            result.append(DIGITS[0xF&(c>>8)]);
            result.append(DIGITS[0xF&(c>>4)]);
            result.append(DIGITS[0xF&c]);
            return true;
        }
        return false;
    }

    Similar to StringBuffer.getChars, version 1.3.
Since JDK 1.2 implements StringBuffer.getChars differently, this method
is here to provide consistent results.
To be removed after JDK 1.2 ceased to be the reference platform.
Params: src – source string buffer
srcBegin – offset to the start of the src to retrieve from
srcEnd – offset to the end of the src to retrieve from
dst – char array to store the retrieved chars
dstBegin – offset to the start of the destination char array to
                store the retrieved chars/**
    * Similar to StringBuffer.getChars, version 1.3.
    * Since JDK 1.2 implements StringBuffer.getChars differently, this method
    * is here to provide consistent results.
    * To be removed after JDK 1.2 ceased to be the reference platform.
    * @param src source string buffer
    * @param srcBegin offset to the start of the src to retrieve from
    * @param srcEnd offset to the end of the src to retrieve from
    * @param dst char array to store the retrieved chars
    * @param dstBegin offset to the start of the destination char array to
    *                 store the retrieved chars
    */
    public static void getChars(StringBuffer src, int srcBegin, int srcEnd,
                                char dst[], int dstBegin)
    {
        if (srcBegin == srcEnd) {
            return;
        }
        src.getChars(srcBegin, srcEnd, dst, dstBegin);
    }

}
Params:	offset16 – an array containing offset to the character after the backslash. Upon return offset16[0] will be updated to point after the escape sequence.
Returns:	character value from 0 to 10FFFF, or -1 on error.
Params:	result – the digits of the number are appended here n – the number to be converted to digits; may be negative. If negative, a '-' is prepended to the digits. radix – a radix from 2 to 36 inclusive. minDigits – the minimum number of digits, not including any '-', to produce. Values less than 2 have no effect. One digit is always emitted regardless of this parameter.
Returns:	a reference to result
/

java/ 8/ sun/text/normalizer/Utility.java