/*
 * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 *******************************************************************************
 * Copyright (C) 1996-2011, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

package sun.text.normalizer;

import java.io.IOException;
import java.util.Locale;

final class Utility {

    
Convert characters outside the range U+0020 to U+007F to Unicode escapes, and convert backslash to a double backslash.
/** * Convert characters outside the range U+0020 to U+007F to * Unicode escapes, and convert backslash to a double backslash. */
public static final String escape(String s) { StringBuilder buf = new StringBuilder(); for (int i=0; i<s.length(); ) { int c = Character.codePointAt(s, i); i += UTF16.getCharCount(c); if (c >= ' ' && c <= 0x007F) { if (c == '\\') { buf.append("\\\\"); // That is, "\\" } else { buf.append((char)c); } } else { boolean four = c <= 0xFFFF; buf.append(four ? "\\u" : "\\U"); buf.append(hex(c, four ? 4 : 8)); } } return buf.toString(); } /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */ private static final char[] UNESCAPE_MAP = { /*" 0x22, 0x22 */ /*' 0x27, 0x27 */ /*? 0x3F, 0x3F */ /*\ 0x5C, 0x5C */ /*a*/ 0x61, 0x07, /*b*/ 0x62, 0x08, /*e*/ 0x65, 0x1b, /*f*/ 0x66, 0x0c, /*n*/ 0x6E, 0x0a, /*r*/ 0x72, 0x0d, /*t*/ 0x74, 0x09, /*v*/ 0x76, 0x0b };
Convert an escape to a 32-bit code point value. We attempt to parallel the icu4c unescapeAt() function.
Params:
  • offset16 – an array containing offset to the character after the backslash. Upon return offset16[0] will be updated to point after the escape sequence.
Returns:character value from 0 to 10FFFF, or -1 on error.
/** * Convert an escape to a 32-bit code point value. We attempt * to parallel the icu4c unescapeAt() function. * @param offset16 an array containing offset to the character * <em>after</em> the backslash. Upon return offset16[0] will * be updated to point after the escape sequence. * @return character value from 0 to 10FFFF, or -1 on error. */
public static int unescapeAt(String s, int[] offset16) { int c; int result = 0; int n = 0; int minDig = 0; int maxDig = 0; int bitsPerDigit = 4; int dig; int i; boolean braces = false; /* Check that offset is in range */ int offset = offset16[0]; int length = s.length(); if (offset < 0 || offset >= length) { return -1; } /* Fetch first UChar after '\\' */ c = Character.codePointAt(s, offset); offset += UTF16.getCharCount(c); /* Convert hexadecimal and octal escapes */ switch (c) { case 'u': minDig = maxDig = 4; break; case 'U': minDig = maxDig = 8; break; case 'x': minDig = 1; if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) { ++offset; braces = true; maxDig = 8; } else { maxDig = 2; } break; default: dig = UCharacter.digit(c, 8); if (dig >= 0) { minDig = 1; maxDig = 3; n = 1; /* Already have first octal digit */ bitsPerDigit = 3; result = dig; } break; } if (minDig != 0) { while (offset < length && n < maxDig) { c = UTF16.charAt(s, offset); dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16); if (dig < 0) { break; } result = (result << bitsPerDigit) | dig; offset += UTF16.getCharCount(c); ++n; } if (n < minDig) { return -1; } if (braces) { if (c != 0x7D /*}*/) { return -1; } ++offset; } if (result < 0 || result >= 0x110000) { return -1; } // If an escape sequence specifies a lead surrogate, see // if there is a trail surrogate after it, either as an // escape or as a literal. If so, join them up into a // supplementary. if (offset < length && UTF16.isLeadSurrogate((char) result)) { int ahead = offset+1; c = s.charAt(offset); // [sic] get 16-bit code unit if (c == '\\' && ahead < length) { int o[] = new int[] { ahead }; c = unescapeAt(s, o); ahead = o[0]; } if (UTF16.isTrailSurrogate((char) c)) { offset = ahead; result = UCharacterProperty.getRawSupplementary( (char) result, (char) c); } } offset16[0] = offset; return result; } /* Convert C-style escapes in table */ for (i=0; i<UNESCAPE_MAP.length; i+=2) { if (c == UNESCAPE_MAP[i]) { offset16[0] = offset; return UNESCAPE_MAP[i+1]; } else if (c < UNESCAPE_MAP[i]) { break; } } /* Map \cX to control-X: X & 0x1F */ if (c == 'c' && offset < length) { c = UTF16.charAt(s, offset); offset16[0] = offset + UTF16.getCharCount(c); return 0x1F & c; } /* If no special forms are recognized, then consider * the backslash to generically escape the next character. */ offset16[0] = offset; return c; }
Supplies a zero-padded hex representation of an integer (without 0x)
/** * Supplies a zero-padded hex representation of an integer (without 0x) */
public static String hex(long i, int places) { if (i == Long.MIN_VALUE) return "-8000000000000000"; boolean negative = i < 0; if (negative) { i = -i; } String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH); if (result.length() < places) { result = "0000000000000000".substring(result.length(),places) + result; } if (negative) { return '-' + result; } return result; } static final char DIGITS[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' };
Return true if the character is NOT printable ASCII. The tab, newline and linefeed characters are considered unprintable.
/** * Return true if the character is NOT printable ASCII. The tab, * newline and linefeed characters are considered unprintable. */
public static boolean isUnprintable(int c) { //0x20 = 32 and 0x7E = 126 return !(c >= 0x20 && c <= 0x7E); }
Escape unprintable characters using uxxxx notation for U+0000 to U+FFFF and Uxxxxxxxx for U+10000 and above. If the character is printable ASCII, then do nothing and return FALSE. Otherwise, append the escaped notation and return TRUE.
/** * Escape unprintable characters using <backslash>uxxxx notation * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and * above. If the character is printable ASCII, then do nothing * and return FALSE. Otherwise, append the escaped notation and * return TRUE. */
public static <T extends Appendable> boolean escapeUnprintable(T result, int c) { try { if (isUnprintable(c)) { result.append('\\'); if ((c & ~0xFFFF) != 0) { result.append('U'); result.append(DIGITS[0xF&(c>>28)]); result.append(DIGITS[0xF&(c>>24)]); result.append(DIGITS[0xF&(c>>20)]); result.append(DIGITS[0xF&(c>>16)]); } else { result.append('u'); } result.append(DIGITS[0xF&(c>>12)]); result.append(DIGITS[0xF&(c>>8)]); result.append(DIGITS[0xF&(c>>4)]); result.append(DIGITS[0xF&c]); return true; } return false; } catch (IOException e) { throw new IllegalArgumentException(e); } } }