java/8 : sun/tools/native2ascii/A2NFilter.java

A2NFilter
https://openjdk.java.net/
GPLv2 + Classpath Exception
/*
 * Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

This FilterReader class processes a sequence of characters from
a source stream containing a mixture of 7-bit ASCII data and
backslash-u escape sequences representing characters that may be
encodable in a user-specified target encoding.
The filter relies on knowing the target encoding and determines
whether each escaped character in its source character stream is
encodable in that encoding.
If it is not, the character remains in its backslash-u escaped form.
/**
 * This FilterReader class processes a sequence of characters from
 * a source stream containing a mixture of 7-bit ASCII data and
 * backslash-u escape sequences representing characters that may be
 * encodable in a user-specified target encoding.
 * The filter relies on knowing the target encoding and determines
 * whether each escaped character in its source character stream is
 * encodable in that encoding.
 * If it is not, the character remains in its backslash-u escaped form.
 */

package sun.tools.native2ascii;
import java.io.*;


/**
 * Reader filter that replaces backslash-u escapes (a backslash, the letter
 * 'u' and four hexadecimal digits) in the underlying character stream with
 * the character they denote, whenever {@code Main.canConvert} accepts that
 * character. Every other character, including escapes that are malformed or
 * rejected by {@code Main.canConvert}, is passed through unchanged.
 *
 * <p>This class performs no synchronization of its own.
 */
class A2NFilter extends FilterReader {

    /** Length of a complete escape: backslash, 'u' and four hex digits. */
    private static final int ESCAPE_LENGTH = 6;

    // Input held over from the previous invocation: a backslash plus up to
    // four following characters that could not yet be classified because the
    // rest of the potential escape had not been read.
    private char[] trailChars = null;

    // Output that has already been decoded but did not fit into the caller's
    // buffer (only possible when the caller asks for fewer than
    // ESCAPE_LENGTH characters). Delivered before any new input is read.
    private char[] pendingOut = null;
    private int pendingPos = 0;

    public A2NFilter(Reader in) {
        super(in);
    }

    /**
     * Reads decoded characters into a portion of an array.
     *
     * @param buf destination buffer
     * @param off index in {@code buf} at which to start storing characters
     * @param len maximum number of characters to store
     * @return the number of characters stored, or -1 once the underlying
     *         stream is exhausted and nothing is left to deliver; 0 is
     *         returned only when {@code len} is 0 or the underlying reader
     *         itself returned 0
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is
     *         negative, or {@code len} exceeds {@code buf.length - off}
     * @throws IOException if the underlying reader fails
     */
    @Override
    public int read(char[] buf, int off, int len) throws IOException {
        if (off < 0 || len < 0 || len > buf.length - off) {
            throw new IndexOutOfBoundsException();
        }
        if (len == 0) {
            return 0;
        }

        // Hand out previously decoded characters before touching the source.
        if (pendingOut != null) {
            return drainPending(buf, off, len);
        }

        while (true) {
            int held = (trailChars == null) ? 0 : trailChars.length;

            // The working buffer always has room for one complete escape, so
            // a tiny caller buffer (e.g. the single-char read()) can neither
            // overflow it nor prevent an escape from ever being recognized.
            // held is at most ESCAPE_LENGTH - 1, so at least one new
            // character is requested on every pass.
            char[] cBuf = new char[Math.max(len, ESCAPE_LENGTH)];
            if (held > 0) {
                System.arraycopy(trailChars, 0, cBuf, 0, held);
            }

            int n = in.read(cBuf, held, cBuf.length - held);
            // Drop the held-over input only after the read succeeded, so an
            // IOException does not lose it.
            trailChars = null;

            boolean eof = (n < 0);
            int numChars = held;
            if (eof) {
                if (held == 0) {
                    return -1;          // EOF and nothing left to flush
                }
            } else {
                numChars += n;
            }

            // Decoding never yields more characters than it consumes.
            char[] out = new char[numChars];
            int produced = decode(cBuf, numChars, eof, out);

            if (produced > 0) {
                int delivered = Math.min(produced, len);
                System.arraycopy(out, 0, buf, off, delivered);
                if (delivered < produced) {
                    pendingOut = new char[produced - delivered];
                    System.arraycopy(out, delivered, pendingOut, 0, pendingOut.length);
                    pendingPos = 0;
                }
                return delivered;
            }

            // Nothing could be emitted: the chunk consisted solely of an
            // incomplete escape that is now stored in trailChars. Read more,
            // unless the source made no progress, in which case report that
            // instead of spinning.
            if (n == 0) {
                return 0;
            }
        }
    }

    /**
     * Reads a single decoded character.
     *
     * @return the character as an int, or -1 at end of stream
     * @throws IOException if the underlying reader fails
     */
    @Override
    public int read() throws IOException {
        char[] one = new char[1];
        int n;
        do {
            n = read(one, 0, 1);
        } while (n == 0);   // never report a character that was not read
        return (n < 0) ? -1 : one[0];
    }

    /**
     * Reports whether a read is guaranteed not to block: true when decoded
     * output is already waiting, otherwise whatever the underlying reader
     * reports.
     */
    @Override
    public boolean ready() throws IOException {
        return pendingOut != null || in.ready();
    }

    // Copies as much of the already-decoded output as fits into buf.
    private int drainPending(char[] buf, int off, int len) {
        int count = Math.min(len, pendingOut.length - pendingPos);
        System.arraycopy(pendingOut, pendingPos, buf, off, count);
        pendingPos += count;
        if (pendingPos == pendingOut.length) {
            pendingOut = null;
            pendingPos = 0;
        }
        return count;
    }

    // Decodes cBuf[0..numChars) into out and returns the number of chars
    // written; an undecidable tail (incomplete escape) is saved in trailChars.
    private int decode(char[] cBuf, int numChars, boolean eof, char[] out) {
        int produced = 0;
        int i = 0;

        while (i < numChars) {
            char c = cBuf[i++];

            // Non-backslash characters always pass straight through. At EOF
            // only a held-over tail shorter than a full escape is left, so
            // any backslash in it is a literal U+005C and is copied verbatim.
            if (c != '\\' || eof) {
                out[produced++] = c;
                continue;
            }

            int remaining = numChars - i;
            if (remaining < ESCAPE_LENGTH - 1) {
                // Might be the start of an escape, but there are not enough
                // characters to tell; keep the backslash and what follows
                // for the next pass.
                trailChars = new char[1 + remaining];
                System.arraycopy(cBuf, i - 1, trailChars, 0, 1 + remaining);
                break;
            }

            // At least five characters follow the backslash.
            c = cBuf[i++];
            if (c != 'u') {
                // Not a unicode escape: emit both characters as they are.
                out[produced++] = '\\';
                out[produced++] = c;
                continue;
            }

            int code = parseHex4(cBuf, i);
            if (code >= 0 && Main.canConvert((char) code)) {
                out[produced++] = (char) code;
                i += 4;     // step past the four hex digits
            } else {
                // Malformed or not convertible: keep the escape prefix and
                // let the following four characters be processed normally.
                out[produced++] = '\\';
                out[produced++] = 'u';
            }
        }
        return produced;
    }

    // Parses exactly four ASCII hex digits starting at chars[start]; returns
    // the value, or -1 if any of them is not an ASCII hex digit. Unlike
    // Integer.parseInt this rejects a leading sign and non-ASCII digits.
    private static int parseHex4(char[] chars, int start) {
        int value = 0;
        for (int k = 0; k < 4; k++) {
            char ch = chars[start + k];
            int digit;
            if (ch >= '0' && ch <= '9') {
                digit = ch - '0';
            } else if (ch >= 'a' && ch <= 'f') {
                digit = ch - 'a' + 10;
            } else if (ch >= 'A' && ch <= 'F') {
                digit = ch - 'A' + 10;
            } else {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

}
/

java/ 8/ sun/tools/native2ascii/A2NFilter.java