/*
 * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.internal.util.xml.impl;

import java.io.Reader;
import java.io.InputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;

UTF-8 transformed UCS-2 character stream reader. This reader converts UTF-8 transformed UCS-2 characters to Java characters. The UCS-2 subset of UTF-8 transformation is described in RFC-2279 #2 "UTF-8 definition": 0000 0000-0000 007F 0xxxxxxx 0000 0080-0000 07FF 110xxxxx 10xxxxxx 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx This reader will return incorrect last character on broken UTF-8 stream.
/** * UTF-8 transformed UCS-2 character stream reader. * * This reader converts UTF-8 transformed UCS-2 characters to Java characters. * The UCS-2 subset of UTF-8 transformation is described in RFC-2279 #2 * "UTF-8 definition": * 0000 0000-0000 007F 0xxxxxxx * 0000 0080-0000 07FF 110xxxxx 10xxxxxx * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx * * This reader will return incorrect last character on broken UTF-8 stream. */
public class ReaderUTF8 extends Reader { private InputStream is;
Constructor.
Params:
  • is – A byte input stream.
/** * Constructor. * * @param is A byte input stream. */
public ReaderUTF8(InputStream is) { this.is = is; }
Reads characters into a portion of an array.
Params:
  • cbuf – Destination buffer.
  • off – Offset at which to start storing characters.
  • len – Maximum number of characters to read.
Throws:
/** * Reads characters into a portion of an array. * * @param cbuf Destination buffer. * @param off Offset at which to start storing characters. * @param len Maximum number of characters to read. * @exception IOException If any IO errors occur. * @exception UnsupportedEncodingException If UCS-4 character occur in the stream. */
public int read(char[] cbuf, int off, int len) throws IOException { int num = 0; int val; while (num < len) { if ((val = is.read()) < 0) { return (num != 0) ? num : -1; } switch (val & 0xf0) { case 0xc0: case 0xd0: cbuf[off++] = (char) (((val & 0x1f) << 6) | (is.read() & 0x3f)); break; case 0xe0: cbuf[off++] = (char) (((val & 0x0f) << 12) | ((is.read() & 0x3f) << 6) | (is.read() & 0x3f)); break; case 0xf0: // UCS-4 character throw new UnsupportedEncodingException("UTF-32 (or UCS-4) encoding not supported."); default: cbuf[off++] = (char) val; break; } num++; } return num; }
Reads a single character.
Throws:
Returns:The character read, as an integer in the range 0 to 65535 (0x00-0xffff), or -1 if the end of the stream has been reached.
/** * Reads a single character. * * @return The character read, as an integer in the range 0 to 65535 * (0x00-0xffff), or -1 if the end of the stream has been reached. * @exception IOException If any IO errors occur. * @exception UnsupportedEncodingException If UCS-4 character occur in the stream. */
public int read() throws IOException { int val; if ((val = is.read()) < 0) { return -1; } switch (val & 0xf0) { case 0xc0: case 0xd0: val = ((val & 0x1f) << 6) | (is.read() & 0x3f); break; case 0xe0: val = ((val & 0x0f) << 12) | ((is.read() & 0x3f) << 6) | (is.read() & 0x3f); break; case 0xf0: // UCS-4 character throw new UnsupportedEncodingException(); default: break; } return val; }
Closes the stream.
Throws:
  • IOException – If any IO errors occur.
/** * Closes the stream. * * @exception IOException If any IO errors occur. */
public void close() throws IOException { is.close(); } }