/*
* Copyright (c) 2006, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.stream.writers;
import java.io.Writer;
import java.io.OutputStream;
import java.io.IOException;
import com.sun.org.apache.xerces.internal.util.XMLChar;
This class is used to write a stream of chars as a stream of
bytes using the UTF8 encoding. It assumes that the underlying
output stream is buffered or does not need additional buffering.
It is more efficient than using a java.io.OutputStreamWriter
because it does not need to be wrapped in a
java.io.BufferedWriter
. Creating multiple instances
of java.io.BufferedWriter
has been shown to be very
expensive in JAX-WS.
Author: Santiago PericasGeertsen
/**
* <p>This class is used to write a stream of chars as a stream of
* bytes using the UTF8 encoding. It assumes that the underlying
* output stream is buffered or does not need additional buffering.</p>
*
* <p>It is more efficient than using a <code>java.io.OutputStreamWriter</code>
* because it does not need to be wrapped in a
* <code>java.io.BufferedWriter</code>. Creating multiple instances
* of <code>java.io.BufferedWriter</code> has been shown to be very
* expensive in JAX-WS.</p>
*
* @author Santiago PericasGeertsen
*/
public final class UTF8OutputStreamWriter extends Writer {
Undelying output stream. This class assumes that this
output stream does not need buffering.
/**
* Undelying output stream. This class assumes that this
* output stream does not need buffering.
*/
OutputStream out;
Java represents chars that are not in the Basic Multilingual
Plane (BMP) in UTF-16. This int stores the first code unit
for a code point encoded in two UTF-16 code units.
/**
* Java represents chars that are not in the Basic Multilingual
* Plane (BMP) in UTF-16. This int stores the first code unit
* for a code point encoded in two UTF-16 code units.
*/
int lastUTF16CodePoint = 0;
public UTF8OutputStreamWriter(OutputStream out) {
this.out = out;
}
public String getEncoding() {
return "UTF-8";
}
public void write(int c) throws IOException {
// Check in we are encoding at high and low surrogates
if (lastUTF16CodePoint != 0) {
final int uc =
(((lastUTF16CodePoint & 0x3ff) << 10) | (c & 0x3ff)) + 0x10000;
if (uc < 0 || uc >= 0x200000) {
throw new IOException("Atttempting to write invalid Unicode code point '" + uc + "'");
}
out.write(0xF0 | (uc >> 18));
out.write(0x80 | ((uc >> 12) & 0x3F));
out.write(0x80 | ((uc >> 6) & 0x3F));
out.write(0x80 | (uc & 0x3F));
lastUTF16CodePoint = 0;
return;
}
// Otherwise, encode char as defined in UTF-8
if (c < 0x80) {
// 1 byte, 7 bits
out.write(c);
}
else if (c < 0x800) {
// 2 bytes, 11 bits
out.write(0xC0 | (c >> 6)); // first 5
out.write(0x80 | (c & 0x3F)); // second 6
}
else if (c <= '\uFFFF') {
if (!XMLChar.isHighSurrogate(c) && !XMLChar.isLowSurrogate(c)) {
// 3 bytes, 16 bits
out.write(0xE0 | (c >> 12)); // first 4
out.write(0x80 | ((c >> 6) & 0x3F)); // second 6
out.write(0x80 | (c & 0x3F)); // third 6
}
else {
lastUTF16CodePoint = c;
}
}
}
public void write(char cbuf[]) throws IOException {
for (int i = 0; i < cbuf.length; i++) {
write(cbuf[i]);
}
}
public void write(char cbuf[], int off, int len) throws IOException {
for (int i = 0; i < len; i++) {
write(cbuf[off + i]);
}
}
public void write(String str) throws IOException {
final int len = str.length();
for (int i = 0; i < len; i++) {
write(str.charAt(i));
}
}
public void write(String str, int off, int len) throws IOException {
for (int i = 0; i < len; i++) {
write(str.charAt(off + i));
}
}
public void flush() throws IOException {
out.flush();
}
public void close() throws IOException {
if (lastUTF16CodePoint != 0) {
throw new IllegalStateException("Attempting to close a UTF8OutputStreamWriter"
+ " while awaiting for a UTF-16 code unit");
}
out.close();
}
}