/* Woodstox Lite ("wool") XML processor
*
* Copyright (c) 2006- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.fasterxml.aalto.out;
import java.io.*;
import javax.xml.stream.*;
import com.fasterxml.aalto.impl.IoStreamException;
import com.fasterxml.aalto.util.XmlConsts;
/**
* This is the generic implementation of {@link XmlWriter}, used if
* the destination is byte-based {@link java.io.OutputStream}, and
* encoding is UTF-8.
*/
public final class Utf8XmlWriter
extends ByteXmlWriter
{
public Utf8XmlWriter(WriterConfig cfg, OutputStream out)
{
super(cfg, out, OutputCharTypes.getUtf8CharTypes());
}
/*
/**********************************************************************
/* Abstract method implementations
/**********************************************************************
*/
@Override
public int getHighestEncodable() {
return XmlConsts.MAX_UNICODE_CHAR;
}
@Override
public void writeRaw(char[] cbuf, int offset, int len)
throws IOException, XMLStreamException
{
if (_out == null || len == 0) {
return;
}
if (_surrogate != 0) {
outputSurrogates(_surrogate, cbuf[offset]);
++offset;
--len;
}
len += offset; // now marks the end
// !!! TODO: combine input+output length checks into just one
main_loop:
while (offset < len) {
inner_loop:
while (true) {
int ch = (int) cbuf[offset];
if (ch >= 0x80) {
break inner_loop;
}
// !!! TODO: fast writes
if (_outputPtr >= _outputBufferLen) {
flushBuffer();
}
_outputBuffer[_outputPtr++] = (byte) ch;
if (++offset >= len) {
break main_loop;
}
}
char ch = cbuf[offset++];
if (ch < 0x800) { // 2-byte?
output2ByteChar(ch);
continue;
}
offset = outputMultiByteChar(ch, cbuf, offset, len);
}
}
@Override
protected WName doConstructName(String localName)
throws XMLStreamException
{
// !!! TODO: optimize:
try {
byte[] b = localName.getBytes("UTF-8");
return new ByteWName(localName, b);
} catch (IOException ioe) {
throw new IoStreamException(ioe);
}
}
@Override
protected WName doConstructName(String prefix, String localName)
throws XMLStreamException
{
// !!! TODO: optimize:
try {
byte[] b = (prefix+":"+localName).getBytes("UTF-8");
return new ByteWName(prefix, localName, b);
} catch (IOException ioe) {
throw new IoStreamException(ioe);
}
}
/*
/**********************************************************************
/* Internal methods, low-level write
/**********************************************************************
*/
@Override
protected final void outputSurrogates(int surr1, int surr2)
throws IOException, XMLStreamException
{
int c = calcSurrogate(surr1, surr2, " in content");
if ((_outputPtr + 4) > _outputBufferLen) {
flushBuffer();
}
_outputBuffer[_outputPtr++] = (byte) (0xf0 | (c >> 18));
_outputBuffer[_outputPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
_outputBuffer[_outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
_outputBuffer[_outputPtr++] = (byte) (0x80 | (c & 0x3f));
}
@Override
final protected void output2ByteChar(int ch)
throws IOException, XMLStreamException
{
if ((_outputPtr + 2) > _outputBufferLen) {
flushBuffer();
}
byte[] bbuf = _outputBuffer;
bbuf[_outputPtr++] = (byte) (0xc0 | (ch >> 6));
bbuf[_outputPtr++] = (byte) (0x80 | (ch & 0x3f));
}
Method called to output a character that is beyond range of
1- and 2-byte UTF-8 encodings. This means it's either invalid
character, or needs to be encoded using 3- or 4-byte encoding.
Params: - inputOffset – Input pointer after character has been handled;
either same as one passed in, or one more if a surrogate character
was succesfully handled
/**
* Method called to output a character that is beyond range of
* 1- and 2-byte UTF-8 encodings. This means it's either invalid
* character, or needs to be encoded using 3- or 4-byte encoding.
*
* @param inputOffset Input pointer after character has been handled;
* either same as one passed in, or one more if a surrogate character
* was succesfully handled
*/
@Override
final protected int outputMultiByteChar(int ch, char[] cbuf, int inputOffset, int inputLen)
throws IOException, XMLStreamException
{
if (ch >= SURR1_FIRST) {
if (ch <= SURR2_LAST) { // yes, outside of BMP
// Do we have second part?
if (inputOffset >= inputLen) { // nope... have to note down
_surrogate = ch;
} else {
outputSurrogates(ch, cbuf[inputOffset]);
++inputOffset;
}
return inputOffset;
}
// Nope... but may be invalid
if (ch >= 0xFFFE) { // 0xFFFE, 0xFFFF are invalid
reportInvalidChar(ch);
}
}
if ((_outputPtr + 3) > _outputBufferLen) {
flushBuffer();
}
byte[] bbuf = _outputBuffer;
bbuf[_outputPtr++] = (byte) (0xe0 | (ch >> 12));
bbuf[_outputPtr++] = (byte) (0x80 | ((ch >> 6) & 0x3f));
bbuf[_outputPtr++] = (byte) (0x80 | (ch & 0x3f));
return inputOffset;
}
@Override
final protected int outputStrictMultiByteChar(int ch, char[] cbuf, int inputOffset, int inputLen)
throws IOException, XMLStreamException
{
if (ch >= SURR1_FIRST) {
if (ch <= SURR2_LAST) { // yes, outside of BMP
// Do we have second part?
if (inputOffset >= inputLen) { // nope... have to note down
_surrogate = ch;
} else {
outputSurrogates(ch, cbuf[inputOffset]);
++inputOffset;
}
return inputOffset;
}
// Nope... but may be invalid
if (ch >= 0xFFFE) { // 0xFFFE, 0xFFFF are invalid
reportInvalidChar(ch);
}
}
if ((_outputPtr + 3) > _outputBufferLen) {
flushBuffer();
}
byte[] bbuf = _outputBuffer;
bbuf[_outputPtr++] = (byte) (0xe0 | (ch >> 12));
bbuf[_outputPtr++] = (byte) (0x80 | ((ch >> 6) & 0x3f));
bbuf[_outputPtr++] = (byte) (0x80 | (ch & 0x3f));
return inputOffset;
}
}