/*
* Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.xml.internal.bind.v2.runtime.output;
import java.io.IOException;
Buffer for UTF-8 encoded string.
See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 for the UTF-8 encoding.
Author: Kohsuke Kawaguchi
/**
* Buffer for UTF-8 encoded string.
*
* See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 for the UTF-8 encoding.
*
* @author Kohsuke Kawaguchi
*/
public final class Encoded {
public byte[] buf;
public int len;
public Encoded() {}
public Encoded(String text) {
set(text);
}
public void ensureSize(int size) {
if(buf==null || buf.length<size)
buf = new byte[size];
}
public final void set( String text ) {
int length = text.length();
ensureSize(length*3+1); // +1 for append
int ptr = 0;
for (int i = 0; i < length; i++) {
final char chr = text.charAt(i);
if (chr > 0x7F) {
if (chr > 0x7FF) {
if(Character.MIN_HIGH_SURROGATE<=chr && chr<=Character.MAX_LOW_SURROGATE) {
// surrogate
int uc = (((chr & 0x3ff) << 10) | (text.charAt(++i) & 0x3ff)) + 0x10000;
buf[ptr++] = (byte)(0xF0 | ((uc >> 18)));
buf[ptr++] = (byte)(0x80 | ((uc >> 12) & 0x3F));
buf[ptr++] = (byte)(0x80 | ((uc >> 6) & 0x3F));
buf[ptr++] = (byte)(0x80 + (uc & 0x3F));
continue;
}
buf[ptr++] = (byte)(0xE0 + (chr >> 12));
buf[ptr++] = (byte)(0x80 + ((chr >> 6) & 0x3F));
} else {
buf[ptr++] = (byte)(0xC0 + (chr >> 6));
}
buf[ptr++] = (byte)(0x80 + (chr & 0x3F));
} else {
buf[ptr++] = (byte)chr;
}
}
len = ptr;
}
Fill in the buffer by encoding the specified characters
while escaping characters like <
Params: - isAttribute –
if true, characters like \t, \r, and \n are also escaped.
/**
* Fill in the buffer by encoding the specified characters
* while escaping characters like <
*
* @param isAttribute
* if true, characters like \t, \r, and \n are also escaped.
*/
public final void setEscape(String text, boolean isAttribute) {
int length = text.length();
ensureSize(length*6+1); // in the worst case the text is like """""", so we need 6 bytes per char
int ptr = 0;
for (int i = 0; i < length; i++) {
final char chr = text.charAt(i);
int ptr1 = ptr;
if (chr > 0x7F) {
if (chr > 0x7FF) {
if(Character.MIN_HIGH_SURROGATE<=chr && chr<=Character.MAX_LOW_SURROGATE) {
// surrogate
int uc = (((chr & 0x3ff) << 10) | (text.charAt(++i) & 0x3ff)) + 0x10000;
buf[ptr++] = (byte)(0xF0 | ((uc >> 18)));
buf[ptr++] = (byte)(0x80 | ((uc >> 12) & 0x3F));
buf[ptr++] = (byte)(0x80 | ((uc >> 6) & 0x3F));
buf[ptr++] = (byte)(0x80 + (uc & 0x3F));
continue;
}
buf[ptr1++] = (byte)(0xE0 + (chr >> 12));
buf[ptr1++] = (byte)(0x80 + ((chr >> 6) & 0x3F));
} else {
buf[ptr1++] = (byte)(0xC0 + (chr >> 6));
}
buf[ptr1++] = (byte)(0x80 + (chr & 0x3F));
} else {
byte[] ent;
if((ent=attributeEntities[chr])!=null) {
// the majority of the case is just printed as a char,
// so it's very important to reject them as quickly as possible
// check again to see if this really needs to be escaped
if(isAttribute || entities[chr]!=null)
ptr1 = writeEntity(ent,ptr1);
else
buf[ptr1++] = (byte)chr;
} else
buf[ptr1++] = (byte)chr;
}
ptr = ptr1;
}
len = ptr;
}
private int writeEntity( byte[] entity, int ptr ) {
System.arraycopy(entity,0,buf,ptr,entity.length);
return ptr+entity.length;
}
Writes the encoded bytes to the given output stream.
/**
* Writes the encoded bytes to the given output stream.
*/
public final void write(UTF8XmlOutput out) throws IOException {
out.write(buf,0,len);
}
Appends a new character to the end of the buffer.
This assumes that you have enough space in the buffer.
/**
* Appends a new character to the end of the buffer.
* This assumes that you have enough space in the buffer.
*/
public void append(char b) {
buf[len++] = (byte)b;
}
Reallocate the buffer to the exact size of the data
to reduce the memory footprint.
/**
* Reallocate the buffer to the exact size of the data
* to reduce the memory footprint.
*/
public void compact() {
byte[] b = new byte[len];
System.arraycopy(buf,0,b,0,len);
buf = b;
}
UTF-8 encoded entities keyed by their character code.
e.g., entities['&'] == AMP_ENTITY.
In attributes we need to encode more characters.
/**
* UTF-8 encoded entities keyed by their character code.
* e.g., entities['&'] == AMP_ENTITY.
*
* In attributes we need to encode more characters.
*/
private static final byte[][] entities = new byte[0x80][];
private static final byte[][] attributeEntities = new byte[0x80][];
static {
add('&',"&",false);
add('<',"<",false);
add('>',">",false);
add('"',""",true);
add('\t',"	",true);
add('\r',"
",false);
add('\n',"
",true);
}
private static void add(char c, String s, boolean attOnly) {
byte[] image = UTF8XmlOutput.toBytes(s);
attributeEntities[c] = image;
if(!attOnly)
entities[c] = image;
}
}