/*
* Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sun.org.apache.xml.internal.serializer;
import java.io.IOException;
import java.util.Properties;
import javax.xml.transform.Result;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
import com.sun.org.apache.xml.internal.serializer.utils.Utils;
This serializer takes a series of SAX or
SAX-like events and writes its output
to the given stream.
This class is not a public API, it is public
because it is used from another package.
@xsl.usage internal
/**
* This serializer takes a series of SAX or
* SAX-like events and writes its output
* to the given stream.
*
* This class is not a public API, it is public
* because it is used from another package.
*
* @xsl.usage internal
*/
public final class ToHTMLStream extends ToStream
{
This flag is set while receiving events from the DTD /** This flag is set while receiving events from the DTD */
protected boolean m_inDTD = false;
True if the previous element is a block element. /** True if the previous element is a block element. */
private boolean m_isprevblock = false;
Map that tells which XML characters should have special treatment, and it
provides character to entity name lookup.
/**
* Map that tells which XML characters should have special treatment, and it
* provides character to entity name lookup.
*/
private static final CharInfo m_htmlcharInfo =
// new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);
A digital search trie for fast, case insensitive lookup of ElemDesc objects. /** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */
static final Trie m_elementFlags = new Trie();
static {
initTagReference(m_elementFlags);
}
static void initTagReference(Trie m_elementFlags) {
// HTML 4.0 loose DTD
m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
m_elementFlags.put(
"FRAME",
new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put(
"ISINDEX",
new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
m_elementFlags.put(
"APPLET",
new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
// HTML 4.0 strict DTD
m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
m_elementFlags.put(
"SUP",
new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
m_elementFlags.put(
"SUB",
new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
m_elementFlags.put(
"SPAN",
new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
m_elementFlags.put(
"BDO",
new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
m_elementFlags.put(
"BR",
new ElemDesc(
0
| ElemDesc.SPECIAL
| ElemDesc.ASPECIAL
| ElemDesc.EMPTY
| ElemDesc.BLOCK));
m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put(
"ADDRESS",
new ElemDesc(
0
| ElemDesc.BLOCK
| ElemDesc.BLOCKFORM
| ElemDesc.BLOCKFORMFIELDSET));
m_elementFlags.put(
"DIV",
new ElemDesc(
0
| ElemDesc.BLOCK
| ElemDesc.BLOCKFORM
| ElemDesc.BLOCKFORMFIELDSET));
m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
m_elementFlags.put(
"MAP",
new ElemDesc(
0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
m_elementFlags.put(
"AREA",
new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
m_elementFlags.put(
"LINK",
new ElemDesc(
0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
m_elementFlags.put(
"IMG",
new ElemDesc(
0
| ElemDesc.SPECIAL
| ElemDesc.ASPECIAL
| ElemDesc.EMPTY
| ElemDesc.WHITESPACESENSITIVE));
m_elementFlags.put(
"OBJECT",
new ElemDesc(
0
| ElemDesc.SPECIAL
| ElemDesc.ASPECIAL
| ElemDesc.HEADMISC
| ElemDesc.WHITESPACESENSITIVE));
m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
m_elementFlags.put(
"HR",
new ElemDesc(
0
| ElemDesc.BLOCK
| ElemDesc.BLOCKFORM
| ElemDesc.BLOCKFORMFIELDSET
| ElemDesc.EMPTY));
m_elementFlags.put(
"P",
new ElemDesc(
0
| ElemDesc.BLOCK
| ElemDesc.BLOCKFORM
| ElemDesc.BLOCKFORMFIELDSET));
m_elementFlags.put(
"H1",
new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
m_elementFlags.put(
"H2",
new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
m_elementFlags.put(
"H3",
new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
m_elementFlags.put(
"H4",
new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
m_elementFlags.put(
"H5",
new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
m_elementFlags.put(
"H6",
new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
m_elementFlags.put(
"PRE",
new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
m_elementFlags.put(
"Q",
new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
m_elementFlags.put(
"BLOCKQUOTE",
new ElemDesc(
0
| ElemDesc.BLOCK
| ElemDesc.BLOCKFORM
| ElemDesc.BLOCKFORMFIELDSET));
m_elementFlags.put("INS", new ElemDesc(0));
m_elementFlags.put("DEL", new ElemDesc(0));
m_elementFlags.put(
"DL",
new ElemDesc(
0
| ElemDesc.BLOCK
| ElemDesc.BLOCKFORM
| ElemDesc.BLOCKFORMFIELDSET));
m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put(
"OL",
new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
m_elementFlags.put(
"UL",
new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
m_elementFlags.put(
"INPUT",
new ElemDesc(
0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
m_elementFlags.put(
"SELECT",
new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
m_elementFlags.put("OPTGROUP", new ElemDesc(0));
m_elementFlags.put("OPTION", new ElemDesc(0));
m_elementFlags.put(
"TEXTAREA",
new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
m_elementFlags.put(
"FIELDSET",
new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
m_elementFlags.put("LEGEND", new ElemDesc(0));
m_elementFlags.put(
"BUTTON",
new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
m_elementFlags.put(
"TABLE",
new ElemDesc(
0
| ElemDesc.BLOCK
| ElemDesc.BLOCKFORM
| ElemDesc.BLOCKFORMFIELDSET));
m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put(
"COL",
new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put("TH", new ElemDesc(0));
m_elementFlags.put("TD", new ElemDesc(0));
m_elementFlags.put(
"HEAD",
new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
m_elementFlags.put(
"BASE",
new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
m_elementFlags.put(
"META",
new ElemDesc(
0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
m_elementFlags.put(
"STYLE",
new ElemDesc(
0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
m_elementFlags.put(
"SCRIPT",
new ElemDesc(
0
| ElemDesc.SPECIAL
| ElemDesc.ASPECIAL
| ElemDesc.HEADMISC
| ElemDesc.RAW));
m_elementFlags.put(
"NOSCRIPT",
new ElemDesc(
0
| ElemDesc.BLOCK
| ElemDesc.BLOCKFORM
| ElemDesc.BLOCKFORMFIELDSET));
m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK));
// From "John Ky" <hand@syd.speednet.com.au
// Transitional Document Type Definition ()
// file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
// file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
// file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
// From "John Ky" <hand@syd.speednet.com.au
m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
// HTML 4.0, section 16.5
m_elementFlags.put(
"IFRAME",
new ElemDesc(
0
| ElemDesc.BLOCK
| ElemDesc.BLOCKFORM
| ElemDesc.BLOCKFORMFIELDSET));
// Netscape 4 extension
m_elementFlags.put(
"LAYER",
new ElemDesc(
0
| ElemDesc.BLOCK
| ElemDesc.BLOCKFORM
| ElemDesc.BLOCKFORMFIELDSET));
// Netscape 4 extension
m_elementFlags.put(
"ILAYER",
new ElemDesc(
0
| ElemDesc.BLOCK
| ElemDesc.BLOCKFORM
| ElemDesc.BLOCKFORMFIELDSET));
// NOW FOR ATTRIBUTE INFORMATION . . .
ElemDesc elemDesc;
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("a");
elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("area");
elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("base");
elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("button");
elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("blockquote");
elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("del");
elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("dir");
elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("div");
elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("dl");
elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("form");
elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
// ----------------------------------------------
// Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
elemDesc = (ElemDesc) m_elementFlags.get("frame");
elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("head");
elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("hr");
elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
// ----------------------------------------------
// HTML 4.0, section 16.5
elemDesc = (ElemDesc) m_elementFlags.get("iframe");
elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
// ----------------------------------------------
// Netscape 4 extension
elemDesc = (ElemDesc) m_elementFlags.get("ilayer");
elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("img");
elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("input");
elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("ins");
elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
// ----------------------------------------------
// Netscape 4 extension
elemDesc = (ElemDesc) m_elementFlags.get("layer");
elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("link");
elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("menu");
elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("object");
elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("ol");
elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("optgroup");
elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("option");
elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("q");
elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("script");
elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("select");
elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("table");
elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("td");
elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("textarea");
elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("th");
elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
// ----------------------------------------------
// The nowrap attribute of a tr element is both
// a Netscape and Internet-Explorer extension
elemDesc = (ElemDesc) m_elementFlags.get("tr");
elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
// ----------------------------------------------
elemDesc = (ElemDesc) m_elementFlags.get("ul");
elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
}
Dummy element for elements not found.
/**
* Dummy element for elements not found.
*/
static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);
True if URLs should be specially escaped with the %xx form. /** True if URLs should be specially escaped with the %xx form. */
private boolean m_specialEscapeURLs = true;
True if the META tag should be omitted. /** True if the META tag should be omitted. */
private boolean m_omitMetaTag = false;
Tells if the formatter should use special URL escaping.
Params: - bool – True if URLs should be specially escaped with the %xx form.
/**
* Tells if the formatter should use special URL escaping.
*
* @param bool True if URLs should be specially escaped with the %xx form.
*/
public void setSpecialEscapeURLs(boolean bool)
{
m_specialEscapeURLs = bool;
}
Tells if the formatter should omit the META tag.
Params: - bool – True if the META tag should be omitted.
/**
* Tells if the formatter should omit the META tag.
*
* @param bool True if the META tag should be omitted.
*/
public void setOmitMetaTag(boolean bool)
{
m_omitMetaTag = bool;
}
Specifies an output format for this serializer. It the
serializer has already been associated with an output format,
it will switch to the new format. This method should not be
called while the serializer is in the process of serializing
a document.
This method can be called multiple times before starting
the serialization of a particular result-tree. In principle
all serialization parameters can be changed, with the exception
of method="html" (it must be method="html" otherwise we
shouldn't even have a ToHTMLStream object here!)
Params: - format – The output format or serialzation parameters
to use.
/**
* Specifies an output format for this serializer. It the
* serializer has already been associated with an output format,
* it will switch to the new format. This method should not be
* called while the serializer is in the process of serializing
* a document.
*
* This method can be called multiple times before starting
* the serialization of a particular result-tree. In principle
* all serialization parameters can be changed, with the exception
* of method="html" (it must be method="html" otherwise we
* shouldn't even have a ToHTMLStream object here!)
*
* @param format The output format or serialzation parameters
* to use.
*/
public void setOutputFormat(Properties format)
{
m_specialEscapeURLs =
OutputPropertyUtils.getBooleanProperty(
OutputPropertiesFactory.S_USE_URL_ESCAPING,
format);
m_omitMetaTag =
OutputPropertyUtils.getBooleanProperty(
OutputPropertiesFactory.S_OMIT_META_TAG,
format);
super.setOutputFormat(format);
}
Tells if the formatter should use special URL escaping.
Returns: True if URLs should be specially escaped with the %xx form.
/**
* Tells if the formatter should use special URL escaping.
*
* @return True if URLs should be specially escaped with the %xx form.
*/
private final boolean getSpecialEscapeURLs()
{
return m_specialEscapeURLs;
}
Tells if the formatter should omit the META tag.
Returns: True if the META tag should be omitted.
/**
* Tells if the formatter should omit the META tag.
*
* @return True if the META tag should be omitted.
*/
private final boolean getOmitMetaTag()
{
return m_omitMetaTag;
}
Get a description of the given element.
Params: - name – non-null name of element, case insensitive.
Returns: non-null reference to ElemDesc, which may be m_dummy if no
element description matches the given name.
/**
* Get a description of the given element.
*
* @param name non-null name of element, case insensitive.
*
* @return non-null reference to ElemDesc, which may be m_dummy if no
* element description matches the given name.
*/
public static final ElemDesc getElemDesc(String name)
{
/* this method used to return m_dummy when name was null
* but now it doesn't check and and requires non-null name.
*/
Object obj = m_elementFlags.get(name);
if (null != obj)
return (ElemDesc)obj;
return m_dummy;
}
A Trie that is just a copy of the "static" one.
We need this one to be able to use the faster, but not thread-safe
method Trie.get2(name)
/**
* A Trie that is just a copy of the "static" one.
* We need this one to be able to use the faster, but not thread-safe
* method Trie.get2(name)
*/
private Trie m_htmlInfo = new Trie(m_elementFlags);
Calls to this method could be replaced with calls to
getElemDesc(name), but this one should be faster.
/**
* Calls to this method could be replaced with calls to
* getElemDesc(name), but this one should be faster.
*/
private ElemDesc getElemDesc2(String name)
{
Object obj = m_htmlInfo.get2(name);
if (null != obj)
return (ElemDesc)obj;
return m_dummy;
}
Default constructor.
/**
* Default constructor.
*/
public ToHTMLStream()
{
super();
m_charInfo = m_htmlcharInfo;
// initialize namespaces
m_prefixMap = new NamespaceMappings();
}
/** The name of the current element. */
// private String m_currentElementName = null;
Receive notification of the beginning of a document.
Throws: - SAXException – Any SAX exception, possibly
wrapping another exception.
- SAXException –
/**
* Receive notification of the beginning of a document.
*
* @throws org.xml.sax.SAXException Any SAX exception, possibly
* wrapping another exception.
*
* @throws org.xml.sax.SAXException
*/
protected void startDocumentInternal() throws org.xml.sax.SAXException
{
super.startDocumentInternal();
m_needToCallStartDocument = false;
m_needToOutputDocTypeDecl = true;
m_startNewLine = false;
setOmitXMLDeclaration(true);
if (true == m_needToOutputDocTypeDecl)
{
String doctypeSystem = getDoctypeSystem();
String doctypePublic = getDoctypePublic();
if ((null != doctypeSystem) || (null != doctypePublic))
{
final java.io.Writer writer = m_writer;
try
{
writer.write("<!DOCTYPE html");
if (null != doctypePublic)
{
writer.write(" PUBLIC \"");
writer.write(doctypePublic);
writer.write('"');
}
if (null != doctypeSystem)
{
if (null == doctypePublic)
writer.write(" SYSTEM \"");
else
writer.write(" \"");
writer.write(doctypeSystem);
writer.write('"');
}
writer.write('>');
outputLineSep();
}
catch(IOException e)
{
throw new SAXException(e);
}
}
}
m_needToOutputDocTypeDecl = false;
}
Receive notification of the end of a document.
Throws: - SAXException – Any SAX exception, possibly
wrapping another exception.
- SAXException –
/**
* Receive notification of the end of a document.
*
* @throws org.xml.sax.SAXException Any SAX exception, possibly
* wrapping another exception.
*
* @throws org.xml.sax.SAXException
*/
public final void endDocument() throws org.xml.sax.SAXException
{
if (m_doIndent) {
flushCharactersBuffer();
}
flushPending();
if (m_doIndent && !m_isprevtext)
{
try
{
outputLineSep();
}
catch(IOException e)
{
throw new SAXException(e);
}
}
flushWriter();
if (m_tracer != null)
super.fireEndDoc();
}
If the previous is an inline element, won't insert a new line before the
text.
/**
* If the previous is an inline element, won't insert a new line before the
* text.
*
*/
protected boolean shouldIndentForText() {
return super.shouldIndentForText() && m_isprevblock;
}
Only check m_doIndent, disregard m_ispreserveSpace.
Returns: True if the content should be formatted.
/**
* Only check m_doIndent, disregard m_ispreserveSpace.
*
* @return True if the content should be formatted.
*/
protected boolean shouldFormatOutput() {
return m_doIndent;
}
Receive notification of the beginning of an element.
Params: - namespaceURI –
- localName –
- name –
The element type name.
- atts –
The attributes attached to the element, if any.
Throws: - SAXException –
Any SAX exception, possibly wrapping another exception.
See Also:
/**
* Receive notification of the beginning of an element.
*
*
* @param namespaceURI
* @param localName
* @param name
* The element type name.
* @param atts
* The attributes attached to the element, if any.
* @throws org.xml.sax.SAXException
* Any SAX exception, possibly wrapping another exception.
* @see #endElement
* @see org.xml.sax.AttributeList
*/
public void startElement(
String namespaceURI,
String localName,
String name,
Attributes atts)
throws SAXException
{
if (m_doIndent) {
// will add extra one if having namespace but no matter
m_childNodeNum++;
flushCharactersBuffer();
}
ElemContext elemContext = m_elemContext;
// clean up any pending things first
if (elemContext.m_startTagOpen)
{
closeStartTag();
elemContext.m_startTagOpen = false;
}
else if (m_cdataTagOpen)
{
closeCDATA();
m_cdataTagOpen = false;
}
else if (m_needToCallStartDocument)
{
startDocumentInternal();
m_needToCallStartDocument = false;
}
// if this element has a namespace then treat it like XML
if (null != namespaceURI && namespaceURI.length() > 0)
{
super.startElement(namespaceURI, localName, name, atts);
return;
}
try
{
// getElemDesc2(name) is faster than getElemDesc(name)
ElemDesc elemDesc = getElemDesc2(name);
int elemFlags = elemDesc.getFlags();
// deal with indentation issues first
if (m_doIndent)
{
boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
if ((elemContext.m_elementName != null)
// If this element is a block element,
// or if this is not a block element, then if the
// previous is neither a text nor an inline
&& (isBlockElement || (!(m_isprevtext || !m_isprevblock))))
{
m_startNewLine = true;
indent();
}
m_isprevblock = isBlockElement;
}
// save any attributes for later processing
if (atts != null)
addAttributes(atts);
m_isprevtext = false;
final java.io.Writer writer = m_writer;
writer.write('<');
writer.write(name);
if (m_doIndent) {
m_childNodeNumStack.add(m_childNodeNum);
m_childNodeNum = 0;
}
if (m_tracer != null)
firePseudoAttributes();
if ((elemFlags & ElemDesc.EMPTY) != 0)
{
// an optimization for elements which are expected
// to be empty.
m_elemContext = elemContext.push();
/* XSLTC sometimes calls namespaceAfterStartElement()
* so we need to remember the name
*/
m_elemContext.m_elementName = name;
m_elemContext.m_elementDesc = elemDesc;
return;
}
else
{
elemContext = elemContext.push(namespaceURI,localName,name);
m_elemContext = elemContext;
elemContext.m_elementDesc = elemDesc;
elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
// set m_startNewLine for the next element
if (m_doIndent) {
// elemFlags is equivalent to m_elemContext.m_elementDesc.getFlags(),
// in this branch m_elemContext.m_elementName is not null
boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
if (isBlockElement)
m_startNewLine = true;
}
}
if ((elemFlags & ElemDesc.HEADELEM) != 0)
{
// This is the <HEAD> element, do some special processing
closeStartTag();
elemContext.m_startTagOpen = false;
if (!m_omitMetaTag)
{
if (m_doIndent)
indent();
writer.write(
"<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
String encoding = getEncoding();
String encode = Encodings.getMimeEncoding(encoding);
writer.write(encode);
writer.write("\">");
}
}
}
catch (IOException e)
{
throw new SAXException(e);
}
}
Receive notification of the end of an element.
@param namespaceURI
@param localName
@param name The element type name
@throws org.xml.sax.SAXException Any SAX exception, possibly
wrapping another exception.
/**
* Receive notification of the end of an element.
*
*
* @param namespaceURI
* @param localName
* @param name The element type name
* @throws org.xml.sax.SAXException Any SAX exception, possibly
* wrapping another exception.
*/
public final void endElement(
final String namespaceURI,
final String localName,
final String name)
throws org.xml.sax.SAXException
{
if (m_doIndent) {
flushCharactersBuffer();
}
// deal with any pending issues
if (m_cdataTagOpen)
closeCDATA();
// if the element has a namespace, treat it like XML, not HTML
if (null != namespaceURI && namespaceURI.length() > 0)
{
super.endElement(namespaceURI, localName, name);
return;
}
try
{
ElemContext elemContext = m_elemContext;
final ElemDesc elemDesc = elemContext.m_elementDesc;
final int elemFlags = elemDesc.getFlags();
final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
// deal with any indentation issues
if (m_doIndent)
{
final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
boolean shouldIndent = false;
// If this element is a block element,
// or if this is not a block element, then if the previous is
// neither a text nor an inline
if (isBlockElement || (!(m_isprevtext || !m_isprevblock)))
{
m_startNewLine = true;
shouldIndent = true;
}
if (!elemContext.m_startTagOpen && shouldIndent && (m_childNodeNum > 1 || !m_isprevtext))
indent(elemContext.m_currentElemDepth - 1);
m_isprevblock = isBlockElement;
}
final java.io.Writer writer = m_writer;
if (!elemContext.m_startTagOpen)
{
writer.write("</");
writer.write(name);
writer.write('>');
}
else
{
// the start-tag open when this method was called,
// so we need to process it now.
if (m_tracer != null)
super.fireStartElem(name);
// the starting tag was still open when we received this endElement() call
// so we need to process any gathered attributes NOW, before they go away.
int nAttrs = m_attributes.getLength();
if (nAttrs > 0)
{
processAttributes(m_writer, nAttrs);
// clear attributes object for re-use with next element
m_attributes.clear();
}
if (!elemEmpty)
{
// As per Dave/Paul recommendation 12/06/2000
// if (shouldIndent)
// writer.write('>');
// indent(m_currentIndent);
writer.write("></");
writer.write(name);
writer.write('>');
}
else
{
writer.write('>');
}
}
if (m_doIndent) {
m_childNodeNum = m_childNodeNumStack.remove(m_childNodeNumStack.size() - 1);
// clean up because the element has ended
m_isprevtext = false;
}
// fire off the end element event
if (m_tracer != null)
super.fireEndElem(name);
// OPTIMIZE-EMPTY
if (elemEmpty)
{
// a quick exit if the HTML element had no children.
// This block of code can be removed if the corresponding block of code
// in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
m_elemContext = elemContext.m_prev;
return;
}
// some more clean because the element has ended.
m_elemContext = elemContext.m_prev;
// m_isRawStack.pop();
}
catch (IOException e)
{
throw new SAXException(e);
}
}
Process an attribute.
Params: - writer – The writer to write the processed output to.
- name – The name of the attribute.
- value – The value of the attribute.
- elemDesc – The description of the HTML element
that has this attribute.
Throws:
/**
* Process an attribute.
* @param writer The writer to write the processed output to.
* @param name The name of the attribute.
* @param value The value of the attribute.
* @param elemDesc The description of the HTML element
* that has this attribute.
*
* @throws org.xml.sax.SAXException
*/
protected void processAttribute(
java.io.Writer writer,
String name,
String value,
ElemDesc elemDesc)
throws IOException
{
writer.write(' ');
if ( ((value.length() == 0) || value.equalsIgnoreCase(name))
&& elemDesc != null
&& elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
{
writer.write(name);
}
else
{
// %REVIEW% %OPT%
// Two calls to single-char write may NOT
// be more efficient than one to string-write...
writer.write(name);
writer.write("=\"");
if ( elemDesc != null
&& elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
writeAttrURI(writer, value, m_specialEscapeURLs);
else
writeAttrString(writer, value, this.getEncoding());
writer.write('"');
}
}
Tell if a character is an ASCII digit.
/**
* Tell if a character is an ASCII digit.
*/
private boolean isASCIIDigit(char c)
{
return (c >= '0' && c <= '9');
}
Make an integer into an HH hex value.
Does no checking on the size of the input, since this
is only meant to be used locally by writeAttrURI.
Params: - i – must be a value less than 255.
Returns: should be a two character string.
/**
* Make an integer into an HH hex value.
* Does no checking on the size of the input, since this
* is only meant to be used locally by writeAttrURI.
*
* @param i must be a value less than 255.
*
* @return should be a two character string.
*/
private static String makeHHString(int i)
{
String s = Integer.toHexString(i).toUpperCase();
if (s.length() == 1)
{
s = "0" + s;
}
return s;
}
Dmitri Ilyin: Makes sure if the String is HH encoded sign.
Params: - str – must be 2 characters long
Returns: true or false
/**
* Dmitri Ilyin: Makes sure if the String is HH encoded sign.
* @param str must be 2 characters long
*
* @return true or false
*/
private boolean isHHSign(String str)
{
boolean sign = true;
try
{
char r = (char) Integer.parseInt(str, 16);
}
catch (NumberFormatException e)
{
sign = false;
}
return sign;
}
Write the specified string after substituting non ASCII characters,
with %HH
, where HH is the hex of the byte value.
Params: - string – String to convert to XML format.
- doURLEscaping – True if we should try to encode as
per http://www.ietf.org/rfc/rfc2396.txt.
Throws: - SAXException – if a bad surrogate pair is detected.
/**
* Write the specified <var>string</var> after substituting non ASCII characters,
* with <CODE>%HH</CODE>, where HH is the hex of the byte value.
*
* @param string String to convert to XML format.
* @param doURLEscaping True if we should try to encode as
* per http://www.ietf.org/rfc/rfc2396.txt.
*
* @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
*/
public void writeAttrURI(
final java.io.Writer writer, String string, boolean doURLEscaping)
throws IOException
{
// http://www.ietf.org/rfc/rfc2396.txt says:
// A URI is always in an "escaped" form, since escaping or unescaping a
// completed URI might change its semantics. Normally, the only time
// escape encodings can safely be made is when the URI is being created
// from its component parts; each component may have its own set of
// characters that are reserved, so only the mechanism responsible for
// generating or interpreting that component can determine whether or
// not escaping a character will change its semantics. Likewise, a URI
// must be separated into its components before the escaped characters
// within those components can be safely decoded.
//
// ...So we do our best to do limited escaping of the URL, without
// causing damage. If the URL is already properly escaped, in theory, this
// function should not change the string value.
final int end = string.length();
if (end > m_attrBuff.length)
{
m_attrBuff = new char[end*2 + 1];
}
string.getChars(0,end, m_attrBuff, 0);
final char[] chars = m_attrBuff;
int cleanStart = 0;
int cleanLength = 0;
char ch = 0;
for (int i = 0; i < end; i++)
{
ch = chars[i];
if ((ch < 32) || (ch > 126))
{
if (cleanLength > 0)
{
writer.write(chars, cleanStart, cleanLength);
cleanLength = 0;
}
if (doURLEscaping)
{
// Encode UTF16 to UTF8.
// Reference is Unicode, A Primer, by Tony Graham.
// Page 92.
// Note that Kay doesn't escape 0x20...
// if(ch == 0x20) // Not sure about this... -sb
// {
// writer.write(ch);
// }
// else
if (ch <= 0x7F)
{
writer.write('%');
writer.write(makeHHString(ch));
}
else if (ch <= 0x7FF)
{
// Clear low 6 bits before rotate, put high 4 bits in low byte,
// and set two high bits.
int high = (ch >> 6) | 0xC0;
int low = (ch & 0x3F) | 0x80;
// First 6 bits, + high bit
writer.write('%');
writer.write(makeHHString(high));
writer.write('%');
writer.write(makeHHString(low));
}
else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
{
// I'm sure this can be done in 3 instructions, but I choose
// to try and do it exactly like it is done in the book, at least
// until we are sure this is totally clean. I don't think performance
// is a big issue with this particular function, though I could be
// wrong. Also, the stuff below clearly does more masking than
// it needs to do.
// Clear high 6 bits.
int highSurrogate = ((int) ch) & 0x03FF;
// Middle 4 bits (wwww) + 1
// "Note that the value of wwww from the high surrogate bit pattern
// is incremented to make the uuuuu bit pattern in the scalar value
// so the surrogate pair don't address the BMP."
int wwww = ((highSurrogate & 0x03C0) >> 6);
int uuuuu = wwww + 1;
// next 4 bits
int zzzz = (highSurrogate & 0x003C) >> 2;
// low 2 bits
int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
// Get low surrogate character.
ch = chars[++i];
// Clear high 6 bits.
int lowSurrogate = ((int) ch) & 0x03FF;
// put the middle 4 bits into the bottom of yyyyyy (byte 3)
yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
// bottom 6 bits.
int xxxxxx = (lowSurrogate & 0x003F);
int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
int byte2 =
0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
int byte3 = 0x80 | yyyyyy;
int byte4 = 0x80 | xxxxxx;
writer.write('%');
writer.write(makeHHString(byte1));
writer.write('%');
writer.write(makeHHString(byte2));
writer.write('%');
writer.write(makeHHString(byte3));
writer.write('%');
writer.write(makeHHString(byte4));
}
else
{
int high = (ch >> 12) | 0xE0; // top 4 bits
int middle = ((ch & 0x0FC0) >> 6) | 0x80;
// middle 6 bits
int low = (ch & 0x3F) | 0x80;
// First 6 bits, + high bit
writer.write('%');
writer.write(makeHHString(high));
writer.write('%');
writer.write(makeHHString(middle));
writer.write('%');
writer.write(makeHHString(low));
}
}
else if (escapingNotNeeded(ch))
{
writer.write(ch);
}
else
{
writer.write("&#");
writer.write(Integer.toString(ch));
writer.write(';');
}
// In this character range we have first written out any previously accumulated
// "clean" characters, then processed the current more complicated character,
// which may have incremented "i".
// We now we reset the next possible clean character.
cleanStart = i + 1;
}
// Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
// not allowing quotes in the URI proper syntax, nor in the fragment
// identifier, we believe that it's OK to double escape quotes.
else if (ch == '"')
{
// If the character is a '%' number number, try to avoid double-escaping.
// There is a question if this is legal behavior.
// Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
// The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
// if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
// We are no longer escaping '%'
if (cleanLength > 0)
{
writer.write(chars, cleanStart, cleanLength);
cleanLength = 0;
}
// Mike Kay encodes this as ", so he may know something I don't?
if (doURLEscaping)
writer.write("%22");
else
writer.write("""); // we have to escape this, I guess.
// We have written out any clean characters, then the escaped '%' and now we
// We now we reset the next possible clean character.
cleanStart = i + 1;
}
else if (ch == '&')
{
// HTML 4.01 reads, "Authors should use "&" (ASCII decimal 38)
// instead of "&" to avoid confusion with the beginning of a character
// reference (entity reference open delimiter).
if (cleanLength > 0)
{
writer.write(chars, cleanStart, cleanLength);
cleanLength = 0;
}
writer.write("&");
cleanStart = i + 1;
}
else
{
// no processing for this character, just count how
// many characters in a row that we have that need no processing
cleanLength++;
}
}
// are there any clean characters at the end of the array
// that we haven't processed yet?
if (cleanLength > 1)
{
// if the whole string can be written out as-is do so
// otherwise write out the clean chars at the end of the
// array
if (cleanStart == 0)
writer.write(string);
else
writer.write(chars, cleanStart, cleanLength);
}
else if (cleanLength == 1)
{
// a little optimization for 1 clean character
// (we could have let the previous if(...) handle them all)
writer.write(ch);
}
}
Writes the specified string after substituting specials,
and UTF-16 surrogates for character references &#xnn
.
Params: - string – String to convert to XML format.
- encoding – CURRENTLY NOT IMPLEMENTED.
Throws:
/**
* Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
* and UTF-16 surrogates for character references <CODE>&#xnn</CODE>.
*
* @param string String to convert to XML format.
* @param encoding CURRENTLY NOT IMPLEMENTED.
*
* @throws org.xml.sax.SAXException
*/
public void writeAttrString(
final java.io.Writer writer, String string, String encoding)
throws IOException
{
final int end = string.length();
if (end > m_attrBuff.length)
{
m_attrBuff = new char[end * 2 + 1];
}
string.getChars(0, end, m_attrBuff, 0);
final char[] chars = m_attrBuff;
int cleanStart = 0;
int cleanLength = 0;
char ch = 0;
for (int i = 0; i < end; i++)
{
ch = chars[i];
// System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
// System.out.println("ch: "+(int)ch);
// System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
// System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
{
cleanLength++;
}
else if ('<' == ch || '>' == ch)
{
cleanLength++; // no escaping in this case, as specified in 15.2
}
else if (
('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
{
cleanLength++; // no escaping in this case, as specified in 15.2
}
else
{
if (cleanLength > 0)
{
writer.write(chars,cleanStart,cleanLength);
cleanLength = 0;
}
int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
if (i != pos)
{
i = pos - 1;
}
else
{
if (Encodings.isHighUTF16Surrogate(ch))
{
writeUTF16Surrogate(ch, chars, i, end);
i++; // two input characters processed
// this increments by one and the for()
// loop itself increments by another one.
}
// The next is kind of a hack to keep from escaping in the case
// of Shift_JIS and the like.
/*
else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
&& (ch != 160))
{
writer.write(ch); // no escaping in this case
}
else
*/
String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
if (null != outputStringForChar)
{
writer.write(outputStringForChar);
}
else if (escapingNotNeeded(ch))
{
writer.write(ch); // no escaping in this case
}
else
{
writer.write("&#");
writer.write(Integer.toString(ch));
writer.write(';');
}
}
cleanStart = i + 1;
}
} // end of for()
// are there any clean characters at the end of the array
// that we haven't processed yet?
if (cleanLength > 1)
{
// if the whole string can be written out as-is do so
// otherwise write out the clean chars at the end of the
// array
if (cleanStart == 0)
writer.write(string);
else
writer.write(chars, cleanStart, cleanLength);
}
else if (cleanLength == 1)
{
// a little optimization for 1 clean character
// (we could have let the previous if(...) handle them all)
writer.write(ch);
}
}
Receive notification of character data.
The Parser will call this method to report each chunk of
character data. SAX parsers may return all contiguous character
data in a single chunk, or they may split it into several
chunks; however, all of the characters in any single event
must come from the same external entity, so that the Locator
provides useful information.
The application must not attempt to read from the array
outside of the specified range.
Note that some parsers will report whitespace using the
ignorableWhitespace() method rather than this one (validating
parsers must do so).
Params: - chars – The characters from the XML document.
- start – The start position in the array.
- length – The number of characters to read from the array.
Throws: - SAXException – Any SAX exception, possibly
wrapping another exception.
- SAXException –
See Also:
/**
* Receive notification of character data.
*
* <p>The Parser will call this method to report each chunk of
* character data. SAX parsers may return all contiguous character
* data in a single chunk, or they may split it into several
* chunks; however, all of the characters in any single event
* must come from the same external entity, so that the Locator
* provides useful information.</p>
*
* <p>The application must not attempt to read from the array
* outside of the specified range.</p>
*
* <p>Note that some parsers will report whitespace using the
* ignorableWhitespace() method rather than this one (validating
* parsers must do so).</p>
*
* @param chars The characters from the XML document.
* @param start The start position in the array.
* @param length The number of characters to read from the array.
* @throws org.xml.sax.SAXException Any SAX exception, possibly
* wrapping another exception.
* @see #ignorableWhitespace
* @see org.xml.sax.Locator
*
* @throws org.xml.sax.SAXException
*/
public final void characters(char chars[], int start, int length)
throws org.xml.sax.SAXException
{
if (m_elemContext.m_isRaw)
{
try
{
if (m_elemContext.m_startTagOpen)
{
closeStartTag();
m_elemContext.m_startTagOpen = false;
}
// With m_ispreserve just set true it looks like shouldIndent()
// will always return false, so drop any possible indentation.
// if (shouldIndent())
// indent();
// writer.write("<![CDATA[");
// writer.write(chars, start, length);
writeNormalizedChars(chars, start, length, false, m_lineSepUse);
m_isprevtext = true;
// writer.write("]]>");
// time to generate characters event
if (m_tracer != null)
super.fireCharEvent(chars, start, length);
return;
}
catch (IOException ioe)
{
throw new org.xml.sax.SAXException(
Utils.messages.createMessage(
MsgKey.ER_OIERROR,
null),
ioe);
//"IO error", ioe);
}
}
else
{
super.characters(chars, start, length);
}
}
Receive notification of cdata.
The Parser will call this method to report each chunk of
character data. SAX parsers may return all contiguous character
data in a single chunk, or they may split it into several
chunks; however, all of the characters in any single event
must come from the same external entity, so that the Locator
provides useful information.
The application must not attempt to read from the array
outside of the specified range.
Note that some parsers will report whitespace using the
ignorableWhitespace() method rather than this one (validating
parsers must do so).
@param ch The characters from the XML document.
@param start The start position in the array.
@param length The number of characters to read from the array.
@throws org.xml.sax.SAXException Any SAX exception, possibly
wrapping another exception.
@see #ignorableWhitespace
@see org.xml.sax.Locator
Throws:
/**
* Receive notification of cdata.
*
* <p>The Parser will call this method to report each chunk of
* character data. SAX parsers may return all contiguous character
* data in a single chunk, or they may split it into several
* chunks; however, all of the characters in any single event
* must come from the same external entity, so that the Locator
* provides useful information.</p>
*
* <p>The application must not attempt to read from the array
* outside of the specified range.</p>
*
* <p>Note that some parsers will report whitespace using the
* ignorableWhitespace() method rather than this one (validating
* parsers must do so).</p>
*
* @param ch The characters from the XML document.
* @param start The start position in the array.
* @param length The number of characters to read from the array.
* @throws org.xml.sax.SAXException Any SAX exception, possibly
* wrapping another exception.
* @see #ignorableWhitespace
* @see org.xml.sax.Locator
*
* @throws org.xml.sax.SAXException
*/
public final void cdata(char ch[], int start, int length)
throws org.xml.sax.SAXException
{
if ((null != m_elemContext.m_elementName)
&& (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
|| m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
{
try
{
if (m_elemContext.m_startTagOpen)
{
closeStartTag();
m_elemContext.m_startTagOpen = false;
}
if (shouldIndent())
indent();
// writer.write(ch, start, length);
writeNormalizedChars(ch, start, length, true, m_lineSepUse);
}
catch (IOException ioe)
{
throw new org.xml.sax.SAXException(
Utils.messages.createMessage(
MsgKey.ER_OIERROR,
null),
ioe);
//"IO error", ioe);
}
}
else
{
super.cdata(ch, start, length);
}
}
Receive notification of a processing instruction.
@param target The processing instruction target.
@param data The processing instruction data, or null if
none was supplied.
@throws org.xml.sax.SAXException Any SAX exception, possibly
wrapping another exception.
Throws:
/**
* Receive notification of a processing instruction.
*
* @param target The processing instruction target.
* @param data The processing instruction data, or null if
* none was supplied.
* @throws org.xml.sax.SAXException Any SAX exception, possibly
* wrapping another exception.
*
* @throws org.xml.sax.SAXException
*/
public void processingInstruction(String target, String data)
throws org.xml.sax.SAXException
{
if (m_doIndent) {
m_childNodeNum++;
flushCharactersBuffer();
}
// Process any pending starDocument and startElement first.
flushPending();
// Use a fairly nasty hack to tell if the next node is supposed to be
// unescaped text.
if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
{
startNonEscaping();
}
else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
{
endNonEscaping();
}
else
{
try
{
if (m_elemContext.m_startTagOpen)
{
closeStartTag();
m_elemContext.m_startTagOpen = false;
}
else if (m_needToCallStartDocument)
startDocumentInternal();
if (shouldIndent())
indent();
final java.io.Writer writer = m_writer;
//writer.write("<?" + target);
writer.write("<?");
writer.write(target);
if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
writer.write(' ');
//writer.write(data + ">"); // different from XML
writer.write(data); // different from XML
writer.write('>'); // different from XML
// Always output a newline char if not inside of an
// element. The whitespace is not significant in that
// case.
if (m_elemContext.m_currentElemDepth <= 0)
outputLineSep();
m_startNewLine = true;
}
catch(IOException e)
{
throw new SAXException(e);
}
}
// now generate the PI event
if (m_tracer != null)
super.fireEscapingEvent(target, data);
}
Receive notivication of a entityReference.
Params: - name – non-null reference to entity name string.
Throws:
/**
* Receive notivication of a entityReference.
*
* @param name non-null reference to entity name string.
*
* @throws org.xml.sax.SAXException
*/
public final void entityReference(String name)
throws org.xml.sax.SAXException
{
try
{
final java.io.Writer writer = m_writer;
writer.write('&');
writer.write(name);
writer.write(';');
} catch(IOException e)
{
throw new SAXException(e);
}
}
See Also: - endElement.endElement(String)
/**
* @see ExtendedContentHandler#endElement(String)
*/
public final void endElement(String elemName) throws SAXException
{
endElement(null, null, elemName);
}
Process the attributes, which means to write out the currently
collected attributes to the writer. The attributes are not
cleared by this method
Params: - writer – the writer to write processed attributes to.
- nAttrs – the number of attributes in m_attributes
to be processed
Throws:
/**
* Process the attributes, which means to write out the currently
* collected attributes to the writer. The attributes are not
* cleared by this method
*
* @param writer the writer to write processed attributes to.
* @param nAttrs the number of attributes in m_attributes
* to be processed
*
* @throws org.xml.sax.SAXException
*/
public void processAttributes(java.io.Writer writer, int nAttrs)
throws IOException,SAXException
{
/*
* process the collected attributes
*/
for (int i = 0; i < nAttrs; i++)
{
processAttribute(
writer,
m_attributes.getQName(i),
m_attributes.getValue(i),
m_elemContext.m_elementDesc);
}
}
For the enclosing elements starting tag write out out any attributes
followed by ">"
Throws: - SAXException –
/**
* For the enclosing elements starting tag write out out any attributes
* followed by ">"
*
*@throws org.xml.sax.SAXException
*/
protected void closeStartTag() throws SAXException
{
try
{
// finish processing attributes, time to fire off the start element event
if (m_tracer != null)
super.fireStartElem(m_elemContext.m_elementName);
int nAttrs = m_attributes.getLength();
if (nAttrs>0)
{
processAttributes(m_writer, nAttrs);
// clear attributes object for re-use with next element
m_attributes.clear();
}
m_writer.write('>');
/* whether Xalan or XSLTC, we have the prefix mappings now, so
* lets determine if the current element is specified in the cdata-
* section-elements list.
*/
if (m_StringOfCDATASections != null)
m_elemContext.m_isCdataSection = isCdataSection();
}
catch(IOException e)
{
throw new SAXException(e);
}
}
This method is used when a prefix/uri namespace mapping
is indicated after the element was started with a
startElement() and before and endElement().
startPrefixMapping(prefix,uri) would be used before the
startElement() call.
Params: - uri – the URI of the namespace
- prefix – the prefix associated with the given URI.
See Also:
/**
* This method is used when a prefix/uri namespace mapping
* is indicated after the element was started with a
* startElement() and before and endElement().
* startPrefixMapping(prefix,uri) would be used before the
* startElement() call.
* @param uri the URI of the namespace
* @param prefix the prefix associated with the given URI.
*
* @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
*/
public void namespaceAfterStartElement(String prefix, String uri)
throws SAXException
{
// hack for XSLTC with finding URI for default namespace
if (m_elemContext.m_elementURI == null)
{
String prefix1 = getPrefixPart(m_elemContext.m_elementName);
if (prefix1 == null && EMPTYSTRING.equals(prefix))
{
// the elements URI is not known yet, and it
// doesn't have a prefix, and we are currently
// setting the uri for prefix "", so we have
// the uri for the element... lets remember it
m_elemContext.m_elementURI = uri;
}
}
startPrefixMapping(prefix,uri,false);
}
public void startDTD(String name, String publicId, String systemId)
throws SAXException
{
m_inDTD = true;
super.startDTD(name, publicId, systemId);
}
Report the end of DTD declarations.
Throws: - SAXException – The application may raise an exception.
See Also:
/**
* Report the end of DTD declarations.
* @throws org.xml.sax.SAXException The application may raise an exception.
* @see #startDTD
*/
public void endDTD() throws org.xml.sax.SAXException
{
m_inDTD = false;
/* for ToHTMLStream the DOCTYPE is entirely output in the
* startDocumentInternal() method, so don't do anything here
*/
}
This method does nothing.
/**
* This method does nothing.
*/
public void attributeDecl(
String eName,
String aName,
String type,
String valueDefault,
String value)
throws SAXException
{
// The internal DTD subset is not serialized by the ToHTMLStream serializer
}
This method does nothing.
/**
* This method does nothing.
*/
public void elementDecl(String name, String model) throws SAXException
{
// The internal DTD subset is not serialized by the ToHTMLStream serializer
}
This method does nothing.
/**
* This method does nothing.
*/
public void internalEntityDecl(String name, String value)
throws SAXException
{
// The internal DTD subset is not serialized by the ToHTMLStream serializer
}
This method does nothing.
/**
* This method does nothing.
*/
public void externalEntityDecl(
String name,
String publicId,
String systemId)
throws SAXException
{
// The internal DTD subset is not serialized by the ToHTMLStream serializer
}
This method is used to add an attribute to the currently open element.
The caller has guaranted that this attribute is unique, which means that it
not been seen before and will not be seen again.
Params: - name – the qualified name of the attribute
- value – the value of the attribute which can contain only
ASCII printable characters characters in the range 32 to 127 inclusive.
- flags – the bit values of this integer give optimization information.
/**
* This method is used to add an attribute to the currently open element.
* The caller has guaranted that this attribute is unique, which means that it
* not been seen before and will not be seen again.
*
* @param name the qualified name of the attribute
* @param value the value of the attribute which can contain only
* ASCII printable characters characters in the range 32 to 127 inclusive.
* @param flags the bit values of this integer give optimization information.
*/
public void addUniqueAttribute(String name, String value, int flags)
throws SAXException
{
try
{
final java.io.Writer writer = m_writer;
if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
{
// "flags" has indicated that the characters
// '>' '<' '&' and '"' are not in the value and
// m_htmlcharInfo has recorded that there are no other
// entities in the range 0 to 127 so we write out the
// value directly
writer.write(' ');
writer.write(name);
writer.write("=\"");
writer.write(value);
writer.write('"');
}
else if (
(flags & HTML_ATTREMPTY) > 0
&& (value.length() == 0 || value.equalsIgnoreCase(name)))
{
writer.write(' ');
writer.write(name);
}
else
{
writer.write(' ');
writer.write(name);
writer.write("=\"");
if ((flags & HTML_ATTRURL) > 0)
{
writeAttrURI(writer, value, m_specialEscapeURLs);
}
else
{
writeAttrString(writer, value, this.getEncoding());
}
writer.write('"');
}
} catch (IOException e) {
throw new SAXException(e);
}
}
public void comment(char ch[], int start, int length)
throws SAXException
{
// The internal DTD subset is not serialized by the ToHTMLStream serializer
if (m_inDTD)
return;
super.comment(ch, start, length);
}
public boolean reset()
{
boolean ret = super.reset();
if (!ret)
return false;
initToHTMLStream();
return true;
}
private void initToHTMLStream()
{
m_isprevblock = false;
m_inDTD = false;
m_omitMetaTag = false;
m_specialEscapeURLs = true;
}
static class Trie
{
/**
* A digital search trie for 7-bit ASCII text
* The API is a subset of java.util.Hashtable
* The key must be a 7-bit ASCII string
* The value may be any Java Object
* One can get an object stored in a trie from its key,
* but the search is either case sensitive or case
* insensitive to the characters in the key, and this
* choice of sensitivity or insensitivity is made when
* the Trie is created, before any objects are put in it.
*
* This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
* It exists to cut the serializers dependancy on that package.
*
* @xsl.usage internal
*/
Size of the m_nextChar array. /** Size of the m_nextChar array. */
public static final int ALPHA_SIZE = 128;
The root node of the tree. /** The root node of the tree. */
final Node m_Root;
helper buffer to convert Strings to char arrays /** helper buffer to convert Strings to char arrays */
private char[] m_charBuffer = new char[0];
true if the search for an object is lower case only with the key /** true if the search for an object is lower case only with the key */
private final boolean m_lowerCaseOnly;
Construct the trie that has a case insensitive search.
/**
* Construct the trie that has a case insensitive search.
*/
public Trie()
{
m_Root = new Node();
m_lowerCaseOnly = false;
}
Construct the trie given the desired case sensitivity with the key.
Params: - lowerCaseOnly – true if the search keys are to be loser case only,
not case insensitive.
/**
* Construct the trie given the desired case sensitivity with the key.
* @param lowerCaseOnly true if the search keys are to be loser case only,
* not case insensitive.
*/
public Trie(boolean lowerCaseOnly)
{
m_Root = new Node();
m_lowerCaseOnly = lowerCaseOnly;
}
Put an object into the trie for lookup.
Params: - key – must be a 7-bit ASCII string
- value – any java object.
Returns: The old object that matched key, or null.
/**
* Put an object into the trie for lookup.
*
* @param key must be a 7-bit ASCII string
* @param value any java object.
*
* @return The old object that matched key, or null.
*/
public Object put(String key, Object value)
{
final int len = key.length();
if (len > m_charBuffer.length)
{
// make the biggest buffer ever needed in get(String)
m_charBuffer = new char[len];
}
Node node = m_Root;
for (int i = 0; i < len; i++)
{
Node nextNode =
node.m_nextChar[Character.toLowerCase(key.charAt(i))];
if (nextNode != null)
{
node = nextNode;
}
else
{
for (; i < len; i++)
{
Node newNode = new Node();
if (m_lowerCaseOnly)
{
// put this value into the tree only with a lower case key
node.m_nextChar[Character.toLowerCase(
key.charAt(i))] =
newNode;
}
else
{
// put this value into the tree with a case insensitive key
node.m_nextChar[Character.toUpperCase(
key.charAt(i))] =
newNode;
node.m_nextChar[Character.toLowerCase(
key.charAt(i))] =
newNode;
}
node = newNode;
}
break;
}
}
Object ret = node.m_Value;
node.m_Value = value;
return ret;
}
Get an object that matches the key.
Params: - key – must be a 7-bit ASCII string
Returns: The object that matches the key, or null.
/**
* Get an object that matches the key.
*
* @param key must be a 7-bit ASCII string
*
* @return The object that matches the key, or null.
*/
public Object get(final String key)
{
final int len = key.length();
/* If the name is too long, we won't find it, this also keeps us
* from overflowing m_charBuffer
*/
if (m_charBuffer.length < len)
return null;
Node node = m_Root;
switch (len) // optimize the look up based on the number of chars
{
// case 0 looks silly, but the generated bytecode runs
// faster for lookup of elements of length 2 with this in
// and a fair bit faster. Don't know why.
case 0 :
{
return null;
}
case 1 :
{
final char ch = key.charAt(0);
if (ch < ALPHA_SIZE)
{
node = node.m_nextChar[ch];
if (node != null)
return node.m_Value;
}
return null;
}
// comment out case 2 because the default is faster
// case 2 :
// {
// final char ch0 = key.charAt(0);
// final char ch1 = key.charAt(1);
// if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
// {
// node = node.m_nextChar[ch0];
// if (node != null)
// {
//
// if (ch1 < ALPHA_SIZE)
// {
// node = node.m_nextChar[ch1];
// if (node != null)
// return node.m_Value;
// }
// }
// }
// return null;
// }
default :
{
for (int i = 0; i < len; i++)
{
// A thread-safe way to loop over the characters
final char ch = key.charAt(i);
if (ALPHA_SIZE <= ch)
{
// the key is not 7-bit ASCII so we won't find it here
return null;
}
node = node.m_nextChar[ch];
if (node == null)
return null;
}
return node.m_Value;
}
}
}
The node representation for the trie.
@xsl.usage internal
/**
* The node representation for the trie.
* @xsl.usage internal
*/
private class Node
{
Constructor, creates a Node[ALPHA_SIZE].
/**
* Constructor, creates a Node[ALPHA_SIZE].
*/
Node()
{
m_nextChar = new Node[ALPHA_SIZE];
m_Value = null;
}
The next nodes. /** The next nodes. */
final Node m_nextChar[];
The value. /** The value. */
Object m_Value;
}
Construct the trie from another Trie.
Both the existing Trie and this new one share the same table for
lookup, and it is assumed that the table is fully populated and
not changing anymore.
Params: - existingTrie – the Trie that this one is a copy of.
/**
* Construct the trie from another Trie.
* Both the existing Trie and this new one share the same table for
* lookup, and it is assumed that the table is fully populated and
* not changing anymore.
*
* @param existingTrie the Trie that this one is a copy of.
*/
public Trie(Trie existingTrie)
{
// copy some fields from the existing Trie into this one.
m_Root = existingTrie.m_Root;
m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
// get a buffer just big enough to hold the longest key in the table.
int max = existingTrie.getLongestKeyLength();
m_charBuffer = new char[max];
}
Get an object that matches the key.
This method is faster than get(), but is not thread-safe.
Params: - key – must be a 7-bit ASCII string
Returns: The object that matches the key, or null.
/**
* Get an object that matches the key.
* This method is faster than get(), but is not thread-safe.
*
* @param key must be a 7-bit ASCII string
*
* @return The object that matches the key, or null.
*/
public Object get2(final String key)
{
final int len = key.length();
/* If the name is too long, we won't find it, this also keeps us
* from overflowing m_charBuffer
*/
if (m_charBuffer.length < len)
return null;
Node node = m_Root;
switch (len) // optimize the look up based on the number of chars
{
// case 0 looks silly, but the generated bytecode runs
// faster for lookup of elements of length 2 with this in
// and a fair bit faster. Don't know why.
case 0 :
{
return null;
}
case 1 :
{
final char ch = key.charAt(0);
if (ch < ALPHA_SIZE)
{
node = node.m_nextChar[ch];
if (node != null)
return node.m_Value;
}
return null;
}
default :
{
/* Copy string into array. This is not thread-safe because
* it modifies the contents of m_charBuffer. If multiple
* threads were to use this Trie they all would be
* using this same array (not good). So this
* method is not thread-safe, but it is faster because
* converting to a char[] and looping over elements of
* the array is faster than a String's charAt(i).
*/
key.getChars(0, len, m_charBuffer, 0);
for (int i = 0; i < len; i++)
{
final char ch = m_charBuffer[i];
if (ALPHA_SIZE <= ch)
{
// the key is not 7-bit ASCII so we won't find it here
return null;
}
node = node.m_nextChar[ch];
if (node == null)
return null;
}
return node.m_Value;
}
}
}
Get the length of the longest key used in the table.
/**
* Get the length of the longest key used in the table.
*/
public int getLongestKeyLength()
{
return m_charBuffer.length;
}
}
}