java/10 : jdk.xml.bind/com/sun/tools/internal/xjc/reader/internalizer/DOMForest.java

DOMForest
https://openjdk.java.net/
GPLv2 + Classpath Exception
/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package com.sun.tools.internal.xjc.reader.internalizer;

import com.sun.istack.internal.NotNull;
import com.sun.istack.internal.XMLStreamReaderToContentHandler;
import com.sun.tools.internal.xjc.ErrorReceiver;
import com.sun.tools.internal.xjc.Options;
import com.sun.tools.internal.xjc.reader.Const;
import com.sun.tools.internal.xjc.reader.xmlschema.parser.SchemaConstraintChecker;
import com.sun.tools.internal.xjc.util.ErrorReceiverFilter;
import com.sun.xml.internal.bind.marshaller.DataWriter;
import com.sun.xml.internal.bind.v2.util.XmlFactory;
import com.sun.xml.internal.xsom.parser.JAXPParser;
import com.sun.xml.internal.xsom.parser.XMLParser;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.*;
import org.xml.sax.helpers.XMLFilterImpl;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.sax.SAXSource;
import javax.xml.validation.SchemaFactory;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.*;

import static com.sun.xml.internal.bind.v2.util.XmlFactory.allowExternalAccess;
import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;


Builds a DOM forest and maintains association from
system IDs to DOM trees.
 A forest is a transitive reflexive closure of referenced documents. IOW, if a document is in a forest, all the documents referenced from it is in a forest, too. To support this semantics, DOMForest uses InternalizationLogic to find referenced documents. 

Some documents are marked as "root"s, meaning those documents were
put into a forest explicitly, not because it is referenced from another
document. (However, a root document can be referenced from other
documents, too.)
Author: 
    Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)/**
 * Builds a DOM forest and maintains association from
 * system IDs to DOM trees.
 *
 * <p>
 * A forest is a transitive reflexive closure of referenced documents.
 * IOW, if a document is in a forest, all the documents referenced from
 * it is in a forest, too. To support this semantics, {@link DOMForest}
 * uses {@link InternalizationLogic} to find referenced documents.
 *
 * <p>
 * Some documents are marked as "root"s, meaning those documents were
 * put into a forest explicitly, not because it is referenced from another
 * document. (However, a root document can be referenced from other
 * documents, too.)
 *
 * @author
 *     Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
 */
public final class DOMForest {
    actual data storage map<SystemId,Document>. /** actual data storage {@code map<SystemId,Document>}. */
    private final Map<String,Document> core = new LinkedHashMap<>();

    To correctly feed documents to a schema parser, we need to remember
which documents (of the forest) were given as the root
documents, and which of them are read as included/imported
documents.

Set of system ids as strings.
/**
     * To correctly feed documents to a schema parser, we need to remember
     * which documents (of the forest) were given as the root
     * documents, and which of them are read as included/imported
     * documents.
     *
     * <p>
     * Set of system ids as strings.
     */
    private final Set<String> rootDocuments = new LinkedHashSet<String>();

    Stores location information for all the trees in this forest. /** Stores location information for all the trees in this forest. */
    public final LocatorTable locatorTable = new LocatorTable();

    Stores all the outer-most <jaxb:bindings> customizations. /** Stores all the outer-most {@code <jaxb:bindings>} customizations. */
    public final Set<Element> outerMostBindings = new HashSet<Element>();

    Used to resolve references to other schema documents. /** Used to resolve references to other schema documents. */
    private EntityResolver entityResolver = null;

    Errors encountered during the parsing will be sent to this object. /** Errors encountered during the parsing will be sent to this object. */
    private ErrorReceiver errorReceiver = null;

    Schema language dependent part of the processing. /** Schema language dependent part of the processing. */
    protected final InternalizationLogic logic;

    private final SAXParserFactory parserFactory;
    private final DocumentBuilder documentBuilder;

    private final Options options;

    public DOMForest(
        SAXParserFactory parserFactory, DocumentBuilder documentBuilder,
        InternalizationLogic logic ) {

        this.parserFactory = parserFactory;
        this.documentBuilder = documentBuilder;
        this.logic = logic;
        this.options = null;
    }

    public DOMForest( InternalizationLogic logic, Options opt ) {

        if (opt == null) throw new AssertionError("Options object null");
        this.options = opt;

        try {
            DocumentBuilderFactory dbf = XmlFactory.createDocumentBuilderFactory(opt.disableXmlSecurity);
            this.documentBuilder = dbf.newDocumentBuilder();
            this.parserFactory = XmlFactory.createParserFactory(opt.disableXmlSecurity);
        } catch( ParserConfigurationException e ) {
            throw new AssertionError(e);
        }

        this.logic = logic;
    }

    Gets the DOM tree associated with the specified system ID,
or null if none is found.
/**
     * Gets the DOM tree associated with the specified system ID,
     * or null if none is found.
     */
    public Document get( String systemId ) {
        Document doc = core.get(systemId);

        if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) {
            // As of JDK1.4, java.net.URL.toExternal method returns URLs like
            // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738.
            // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"),
            // and this descripancy breaks DOM look up by system ID.

            // this extra check solves this problem.
            doc = core.get( "file://"+systemId.substring(5) );
        }

        if( doc==null && systemId.startsWith("file:") ) {
            // on Windows, filenames are case insensitive.
            // perform case-insensitive search for improved user experience
            String systemPath = getPath(systemId);
            for (String key : core.keySet()) {
                if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) {
                    doc = core.get(key);
                    break;
                }
            }
        }

        return doc;
    }

    Strips off the leading 'file:///' portion from an URL.
/**
     * Strips off the leading 'file:///' portion from an URL.
     */
    private String getPath(String key) {
        key = key.substring(5); // skip 'file:'
        while(key.length()>0 && key.charAt(0)=='/') {
            key = key.substring(1);
        }
        return key;
    }

    Returns a read-only set of root document system IDs.
/**
     * Returns a read-only set of root document system IDs.
     */
    public Set<String> getRootDocuments() {
        return Collections.unmodifiableSet(rootDocuments);
    }

    Picks one document at random and returns it.
/**
     * Picks one document at random and returns it.
     */
    public Document getOneDocument() {
        for (Document dom : core.values()) {
            if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
                return dom;
        }
        // we should have caught this error very early on
        throw new AssertionError();
    }

    Checks the correctness of the XML Schema documents and return true
if it's OK.
 This method performs a weaker version of the tests where error messages are provided without line number information. So whenever possible use SchemaConstraintChecker. 
See Also: SchemaConstraintChecker/**
     * Checks the correctness of the XML Schema documents and return true
     * if it's OK.
     *
     * <p>
     * This method performs a weaker version of the tests where error messages
     * are provided without line number information. So whenever possible
     * use {@link SchemaConstraintChecker}.
     *
     * @see SchemaConstraintChecker
     */
    public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) {
        try {
            boolean disableXmlSecurity = false;
            if (options != null) {
                disableXmlSecurity = options.disableXmlSecurity;
            }
            SchemaFactory sf = XmlFactory.createSchemaFactory(W3C_XML_SCHEMA_NS_URI, disableXmlSecurity);
            ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler);
            sf.setErrorHandler(filter);
            Set<String> roots = getRootDocuments();
            Source[] sources = new Source[roots.size()];
            int i=0;
            for (String root : roots) {
                sources[i++] = new DOMSource(get(root),root);
            }
            sf.newSchema(sources);
            return !filter.hadError();
        } catch (SAXException e) {
            // the errors should have been reported
            return false;
        }
    }

    Gets the system ID from which the given DOM is parsed.

Poor-man's base URI.
/**
     * Gets the system ID from which the given DOM is parsed.
     * <p>
     * Poor-man's base URI.
     */
    public String getSystemId( Document dom ) {
        for (Map.Entry<String,Document> e : core.entrySet()) {
            if (e.getValue() == dom)
                return e.getKey();
        }
        return null;
    }

    public Document parse( InputSource source, boolean root ) throws SAXException {
        if( source.getSystemId()==null )
            throw new IllegalArgumentException();

        return parse( source.getSystemId(), source, root );
    }

    Parses an XML at the given location (
and XMLs referenced by it) into DOM trees
and stores them to this forest.
Returns: the parsed DOM document object./**
     * Parses an XML at the given location (
     * and XMLs referenced by it) into DOM trees
     * and stores them to this forest.
     *
     * @return the parsed DOM document object.
     */
    public Document parse( String systemId, boolean root ) throws SAXException, IOException {

        systemId = Options.normalizeSystemId(systemId);

        if( core.containsKey(systemId) )
            // this document has already been parsed. Just ignore.
            return core.get(systemId);

        InputSource is=null;

        // allow entity resolver to find the actual byte stream.
        if( entityResolver!=null )
            is = entityResolver.resolveEntity(null,systemId);
        if( is==null )
            is = new InputSource(systemId);

        // but we still use the original system Id as the key.
        return parse( systemId, is, root );
    }

    Returns a ContentHandler to feed SAX events into. 
The client of this class can feed SAX events into the handler
to parse a document into this DOM forest.
This version requires that the DOM object to be created and registered
to the map beforehand.
/**
     * Returns a {@link ContentHandler} to feed SAX events into.
     *
     * <p>
     * The client of this class can feed SAX events into the handler
     * to parse a document into this DOM forest.
     *
     * This version requires that the DOM object to be created and registered
     * to the map beforehand.
     */
    private ContentHandler getParserHandler( Document dom ) {
        ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings);
        handler = new WhitespaceStripper(handler,errorReceiver,entityResolver);
        handler = new VersionChecker(handler,errorReceiver,entityResolver);

        // insert the reference finder so that
        // included/imported schemas will be also parsed
        XMLFilterImpl f = logic.createExternalReferenceFinder(this);
        f.setContentHandler(handler);

        if(errorReceiver!=null)
            f.setErrorHandler(errorReceiver);
        if(entityResolver!=null)
            f.setEntityResolver(entityResolver);

        return f;
    }

    public interface Handler extends ContentHandler {
        Gets the DOM that was built.
/**
         * Gets the DOM that was built.
         */
        public Document getDocument();
    }

    private static abstract class HandlerImpl extends XMLFilterImpl implements Handler {
    }

    Returns a ContentHandler to feed SAX events into. 
The client of this class can feed SAX events into the handler
to parse a document into this DOM forest.
/**
     * Returns a {@link ContentHandler} to feed SAX events into.
     *
     * <p>
     * The client of this class can feed SAX events into the handler
     * to parse a document into this DOM forest.
     */
    public Handler getParserHandler( String systemId, boolean root ) {
        final Document dom = documentBuilder.newDocument();
        core.put( systemId, dom );
        if(root)
            rootDocuments.add(systemId);

        ContentHandler handler = getParserHandler(dom);

        // we will register the DOM to the map once the system ID becomes available.
        // but the SAX allows the event source to not to provide that information,
        // so be prepared for such case.
        HandlerImpl x = new HandlerImpl() {
            public Document getDocument() {
                return dom;
            }
        };
        x.setContentHandler(handler);

        return x;
   }

    Parses the given document and add it to the DOM forest.
Returns: 
     null if there was a parse error. otherwise non-null./**
     * Parses the given document and add it to the DOM forest.
     *
     * @return
     *      null if there was a parse error. otherwise non-null.
     */
    public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException {
        Document dom = documentBuilder.newDocument();

        systemId = Options.normalizeSystemId(systemId);

        // put into the map before growing a tree, to
        // prevent recursive reference from causing infinite loop.
        core.put( systemId, dom );
        if(root)
            rootDocuments.add(systemId);

        try {
            XMLReader reader = parserFactory.newSAXParser().getXMLReader();
            reader.setContentHandler(getParserHandler(dom));
            if(errorReceiver!=null)
                reader.setErrorHandler(errorReceiver);
            if(entityResolver!=null)
                reader.setEntityResolver(entityResolver);
            reader.parse(inputSource);
        } catch( ParserConfigurationException e ) {
            // in practice, this exception won't happen.
            errorReceiver.error(e.getMessage(),e);
            core.remove(systemId);
            rootDocuments.remove(systemId);
            return null;
        } catch( IOException e ) {
            errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e);
            core.remove(systemId);
            rootDocuments.remove(systemId);
            return null;
        }

        return dom;
    }

    public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException {
        Document dom = documentBuilder.newDocument();

        systemId = Options.normalizeSystemId(systemId);

        if(root)
            rootDocuments.add(systemId);

        if(systemId==null)
            throw new IllegalArgumentException("system id cannot be null");
        core.put( systemId, dom );

        new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge();

        return dom;
    }

    Performs internalization.
This method should be called only once, only after all the
schemas are parsed.
Returns: 
     the returned bindings need to be applied after schema
     components are built./**
     * Performs internalization.
     *
     * This method should be called only once, only after all the
     * schemas are parsed.
     *
     * @return
     *      the returned bindings need to be applied after schema
     *      components are built.
     */
    public SCDBasedBindingSet transform(boolean enableSCD) {
        return Internalizer.transform(this, enableSCD, options.disableXmlSecurity);
    }

    Performs the schema correctness check by using JAXP 1.3.
 This is "weak", because SchemaFactory.newSchema(Source[]) doesn't handle inclusions very correctly (it ends up parsing it from its original source, not in this tree), and because it doesn't handle two documents for the same namespace very well. 

We should eventually fix JAXP (and Xerces), but meanwhile
this weaker and potentially wrong correctness check is still
better than nothing when used inside JAX-WS (JAXB CLI and Ant
does a better job of checking this.)
 To receive errors, use SchemaFactory.setErrorHandler(ErrorHandler). /**
     * Performs the schema correctness check by using JAXP 1.3.
     *
     * <p>
     * This is "weak", because {@link SchemaFactory#newSchema(Source[])}
     * doesn't handle inclusions very correctly (it ends up parsing it
     * from its original source, not in this tree), and because
     * it doesn't handle two documents for the same namespace very
     * well.
     *
     * <p>
     * We should eventually fix JAXP (and Xerces), but meanwhile
     * this weaker and potentially wrong correctness check is still
     * better than nothing when used inside JAX-WS (JAXB CLI and Ant
     * does a better job of checking this.)
     *
     * <p>
     * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}.
     */
    public void weakSchemaCorrectnessCheck(SchemaFactory sf) {
        List<SAXSource> sources = new ArrayList<SAXSource>();
        for( String systemId : getRootDocuments() ) {
            Document dom = get(systemId);
            if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
                continue;   // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error

            SAXSource ss = createSAXSource(systemId);
            try {
                ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true);
            } catch (SAXException e) {
                throw new AssertionError(e);    // Xerces wants this. See 6395322.
            }
            sources.add(ss);
        }

        try {
            allowExternalAccess(sf, "file,http", options.disableXmlSecurity).newSchema(sources.toArray(new SAXSource[0]));
        } catch (SAXException e) {
            // error should have been reported.
        } catch (RuntimeException re) {
            // JAXP RI isn't very trustworthy when it comes to schema error check,
            // and we know some cases where it just dies with NPE. So handle it gracefully.
            // this masks a bug in the JAXP RI, but we need a release that we have to make.
            try {
                sf.getErrorHandler().warning(
                    new SAXParseException(Messages.format(
                        Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,re.getMessage()),
                        null,null,-1,-1,re));
            } catch (SAXException e) {
                // ignore
            }
        }
    }

    Creates a SAXSource that, when parsed, reads from this DOMForest (instead of parsing the original source identified by the system ID.) /**
     * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest}
     * (instead of parsing the original source identified by the system ID.)
     */
    public @NotNull SAXSource createSAXSource(String systemId) {
        ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() {
            // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect
            // handlers, since SAX allows handlers to be changed while parsing.
            @Override
            public void parse(InputSource input) throws SAXException, IOException {
                createParser().parse(input, this, this, this);
            }

            @Override
            public void parse(String systemId) throws SAXException, IOException {
                parse(new InputSource(systemId));
            }
        });

        return new SAXSource(reader,new InputSource(systemId));
    }

    Creates XMLParser for XSOM which reads documents from this DOMForest rather than doing a fresh parse. The net effect is that XSOM will read transformed XML Schemas instead of the original documents. /**
     * Creates {@link XMLParser} for XSOM which reads documents from
     * this DOMForest rather than doing a fresh parse.
     *
     * The net effect is that XSOM will read transformed XML Schemas
     * instead of the original documents.
     */
    public XMLParser createParser() {
        return new DOMForestParser(this, new JAXPParser(XmlFactory.createParserFactory(options.disableXmlSecurity)));
    }

    public EntityResolver getEntityResolver() {
        return entityResolver;
    }

    public void setEntityResolver(EntityResolver entityResolver) {
        this.entityResolver = entityResolver;
    }

    public ErrorReceiver getErrorHandler() {
        return errorReceiver;
    }

    public void setErrorHandler(ErrorReceiver errorHandler) {
        this.errorReceiver = errorHandler;
    }

    Gets all the parsed documents.
/**
     * Gets all the parsed documents.
     */
    public Document[] listDocuments() {
        return core.values().toArray(new Document[core.size()]);
    }

    Gets all the system IDs of the documents.
/**
     * Gets all the system IDs of the documents.
     */
    public String[] listSystemIDs() {
        return core.keySet().toArray(new String[core.keySet().size()]);
    }

    Dumps the contents of the forest to the specified stream.
This is a debug method. As such, error handling is sloppy.
/**
     * Dumps the contents of the forest to the specified stream.
     *
     * This is a debug method. As such, error handling is sloppy.
     */
    @SuppressWarnings("CallToThreadDumpStack")
    public void dump( OutputStream out ) throws IOException {
        try {
            // create identity transformer
            boolean disableXmlSecurity = false;
            if (options != null) {
                disableXmlSecurity = options.disableXmlSecurity;
            }
            TransformerFactory tf = XmlFactory.createTransformerFactory(disableXmlSecurity);
            Transformer it = tf.newTransformer();

            for (Map.Entry<String, Document> e : core.entrySet()) {
                out.write( ("---<< "+e.getKey()+'\n').getBytes() );

                DataWriter dw = new DataWriter(new OutputStreamWriter(out),null);
                dw.setIndentStep("  ");
                it.transform( new DOMSource(e.getValue()),
                    new SAXResult(dw));

                out.write( "\n\n\n".getBytes() );
            }
        } catch( TransformerException e ) {
            e.printStackTrace();
        }
    }
}
/

java/ 10/ jdk.xml.bind/com/sun/tools/internal/xjc/reader/internalizer/DOMForest.java