File | Doc | Category | Size | Date | Package
DocumentBuilderImpl.java | API Doc | Android 1.5 API | 16729 | Wed May 06 22:41:06 BST 2009 | org.apache.harmony.xml.parsers

DocumentBuilderImpl.java

/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.harmony.xml.parsers;

import java.io.IOException;
import java.util.StringTokenizer;

import javax.xml.parsers.DocumentBuilder;

import org.kxml2.io.KXmlParser;
import org.w3c.dom.Attr;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.LocatorImpl;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;

import org.apache.harmony.xml.dom.DOMImplementationImpl;

/**
 * Provides a straightforward, non-validating DocumentBuilder implementation
 * based on XMLPull/KXML. The pull parser is used as a tokenizer and the DOM
 * tree is built by recursive descent over its tokens.
 *
 * The class is used internally only, thus only notable members that are not
 * already in the abstract superclass are documented in detail.
 */
class DocumentBuilderImpl extends DocumentBuilder {

    /** The shared DOM implementation used to create documents and doctypes. */
    private static final DOMImplementation dom = DOMImplementationImpl.getInstance();

    /** Stored by {@link #setEntityResolver}; not consulted by the parser yet. */
    private EntityResolver entityResolver;

    /** Receives parse errors before they are thrown; may be null. */
    private ErrorHandler errorHandler;

    private boolean ignoreComments;

    private boolean ignoreElementContentWhitespace;

    private boolean namespaceAware;

    DocumentBuilderImpl() {
        // Do nothing.
    }

    @Override
    public DOMImplementation getDOMImplementation() {
        return dom;
    }

    /**
     * Reflects whether this DocumentBuilder is configured to ignore comments.
     *
     * @return True if and only if comments are ignored.
     */
    public boolean isIgnoringComments() {
        return ignoreComments;
    }

    /**
     * Reflects whether this DocumentBuilder is configured to ignore element
     * content whitespace.
     *
     * @return True if and only if whitespace element content is ignored.
     */
    public boolean isIgnoringElementContentWhitespace() {
        return ignoreElementContentWhitespace;
    }

    @Override
    public boolean isNamespaceAware() {
        return namespaceAware;
    }

    /**
     * This implementation never validates.
     *
     * @return Always false.
     */
    @Override
    public boolean isValidating() {
        return false;
    }

    @Override
    public Document newDocument() {
        return dom.createDocument(null, null, null);
    }

    /**
     * Parses the given input source into a new DOM document. The byte stream
     * of the source is preferred (together with the encoding declared on the
     * source); the character stream is used if there is no byte stream.
     *
     * @param source The input to parse. Must not be null and must carry
     *        either a byte stream or a character stream.
     *
     * @return The document built from the input.
     *
     * @throws IllegalArgumentException If the source is null.
     * @throws SAXException If the source has neither stream nor reader, if
     *         the input is empty, or if the pull parser reports a parsing
     *         error. Parsing errors are reported to the installed error
     *         handler (if any) before being thrown, and carry the
     *         underlying pull parser exception as their cause.
     * @throws IOException If the pull parser fails due to an IO error.
     */
    @Override
    public Document parse(InputSource source) throws SAXException, IOException {
        if (source == null) {
            throw new IllegalArgumentException();
        }

        Document document = newDocument();

        try {
            XmlPullParser parser = new KXmlParser();

            parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES,
                    namespaceAware);

            if (source.getByteStream() != null) {
                parser.setInput(source.getByteStream(), source.getEncoding());
            } else if (source.getCharacterStream() != null) {
                parser.setInput(source.getCharacterStream());
            } else {
                // TODO Accept other sources as well?
                throw new SAXParseException(
                        "InputSource needs either stream or reader", null);
            }

            if (parser.nextToken() == XmlPullParser.END_DOCUMENT) {
                throw new SAXParseException(
                        "Unexpected end of document", null);
            }

            parse(parser, document, document, XmlPullParser.END_DOCUMENT);

            parser.require(XmlPullParser.END_DOCUMENT, null, null);
        } catch (XmlPullParserException ex) {
            // Unwrap failures that are not parsing problems as such.
            if (ex.getDetail() instanceof IOException) {
                throw (IOException) ex.getDetail();
            }
            if (ex.getDetail() instanceof RuntimeException) {
                throw (RuntimeException) ex.getDetail();
            }

            LocatorImpl locator = new LocatorImpl();

            locator.setPublicId(source.getPublicId());
            locator.setSystemId(source.getSystemId());
            locator.setLineNumber(ex.getLineNumber());
            locator.setColumnNumber(ex.getColumnNumber());

            // Keep the pull parser exception as the cause so that the
            // original stack trace is not lost.
            SAXParseException newEx = new SAXParseException(ex.getMessage(),
                    locator, ex);

            if (errorHandler != null) {
                errorHandler.error(newEx);
            }

            throw newEx;
        }

        return document;
    }

    /**
     * Implements the whole parsing of the XML document. The XML pull parser is
     * actually more of a tokenizer, and we are doing a classical recursive
     * descent parsing (the method invokes itself for XML elements). Our
     * approach to parsing does accept some illegal documents (more than one
     * root element, for example). The assumption is that the DOM implementation
     * throws the proper exceptions in these cases.
     *
     * @param parser The XML pull parser we're reading from.
     * @param document The document we're building.
     * @param node The node we're currently on (initially the document itself).
     * @param endToken The token that will end this recursive call. Either
     *        XmlPullParser.END_DOCUMENT or XmlPullParser.END_TAG.
     *
     * @throws XmlPullParserException If a parsing error occurs.
     * @throws IOException If a general IO error occurs.
     */
    private void parse(XmlPullParser parser, Document document, Node node,
            int endToken) throws XmlPullParserException, IOException {

        int token = parser.getEventType();

        /*
         * The main parsing loop. The precondition is that we are already on the
         * token to be processed. This holds for each iteration of the loop, so
         * the inner statements have to ensure that (in particular the recursive
         * call).
         */
        while (token != endToken && token != XmlPullParser.END_DOCUMENT) {
            if (token == XmlPullParser.PROCESSING_INSTRUCTION) {
                /*
                 * Found a processing instruction. The token text holds both
                 * target and data and is split by the helper.
                 */
                appendProcessingInstruction(document, node, parser.getText());
            } else if (token == XmlPullParser.DOCDECL) {
                /*
                 * Found a document type declaration. The pull parser only
                 * hands us the raw text, so we extract the name and the
                 * external identifiers ourselves.
                 */
                appendDocumentType(document, parser.getText());
            } else if (token == XmlPullParser.COMMENT) {
                /*
                 * Found a comment. We simply take the token text, but we only
                 * create a node if the client wants to see comments at all.
                 */
                if (!ignoreComments) {
                    node.appendChild(document.createComment(parser.getText()));
                }
            } else if (token == XmlPullParser.IGNORABLE_WHITESPACE) {
                /*
                 * Found some ignorable whitespace. We only create a node if
                 * the client wants to see whitespace at all, and never
                 * directly below the document: a DOM Document must not have
                 * Text children.
                 */
                if (!ignoreElementContentWhitespace && node != document) {
                    node.appendChild(document.createTextNode(parser.getText()));
                }
            } else if (token == XmlPullParser.TEXT) {
                /*
                 * Found a piece of text. That's the easiest case. We simply
                 * take it and create a corresponding node.
                 */
                node.appendChild(document.createTextNode(parser.getText()));
            } else if (token == XmlPullParser.CDSECT) {
                /*
                 * Found a CDATA section. That's also trivial. We simply
                 * take it and create a corresponding node.
                 */
                node.appendChild(document.createCDATASection(parser.getText()));
            } else if (token == XmlPullParser.ENTITY_REF) {
                /*
                 * Found an entity reference. The five standard entities are
                 * replaced by text, anything else becomes an entity
                 * reference node.
                 *
                 * TODO Consult entityResolver (if installed) here.
                 */
                String entity = parser.getName();

                String replacement = resolveStandardEntity(entity);
                if (replacement != null) {
                    node.appendChild(document.createTextNode(replacement));
                } else {
                    node.appendChild(document.createEntityReference(entity));
                }
            } else if (token == XmlPullParser.START_TAG) {
                /*
                 * Found an element start tag. We create an element node with
                 * the proper info and attributes. We then invoke parse()
                 * recursively to handle the next level of nesting. When we
                 * return from this call, we check that we are on the proper
                 * element end tag. The whole handling differs somewhat
                 * depending on whether the parser is namespace-aware or not.
                 */
                if (namespaceAware) {
                    // Collect info for element node
                    String namespace = parser.getNamespace();
                    String name = parser.getName();
                    String prefix = parser.getPrefix();

                    if ("".equals(namespace)) {
                        namespace = null;
                    }

                    // Create element node and wire it correctly
                    Element element = document.createElementNS(namespace, name);
                    element.setPrefix(prefix);
                    node.appendChild(element);

                    for (int i = 0; i < parser.getAttributeCount(); i++) {
                        // Collect info for a single attribute node
                        String attrNamespace = parser.getAttributeNamespace(i);
                        String attrPrefix = parser.getAttributePrefix(i);
                        String attrName = parser.getAttributeName(i);
                        String attrValue = parser.getAttributeValue(i);

                        if ("".equals(attrNamespace)) {
                            attrNamespace = null;
                        }

                        // Create attribute node and wire it correctly
                        Attr attr = document.createAttributeNS(attrNamespace, attrName);
                        attr.setPrefix(attrPrefix);
                        attr.setValue(attrValue);
                        element.setAttributeNodeNS(attr);
                    }

                    // Recursive descent. The nested call re-reads the
                    // current event type from the parser itself.
                    parser.nextToken();
                    parse(parser, document, element, XmlPullParser.END_TAG);

                    // Expect the element's end tag here
                    parser.require(XmlPullParser.END_TAG, namespace, name);

                } else {
                    // Collect info for element node
                    String name = parser.getName();

                    // Create element node and wire it correctly
                    Element element = document.createElement(name);
                    node.appendChild(element);

                    for (int i = 0; i < parser.getAttributeCount(); i++) {
                        // Collect info for a single attribute node
                        String attrName = parser.getAttributeName(i);
                        String attrValue = parser.getAttributeValue(i);

                        // Create attribute node and wire it correctly
                        Attr attr = document.createAttribute(attrName);
                        attr.setValue(attrValue);
                        element.setAttributeNode(attr);
                    }

                    // Recursive descent. The nested call re-reads the
                    // current event type from the parser itself.
                    parser.nextToken();
                    parse(parser, document, element, XmlPullParser.END_TAG);

                    // Expect the element's end tag here
                    parser.require(XmlPullParser.END_TAG, "", name);
                }
            }

            token = parser.nextToken();
        }
    }

    /**
     * Creates a processing instruction node from the raw token text and
     * appends it to the given node. The target is everything up to the first
     * whitespace character (space, tab, CR or LF); the data starts at the
     * first non-whitespace character after the target. Without any
     * whitespace the whole text is the target and the data is empty.
     *
     * @param document The document used as node factory.
     * @param node The node receiving the processing instruction.
     * @param text The raw text of the processing instruction token.
     */
    private static void appendProcessingInstruction(Document document,
            Node node, String text) {
        int split = indexOfWhitespace(text, 0);

        String target;
        String data;
        if (split == -1) {
            target = text;
            data = "";
        } else {
            target = text.substring(0, split);
            data = text.substring(skipWhitespace(text, split));
        }

        node.appendChild(document.createProcessingInstruction(target, data));
    }

    /**
     * Creates a document type node from the raw text of a DOCTYPE token and
     * appends it to the document. The text is expected to look like
     * <code>name</code>, <code>name SYSTEM "sysid"</code> or
     * <code>name PUBLIC "pubid" "sysid"</code>, optionally followed by an
     * internal subset, which is ignored. Literals may be enclosed in single
     * or double quotes and may contain whitespace. If no name can be found,
     * no node is created.
     *
     * @param document The document receiving the document type node.
     * @param text The raw text of the DOCTYPE token.
     */
    private static void appendDocumentType(Document document, String text) {
        int length = text.length();

        // The name runs up to the next whitespace or the start of the
        // internal subset, whichever comes first.
        int start = skipWhitespace(text, 0);
        int end = start;
        while (end < length && !isXmlWhitespace(text.charAt(end))
                && text.charAt(end) != '[') {
            end++;
        }
        if (end == start) {
            return;
        }

        String name = text.substring(start, end);
        String pubid = null;
        String sysid = null;

        start = skipWhitespace(text, end);
        if (start < length) {
            end = tokenEnd(text, start);
            String keyword = text.substring(start, end);
            start = skipWhitespace(text, end);

            if ("SYSTEM".equals(keyword)) {
                if (start < length) {
                    sysid = unquote(text.substring(start, tokenEnd(text, start)));
                }
            } else if ("PUBLIC".equals(keyword)) {
                if (start < length) {
                    end = tokenEnd(text, start);
                    pubid = unquote(text.substring(start, end));

                    start = skipWhitespace(text, end);
                    if (start < length) {
                        sysid = unquote(text.substring(start, tokenEnd(text, start)));
                    }
                }
            }
        }

        document.appendChild(dom.createDocumentType(name, pubid, sysid));
    }

    /**
     * Finds the end of the token that starts at the given position. A token
     * that starts with a quote character and has a matching closing quote
     * extends through that closing quote, whitespace included. Any other
     * token extends up to the next whitespace character or the end of text.
     *
     * @param text The text being scanned.
     * @param start The index of the first character of the token. Must be a
     *        valid index into the text.
     *
     * @return The index just past the last character of the token.
     */
    private static int tokenEnd(String text, int start) {
        char first = text.charAt(start);
        if (first == '"' || first == '\'') {
            int close = text.indexOf(first, start + 1);
            if (close != -1) {
                return close + 1;
            }
        }

        int end = indexOfWhitespace(text, start);
        return (end != -1 ? end : text.length());
    }

    /**
     * Strips one pair of matching single or double quotes from a token.
     *
     * @param token The token, possibly quoted.
     *
     * @return The token without its enclosing quotes, or the unchanged token
     *         if it is not enclosed in a matching pair of quotes.
     */
    private static String unquote(String token) {
        int last = token.length() - 1;
        if (last >= 1) {
            char quote = token.charAt(0);
            if ((quote == '"' || quote == '\'') && token.charAt(last) == quote) {
                return token.substring(1, last);
            }
        }
        return token;
    }

    /**
     * Returns the index of the first whitespace character at or after the
     * given position, or -1 if there is none.
     */
    private static int indexOfWhitespace(String text, int from) {
        for (int i = from; i < text.length(); i++) {
            if (isXmlWhitespace(text.charAt(i))) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns the index of the first non-whitespace character at or after
     * the given position, or the length of the text if there is none.
     */
    private static int skipWhitespace(String text, int from) {
        int i = from;
        while (i < text.length() && isXmlWhitespace(text.charAt(i))) {
            i++;
        }
        return i;
    }

    /**
     * Tells whether the character is space, tab, line feed or carriage
     * return.
     */
    private static boolean isXmlWhitespace(char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

    @Override
    public void setEntityResolver(EntityResolver resolver) {
        entityResolver = resolver;
    }

    @Override
    public void setErrorHandler(ErrorHandler handler) {
        errorHandler = handler;
    }

    /**
     * Controls whether this DocumentBuilder ignores comments.
     *
     * @param value Turns comment ignorance on or off.
     */
    public void setIgnoreComments(boolean value) {
        ignoreComments = value;
    }

    /**
     * Controls whether this DocumentBuilder ignores element content whitespace.
     *
     * @param value Turns element whitespace content ignorance on or off.
     */
    public void setIgnoreElementContentWhitespace(boolean value) {
        ignoreElementContentWhitespace = value;
    }

    /**
     * Controls whether this DocumentBuilder is namespace-aware.
     *
     * @param value Turns namespace awareness on or off.
     */
    public void setNamespaceAware(boolean value) {
        namespaceAware = value;
    }

    /**
     * Resolves one of the five standard XML entities.
     *
     * @param entity The name of the entity to resolve, not including
     *               the ampersand or the semicolon.
     *
     * @return The proper replacement, or null, if the entity is unknown.
     */
    private String resolveStandardEntity(String entity) {
        if ("lt".equals(entity)) {
            return "<";
        } else if ("gt".equals(entity)) {
            return ">";
        } else if ("amp".equals(entity)) {
            return "&";
        } else if ("apos".equals(entity)) {
            return "'";
        } else if ("quot".equals(entity)) {
            return "\"";
        } else {
            return null;
        }
    }
}