FileDocCategorySizeDatePackage
ExpatParser.javaAPI DocAndroid 1.5 API26002Wed May 06 22:41:06 BST 2009org.apache.harmony.xml

ExpatParser.java

/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.harmony.xml;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.ext.LexicalHandler;

import java.io.Reader;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URLConnection;
import java.net.URL;
import java.util.logging.Logger;
import java.util.logging.Level;

/**
 * Adapts SAX API to the Expat native XML parser. Not intended for reuse
 * across documents.
 *
 * @see org.apache.harmony.xml.ExpatPullParser
 * @see org.apache.harmony.xml.ExpatReader
 */
class ExpatParser {

    private static final int BUFFER_SIZE = 8096; // in bytes

    /** Pointer to XML_Parser instance. */
    private int pointer;

    private boolean inStartElement = false;
    private int attributeCount = -1;
    private int attributePointer = 0;

    private final Locator locator = new ExpatLocator();

    private final ExpatReader xmlReader;

    private final String publicId;
    private final String systemId;

    private final String encoding;

    private final ExpatAttributes attributes = new CurrentAttributes();

    private static final String OUTSIDE_START_ELEMENT
            = "Attributes can only be used within the scope of startElement().";

    /** We default to UTF-8 when the user doesn't specify an encoding. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Encoding used for Java chars, used to parse Readers and Strings */
    /*package*/ static final String CHARACTER_ENCODING = "UTF-16";

    /** Timeout for HTTP connections (in ms) */
    private static final int TIMEOUT = 20 * 1000;

    /**
     * Constructs a new parser with the specified encoding.
     */
    /*package*/ ExpatParser(String encoding, ExpatReader xmlReader,
            boolean processNamespaces, String publicId, String systemId) {
        this.publicId = publicId;
        this.systemId = systemId;

        this.xmlReader = xmlReader;

        /*
         * TODO: Let Expat try to guess the encoding instead of defaulting.
         * Unfortunately, I don't know how to tell which encoding Expat picked,
         * so I won't know how to encode "<externalEntity>" below. The solution
         * I think is to fix Expat to not require the "<externalEntity>"
         * workaround.
         */
        this.encoding = encoding == null ? DEFAULT_ENCODING : encoding;
        this.pointer = initialize(
            this.encoding,
            processNamespaces
        );
    }

    /**
     * Used by {@link EntityParser}.
     */
    private ExpatParser(String encoding, ExpatReader xmlReader, int pointer,
            String publicId, String systemId) {
        this.encoding = encoding;
        this.xmlReader = xmlReader;
        this.pointer = pointer;
        this.systemId = systemId;
        this.publicId = publicId;
    }

    /**
     * Initializes native resources.
     *
     * @return the pointer to the native parser
     */
    private native int initialize(String encoding, boolean namespacesEnabled);

    /**
     * Called at the start of an element.
     *
     * @param uri namespace URI of element or "" if namespace processing is
     *  disabled
     * @param localName local name of element or "" if namespace processing is
     *  disabled
     * @param qName qualified name or "" if namespace processing is enabled
     * @param attributePointer pointer to native attribute char*--we keep
     *  a separate pointer so we can detach it from the parser instance
     * @param attributeCount number of attributes
     */
    /*package*/ void startElement(String uri, String localName, String qName,
            int attributePointer, int attributeCount) throws SAXException {
        ContentHandler contentHandler = xmlReader.contentHandler;
        if (contentHandler == null) {
            return;
        }

        try {
            inStartElement = true;
            this.attributePointer = attributePointer;
            this.attributeCount = attributeCount;

            contentHandler.startElement(
                    uri, localName, qName, this.attributes);
        }
        finally {
            inStartElement = false;
            this.attributeCount = -1;
            this.attributePointer = 0;
        }
    }

    /*package*/ void endElement(String uri, String localName, String qName)
            throws SAXException {
        ContentHandler contentHandler = xmlReader.contentHandler;
        if (contentHandler != null) {
            contentHandler.endElement(uri, localName, qName);
        }
    }

    /*package*/ void text(char[] text, int length) throws SAXException {
        ContentHandler contentHandler = xmlReader.contentHandler;
        if (contentHandler != null) {
            contentHandler.characters(text, 0, length);
        }
    }

    /*package*/ void comment(char[] text, int length) throws SAXException {
        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
        if (lexicalHandler != null) {
            lexicalHandler.comment(text, 0, length);
        }
    }

    /*package*/ void startCdata() throws SAXException {
        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
        if (lexicalHandler != null) {
            lexicalHandler.startCDATA();
        }
    }

    /*package*/ void endCdata() throws SAXException {
        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
        if (lexicalHandler != null) {
            lexicalHandler.endCDATA();
        }
    }

    /*package*/ void startNamespace(String prefix, String uri)
            throws SAXException {
        ContentHandler contentHandler = xmlReader.contentHandler;
        if (contentHandler != null) {
            contentHandler.startPrefixMapping(prefix, uri);
        }
    }

    /*package*/ void endNamespace(String prefix) throws SAXException {
        ContentHandler contentHandler = xmlReader.contentHandler;
        if (contentHandler != null) {
            contentHandler.endPrefixMapping(prefix);
        }
    }

    /*package*/ void startDtd(String name, String publicId, String systemId)
            throws SAXException {
        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
        if (lexicalHandler != null) {
            lexicalHandler.startDTD(name, publicId, systemId);
        }
    }

    /*package*/ void endDtd() throws SAXException {
        LexicalHandler lexicalHandler = xmlReader.lexicalHandler;
        if (lexicalHandler != null) {
            lexicalHandler.endDTD();
        }
    }

    /*package*/ void processingInstruction(String target, String data)
            throws SAXException {
        ContentHandler contentHandler = xmlReader.contentHandler;
        if (contentHandler != null) {
            contentHandler.processingInstruction(target, data);
        }        
    }

    /**
     * Handles an external entity.
     *
     * @param context to be passed back to Expat when we parse the entity
     * @param publicId the publicId of the entity
     * @param systemId the systemId of the entity
     */
    /*package*/ void handleExternalEntity(String context, String publicId,
            String systemId) throws SAXException, IOException {
        EntityResolver entityResolver = xmlReader.entityResolver;
        if (entityResolver == null) {
            return;
        }

        /*
         * The spec. is terribly under-specified here. It says that if the
         * systemId is a URL, we should try to resolve it, but it doesn't
         * specify how to tell whether or not the systemId is a URL let alone
         * how to resolve it.
         *
         * Other implementations do various insane things. We try to keep it
         * simple: if the systemId parses as a URI and it's relative, we try to
         * resolve it against the parent document's systemId. If anything goes
         * wrong, we go with the original systemId. If crazybob had designed
         * the API, he would have left all resolving to the EntityResolver.
         */
        if (this.systemId != null) {
            try {
                URI systemUri = new URI(systemId);
                if (!systemUri.isAbsolute() && !systemUri.isOpaque()) {
                    // It could be relative (or it may not be a URI at all!)
                    URI baseUri = new URI(this.systemId);
                    systemUri = baseUri.resolve(systemUri);

                    // Replace systemId w/ resolved URI
                    systemId = systemUri.toString();
                }
            } catch (Exception e) {
                Logger.getLogger(ExpatParser.class.getName()).log(Level.INFO,
                        "Could not resolve '" + systemId + "' relative to"
                        + " '" + this.systemId + "' at " + locator, e);
            }
        }

        InputSource inputSource = entityResolver.resolveEntity(
                publicId, systemId);
        if (inputSource == null) {
            /*
             * The spec. actually says that we should try to treat systemId
             * as a URL and download and parse its contents here, but an
             * entity resolver can easily accomplish the same by returning
             * new InputSource(systemId).
             *
             * Downloading external entities by default would result in several
             * unwanted DTD downloads, not to mention pose a security risk
             * when parsing untrusted XML (http://tinyurl.com/56ggrk),
             * so we just do nothing instead. This also enables the user to
             * opt out of entity parsing when using
             * {@link org.xml.sax.helpers.DefaultHandler}, something that
             * wouldn't be possible otherwise.
             */
            return;
        }

        String encoding = pickEncoding(inputSource);
        int pointer = createEntityParser(this.pointer, context, encoding);
        try {
            EntityParser entityParser = new EntityParser(encoding, xmlReader,
                    pointer, inputSource.getPublicId(),
                    inputSource.getSystemId());

            parseExternalEntity(entityParser, inputSource);
        } finally {
            releaseParser(pointer);
        }
    }

    /**
     * Picks an encoding for an external entity. Defaults to UTF-8.
     */
    private String pickEncoding(InputSource inputSource) {
        Reader reader = inputSource.getCharacterStream();
        if (reader != null) {
            return CHARACTER_ENCODING;
        }

        String encoding = inputSource.getEncoding();
        return encoding == null ? DEFAULT_ENCODING : encoding;
    }

    /**
     * Parses the the external entity provided by the input source.
     */
    private void parseExternalEntity(ExpatParser entityParser,
            InputSource inputSource) throws IOException, SAXException {
        /*
         * Expat complains if the external entity isn't wrapped with a root
         * element so we add one and ignore it later on during parsing.
         */
        
        // Try the character stream.
        Reader reader = inputSource.getCharacterStream();
        if (reader != null) {
            try {
                entityParser.append("<externalEntity>");
                entityParser.parseFragment(reader);
                entityParser.append("</externalEntity>");
            } finally {
                // TODO: Don't eat original exception when close() throws.
                reader.close();
            }
            return;
        }

        // Try the byte stream.
        InputStream in = inputSource.getByteStream();
        if (in != null) {
            try {
                entityParser.append("<externalEntity>"
                        .getBytes(entityParser.encoding));
                entityParser.parseFragment(in);
                entityParser.append("</externalEntity>"
                        .getBytes(entityParser.encoding));
            } finally {
                // TODO: Don't eat original exception when close() throws.
                in.close();
            }
            return;
        }

        // Make sure we use the user-provided systemId.
        String systemId = inputSource.getSystemId();
        if (systemId == null) {
            // TODO: We could just try our systemId here.
            throw new ParseException("No input specified.", locator);
        }

        // Try the system id.
        in = openUrl(systemId);
        try {
            entityParser.append("<externalEntity>"
                    .getBytes(entityParser.encoding));
            entityParser.parseFragment(in);
            entityParser.append("</externalEntity>"
                    .getBytes(entityParser.encoding));
        } finally {
            in.close();
        }
    }

    /**
     * Creates a native entity parser.
     *
     * @param parentPointer pointer to parent Expat parser
     * @param context passed to {@link #handleExternalEntity}
     * @param encoding
     * @return pointer to native parser
     */
    private static native int createEntityParser(int parentPointer,
            String context, String encoding);

    /**
     * Appends part of an XML document. This parser will parse the given XML to
     * the extent possible and dispatch to the appropriate methods.
     *
     * @param xml a whole or partial snippet of XML
     * @throws SAXException if an error occurs during parsing
     */
    /*package*/ void append(String xml) throws SAXException {
        try {
            append(this.pointer, xml, false);
        } catch (ExpatException e) {
            throw new ParseException(e.getMessage(), this.locator);
        }
    }

    private native void append(int pointer, String xml, boolean isFinal)
            throws SAXException, ExpatException;

    /**
     * Appends part of an XML document. This parser will parse the given XML to
     * the extent possible and dispatch to the appropriate methods.
     *
     * @param xml a whole or partial snippet of XML
     * @param offset into the char[]
     * @param length of characters to use
     * @throws SAXException if an error occurs during parsing
     */
    /*package*/ void append(char[] xml, int offset, int length)
            throws SAXException {
        try {
            append(this.pointer, xml, offset, length);
        } catch (ExpatException e) {
            throw new ParseException(e.getMessage(), this.locator);
        }
    }

    private native void append(int pointer, char[] xml, int offset,
            int length) throws SAXException, ExpatException;

    /**
     * Appends part of an XML document. This parser will parse the given XML to
     * the extent possible and dispatch to the appropriate methods.
     *
     * @param xml a whole or partial snippet of XML
     * @throws SAXException if an error occurs during parsing
     */
    /*package*/ void append(byte[] xml) throws SAXException {
        append(xml, 0, xml.length);
    }

    /**
     * Appends part of an XML document. This parser will parse the given XML to
     * the extent possible and dispatch to the appropriate methods.
     *
     * @param xml a whole or partial snippet of XML
     * @param offset into the byte[]
     * @param length of bytes to use
     * @throws SAXException if an error occurs during parsing
     */
    /*package*/ void append(byte[] xml, int offset, int length)
            throws SAXException {
        try {
            append(this.pointer, xml, offset, length);
        } catch (ExpatException e) {
            throw new ParseException(e.getMessage(), this.locator);
        }
    }

    private native void append(int pointer, byte[] xml, int offset,
            int length) throws SAXException, ExpatException;

    /**
     * Parses an XML document from the given input stream.
     */
    /*package*/ void parseDocument(InputStream in) throws IOException,
            SAXException {
        startDocument();
        parseFragment(in);
        finish();
        endDocument();
    }

    /**
     * Parses an XML Document from the given reader.
     */
    /*package*/ void parseDocument(Reader in) throws IOException, SAXException {
        startDocument();
        parseFragment(in);
        finish();
        endDocument();
    }

    /**
     * Parses XML from the given Reader.
     */
    private void parseFragment(Reader in) throws IOException, SAXException {
        char[] buffer = new char[BUFFER_SIZE / 2];
        int length;
        while ((length = in.read(buffer)) != -1) {
            try {
                append(this.pointer, buffer, 0, length);
            } catch (ExpatException e) {
                throw new ParseException(e.getMessage(), locator);
            }
        }
    }

    /**
     * Parses XML from the given input stream.
     */
    private void parseFragment(InputStream in)
            throws IOException, SAXException {
        byte[] buffer = new byte[BUFFER_SIZE];
        int length;
        while ((length = in.read(buffer)) != -1) {
            try {
                append(this.pointer, buffer, 0, length);
            } catch (ExpatException e) {
                throw new ParseException(e.getMessage(), this.locator);
            }
        }
    }

    private void startDocument() throws SAXException {
        ContentHandler contentHandler = xmlReader.contentHandler;
        if (contentHandler != null) {
            contentHandler.setDocumentLocator(this.locator);
            contentHandler.startDocument();
        }
    }

    private void endDocument() throws SAXException {
        ContentHandler contentHandler;
        contentHandler = xmlReader.contentHandler;
        if (contentHandler != null) {
            contentHandler.endDocument();
        }
    }

    /**
     * Indicate that we're finished parsing.
     *
     * @throws SAXException if the xml is incomplete
     */
    /*package*/ void finish() throws SAXException {
        try {
            append(this.pointer, "", true);
        } catch (ExpatException e) {
            throw new ParseException(e.getMessage(), this.locator);
        }
    }

    @Override
    @SuppressWarnings("FinalizeDoesntCallSuperFinalize")
    protected synchronized void finalize() throws Throwable {
        if (this.pointer != 0) {
            release(this.pointer);
            this.pointer = 0;
        }
    }

    /**
     * Releases all native objects.
     */
    private native void release(int pointer);

    /**
     * Releases native parser only.
     */
    private static native void releaseParser(int pointer);

    /**
     * Initialize static resources.
     */
    private static native void staticInitialize(String emptyString);

    static {
        staticInitialize("");
    }

    /**
     * Gets the current line number within the XML file.
     */
    private int line() {
        return line(this.pointer);
    }

    private static native int line(int pointer);

    /**
     * Gets the current column number within the XML file.
     */
    private int column() {
        return column(this.pointer);
    }

    private static native int column(int pointer);

    /**
     * Clones the current attributes so they can be used outside of
     * startElement().
     */
    /*package*/ Attributes cloneAttributes() {
        if (!inStartElement) {
            throw new IllegalStateException(OUTSIDE_START_ELEMENT);
        }

        if (attributeCount == 0) {
            return ClonedAttributes.EMPTY;
        }

        int clonePointer
                = cloneAttributes(this.attributePointer, this.attributeCount);
        return new ClonedAttributes(pointer, clonePointer, attributeCount);
    }

    private static native int cloneAttributes(int pointer, int attributeCount);

    /**
     * Used for cloned attributes.
     */
    private static class ClonedAttributes extends ExpatAttributes {

        private static final Attributes EMPTY = new ClonedAttributes(0, 0, 0);

        private final int parserPointer;
        private int pointer;
        private final int length;

        /**
         * Constructs a Java wrapper for native attributes.
         *
         * @param parserPointer pointer to the parse, can be 0 if length is 0.
         * @param pointer pointer to the attributes array, can be 0 if the
         *  length is 0.
         * @param length number of attributes
         */
        private ClonedAttributes(int parserPointer, int pointer, int length) {
            this.parserPointer = parserPointer;
            this.pointer = pointer;
            this.length = length;
        }

        @Override
        public int getParserPointer() {
            return this.parserPointer;
        }

        @Override
        public int getPointer() {
            return pointer;
        }

        @Override
        public int getLength() {
            return length;
        }

        @Override
        @SuppressWarnings("FinalizeDoesntCallSuperFinalize")
        protected synchronized void finalize() throws Throwable {
            if (pointer != 0) {
                freeAttributes(pointer);
                pointer = 0;
            }
        }
    }

    private class ExpatLocator implements Locator {

        public String getPublicId() {
            return publicId;
        }

        public String getSystemId() {
            return systemId;
        }

        public int getLineNumber() {
            return line();
        }

        public int getColumnNumber() {
            return column();
        }

        @Override
        public String toString() {
            return "Locator[publicId: " + publicId + ", systemId: " + systemId
                + ", line: " + getLineNumber()
                + ", column: " + getColumnNumber() + "]";
        }
    }

    /**
     * Attributes that are only valid during startElement().
     */
    private class CurrentAttributes extends ExpatAttributes {

        @Override
        public int getParserPointer() {
            return pointer;
        }

        @Override
        public int getPointer() {
            if (!inStartElement) {
                throw new IllegalStateException(OUTSIDE_START_ELEMENT);
            }
            return attributePointer;
        }

        @Override
        public int getLength() {
            if (!inStartElement) {
                throw new IllegalStateException(OUTSIDE_START_ELEMENT);
            }
            return attributeCount;
        }
    }

    /**
     * Includes line and column in the message.
     */
    private static class ParseException extends SAXParseException {

        private ParseException(String message, Locator locator) {
            super(makeMessage(message, locator), locator);
        }

        private static String makeMessage(String message, Locator locator) {
            return makeMessage(message, locator.getLineNumber(),
                    locator.getColumnNumber());
        }

        private static String makeMessage(
                String message, int line, int column) {
            return "At line " + line + ", column "
                    + column + ": " + message;
        }
    }
    
    /**
     * Opens an InputStream for the given URL.
     */
    /*package*/ static InputStream openUrl(String url) throws IOException {
        try {
            URLConnection urlConnection = new URL(url).openConnection();
            urlConnection.setConnectTimeout(TIMEOUT);
            urlConnection.setReadTimeout(TIMEOUT);
            urlConnection.setDoInput(true);
            urlConnection.setDoOutput(false);
            return urlConnection.getInputStream();
        } catch (Exception e) {
            IOException ioe = new IOException("Couldn't open " + url);
            ioe.initCause(e);
            throw ioe;
        }
    }

    /**
     * Parses an external entity.
     */
    private static class EntityParser extends ExpatParser {

        private int depth = 0; 

        private EntityParser(String encoding, ExpatReader xmlReader,
                int pointer, String publicId, String systemId) {
            super(encoding, xmlReader, pointer, publicId, systemId);
        }

        @Override
        void startElement(String uri, String localName, String qName,
                int attributePointer, int attributeCount) throws SAXException {
            /*
             * Skip topmost element generated by our workaround in
             * {@link #handleExternalEntity}.
             */
            if (depth++ > 0) {
                super.startElement(uri, localName, qName, attributePointer,
                        attributeCount);
            }
        }
        
        @Override
        void endElement(String uri, String localName, String qName)
                throws SAXException {
            if (--depth > 0) {
                super.endElement(uri, localName, qName);
            }
        }

        @Override
        @SuppressWarnings("FinalizeDoesntCallSuperFinalize")
        protected synchronized void finalize() throws Throwable {
            /*
             * Don't release our native resources. We do so explicitly in
             * {@link #handleExternalEntity} and we don't want to release the
             * parsing context--our parent is using it.
             */
        }
    }
}