File | Doc | Category | Size | Date | Package
DocumentBuilderImpl.java | API Doc | Android 1.5 API | 16729 | Wed May 06 22:41:06 BST 2009 | org.apache.harmony.xml.parsers

DocumentBuilderImpl

public class DocumentBuilderImpl extends DocumentBuilder
Provides a straightforward DocumentBuilder implementation based on XMLPull/KXML. The class is used internally only, thus only notable members that are not already in the abstract superclass are documented. Hope that's ok.

Fields Summary
private static DOMImplementation dom
private EntityResolver entityResolver
private ErrorHandler errorHandler
private boolean ignoreComments
private boolean ignoreElementContentWhitespace
private boolean namespaceAware
Constructors Summary
DocumentBuilderImpl()


     
        // Intentionally empty: no initialization is performed here. The
        // configuration flags are changed afterwards through the setter
        // methods of this class.
    
Methods Summary
public org.w3c.dom.DOMImplementationgetDOMImplementation()

        // Hands out the class-wide (static) DOMImplementation held in 'dom';
        // newDocument() uses the same instance to create documents.
        return dom;
    
public booleanisIgnoringComments()
Reflects whether this DocumentBuilder is configured to ignore comments.

return
True if and only if comments are ignored.

        // Current value of the flag set through setIgnoreComments(); the
        // parsing loop skips COMMENT tokens while it is true.
        return ignoreComments;
    
public booleanisIgnoringElementContentWhitespace()
Reflects whether this DocumentBuilder is configured to ignore element content whitespace.

return
True if and only if whitespace element content is ignored.

        // Current value of the flag set through
        // setIgnoreElementContentWhitespace(); the parsing loop skips
        // IGNORABLE_WHITESPACE tokens while it is true.
        return ignoreElementContentWhitespace;
    
public booleanisNamespaceAware()

        // Current value of the flag set through setNamespaceAware();
        // parse(InputSource) passes it to the pull parser as
        // FEATURE_PROCESS_NAMESPACES.
        return namespaceAware;
    
public booleanisValidating()

        // Unconditionally reports that this builder does not validate.
        return false;
    
public org.w3c.dom.DocumentnewDocument()

        // Creates a new document through the shared DOMImplementation,
        // passing null for the namespace URI, for the qualified name of a
        // root element and for the document type.
        return dom.createDocument(null, null, null);
    
public org.w3c.dom.Documentparse(org.xml.sax.InputSource source)

        /*
         * Parses the XML supplied by the given InputSource into a newly
         * created DOM document.
         *
         * source:  the input to read. Its byte stream is used if present
         *          (together with source.getEncoding()), otherwise its
         *          character stream. Must not be null.
         * returns: the document that was built.
         * throws IllegalArgumentException if source is null.
         * throws SAXParseException if the source offers neither a byte stream
         *          nor a character stream, if the very first token already is
         *          END_DOCUMENT, or if the pull parser reports a parsing
         *          error.
         * throws IOException if the pull parser failed because of an
         *          IOException.
         */
        if (source == null) {
            throw new IllegalArgumentException();
        }

        Document document = newDocument();

        try {
            XmlPullParser parser = new KXmlParser();

            parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES,
                    namespaceAware);

            if (source.getByteStream() != null) {
                parser.setInput(source.getByteStream(), source.getEncoding());
            } else if (source.getCharacterStream() != null) {
                parser.setInput(source.getCharacterStream());
            } else {
                // TODO Accept other sources as well?
                throw new SAXParseException(
                        "InputSource needs either stream or reader", null);
            }

            // Advance to the first token: the recursive parse() expects the
            // parser to be positioned on the token it has to process.
            if (parser.nextToken() == XmlPullParser.END_DOCUMENT) {
                throw new SAXParseException(
                        "Unexpected end of document", null);
            }

            parse(parser, document, document, XmlPullParser.END_DOCUMENT);

            parser.require(XmlPullParser.END_DOCUMENT, null, null);
        } catch (XmlPullParserException ex) {
            // If the pull parser merely wrapped an I/O or runtime problem,
            // surface that original exception unchanged.
            Throwable detail = ex.getDetail();
            if (detail instanceof IOException) {
                throw (IOException) detail;
            }
            if (detail instanceof RuntimeException) {
                throw (RuntimeException) detail;
            }

            // Otherwise translate into a SAXParseException carrying the
            // position reported by the pull parser.
            LocatorImpl locator = new LocatorImpl();

            locator.setPublicId(source.getPublicId());
            locator.setSystemId(source.getSystemId());
            locator.setLineNumber(ex.getLineNumber());
            locator.setColumnNumber(ex.getColumnNumber());

            // Keep the pull parser exception as the embedded cause so its
            // stack trace is not lost when the error is reported upstream.
            SAXParseException newEx = new SAXParseException(ex.getMessage(),
                    locator, ex);

            // Give an installed handler the chance to see the error before
            // it is thrown to the caller.
            if (errorHandler != null) {
                errorHandler.error(newEx);
            }

            throw newEx;
        }

        return document;
    
private voidparse(org.xmlpull.v1.XmlPullParser parser, org.w3c.dom.Document document, org.w3c.dom.Node node, int endToken)
Implements the whole parsing of the XML document. The XML pull parser is actually more of a tokenizer, and we are doing a classical recursive descent parsing (the method invokes itself for XML elements). Our approach to parsing does accept some illegal documents (more than one root element, for example). The assumption is that the DOM implementation throws the proper exceptions in these cases.

param
parser The XML pull parser we're reading from.
param
document The document we're building.
param
node The node we're currently on (initially the document itself).
param
endToken The token that will end this recursive call. Either XmlPullParser.END_DOCUMENT or XmlPullParser.END_TAG.
throws
XmlPullParserException If a parsing error occurs.
throws
IOException If a general IO error occurs.


        int token = parser.getEventType();

        /*
         * The main parsing loop. The precondition is that we are already on the
         * token to be processed. This holds for each iteration of the loop, so
         * the inner statements have to ensure that (in particular the recursive
         * call).
         */
        while (token != endToken && token != XmlPullParser.END_DOCUMENT) {
            if (token == XmlPullParser.PROCESSING_INSTRUCTION) {
                /*
                 * Found a processing instruction. The token text holds the
                 * target, optionally followed by whitespace and the data. We
                 * split at the first XML whitespace character (space, tab,
                 * CR or LF) and drop the whole separator run, so that the
                 * data starts at the first non-whitespace character after
                 * the target.
                 */
                final String xmlWhitespace = " \t\r\n";
                String text = parser.getText();
                int length = text.length();

                int targetEnd = 0;
                while (targetEnd < length
                        && xmlWhitespace.indexOf(text.charAt(targetEnd)) == -1) {
                    targetEnd++;
                }

                int dataStart = targetEnd;
                while (dataStart < length
                        && xmlWhitespace.indexOf(text.charAt(dataStart)) != -1) {
                    dataStart++;
                }

                String target = text.substring(0, targetEnd);
                String data = text.substring(dataStart);

                node.appendChild(document.createProcessingInstruction(target,
                        data));
            } else if (token == XmlPullParser.DOCDECL) {
                /*
                 * Found a document type declaration. Unfortunately KXML doesn't
                 * have the necessary details, so the declaration text is
                 * tokenized here: name, then optionally SYSTEM <sysid> or
                 * PUBLIC <pubid> <sysid>.
                 *
                 * NOTE(review): the text is split on whitespace, so a quoted
                 * identifier that itself contains whitespace is cut into
                 * several tokens, and only double quotes are stripped below.
                 */
                StringTokenizer tokenizer = new StringTokenizer(parser.getText());
                if (tokenizer.hasMoreTokens()) {
                    String name = tokenizer.nextToken();
                    String pubid = null;
                    String sysid = null;

                    if (tokenizer.hasMoreTokens()) {
                        String text = tokenizer.nextToken();

                        if ("SYSTEM".equals(text)) {
                            if (tokenizer.hasMoreTokens()) {
                                sysid = tokenizer.nextToken();
                            }
                        } else if ("PUBLIC".equals(text)) {
                            if (tokenizer.hasMoreTokens()) {
                                pubid = tokenizer.nextToken();
                            }
                            if (tokenizer.hasMoreTokens()) {
                                sysid = tokenizer.nextToken();
                            }
                        }
                    }

                    // Strip a surrounding pair of double quotes, if present.
                    if (pubid != null && pubid.length() >= 2 && pubid.startsWith("\"") && pubid.endsWith("\"")) {
                        pubid = pubid.substring(1, pubid.length() - 1);
                    }

                    if (sysid != null && sysid.length() >= 2 && sysid.startsWith("\"") && sysid.endsWith("\"")) {
                        sysid = sysid.substring(1, sysid.length() - 1);
                    }

                    // The doctype is always attached to the document itself,
                    // not to the node currently being filled.
                    document.appendChild(dom.createDocumentType(name, pubid, sysid));
                }

            } else if (token == XmlPullParser.COMMENT) {
                /*
                 * Found a comment. We simply take the token text, but we only
                 * create a node if the client wants to see comments at all.
                 */
                if (!ignoreComments) {
                    node.appendChild(document.createComment(parser.getText()));
                }
            } else if (token == XmlPullParser.IGNORABLE_WHITESPACE) {
                /*
                 * Found some ignorable whitespace. We simply take the token
                 * text, but we only create a node if the client wants to see
                 * whitespace at all.
                 */
                if (!ignoreElementContentWhitespace) {
                    node.appendChild(document.createTextNode(parser.getText()));
                }
            } else if (token == XmlPullParser.TEXT) {
                /*
                 * Found a piece of text. That's the easiest case. We simply
                 * take it and create a corresponding node.
                 */
                node.appendChild(document.createTextNode(parser.getText()));
            } else if (token == XmlPullParser.CDSECT) {
                /*
                 * Found a CDATA section. That's also trivial. We simply
                 * take it and create a corresponding node.
                 */
                node.appendChild(document.createCDATASection(parser.getText()));
            } else if (token == XmlPullParser.ENTITY_REF) {
                /*
                 * Found an entity reference. The five standard entities are
                 * replaced by their text; anything else becomes an entity
                 * reference node.
                 */
                String entity = parser.getName();

                // TODO Consult entityResolver (if one is installed) here; it
                // is currently not used.

                String replacement = resolveStandardEntity(entity);
                if (replacement != null) {
                    node.appendChild(document.createTextNode(replacement));
                } else {
                    node.appendChild(document.createEntityReference(entity));
                }
            } else if (token == XmlPullParser.START_TAG) {
                /*
                 * Found an element start tag. We create an element node with
                 * the proper info and attributes. We then invoke parse()
                 * recursively to handle the next level of nesting. When we
                 * return from this call, we check that we are on the proper
                 * element end tag. Only the creation of the element and its
                 * attributes depends on whether the parser is
                 * namespace-aware or not.
                 */
                String name = parser.getName();

                // Namespace the matching end tag is checked against.
                String namespace;
                Element element;

                if (namespaceAware) {
                    // Collect info for element node
                    namespace = parser.getNamespace();
                    if ("".equals(namespace)) {
                        namespace = null;
                    }

                    // Create element node and wire it correctly
                    element = document.createElementNS(namespace, name);
                    element.setPrefix(parser.getPrefix());
                    node.appendChild(element);

                    for (int i = 0; i < parser.getAttributeCount(); i++) {
                        // Collect info for a single attribute node
                        String attrNamespace = parser.getAttributeNamespace(i);
                        String attrPrefix = parser.getAttributePrefix(i);
                        String attrName = parser.getAttributeName(i);
                        String attrValue = parser.getAttributeValue(i);

                        if ("".equals(attrNamespace)) {
                            attrNamespace = null;
                        }

                        // Create attribute node and wire it correctly
                        Attr attr = document.createAttributeNS(attrNamespace, attrName);
                        attr.setPrefix(attrPrefix);
                        attr.setValue(attrValue);
                        element.setAttributeNodeNS(attr);
                    }
                } else {
                    namespace = "";

                    // Create element node and wire it correctly
                    element = document.createElement(name);
                    node.appendChild(element);

                    for (int i = 0; i < parser.getAttributeCount(); i++) {
                        // Collect info for a single attribute node
                        String attrName = parser.getAttributeName(i);
                        String attrValue = parser.getAttributeValue(i);

                        // Create attribute node and wire it correctly
                        Attr attr = document.createAttribute(attrName);
                        attr.setValue(attrValue);
                        element.setAttributeNode(attr);
                    }
                }

                // Recursive descent: step onto the first token inside the
                // element; the recursive call reads the current event type
                // from the parser itself.
                parser.nextToken();
                parse(parser, document, element, XmlPullParser.END_TAG);

                // Expect the element's end tag here
                parser.require(XmlPullParser.END_TAG, namespace, name);
            }

            token = parser.nextToken();
        }
    
private java.lang.StringresolveStandardEntity(java.lang.String entity)
Resolves one of the five standard XML entities.

param
entity The name of the entity to resolve, not including the ampersand or the semicolon.
return
The proper replacement, or null, if the entity is unknown.

        if ("lt".equals(entity)) {
            return "<";
        } else if ("gt".equals(entity)) {
            return ">";
        } else if ("amp".equals(entity)) {
            return "&";
        } else if ("apos".equals(entity)) {
            return "'";
        } else if ("quot".equals(entity)) {
            return "\"";
        } else {
            return null;
        }
    
public voidsetEntityResolver(org.xml.sax.EntityResolver resolver)

        // Only stored: the ENTITY_REF handling of the parsing loop checks
        // for a resolver but its use is still a TODO there.
        entityResolver = resolver;
    
public voidsetErrorHandler(org.xml.sax.ErrorHandler handler)

        // parse(InputSource) calls error() on this handler (when non-null)
        // before it throws the SAXParseException built from a pull parser
        // failure.
        errorHandler = handler;
    
public voidsetIgnoreComments(boolean value)
Controls whether this DocumentBuilder ignores comments.

param
value Turns comment ignorance on or off.

        // While true, the parsing loop creates no Comment nodes for COMMENT
        // tokens.
        ignoreComments = value;
    
public voidsetIgnoreElementContentWhitespace(boolean value)
Controls whether this DocumentBuilder ignores element content whitespace.

param
value Turns element whitespace content ignorance on or off.

        // While true, the parsing loop creates no text nodes for
        // IGNORABLE_WHITESPACE tokens.
        ignoreElementContentWhitespace = value;
    
public voidsetNamespaceAware(boolean value)
Controls whether this DocumentBuilder is namespace-aware.

param
value Turns namespace awareness on or off.

        // Passed to the pull parser as FEATURE_PROCESS_NAMESPACES and used
        // by the parsing loop to choose between the namespace-aware and the
        // plain DOM factory methods for elements and attributes.
        namespaceAware = value;