File Doc Category Size Date Package
DocumentBuilderImpl.java API Doc Android 1.5 API 16729 Wed May 06 22:41:06 BST 2009 org.apache.harmony.xml.parsers

DocumentBuilderImpl

java.lang.Object
- javax.xml.parsers.DocumentBuilder

public class DocumentBuilderImpl extends DocumentBuilder

Provides a straightforward DocumentBuilder implementation based on XMLPull/KXML. The class is intended for internal use only, so only those notable members that are not already documented in the abstract superclass are documented here.

Fields Summary
private static DOMImplementation
dom
private EntityResolver
entityResolver
private ErrorHandler
errorHandler
private boolean
ignoreComments
private boolean
ignoreElementContentWhitespace
private boolean
namespaceAware
Constructors Summary
/**
 * Creates a new builder instance. The constructor performs no work of its
 * own; it is package-private, so instances can only be created from within
 * the same package.
 */
DocumentBuilderImpl() {
    // Do nothing.
}
Methods Summary
public org.w3c.dom.DOMImplementation getDOMImplementation()
return dom;
public boolean isIgnoringComments()
Reflects whether this DocumentBuilder is configured to ignore comments.
return
True if and only if comments are ignored.
return ignoreComments;
public boolean isIgnoringElementContentWhitespace()
Reflects whether this DocumentBuilder is configured to ignore element content whitespace.
return
True if and only if whitespace element content is ignored.
return ignoreElementContentWhitespace;
public boolean isNamespaceAware()
return namespaceAware;
public boolean isValidating()
return false;
public org.w3c.dom.Document newDocument()
return dom.createDocument(null, null, null);
public org.w3c.dom.Document parse(org.xml.sax.InputSource source)
if (source == null) { throw new IllegalArgumentException(); } Document document = newDocument(); try { XmlPullParser parser = new KXmlParser(); parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, namespaceAware); if (source.getByteStream() != null) { parser.setInput(source.getByteStream(), source.getEncoding()); } else if (source.getCharacterStream() != null) { parser.setInput(source.getCharacterStream()); } else { // TODO Accept other sources as well? throw new SAXParseException( "InputSource needs either stream or reader", null); } if(parser.nextToken() == XmlPullParser.END_DOCUMENT) { throw new SAXParseException( "Unexpected end of document", null); } parse(parser, document, document, XmlPullParser.END_DOCUMENT); parser.require(XmlPullParser.END_DOCUMENT, null, null); } catch (XmlPullParserException ex) { if(ex.getDetail() instanceof IOException) { throw (IOException)ex.getDetail(); } if(ex.getDetail() instanceof RuntimeException) { throw (RuntimeException)ex.getDetail(); } LocatorImpl locator = new LocatorImpl(); locator.setPublicId(source.getPublicId()); locator.setSystemId(source.getSystemId()); locator.setLineNumber(ex.getLineNumber()); locator.setColumnNumber(ex.getColumnNumber()); SAXParseException newEx = new SAXParseException(ex.getMessage(), locator); if (errorHandler != null) { errorHandler.error(newEx); } throw newEx; } return document;
private void parse(org.xmlpull.v1.XmlPullParser parser, org.w3c.dom.Document document, org.w3c.dom.Node node, int endToken)
Implements the whole parsing of the XML document. The XML pull parser is actually more of a tokenizer, and we are doing a classical recursive descent parsing (the method invokes itself for XML elements). Our approach to parsing does accept some illegal documents (more than one root element, for example). The assumption is that the DOM implementation throws the proper exceptions in these cases.
param
parser The XML pull parser we're reading from.
param
document The document we're building.
param
node The node we're currently on (initially the document itself).
param
endToken The token that will end this recursive call. Either XmlPullParser.END_DOCUMENT or XmlPullParser.END_TAG.
throws
XmlPullParserException If a parsing error occurs.
throws
IOException If a general IO error occurs.
int token = parser.getEventType(); /* * The main parsing loop. The precondition is that we are already on the * token to be processed. This holds for each iteration of the loop, so * the inner statements have to ensure that (in particular the recursive * call). */ while (token != endToken && token != XmlPullParser.END_DOCUMENT) { if (token == XmlPullParser.PROCESSING_INSTRUCTION) { /* * Found a processing instructions. We need to split the token * text at the first whitespace character. */ String text = parser.getText(); int dot = text.indexOf(' "); String target = (dot != -1 ? text.substring(0, dot) : text); String data = (dot != -1 ? text.substring(dot + 1) : ""); node.appendChild(document.createProcessingInstruction(target, data)); } else if (token == XmlPullParser.DOCDECL) { /* * Found a document type declaration. Unfortunately KXML doesn't * have the necessary details. Do we parse it ourselves, or do * we silently ignore it, since it isn't mandatory in DOM 2 * anyway? */ StringTokenizer tokenizer = new StringTokenizer(parser.getText()); if (tokenizer.hasMoreTokens()) { String name = tokenizer.nextToken(); String pubid = null; String sysid = null; if (tokenizer.hasMoreTokens()) { String text = tokenizer.nextToken(); if ("SYSTEM".equals(text)) { if (tokenizer.hasMoreTokens()) { sysid = tokenizer.nextToken(); } } else if ("PUBLIC".equals(text)) { if (tokenizer.hasMoreTokens()) { pubid = tokenizer.nextToken(); } if (tokenizer.hasMoreTokens()) { sysid = tokenizer.nextToken(); } } } if (pubid != null && pubid.length() >= 2 && pubid.startsWith("\"") && pubid.endsWith("\"")) { pubid = pubid.substring(1, pubid.length() - 1); } if (sysid != null && sysid.length() >= 2 && sysid.startsWith("\"") && sysid.endsWith("\"")) { sysid = sysid.substring(1, sysid.length() - 1); } document.appendChild(dom.createDocumentType(name, pubid, sysid)); } } else if (token == XmlPullParser.COMMENT) { /* * Found a comment. 
We simply take the token text, but we only * create a node if the client wants to see comments at all. */ if (!ignoreComments) { node.appendChild(document.createComment(parser.getText())); } } else if (token == XmlPullParser.IGNORABLE_WHITESPACE) { /* * Found some ignorable whitespace. We simply take the token * text, but we only create a node if the client wants to see * whitespace at all. */ if (!ignoreElementContentWhitespace) { node.appendChild(document.createTextNode(parser.getText())); } } else if (token == XmlPullParser.TEXT) { /* * Found a piece of text. That's the easiest case. We simply * take it and create a corresponding node. */ node.appendChild(document.createTextNode(parser.getText())); } else if (token == XmlPullParser.CDSECT) { /* * Found a CDATA section. That's also trivial. We simply * take it and create a corresponding node. */ node.appendChild(document.createCDATASection(parser.getText())); } else if (token == XmlPullParser.ENTITY_REF) { /* * Found an entity reference. If an entity resolver is * installed, we replace it by text (if possible). Otherwise we * add an entity reference node. */ String entity = parser.getName(); if (entityResolver != null) { // TODO Implement this... } String replacement = resolveStandardEntity(entity); if (replacement != null) { node.appendChild(document.createTextNode(replacement)); } else { node.appendChild(document.createEntityReference(entity)); } } else if (token == XmlPullParser.START_TAG) { /* * Found an element start tag. We create an element node with * the proper info and attributes. We then invoke parse() * recursively to handle the next level of nesting. When we * return from this call, we check that we are on the proper * element end tag. The whole handling differs somewhat * depending on whether the parser is namespace-aware or not. 
*/ if (namespaceAware) { // Collect info for element node String namespace = parser.getNamespace(); String name = parser.getName(); String prefix = parser.getPrefix(); if ("".equals(namespace)) { namespace = null; } // Create element node and wire it correctly Element element = document.createElementNS(namespace, name); element.setPrefix(prefix); node.appendChild(element); for (int i = 0; i < parser.getAttributeCount(); i++) { // Collect info for a single attribute node String attrNamespace = parser.getAttributeNamespace(i); String attrPrefix = parser.getAttributePrefix(i); String attrName = parser.getAttributeName(i); String attrValue = parser.getAttributeValue(i); if ("".equals(attrNamespace)) { attrNamespace = null; } // Create attribute node and wire it correctly Attr attr = document.createAttributeNS(attrNamespace, attrName); attr.setPrefix(attrPrefix); attr.setValue(attrValue); element.setAttributeNodeNS(attr); } // Recursive descent token = parser.nextToken(); parse(parser, document, element, XmlPullParser.END_TAG); // Expect the element's end tag here parser.require(XmlPullParser.END_TAG, namespace, name); } else { // Collect info for element node String name = parser.getName(); // Create element node and wire it correctly Element element = document.createElement(name); node.appendChild(element); for (int i = 0; i < parser.getAttributeCount(); i++) { // Collect info for a single attribute node String attrName = parser.getAttributeName(i); String attrValue = parser.getAttributeValue(i); // Create attribute node and wire it correctly Attr attr = document.createAttribute(attrName); attr.setValue(attrValue); element.setAttributeNode(attr); } // Recursive descent token = parser.nextToken(); parse(parser, document, element, XmlPullParser.END_TAG); // Expect the element's end tag here parser.require(XmlPullParser.END_TAG, "", name); } } token = parser.nextToken(); }
private java.lang.String resolveStandardEntity(java.lang.String entity)
Resolves one of the five standard XML entities.
param
entity The name of the entity to resolve, not including the ampersand or the semicolon.
return
The proper replacement, or null, if the entity is unknown.
if ("lt".equals(entity)) { return "<"; } else if ("gt".equals(entity)) { return ">"; } else if ("amp".equals(entity)) { return "&"; } else if ("apos".equals(entity)) { return "'"; } else if ("quot".equals(entity)) { return "\""; } else { return null; }
public void setEntityResolver(org.xml.sax.EntityResolver resolver)
entityResolver = resolver;
public void setErrorHandler(org.xml.sax.ErrorHandler handler)
errorHandler = handler;
public void setIgnoreComments(boolean value)
Controls whether this DocumentBuilder ignores comments.
param
value Turns comment ignorance on or off.
ignoreComments = value;
public void setIgnoreElementContentWhitespace(boolean value)
Controls whether this DocumentBuilder ignores element content whitespace.
param
value Turns element whitespace content ignorance on or off.
ignoreElementContentWhitespace = value;
public void setNamespaceAware(boolean value)
Controls whether this DocumentBuilder is namespace-aware.
param
value Turns namespace awareness on or off.
namespaceAware = value;