FileDocCategorySizeDatePackage
XML11NSDocumentScannerImpl.javaAPI DocJava SE 5 API35598Fri Aug 26 14:55:46 BST 2005com.sun.org.apache.xerces.internal.impl

XML11NSDocumentScannerImpl

public class XML11NSDocumentScannerImpl extends XML11DocumentScannerImpl
The scanner acts as the source for the document information which is communicated to the document handler. This class scans an XML document, checks if document has a DTD, and if DTD is not found the scanner will remove the DTD Validator from the pipeline and perform namespace binding. Note: This scanner should only be used when the namespace processing is on!

This component requires the following features and properties from the component manager that uses it:

  • http://xml.org/sax/features/namespaces {true} -- if the value of this feature is set to false this scanner must not be used.
  • http://xml.org/sax/features/validation
  • http://apache.org/xml/features/nonvalidating/load-external-dtd
  • http://apache.org/xml/features/scanner/notify-char-refs
  • http://apache.org/xml/features/scanner/notify-builtin-refs
  • http://apache.org/xml/properties/internal/symbol-table
  • http://apache.org/xml/properties/internal/error-reporter
  • http://apache.org/xml/properties/internal/entity-manager
  • http://apache.org/xml/properties/internal/dtd-scanner
author
Elena Litani, IBM
author
Michael Glavassevich, IBM
version
$Id: XML11NSDocumentScannerImpl.java,v 1.11 2004/04/30 15:36:38 mrglavas Exp $

Fields Summary
protected boolean
fBindNamespaces
If is true, the dtd validator is no longer in the pipeline and the scanner should bind namespaces
protected boolean
fPerformValidation
If validating parser, make sure we report an error in the scanner if DTD grammar is missing.
private XMLDTDValidatorFilter
fDTDValidator
DTD validator
private boolean
fSawSpace
Saw spaces after element name or between attributes. This is reserved for the case where scanning of a start element spans several methods, as is the case when scanning the start of a root element where a DTD external subset may be read after scanning the element name.
Constructors Summary
Methods Summary
protected DispatchercreateContentDispatcher()
Creates a content dispatcher.

        return new NS11ContentDispatcher();
    
public voidreset(com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager componentManager)


        super.reset(componentManager);
        fPerformValidation = false;
        fBindNamespaces = false;
    
protected voidscanAttribute(com.sun.org.apache.xerces.internal.util.XMLAttributesImpl attributes)
Scans an attribute.

[41] Attribute ::= Name Eq AttValue

Note: This method assumes that the next character on the stream is the first character of the attribute name.

Note: This method uses the fAttributeQName and fQName variables. The contents of these variables will be destroyed.

param
attributes The attributes list for the scanned attribute.

        if (DEBUG_CONTENT_SCANNING)
            System.out.println(">>> scanAttribute()");

        // name
        fEntityScanner.scanQName(fAttributeQName);

        // equals
        fEntityScanner.skipSpaces();
        if (!fEntityScanner.skipChar('=")) {
            reportFatalError(
                "EqRequiredInAttribute",
                new Object[] {
                    fCurrentElement.rawname,
                    fAttributeQName.rawname });
        }
        fEntityScanner.skipSpaces();

        // content
        int attrIndex;

        if (fBindNamespaces) {
            attrIndex = attributes.getLength();
            attributes.addAttributeNS(
                fAttributeQName,
                XMLSymbols.fCDATASymbol,
                null);
        } else {
            int oldLen = attributes.getLength();
            attrIndex =
                attributes.addAttribute(
                    fAttributeQName,
                    XMLSymbols.fCDATASymbol,
                    null);

            // WFC: Unique Att Spec
            if (oldLen == attributes.getLength()) {
                reportFatalError(
                    "AttributeNotUnique",
                    new Object[] {
                        fCurrentElement.rawname,
                        fAttributeQName.rawname });
            }
        }

        //REVISIT: one more case needs to be included: external PE and standalone is no
        boolean isVC = fHasExternalDTD && !fStandalone;

        // REVISIT: it seems that this function should not take attributes, and length
        scanAttributeValue(
            this.fTempString,
            fTempString2,
            fAttributeQName.rawname,
            isVC,
            fCurrentElement.rawname);
        String value = fTempString.toString();
        attributes.setValue(attrIndex, value);
        attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
        attributes.setSpecified(attrIndex, true);

        // record namespace declarations if any.
        if (fBindNamespaces) {

            String localpart = fAttributeQName.localpart;
            String prefix =
                fAttributeQName.prefix != null
                    ? fAttributeQName.prefix
                    : XMLSymbols.EMPTY_STRING;
            // when it's of form xmlns="..." or xmlns:prefix="...",
            // it's a namespace declaration. but prefix:xmlns="..." isn't.
            if (prefix == XMLSymbols.PREFIX_XMLNS
                || prefix == XMLSymbols.EMPTY_STRING
                && localpart == XMLSymbols.PREFIX_XMLNS) {

                // get the internalized value of this attribute
                String uri = fSymbolTable.addSymbol(value);

                // 1. "xmlns" can't be bound to any namespace
                if (prefix == XMLSymbols.PREFIX_XMLNS
                    && localpart == XMLSymbols.PREFIX_XMLNS) {
                    fErrorReporter.reportError(
                        XMLMessageFormatter.XMLNS_DOMAIN,
                        "CantBindXMLNS",
                        new Object[] { fAttributeQName },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
                }

                // 2. the namespace for "xmlns" can't be bound to any prefix
                if (uri == NamespaceContext.XMLNS_URI) {
                    fErrorReporter.reportError(
                        XMLMessageFormatter.XMLNS_DOMAIN,
                        "CantBindXMLNS",
                        new Object[] { fAttributeQName },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
                }

                // 3. "xml" can't be bound to any other namespace than it's own
                if (localpart == XMLSymbols.PREFIX_XML) {
                    if (uri != NamespaceContext.XML_URI) {
                        fErrorReporter.reportError(
                            XMLMessageFormatter.XMLNS_DOMAIN,
                            "CantBindXML",
                            new Object[] { fAttributeQName },
                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
                    }
                }
                // 4. the namespace for "xml" can't be bound to any other prefix
                else {
                    if (uri == NamespaceContext.XML_URI) {
                        fErrorReporter.reportError(
                            XMLMessageFormatter.XMLNS_DOMAIN,
                            "CantBindXML",
                            new Object[] { fAttributeQName },
                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
                    }
                }

                prefix =
                    localpart != XMLSymbols.PREFIX_XMLNS
                        ? localpart
                        : XMLSymbols.EMPTY_STRING;

                // Declare prefix in context. Removing the association between a prefix and a 
                // namespace name is permitted in XML 1.1, so if the uri value is the empty string, 
                // the prefix is being unbound. -- mrglavas
                fNamespaceContext.declarePrefix(
                    prefix,
                    uri.length() != 0 ? uri : null);
                // bind namespace attribute to a namespace
                attributes.setURI(
                    attrIndex,
                    fNamespaceContext.getURI(XMLSymbols.PREFIX_XMLNS));

            } else {
                // attempt to bind attribute
                if (fAttributeQName.prefix != null) {
                    attributes.setURI(
                        attrIndex,
                        fNamespaceContext.getURI(fAttributeQName.prefix));
                }
            }
        }

        if (DEBUG_CONTENT_SCANNING)
            System.out.println("<<< scanAttribute()");
    
protected intscanEndElement()
Scans an end element.

[42] ETag ::= '</' Name S? '>'

Note: This method uses the fElementQName variable. The contents of this variable will be destroyed. The caller should copy the needed information out of this variable before calling this method.

return
The element depth.

        if (DEBUG_CONTENT_SCANNING)
            System.out.println(">>> scanEndElement()");

        // pop context
        fElementStack.popElement(fElementQName);

        // Take advantage of the fact that next string _should_ be "fElementQName.rawName",
        //In scanners most of the time is consumed on checks done for XML characters, we can
        // optimize on it and avoid the checks done for endElement,
        //we will also avoid symbol table lookup - neeraj.bajaj@sun.com

        // this should work both for namespace processing true or false...

        //REVISIT: if the string is not the same as expected.. we need to do better error handling..
        //We can skip this for now... In any case if the string doesn't match -- document is not well formed.
        if (!fEntityScanner.skipString(fElementQName.rawname)) {
            reportFatalError(
                "ETagRequired",
                new Object[] { fElementQName.rawname });
        }

        // end
        fEntityScanner.skipSpaces();
        if (!fEntityScanner.skipChar('>")) {
            reportFatalError(
                "ETagUnterminated",
                new Object[] { fElementQName.rawname });
        }
        fMarkupDepth--;

        //we have increased the depth for two markup "<" characters
        fMarkupDepth--;

        // check that this element was opened in the same entity
        if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
            reportFatalError(
                "ElementEntityMismatch",
                new Object[] { fCurrentElement.rawname });
        }

        // call handler
        if (fDocumentHandler != null) {

            fDocumentHandler.endElement(fElementQName, null);
            if (fBindNamespaces) {
                fNamespaceContext.popContext();
            }

        }

        return fMarkupDepth;

    
protected booleanscanStartElement()
Scans a start element. This method will handle the binding of namespace information and notifying the handler of the start of the element.

[44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
[40] STag ::= '<' Name (S Attribute)* S? '>'

Note: This method assumes that the leading '<' character has been consumed.

Note: This method uses the fElementQName and fAttributes variables. The contents of these variables will be destroyed. The caller should copy important information out of these variables before calling this method.

return
True if element is empty. (i.e. It matches production [44].

        if (DEBUG_CONTENT_SCANNING)
            System.out.println(">>> scanStartElementNS()");

        // Note: namespace processing is on by default
        fEntityScanner.scanQName(fElementQName);
        // REVISIT - [Q] Why do we need this local variable? -- mrglavas
        String rawname = fElementQName.rawname;
        if (fBindNamespaces) {
            fNamespaceContext.pushContext();
            if (fScannerState == SCANNER_STATE_ROOT_ELEMENT) {
                if (fPerformValidation) {
                    fErrorReporter.reportError(
                        XMLMessageFormatter.XML_DOMAIN,
                        "MSG_GRAMMAR_NOT_FOUND",
                        new Object[] { rawname },
                        XMLErrorReporter.SEVERITY_ERROR);

                    if (fDoctypeName == null
                        || !fDoctypeName.equals(rawname)) {
                        fErrorReporter.reportError(
                            XMLMessageFormatter.XML_DOMAIN,
                            "RootElementTypeMustMatchDoctypedecl",
                            new Object[] { fDoctypeName, rawname },
                            XMLErrorReporter.SEVERITY_ERROR);
                    }
                }
            }
        }

        // push element stack
        fCurrentElement = fElementStack.pushElement(fElementQName);

        // attributes
        boolean empty = false;
        fAttributes.removeAllAttributes();
        do {
            // spaces
            boolean sawSpace = fEntityScanner.skipSpaces();

            // end tag?
            int c = fEntityScanner.peekChar();
            if (c == '>") {
                fEntityScanner.scanChar();
                break;
            } else if (c == '/") {
                fEntityScanner.scanChar();
                if (!fEntityScanner.skipChar('>")) {
                    reportFatalError(
                        "ElementUnterminated",
                        new Object[] { rawname });
                }
                empty = true;
                break;
            } else if (!isValidNameStartChar(c) || !sawSpace) {
                // Second chance. Check if this character is a high
                // surrogate of a valid name start character.
                if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
                    reportFatalError(
                        "ElementUnterminated",
                        new Object[] { rawname });
                }
            }

            // attributes
            scanAttribute(fAttributes);
            if (fSecurityManager != null && fAttributes.getLength() > fElementAttributeLimit){                
                fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                                             "ElementAttributeLimit",
                                             new Object[]{rawname, new Integer(fAttributes.getLength()) },
                                             XMLErrorReporter.SEVERITY_FATAL_ERROR );
            }           

        } while (true);

        if (fBindNamespaces) {
            // REVISIT: is it required? forbit xmlns prefix for element
            if (fElementQName.prefix == XMLSymbols.PREFIX_XMLNS) {
                fErrorReporter.reportError(
                    XMLMessageFormatter.XMLNS_DOMAIN,
                    "ElementXMLNSPrefix",
                    new Object[] { fElementQName.rawname },
                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
            }

            // bind the element
            String prefix =
                fElementQName.prefix != null
                    ? fElementQName.prefix
                    : XMLSymbols.EMPTY_STRING;
            // assign uri to the element
            fElementQName.uri = fNamespaceContext.getURI(prefix);
            // make sure that object in the element stack is updated as well
            fCurrentElement.uri = fElementQName.uri;

            if (fElementQName.prefix == null && fElementQName.uri != null) {
                fElementQName.prefix = XMLSymbols.EMPTY_STRING;
                // making sure that the object in the element stack is updated too.
                fCurrentElement.prefix = XMLSymbols.EMPTY_STRING;
            }
            if (fElementQName.prefix != null && fElementQName.uri == null) {
                fErrorReporter.reportError(
                    XMLMessageFormatter.XMLNS_DOMAIN,
                    "ElementPrefixUnbound",
                    new Object[] {
                        fElementQName.prefix,
                        fElementQName.rawname },
                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
            }

            // bind attributes (xmlns are already bound bellow)
            int length = fAttributes.getLength();
            for (int i = 0; i < length; i++) {
                fAttributes.getName(i, fAttributeQName);

                String aprefix =
                    fAttributeQName.prefix != null
                        ? fAttributeQName.prefix
                        : XMLSymbols.EMPTY_STRING;
                String uri = fNamespaceContext.getURI(aprefix);
                // REVISIT: try removing the first "if" and see if it is faster.
                //
                if (fAttributeQName.uri != null
                    && fAttributeQName.uri == uri) {
                    continue;
                }
                if (aprefix != XMLSymbols.EMPTY_STRING) {
                    fAttributeQName.uri = uri;
                    if (uri == null) {
                        fErrorReporter.reportError(
                            XMLMessageFormatter.XMLNS_DOMAIN,
                            "AttributePrefixUnbound",
                            new Object[] {
                                fElementQName.rawname,
                                fAttributeQName.rawname,
                                aprefix },
                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
                    }
                    fAttributes.setURI(i, uri);
                }
            }

            if (length > 1) {
                QName name = fAttributes.checkDuplicatesNS();
                if (name != null) {
                    if (name.uri != null) {
                        fErrorReporter.reportError(
                            XMLMessageFormatter.XMLNS_DOMAIN,
                            "AttributeNSNotUnique",
                            new Object[] {
                                fElementQName.rawname,
                                name.localpart,
                                name.uri },
                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
                    } else {
                        fErrorReporter.reportError(
                            XMLMessageFormatter.XMLNS_DOMAIN,
                            "AttributeNotUnique",
                            new Object[] {
                                fElementQName.rawname,
                                name.rawname },
                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
                    }
                }
            }
        }

        // call handler
        if (fDocumentHandler != null) {
            if (empty) {

                //decrease the markup depth..
                fMarkupDepth--;

                // check that this element was opened in the same entity
                if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
                    reportFatalError(
                        "ElementEntityMismatch",
                        new Object[] { fCurrentElement.rawname });
                }

                fDocumentHandler.emptyElement(fElementQName, fAttributes, null);

                if (fBindNamespaces) {
                    fNamespaceContext.popContext();
                }
                //pop the element off the stack..
                fElementStack.popElement(fElementQName);
            } else {
                fDocumentHandler.startElement(fElementQName, fAttributes, null);
            }
        }

        if (DEBUG_CONTENT_SCANNING)
            System.out.println("<<< scanStartElement(): " + empty);
        return empty;

    
protected booleanscanStartElementAfterName()
Scans the remainder of a start or empty tag after the element name.

see
#scanStartElement
return
True if element is empty.


        // REVISIT - [Q] Why do we need this local variable? -- mrglavas
        String rawname = fElementQName.rawname;
        if (fBindNamespaces) {
            fNamespaceContext.pushContext();
            if (fScannerState == SCANNER_STATE_ROOT_ELEMENT) {
                if (fPerformValidation) {
                    fErrorReporter.reportError(
                        XMLMessageFormatter.XML_DOMAIN,
                        "MSG_GRAMMAR_NOT_FOUND",
                        new Object[] { rawname },
                        XMLErrorReporter.SEVERITY_ERROR);

                    if (fDoctypeName == null
                        || !fDoctypeName.equals(rawname)) {
                        fErrorReporter.reportError(
                            XMLMessageFormatter.XML_DOMAIN,
                            "RootElementTypeMustMatchDoctypedecl",
                            new Object[] { fDoctypeName, rawname },
                            XMLErrorReporter.SEVERITY_ERROR);
                    }
                }
            }
        }

        // push element stack
        fCurrentElement = fElementStack.pushElement(fElementQName);

        // attributes
        boolean empty = false;
        fAttributes.removeAllAttributes();
        do {
        	
            // end tag?
            int c = fEntityScanner.peekChar();
            if (c == '>") {
                fEntityScanner.scanChar();
                break;
            } else if (c == '/") {
                fEntityScanner.scanChar();
                if (!fEntityScanner.skipChar('>")) {
                    reportFatalError(
                        "ElementUnterminated",
                        new Object[] { rawname });
                }
                empty = true;
                break;
            } else if (!isValidNameStartChar(c) || !fSawSpace) {
                // Second chance. Check if this character is a high
                // surrogate of a valid name start character.
                if (!isValidNameStartHighSurrogate(c) || !fSawSpace) {
                    reportFatalError(
                        "ElementUnterminated",
                        new Object[] { rawname });
                }
            }

            // attributes
            scanAttribute(fAttributes);
            
            // spaces
            fSawSpace = fEntityScanner.skipSpaces();

        } while (true);

        if (fBindNamespaces) {
            // REVISIT: is it required? forbit xmlns prefix for element
            if (fElementQName.prefix == XMLSymbols.PREFIX_XMLNS) {
                fErrorReporter.reportError(
                    XMLMessageFormatter.XMLNS_DOMAIN,
                    "ElementXMLNSPrefix",
                    new Object[] { fElementQName.rawname },
                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
            }

            // bind the element
            String prefix =
                fElementQName.prefix != null
                    ? fElementQName.prefix
                    : XMLSymbols.EMPTY_STRING;
            // assign uri to the element
            fElementQName.uri = fNamespaceContext.getURI(prefix);
            // make sure that object in the element stack is updated as well
            fCurrentElement.uri = fElementQName.uri;

            if (fElementQName.prefix == null && fElementQName.uri != null) {
                fElementQName.prefix = XMLSymbols.EMPTY_STRING;
                // making sure that the object in the element stack is updated too.
                fCurrentElement.prefix = XMLSymbols.EMPTY_STRING;
            }
            if (fElementQName.prefix != null && fElementQName.uri == null) {
                fErrorReporter.reportError(
                    XMLMessageFormatter.XMLNS_DOMAIN,
                    "ElementPrefixUnbound",
                    new Object[] {
                        fElementQName.prefix,
                        fElementQName.rawname },
                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
            }

            // bind attributes (xmlns are already bound bellow)
            int length = fAttributes.getLength();
            for (int i = 0; i < length; i++) {
                fAttributes.getName(i, fAttributeQName);

                String aprefix =
                    fAttributeQName.prefix != null
                        ? fAttributeQName.prefix
                        : XMLSymbols.EMPTY_STRING;
                String uri = fNamespaceContext.getURI(aprefix);
                // REVISIT: try removing the first "if" and see if it is faster.
                //
                if (fAttributeQName.uri != null
                    && fAttributeQName.uri == uri) {
                    continue;
                }
                if (aprefix != XMLSymbols.EMPTY_STRING) {
                    fAttributeQName.uri = uri;
                    if (uri == null) {
                        fErrorReporter.reportError(
                            XMLMessageFormatter.XMLNS_DOMAIN,
                            "AttributePrefixUnbound",
                            new Object[] {
                                fElementQName.rawname,
                                fAttributeQName.rawname,
                                aprefix },
                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
                    }
                    fAttributes.setURI(i, uri);
                }
            }

            if (length > 1) {
                QName name = fAttributes.checkDuplicatesNS();
                if (name != null) {
                    if (name.uri != null) {
                        fErrorReporter.reportError(
                            XMLMessageFormatter.XMLNS_DOMAIN,
                            "AttributeNSNotUnique",
                            new Object[] {
                                fElementQName.rawname,
                                name.localpart,
                                name.uri },
                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
                    } else {
                        fErrorReporter.reportError(
                            XMLMessageFormatter.XMLNS_DOMAIN,
                            "AttributeNotUnique",
                            new Object[] {
                                fElementQName.rawname,
                                name.rawname },
                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
                    }
                }
            }
        }

        // call handler
        if (fDocumentHandler != null) {
            if (empty) {

                //decrease the markup depth..
                fMarkupDepth--;

                // check that this element was opened in the same entity
                if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
                    reportFatalError(
                        "ElementEntityMismatch",
                        new Object[] { fCurrentElement.rawname });
                }

                fDocumentHandler.emptyElement(fElementQName, fAttributes, null);

                if (fBindNamespaces) {
                    fNamespaceContext.popContext();
                }
                //pop the element off the stack..
                fElementStack.popElement(fElementQName);
            } else {
                fDocumentHandler.startElement(fElementQName, fAttributes, null);
            }
        }

        if (DEBUG_CONTENT_SCANNING)
            System.out.println("<<< scanStartElementAfterName(): " + empty);
        return empty;
        
    
protected voidscanStartElementName()
Scans the name of an element in a start or empty tag.

see
#scanStartElement()

        // Note: namespace processing is on by default
        fEntityScanner.scanQName(fElementQName);
        // Must skip spaces here because the DTD scanner
        // would consume them at the end of the external subset.
        fSawSpace = fEntityScanner.skipSpaces();
    
public voidsetDTDValidator(com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDValidatorFilter validator)
The scanner is responsible for removing DTD validator from the pipeline if it is not needed.

param
validator the DTD validator from the pipeline

        fDTDValidator = validator;