XML11DocumentScannerImpl
public class XML11DocumentScannerImpl extends XMLDocumentScannerImpl
This class is responsible for scanning XML document structure
and content. The scanner acts as the source for the document
information which is communicated to the document handler.
This component requires the following features and properties from the
component manager that uses it:
- http://xml.org/sax/features/namespaces
- http://xml.org/sax/features/validation
- http://apache.org/xml/features/nonvalidating/load-external-dtd
- http://apache.org/xml/features/scanner/notify-char-refs
- http://apache.org/xml/features/scanner/notify-builtin-refs
- http://apache.org/xml/properties/internal/symbol-table
- http://apache.org/xml/properties/internal/error-reporter
- http://apache.org/xml/properties/internal/entity-manager
- http://apache.org/xml/properties/internal/dtd-scanner
|
Fields Summary |
---|
private String[] | fStrings: Array of 3 strings. | private XMLStringBuffer | fStringBuffer: String buffer. | private XMLStringBuffer | fStringBuffer2 | private XMLStringBuffer | fStringBuffer3 |
Constructors Summary |
---|
public XML11DocumentScannerImpl()Default constructor.
//
// Constructors
//
// Only delegates to the superclass (XMLDocumentScannerImpl) constructor;
// this constructor performs no other work of its own.
super();
|
Methods Summary |
---|
protected java.lang.String | getVersionNotSupportedKey()
// Returns the fixed message key "VersionNotSupported11".
// NOTE(review): presumably the error-message key used when an unsupported
// XML version is declared - confirm against the callers in the superclass.
return "VersionNotSupported11";
| protected boolean | isInvalid(int value)
    // A character is invalid here exactly when XML11Char classifies it as
    // invalid under the XML 1.1 character rules.
    return XML11Char.isXML11Invalid(value);
| protected boolean | isInvalidLiteral(int value)
return (!XML11Char.isXML11ValidLiteral(value));
| protected int | isUnchangedByNormalization(com.sun.org.apache.xerces.internal.xni.XMLString value)Checks whether this string would be unchanged by normalization.
int end = value.offset + value.length;
for (int i = value.offset; i < end; ++i) {
int c = value.ch[i];
if (XMLChar.isSpace(c)) {
return i - value.offset;
}
}
return -1;
| protected boolean | isValidNCName(int value)
    // Delegates to the XML 1.1 NCName character test in XML11Char.
    return XML11Char.isXML11NCName(value);
| protected boolean | isValidNameChar(int value)
    // Delegates to the XML 1.1 name-character test in XML11Char.
    return XML11Char.isXML11Name(value);
| protected boolean | isValidNameStartChar(int value)
    // Delegates to the XML 1.1 name-start-character test in XML11Char.
    return XML11Char.isXML11NameStart(value);
| protected boolean | isValidNameStartHighSurrogate(int value)
// Delegates to XML11Char.isXML11NameHighSurrogate() and returns its result
// unchanged.
return XML11Char.isXML11NameHighSurrogate(value);
| protected void | normalizeWhitespace(com.sun.org.apache.xerces.internal.xni.XMLString value)Normalize whitespace in an XMLString converting all whitespace
characters to space characters.
    // Rewrites value.ch in place over the window [offset, offset + length):
    // every character accepted by XMLChar.isSpace() becomes a plain space.
    // The length of the string never changes.
    int end = value.offset + value.length;
    for (int i = value.offset; i < end; ++i) {
        int c = value.ch[i];
        if (XMLChar.isSpace(c)) {
            // Character literal repaired: the listing had a malformed ' " here.
            value.ch[i] = ' ';
        }
    }
| protected void | normalizeWhitespace(com.sun.org.apache.xerces.internal.xni.XMLString value, int fromIndex)Normalize whitespace in an XMLString converting all whitespace
characters to space characters.
    // Same in-place rewrite as the one-argument overload, but the scan starts
    // fromIndex characters after value.offset, so characters before that
    // position are not inspected.
    int end = value.offset + value.length;
    for (int i = value.offset + fromIndex; i < end; ++i) {
        int c = value.ch[i];
        if (XMLChar.isSpace(c)) {
            // Character literal repaired: the listing had a malformed ' " here.
            value.ch[i] = ' ';
        }
    }
| protected boolean | scanAttributeValue(com.sun.org.apache.xerces.internal.xni.XMLString value, com.sun.org.apache.xerces.internal.xni.XMLString nonNormalizedValue, java.lang.String atName, boolean checkEntities, java.lang.String eleName)Scans an attribute value and normalizes whitespace converting all
whitespace characters to space characters.
[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
    // value              - receives the normalized attribute value.
    // nonNormalizedValue - receives the attribute text as it was scanned.
    // atName, eleName    - attribute and element names, used only as
    //                      arguments of the error messages reported here.
    // checkEntities      - when true, an undeclared entity is reported as a
    //                      (non-fatal) error only if fValidation is set;
    //                      when false it is reported as a fatal error.
    // Returns true when the non-normalized and normalized values are equal.
    //
    // Buffers: fStringBuffer accumulates the normalized value, fStringBuffer2
    // the non-normalized text, fStringBuffer3 is scratch space for surrogate
    // pairs. Text is appended to fStringBuffer2 only while
    // entityDepth == fEntityDepth, i.e. while the entity depth is the same as
    // when scanning of this value began.
    //
    // All character literals in this method were malformed in the listing
    // (closing apostrophe replaced by a double quote) and have been repaired.

    // quote
    int quote = fEntityScanner.peekChar();
    if (quote != '\'' && quote != '"') {
        reportFatalError("OpenQuoteExpected", new Object[]{eleName,atName});
    }
    fEntityScanner.scanChar();
    int entityDepth = fEntityDepth;
    // c is compared below against the quote and the special characters; the
    // branches consume it themselves via scanChar()/skipChar().
    int c = fEntityScanner.scanLiteral(quote, value);
    if (DEBUG_ATTR_NORMALIZATION) {
        System.out.println("** scanLiteral -> \""
                + value.toString() + "\"");
    }
    int fromIndex = 0;
    // Fast path: the literal ran straight to the closing quote and contains
    // no whitespace, so no normalization or buffering is needed.
    if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
        /** Both the non-normalized and normalized attribute values are equal. **/
        nonNormalizedValue.setValues(value);
        int cquote = fEntityScanner.scanChar();
        if (cquote != quote) {
            reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
        }
        return true;
    }
    // Keep a copy of the raw text before normalizing value in place.
    fStringBuffer2.clear();
    fStringBuffer2.append(value);
    normalizeWhitespace(value, fromIndex);
    if (DEBUG_ATTR_NORMALIZATION) {
        System.out.println("** normalizeWhitespace -> \""
                + value.toString() + "\"");
    }
    if (c != quote) {
        fScanningAttribute = true;
        fStringBuffer.clear();
        do {
            fStringBuffer.append(value);
            if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** value2: \""
                        + fStringBuffer.toString() + "\"");
            }
            if (c == '&') {
                fEntityScanner.skipChar('&');
                if (entityDepth == fEntityDepth) {
                    fStringBuffer2.append('&');
                }
                if (fEntityScanner.skipChar('#')) {
                    // Character reference.
                    if (entityDepth == fEntityDepth) {
                        fStringBuffer2.append('#');
                    }
                    int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
                    if (ch != -1) {
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** value3: \""
                                    + fStringBuffer.toString()
                                    + "\"");
                        }
                    }
                }
                else {
                    // Entity reference.
                    String entityName = fEntityScanner.scanName();
                    if (entityName == null) {
                        reportFatalError("NameRequiredInReference", null);
                    }
                    else if (entityDepth == fEntityDepth) {
                        fStringBuffer2.append(entityName);
                    }
                    if (!fEntityScanner.skipChar(';')) {
                        reportFatalError("SemicolonRequiredInReference",
                                new Object []{entityName});
                    }
                    else if (entityDepth == fEntityDepth) {
                        fStringBuffer2.append(';');
                    }
                    // The comparisons below use reference equality against the
                    // fXxxSymbol fields, exactly as in the original listing.
                    // NOTE(review): presumably these are interned symbol-table
                    // strings - confirm before changing to equals().
                    if (entityName == fAmpSymbol) {
                        fStringBuffer.append('&');
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** value5: \""
                                    + fStringBuffer.toString()
                                    + "\"");
                        }
                    }
                    else if (entityName == fAposSymbol) {
                        fStringBuffer.append('\'');
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** value7: \""
                                    + fStringBuffer.toString()
                                    + "\"");
                        }
                    }
                    else if (entityName == fLtSymbol) {
                        fStringBuffer.append('<');
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** value9: \""
                                    + fStringBuffer.toString()
                                    + "\"");
                        }
                    }
                    else if (entityName == fGtSymbol) {
                        fStringBuffer.append('>');
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** valueB: \""
                                    + fStringBuffer.toString()
                                    + "\"");
                        }
                    }
                    else if (entityName == fQuotSymbol) {
                        fStringBuffer.append('"');
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** valueD: \""
                                    + fStringBuffer.toString()
                                    + "\"");
                        }
                    }
                    else {
                        if (fEntityManager.isExternalEntity(entityName)) {
                            reportFatalError("ReferenceToExternalEntity",
                                    new Object[] { entityName });
                        }
                        else {
                            if (!fEntityManager.isDeclaredEntity(entityName)) {
                                //WFC & VC: Entity Declared
                                if (checkEntities) {
                                    if (fValidation) {
                                        fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                                                "EntityNotDeclared",
                                                new Object[]{entityName},
                                                XMLErrorReporter.SEVERITY_ERROR);
                                    }
                                }
                                else {
                                    reportFatalError("EntityNotDeclared",
                                            new Object[]{entityName});
                                }
                            }
                            fEntityManager.startEntity(entityName, true);
                        }
                    }
                }
            }
            else if (c == '<') {
                reportFatalError("LessthanInAttValue",
                        new Object[] { eleName, atName });
                fEntityScanner.scanChar();
                if (entityDepth == fEntityDepth) {
                    fStringBuffer2.append((char)c);
                }
            }
            else if (c == '%' || c == ']') {
                fEntityScanner.scanChar();
                fStringBuffer.append((char)c);
                if (entityDepth == fEntityDepth) {
                    fStringBuffer2.append((char)c);
                }
                if (DEBUG_ATTR_NORMALIZATION) {
                    System.out.println("** valueF: \""
                            + fStringBuffer.toString() + "\"");
                }
            }
            // note that none of these characters should ever get through
            // XML11EntityScanner. Not sure why
            // this check was originally necessary. - NG
            else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
                fEntityScanner.scanChar();
                // Normalized value gets a space; the non-normalized copy
                // records a '\n' regardless of which line-end char was seen.
                fStringBuffer.append(' ');
                if (entityDepth == fEntityDepth) {
                    fStringBuffer2.append('\n');
                }
            }
            else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                fStringBuffer3.clear();
                if (scanSurrogates(fStringBuffer3)) {
                    fStringBuffer.append(fStringBuffer3);
                    if (entityDepth == fEntityDepth) {
                        fStringBuffer2.append(fStringBuffer3);
                    }
                    if (DEBUG_ATTR_NORMALIZATION) {
                        System.out.println("** valueI: \""
                                + fStringBuffer.toString()
                                + "\"");
                    }
                }
            }
            else if (c != -1 && XML11Char.isXML11Invalid(c)) {
                reportFatalError("InvalidCharInAttValue",
                        new Object[] {eleName, atName, Integer.toString(c, 16)});
                fEntityScanner.scanChar();
                if (entityDepth == fEntityDepth) {
                    fStringBuffer2.append((char)c);
                }
            }
            // Scan the next run of literal text; the raw copy must be taken
            // before value is normalized in place.
            c = fEntityScanner.scanLiteral(quote, value);
            if (entityDepth == fEntityDepth) {
                fStringBuffer2.append(value);
            }
            normalizeWhitespace(value);
            // Stop only on a quote seen at the original entity depth.
        } while (c != quote || entityDepth != fEntityDepth);
        fStringBuffer.append(value);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** valueN: \""
                    + fStringBuffer.toString() + "\"");
        }
        value.setValues(fStringBuffer);
        fScanningAttribute = false;
    }
    nonNormalizedValue.setValues(fStringBuffer2);
    // quote
    int cquote = fEntityScanner.scanChar();
    if (cquote != quote) {
        reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
    }
    return nonNormalizedValue.equals(value.ch, value.offset, value.length);
| protected int | scanContent(com.sun.org.apache.xerces.internal.util.XMLStringBuffer content)Scans element content.
    // Appends the scanned character data to content and returns the value
    // last returned by fEntityScanner.scanContent(), or -1 when this method
    // has already consumed that character itself (line-end characters and
    // runs of ']').
    // Character literals were malformed in the listing (closing apostrophe
    // replaced by a double quote) and have been repaired.
    fTempString.length = 0;
    int c = fEntityScanner.scanContent(fTempString);
    content.append(fTempString);
    if (c == '\r' || c == 0x85 || c == 0x2028) {
        // happens when there is the character reference
        // but scanContent doesn't do entity expansions...
        // is this *really* necessary??? - NG
        fEntityScanner.scanChar();
        content.append((char)c);
        c = -1;
    }
    /*if (fDocumentHandler != null && content.length > 0) {
    fDocumentHandler.characters(content, null);
    } */
    if (c == ']') {
        content.append((char)fEntityScanner.scanChar());
        // remember where we are in case we get an endEntity before we
        // could flush the buffer out - this happens when we're parsing an
        // entity which ends with a ]
        fInScanContent = true;
        //
        // We work on a single character basis to handle cases such as:
        // ']]]>' which we might otherwise miss.
        //
        if (fEntityScanner.skipChar(']')) {
            content.append(']');
            while (fEntityScanner.skipChar(']')) {
                content.append(']');
            }
            // Two or more ']' followed by '>' is a CDATA end marker, which
            // is reported as a fatal error in content.
            if (fEntityScanner.skipChar('>')) {
                reportFatalError("CDEndInContent", null);
            }
        }
        /*if (fDocumentHandler != null && fStringBuffer.length != 0) {
        fDocumentHandler.characters(fStringBuffer, null);
        }*/
        fInScanContent = false;
        c = -1;
    }
    return c;
| protected boolean | scanPubidLiteral(com.sun.org.apache.xerces.internal.xni.XMLString literal)Scans public ID literal.
[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
[13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
The returned string is normalized according to the following rule,
from http://www.w3.org/TR/REC-xml#dt-pubid:
Before a match is attempted, all strings of white space in the public
identifier must be normalized to single space characters (#x20), and
leading and trailing white space must be removed.
    // literal - receives the normalized public identifier once the closing
    //           quote has been read.
    // Returns true when every character was a valid PubidChar; false when
    // the opening quote is missing, the literal is unterminated, or an
    // invalid character was seen (scanning continues after an invalid
    // character so the rest of the literal is still consumed).
    // Character literals were malformed in the listing (closing apostrophe
    // replaced by a double quote) and have been repaired.
    int quote = fEntityScanner.scanChar();
    if (quote != '\'' && quote != '"') {
        reportFatalError("QuoteRequiredInPublicID", null);
        return false;
    }
    fStringBuffer.clear();
    // skip leading whitespace
    boolean skipSpace = true;
    boolean dataok = true;
    while (true) {
        int c = fEntityScanner.scanChar();
        // REVISIT: none of these except \n and 0x20 should make it past the entity scanner
        if (c == ' ' || c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
            if (!skipSpace) {
                // take the first whitespace as a space and skip the others
                fStringBuffer.append(' ');
                skipSpace = true;
            }
        }
        else if (c == quote) {
            // The quote test precedes the PubidChar test on purpose: the
            // apostrophe is itself a PubidChar (production [13]).
            if (skipSpace && fStringBuffer.length > 0) {
                // if we finished on a space let's trim it.
                // The length guard covers an empty or all-whitespace literal
                // (allowed by PubidChar*): skipSpace is then still true but
                // nothing was appended, and an unguarded decrement would
                // leave the buffer with length -1.
                fStringBuffer.length--;
            }
            literal.setValues(fStringBuffer);
            break;
        }
        else if (XMLChar.isPubid(c)) {
            fStringBuffer.append((char)c);
            skipSpace = false;
        }
        else if (c == -1) {
            reportFatalError("PublicIDUnterminated", null);
            return false;
        }
        else {
            dataok = false;
            reportFatalError("InvalidCharInPublicID",
                    new Object[]{Integer.toHexString(c)});
        }
    }
    return dataok;
| protected boolean | versionSupported(java.lang.String version)
    // This scanner accepts documents declaring XML version 1.1 or 1.0.
    if (version.equals("1.1")) {
        return true;
    }
    return version.equals("1.0");
|
|