XMLScannerpublic abstract class XMLScanner extends Object implements org.apache.xerces.xni.parser.XMLComponentThis class is responsible for holding scanning methods common to
scanning the XML document structure and content as well as the DTD
structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
from this base class.
This component requires the following features and properties from the
component manager that uses it:
- http://xml.org/sax/features/validation
- http://xml.org/sax/features/namespaces
- http://apache.org/xml/features/scanner/notify-char-refs
- http://apache.org/xml/properties/internal/symbol-table
- http://apache.org/xml/properties/internal/error-reporter
- http://apache.org/xml/properties/internal/entity-manager
|
Fields Summary |
---|
protected static final String | VALIDATIONFeature identifier: validation. | protected static final String | NAMESPACESFeature identifier: namespaces. | protected static final String | NOTIFY_CHAR_REFSFeature identifier: notify character references. | protected static final String | PARSER_SETTINGS | protected static final String | SYMBOL_TABLEProperty identifier: symbol table. | protected static final String | ERROR_REPORTERProperty identifier: error reporter. | protected static final String | ENTITY_MANAGERProperty identifier: entity manager. | protected static final boolean | DEBUG_ATTR_NORMALIZATIONDebug attribute normalization. | protected boolean | fValidationValidation. This feature identifier is:
http://xml.org/sax/features/validation | protected boolean | fNamespacesNamespaces. | protected boolean | fNotifyCharRefsCharacter references notification. | protected boolean | fParserSettingsInternal parser-settings feature | protected org.apache.xerces.util.SymbolTable | fSymbolTableSymbol table. | protected XMLErrorReporter | fErrorReporterError reporter. | protected XMLEntityManager | fEntityManagerEntity manager. | protected XMLEntityScanner | fEntityScannerEntity scanner. | protected int | fEntityDepthEntity depth. | protected String | fCharRefLiteralLiteral value of the last character refence scanned. | protected boolean | fScanningAttributeScanning attribute. | protected boolean | fReportEntityReport entity boundary. | protected static final String | fVersionSymbolSymbol: "version". | protected static final String | fEncodingSymbolSymbol: "encoding". | protected static final String | fStandaloneSymbolSymbol: "standalone". | protected static final String | fAmpSymbolSymbol: "amp". | protected static final String | fLtSymbolSymbol: "lt". | protected static final String | fGtSymbolSymbol: "gt". | protected static final String | fQuotSymbolSymbol: "quot". | protected static final String | fAposSymbolSymbol: "apos". | private final org.apache.xerces.xni.XMLString | fStringString. | private final org.apache.xerces.util.XMLStringBuffer | fStringBufferString buffer. | private final org.apache.xerces.util.XMLStringBuffer | fStringBuffer2String buffer. | private final org.apache.xerces.util.XMLStringBuffer | fStringBuffer3String buffer. | protected final org.apache.xerces.util.XMLResourceIdentifierImpl | fResourceIdentifier |
Methods Summary |
---|
public void | endEntity(java.lang.String name, org.apache.xerces.xni.Augmentations augs)This method notifies the end of an entity. The document entity has
the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
parameter entity names start with '%'; and general entities are just
specified by their name.
// keep track of the entity depth
fEntityDepth--;
| public boolean | getFeature(java.lang.String featureId)
if (VALIDATION.equals(featureId)) {
return fValidation;
} else if (NOTIFY_CHAR_REFS.equals(featureId)) {
return fNotifyCharRefs;
}
throw new XMLConfigurationException(XMLConfigurationException.NOT_RECOGNIZED, featureId);
| protected java.lang.String | getVersionNotSupportedKey()
return "VersionNotSupported";
| private void | init()
fEntityScanner = null;
// initialize vars
fEntityDepth = 0;
fReportEntity = true;
fResourceIdentifier.clear();
| protected boolean | isInvalid(int value)
return (XMLChar.isInvalid(value));
| protected boolean | isInvalidLiteral(int value)
return (XMLChar.isInvalid(value));
| protected int | isUnchangedByNormalization(org.apache.xerces.xni.XMLString value)Checks whether this string would be unchanged by normalization.
int end = value.offset + value.length;
for (int i = value.offset; i < end; ++i) {
int c = value.ch[i];
// Performance: For XML 1.0 documents take advantage of
// the fact that the only legal characters below 0x20
// are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
// already determined the well-formedness of these
// characters it is sufficient (and safe) to check
// against 0x20. -- mrglavas
if (c < 0x20) {
return i - value.offset;
}
}
return -1;
| protected boolean | isValidNCName(int value)
return (XMLChar.isNCName(value));
| protected boolean | isValidNameChar(int value)
return (XMLChar.isName(value));
| protected boolean | isValidNameStartChar(int value)
return (XMLChar.isNameStart(value));
| protected boolean | isValidNameStartHighSurrogate(int value)
return false;
| protected void | normalizeWhitespace(org.apache.xerces.xni.XMLString value)Normalize whitespace in an XMLString converting all whitespace
characters to space characters.
int end = value.offset + value.length;
for (int i = value.offset; i < end; ++i) {
int c = value.ch[i];
// Performance: For XML 1.0 documents take advantage of
// the fact that the only legal characters below 0x20
// are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
// already determined the well-formedness of these
// characters it is sufficient (and safe) to check
// against 0x20. -- mrglavas
if (c < 0x20) {
value.ch[i] = ' ";
}
}
| protected void | normalizeWhitespace(org.apache.xerces.xni.XMLString value, int fromIndex)Normalize whitespace in an XMLString converting all whitespace
characters to space characters.
int end = value.offset + value.length;
for (int i = value.offset + fromIndex; i < end; ++i) {
int c = value.ch[i];
// Performance: For XML 1.0 documents take advantage of
// the fact that the only legal characters below 0x20
// are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
// already determined the well-formedness of these
// characters it is sufficient (and safe) to check
// against 0x20. -- mrglavas
if (c < 0x20) {
value.ch[i] = ' ";
}
}
| protected void | reportFatalError(java.lang.String msgId, java.lang.Object[] args)Convenience function used in all XML scanners.
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
msgId, args,
XMLErrorReporter.SEVERITY_FATAL_ERROR);
| public void | reset(org.apache.xerces.xni.parser.XMLComponentManager componentManager)
//
// XMLComponent methods
//
try {
fParserSettings = componentManager.getFeature(PARSER_SETTINGS);
} catch (XMLConfigurationException e) {
fParserSettings = true;
}
if (!fParserSettings) {
// parser settings have not been changed
init();
return;
}
// Xerces properties
fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER);
// sax features
try {
fValidation = componentManager.getFeature(VALIDATION);
}
catch (XMLConfigurationException e) {
fValidation = false;
}
try {
fNamespaces = componentManager.getFeature(NAMESPACES);
}
catch (XMLConfigurationException e) {
fNamespaces = true;
}
try {
fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS);
}
catch (XMLConfigurationException e) {
fNotifyCharRefs = false;
}
init();
| protected void | reset()
init();
// DTD preparsing defaults:
fValidation = true;
fNotifyCharRefs = false;
| protected boolean | scanAttributeValue(org.apache.xerces.xni.XMLString value, org.apache.xerces.xni.XMLString nonNormalizedValue, java.lang.String atName, boolean checkEntities, java.lang.String eleName)Scans an attribute value and normalizes whitespace converting all
whitespace characters to space characters.
[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
// quote
int quote = fEntityScanner.peekChar();
if (quote != '\'" && quote != '"") {
reportFatalError("OpenQuoteExpected", new Object[]{eleName,atName});
}
fEntityScanner.scanChar();
int entityDepth = fEntityDepth;
int c = fEntityScanner.scanLiteral(quote, value);
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** scanLiteral -> \""
+ value.toString() + "\"");
}
int fromIndex = 0;
if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
/** Both the non-normalized and normalized attribute values are equal. **/
nonNormalizedValue.setValues(value);
int cquote = fEntityScanner.scanChar();
if (cquote != quote) {
reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
}
return true;
}
fStringBuffer2.clear();
fStringBuffer2.append(value);
normalizeWhitespace(value, fromIndex);
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** normalizeWhitespace -> \""
+ value.toString() + "\"");
}
if (c != quote) {
fScanningAttribute = true;
fStringBuffer.clear();
do {
fStringBuffer.append(value);
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** value2: \""
+ fStringBuffer.toString() + "\"");
}
if (c == '&") {
fEntityScanner.skipChar('&");
if (entityDepth == fEntityDepth) {
fStringBuffer2.append('&");
}
if (fEntityScanner.skipChar('#")) {
if (entityDepth == fEntityDepth) {
fStringBuffer2.append('#");
}
int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
if (ch != -1) {
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** value3: \""
+ fStringBuffer.toString()
+ "\"");
}
}
}
else {
String entityName = fEntityScanner.scanName();
if (entityName == null) {
reportFatalError("NameRequiredInReference", null);
}
else if (entityDepth == fEntityDepth) {
fStringBuffer2.append(entityName);
}
if (!fEntityScanner.skipChar(';")) {
reportFatalError("SemicolonRequiredInReference",
new Object []{entityName});
}
else if (entityDepth == fEntityDepth) {
fStringBuffer2.append(';");
}
if (entityName == fAmpSymbol) {
fStringBuffer.append('&");
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** value5: \""
+ fStringBuffer.toString()
+ "\"");
}
}
else if (entityName == fAposSymbol) {
fStringBuffer.append('\'");
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** value7: \""
+ fStringBuffer.toString()
+ "\"");
}
}
else if (entityName == fLtSymbol) {
fStringBuffer.append('<");
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** value9: \""
+ fStringBuffer.toString()
+ "\"");
}
}
else if (entityName == fGtSymbol) {
fStringBuffer.append('>");
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** valueB: \""
+ fStringBuffer.toString()
+ "\"");
}
}
else if (entityName == fQuotSymbol) {
fStringBuffer.append('"");
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** valueD: \""
+ fStringBuffer.toString()
+ "\"");
}
}
else {
if (fEntityManager.isExternalEntity(entityName)) {
reportFatalError("ReferenceToExternalEntity",
new Object[] { entityName });
}
else {
if (!fEntityManager.isDeclaredEntity(entityName)) {
//WFC & VC: Entity Declared
if (checkEntities) {
if (fValidation) {
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
"EntityNotDeclared",
new Object[]{entityName},
XMLErrorReporter.SEVERITY_ERROR);
}
}
else {
reportFatalError("EntityNotDeclared",
new Object[]{entityName});
}
}
fEntityManager.startEntity(entityName, true);
}
}
}
}
else if (c == '<") {
reportFatalError("LessthanInAttValue",
new Object[] { eleName, atName });
fEntityScanner.scanChar();
if (entityDepth == fEntityDepth) {
fStringBuffer2.append((char)c);
}
}
else if (c == '%" || c == ']") {
fEntityScanner.scanChar();
fStringBuffer.append((char)c);
if (entityDepth == fEntityDepth) {
fStringBuffer2.append((char)c);
}
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** valueF: \""
+ fStringBuffer.toString() + "\"");
}
}
else if (c == '\n" || c == '\r") {
fEntityScanner.scanChar();
fStringBuffer.append(' ");
if (entityDepth == fEntityDepth) {
fStringBuffer2.append('\n");
}
}
else if (c != -1 && XMLChar.isHighSurrogate(c)) {
fStringBuffer3.clear();
if (scanSurrogates(fStringBuffer3)) {
fStringBuffer.append(fStringBuffer3);
if (entityDepth == fEntityDepth) {
fStringBuffer2.append(fStringBuffer3);
}
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** valueI: \""
+ fStringBuffer.toString()
+ "\"");
}
}
}
else if (c != -1 && isInvalidLiteral(c)) {
reportFatalError("InvalidCharInAttValue",
new Object[] {eleName, atName, Integer.toString(c, 16)});
fEntityScanner.scanChar();
if (entityDepth == fEntityDepth) {
fStringBuffer2.append((char)c);
}
}
c = fEntityScanner.scanLiteral(quote, value);
if (entityDepth == fEntityDepth) {
fStringBuffer2.append(value);
}
normalizeWhitespace(value);
} while (c != quote || entityDepth != fEntityDepth);
fStringBuffer.append(value);
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** valueN: \""
+ fStringBuffer.toString() + "\"");
}
value.setValues(fStringBuffer);
fScanningAttribute = false;
}
nonNormalizedValue.setValues(fStringBuffer2);
// quote
int cquote = fEntityScanner.scanChar();
if (cquote != quote) {
reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
}
return nonNormalizedValue.equals(value.ch, value.offset, value.length);
| protected int | scanCharReferenceValue(org.apache.xerces.util.XMLStringBuffer buf, org.apache.xerces.util.XMLStringBuffer buf2)Scans a character reference and append the corresponding chars to the
specified buffer.
[66] CharRef ::= '' [0-9]+ ';' | '' [0-9a-fA-F]+ ';'
Note: This method uses fStringBuffer, anything in it
at the time of calling is lost.
// scan hexadecimal value
boolean hex = false;
if (fEntityScanner.skipChar('x")) {
if (buf2 != null) { buf2.append('x"); }
hex = true;
fStringBuffer3.clear();
boolean digit = true;
int c = fEntityScanner.peekChar();
digit = (c >= '0" && c <= '9") ||
(c >= 'a" && c <= 'f") ||
(c >= 'A" && c <= 'F");
if (digit) {
if (buf2 != null) { buf2.append((char)c); }
fEntityScanner.scanChar();
fStringBuffer3.append((char)c);
do {
c = fEntityScanner.peekChar();
digit = (c >= '0" && c <= '9") ||
(c >= 'a" && c <= 'f") ||
(c >= 'A" && c <= 'F");
if (digit) {
if (buf2 != null) { buf2.append((char)c); }
fEntityScanner.scanChar();
fStringBuffer3.append((char)c);
}
} while (digit);
}
else {
reportFatalError("HexdigitRequiredInCharRef", null);
}
}
// scan decimal value
else {
fStringBuffer3.clear();
boolean digit = true;
int c = fEntityScanner.peekChar();
digit = c >= '0" && c <= '9";
if (digit) {
if (buf2 != null) { buf2.append((char)c); }
fEntityScanner.scanChar();
fStringBuffer3.append((char)c);
do {
c = fEntityScanner.peekChar();
digit = c >= '0" && c <= '9";
if (digit) {
if (buf2 != null) { buf2.append((char)c); }
fEntityScanner.scanChar();
fStringBuffer3.append((char)c);
}
} while (digit);
}
else {
reportFatalError("DigitRequiredInCharRef", null);
}
}
// end
if (!fEntityScanner.skipChar(';")) {
reportFatalError("SemicolonRequiredInCharRef", null);
}
if (buf2 != null) { buf2.append(';"); }
// convert string to number
int value = -1;
try {
value = Integer.parseInt(fStringBuffer3.toString(),
hex ? 16 : 10);
// character reference must be a valid XML character
if (isInvalid(value)) {
StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
if (hex) errorBuf.append('x");
errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
reportFatalError("InvalidCharRef",
new Object[]{errorBuf.toString()});
}
}
catch (NumberFormatException e) {
// Conversion failed, let -1 value drop through.
// If we end up here, the character reference was invalid.
StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
if (hex) errorBuf.append('x");
errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
reportFatalError("InvalidCharRef",
new Object[]{errorBuf.toString()});
}
// append corresponding chars to the given buffer
if (!XMLChar.isSupplemental(value)) {
buf.append((char) value);
}
else {
// character is supplemental, split it into surrogate chars
buf.append(XMLChar.highSurrogate(value));
buf.append(XMLChar.lowSurrogate(value));
}
// char refs notification code
if (fNotifyCharRefs && value != -1) {
String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString();
if (!fScanningAttribute) {
fCharRefLiteral = literal;
}
}
return value;
| protected void | scanComment(org.apache.xerces.util.XMLStringBuffer text)Scans a comment.
[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
Note: Called after scanning past '<!--'
Note: This method uses fString, anything in it
at the time of calling is lost.
// text
// REVISIT: handle invalid character, eof
text.clear();
while (fEntityScanner.scanData("--", text)) {
int c = fEntityScanner.peekChar();
if (c != -1) {
if (XMLChar.isHighSurrogate(c)) {
scanSurrogates(text);
}
else if (isInvalidLiteral(c)) {
reportFatalError("InvalidCharInComment",
new Object[] { Integer.toHexString(c) });
fEntityScanner.scanChar();
}
}
}
if (!fEntityScanner.skipChar('>")) {
reportFatalError("DashDashInComment", null);
}
| protected void | scanExternalID(java.lang.String[] identifiers, boolean optionalSystemId)Scans External ID and return the public and system IDs.
String systemId = null;
String publicId = null;
if (fEntityScanner.skipString("PUBLIC")) {
if (!fEntityScanner.skipSpaces()) {
reportFatalError("SpaceRequiredAfterPUBLIC", null);
}
scanPubidLiteral(fString);
publicId = fString.toString();
if (!fEntityScanner.skipSpaces() && !optionalSystemId) {
reportFatalError("SpaceRequiredBetweenPublicAndSystem", null);
}
}
if (publicId != null || fEntityScanner.skipString("SYSTEM")) {
if (publicId == null && !fEntityScanner.skipSpaces()) {
reportFatalError("SpaceRequiredAfterSYSTEM", null);
}
int quote = fEntityScanner.peekChar();
if (quote != '\'" && quote != '"") {
if (publicId != null && optionalSystemId) {
// looks like we don't have any system id
// simply return the public id
identifiers[0] = null;
identifiers[1] = publicId;
return;
}
reportFatalError("QuoteRequiredInSystemID", null);
}
fEntityScanner.scanChar();
XMLString ident = fString;
if (fEntityScanner.scanLiteral(quote, ident) != quote) {
fStringBuffer.clear();
do {
fStringBuffer.append(ident);
int c = fEntityScanner.peekChar();
if (XMLChar.isMarkup(c) || c == ']") {
fStringBuffer.append((char)fEntityScanner.scanChar());
}
} while (fEntityScanner.scanLiteral(quote, ident) != quote);
fStringBuffer.append(ident);
ident = fStringBuffer;
}
systemId = ident.toString();
if (!fEntityScanner.skipChar(quote)) {
reportFatalError("SystemIDUnterminated", null);
}
}
// store result in array
identifiers[0] = systemId;
identifiers[1] = publicId;
| protected void | scanPI()Scans a processing instruction.
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
Note: This method uses fString, anything in it
at the time of calling is lost.
// target
fReportEntity = false;
String target = null;
if(fNamespaces) {
target = fEntityScanner.scanNCName();
} else {
target = fEntityScanner.scanName();
}
if (target == null) {
reportFatalError("PITargetRequired", null);
}
// scan data
scanPIData(target, fString);
fReportEntity = true;
| protected void | scanPIData(java.lang.String target, org.apache.xerces.xni.XMLString data)Scans a processing data. This is needed to handle the situation
where a document starts with a processing instruction whose
target name starts with "xml". (e.g. xmlfoo)
Note: This method uses fStringBuffer, anything in it
at the time of calling is lost.
// check target
if (target.length() == 3) {
char c0 = Character.toLowerCase(target.charAt(0));
char c1 = Character.toLowerCase(target.charAt(1));
char c2 = Character.toLowerCase(target.charAt(2));
if (c0 == 'x" && c1 == 'm" && c2 == 'l") {
reportFatalError("ReservedPITarget", null);
}
}
// spaces
if (!fEntityScanner.skipSpaces()) {
if (fEntityScanner.skipString("?>")) {
// we found the end, there is no data
data.clear();
return;
}
else {
if(fNamespaces && fEntityScanner.peekChar() == ':") {
fEntityScanner.scanChar();
XMLStringBuffer colonName = new XMLStringBuffer(target);
colonName.append(":");
String str = fEntityScanner.scanName();
if (str != null)
colonName.append(str);
reportFatalError("ColonNotLegalWithNS", new Object[] {colonName.toString()});
fEntityScanner.skipSpaces();
} else {
// if there is data there should be some space
reportFatalError("SpaceRequiredInPI", null);
}
}
}
fStringBuffer.clear();
// data
if (fEntityScanner.scanData("?>", fStringBuffer)) {
do {
int c = fEntityScanner.peekChar();
if (c != -1) {
if (XMLChar.isHighSurrogate(c)) {
scanSurrogates(fStringBuffer);
}
else if (isInvalidLiteral(c)) {
reportFatalError("InvalidCharInPI",
new Object[]{Integer.toHexString(c)});
fEntityScanner.scanChar();
}
}
} while (fEntityScanner.scanData("?>", fStringBuffer));
}
data.setValues(fStringBuffer);
| public java.lang.String | scanPseudoAttribute(boolean scanningTextDecl, org.apache.xerces.xni.XMLString value)Scans a pseudo attribute.
// REVISIT: This method is used for generic scanning of
// pseudo attributes, but since there are only three such
// attributes: version, encoding, and standalone there are
// for performant ways of scanning them. Every decl must
// have a version, and in TextDecls this version must
// be followed by an encoding declaration. Also the
// methods we invoke on the scanners allow non-ASCII
// characters to be parsed in the decls, but since
// we don't even know what the actual encoding of the
// document is until we scan the encoding declaration
// you cannot reliably read any characters outside
// of the ASCII range here. -- mrglavas
String name = fEntityScanner.scanName();
XMLEntityManager.print(fEntityManager.getCurrentEntity());
if (name == null) {
reportFatalError("PseudoAttrNameExpected", null);
}
fEntityScanner.skipDeclSpaces();
if (!fEntityScanner.skipChar('=")) {
reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
: "EqRequiredInXMLDecl", new Object[]{name});
}
fEntityScanner.skipDeclSpaces();
int quote = fEntityScanner.peekChar();
if (quote != '\'" && quote != '"") {
reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
: "QuoteRequiredInXMLDecl" , new Object[]{name});
}
fEntityScanner.scanChar();
int c = fEntityScanner.scanLiteral(quote, value);
if (c != quote) {
fStringBuffer2.clear();
do {
fStringBuffer2.append(value);
if (c != -1) {
if (c == '&" || c == '%" || c == '<" || c == ']") {
fStringBuffer2.append((char)fEntityScanner.scanChar());
}
// REVISIT: Even if you could reliably read non-ASCII chars
// why bother scanning for surrogates here? Only ASCII chars
// match the productions in XMLDecls and TextDecls. -- mrglavas
else if (XMLChar.isHighSurrogate(c)) {
scanSurrogates(fStringBuffer2);
}
else if (isInvalidLiteral(c)) {
String key = scanningTextDecl
? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
reportFatalError(key,
new Object[] {Integer.toString(c, 16)});
fEntityScanner.scanChar();
}
}
c = fEntityScanner.scanLiteral(quote, value);
} while (c != quote);
fStringBuffer2.append(value);
value.setValues(fStringBuffer2);
}
if (!fEntityScanner.skipChar(quote)) {
reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl"
: "CloseQuoteMissingInXMLDecl",
new Object[]{name});
}
// return
return name;
| protected boolean | scanPubidLiteral(org.apache.xerces.xni.XMLString literal)Scans public ID literal.
[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
[13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
The returned string is normalized according to the following rule,
from http://www.w3.org/TR/REC-xml#dt-pubid:
Before a match is attempted, all strings of white space in the public
identifier must be normalized to single space characters (#x20), and
leading and trailing white space must be removed.
int quote = fEntityScanner.scanChar();
if (quote != '\'" && quote != '"") {
reportFatalError("QuoteRequiredInPublicID", null);
return false;
}
fStringBuffer.clear();
// skip leading whitespace
boolean skipSpace = true;
boolean dataok = true;
while (true) {
int c = fEntityScanner.scanChar();
if (c == ' " || c == '\n" || c == '\r") {
if (!skipSpace) {
// take the first whitespace as a space and skip the others
fStringBuffer.append(' ");
skipSpace = true;
}
}
else if (c == quote) {
if (skipSpace) {
// if we finished on a space let's trim it
fStringBuffer.length--;
}
literal.setValues(fStringBuffer);
break;
}
else if (XMLChar.isPubid(c)) {
fStringBuffer.append((char)c);
skipSpace = false;
}
else if (c == -1) {
reportFatalError("PublicIDUnterminated", null);
return false;
}
else {
dataok = false;
reportFatalError("InvalidCharInPublicID",
new Object[]{Integer.toHexString(c)});
}
}
return dataok;
| protected boolean | scanSurrogates(org.apache.xerces.util.XMLStringBuffer buf)Scans surrogates and append them to the specified buffer.
Note: This assumes the current char has already been
identified as a high surrogate.
int high = fEntityScanner.scanChar();
int low = fEntityScanner.peekChar();
if (!XMLChar.isLowSurrogate(low)) {
reportFatalError("InvalidCharInContent",
new Object[] {Integer.toString(high, 16)});
return false;
}
fEntityScanner.scanChar();
// convert surrogates to supplemental character
int c = XMLChar.supplemental((char)high, (char)low);
// supplemental character must be a valid XML character
if (isInvalid(c)) {
reportFatalError("InvalidCharInContent",
new Object[]{Integer.toString(c, 16)});
return false;
}
// fill in the buffer
buf.append((char)high);
buf.append((char)low);
return true;
| protected void | scanXMLDeclOrTextDecl(boolean scanningTextDecl, java.lang.String[] pseudoAttributeValues)Scans an XML or text declaration.
[23] XMLDecl ::= ''
[24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
| ('"' ('yes' | 'no') '"'))
[77] TextDecl ::= ''
// pseudo-attribute values
String version = null;
String encoding = null;
String standalone = null;
// scan pseudo-attributes
final int STATE_VERSION = 0;
final int STATE_ENCODING = 1;
final int STATE_STANDALONE = 2;
final int STATE_DONE = 3;
int state = STATE_VERSION;
boolean dataFoundForTarget = false;
boolean sawSpace = fEntityScanner.skipDeclSpaces();
// since pseudoattributes are *not* attributes,
// their quotes don't need to be preserved in external parameter entities.
// the XMLEntityScanner#scanLiteral method will continue to
// emit -1 in such cases when it finds a quote; this is
// fine for other methods that parse scanned entities,
// but not for the scanning of pseudoattributes. So,
// temporarily, we must mark the current entity as not being "literal"
XMLEntityManager.ScannedEntity currEnt = fEntityManager.getCurrentEntity();
boolean currLiteral = currEnt.literal;
currEnt.literal = false;
while (fEntityScanner.peekChar() != '?") {
dataFoundForTarget = true;
String name = scanPseudoAttribute(scanningTextDecl, fString);
switch (state) {
case STATE_VERSION: {
if (name == fVersionSymbol) {
if (!sawSpace) {
reportFatalError(scanningTextDecl
? "SpaceRequiredBeforeVersionInTextDecl"
: "SpaceRequiredBeforeVersionInXMLDecl",
null);
}
version = fString.toString();
state = STATE_ENCODING;
if (!versionSupported(version)) {
reportFatalError(getVersionNotSupportedKey(),
new Object[]{version});
}
}
else if (name == fEncodingSymbol) {
if (!scanningTextDecl) {
reportFatalError("VersionInfoRequired", null);
}
if (!sawSpace) {
reportFatalError(scanningTextDecl
? "SpaceRequiredBeforeEncodingInTextDecl"
: "SpaceRequiredBeforeEncodingInXMLDecl",
null);
}
encoding = fString.toString();
state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
}
else {
if (scanningTextDecl) {
reportFatalError("EncodingDeclRequired", null);
}
else {
reportFatalError("VersionInfoRequired", null);
}
}
break;
}
case STATE_ENCODING: {
if (name == fEncodingSymbol) {
if (!sawSpace) {
reportFatalError(scanningTextDecl
? "SpaceRequiredBeforeEncodingInTextDecl"
: "SpaceRequiredBeforeEncodingInXMLDecl",
null);
}
encoding = fString.toString();
state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
// TODO: check encoding name; set encoding on
// entity scanner
}
else if (!scanningTextDecl && name == fStandaloneSymbol) {
if (!sawSpace) {
reportFatalError("SpaceRequiredBeforeStandalone",
null);
}
standalone = fString.toString();
state = STATE_DONE;
if (!standalone.equals("yes") && !standalone.equals("no")) {
reportFatalError("SDDeclInvalid", new Object[] {standalone});
}
}
else {
reportFatalError("EncodingDeclRequired", null);
}
break;
}
case STATE_STANDALONE: {
if (name == fStandaloneSymbol) {
if (!sawSpace) {
reportFatalError("SpaceRequiredBeforeStandalone",
null);
}
standalone = fString.toString();
state = STATE_DONE;
if (!standalone.equals("yes") && !standalone.equals("no")) {
reportFatalError("SDDeclInvalid", new Object[] {standalone});
}
}
else {
reportFatalError("EncodingDeclRequired", null);
}
break;
}
default: {
reportFatalError("NoMorePseudoAttributes", null);
}
}
sawSpace = fEntityScanner.skipDeclSpaces();
}
// restore original literal value
if(currLiteral)
currEnt.literal = true;
// REVISIT: should we remove this error reporting?
if (scanningTextDecl && state != STATE_DONE) {
reportFatalError("MorePseudoAttributes", null);
}
// If there is no data in the xml or text decl then we fail to report error
// for version or encoding info above.
if (scanningTextDecl) {
if (!dataFoundForTarget && encoding == null) {
reportFatalError("EncodingDeclRequired", null);
}
}
else {
if (!dataFoundForTarget && version == null) {
reportFatalError("VersionInfoRequired", null);
}
}
// end
if (!fEntityScanner.skipChar('?")) {
reportFatalError("XMLDeclUnterminated", null);
}
if (!fEntityScanner.skipChar('>")) {
reportFatalError("XMLDeclUnterminated", null);
}
// fill in return array
pseudoAttributeValues[0] = version;
pseudoAttributeValues[1] = encoding;
pseudoAttributeValues[2] = standalone;
| public void | setFeature(java.lang.String featureId, boolean value)
if (VALIDATION.equals(featureId)) {
fValidation = value;
} else if (NOTIFY_CHAR_REFS.equals(featureId)) {
fNotifyCharRefs = value;
}
| public void | setProperty(java.lang.String propertyId, java.lang.Object value)Sets the value of a property during parsing.
// Xerces properties
if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() &&
propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) {
fSymbolTable = (SymbolTable)value;
}
else if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() &&
propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) {
fErrorReporter = (XMLErrorReporter)value;
}
else if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() &&
propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
fEntityManager = (XMLEntityManager)value;
}
}
| public void | startEntity(java.lang.String name, org.apache.xerces.xni.XMLResourceIdentifier identifier, java.lang.String encoding, org.apache.xerces.xni.Augmentations augs)This method notifies of the start of an entity. The document entity
has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
parameter entity names start with '%'; and general entities are just
specified by their name.
// keep track of the entity depth
fEntityDepth++;
// must reset entity scanner
fEntityScanner = fEntityManager.getEntityScanner();
| protected boolean | versionSupported(java.lang.String version)
return version.equals("1.0");
|
|