XMLEncodingDetector: public class XMLEncodingDetector extends Object
Fields Summary |
---|
private InputStream | stream | private String | encoding | private boolean | isEncodingSetInProlog | private boolean | isBomPresent | private int | skip | private Boolean | isBigEndian | private Reader | reader | public static final int | DEFAULT_BUFFER_SIZE | public static final int | DEFAULT_XMLDECL_BUFFER_SIZE | private boolean | fAllowJavaEncodings | private SymbolTable | fSymbolTable | private XMLEncodingDetector | fCurrentEntity | private int | fBufferSize | private int | lineNumber | private int | columnNumber | private boolean | literal | private char[] | ch | private int | position | private int | count | private boolean | mayReadChunks | private XMLString | fString | private XMLStringBuffer | fStringBuffer | private XMLStringBuffer | fStringBuffer2 | private static final String | fVersionSymbol | private static final String | fEncodingSymbol | private static final String | fStandaloneSymbol | private int | fMarkupDepth | private String[] | fStrings | private org.apache.jasper.compiler.ErrorDispatcher | err |
Constructors Summary |
---|
public XMLEncodingDetector()Constructor
// The detector serves as its own "current entity": the scanning methods
// read and update buffer state through fCurrentEntity.
fCurrentEntity = this;
// Table that scanned names are added to (see scanName / scanXMLDecl).
fSymbolTable = new SymbolTable();
|
Methods Summary |
---|
private void | createInitialReader()
// Builds the initial Reader over the input stream. When no encoding is
// known yet, up to four leading bytes are sniffed to auto-detect it and
// the fields encoding, isBigEndian, isBomPresent and skip are filled in.

// wrap this stream in RewindableInputStream so the sniffed bytes can be
// re-read after reset()
stream = new RewindableInputStream(stream);

// perform auto-detect of encoding if necessary
if (encoding == null) {
    // Read up to four bytes, stopping at end of stream. The EOF marker
    // (-1) must not be stored as data: cast to byte it becomes 0xFF and
    // could complete a bogus byte-order mark (e.g. a lone 0xFE byte).
    final byte[] b4 = new byte[4];
    int bytesRead = 0;
    while (bytesRead < 4) {
        final int b = stream.read();
        if (b == -1) {
            break;
        }
        b4[bytesRead++] = (byte) b;
    }

    // getEncodingName() falls back to UTF-8 when given too few bytes,
    // so it is safe to call it with whatever was actually read.
    final Object[] encodingDesc = getEncodingName(b4, bytesRead);
    encoding = (String) (encodingDesc[0]);
    isBigEndian = (Boolean) (encodingDesc[1]);
    if (encodingDesc.length > 3) {
        // four-element descriptor: { name, endianness, bomPresent, skip }
        isBomPresent = (Boolean) (encodingDesc[2]);
        skip = (Integer) (encodingDesc[3]);
    } else {
        // three-element descriptor: { name, endianness, skip }; the BOM
        // flag is implied, but a descriptor that skips nothing cannot
        // have matched any leading bytes
        skip = (Integer) (encodingDesc[2]);
        isBomPresent = skip > 0;
    }

    // rewind so the reader sees the stream from its first byte
    stream.reset();

    // Special case UTF-8 files with BOM created by Microsoft
    // tools. It's more efficient to consume the BOM than make
    // the reader perform extra checks. -Ac
    if (bytesRead > 2 && encoding.equals("UTF-8")) {
        final int b0 = b4[0] & 0xFF;
        final int b1 = b4[1] & 0xFF;
        final int b2 = b4[2] & 0xFF;
        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
            // ignore first three bytes...
            stream.skip(3);
        }
    }
    reader = createReader(stream, encoding, isBigEndian);
}
| private java.io.Reader | createReader(java.io.InputStream inputStream, java.lang.String encoding, java.lang.Boolean isBigEndian)Creates a reader capable of reading the given input stream in
the specified encoding.
// Resolves an encoding name to a concrete Reader: the specialised readers
// are tried first, then a generic InputStreamReader.

// a missing encoding name means UTF-8
if (encoding == null) {
    encoding = "UTF-8";
}

// the specialised readers are selected on the upper-cased name
final String upperName = encoding.toUpperCase(Locale.ENGLISH);
if (upperName.equals("UTF-8")) {
    return new UTF8Reader(inputStream, fBufferSize);
}
if (upperName.equals("US-ASCII")) {
    return new ASCIIReader(inputStream, fBufferSize);
}
if (upperName.equals("ISO-10646-UCS-4")) {
    if (isBigEndian == null) {
        // byte order unknown: report it, then fall through to the
        // generic handling below if jspError returns
        err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
                     encoding);
    } else if (isBigEndian.booleanValue()) {
        return new UCSReader(inputStream, UCSReader.UCS4BE);
    } else {
        return new UCSReader(inputStream, UCSReader.UCS4LE);
    }
}
if (upperName.equals("ISO-10646-UCS-2")) {
    // a null byte order should never happen with this encoding...
    if (isBigEndian == null) {
        err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
                     encoding);
    } else if (isBigEndian.booleanValue()) {
        return new UCSReader(inputStream, UCSReader.UCS2BE);
    } else {
        return new UCSReader(inputStream, UCSReader.UCS2LE);
    }
}

// check for valid name
final boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
final boolean validJava = XMLChar.isValidJavaEncoding(encoding);
final boolean nameAccepted = validIANA && (!fAllowJavaEncodings || validJava);
if (!nameAccepted) {
    err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
    // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
    //       because every byte is a valid ISO Latin 1 character.
    //       It may not translate correctly but if we failed on
    //       the encoding anyway, then we're expecting the content
    //       of the document to be bad. This will just prevent an
    //       invalid UTF-8 sequence to be detected. This is only
    //       important when continue-after-fatal-error is turned
    //       on. -Ac
    encoding = "ISO-8859-1";
}

// try to use a Java reader; the lookup uses the upper-cased name computed
// before any ISO-8859-1 fallback above
String javaEncoding = EncodingMap.getIANA2JavaMapping(upperName);
if (javaEncoding == null) {
    if (!fAllowJavaEncodings) {
        err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
        // same ISO Latin 1 fallback rationale as above
        javaEncoding = "ISO8859_1";
    } else {
        javaEncoding = encoding;
    }
}
return new InputStreamReader(inputStream, javaEncoding);
| void | endEntity()
| public static java.lang.Object[] | getEncoding(java.lang.String fname, java.util.jar.JarFile jarFile, org.apache.jasper.JspCompilationContext ctxt, org.apache.jasper.compiler.ErrorDispatcher err)Autodetects the encoding of the XML document supplied by the given
input stream.
Encoding autodetection is done according to the XML 1.0 specification,
Appendix F.1: Detection Without External Encoding Information.
// Opens the named resource, runs encoding detection on it, and returns the
// detector's result array (see the private getEncoding overload).
InputStream inStream = JspUtil.getInputStream(fname, jarFile, ctxt,
                                              err);
try {
    XMLEncodingDetector detector = new XMLEncodingDetector();
    return detector.getEncoding(inStream, err);
} finally {
    // Close the stream even when detection fails part-way, so the
    // underlying file or jar entry is not leaked.
    if (inStream != null) {
        inStream.close();
    }
}
| private java.lang.Object[] | getEncoding(java.io.InputStream in, org.apache.jasper.compiler.ErrorDispatcher err)
this.stream = in;
this.err=err;
createInitialReader();
scanXMLDecl();
return new Object[] { this.encoding,
Boolean.valueOf(this.isEncodingSetInProlog),
Boolean.valueOf(this.isBomPresent),
Integer.valueOf(this.skip) };
| private java.lang.Object[] | getEncodingName(byte[] b4, int count)Returns the IANA encoding name that is auto-detected from
the bytes specified, with the endian-ness of that encoding where
appropriate.
// Maps the first bytes of a document to an encoding descriptor.
//
// Two descriptor shapes are returned and told apart by length:
//   { name, isBigEndian, bomPresent, skip }  - explicit BOM flag
//   { name, isBigEndian, skip }              - BOM flag implied by caller
// isBigEndian is null where byte order does not apply or is unknown;
// skip is the number of leading bytes the pattern matched.
// NOTE(review): the BOM-less four-byte patterns below also use the
// three-element shape, which createInitialReader reads as "BOM present";
// kept as-is, confirm whether callers rely on that.

if (count < 2) {
    return new Object[] {"UTF-8", null, Boolean.FALSE, Integer.valueOf(0)};
}

// UTF-16, with BOM
int b0 = b4[0] & 0xFF;
int b1 = b4[1] & 0xFF;
if (b0 == 0xFE && b1 == 0xFF) {
    // UTF-16, big-endian
    return new Object[] {"UTF-16BE", Boolean.TRUE, Integer.valueOf(2)};
}
if (b0 == 0xFF && b1 == 0xFE) {
    // UTF-16, little-endian
    return new Object[] {"UTF-16LE", Boolean.FALSE, Integer.valueOf(2)};
}

// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding
if (count < 3) {
    return new Object[] {"UTF-8", null, Boolean.FALSE, Integer.valueOf(0)};
}

// UTF-8 with a BOM
int b2 = b4[2] & 0xFF;
if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
    return new Object[] {"UTF-8", null, Integer.valueOf(3)};
}

// default to UTF-8 if we don't have enough bytes to make a
// good determination of the encoding; no BOM was seen, so say so
// explicitly like the other UTF-8 fallbacks do
if (count < 4) {
    return new Object[] {"UTF-8", null, Boolean.FALSE, Integer.valueOf(0)};
}

// other encodings
int b3 = b4[3] & 0xFF;
if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
    // UCS-4, big endian (1234)
    return new Object[] {"ISO-10646-UCS-4", Boolean.TRUE, Integer.valueOf(4)};
}
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
    // UCS-4, little endian (4321)
    return new Object[] {"ISO-10646-UCS-4", Boolean.FALSE, Integer.valueOf(4)};
}
if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
    // UCS-4, unusual octet order (2143)
    // REVISIT: What should this be?
    return new Object[] {"ISO-10646-UCS-4", null, Integer.valueOf(4)};
}
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
    // UCS-4, unusual octet order (3412)
    // REVISIT: What should this be?
    return new Object[] {"ISO-10646-UCS-4", null, Integer.valueOf(4)};
}
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
    // UTF-16, big-endian, no BOM
    // (or could turn out to be UCS-2...
    // REVISIT: What should this be?
    return new Object[] {"UTF-16BE", Boolean.TRUE, Integer.valueOf(4)};
}
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
    // UTF-16, little-endian, no BOM
    // (or could turn out to be UCS-2...
    return new Object[] {"UTF-16LE", Boolean.FALSE, Integer.valueOf(4)};
}
if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
    // EBCDIC
    // a la xerces1, return CP037 instead of EBCDIC here
    return new Object[] {"CP037", null, Integer.valueOf(4)};
}

// default encoding
return new Object[] {"UTF-8", null, Boolean.FALSE, Integer.valueOf(0)};
| public boolean | isExternal()Returns true if the current entity being scanned is external.
// This detector always reports its entity as external; the scanning
// methods use that answer to decide whether '\r' is treated as a newline.
return true;
| final boolean | load(int offset, boolean changeEntity)Loads a chunk of text.
// Reads more characters into fCurrentEntity.ch starting at 'offset'.
// Returns true when the reader reported end of input, false otherwise.

// how much to ask for: the rest of the buffer when chunked reads are
// allowed, otherwise one XML-declaration-sized chunk
final int length = fCurrentEntity.mayReadChunks
        ? (fCurrentEntity.ch.length - offset)
        : (DEFAULT_XMLDECL_BUFFER_SIZE);
final int charsRead = fCurrentEntity.reader.read(fCurrentEntity.ch, offset,
                                                 length);

if (charsRead != -1) {
    // data (possibly zero characters) was read; a zero-length read
    // leaves count and position untouched
    if (charsRead != 0) {
        fCurrentEntity.count = charsRead + offset;
        fCurrentEntity.position = offset;
    }
    return false;
}

// end of this entity: only the caller-preserved prefix remains valid
fCurrentEntity.count = offset;
fCurrentEntity.position = offset;
if (changeEntity) {
    endEntity();
    if (fCurrentEntity == null) {
        throw new EOFException();
    }
    // handle the trailing edges
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, false);
    }
}
return true;
| public int | peekChar()Returns the next character on the input.
Note: The character is not consumed.
// Returns the character at the current position without consuming it.
// For an external entity a carriage return is reported as '\n'.

// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
    load(0, true);
}

// peek at character
int c = fCurrentEntity.ch[fCurrentEntity.position];

// return peeked character
if (fCurrentEntity.isExternal()) {
    return c != '\r' ? c : '\n';
}
else {
    return c;
}
| private void | reportFatalError(java.lang.String msgId, java.lang.String arg)Convenience function used in all XML scanners.
// Forwards the message key and its single argument to the error dispatcher.
err.jspError(msgId, arg);
| public int | scanChar()Returns the next character on the input.
Note: The character is consumed.
// Consumes and returns the next character. Newlines advance lineNumber and
// reset columnNumber; for an external entity "\r" and "\r\n" are both
// returned as a single '\n'.

// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
    load(0, true);
}

// scan character
int c = fCurrentEntity.ch[fCurrentEntity.position++];
boolean external = false;
if (c == '\n' ||
    (c == '\r' && (external = fCurrentEntity.isExternal()))) {
    fCurrentEntity.lineNumber++;
    fCurrentEntity.columnNumber = 1;
    if (fCurrentEntity.position == fCurrentEntity.count) {
        // keep the newline character in slot 0 and refill after it
        fCurrentEntity.ch[0] = (char)c;
        load(1, false);
    }
    if (c == '\r' && external) {
        // swallow the '\n' of a "\r\n" pair, if there is one
        if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
            fCurrentEntity.position--;
        }
        c = '\n';
    }
}

// return character that was scanned
fCurrentEntity.columnNumber++;
return c;
| public boolean | scanData(java.lang.String delimiter, XMLStringBuffer buffer)Scans a range of character data up to the specified delimiter,
setting the fields of the XMLString structure, appropriately.
Note: The characters are consumed.
Note: This assumes that the internal buffer is
at least the same size, or bigger, than the length of the delimiter
and that the delimiter contains at least one character.
Note: This method does not guarantee to return
the longest run of character data. This method may return before
the delimiter due to reaching the end of the input buffer or any
other reason.
Note: The fields contained in the XMLString
structure are not guaranteed to remain valid upon subsequent calls
to the entity scanner. Therefore, the caller is responsible for
immediately using the returned character data or making a copy of
the character data.
// Appends character data to 'buffer' until the delimiter is found.
// Returns false once the delimiter has been consumed (or the input ran out
// before a delimiter could fit); returns true when scanning stopped early
// and the caller has to deal with the next character itself.

boolean done = false;
int delimLen = delimiter.length();
char charAt0 = delimiter.charAt(0);
boolean external = fCurrentEntity.isExternal();
do {

    // load more characters, if needed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true);
    }
    else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
        // too few characters left to hold a delimiter: move the tail to
        // the front of the buffer and read more behind it
        System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position,
                         fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position);
        load(fCurrentEntity.count - fCurrentEntity.position, false);
        fCurrentEntity.position = 0;
    }
    if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
        // something must be wrong with the input: e.g., file ends an
        // unterminated comment
        int length = fCurrentEntity.count - fCurrentEntity.position;
        buffer.append (fCurrentEntity.ch, fCurrentEntity.position,
                       length);
        fCurrentEntity.columnNumber += fCurrentEntity.count;
        fCurrentEntity.position = fCurrentEntity.count;
        load(0,true);
        return false;
    }

    // normalize newlines
    int offset = fCurrentEntity.position;
    int c = fCurrentEntity.ch[offset];
    int newlines = 0;
    if (c == '\n' || (c == '\r' && external)) {
        do {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (c == '\r' && external) {
                newlines++;
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    offset = 0;
                    fCurrentEntity.position = newlines;
                    if (load(newlines, false)) {
                        break;
                    }
                }
                if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                    fCurrentEntity.position++;
                    offset++;
                }
                /*** NEWLINE NORMALIZATION ***/
                else {
                    newlines++;
                }
            }
            else if (c == '\n') {
                newlines++;
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    offset = 0;
                    fCurrentEntity.position = newlines;
                    fCurrentEntity.count = newlines;
                    if (load(newlines, false)) {
                        break;
                    }
                }
            }
            else {
                fCurrentEntity.position--;
                break;
            }
        } while (fCurrentEntity.position < fCurrentEntity.count - 1);
        // overwrite the scanned newline run with '\n' characters
        for (int i = offset; i < fCurrentEntity.position; i++) {
            fCurrentEntity.ch[i] = '\n';
        }
        int length = fCurrentEntity.position - offset;
        if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            buffer.append(fCurrentEntity.ch, offset, length);
            return true;
        }
    }

    // iterate over buffer looking for delimiter
    OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
        c = fCurrentEntity.ch[fCurrentEntity.position++];
        if (c == charAt0) {
            // looks like we just hit the delimiter
            int delimOffset = fCurrentEntity.position - 1;
            for (int i = 1; i < delimLen; i++) {
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    // buffer ended mid-delimiter: back up and retry
                    // after the next refill
                    fCurrentEntity.position -= i;
                    break OUTER;
                }
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (delimiter.charAt(i) != c) {
                    fCurrentEntity.position--;
                    break;
                }
            }
            if (fCurrentEntity.position == delimOffset + delimLen) {
                done = true;
                break;
            }
        }
        else if (c == '\n' || (external && c == '\r')) {
            // leave the newline for the normalization pass above
            fCurrentEntity.position--;
            break;
        }
        else if (XMLChar.isInvalid(c)) {
            fCurrentEntity.position--;
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length - newlines;
            buffer.append(fCurrentEntity.ch, offset, length);
            return true;
        }
    }
    int length = fCurrentEntity.position - offset;
    fCurrentEntity.columnNumber += length - newlines;
    if (done) {
        // do not hand the delimiter itself to the caller
        length -= delimLen;
    }
    buffer.append (fCurrentEntity.ch, offset, length);

    // return true if string was skipped
} while (!done);
return !done;
| public int | scanLiteral(int quote, XMLString content)Scans a range of attribute value data, setting the fields of the
XMLString structure, appropriately.
Note: The characters are consumed.
Note: This method does not guarantee to return
the longest run of attribute value data. This method may return
before the quote character due to reaching the end of the input
buffer or any other reason.
Note: The fields contained in the XMLString
structure are not guaranteed to remain valid upon subsequent calls
to the entity scanner. Therefore, the caller is responsible for
immediately using the returned character data or making a copy of
the character data.
// Scans a run of literal (attribute value) characters into 'content' and
// returns the character that stopped the scan, or -1 when the scan stopped
// at the end of the buffer, after a newline run, or at the closing quote
// of a literal entity.

// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
    load(0, true);
} else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
    // one character left: move it to the front and refill behind it
    fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
    load(1, false);
    fCurrentEntity.position = 0;
}

// normalize newlines
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
boolean external = fCurrentEntity.isExternal();
if (c == '\n' || (c == '\r' && external)) {
    do {
        c = fCurrentEntity.ch[fCurrentEntity.position++];
        if (c == '\r' && external) {
            newlines++;
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            if (fCurrentEntity.position == fCurrentEntity.count) {
                offset = 0;
                fCurrentEntity.position = newlines;
                if (load(newlines, false)) {
                    break;
                }
            }
            if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                fCurrentEntity.position++;
                offset++;
            }
            /*** NEWLINE NORMALIZATION ***/
            else {
                newlines++;
            }
            /***/
        }
        else if (c == '\n') {
            newlines++;
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            if (fCurrentEntity.position == fCurrentEntity.count) {
                offset = 0;
                fCurrentEntity.position = newlines;
                if (load(newlines, false)) {
                    break;
                }
            }
            /*** NEWLINE NORMALIZATION ***
            if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
                && external) {
                fCurrentEntity.position++;
                offset++;
            }
            /***/
        }
        else {
            fCurrentEntity.position--;
            break;
        }
    } while (fCurrentEntity.position < fCurrentEntity.count - 1);
    // overwrite the scanned newline run with '\n' characters
    for (int i = offset; i < fCurrentEntity.position; i++) {
        fCurrentEntity.ch[i] = '\n';
    }
    int length = fCurrentEntity.position - offset;
    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
        content.setValues(fCurrentEntity.ch, offset, length);
        return -1;
    }
}

// scan literal value
while (fCurrentEntity.position < fCurrentEntity.count) {
    c = fCurrentEntity.ch[fCurrentEntity.position++];
    if ((c == quote &&
         (!fCurrentEntity.literal || external))
        || c == '%' || !XMLChar.isContent(c)) {
        fCurrentEntity.position--;
        break;
    }
}
int length = fCurrentEntity.position - offset;
fCurrentEntity.columnNumber += length - newlines;
content.setValues(fCurrentEntity.ch, offset, length);

// return next character
if (fCurrentEntity.position != fCurrentEntity.count) {
    c = fCurrentEntity.ch[fCurrentEntity.position];
    // NOTE: We don't want to accidentally signal the
    //       end of the literal if we're expanding an
    //       entity appearing in the literal. -Ac
    if (c == quote && fCurrentEntity.literal) {
        c = -1;
    }
}
else {
    c = -1;
}
return c;
| public java.lang.String | scanName()Returns a string matching the Name production appearing immediately
on the input as a symbol, or null if no Name string is present.
Note: The Name characters are consumed.
Note: The string returned must be a symbol. The
SymbolTable can be used for this purpose.
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true);
}
// scan name
int offset = fCurrentEntity.position;
if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
if (++fCurrentEntity.position == fCurrentEntity.count) {
fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
offset = 0;
if (load(1, false)) {
fCurrentEntity.columnNumber++;
String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch,
0, 1);
return symbol;
}
}
while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
if (++fCurrentEntity.position == fCurrentEntity.count) {
int length = fCurrentEntity.position - offset;
if (length == fBufferSize) {
// bad luck we have to resize our buffer
char[] tmp = new char[fBufferSize * 2];
System.arraycopy(fCurrentEntity.ch, offset,
tmp, 0, length);
fCurrentEntity.ch = tmp;
fBufferSize *= 2;
} else {
System.arraycopy(fCurrentEntity.ch, offset,
fCurrentEntity.ch, 0, length);
}
offset = 0;
if (load(length, false)) {
break;
}
}
}
}
int length = fCurrentEntity.position - offset;
fCurrentEntity.columnNumber += length;
// return name
String symbol = null;
if (length > 0) {
symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
}
return symbol;
| private void | scanPIData(java.lang.String target, XMLString data)Scans processing instruction data. This is needed to handle the situation
where a document starts with a processing instruction whose
target name starts with "xml". (e.g. xmlfoo)
Note: This method uses fStringBuffer, anything in it
at the time of calling is lost.
// check target
if (target.length() == 3) {
char c0 = Character.toLowerCase(target.charAt(0));
char c1 = Character.toLowerCase(target.charAt(1));
char c2 = Character.toLowerCase(target.charAt(2));
if (c0 == 'x" && c1 == 'm" && c2 == 'l") {
err.jspError("jsp.error.xml.reservedPITarget");
}
}
// spaces
if (!skipSpaces()) {
if (skipString("?>")) {
// we found the end, there is no data
data.clear();
return;
}
else {
// if there is data there should be some space
err.jspError("jsp.error.xml.spaceRequiredInPI");
}
}
fStringBuffer.clear();
// data
if (scanData("?>", fStringBuffer)) {
do {
int c = peekChar();
if (c != -1) {
if (XMLChar.isHighSurrogate(c)) {
scanSurrogates(fStringBuffer);
} else if (XMLChar.isInvalid(c)) {
err.jspError("jsp.error.xml.invalidCharInPI",
Integer.toHexString(c));
scanChar();
}
}
} while (scanData("?>", fStringBuffer));
}
data.setValues(fStringBuffer);
| public java.lang.String | scanPseudoAttribute(boolean scanningTextDecl, XMLString value)Scans a pseudo attribute.
// Scans one pseudo-attribute of the form  name = "value"  (single or double
// quotes). The value is stored in 'value' and the name is returned.
// Uses fStringBuffer2 as scratch space when the value is scanned in pieces.

String name = scanName();
if (name == null) {
    err.jspError("jsp.error.xml.pseudoAttrNameExpected");
}
skipSpaces();
if (!skipChar('=')) {
    reportFatalError(scanningTextDecl ?
                     "jsp.error.xml.eqRequiredInTextDecl"
                     : "jsp.error.xml.eqRequiredInXMLDecl",
                     name);
}
skipSpaces();
int quote = peekChar();
if (quote != '\'' && quote != '"') {
    reportFatalError(scanningTextDecl ?
                     "jsp.error.xml.quoteRequiredInTextDecl"
                     : "jsp.error.xml.quoteRequiredInXMLDecl" ,
                     name);
}
scanChar();
int c = scanLiteral(quote, value);
if (c != quote) {
    // the literal was not delivered in one piece: accumulate the pieces,
    // handling the character that stopped each partial scan
    fStringBuffer2.clear();
    do {
        fStringBuffer2.append(value);
        if (c != -1) {
            if (c == '&' || c == '%' || c == '<' || c == ']') {
                fStringBuffer2.append((char)scanChar());
            }
            else if (XMLChar.isHighSurrogate(c)) {
                scanSurrogates(fStringBuffer2);
            }
            else if (XMLChar.isInvalid(c)) {
                String key = scanningTextDecl
                    ? "jsp.error.xml.invalidCharInTextDecl"
                    : "jsp.error.xml.invalidCharInXMLDecl";
                reportFatalError(key, Integer.toString(c, 16));
                scanChar();
            }
        }
        c = scanLiteral(quote, value);
    } while (c != quote);
    fStringBuffer2.append(value);
    value.setValues(fStringBuffer2);
}
if (!skipChar(quote)) {
    reportFatalError(scanningTextDecl ?
                     "jsp.error.xml.closeQuoteMissingInTextDecl"
                     : "jsp.error.xml.closeQuoteMissingInXMLDecl",
                     name);
}

// return
return name;
| private boolean | scanSurrogates(XMLStringBuffer buf)Scans surrogates and append them to the specified buffer.
Note: This assumes the current char has already been
identified as a high surrogate.
// Consumes a surrogate pair and appends both halves to 'buf'.
// Returns false (after reporting an error) when the pair is malformed or
// does not combine to a valid XML character.

final int high = scanChar();
final int low = peekChar();

if (XMLChar.isLowSurrogate(low)) {
    // the low half is only consumed once it is known to be a surrogate
    scanChar();

    // the combined supplemental character must itself be valid XML
    final int codePoint = XMLChar.supplemental((char) high, (char) low);
    if (XMLChar.isValid(codePoint)) {
        // fill in the buffer
        buf.append((char) high);
        buf.append((char) low);
        return true;
    }

    err.jspError("jsp.error.xml.invalidCharInContent",
                 Integer.toString(codePoint, 16));
    return false;
}

// high surrogate without a matching low surrogate
err.jspError("jsp.error.xml.invalidCharInContent",
             Integer.toString(high, 16));
return false;
| private void | scanXMLDecl()
if (skipString("<?xml")) {
fMarkupDepth++;
// NOTE: special case where document starts with a PI
// whose name starts with "xml" (e.g. "xmlfoo")
if (XMLChar.isName(peekChar())) {
fStringBuffer.clear();
fStringBuffer.append("xml");
while (XMLChar.isName(peekChar())) {
fStringBuffer.append((char)scanChar());
}
String target = fSymbolTable.addSymbol(fStringBuffer.ch,
fStringBuffer.offset,
fStringBuffer.length);
scanPIData(target, fString);
}
// standard XML declaration
else {
scanXMLDeclOrTextDecl(false);
}
}
| private void | scanXMLDeclOrTextDecl(boolean scanningTextDecl)Scans an XML or text declaration.
[23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
[24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
| ('"' ('yes' | 'no') '"'))
[77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
// scan decl
scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
fMarkupDepth--;
// pseudo-attribute values
String encodingPseudoAttr = fStrings[1];
// set encoding on reader
if (encodingPseudoAttr != null) {
isEncodingSetInProlog = true;
encoding = encodingPseudoAttr;
}
| private void | scanXMLDeclOrTextDecl(boolean scanningTextDecl, java.lang.String[] pseudoAttributeValues)Scans an XML or text declaration.
[23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
[24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
| ('"' ('yes' | 'no') '"'))
[77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
// Scans the pseudo-attributes of an XML or text declaration up to and
// including the closing "?>". On return pseudoAttributeValues holds
// { version, encoding, standalone }, each null when not present.
// Pseudo-attribute names are compared by reference against the symbol
// constants. NOTE(review): this presumes scanName() returns the same
// String instances as those constants (interned symbols) - confirm
// against SymbolTable.

// pseudo-attribute values
String version = null;
String encoding = null;
String standalone = null;

// scan pseudo-attributes
final int STATE_VERSION = 0;
final int STATE_ENCODING = 1;
final int STATE_STANDALONE = 2;
final int STATE_DONE = 3;
int state = STATE_VERSION;

boolean dataFoundForTarget = false;
boolean sawSpace = skipSpaces();
while (peekChar() != '?') {
    dataFoundForTarget = true;
    String name = scanPseudoAttribute(scanningTextDecl, fString);
    switch (state) {
        case STATE_VERSION: {
            if (name == fVersionSymbol) {
                if (!sawSpace) {
                    reportFatalError(scanningTextDecl
                                     ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
                                     : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
                                     null);
                }
                version = fString.toString();
                state = STATE_ENCODING;
                if (!version.equals("1.0")) {
                    // REVISIT: XML REC says we should throw an error
                    // in such cases.
                    // some may object the throwing of fatalError.
                    err.jspError("jsp.error.xml.versionNotSupported",
                                 version);
                }
            } else if (name == fEncodingSymbol) {
                // encoding first: only legal in a text declaration
                if (!scanningTextDecl) {
                    err.jspError("jsp.error.xml.versionInfoRequired");
                }
                if (!sawSpace) {
                    reportFatalError(scanningTextDecl
                                     ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
                                     : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
                                     null);
                }
                encoding = fString.toString();
                state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
            } else {
                if (scanningTextDecl) {
                    err.jspError("jsp.error.xml.encodingDeclRequired");
                }
                else {
                    err.jspError("jsp.error.xml.versionInfoRequired");
                }
            }
            break;
        }
        case STATE_ENCODING: {
            if (name == fEncodingSymbol) {
                if (!sawSpace) {
                    reportFatalError(scanningTextDecl
                                     ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
                                     : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
                                     null);
                }
                encoding = fString.toString();
                state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                // TODO: check encoding name; set encoding on
                //       entity scanner
            } else if (!scanningTextDecl && name == fStandaloneSymbol) {
                if (!sawSpace) {
                    err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
                }
                standalone = fString.toString();
                state = STATE_DONE;
                if (!standalone.equals("yes") && !standalone.equals("no")) {
                    err.jspError("jsp.error.xml.sdDeclInvalid");
                }
            } else {
                err.jspError("jsp.error.xml.encodingDeclRequired");
            }
            break;
        }
        case STATE_STANDALONE: {
            if (name == fStandaloneSymbol) {
                if (!sawSpace) {
                    err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
                }
                standalone = fString.toString();
                state = STATE_DONE;
                if (!standalone.equals("yes") && !standalone.equals("no")) {
                    err.jspError("jsp.error.xml.sdDeclInvalid");
                }
            } else {
                err.jspError("jsp.error.xml.encodingDeclRequired");
            }
            break;
        }
        default: {
            err.jspError("jsp.error.xml.noMorePseudoAttributes");
        }
    }
    sawSpace = skipSpaces();
}
// REVISIT: should we remove this error reporting?
if (scanningTextDecl && state != STATE_DONE) {
    err.jspError("jsp.error.xml.morePseudoAttributes");
}

// If there is no data in the xml or text decl then we fail to report
// error for version or encoding info above.
if (scanningTextDecl) {
    if (!dataFoundForTarget && encoding == null) {
        err.jspError("jsp.error.xml.encodingDeclRequired");
    }
} else {
    if (!dataFoundForTarget && version == null) {
        err.jspError("jsp.error.xml.versionInfoRequired");
    }
}

// end: the declaration must close with "?>"
if (!skipChar('?')) {
    err.jspError("jsp.error.xml.xmlDeclUnterminated");
}
if (!skipChar('>')) {
    err.jspError("jsp.error.xml.xmlDeclUnterminated");
}

// fill in return array
pseudoAttributeValues[0] = version;
pseudoAttributeValues[1] = encoding;
pseudoAttributeValues[2] = standalone;
| public boolean | skipChar(int c)Skips a character appearing immediately on the input.
Note: The character is consumed only if it matches
the specified character.
// Consumes the next character only if it equals 'c'. When 'c' is '\n' and
// the entity is external, a "\r" or "\r\n" on the input also counts as a
// match. Returns true when something was consumed.

// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
    load(0, true);
}

// skip character
int cc = fCurrentEntity.ch[fCurrentEntity.position];
if (cc == c) {
    fCurrentEntity.position++;
    if (c == '\n') {
        fCurrentEntity.lineNumber++;
        fCurrentEntity.columnNumber = 1;
    }
    else {
        fCurrentEntity.columnNumber++;
    }
    return true;
} else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) {
    // handle newlines
    if (fCurrentEntity.position == fCurrentEntity.count) {
        fCurrentEntity.ch[0] = (char)cc;
        load(1, false);
    }
    fCurrentEntity.position++;
    // swallow the '\n' of a "\r\n" pair, if there is one
    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
        fCurrentEntity.position++;
    }
    fCurrentEntity.lineNumber++;
    fCurrentEntity.columnNumber = 1;
    return true;
}

// character was not skipped
return false;
| public boolean | skipSpaces()Skips space characters appearing immediately on the input.
Note: The characters are consumed only if they are
space characters.
// Consumes a run of XML space characters, updating line and column
// counters as newlines go by. Returns true if at least one space
// character was consumed.

// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
    load(0, true);
}

// skip spaces
int c = fCurrentEntity.ch[fCurrentEntity.position];
if (XMLChar.isSpace(c)) {
    boolean external = fCurrentEntity.isExternal();
    do {
        boolean entityChanged = false;
        // handle newlines
        if (c == '\n' || (external && c == '\r')) {
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // newline is the last buffered character: keep it in
                // slot 0 and refill behind it
                fCurrentEntity.ch[0] = (char)c;
                entityChanged = load(1, true);
                if (!entityChanged) {
                    // the load change the position to be 1,
                    // need to restore it when entity not changed
                    fCurrentEntity.position = 0;
                }
            }
            if (c == '\r' && external) {
                // REVISIT: Does this need to be updated to fix the
                //          #x0D ^#x0A newline normalization problem? -Ac
                if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                    fCurrentEntity.position--;
                }
            }
            /*** NEWLINE NORMALIZATION ***
            else {
                if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
                    && external) {
                    fCurrentEntity.position++;
                }
            }
            /***/
        }
        else {
            fCurrentEntity.columnNumber++;
        }
        // load more characters, if needed
        if (!entityChanged) {
            fCurrentEntity.position++;
        }
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true);
        }
    } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
    return true;
}

// no spaces were found
return false;
| public boolean | skipString(java.lang.String s)Skips the specified string appearing immediately on the input.
Note: The characters are consumed only if they match the
specified string.
// Consumes the string s if it appears at the current position and returns
// true; on a mismatch the position is moved back to where the attempt
// started and false is returned.
// load more characters, if needed
if (fCurrentEntity.position == fCurrentEntity.count) {
load(0, true);
}
// skip string
final int length = s.length();
for (int i = 0; i < length; i++) {
char c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c != s.charAt(i)) {
// mismatch: un-consume the i + 1 characters read so far
fCurrentEntity.position -= i + 1;
return false;
}
if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) {
// buffer ran out mid-string: move the i + 1 matched characters to the
// front and read more behind them
System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1);
// REVISIT: Can a string to be skipped cross an
// entity boundary? -Ac
if (load(i + 1, false)) {
fCurrentEntity.position -= i + 1;
return false;
}
}
}
fCurrentEntity.columnNumber += length;
return true;
|
|