File | Doc | Category | Size | Date | Package
XMLEncodingDetector.java | API Doc | Glassfish v2 API | 57391 | Fri May 04 22:33:00 BST 2007 | org.apache.jasper.xmlparser

XMLEncodingDetector

public class XMLEncodingDetector extends Object

Fields Summary
private InputStream
stream
private String
encoding
private boolean
isEncodingSetInProlog
private Boolean
isBigEndian
private Boolean
hasBom
private Reader
reader
public static final int
DEFAULT_BUFFER_SIZE
public static final int
DEFAULT_XMLDECL_BUFFER_SIZE
private boolean
fAllowJavaEncodings
private SymbolTable
fSymbolTable
private XMLEncodingDetector
fCurrentEntity
private int
fBufferSize
private int
lineNumber
private int
columnNumber
private boolean
literal
private char[]
ch
private int
position
private int
count
private boolean
mayReadChunks
private XMLString
fString
private XMLStringBuffer
fStringBuffer
private XMLStringBuffer
fStringBuffer2
private static final String
fVersionSymbol
private static final String
fEncodingSymbol
private static final String
fStandaloneSymbol
private int
fMarkupDepth
private String[]
fStrings
private org.apache.jasper.compiler.ErrorDispatcher
err
Constructors Summary
public XMLEncodingDetector()
Constructor. Makes this detector its own current entity and creates the symbol table used while scanning.


          
      
        // the scanning methods address their state through fCurrentEntity
        this.fCurrentEntity = this;
        this.fSymbolTable = new SymbolTable();
    
Methods Summary
private voidcreateInitialReader()
Wraps the input stream in a RewindableInputStream and, when no encoding is known yet, auto-detects the encoding from up to the first four bytes of the stream and creates the matching reader.

throws
IOException Thrown if i/o error occurs.


	// wrap this stream in RewindableInputStream
	stream = new RewindableInputStream(stream);

	// perform auto-detect of encoding if necessary
	if (encoding == null) {
	    // read up to the first four bytes and determine encoding
	    final byte[] b4 = new byte[4];
	    int count = 0;
	    while (count < 4) {
		int b = stream.read();
		if (b == -1) {
		    // end of stream: stop here so the EOF marker is not
		    // mistaken for a 0xFF data byte
		    break;
		}
		b4[count++] = (byte)b;
	    }

	    // getEncodingName() falls back to UTF-8 when too few bytes
	    // are available for a determination
	    Object [] encodingDesc = getEncodingName(b4, count);
	    encoding = (String)(encodingDesc[0]);
	    isBigEndian = (Boolean)(encodingDesc[1]);
	    hasBom = (Boolean)(encodingDesc[2]);

	    // rewind so the reader starts at the first byte of the input
	    stream.reset();
	    // Special case UTF-8 files with BOM created by Microsoft
	    // tools. It's more efficient to consume the BOM than make
	    // the reader perform extra checks. -Ac
	    if (count > 2 && encoding.equals("UTF-8")) {
		int b0 = b4[0] & 0xFF;
		int b1 = b4[1] & 0xFF;
		int b2 = b4[2] & 0xFF;
		if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
		    // ignore first three bytes...
		    stream.skip(3);
		}
	    }
	    reader = createReader(stream, encoding, isBigEndian);
	}
    
private java.io.ReadercreateReader(java.io.InputStream inputStream, java.lang.String encoding, java.lang.Boolean isBigEndian)
Builds a reader that decodes the given input stream using the named encoding.

param
inputStream The input stream to decode.
param
encoding The name of the encoding used by the input stream; null is treated as UTF-8. If Java encoding names are allowed, this may be a Java encoding name; otherwise it is an IANA encoding name.
param
isBigEndian For encodings (like UCS-4) whose names cannot specify a byte order, tells whether the order is big-endian. null means unknown or not relevant.
return
Returns a reader.


        // a missing encoding name means UTF-8
        if (encoding == null) {
            encoding = "UTF-8";
        }
        final String upperName = encoding.toUpperCase(Locale.ENGLISH);

        // dedicated readers for the common encodings
        if (upperName.equals("UTF-8")) {
            return new UTF8Reader(inputStream, fBufferSize);
        }
        if (upperName.equals("US-ASCII")) {
            return new ASCIIReader(inputStream, fBufferSize);
        }

        // UCS-4 cannot be read without knowing the byte order
        if (upperName.equals("ISO-10646-UCS-4")) {
            if (isBigEndian == null) {
                err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
                             encoding);
            } else {
                return new UCSReader(inputStream,
                                     isBigEndian.booleanValue()
                                         ? UCSReader.UCS4BE
                                         : UCSReader.UCS4LE);
            }
        }

        // same for UCS-2 (a null byte order should never happen here)
        if (upperName.equals("ISO-10646-UCS-2")) {
            if (isBigEndian == null) {
                err.jspError("jsp.error.xml.encodingByteOrderUnsupported",
                             encoding);
            } else {
                return new UCSReader(inputStream,
                                     isBigEndian.booleanValue()
                                         ? UCSReader.UCS2BE
                                         : UCSReader.UCS2LE);
            }
        }

        // check that the name is acceptable
        final boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
        final boolean validJava = XMLChar.isValidJavaEncoding(encoding);
        final boolean acceptable =
            validIANA && (!fAllowJavaEncodings || validJava);
        if (!acceptable) {
            err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
            // NOTE: on failure fall back to ISO Latin 1, because every
            //       byte is a valid ISO Latin 1 character. The content
            //       may not translate correctly, but it keeps an invalid
            //       UTF-8 sequence from being reported on top of the
            //       encoding error. Only matters when processing goes on
            //       after the error has been reported.
            encoding = "ISO-8859-1";
        }

        // fall back to a plain Java reader; the lookup uses the
        // upper-cased name computed before any fallback above
        String javaName = EncodingMap.getIANA2JavaMapping(upperName);
        if (javaName == null && fAllowJavaEncodings) {
            javaName = encoding;
        } else if (javaName == null) {
            err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
            // same ISO Latin 1 fallback as above
            javaName = "ISO8859_1";
        }
        return new InputStreamReader(inputStream, javaName);

    
voidendEntity()
Called by load() when the reader reports end of input and changeEntity is true. No statements are shown for this method in this listing.

    
public static java.lang.Object[]getEncoding(java.lang.String fname, java.util.jar.JarFile jarFile, org.apache.jasper.JspCompilationContext ctxt, org.apache.jasper.compiler.ErrorDispatcher err)
Autodetects the encoding of the XML document supplied by the given input stream. Encoding autodetection is done according to the XML 1.0 specification, Appendix F.1: Detection Without External Encoding Information.

param
fname Passed to JspUtil.getInputStream to open the document.
param
jarFile Passed to JspUtil.getInputStream to open the document.
param
ctxt Passed to JspUtil.getInputStream to open the document.
param
err The error dispatcher used to open the document and to report errors found while scanning.
return
Three-element array, where the first element (of type java.lang.String) contains the name of the (auto)detected encoding, the second element (of type java.lang.Boolean) specifies whether the encoding was specified using the 'encoding' attribute of an XML prolog (TRUE) or autodetected (FALSE), and the third element (of type java.lang.Boolean, possibly null) is the byte-order-mark flag recorded during autodetection.

        InputStream inStream = JspUtil.getInputStream(fname, jarFile, ctxt,
                                                      err);
        XMLEncodingDetector detector = new XMLEncodingDetector();
        boolean detected = false;
        try {
            Object[] ret = detector.getEncoding(inStream, err);
            detected = true;
            return ret;
        } finally {
            // always release the stream, also when detection fails
            if (detected) {
                inStream.close();
            } else {
                try {
                    inStream.close();
                } catch (java.io.IOException ignored) {
                    // the detection failure is already propagating;
                    // do not let a close failure replace it
                }
            }
        }
    
private java.lang.Object[]getEncoding(java.io.InputStream in, org.apache.jasper.compiler.ErrorDispatcher err)
Runs the detection on the given stream: creates the initial reader, scans the XML declaration, and reports what was found.

param
in The input stream to examine.
param
err The error dispatcher used to report errors.
return
Three-element array: the encoding name, a Boolean telling whether the encoding was set in the prolog, and the byte-order-mark flag (may be null).

        this.err = err;
        this.stream = in;

        // sniff the leading bytes, then look at the XML declaration
        createInitialReader();
        scanXMLDecl();

        final Object[] result = new Object[3];
        result[0] = this.encoding;
        result[1] = Boolean.valueOf(this.isEncodingSetInProlog);
        result[2] = this.hasBom;
        return result;
    
private java.lang.Object[]getEncodingName(byte[] b4, int count)
Returns the IANA encoding name that is auto-detected from the bytes specified, with the endian-ness of that encoding where appropriate.

param
b4 The first four bytes of the input.
param
count The number of bytes actually read.
return
a 2-element array: the first element, an IANA-encoding string, the second element a Boolean which is true iff the document is big endian, false if it's little-endian, and null if the distinction isn't relevant.


        if (count < 2) {
            return new Object[]{"UTF-8", null, null};
        }

        // UTF-16, with BOM
        int b0 = b4[0] & 0xFF;
        int b1 = b4[1] & 0xFF;
        if (b0 == 0xFE && b1 == 0xFF) {
            // UTF-16, big-endian, with a BOM
            return new Object [] {"UTF-16BE", Boolean.TRUE,
                                  Boolean.TRUE};
        }
        /* SJSAS 6307968
        if (b0 == 0xFF && b1 == 0xFE) {
        */
        // BEGIN SJSAS 6307968
        if (count == 2 && b0 == 0xFF && b1 == 0xFE) {
        // END SJSAS 6307968
            // UTF-16, little-endian, with a BOM
            return new Object [] {"UTF-16LE", Boolean.FALSE,
                                  Boolean.TRUE};
        }

        // default to UTF-8 if we don't have enough bytes to make a
        // good determination of the encoding
        if (count < 3) {
            return new Object [] {"UTF-8", null, null};
        }

        // UTF-8 with a BOM
        int b2 = b4[2] & 0xFF;
        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
            return new Object [] {"UTF-8", null, Boolean.TRUE};
        }

        // default to UTF-8 if we don't have enough bytes to make a
        // good determination of the encoding
        if (count < 4) {
            return new Object [] {"UTF-8", null, null};
        }

        // other encodings
        int b3 = b4[3] & 0xFF;
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
            // UCS-4, big endian (1234)
            return new Object [] {"ISO-10646-UCS-4", Boolean.TRUE, null};
        }
        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
            // UCS-4, little endian (4321)
            return new Object [] {"ISO-10646-UCS-4", Boolean.FALSE, null};
        }
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
            // UCS-4, unusual octet order (2143)
            // REVISIT: What should this be?
            return new Object [] {"ISO-10646-UCS-4", null, null};
        }
        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
            // UCS-4, unusual octect order (3412)
            // REVISIT: What should this be?
            return new Object [] {"ISO-10646-UCS-4", null, null};
        }
        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
            // UTF-16, big-endian, no BOM
            // (or could turn out to be UCS-2...
            // REVISIT: What should this be?
            return new Object [] {"UTF-16BE", Boolean.TRUE, null};
        }
        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
            // UTF-16, little-endian, no BOM
            // (or could turn out to be UCS-2...
            return new Object [] {"UTF-16LE", Boolean.FALSE, null};
        }
        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
            // EBCDIC
            // a la xerces1, return CP037 instead of EBCDIC here
            return new Object [] {"CP037", null, null};
        }
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0xFE && b3 == 0xFF) {
            // UTF-32, big-endian, with a BOM
            return new Object [] {"UTF-32BE", Boolean.TRUE,
                                  Boolean.TRUE};
        }
        if (b0 == 0xFF && b1 == 0xFE && b2 == 0x00 && b3 == 0x00) {
            // UTF-32, little-endian, with a BOM
            return new Object [] {"UTF-32LE", Boolean.FALSE,
                                  Boolean.TRUE};
        }
        // BEGIN SJSAS 6307968
        if (b0 == 0xFF && b1 == 0xFE) {
            // UTF-16, little-endian, with a BOM
            return new Object [] {"UTF-16LE", Boolean.FALSE,
                                  Boolean.TRUE};
        }
        // END SJSAS 6307968

        // default encoding
        return new Object [] {"UTF-8", null, null};

    
public booleanisExternal()
Returns true if the current entity being scanned is external.

	// this implementation unconditionally reports the entity as external
	return true;
    
final booleanload(int offset, boolean changeEntity)
Loads a chunk of text.

param
offset The offset into the character buffer to read the next batch of characters.
param
changeEntity True if the load should change entities at the end of the entity, otherwise leave the current entity in place and the entity boundary will be signaled by the return value.
returns
Returns true if the entity changed as a result of this load operation.


	// read characters
	int length = fCurrentEntity.mayReadChunks?
	    (fCurrentEntity.ch.length - offset):
	    (DEFAULT_XMLDECL_BUFFER_SIZE);
	int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset,
					       length);

	// reset count and position
	boolean entityChanged = false;
	if (count != -1) {
	    if (count != 0) {
		fCurrentEntity.count = count + offset;
		fCurrentEntity.position = offset;
	    }
	}

	// end of this entity
	else {
	    fCurrentEntity.count = offset;
	    fCurrentEntity.position = offset;
	    entityChanged = true;
	    if (changeEntity) {
		endEntity();
		if (fCurrentEntity == null) {
		    throw new EOFException();
		}
		// handle the trailing edges
		if (fCurrentEntity.position == fCurrentEntity.count) {
		    load(0, false);
		}
	    }
	}

	return entityChanged;

    
public intpeekChar()
Returns the next character on the input.

Note: The character is not consumed.

throws
IOException Thrown if i/o error occurs.
throws
EOFException Thrown on end of file.

	
	// load more characters, if needed
	if (fCurrentEntity.position == fCurrentEntity.count) {
	    load(0, true);
	}
	
	// peek at character
	int c = fCurrentEntity.ch[fCurrentEntity.position];

	// return peeked character; in an external entity a carriage
	// return is reported as a line feed
	if (fCurrentEntity.isExternal()) {
	    return c != '\r' ? c : '\n';
	}
	else {
	    return c;
	}
	
    
private voidreportFatalError(java.lang.String msgId, java.lang.String arg)
Convenience function used in all XML scanners.

param
msgId The message key handed to the error dispatcher.
param
arg The single argument handed to the error dispatcher along with the key.

        // delegate to the error dispatcher supplied to getEncoding()
        err.jspError(msgId, arg);
    
public intscanChar()
Returns the next character on the input.

Note: The character is consumed.

throws
IOException Thrown if i/o error occurs.
throws
EOFException Thrown on end of file.


	// load more characters, if needed
	if (fCurrentEntity.position == fCurrentEntity.count) {
	    load(0, true);
	}

	// scan character
	int c = fCurrentEntity.ch[fCurrentEntity.position++];
	boolean external = false;
	if (c == '\n' ||
	    (c == '\r' && (external = fCurrentEntity.isExternal()))) {
	    // a line break: advance the line counter and restart the column
	    fCurrentEntity.lineNumber++;
	    fCurrentEntity.columnNumber = 1;
	    if (fCurrentEntity.position == fCurrentEntity.count) {
		// keep the line break at the start of the buffer and load
		// the following characters after it
		fCurrentEntity.ch[0] = (char)c;
		load(1, false);
	    }
	    if (c == '\r' && external) {
		// swallow the line feed of a CR LF pair, if there is one,
		// and report the break as a single line feed
		if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
		    fCurrentEntity.position--;
		}
		c = '\n';
	    }
	}

	// return character that was scanned
	fCurrentEntity.columnNumber++;
	return c;
	
    
public booleanscanData(java.lang.String delimiter, XMLStringBuffer buffer)
Scans a range of character data up to the specified delimiter, appending the scanned characters to the given buffer.

Note: The characters are consumed.

Note: This assumes that the internal buffer is at least the same size, or bigger, than the length of the delimiter and that the delimiter contains at least one character.

Note: This method does not guarantee to return the longest run of character data. This method may return before the delimiter due to reaching the end of the input buffer or any other reason.

Note: Line breaks in the scanned data are written to the buffer as line feed characters.

param
delimiter The string that signifies the end of the character data to be scanned.
param
buffer The buffer to which the scanned data is appended.
return
Returns true if there is more data to scan, false otherwise.
throws
IOException Thrown if i/o error occurs.
throws
EOFException Thrown on end of file.


	boolean done = false;
	int delimLen = delimiter.length();
	char charAt0 = delimiter.charAt(0);
	boolean external = fCurrentEntity.isExternal();
	do {
    
	    // load more characters, if needed
    
	    if (fCurrentEntity.position == fCurrentEntity.count) {
		load(0, true);
	    }
	    else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
		// too few unread characters left to hold the delimiter:
		// move them to the start of the buffer and load more
		// characters after them
		System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position,
				 fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position);
		load(fCurrentEntity.count - fCurrentEntity.position, false);
		fCurrentEntity.position = 0;
	    } 
	    if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
		// something must be wrong with the input: e.g., file ends an
		// unterminated comment
		int length = fCurrentEntity.count - fCurrentEntity.position;
		buffer.append (fCurrentEntity.ch, fCurrentEntity.position,
			       length); 
		fCurrentEntity.columnNumber += fCurrentEntity.count;
		fCurrentEntity.position = fCurrentEntity.count;
		load(0,true);
		return false;
	    }
    
	    // normalize newlines
	    int offset = fCurrentEntity.position;
	    int c = fCurrentEntity.ch[offset];
	    int newlines = 0;
	    if (c == '\n' || (c == '\r' && external)) {
		// consume the whole run of line breaks
		do {
		    c = fCurrentEntity.ch[fCurrentEntity.position++];
		    if (c == '\r' && external) {
			newlines++;
			fCurrentEntity.lineNumber++;
			fCurrentEntity.columnNumber = 1;
			if (fCurrentEntity.position == fCurrentEntity.count) {
			    // buffer exhausted: continue loading after the
			    // line breaks counted so far
			    offset = 0;
			    fCurrentEntity.position = newlines;
			    if (load(newlines, false)) {
				break;
			    }
			}
			if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
			    // CR LF pair: skip the line feed as well
			    fCurrentEntity.position++;
			    offset++;
			}
			/*** NEWLINE NORMALIZATION ***/
			else {
			    newlines++;
			}
		    }
		    else if (c == '\n') {
			newlines++;
			fCurrentEntity.lineNumber++;
			fCurrentEntity.columnNumber = 1;
			if (fCurrentEntity.position == fCurrentEntity.count) {
			    offset = 0;
			    fCurrentEntity.position = newlines;
			    fCurrentEntity.count = newlines;
			    if (load(newlines, false)) {
				break;
			    }
			}
		    }
		    else {
			// not a line break: un-read it and stop
			fCurrentEntity.position--;
			break;
		    }
		} while (fCurrentEntity.position < fCurrentEntity.count - 1);
		// overwrite the consumed run with line feeds
		for (int i = offset; i < fCurrentEntity.position; i++) {
		    fCurrentEntity.ch[i] = '\n';
		}
		int length = fCurrentEntity.position - offset;
		if (fCurrentEntity.position == fCurrentEntity.count - 1) {
		    // only one unread character remains: hand back the
		    // line breaks and let the caller call again
		    buffer.append(fCurrentEntity.ch, offset, length);
		    return true;
		}
	    }
    
	    // iterate over buffer looking for delimiter
	OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
	    c = fCurrentEntity.ch[fCurrentEntity.position++];
	    if (c == charAt0) {
		// looks like we just hit the delimiter
		int delimOffset = fCurrentEntity.position - 1;
		for (int i = 1; i < delimLen; i++) {
		    if (fCurrentEntity.position == fCurrentEntity.count) {
			// buffer ends inside a possible delimiter: back up
			// to its first character and stop scanning
			fCurrentEntity.position -= i;
			break OUTER;
		    }
		    c = fCurrentEntity.ch[fCurrentEntity.position++];
		    if (delimiter.charAt(i) != c) {
			fCurrentEntity.position--;
			break;
		    }
		}
		if (fCurrentEntity.position == delimOffset + delimLen) {
		    // the complete delimiter was matched
		    done = true;
		    break;
		}
	    }
	    else if (c == '\n' || (external && c == '\r')) {
		// leave the line break for the normalization step of the
		// next pass
		fCurrentEntity.position--;
		break;
	    }
	    else if (XMLChar.isInvalid(c)) {
		// stop in front of the invalid character
		fCurrentEntity.position--;
		int length = fCurrentEntity.position - offset;
		fCurrentEntity.columnNumber += length - newlines;
		buffer.append(fCurrentEntity.ch, offset, length); 
		return true;
	    }
	}
	    int length = fCurrentEntity.position - offset;
	    fCurrentEntity.columnNumber += length - newlines;
	    if (done) {
		// the delimiter itself is not part of the data
		length -= delimLen;
	    }
	    buffer.append (fCurrentEntity.ch, offset, length);
    
	    // return true if string was skipped
	} while (!done);
	return !done;

    
public intscanLiteral(int quote, XMLString content)
Scans a range of attribute value data, setting the fields of the XMLString structure, appropriately.

Note: The characters are consumed.

Note: This method does not guarantee to return the longest run of attribute value data. This method may return before the quote character due to reaching the end of the input buffer or any other reason.

Note: The fields contained in the XMLString structure are not guaranteed to remain valid upon subsequent calls to the entity scanner. Therefore, the caller is responsible for immediately using the returned character data or making a copy of the character data.

param
quote The quote character that signifies the end of the attribute value data.
param
content The content structure to fill.
return
Returns the next character on the input, if known. This value may be -1 but this does not designate end of file.
throws
IOException Thrown if i/o error occurs.
throws
EOFException Thrown on end of file.


	// load more characters, if needed
	if (fCurrentEntity.position == fCurrentEntity.count) {
	    load(0, true);
	} else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
	    // a single unread character is left: move it to the start of
	    // the buffer and load more characters after it
	    fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
	    load(1, false);
	    fCurrentEntity.position = 0;
	}

	// normalize newlines
	int offset = fCurrentEntity.position;
	int c = fCurrentEntity.ch[offset];
	int newlines = 0;
	boolean external = fCurrentEntity.isExternal();
	if (c == '\n' || (c == '\r' && external)) {
	    // consume the whole run of line breaks
	    do {
		c = fCurrentEntity.ch[fCurrentEntity.position++];
		if (c == '\r' && external) {
		    newlines++;
		    fCurrentEntity.lineNumber++;
		    fCurrentEntity.columnNumber = 1;
		    if (fCurrentEntity.position == fCurrentEntity.count) {
			// buffer exhausted: continue loading after the
			// line breaks counted so far
			offset = 0;
			fCurrentEntity.position = newlines;
			if (load(newlines, false)) {
			    break;
			}
		    }
		    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
			// CR LF pair: skip the line feed as well
			fCurrentEntity.position++;
			offset++;
		    }
		    /*** NEWLINE NORMALIZATION ***/
		    else {
			newlines++;
		    }
		    /***/
		}
		else if (c == '\n') {
		    newlines++;
		    fCurrentEntity.lineNumber++;
		    fCurrentEntity.columnNumber = 1;
		    if (fCurrentEntity.position == fCurrentEntity.count) {
			offset = 0;
			fCurrentEntity.position = newlines;
			if (load(newlines, false)) {
			    break;
			}
		    }
		    /*** NEWLINE NORMALIZATION ***
			 if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
			 && external) {
			 fCurrentEntity.position++;
			 offset++;
			 }
			 /***/
		}
		else {
		    // not a line break: un-read it and stop
		    fCurrentEntity.position--;
		    break;
		}
	    } while (fCurrentEntity.position < fCurrentEntity.count - 1);
	    // overwrite the consumed run with line feeds
	    for (int i = offset; i < fCurrentEntity.position; i++) {
		fCurrentEntity.ch[i] = '\n';
	    }
	    int length = fCurrentEntity.position - offset;
	    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
		// only one unread character remains: hand back the line
		// breaks; the next character is not known yet
		content.setValues(fCurrentEntity.ch, offset, length);
		return -1;
	    }
	}

	// scan literal value
	while (fCurrentEntity.position < fCurrentEntity.count) {
	    c = fCurrentEntity.ch[fCurrentEntity.position++];
	    if ((c == quote &&
		 (!fCurrentEntity.literal || external))
		|| c == '%' || !XMLChar.isContent(c)) {
		// stop in front of the character that ends this run
		fCurrentEntity.position--;
		break;
	    }
	}
	int length = fCurrentEntity.position - offset;
	fCurrentEntity.columnNumber += length - newlines;
	content.setValues(fCurrentEntity.ch, offset, length);

	// return next character
	if (fCurrentEntity.position != fCurrentEntity.count) {
	    c = fCurrentEntity.ch[fCurrentEntity.position];
	    // NOTE: We don't want to accidentally signal the
	    //       end of the literal if we're expanding an
	    //       entity appearing in the literal. -Ac
	    if (c == quote && fCurrentEntity.literal) {
		c = -1;
	    }
	}
	else {
	    c = -1;
	}
	return c;

    
public java.lang.StringscanName()
Returns a string matching the Name production appearing immediately on the input as a symbol, or null if no Name string is present.

Note: The Name characters are consumed.

Note: The string returned must be a symbol. The SymbolTable can be used for this purpose.

throws
IOException Thrown if i/o error occurs.
throws
EOFException Thrown on end of file.
see
SymbolTable
see
XMLChar#isName
see
XMLChar#isNameStart

	
	// load more characters, if needed
	if (fCurrentEntity.position == fCurrentEntity.count) {
	    load(0, true);
	}
	
	// scan name
	int offset = fCurrentEntity.position;
	if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
	    if (++fCurrentEntity.position == fCurrentEntity.count) {
		// the name start was the last buffered character: keep it
		// at the start of the buffer and load more after it
		fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
		offset = 0;
		if (load(1, false)) {
		    // input ended: the name is this single character
		    fCurrentEntity.columnNumber++;
		    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch,
							   0, 1);
		    return symbol;
		}
	    }
	    while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
		if (++fCurrentEntity.position == fCurrentEntity.count) {
		    // buffer exhausted in the middle of the name
		    int length = fCurrentEntity.position - offset;
		    if (length == fBufferSize) {
			// bad luck we have to resize our buffer
			char[] tmp = new char[fBufferSize * 2];
			System.arraycopy(fCurrentEntity.ch, offset,
					 tmp, 0, length);
			fCurrentEntity.ch = tmp;
			fBufferSize *= 2;
		    } else {
			// move the partial name to the start of the buffer
			System.arraycopy(fCurrentEntity.ch, offset,
					 fCurrentEntity.ch, 0, length);
		    }
		    offset = 0;
		    // load more characters after the partial name; stop if
		    // the input ended
		    if (load(length, false)) {
			break;
		    }
		}
	    }
	}
	int length = fCurrentEntity.position - offset;
	fCurrentEntity.columnNumber += length;

	// return name
	String symbol = null;
	if (length > 0) {
	    symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
	}
	return symbol;
	
    
private voidscanPIData(java.lang.String target, XMLString data)
Scans the data of a processing instruction. This is needed to handle the situation where a document starts with a processing instruction whose target name starts with "xml". (e.g. xmlfoo) Note: This method uses fStringBuffer, anything in it at the time of calling is lost.

param
target The PI target
param
data The string to fill in with the data


        // check target: "xml" in any letter case is reserved
        if (target.length() == 3) {
            char c0 = Character.toLowerCase(target.charAt(0));
            char c1 = Character.toLowerCase(target.charAt(1));
            char c2 = Character.toLowerCase(target.charAt(2));
            if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
                err.jspError("jsp.error.xml.reservedPITarget");
            }
        }

        // spaces
        if (!skipSpaces()) {
            if (skipString("?>")) {
                // we found the end, there is no data
                data.clear();
                return;
            }
            else {
                // if there is data there should be some space
                err.jspError("jsp.error.xml.spaceRequiredInPI");
            }
        }

        fStringBuffer.clear();
        // data: scanData() returns true while the closing "?>" has not
        // been reached
        if (scanData("?>", fStringBuffer)) {
            do {
                int c = peekChar();
                if (c != -1) {
                    if (XMLChar.isHighSurrogate(c)) {
                        scanSurrogates(fStringBuffer);
                    } else if (XMLChar.isInvalid(c)) {
                        err.jspError("jsp.error.xml.invalidCharInPI",
				     Integer.toHexString(c));
                        scanChar();
                    }
                }
            } while (scanData("?>", fStringBuffer));
        }
        data.setValues(fStringBuffer);

    
public java.lang.StringscanPseudoAttribute(boolean scanningTextDecl, XMLString value)
Scans a pseudo attribute.

param
scanningTextDecl True if scanning this pseudo-attribute for a TextDecl; false if scanning XMLDecl. This flag is needed to report the correct type of error.
param
value The string to fill in with the attribute value.
return
The name of the attribute Note: This method uses fStringBuffer2, anything in it at the time of calling is lost.


        // attribute name
        String name = scanName();
        if (name == null) {
            err.jspError("jsp.error.xml.pseudoAttrNameExpected");
        }

        // '=' with optional spaces on either side
        skipSpaces();
        if (!skipChar('=')) {
            reportFatalError(scanningTextDecl ?
			     "jsp.error.xml.eqRequiredInTextDecl"
                             : "jsp.error.xml.eqRequiredInXMLDecl",
			     name);
        }
        skipSpaces();

        // opening quote: single or double
        int quote = peekChar();
        if (quote != '\'' && quote != '"') {
            reportFatalError(scanningTextDecl ?
			     "jsp.error.xml.quoteRequiredInTextDecl"
                             : "jsp.error.xml.quoteRequiredInXMLDecl" ,
			     name);
        }
        scanChar();

        // value: scanLiteral() may stop before the closing quote, in
        // which case the pieces are collected in fStringBuffer2
        int c = scanLiteral(quote, value);
        if (c != quote) {
            fStringBuffer2.clear();
            do {
                fStringBuffer2.append(value);
                if (c != -1) {
                    if (c == '&' || c == '%' || c == '<' || c == ']') {
                        fStringBuffer2.append((char)scanChar());
                    }
                    else if (XMLChar.isHighSurrogate(c)) {
                        scanSurrogates(fStringBuffer2);
                    }
                    else if (XMLChar.isInvalid(c)) {
                        String key = scanningTextDecl
                            ? "jsp.error.xml.invalidCharInTextDecl"
			    : "jsp.error.xml.invalidCharInXMLDecl";
                        reportFatalError(key, Integer.toString(c, 16));
                        scanChar();
                    }
                }
                c = scanLiteral(quote, value);
            } while (c != quote);
            fStringBuffer2.append(value);
            value.setValues(fStringBuffer2);
        }

        // closing quote
        if (!skipChar(quote)) {
            reportFatalError(scanningTextDecl ?
			     "jsp.error.xml.closeQuoteMissingInTextDecl"
                             : "jsp.error.xml.closeQuoteMissingInXMLDecl",
			     name);
        }

        // return
        return name;

    
private booleanscanSurrogates(XMLStringBuffer buf)
Scans a surrogate pair and appends it to the specified buffer.

Note: This assumes the current char has already been identified as a high surrogate.

param
buf The StringBuffer to append the read surrogates to.
returns
True if it succeeded.


        // consume the high surrogate, look at what follows it
        final int lead = scanChar();
        final int trail = peekChar();

        if (XMLChar.isLowSurrogate(trail)) {
            scanChar();

            // the pair must combine into a valid XML character
            final int codePoint =
                XMLChar.supplemental((char)lead, (char)trail);
            if (XMLChar.isValid(codePoint)) {
                buf.append((char)lead);
                buf.append((char)trail);
                return true;
            }
            err.jspError("jsp.error.xml.invalidCharInContent",
			 Integer.toString(codePoint, 16));
        } else {
            // high surrogate without a low surrogate after it
            err.jspError("jsp.error.xml.invalidCharInContent",
			 Integer.toString(lead, 16));
        }
        return false;

    
private voidscanXMLDecl()


	if (skipString("<?xml")) {
	    fMarkupDepth++;
	    // NOTE: special case where document starts with a PI
	    //       whose name starts with "xml" (e.g. "xmlfoo")
	    if (XMLChar.isName(peekChar())) {
		fStringBuffer.clear();
		fStringBuffer.append("xml");
		while (XMLChar.isName(peekChar())) {
		    fStringBuffer.append((char)scanChar());
		}
		String target = fSymbolTable.addSymbol(fStringBuffer.ch,
						       fStringBuffer.offset,
						       fStringBuffer.length);
		scanPIData(target, fString);
	    }

	    // standard XML declaration
	    else {
		scanXMLDeclOrTextDecl(false);
	    }
	}
    
private voidscanXMLDeclOrTextDecl(boolean scanningTextDecl)
Scans an XML or text declaration.

[23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
[24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
| ('"' ('yes' | 'no') '"'))

[77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'

param
scanningTextDecl True if a text declaration is to be scanned instead of an XML declaration.


        // Delegate the scan; the pseudo-attribute values are returned in fStrings.
        scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
        fMarkupDepth--;

        // Index 1 holds the encoding pseudo-attribute; nothing to record if absent.
        final String declaredEncoding = fStrings[1];
        if (declaredEncoding == null) {
            return;
        }

        // Remember the declared encoding and that it came from the prolog.
        encoding = declaredEncoding;
        isEncodingSetInProlog = true;
    
private voidscanXMLDeclOrTextDecl(boolean scanningTextDecl, java.lang.String[] pseudoAttributeValues)
Scans an XML or text declaration.

[23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
[24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
| ('"' ('yes' | 'no') '"'))

[77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'

param
scanningTextDecl True if a text declaration is to be scanned instead of an XML declaration.
param
pseudoAttributeValues An array of size 3 to return the version, encoding and standalone pseudo attribute values (in that order). Note: This method uses fString, anything in it at the time of calling is lost.


        // Scans the pseudo-attributes of an XML or text declaration up to the
        // closing "?>" and stores version, encoding and standalone (in that
        // order) into pseudoAttributeValues. A small state machine enforces
        // the order in which the pseudo-attributes may appear.

        // pseudo-attribute values
        String version = null;
        String encoding = null;
        String standalone = null;

        // scan pseudo-attributes
        final int STATE_VERSION = 0;
        final int STATE_ENCODING = 1;
        final int STATE_STANDALONE = 2;
        final int STATE_DONE = 3;
        int state = STATE_VERSION;

        // dataFoundForTarget records whether at least one pseudo-attribute was
        // scanned; sawSpace records whether whitespace preceded the current one.
        boolean dataFoundForTarget = false;
        boolean sawSpace = skipSpaces();
        while (peekChar() != '?') {
            dataFoundForTarget = true;
            // The attribute value is left in fString; the returned name is
            // compared by identity against the symbol constants.
            // NOTE(review): presumably scanPseudoAttribute returns interned
            // symbols, which is what makes '==' valid here -- confirm.
            String name = scanPseudoAttribute(scanningTextDecl, fString);
            switch (state) {
                case STATE_VERSION: {
                    if (name == fVersionSymbol) {
                        if (!sawSpace) {
                            reportFatalError(scanningTextDecl
                                       ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
                                       : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
                                             null);
                        }
                        version = fString.toString();
                        state = STATE_ENCODING;
                        if (!version.equals("1.0")) {
                            // REVISIT: XML REC says we should throw an error
                            // in such cases.
                            // some may object the throwing of fatalError.
                            err.jspError("jsp.error.xml.versionNotSupported",
                                         version);
                        }
                    } else if (name == fEncodingSymbol) {
                        // encoding appeared first: only a text declaration
                        // may omit the version
                        if (!scanningTextDecl) {
                            err.jspError("jsp.error.xml.versionInfoRequired");
                        }
                        if (!sawSpace) {
                            reportFatalError(scanningTextDecl
                                      ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
                                      : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
                                             null);
                        }
                        encoding = fString.toString();
                        state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                    } else {
                        if (scanningTextDecl) {
                            err.jspError("jsp.error.xml.encodingDeclRequired");
                        }
                        else {
                            err.jspError("jsp.error.xml.versionInfoRequired");
                        }
                    }
                    break;
                }
                case STATE_ENCODING: {
                    if (name == fEncodingSymbol) {
                        if (!sawSpace) {
                            reportFatalError(scanningTextDecl
                                      ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
                                      : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
                                             null);
                        }
                        encoding = fString.toString();
                        state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                        // TODO: check encoding name; set encoding on
                        //       entity scanner
                    } else if (!scanningTextDecl && name == fStandaloneSymbol) {
                        // XML declaration without encoding: standalone may
                        // directly follow the version
                        if (!sawSpace) {
                            err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
                        }
                        standalone = fString.toString();
                        state = STATE_DONE;
                        if (!standalone.equals("yes") && !standalone.equals("no")) {
                            err.jspError("jsp.error.xml.sdDeclInvalid");
                        }
                    } else {
                        err.jspError("jsp.error.xml.encodingDeclRequired");
                    }
                    break;
                }
                case STATE_STANDALONE: {
                    if (name == fStandaloneSymbol) {
                        if (!sawSpace) {
                            err.jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
                        }
                        standalone = fString.toString();
                        state = STATE_DONE;
                        if (!standalone.equals("yes") && !standalone.equals("no")) {
                            err.jspError("jsp.error.xml.sdDeclInvalid");
                        }
                    } else {
                        err.jspError("jsp.error.xml.encodingDeclRequired");
                    }
                    break;
                }
                default: {
                    // STATE_DONE: any further pseudo-attribute is an error
                    err.jspError("jsp.error.xml.noMorePseudoAttributes");
                }
            }
            sawSpace = skipSpaces();
        }
        // REVISIT: should we remove this error reporting?
        if (scanningTextDecl && state != STATE_DONE) {
            err.jspError("jsp.error.xml.morePseudoAttributes");
        }

        // If there is no data in the xml or text decl then we fail to report
        // error for version or encoding info above.
        if (scanningTextDecl) {
            if (!dataFoundForTarget && encoding == null) {
                err.jspError("jsp.error.xml.encodingDeclRequired");
            }
        } else {
            if (!dataFoundForTarget && version == null) {
                err.jspError("jsp.error.xml.versionInfoRequired");
            }
        }

        // end: the declaration must be closed by "?>"
        if (!skipChar('?')) {
            err.jspError("jsp.error.xml.xmlDeclUnterminated");
        }
        if (!skipChar('>')) {
            err.jspError("jsp.error.xml.xmlDeclUnterminated");

        }

        // fill in return array
        pseudoAttributeValues[0] = version;
        pseudoAttributeValues[1] = encoding;
        pseudoAttributeValues[2] = standalone;
    
public booleanskipChar(int c)
Skips a character appearing immediately on the input.

Note: The character is consumed only if it matches the specified character.

param
c The character to skip.
return
Returns true if the character was skipped.
throws
IOException Thrown if i/o error occurs.
throws
EOFException Thrown on end of file.


	// Consumes the next input character only if it equals c. When c is
	// '\n' and the entity is external, a '\r' (optionally followed by
	// '\n') is also accepted as a newline. Line and column counters of
	// the current entity are updated for whatever is consumed.

	// load more characters, if needed
	if (fCurrentEntity.position == fCurrentEntity.count) {
	    load(0, true);
	}

	// skip character
	int cc = fCurrentEntity.ch[fCurrentEntity.position];
	if (cc == c) {
	    fCurrentEntity.position++;
	    if (c == '\n') {
		fCurrentEntity.lineNumber++;
		fCurrentEntity.columnNumber = 1;
	    }
	    else {
		fCurrentEntity.columnNumber++;
	    }
	    return true;
	} else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) {
	    // handle newlines: keep the '\r' at the start of the buffer
	    // while more input is loaded behind it
	    if (fCurrentEntity.position == fCurrentEntity.count) {
		fCurrentEntity.ch[0] = (char)cc;
		load(1, false);
	    }
	    fCurrentEntity.position++;
	    // swallow the '\n' of a "\r\n" pair as part of the same newline
	    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
		fCurrentEntity.position++;
	    }
	    fCurrentEntity.lineNumber++;
	    fCurrentEntity.columnNumber = 1;
	    return true;
	}

	// character was not skipped
	return false;

    
public booleanskipSpaces()
Skips space characters appearing immediately on the input.

Note: The characters are consumed only if they are space characters.

return
Returns true if at least one space character was skipped.
throws
IOException Thrown if i/o error occurs.
throws
EOFException Thrown on end of file.
see
XMLChar#isSpace


	// Consumes a run of characters for which XMLChar.isSpace is true,
	// updating the line and column counters of the current entity, and
	// reports whether at least one such character was consumed.

	// load more characters, if needed
	if (fCurrentEntity.position == fCurrentEntity.count) {
	    load(0, true);
	}

	// skip spaces
	int c = fCurrentEntity.ch[fCurrentEntity.position];
	if (XMLChar.isSpace(c)) {
	    boolean external = fCurrentEntity.isExternal();
	    do {
		boolean entityChanged = false;
		// handle newlines
		if (c == '\n' || (external && c == '\r')) {
		    fCurrentEntity.lineNumber++;
		    fCurrentEntity.columnNumber = 1;
		    // newline is the last buffered char: keep it at index 0
		    // and load more input behind it
		    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
			fCurrentEntity.ch[0] = (char)c;
			entityChanged = load(1, true);
			if (!entityChanged)
			    // the load changed the position to be 1,
			    // need to restore it when entity not changed
			    fCurrentEntity.position = 0;
		    }
		    if (c == '\r' && external) {
			// REVISIT: Does this need to be updated to fix the
			//          #x0D ^#x0A newline normalization problem? -Ac
			// step over the '\n' of a "\r\n" pair, or step back
			// if the next char is something else
			if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
			    fCurrentEntity.position--;
			}
		    }
		    /*** NEWLINE NORMALIZATION ***
			 else {
			 if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
			 && external) {
			 fCurrentEntity.position++;
			 }
			 }
			 /***/
		}
		else {
		    fCurrentEntity.columnNumber++;
		}
		// load more characters, if needed
		if (!entityChanged)
		    fCurrentEntity.position++;
		if (fCurrentEntity.position == fCurrentEntity.count) {
		    load(0, true);
		}
	    } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
	    return true;
	}

	// no spaces were found
	return false;

    
public booleanskipString(java.lang.String s)
Skips the specified string appearing immediately on the input.

Note: The characters are consumed only if the entire string matches the input.

param
s The string to skip.
return
Returns true if the string was skipped.
throws
IOException Thrown if i/o error occurs.
throws
EOFException Thrown on end of file.


	// Make sure there is at least one buffered character to compare.
	if (fCurrentEntity.position == fCurrentEntity.count) {
	    load(0, true);
	}

	// Match s one character at a time, rewinding on the first mismatch.
	final int len = s.length();
	int i = 0;
	while (i < len) {
	    // number of characters consumed once the current one is read
	    final int consumed = i + 1;
	    final char actual = fCurrentEntity.ch[fCurrentEntity.position++];
	    if (actual != s.charAt(i)) {
		fCurrentEntity.position -= consumed;
		return false;
	    }
	    final boolean moreToMatch = i < len - 1;
	    if (moreToMatch && fCurrentEntity.position == fCurrentEntity.count) {
		// Buffer exhausted mid-match: move the matched prefix to the
		// front so it can still be rewound, then load behind it.
		System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - consumed,
				 fCurrentEntity.ch, 0, consumed);
		// REVISIT: Can a string to be skipped cross an
		//          entity boundary? -Ac
		if (load(consumed, false)) {
		    fCurrentEntity.position -= consumed;
		    return false;
		}
	    }
	    i++;
	}

	// The whole string matched; advance the column by its length.
	fCurrentEntity.columnNumber += len;
	return true;