Methods Summary |
---|
private void | attr(Pair att)Parses an attribute.
This recursive method is responsible for prefix addition
(mPref ) and prefix mapping reports on the way down. The
element's start tag end triggers the return process. The method then
on it's way back resolves prefixes and accumulates attributes.
Note that this method will not report namespace declaration attributes
(xmlns* attributes), the
{@link DefaultHandler#startPrefixMapping startPrefixMapping} method
is invoked instead.
Pair next = null;
char norm = 'c"; // CDATA-type normalization by default [#3.3.3]
String val;
String type;
try {
switch (wsskip()) {
case '/":
case '>":
// Go through all defined attributes on current tag to
// find defaults
for (Pair def = att.list; def != null; def = def.next) {
if (def.list != null) {
// Attribut definition with default value
Pair act = att.next;
while (act != null) {
if (act.eqname(def.chars) == true)
break;
act = act.next;
}
if (act == null) {
// Add default attribute
push(new Input(def.list.chars));
attr(att);
return;
}
}
}
// Ensure the attribute string array capacity
mAttrs.setLength(mAttrIdx);
mItems = mAttrs.mItems;
return;
default:
// Read the attribute name and value
att.chars = qname(mIsNSAware);
att.name = att.local();
type = "CDATA";
if (att.list != null) {
Pair attr = find(att.list, att.chars);
if (attr != null) {
switch (attr.id) {
case 'i":
type = "ID";
norm = 'i";
break;
case 'r":
type = "IDREF";
norm = 'i";
break;
case 'R":
type = "IDREFS";
norm = 'i";
break;
case 'n":
type = "ENTITY";
norm = 'i";
break;
case 'N":
type = "ENTITIES";
norm = 'i";
break;
case 't":
type = "NMTOKEN";
norm = 'i";
break;
case 'T":
type = "NMTOKENS";
norm = 'i";
break;
case 'u":
type = "NMTOKEN";
norm = 'i";
break;
case 'o":
type = "NOTATION";
norm = 'i";
break;
case 'c":
norm = 'c";
break;
default:
panic(FAULT);
break;
}
}
}
wsskip();
if (next() != '=")
panic(FAULT);
bqstr(norm); // read the value with normalization.
val = new String(mBuff, 1, mBuffIdx);
// Put a namespace declaration on top of the prefix stack
if ((mIsNSAware == false) || (isdecl(att, val) == false)) {
// An ordinary attribute
mAttrIdx++;
// Recursive call to parse the next attribute
next = pair(att);
next.list = att.list;
attr(next);
mAttrIdx--;
// Add the attribute to the attributes string array
char idx = (char)(mAttrIdx << 3);
mItems[idx + 1] = att.qname(); // attr qname
mItems[idx + 2] = (mIsNSAware)? att.name: ""; // attr local name
mItems[idx + 3] = val; // attr value
mItems[idx + 4] = type; // attr type
// Resolve the prefix if any and report the attribute
// NOTE: The attribute does not accept the default namespace.
mItems[idx + 0] = (att.chars[0] != 0)? rslv(att.chars): "";
} else {
// A namespace declaration
// Report a start of the new mapping
mHand.startPrefixMapping(mPref.name, mPref.value);
// Recursive call to parse the next attribute
next = pair(att);
next.list = att.list;
attr(next);
// NOTE: The namespace declaration is not reported.
}
break;
}
} finally {
if (next != null)
del(next);
}
|
private void | back()Puts back the last read character.
This method MUST NOT be called more then once after
each call of {@link #next next} method.
if(mChIdx <= 0)
panic(FAULT);
mChIdx--;
|
private void | bappend(char ch, char mode)Appends a character to parser's buffer with normalization.
// This implements attribute value normalization as
// described in the XML specification [#3.3.3].
switch (mode) {
case 'i": // non CDATA normalization
switch (ch) {
case ' ":
case '\n":
case '\r":
case '\t":
if ((mBuffIdx > 0) && (mBuff[mBuffIdx] != ' "))
bappend(' ");
return;
default:
break;
}
break;
case 'c": // CDATA normalization
switch (ch) {
case '\n":
case '\r":
case '\t":
ch = ' ";
break;
default:
break;
}
break;
default: // no normalization
break;
}
mBuffIdx++;
if (mBuffIdx < mBuff.length) {
mBuff[mBuffIdx] = ch;
} else {
mBuffIdx--;
bappend(ch);
}
|
private void | bappend(char ch)Appends a character to parser's buffer.
try {
mBuff[++mBuffIdx] = ch;
} catch (Exception exp) {
// Double the buffer size
char buff[] = new char[mBuff.length << 1];
System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
mBuff = buff;
mBuff[mBuffIdx] = ch;
}
|
private void | bcopy(int cidx, int bidx)Appends (mChIdx - cidx) characters from character buffer (mChars) to
parser's buffer (mBuff).
int length = mChIdx - cidx;
if ((bidx + length + 1) >= mBuff.length) {
// Expand the buffer
char buff[] = new char[mBuff.length + length];
System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
mBuff = buff;
}
System.arraycopy(mChars, cidx, mBuff, bidx, length);
mBuffIdx += length;
|
private void | bflash()Reports characters and empties the parser's buffer.
if (mBuffIdx >= 0) {
// Textual data has been read
mHand.characters(mBuff, 0, (mBuffIdx + 1));
mBuffIdx = -1;
}
|
private char | bkeyword()Recognizes a keyword.
This is low level routine which recognizes one of keywords in the buffer.
Keyword Id
ID - i
IDREF - r
IDREFS - R
ENTITY - n
ENTITIES - N
NMTOKEN - t
NMTOKENS - T
ELEMENT - e
ATTLIST - a
NOTATION - o
CDATA - c
REQUIRED - Q
IMPLIED - I
FIXED - F
String str = new String(mBuff, 1, mBuffIdx);
switch (str.length()) {
case 2: // ID
return ("ID".equals(str) == true)? 'i": '?";
case 5: // IDREF, CDATA, FIXED
switch (mBuff[1]) {
case 'I":
return ("IDREF".equals(str) == true)? 'r": '?";
case 'C":
return ("CDATA".equals(str) == true)? 'c": '?";
case 'F":
return ("FIXED".equals(str) == true)? 'F": '?";
default:
break;
}
break;
case 6: // IDREFS, ENTITY
switch (mBuff[1]) {
case 'I":
return ("IDREFS".equals(str) == true)? 'R": '?";
case 'E":
return ("ENTITY".equals(str) == true)? 'n": '?";
default:
break;
}
break;
case 7: // NMTOKEN, IMPLIED, ATTLIST, ELEMENT
switch (mBuff[1]) {
case 'I":
return ("IMPLIED".equals(str) == true)? 'I": '?";
case 'N":
return ("NMTOKEN".equals(str) == true)? 't": '?";
case 'A":
return ("ATTLIST".equals(str) == true)? 'a": '?";
case 'E":
return ("ELEMENT".equals(str) == true)? 'e": '?";
default:
break;
}
break;
case 8: // ENTITIES, NMTOKENS, NOTATION, REQUIRED
switch (mBuff[2]) {
case 'N":
return ("ENTITIES".equals(str) == true)? 'N": '?";
case 'M":
return ("NMTOKENS".equals(str) == true)? 'T": '?";
case 'O":
return ("NOTATION".equals(str) == true)? 'o": '?";
case 'E":
return ("REQUIRED".equals(str) == true)? 'Q": '?";
default:
break;
}
break;
default:
break;
}
return '?";
|
private void | bname(boolean ns)Reads a qualified xml name.
This is low level routine which leaves a qName in the buffer.
The characters of a qualified name is an array of characters. The
first (chars[0]) character is the index of the colon character which
separates the prefix from the local name. If the index is zero, the
name does not contain separator or the parser works in the namespace
unaware mode. The length of qualified name is the length of the array
minus one.
char ch;
char type;
mBuffIdx++; // allocate a char for colon offset
int bqname = mBuffIdx;
int bcolon = bqname;
int bchidx = bqname + 1;
int bstart = bchidx;
int cstart = mChIdx;
short st = (short)((ns == true)? 0: 2);
while (true) {
// Read next character
if (mChIdx >= mChLen) {
bcopy(cstart, bstart);
next();
mChIdx--; // back();
cstart = mChIdx;
bstart = bchidx;
}
ch = mChars[mChIdx++];
type = (char)0; // [X]
if (ch < 0x80) {
type = (char)nmttyp[ch];
} else if (ch == EOS) {
panic(FAULT);
}
// Parse QName
switch (st) {
case 0: // read the first char of the prefix
case 2: // read the first char of the suffix
switch (type) {
case 0: // [aA_X]
bchidx++; // append char to the buffer
st++; // (st == 0)? 1: 3;
break;
case 1: // [:]
mChIdx--; // back();
st++; // (st == 0)? 1: 3;
break;
default:
panic(FAULT);
}
break;
case 1: // read the prefix
case 3: // read the suffix
switch (type) {
case 0: // [aA_X]
case 2: // [.-d]
bchidx++; // append char to the buffer
break;
case 1: // [:]
bchidx++; // append char to the buffer
if (ns == true) {
if (bcolon != bqname)
panic(FAULT); // it must be only one colon
bcolon = bchidx - 1;
if (st == 1)
st = 2;
}
break;
default:
mChIdx--; // back();
bcopy(cstart, bstart);
mBuff[bqname] = (char)(bcolon - bqname);
return;
}
break;
default:
panic(FAULT);
}
}
|
private void | bntok()Reads a nmtoken.
This is low level routine which leaves a nmtoken in the buffer.
char ch;
mBuffIdx = -1;
bappend((char)0); // default offset to the colon char
while (true) {
ch = next();
switch (chtyp(ch)) {
case 'a":
case 'A":
case 'd":
case '.":
case ':":
case '-":
case '_":
case 'X":
bappend(ch);
break;
default:
back();
return;
}
}
|
private java.io.Reader | bom(java.io.InputStream is, char hint)Determines the entity encoding.
This method gets encoding from Byte Order Mask [#4.3.3] if any.
Note, the first byte returned by the entity's byte stream has
to be the first byte in the entity. Also, there is no support
for UCS-4.
int val = is.read();
switch (val) {
case 0xef: // UTF-8
if (hint == 'U") // must be UTF-16
panic(FAULT);
if (is.read() != 0xbb)
panic(FAULT);
if (is.read() != 0xbf)
panic(FAULT);
return new ReaderUTF8(is);
case 0xfe: // UTF-16, big-endian
if (is.read() != 0xff)
panic(FAULT);
return new ReaderUTF16(is, 'b");
case 0xff: // UTF-16, little-endian
if (is.read() != 0xfe)
panic(FAULT);
return new ReaderUTF16(is, 'l");
case -1:
mChars[mChIdx++] = EOS;
return new ReaderUTF8(is);
default:
if (hint == 'U") // must be UTF-16
panic(FAULT);
// Read the rest of UTF-8 character
switch (val & 0xf0) {
case 0xc0:
case 0xd0:
mChars[mChIdx++] = (char)(((val & 0x1f) << 6) | (is.read() & 0x3f));
break;
case 0xe0:
mChars[mChIdx++] = (char)(((val & 0x0f) << 12) |
((is.read() & 0x3f) << 6) | (is.read() & 0x3f));
break;
case 0xf0: // UCS-4 character
throw new UnsupportedEncodingException();
default:
mChars[mChIdx++] = (char)val;
break;
}
return null;
}
|
private void | bqstr(char flag)Reads a single or double quotted string in to the buffer.
This method resolves entities inside a string unless the parser
parses DTD.
Input inp = mInp; // remember the original input
mBuffIdx = -1;
bappend((char)0); // default offset to the colon char
char ch;
for (short st = 0; st >= 0;) {
ch = (mChIdx < mChLen)? mChars[mChIdx++]: next();
switch (st) {
case 0: // read a single or double quote
switch (ch) {
case ' ":
case '\n":
case '\r":
case '\t":
break;
case '\'":
st = 2; // read a single quoted string
break;
case '\"":
st = 3; // read a double quoted string
break;
default:
panic(FAULT);
break;
}
break;
case 2: // read a single quoted string
case 3: // read a double quoted string
switch (ch) {
case '\'":
if ((st == 2) && (mInp == inp))
st = -1;
else
bappend(ch);
break;
case '\"":
if ((st == 3) && (mInp == inp))
st = -1;
else
bappend(ch);
break;
case '&":
if (flag != 'd")
ent(flag);
else
bappend(ch);
break;
case '%":
if (flag == 'd")
pent('-");
else
bappend(ch);
break;
case '<":
if ((flag == '-") || (flag == 'd"))
bappend(ch);
else
panic(FAULT);
break;
case EOS: // EOS before single/double quote
panic(FAULT);
case '\r": // EOL processing [#2.11 & #3.3.3]
if (flag != ' " && mInp.next == null) {
if (next() != '\n")
back();
ch = '\n";
}
default:
bappend(ch, flag);
break;
}
break;
default:
panic(FAULT);
}
}
// There is maximum one space at the end of the string in
// i-mode (non CDATA normalization) and it has to be removed.
if ((flag == 'i") && (mBuff[mBuffIdx] == ' "))
mBuffIdx -= 1;
|
private void | cdat()Parses a character data.
char ch;
mBuffIdx = -1;
for (short st = 0; st >= 0;) {
ch = next();
switch (st) {
case 0: // the first '[' of the CDATA open
if (ch == '[")
st = 1;
else
panic(FAULT);
break;
case 1: // read "CDATA"
if (chtyp(ch) == 'A") {
bappend(ch);
} else {
if ("CDATA".equals(
new String(mBuff, 0, mBuffIdx + 1)) != true)
panic(FAULT);
back();
st = 2;
}
break;
case 2: // the second '[' of the CDATA open
if (ch != '[")
panic(FAULT);
mBuffIdx = -1;
st = 3;
break;
case 3: // read data before the first ']'
if (ch != ']")
bappend(ch);
else
st = 4;
break;
case 4: // read the second ']' or continue to read the data
if (ch != ']") {
bappend(']");
bappend(ch);
st = 3;
} else {
st = 5;
}
break;
case 5: // read '>' or continue to read the data
switch (ch) {
case ']":
bappend(']");
break;
case '>":
bflash();
st = -1;
break;
default:
bappend(']");
bappend(']");
bappend(ch);
st = 3;
break;
}
break;
default:
panic(FAULT);
}
}
|
private char | chtyp(char ch)Maps a character to it's type.
Possible character type values are:
- ' ' for any kind of white space character;
- 'a' for any lower case alphabetical character value;
- 'A' for any upper case alphabetical character value;
- 'd' for any decimal digit character value;
- 'z' for any character less then ' ' except
'\t', '\n', '\r';
- 'X' for any not ASCII character;
- 'Z' for EOS character.
An ASCII (7 bit) character which does not fall in any category listed
above is mapped to it self.
if (ch < 0x80)
return (char)asctyp[ch];
return (ch != EOS)? 'X": 'Z";
|
private void | comm()Parses a comment.
if (mSt == 0)
mSt = 1; // misc before DTD
char ch;
for (short st = 0; st >= 0;) {
ch = (mChIdx < mChLen)? mChars[mChIdx++]: next();
switch (st) {
case 0: // first '-' of the comment open
if (ch == '-")
st = 1;
else
panic(FAULT);
break;
case 1: // secind '-' of the comment open
if (ch == '-")
st = 2;
else
panic(FAULT);
break;
case 2: // skip the comment body
switch (ch) {
case '-":
st = 3;
break;
case EOS:
panic(FAULT);
break;
default:
break;
}
break;
case 3: // second '-' of the comment close
st = (ch == '-")? (short)4: (short)2;
break;
case 4: // '>' of the comment close
if (ch == '>")
st = -1;
else
panic(FAULT);
break;
default:
panic(FAULT);
}
}
|
private Pair | del(Pair pair)Deletes an instance of a pair.
Pair next = pair.next;
pair.name = null;
pair.value = null;
pair.chars = null;
pair.list = null;
pair.next = mDltd;
mDltd = pair;
return next;
|
private void | dtd()Parses the document type declaration.
char ch;
String str = null;
String name = null;
Pair psid = null;
// read 'DOCTYPE'
if ("DOCTYPE".equals(name(false)) != true)
panic(FAULT);
mSt = 2; // DTD
for (short st = 0; st >= 0;) {
ch = next();
switch (st) {
case 0: // read the document type name
if (chtyp(ch) != ' ") {
back();
name = name(mIsNSAware);
wsskip();
st = 1; // read 'PUPLIC' or 'SYSTEM'
}
break;
case 1: // read 'PUPLIC' or 'SYSTEM'
switch (chtyp(ch)) {
case 'A":
back();
psid = pubsys(' ");
st = 2; // skip spaces before internal subset
break;
case '[":
back();
st = 2; // skip spaces before internal subset
break;
case '>":
back();
st = 3; // skip spaces after internal subset
break;
default:
panic(FAULT);
}
break;
case 2: // skip spaces before internal subset
switch (chtyp(ch)) {
case '[":
// Process internal subset
dtdsub();
st = 3; // skip spaces after internal subset
break;
case '>":
// There is no internal subset
back();
st = 3; // skip spaces after internal subset
break;
case ' ":
// skip white spaces
break;
default:
panic(FAULT);
}
break;
case 3: // skip spaces after internal subset
switch (chtyp(ch)) {
case '>":
if (psid != null) {
// Report the DTD external subset
InputSource is = mHand.resolveEntity(psid.name, psid.value);
if (is != null) {
if (mIsSAlone == false) {
// Set the end of DTD external subset char
back();
setch(']");
// Set the DTD external subset InputSource
push(new Input(BUFFSIZE_READER));
setinp(is);
mInp.pubid = psid.name;
mInp.sysid = psid.value;
// Parse the DTD external subset
dtdsub();
} else {
// Unresolved DTD external subset
mHand.skippedEntity("[dtd]");
// Release reader and stream
if (is.getCharacterStream() != null) {
try {
is.getCharacterStream().close();
} catch (IOException ioe) {
}
}
if (is.getByteStream() != null) {
try {
is.getByteStream().close();
} catch (IOException ioe) {
}
}
}
} else {
// Unresolved DTD external subset
mHand.skippedEntity("[dtd]");
}
del(psid);
}
st = -1; // end of DTD
break;
case ' ":
// skip white spaces
break;
default:
panic(FAULT);
}
break;
default:
panic(FAULT);
}
}
mSt = 3; // misc after DTD
|
private void | dtdatt(Pair elm)Parses an attribute declaration.
The attribut uses the following fields of Pair object:
chars - characters of qualified name
id - the type identifier of the attribute
list - a pair which holds the default value (chars field)
char attqn[] = null;
Pair att = null;
char ch;
for (short st = 0; st >= 0;) {
ch = next();
switch (st) {
case 0: // the attribute name
switch (chtyp(ch)) {
case 'a":
case 'A":
case '_":
case 'X":
case ':":
back();
// Get the attribut from the list or add a new one.
attqn = qname(mIsNSAware);
att = find(elm.list, attqn);
if (att == null) {
// New attribute declaration
att = pair(elm.list);
att.chars = attqn;
elm.list = att;
} else {
// Do not override the attribute declaration [#3.3]
att = pair(null);
att.chars = attqn;
att.id = 'c";
}
wsskip();
st = 1;
break;
case '%":
pent(' ");
break;
case ' ":
break;
default:
panic(FAULT);
break;
}
break;
case 1: // the attribute type
switch (chtyp(ch)) {
case '(":
att.id = 'u"; // enumeration type
st = 2; // read the first element of the list
break;
case '%":
pent(' ");
break;
case ' ":
break;
default:
back();
bntok(); // read type id
att.id = bkeyword();
switch (att.id) {
case 'o": // NOTATION
if (wsskip() != '(")
panic(FAULT);
ch = next();
st = 2; // read the first element of the list
break;
case 'i": // ID
case 'r": // IDREF
case 'R": // IDREFS
case 'n": // ENTITY
case 'N": // ENTITIES
case 't": // NMTOKEN
case 'T": // NMTOKENS
case 'c": // CDATA
wsskip();
st = 4; // read default declaration
break;
default:
panic(FAULT);
break;
}
break;
}
break;
case 2: // read the first element of the list
switch (chtyp(ch)) {
case 'a":
case 'A":
case 'd":
case '.":
case ':":
case '-":
case '_":
case 'X":
back();
switch (att.id) {
case 'u": // enumeration type
bntok();
break;
case 'o": // NOTATION
mBuffIdx = -1;
bname(false);
break;
default:
panic(FAULT);
break;
}
wsskip();
st = 3; // read next element of the list
break;
case '%":
pent(' ");
break;
case ' ":
break;
default:
panic(FAULT);
break;
}
break;
case 3: // read next element of the list
switch (ch) {
case ')":
wsskip();
st = 4; // read default declaration
break;
case '|":
wsskip();
switch (att.id) {
case 'u": // enumeration type
bntok();
break;
case 'o": // NOTATION
mBuffIdx = -1;
bname(false);
break;
default:
panic(FAULT);
break;
}
wsskip();
break;
case '%":
pent(' ");
break;
default:
panic(FAULT);
break;
}
break;
case 4: // read default declaration
switch (ch) {
case '#":
bntok();
switch (bkeyword()) {
case 'F": // FIXED
switch (wsskip()) {
case '\"":
case '\'":
st = 5; // read the default value
break;
default:
st = -1;
break;
}
break;
case 'Q": // REQUIRED
case 'I": // IMPLIED
st = -1;
break;
default:
panic(FAULT);
break;
}
break;
case '\"":
case '\'":
back();
st = 5; // read the default value
break;
case ' ":
case '\n":
case '\r":
case '\t":
break;
case '%":
pent(' ");
break;
default:
back();
st = -1;
break;
}
break;
case 5: // read the default value
switch (ch) {
case '\"":
case '\'":
back();
bqstr('d"); // the value in the mBuff now
att.list = pair(null);
// Create a string like "attqname='value' "
att.list.chars = new char[att.chars.length + mBuffIdx + 3];
System.arraycopy(
att.chars, 1, att.list.chars, 0, att.chars.length - 1);
att.list.chars[att.chars.length - 1] = '=";
att.list.chars[att.chars.length] = ch;
System.arraycopy(
mBuff, 1, att.list.chars, att.chars.length + 1, mBuffIdx);
att.list.chars[att.chars.length + mBuffIdx + 1] = ch;
att.list.chars[att.chars.length + mBuffIdx + 2] = ' ";
st = -1;
break;
default:
panic(FAULT);
break;
}
break;
default:
panic(FAULT);
break;
}
}
|
private void | dtdattl()Parses an attribute list declaration.
This method parses the declaration up to the closing angle
bracket.
char elmqn[] = null;
Pair elm = null;
char ch;
for (short st = 0; st >= 0;) {
ch = next();
switch (st) {
case 0: // read the element name
switch (chtyp(ch)) {
case 'a":
case 'A":
case '_":
case 'X":
case ':":
back();
// Get the element from the list or add a new one.
elmqn = qname(mIsNSAware);
elm = find(mAttL, elmqn);
if (elm == null) {
elm = pair(mAttL);
elm.chars = elmqn;
mAttL = elm;
}
st = 1; // read an attribute declaration
break;
case ' ":
break;
case '%":
pent(' ");
break;
default:
panic(FAULT);
break;
}
break;
case 1: // read an attribute declaration
switch (chtyp(ch)) {
case 'a":
case 'A":
case '_":
case 'X":
case ':":
back();
dtdatt(elm);
if (wsskip() == '>")
return;
break;
case ' ":
break;
case '%":
pent(' ");
break;
default:
panic(FAULT);
break;
}
break;
default:
panic(FAULT);
break;
}
}
|
private void | dtdelm()Parses an element declaration.
This method parses the declaration up to the closing angle
bracket.
// This is stub implementation which skips an element
// declaration.
wsskip();
name(mIsNSAware);
char ch;
while (true) {
ch = next();
switch (ch) {
case '>":
back();
return;
case EOS:
panic(FAULT);
default:
break;
}
}
|
private void | dtdent()Parses an entity declaration.
This method fills the general (mEnt ) and parameter
(mPEnt ) entity look up table.
String str = null;
char[] val = null;
Input inp = null;
Pair ids = null;
char ch;
for (short st = 0; st >= 0;) {
ch = next();
switch (st) {
case 0: // skip white spaces before entity name
switch (chtyp(ch)) {
case ' ":
// Skip white spaces
break;
case '%":
// Parameter entity or parameter entity declaration.
ch = next();
back();
if (chtyp(ch) == ' ") {
// Parameter entity declaration.
wsskip();
str = name(false);
switch (chtyp(wsskip())) {
case 'A":
// Read the external identifier
ids = pubsys(' ");
if (wsskip() == '>") {
// External parsed entity
if (mPEnt.containsKey(str) == false) { // [#4.2]
inp = new Input();
inp.pubid = ids.name;
inp.sysid = ids.value;
mPEnt.put(str, inp);
}
} else {
panic(FAULT);
}
del(ids);
st = -1; // the end of declaration
break;
case '\"":
case '\'":
// Read the parameter entity value
bqstr('d");
// Create the parameter entity value
val = new char[mBuffIdx + 1];
System.arraycopy(mBuff, 1, val, 1, val.length - 1);
// Add surrounding spaces [#4.4.8]
val[0] = ' ";
// Add the entity to the entity look up table
if (mPEnt.containsKey(str) == false) { // [#4.2]
inp = new Input(val);
inp.pubid = mInp.pubid;
inp.sysid = mInp.sysid;
mPEnt.put(str, inp);
}
st = -1; // the end of declaration
break;
default:
panic(FAULT);
break;
}
} else {
// Parameter entity reference.
pent(' ");
}
break;
default:
back();
str = name(false);
st = 1; // read entity declaration value
break;
}
break;
case 1: // read entity declaration value
switch (chtyp(ch)) {
case '\"": // internal entity
case '\'":
back();
bqstr('d"); // read a string into the buffer
if (mEnt.get(str) == null) {
// Create general entity value
val = new char[mBuffIdx];
System.arraycopy(mBuff, 1, val, 0, val.length);
// Add the entity to the entity look up table
if (mEnt.containsKey(str) == false) { // [#4.2]
inp = new Input(val);
inp.pubid = mInp.pubid;
inp.sysid = mInp.sysid;
mEnt.put(str, inp);
}
}
st = -1; // the end of declaration
break;
case 'A": // external entity
back();
ids = pubsys(' ");
switch (wsskip()) {
case '>": // external parsed entity
if (mEnt.containsKey(str) == false) { // [#4.2]
inp = new Input();
inp.pubid = ids.name;
inp.sysid = ids.value;
mEnt.put(str, inp);
}
break;
case 'N": // external general unparsed entity
if ("NDATA".equals(name(false)) == true) {
wsskip();
mHand.unparsedEntityDecl(
str, ids.name, ids.value, name(false));
break;
}
default:
panic(FAULT);
break;
}
del(ids);
st = -1; // the end of declaration
break;
case ' ":
// Skip white spaces
break;
default:
panic(FAULT);
break;
}
break;
default:
panic(FAULT);
}
}
|
private void | dtdnot()Parses a notation declaration.
This method parses the declaration up to the closing angle
bracket.
wsskip();
String name = name(false);
wsskip();
Pair ids = pubsys('N");
mHand.notationDecl(name, ids.name, ids.value);
del(ids);
|
private void | dtdsub()Parses the document type declaration subset.
char ch;
for (short st = 0; st >= 0;) {
ch = next();
switch (st) {
case 0: // skip white spaces before a declaration
switch (chtyp(ch)) {
case '<":
ch = next();
switch (ch) {
case '?":
pi();
break;
case '!":
ch = next();
back();
if (ch == '-") {
comm();
break;
}
// markup or entity declaration
bntok();
switch (bkeyword()) {
case 'n":
dtdent();
break;
case 'a":
dtdattl(); // parse attributes declaration
break;
case 'e":
dtdelm(); // parse element declaration
break;
case 'o":
dtdnot(); // parse notation declaration
break;
default:
panic(FAULT); // unsupported markup declaration
break;
}
st = 1; // read the end of declaration
break;
default:
panic(FAULT);
break;
}
break;
case '%":
// A parameter entity reference
pent(' ");
break;
case ']":
// End of DTD subset
st = -1;
break;
case ' ":
// Skip white spaces
break;
case 'Z":
// End of stream
if (next() != ']")
panic(FAULT);
st = -1;
break;
default:
panic(FAULT);
}
break;
case 1: // read the end of declaration
switch (ch) {
case '>": // there is no notation
st = 0; // skip white spaces before a declaration
break;
case ' ":
case '\n":
case '\r":
case '\t":
// Skip white spaces
break;
default:
panic(FAULT);
break;
}
break;
default:
panic(FAULT);
}
}
|
private void | eappend(char ch)Recognizes the built-in entities lt, gt, amp,
apos, quot.
The initial state is 0x100. Any state belowe 0x100 is a built-in
entity replacement character.
switch (mESt) {
case 0x100: // "l" or "g" or "a" or "q"
switch (ch) {
case 'l": mESt = 0x101; break;
case 'g": mESt = 0x102; break;
case 'a": mESt = 0x103; break;
case 'q": mESt = 0x107; break;
default: mESt = 0x200; break;
}
break;
case 0x101: // "lt"
mESt = (ch == 't")? '<": (char)0x200;
break;
case 0x102: // "gt"
mESt = (ch == 't")? '>": (char)0x200;
break;
case 0x103: // "am" or "ap"
switch (ch) {
case 'm": mESt = 0x104; break;
case 'p": mESt = 0x105; break;
default: mESt = 0x200; break;
}
break;
case 0x104: // "amp"
mESt = (ch == 'p")? '&": (char)0x200;
break;
case 0x105: // "apo"
mESt = (ch == 'o")? (char)0x106: (char)0x200;
break;
case 0x106: // "apos"
mESt = (ch == 's")? '\'": (char)0x200;
break;
case 0x107: // "qu"
mESt = (ch == 'u")? (char)0x108: (char)0x200;
break;
case 0x108: // "quo"
mESt = (ch == 'o")? (char)0x109: (char)0x200;
break;
case 0x109: // "quot"
mESt = (ch == 't")? '\"": (char)0x200;
break;
case '<": // "lt"
case '>": // "gt"
case '&": // "amp"
case '\'": // "apos"
case '\"": // "quot"
mESt = 0x200;
default:
break;
}
|
private void | elm()Parses an element.
This recursive method is responsible for prefix scope control
(mPref ). When the element is leaving the scope all
prefixes defined within the element are removed from the prefix
stack.
// Save the current top of the prefix stack
Pair pref = mPref;
// Read an element name and put it on top of the element stack
mElm = pair(mElm);
mElm.chars = qname(mIsNSAware);
mElm.name = mElm.local();
// Find the list of defined attributs of the current element
Pair elm = find(mAttL, mElm.chars);
// Read attributes till the end of the element tag
mAttrIdx = 0;
Pair att = pair(null);
att.list = (elm != null)? elm.list: null; // attrs defined on this elm
attr(att);
del(att);
// Read the element and it's content
mBuffIdx = -1;
char ch;
for (short st = 0; st >= 0;) {
ch = (mChIdx < mChLen)? mChars[mChIdx++]: next();
switch (st) {
case 0: // read the end of the element tag
case 1: // read the end of the empty element
switch (ch) {
case '>":
// Report the element
if (mIsNSAware == true) {
mElm.value = rslv(mElm.chars);
mHand.startElement(
mElm.value,
mElm.name,
"",
mAttrs);
} else {
mHand.startElement(
"",
"",
mElm.name,
mAttrs);
}
mItems = null;
st = (st == 0)? (short)2: (short)-1;
break;
case '/":
if (st != 0)
panic(FAULT);
st = 1;
break;
default:
panic(FAULT);
}
break;
case 2: // skip white space between tags
switch (ch) {
case ' ":
case '\t":
case '\n":
bappend(ch);
break;
case '\r": // EOL processing [#2.11]
if (next() != '\n")
back();
bappend('\n");
break;
case '<":
// Need revisit: With additional info from DTD and xml:space attr [#2.10]
// the following call can be supported:
// mHand.ignorableWhitespace(mBuff, 0, (mBuffIdx + 1));
bflash();
default:
back();
st = 3;
break;
}
break;
case 3: // read the text content of the element
switch (ch) {
case '&":
ent('x");
break;
case '<":
bflash();
switch (next()) {
case '/": // the end of the element content
// Check element's open/close tags balance
mBuffIdx = -1;
bname(mIsNSAware);
char[] chars = mElm.chars;
if (chars.length == (mBuffIdx + 1)) {
for (char i = 1; i <= mBuffIdx; i += 1) {
if (chars[i] != mBuff[i])
panic(FAULT);
}
} else {
panic(FAULT);
}
// Skip white spaces before '>'
if (wsskip() != '>")
panic(FAULT);
ch = next();
st = -1;
break;
case '!": // a comment or a CDATA
ch = next();
back();
switch (ch) {
case '-": // must be a comment
comm();
break;
case '[": // must be a CDATA section
cdat();
break;
default:
panic(FAULT);
}
break;
case '?": // processing instruction
pi();
break;
default: // must be the first char of an xml name
back();
elm(); // recursive call
break;
}
mBuffIdx = -1;
if (st != -1)
st = 2;
break;
case '\r": // EOL processing [#2.11]
if (next() != '\n")
back();
bappend('\n");
break;
case EOS:
panic(FAULT);
break;
default:
bappend(ch);
break;
}
break;
default:
panic(FAULT);
}
}
// Report the end of element
if (mIsNSAware == true)
mHand.endElement(mElm.value, mElm.name, "");
else
mHand.endElement("", "", mElm.name);
// Remove the top element tag
mElm = del(mElm);
// Restore the top of the prefix stack
while (mPref != pref) {
mHand.endPrefixMapping(mPref.name);
mPref = del(mPref);
}
|
private java.io.Reader | enc(java.lang.String name, java.io.InputStream is)Sets up the document reader.
// DO NOT CLOSE current reader if any!
if (name.equals("UTF-8"))
return new ReaderUTF8(is);
else if (name.equals("UTF-16LE"))
return new ReaderUTF16(is, 'l");
else if (name.equals("UTF-16BE"))
return new ReaderUTF16(is, 'b");
else
return new InputStreamReader(is, name);
|
private void | ent(char flag)Resoves an entity.
This method resolves built-in and character entity references. It is
also reports external entities to the application.
char ch;
int idx = mBuffIdx + 1;
Input inp = null;
String str = null;
mESt = 0x100; // reset the built-in entity recognizer
bappend('&");
for (short st = 0; st >= 0;) {
ch = (mChIdx < mChLen)? mChars[mChIdx++]: next();
switch (st) {
case 0: // the first character of the entity name
case 1: // read built-in entity name
switch (chtyp(ch)) {
case 'd":
case '.":
case '-":
if (st != 1)
panic(FAULT);
case 'a":
case 'A":
case '_":
case 'X":
bappend(ch);
eappend(ch);
st = 1;
break;
case ':":
if (mIsNSAware != false)
panic(FAULT);
bappend(ch);
eappend(ch);
st = 1;
break;
case ';":
if (mESt < 0x100) {
// The entity is a built-in entity
mBuffIdx = idx - 1;
bappend(mESt);
st = -1;
break;
} else if (mSt == 2) {
// In DTD entity declaration has to resolve character
// entities and include "as is" others. [#4.4.7]
bappend(';");
st = -1;
break;
}
// Convert an entity name to a string
str = new String(mBuff, idx + 1, mBuffIdx - idx);
inp = (Input)mEnt.get(str);
// Restore the buffer offset
mBuffIdx = idx - 1;
if (inp != null) {
if (inp.chars == null) {
// External entity
InputSource is = mHand.resolveEntity(inp.pubid, inp.sysid);
if (is != null) {
push(new Input(BUFFSIZE_READER));
setinp(is);
mInp.pubid = inp.pubid;
mInp.sysid = inp.sysid;
} else {
// Unresolved external entity
bflash();
if (flag != 'x")
panic(FAULT); // unknown entity within marckup
mHand.skippedEntity(str);
}
} else {
// Internal entity
push(inp);
}
} else {
// Unknown or general unparsed entity
bflash();
if (flag != 'x")
panic(FAULT); // unknown entity within marckup
mHand.skippedEntity(str);
}
st = -1;
break;
case '#":
if (st != 0)
panic(FAULT);
st = 2;
break;
default:
panic(FAULT);
}
break;
case 2: // read character entity
switch (chtyp(ch)) {
case 'd":
bappend(ch);
break;
case ';":
// Convert the character entity to a character
try {
int i = Integer.parseInt(
new String(mBuff, idx + 1, mBuffIdx - idx), 10);
if (i >= 0xffff)
panic(FAULT);
ch = (char)i;
} catch (NumberFormatException nfe) {
panic(FAULT);
}
// Restore the buffer offset
mBuffIdx = idx - 1;
if (ch == ' " || mInp.next != null)
bappend(ch, flag);
else
bappend(ch);
st = -1;
break;
case 'a":
// If the entity buffer is empty and ch == 'x'
if ((mBuffIdx == idx) && (ch == 'x")) {
st = 3;
break;
}
default:
panic(FAULT);
}
break;
case 3: // read hex character entity
switch (chtyp(ch)) {
case 'A":
case 'a":
case 'd":
bappend(ch);
break;
case ';":
// Convert the character entity to a character
try {
int i = Integer.parseInt(
new String(mBuff, idx + 1, mBuffIdx - idx), 16);
if (i >= 0xffff)
panic(FAULT);
ch = (char)i;
} catch (NumberFormatException nfe) {
panic(FAULT);
}
// Restore the buffer offset
mBuffIdx = idx - 1;
if (ch == ' " || mInp.next != null)
bappend(ch, flag);
else
bappend(ch);
st = -1;
break;
default:
panic(FAULT);
}
break;
default:
panic(FAULT);
}
}
|
private java.lang.String | eqstr(char flag)Reads an attribute value.
The grammar which this method can read is:
eqstr := S "=" qstr
qstr := S ("'" string "'") |
('"' string '"')
This method resolves entities inside a string unless the parser
parses DTD.
if (flag == '=") {
wsskip();
if (next() != '=")
panic(FAULT);
}
bqstr('-");
return new String(mBuff, 1, mBuffIdx);
|
private Pair | find(Pair chain, char[] qname)Finds a pair in the pair chain by a qualified name.
for (Pair pair = chain; pair != null; pair = pair.next) {
if (pair.eqname(qname) == true)
return pair;
}
return null;
|
public int | getColumnNumber()Return the column number where the current document event ends.
return -1;
|
public int | getLineNumber()Return the line number where the current document event ends.
return -1;
|
public java.lang.String | getPublicId()Return the public identifier for the current document event.
The return value is the public identifier of the document
entity or of the external parsed entity in which the markup
triggering the event appears.
return (mInp != null)? mInp.pubid: null;
|
public java.lang.String | getSystemId()Return the system identifier for the current document event.
The return value is the system identifier of the document
entity or of the external parsed entity in which the markup
triggering the event appears.
If the system identifier is a URL, the parser must resolve it
fully before passing it to the application.
return (mInp != null)? mInp.sysid: null;
|
public boolean | isNamespaceAware()Indicates whether or not this parser is configured to
understand namespaces.
return mIsNSAware;
|
public boolean | isValidating()Indicates whether or not this parser is configured to validate
XML documents.
return false;
|
private boolean | isdecl(Pair name, java.lang.String value)Recognizes and handles a namespace declaration.
This method identifies a type of namespace declaration if any and
puts new mapping on top of prefix stack.
if (name.chars[0] == 0) {
if ("xmlns".equals(name.name) == true) {
// New default namespace declaration
mPref = pair(mPref);
mPref.value = value;
mPref.name = "";
mPref.chars = NONS;
return true;
}
} else {
if (name.eqpref(XMLNS) == true) {
// New prefix declaration
int len = name.name.length();
mPref = pair(mPref);
mPref.value = value;
mPref.name = name.name;
mPref.chars = new char[len + 1];
mPref.chars[0] = (char)(len + 1);
name.name.getChars(0, len, mPref.chars, 1);
return true;
}
}
return false;
|
private java.lang.String | name(boolean ns)Reads a xml name.
The xml name must conform "Namespaces in XML" specification. Therefore
the ':' character is not allowed in the name. This method should be
used for PI and entity names which may not have a namespace according
to the specification mentioned above.
mBuffIdx = -1;
bname(ns);
return new String(mBuff, 1, mBuffIdx);
|
private char | next()Retrives the next character in the document.
if (mChIdx >= mChLen) {
if (mInp.src == null) {
pop(); // remove internal entity
return next();
}
// Read new portion of the document characters
int Num = mInp.src.read(mChars, 0, mChars.length);
if (Num < 0) {
if (mInp != mDoc) {
pop(); // restore the previous input
return next();
} else {
mChars[0] = EOS;
mChLen = 1;
}
}
else
mChLen = Num;
mChIdx = 0;
}
return mChars[mChIdx++];
|
private Pair | pair(Pair next)Provedes an instance of a pair.
Pair pair;
if (mDltd != null) {
pair = mDltd;
mDltd = pair.next;
} else {
pair = new Pair();
}
pair.next = next;
return pair;
|
private void | panic(java.lang.String msg)Notifies the handler about fatal parsing error.
SAXParseException spe = new SAXParseException(msg, this);
mHand.fatalError(spe);
throw spe; // [#1.2] fatal error definition
|
private void | parse(org.xml.sax.helpers.DefaultHandler handler)Parse the XML document content using the specified
{@link org.xml.sax.helpers.DefaultHandler}.
try {
// Initialize the parser
mPEnt = new Hashtable();
mEnt = new Hashtable();
mDoc = mInp; // current input is document entity
mChars = mInp.chars; // use document entity buffer
// Parse an xml document
char ch;
mHand.setDocumentLocator(this);
mHand.startDocument();
mSt = 1;
while ((ch = next()) != EOS) {
switch (chtyp(ch)) {
case '<":
ch = next();
switch (ch) {
case '?":
pi();
break;
case '!":
ch = next();
back();
if (ch == '-")
comm();
else
dtd();
break;
default: // must be the first char of an xml name
if (mSt == 5) // misc after document's element
panic(FAULT);
// Document's element.
back();
mSt = 4; // document's element
elm();
mSt = 5; // misc after document's element
break;
}
break;
case ' ":
// Skip white spaces
break;
default:
panic(FAULT);
}
}
if (mSt != 5) // misc after document's element
panic(FAULT);
} finally {
mHand.endDocument();
while (mAttL != null) {
while (mAttL.list != null) {
if (mAttL.list.list != null)
del(mAttL.list.list);
mAttL.list = del(mAttL.list);
}
mAttL = del(mAttL);
}
while (mElm != null)
mElm = del(mElm);
while (mPref != mXml)
mPref = del(mPref);
while (mInp != null)
pop();
if ((mDoc != null) && (mDoc.src != null)) {
try { mDoc.src.close(); } catch (IOException ioe) {}
}
mPEnt = null;
mEnt = null;
mDoc = null;
mHand = null;
mSt = 0;
}
|
public void | parse(java.io.InputStream src, org.xml.sax.helpers.DefaultHandler handler)Parse the content of the given {@link java.io.InputStream}
instance as XML using the specified
{@link org.xml.sax.helpers.DefaultHandler}.
if ((src == null) || (handler == null))
throw new IllegalArgumentException("");
parse(new InputSource(src), handler);
|
public void | parse(org.xml.sax.InputSource is, org.xml.sax.helpers.DefaultHandler handler)Parse the content given {@link org.xml.sax.InputSource}
as XML using the specified
{@link org.xml.sax.helpers.DefaultHandler}.
if ((is == null) || (handler == null))
throw new IllegalArgumentException("");
// Set up the handler
mHand = handler;
// Set up the document
mInp = new Input(BUFFSIZE_READER);
setinp(is);
parse(handler);
|
private void | pent(char flag)Resoves a parameter entity.
This method resolves a parameter entity references. It is also reports
external entities to the application.
char ch;
int idx = mBuffIdx + 1;
Input inp = null;
String str = null;
bappend('%");
if (mSt != 2) // the DTD internal subset
return; // Not Recognized [#4.4.1]
// Read entity name
bname(false);
str = new String(mBuff, idx + 2, mBuffIdx - idx - 1);
if (next() != ';")
panic(FAULT);
inp = (Input)mPEnt.get(str);
// Restore the buffer offset
mBuffIdx = idx - 1;
if (inp != null) {
if (inp.chars == null) {
// External parameter entity
InputSource is = mHand.resolveEntity(inp.pubid, inp.sysid);
if (is != null) {
if (flag != '-")
bappend(' "); // tail space
push(new Input(BUFFSIZE_READER));
// Need revisit: there is no leading space! [#4.4.8]
setinp(is);
mInp.pubid = inp.pubid;
mInp.sysid = inp.sysid;
} else {
// Unresolved external parameter entity
mHand.skippedEntity("%" + str);
}
} else {
// Internal parameter entity
if (flag == '-") {
// No surrounding spaces
inp.chIdx = 1;
} else {
// Insert surrounding spaces
bappend(' "); // tail space
inp.chIdx = 0;
}
push(inp);
}
} else {
// Unknown parameter entity
mHand.skippedEntity("%" + str);
}
|
private void | pi()Parses a processing instruction.
char ch;
String str = null;
mBuffIdx = -1;
for (short st = 0; st >= 0;) {
ch = next();
switch (st) {
case 0: // read the PI target name
switch (chtyp(ch)) {
case 'a":
case 'A":
case '_":
case ':":
case 'X":
back();
str = name(false);
// PI target name may not be empty string [#2.6]
// PI target name 'XML' is reserved [#2.6]
if ((str.length() == 0) ||
(mXml.name.equals(str.toLowerCase()) == true))
panic(FAULT);
// This is processing instruction
if (mSt == 0) // the begining of the document
mSt = 1; // misc before DTD
wsskip(); // skip spaces after the PI target name
st = 1; // accumulate the PI body
mBuffIdx = -1;
break;
case 'Z": // EOS
panic(FAULT);
break;
default:
panic(FAULT);
}
break;
case 1: // accumulate the PI body
switch (ch) {
case '?":
st = 2; // end of the PI body
break;
case EOS:
panic(FAULT);
break;
default:
bappend(ch);
break;
}
break;
case 2: // end of the PI body
switch (ch) {
case '>":
// PI has been read.
mHand.processingInstruction(
str, new String(mBuff, 0, mBuffIdx + 1));
st = -1;
break;
case '?":
bappend('?");
break;
case EOS:
panic(FAULT);
break;
default:
bappend('?");
bappend(ch);
st = 1; // accumulate the PI body
break;
}
break;
default:
panic(FAULT);
}
}
|
private void | pop()Restores previous input on the top of the input stack.
if (mInp.src != null) {
try { mInp.src.close(); } catch (IOException ioe) {}
mInp.src = null;
}
mInp = mInp.next;
if (mInp != null) {
mChars = mInp.chars;
mChLen = mInp.chLen;
mChIdx = mInp.chIdx;
} else {
mChars = null;
mChLen = 0;
mChIdx = 0;
}
|
private void | pubsys(Input inp)Reads the public or/and system identifiers.
Pair pair = pubsys(' ");
inp.pubid = pair.name;
inp.sysid = pair.value;
del(pair);
|
private Pair | pubsys(char flag)Reads the public or/and system identifiers.
Pair ids = pair(null);
String str = name(false);
if ("PUBLIC".equals(str) == true) {
bqstr('i"); // non-CDATA normalization [#4.2.2]
ids.name = new String(mBuff, 1, mBuffIdx);
switch (wsskip()) {
case '\"":
case '\'":
bqstr(' ");
ids.value = new String(mBuff, 1, mBuffIdx);
break;
default:
if (flag != 'N") // [#4.7]
panic(FAULT);
ids.value = null;
break;
}
return ids;
} else if ("SYSTEM".equals(str) == true) {
ids.name = null;
bqstr(' ");
ids.value = new String(mBuff, 1, mBuffIdx);
return ids;
}
panic(FAULT);
return null;
|
private void | push(Input inp)Sets up current input on the top of the input stack.
mInp.chLen = mChLen;
mInp.chIdx = mChIdx;
inp.next = mInp;
mInp = inp;
mChars = inp.chars;
mChLen = inp.chLen;
mChIdx = inp.chIdx;
|
private char[] | qname(boolean ns)Reads a qualified xml name.
The characters of a qualified name is an array of characters. The
first (chars[0]) character is the index of the colon character which
separates the prefix from the local name. If the index is zero, the
name does not contain separator or the parser works in the namespace
unaware mode. The length of qualified name is the length of the array
minus one.
mBuffIdx = -1;
bname(ns);
char chars[] = new char[mBuffIdx + 1];
System.arraycopy(mBuff, 0, chars, 0, mBuffIdx + 1);
return chars;
|
private java.lang.String | rslv(char[] qname)Resolves a prefix.
for (Pair pref = mPref; pref != null; pref = pref.next) {
if (pref.eqpref(qname) == true)
return pref.value;
}
if (qname[0] == 1) { // QNames like ':local'
for (Pair pref = mPref; pref != null; pref = pref.next) {
if (pref.chars[0] == 0)
return pref.value;
}
}
panic(FAULT);
return null;
|
private void | setch(char ch)Sets the current character.
mChars[mChIdx] = ch;
|
private void | setinp(org.xml.sax.InputSource is)Sets up a new input source on the top of the input stack.
Note, the first byte returned by the entity's byte stream has to be the
first byte in the entity. However, the parser does not expect the byte
order mask in both cases when encoding is provided by the input source.
Reader reader = null;
mChIdx = 0;
mChLen = 0;
mChars = mInp.chars;
mInp.src = null;
if (mSt == 0)
mIsSAlone = false; // default [#2.9]
if (is.getCharacterStream() != null) {
// Ignore encoding in the xml text decl.
reader = is.getCharacterStream();
xml(reader);
} else if (is.getByteStream() != null) {
String expenc;
if (is.getEncoding() != null) {
// Ignore encoding in the xml text decl.
expenc = is.getEncoding().toUpperCase();
if (expenc.equals("UTF-16"))
reader = bom(is.getByteStream(), 'U"); // UTF-16 [#4.3.3]
else
reader = enc(expenc, is.getByteStream());
xml(reader);
} else {
// Get encoding from BOM or the xml text decl.
reader = bom(is.getByteStream(), ' ");
if (reader == null) {
// Encoding is defined by the xml text decl.
reader = enc("UTF-8", is.getByteStream());
expenc = xml(reader);
if (expenc.startsWith("UTF-16"))
panic(FAULT); // UTF-16 must have BOM [#4.3.3]
reader = enc(expenc, is.getByteStream());
} else {
// Encoding is defined by the BOM.
xml(reader);
}
}
} else {
// There is no support for public/system identifiers.
panic(FAULT);
}
mInp.src = reader;
mInp.pubid = is.getPublicId();
mInp.sysid = is.getSystemId();
|
private char | wsskip()Skips xml white space characters.
This method skips white space characters (' ', '\t', '\n', '\r') and
looks ahead not white space character.
char ch;
char type;
while (true) {
// Read next character
ch = (mChIdx < mChLen)? mChars[mChIdx++]: next();
type = (char)0; // [X]
if (ch < 0x80) {
type = (char)nmttyp[ch];
} else if (ch == EOS) {
panic(FAULT);
}
if (type != 3) { // [ \t\n\r]
mChIdx--; // back();
return ch;
}
}
|
private java.lang.String | xml(java.io.Reader reader)Parses the xml text declaration.
This method gets encoding from the xml text declaration [#4.3.1] if any.
The method assumes the buffer (mChars) is big enough to accomodate whole
xml text declaration.
String str = null;
String enc = "UTF-8";
char ch;
int val;
short st;
// Read the xml text declaration into the buffer
if (mChIdx != 0) {
// The bom method have read ONE char into the buffer.
st = (short)((mChars[0] == '<")? 1: -1);
} else {
st = 0;
}
while (st >= 0 && mChIdx < mChars.length) {
ch = ((val = reader.read()) >= 0)? (char)val: EOS;
mChars[mChIdx++] = ch;
switch (st) {
case 0: // read '<' of xml declaration
switch (ch) {
case '<":
st = 1;
break;
case 0xfeff: // the byte order mask
ch = ((val = reader.read()) >= 0)? (char)val: EOS;
mChars[mChIdx - 1] = ch;
st = (short)((ch == '<")? 1: -1);
break;
default:
st = -1;
break;
}
break;
case 1: // read '?' of xml declaration [#4.3.1]
st = (short)((ch == '?")? 2: -1);
break;
case 2: // read 'x' of xml declaration [#4.3.1]
st = (short)((ch == 'x")? 3: -1);
break;
case 3: // read 'm' of xml declaration [#4.3.1]
st = (short)((ch == 'm")? 4: -1);
break;
case 4: // read 'l' of xml declaration [#4.3.1]
st = (short)((ch == 'l")? 5: -1);
break;
case 5: // read white space after 'xml'
switch (ch) {
case ' ":
case '\t":
case '\r":
case '\n":
st = 6;
break;
default:
st = -1;
break;
}
break;
case 6: // read content of xml declaration
switch (ch) {
case '?":
st = 7;
break;
case EOS:
st = -2;
break;
default:
break;
}
break;
case 7: // read '>' after '?' of xml declaration
switch (ch) {
case '>":
case EOS:
st = -2;
break;
default:
st = 6;
break;
}
break;
default:
panic(FAULT);
break;
}
}
mChLen = mChIdx;
mChIdx = 0;
// If there is no xml text declaration, the encoding is default.
if (st == -1) {
return enc;
}
mChIdx = 5; // the first white space after "<?xml"
// Parse the xml text declaration
for (st = 0; st >= 0;) {
ch = next();
switch (st) {
case 0: // skip spaces after the xml declaration name
if (chtyp(ch) != ' ") {
back();
st = 1;
}
break;
case 1: // read xml declaration version
case 2: // read xml declaration encoding or standalone
case 3: // read xml declaration standalone
switch (chtyp(ch)) {
case 'a":
case 'A":
case '_":
back();
str = name(false).toLowerCase();
if ("version".equals(str) == true) {
if (st != 1)
panic(FAULT);
if ("1.0".equals(eqstr('=")) != true)
panic(FAULT);
st = 2;
} else if ("encoding".equals(str) == true) {
if (st != 2)
panic(FAULT);
enc = eqstr('=").toUpperCase();
st = 3;
} else if ("standalone".equals(str) == true) {
if ((st == 1) || (mSt != 0)) // [#4.3.1]
panic(FAULT);
str = eqstr('=").toLowerCase();
// Check the 'standalone' value and use it
if (str.equals("yes") == true) {
mIsSAlone = true;
} else if (str.equals("no") == true) {
mIsSAlone = false;
} else {
panic(FAULT);
}
st = 4;
} else {
panic(FAULT);
}
break;
case ' ":
break;
case '?":
if (st == 1)
panic(FAULT);
back();
st = 4;
break;
default:
panic(FAULT);
}
break;
case 4: // end of xml declaration
switch (chtyp(ch)) {
case '?":
if (next() != '>")
panic(FAULT);
if (mSt == 0) // the begining of the document
mSt = 1; // misc before DTD
st = -1;
break;
case ' ":
break;
default:
panic(FAULT);
}
break;
default:
panic(FAULT);
}
}
return enc;
|