File Doc Category Size Date Package
Parser.java API Doc phoneME MR2 API (J2ME) 75389 Wed May 02 18:00:34 BST 2007 com.sun.ukit.jaxp

Parser

java.lang.Object
- javax.xml.parsers.SAXParser

public final class Parser extends SAXParser implements Locator

XML non-validating parser. This non-validating parser conforms to the Extensible Markup Language (XML) 1.0 and "Namespaces in XML" specifications. The API used by the parser is the JSR-172 subset of JAXP and SAX2.

see: org.xml.sax.helpers.DefaultHandler

Fields Summary
public static final String
FAULT
private static final int
BUFFSIZE_READER
private static final int
BUFFSIZE_PARSER
public static final char
EOS
The end of stream character.
private Pair
mNoNS
private Pair
mXml
private DefaultHandler
mHand
private Hashtable
mEnt
private Hashtable
mPEnt
private boolean
mIsSAlone
private boolean
mIsNSAware
private short
mSt
private char
mESt
private char[]
mBuff
private int
mBuffIdx
private Pair
mPref
private Pair
mElm
private Pair
mAttL
private Input
mInp
private Input
mDoc
private char[]
mChars
private int
mChLen
private int
mChIdx
private Attrs
mAttrs
private String[]
mItems
private char
mAttrIdx
private Pair
mDltd
private static final char[]
NONS
Default prefixes
private static final char[]
XML
private static final char[]
XMLNS
private static final byte[]
asctyp
ASCII character type array. This array maps an ASCII (7 bit) character to the character type.
Possible character type values are:
- ' ' for any kind of white space character;
- 'a' for any lower case alphabetical character value;
- 'A' for any upper case alphabetical character value;
- 'd' for any decimal digit character value;
- 'z' for any character less than ' ' except '\t', '\n', '\r';
An ASCII (7 bit) character which does not fall in any category listed above is mapped to itself.
private static final byte[]
nmttyp
NMTOKEN character type array. This array maps an ASCII (7 bit) character to the character type.
Possible character type values are:
- 0 for underscore ('_') or any lower and upper case alphabetical character value;
- 1 for colon (':') character;
- 2 for dash ('-') and dot ('.') or any decimal digit character value;
- 3 for any kind of white space character
An ASCII (7 bit) character which does not fall in any category listed above is mapped to 0xff.
Constructors Summary
public Parser(boolean nsaware)
Constructor.
// NOTE(review): asctyp and nmttyp are declared static final and are filled
// here before super(); this part presumably belongs to the class static
// initializer that the listing merged into the constructor body - confirm
// against the original source file.

// Build the ASCII character type table (see the asctyp field description).
short i = 0;
asctyp = new byte[0x80];
while (i < ' ')
    asctyp[i++] = (byte)'z';
asctyp['\t'] = (byte)' ';
asctyp['\r'] = (byte)' ';
asctyp['\n'] = (byte)' ';
while (i < '0')
    asctyp[i] = (byte)i++;  // punctuation maps to itself
while (i <= '9')
    asctyp[i++] = (byte)'d';
while (i < 'A')
    asctyp[i] = (byte)i++;
while (i <= 'Z')
    asctyp[i++] = (byte)'A';
while (i < 'a')
    asctyp[i] = (byte)i++;
while (i <= 'z')
    asctyp[i++] = (byte)'a';
while (i < 0x80)
    asctyp[i] = (byte)i++;

// Build the NMTOKEN character type table (see the nmttyp field description).
nmttyp = new byte[0x80];
for (i = 0; i < '0'; i++)
    nmttyp[i] = (byte)0xff;
while (i <= '9')
    nmttyp[i++] = (byte)2;  // digits
while (i < 'A')
    nmttyp[i++] = (byte)0xff;
// skipped upper case alphabetical characters are already 0
for (i = '['; i < 'a'; i++)
    nmttyp[i] = (byte)0xff;
// skipped lower case alphabetical characters are already 0
for (i = '{'; i < 0x80; i++)
    nmttyp[i] = (byte)0xff;
nmttyp['_'] = 0;
nmttyp[':'] = 1;
nmttyp['.'] = 2;
nmttyp['-'] = 2;
nmttyp[' '] = 3;
nmttyp['\t'] = 3;
nmttyp['\r'] = 3;
nmttyp['\n'] = 3;

super();
mIsNSAware = nsaware;

// Initialize the parser
mBuff = new char[BUFFSIZE_PARSER];
mAttrs = new Attrs();

// Default namespace
mPref = pair(mPref);
mPref.name = "";
mPref.value = "";
mPref.chars = NONS;
mNoNS = mPref;  // no namespace

// XML namespace
mPref = pair(mPref);
mPref.name = "xml";
mPref.value = "http://www.w3.org/XML/1998/namespace";
mPref.chars = XML;
mXml = mPref;  // XML namespace
Methods Summary
private void attr(Pair att)
Parses an attribute. This recursive method is responsible for prefix addition (mPref) and prefix mapping reports on the way down. The end of the element's start tag triggers the return process. On its way back, the method resolves prefixes and accumulates attributes.
Note that this method will not report namespace declaration attributes (xmlns* attributes), the {@link DefaultHandler#startPrefixMapping startPrefixMapping} method is invoked instead.
param
att An object which represents the current attribute.
exception
SAXException
exception
IOException
// Parses one attribute per recursion level. The end of the start tag
// ('/' or '>') stops the descent; attribute strings are stored into
// mItems on the way back.
Pair next = null;
char norm = 'c';  // CDATA-type normalization by default [#3.3.3]
String val;
String type;
try {
    switch (wsskip()) {
    case '/':
    case '>':
        // Go through all defined attributes on current tag to
        // find defaults
        for (Pair def = att.list; def != null; def = def.next) {
            if (def.list != null) {
                // Attribute definition with default value
                Pair act = att.next;
                while (act != null) {
                    if (act.eqname(def.chars) == true)
                        break;
                    act = act.next;
                }
                if (act == null) {
                    // Add default attribute: push its "name='value' " text
                    // as input and parse it like an ordinary attribute.
                    push(new Input(def.list.chars));
                    attr(att);
                    return;
                }
            }
        }
        // Ensure the attribute string array capacity
        mAttrs.setLength(mAttrIdx);
        mItems = mAttrs.mItems;
        return;

    default:
        // Read the attribute name and value
        att.chars = qname(mIsNSAware);
        att.name = att.local();
        type = "CDATA";
        if (att.list != null) {
            Pair decl = find(att.list, att.chars);
            if (decl != null) {
                // Map the declared type id to the reported type name and
                // to the normalization mode.
                switch (decl.id) {
                case 'i':
                    type = "ID";
                    norm = 'i';
                    break;

                case 'r':
                    type = "IDREF";
                    norm = 'i';
                    break;

                case 'R':
                    type = "IDREFS";
                    norm = 'i';
                    break;

                case 'n':
                    type = "ENTITY";
                    norm = 'i';
                    break;

                case 'N':
                    type = "ENTITIES";
                    norm = 'i';
                    break;

                case 't':
                    type = "NMTOKEN";
                    norm = 'i';
                    break;

                case 'T':
                    type = "NMTOKENS";
                    norm = 'i';
                    break;

                case 'u':
                    type = "NMTOKEN";
                    norm = 'i';
                    break;

                case 'o':
                    type = "NOTATION";
                    norm = 'i';
                    break;

                case 'c':
                    norm = 'c';
                    break;

                default:
                    panic(FAULT);
                    break;
                }
            }
        }
        wsskip();
        if (next() != '=')
            panic(FAULT);
        bqstr(norm);  // read the value with normalization.
        val = new String(mBuff, 1, mBuffIdx);
        // Put a namespace declaration on top of the prefix stack
        if ((mIsNSAware == false) || (isdecl(att, val) == false)) {
            // An ordinary attribute
            mAttrIdx++;
            // Recursive call to parse the next attribute
            next = pair(att);
            next.list = att.list;
            attr(next);
            mAttrIdx--;
            // Add the attribute to the attributes string array
            // (the index is shifted by 3, i.e. 8 slots per attribute).
            char idx = (char)(mAttrIdx << 3);
            mItems[idx + 1] = att.qname();  // attr qname
            mItems[idx + 2] = (mIsNSAware)? att.name: "";  // attr local name
            mItems[idx + 3] = val;   // attr value
            mItems[idx + 4] = type;  // attr type
            // Resolve the prefix if any and report the attribute
            // NOTE: The attribute does not accept the default namespace.
            mItems[idx + 0] = (att.chars[0] != 0)? rslv(att.chars): "";
        } else {
            // A namespace declaration
            // Report a start of the new mapping
            mHand.startPrefixMapping(mPref.name, mPref.value);
            // Recursive call to parse the next attribute
            next = pair(att);
            next.list = att.list;
            attr(next);
            // NOTE: The namespace declaration is not reported.
        }
        break;
    }
} finally {
    // Return the pair allocated for the next attribute, if any.
    if (next != null)
        del(next);
}
private void back()
Puts back the last read character. This method MUST NOT be called more than once after each call of {@link #next next} method.
// There must be at least one consumed character to step back over.
if (mChIdx <= 0) {
    panic(FAULT);
}
mChIdx -= 1;
private void bappend(char ch, char mode)
Appends a character to parser's buffer with normalization.
param
ch The character to append to the buffer.
param
mode The normalization mode.
// This implements attribute value normalization as // described in the XML specification [#3.3.3]. switch (mode) { case 'i": // non CDATA normalization switch (ch) { case ' ": case '\n": case '\r": case '\t": if ((mBuffIdx > 0) && (mBuff[mBuffIdx] != ' ")) bappend(' "); return; default: break; } break; case 'c": // CDATA normalization switch (ch) { case '\n": case '\r": case '\t": ch = ' "; break; default: break; } break; default: // no normalization break; } mBuffIdx++; if (mBuffIdx < mBuff.length) { mBuff[mBuffIdx] = ch; } else { mBuffIdx--; bappend(ch); }
private void bappend(char ch)
Appends a character to parser's buffer.
param
ch The character to append to the buffer.
// Advance the write index first; grow the buffer with an explicit bounds
// check instead of relying on a caught ArrayIndexOutOfBoundsException.
mBuffIdx++;
if (mBuffIdx >= mBuff.length) {
    // Double the buffer size
    char buff[] = new char[mBuff.length << 1];
    System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
    mBuff = buff;
}
mBuff[mBuffIdx] = ch;
private void bcopy(int cidx, int bidx)
Appends (mChIdx - cidx) characters from character buffer (mChars) to parser's buffer (mBuff).
param
cidx The character buffer (mChars) start index.
param
bidx The parser buffer (mBuff) start index.
// Number of characters consumed from mChars since cidx.
int length = mChIdx - cidx;
if ((bidx + length + 1) >= mBuff.length) {
    // Expand the buffer by the number of characters to be copied
    char buff[] = new char[mBuff.length + length];
    System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
    mBuff = buff;
}
System.arraycopy(mChars, cidx, mBuff, bidx, length);
// Move the parser buffer index forward by the copied amount.
mBuffIdx += length;
private void bflash()
Reports characters and empties the parser's buffer.
// A negative index means the buffer is empty and nothing is reported.
if (mBuffIdx >= 0) {
    // Textual data has been read
    mHand.characters(mBuff, 0, (mBuffIdx + 1));
    mBuffIdx = -1;
}
private char bkeyword()
Recognizes a keyword. This is low level routine which recognizes one of keywords in the buffer. Keyword Id ID - i IDREF - r IDREFS - R ENTITY - n ENTITIES - N NMTOKEN - t NMTOKENS - T ELEMENT - e ATTLIST - a NOTATION - o CDATA - c REQUIRED - Q IMPLIED - I FIXED - F
return
an id of a keyword or '?'.
exception
SAXException When incorrect character appear in the name.
exception
IOException
// The keyword text starts at index 1 of the buffer and is mBuffIdx long.
// Dispatch on length first, then on one distinguishing character.
String str = new String(mBuff, 1, mBuffIdx);
switch (str.length()) {
case 2:  // ID
    return "ID".equals(str)? 'i': '?';

case 5:  // IDREF, CDATA, FIXED
    switch (mBuff[1]) {
    case 'I':
        return "IDREF".equals(str)? 'r': '?';
    case 'C':
        return "CDATA".equals(str)? 'c': '?';
    case 'F':
        return "FIXED".equals(str)? 'F': '?';
    default:
        break;
    }
    break;

case 6:  // IDREFS, ENTITY
    switch (mBuff[1]) {
    case 'I':
        return "IDREFS".equals(str)? 'R': '?';
    case 'E':
        return "ENTITY".equals(str)? 'n': '?';
    default:
        break;
    }
    break;

case 7:  // NMTOKEN, IMPLIED, ATTLIST, ELEMENT
    switch (mBuff[1]) {
    case 'I':
        return "IMPLIED".equals(str)? 'I': '?';
    case 'N':
        return "NMTOKEN".equals(str)? 't': '?';
    case 'A':
        return "ATTLIST".equals(str)? 'a': '?';
    case 'E':
        return "ELEMENT".equals(str)? 'e': '?';
    default:
        break;
    }
    break;

case 8:  // ENTITIES, NMTOKENS, NOTATION, REQUIRED
    // The second character is tested here, not the first.
    switch (mBuff[2]) {
    case 'N':
        return "ENTITIES".equals(str)? 'N': '?';
    case 'M':
        return "NMTOKENS".equals(str)? 'T': '?';
    case 'O':
        return "NOTATION".equals(str)? 'o': '?';
    case 'E':
        return "REQUIRED".equals(str)? 'Q': '?';
    default:
        break;
    }
    break;

default:
    break;
}
return '?';
private void bname(boolean ns)
Reads a qualified xml name. This is low level routine which leaves a qName in the buffer. The characters of a qualified name is an array of characters. The first (chars[0]) character is the index of the colon character which separates the prefix from the local name. If the index is zero, the name does not contain separator or the parser works in the namespace unaware mode. The length of qualified name is the length of the array minus one.
param
ns The true value turns namespace conformance on.
exception
SAXException When incorrect character appear in the name.
exception
IOException
// Scans a qualified name directly in mChars and copies it into mBuff in
// chunks (bcopy); only indexes are advanced per character.
char ch;
char type;
mBuffIdx++;  // allocate a char for colon offset
int bqname = mBuffIdx;    // buffer index of the colon offset slot
int bcolon = bqname;      // buffer index of the colon (bqname if none)
int bchidx = bqname + 1;  // buffer index of the next name character
int bstart = bchidx;      // buffer start of the chunk not copied yet
int cstart = mChIdx;      // mChars start of the chunk not copied yet
short st = (short)((ns == true)? 0: 2);
while (true) {
    // Read next character
    if (mChIdx >= mChLen) {
        // The read buffer is exhausted: save what has been scanned so
        // far, then refill and restart the chunk at the new position.
        bcopy(cstart, bstart);
        next();
        mChIdx--;  // back();
        cstart = mChIdx;
        bstart = bchidx;
    }
    ch = mChars[mChIdx++];
    type = (char)0;  // [X]
    if (ch < 0x80) {
        type = (char)nmttyp[ch];
    } else if (ch == EOS) {
        panic(FAULT);
    }
    // Parse QName
    switch (st) {
    case 0:  // read the first char of the prefix
    case 2:  // read the first char of the suffix
        switch (type) {
        case 0:  // [aA_X]
            bchidx++;  // append char to the buffer
            st++;      // (st == 0)? 1: 3;
            break;

        case 1:  // [:]
            mChIdx--;  // back();
            st++;      // (st == 0)? 1: 3;
            break;

        default:
            panic(FAULT);
        }
        break;

    case 1:  // read the prefix
    case 3:  // read the suffix
        switch (type) {
        case 0:  // [aA_X]
        case 2:  // [.-d]
            bchidx++;  // append char to the buffer
            break;

        case 1:  // [:]
            bchidx++;  // append char to the buffer
            if (ns == true) {
                if (bcolon != bqname)
                    panic(FAULT);  // it must be only one colon
                bcolon = bchidx - 1;
                if (st == 1)
                    st = 2;
            }
            break;

        default:
            // End of the name: un-read the terminator, copy the tail
            // and store the colon offset in front of the name.
            mChIdx--;  // back();
            bcopy(cstart, bstart);
            mBuff[bqname] = (char)(bcolon - bqname);
            return;
        }
        break;

    default:
        panic(FAULT);
    }
}
private void bntok()
Reads a nmtoken. This is low level routine which leaves a nmtoken in the buffer.
exception
SAXException When incorrect character appear in the name.
exception
IOException
char ch;
mBuffIdx = -1;
bappend((char)0);  // default offset to the colon char
while (true) {
    ch = next();
    switch (chtyp(ch)) {
    case 'a':
    case 'A':
    case 'd':
    case '.':
    case ':':
    case '-':
    case '_':
    case 'X':
        bappend(ch);
        break;

    default:
        // First character which cannot be part of a nmtoken: leave it
        // for the caller.
        back();
        return;
    }
}
private java.io.Reader bom(java.io.InputStream is, char hint)
Determines the entity encoding. This method gets the encoding from the Byte Order Mark [#4.3.3] if any. Note, the first byte returned by the entity's byte stream has to be the first byte in the entity. Also, there is no support for UCS-4.
param
is A byte stream of the entity.
param
hint An encoding hint, character U means UTF-16.
return
a reader constructed from the BOM or UTF-8 by default.
exception
SAXException
exception
IOException
// Inspect the first byte of the stream. When it is not part of a BOM the
// first character is decoded as UTF-8 and stored into mChars, and null is
// returned.
int val = is.read();
switch (val) {
case 0xef:  // UTF-8
    if (hint == 'U')  // must be UTF-16
        panic(FAULT);
    if (is.read() != 0xbb)
        panic(FAULT);
    if (is.read() != 0xbf)
        panic(FAULT);
    return new ReaderUTF8(is);

case 0xfe:  // UTF-16, big-endian
    if (is.read() != 0xff)
        panic(FAULT);
    return new ReaderUTF16(is, 'b');

case 0xff:  // UTF-16, little-endian
    if (is.read() != 0xfe)
        panic(FAULT);
    return new ReaderUTF16(is, 'l');

case -1:
    // Empty stream: store the end of stream character.
    mChars[mChIdx++] = EOS;
    return new ReaderUTF8(is);

default:
    if (hint == 'U')  // must be UTF-16
        panic(FAULT);
    // Read the rest of UTF-8 character
    switch (val & 0xf0) {
    case 0xc0:
    case 0xd0:
        // two byte sequence
        mChars[mChIdx++] = (char)(((val & 0x1f) << 6) | (is.read() & 0x3f));
        break;

    case 0xe0:
        // three byte sequence
        mChars[mChIdx++] = (char)(((val & 0x0f) << 12) |
            ((is.read() & 0x3f) << 6) | (is.read() & 0x3f));
        break;

    case 0xf0:  // UCS-4 character
        throw new UnsupportedEncodingException();

    default:
        mChars[mChIdx++] = (char)val;
        break;
    }
    return null;
}
private void bqstr(char flag)
Reads a single or double quoted string into the buffer. This method resolves entities inside a string unless the parser parses DTD.
param
flag 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; '-' - not an attribute value; 'd' - in DTD context.
exception
SAXException
exception
IOException
Input inp = mInp; // remember the original input mBuffIdx = -1; bappend((char)0); // default offset to the colon char char ch; for (short st = 0; st >= 0;) { ch = (mChIdx < mChLen)? mChars[mChIdx++]: next(); switch (st) { case 0: // read a single or double quote switch (ch) { case ' ": case '\n": case '\r": case '\t": break; case '\'": st = 2; // read a single quoted string break; case '\"": st = 3; // read a double quoted string break; default: panic(FAULT); break; } break; case 2: // read a single quoted string case 3: // read a double quoted string switch (ch) { case '\'": if ((st == 2) && (mInp == inp)) st = -1; else bappend(ch); break; case '\"": if ((st == 3) && (mInp == inp)) st = -1; else bappend(ch); break; case '&": if (flag != 'd") ent(flag); else bappend(ch); break; case '%": if (flag == 'd") pent('-"); else bappend(ch); break; case '<": if ((flag == '-") || (flag == 'd")) bappend(ch); else panic(FAULT); break; case EOS: // EOS before single/double quote panic(FAULT); case '\r": // EOL processing [#2.11 & #3.3.3] if (flag != ' " && mInp.next == null) { if (next() != '\n") back(); ch = '\n"; } default: bappend(ch, flag); break; } break; default: panic(FAULT); } } // There is maximum one space at the end of the string in // i-mode (non CDATA normalization) and it has to be removed. if ((flag == 'i") && (mBuff[mBuffIdx] == ' ")) mBuffIdx -= 1;
private void cdat()
Parses a character data.
exception
SAXException
exception
IOException
char ch;
mBuffIdx = -1;
for (short st = 0; st >= 0;) {
    ch = next();
    switch (st) {
    case 0:  // the first '[' of the CDATA open
        if (ch == '[')
            st = 1;
        else
            panic(FAULT);
        break;

    case 1:  // read "CDATA"
        if (chtyp(ch) == 'A') {
            bappend(ch);
        } else {
            if ("CDATA".equals(
                    new String(mBuff, 0, mBuffIdx + 1)) != true)
                panic(FAULT);
            back();
            st = 2;
        }
        break;

    case 2:  // the second '[' of the CDATA open
        if (ch != '[')
            panic(FAULT);
        mBuffIdx = -1;
        st = 3;
        break;

    case 3:  // read data before the first ']'
        if (ch != ']')
            bappend(ch);
        else
            st = 4;
        break;

    case 4:  // read the second ']' or continue to read the data
        if (ch != ']') {
            bappend(']');
            bappend(ch);
            st = 3;
        } else {
            st = 5;
        }
        break;

    case 5:  // read '>' or continue to read the data
        switch (ch) {
        case ']':
            // One more ']': the oldest pending ']' becomes data.
            bappend(']');
            break;

        case '>':
            bflash();
            st = -1;
            break;

        default:
            bappend(']');
            bappend(']');
            bappend(ch);
            st = 3;
            break;
        }
        break;

    default:
        panic(FAULT);
    }
}
private char chtyp(char ch)
Maps a character to its type. Possible character type values are:
- ' ' for any kind of white space character;
- 'a' for any lower case alphabetical character value;
- 'A' for any upper case alphabetical character value;
- 'd' for any decimal digit character value;
- 'z' for any character less than ' ' except '\t', '\n', '\r';
- 'X' for any not ASCII character;
- 'Z' for EOS character.
An ASCII (7 bit) character which does not fall in any category listed above is mapped to itself.
param
ch The character to map.
return
The type of character.
exception
SAXException When End Of Stream character typed.
// 7 bit characters are looked up in the ASCII type table.
if (ch < 0x80)
    return (char)asctyp[ch];
// Anything else is 'X', except the end of stream marker which is 'Z'.
return (ch != EOS)? 'X': 'Z';
private void comm()
Parses a comment.
exception
org.xml.sax.SAXException
exception
java.io.IOException
if (mSt == 0) mSt = 1; // misc before DTD char ch; for (short st = 0; st >= 0;) { ch = (mChIdx < mChLen)? mChars[mChIdx++]: next(); switch (st) { case 0: // first '-' of the comment open if (ch == '-") st = 1; else panic(FAULT); break; case 1: // secind '-' of the comment open if (ch == '-") st = 2; else panic(FAULT); break; case 2: // skip the comment body switch (ch) { case '-": st = 3; break; case EOS: panic(FAULT); break; default: break; } break; case 3: // second '-' of the comment close st = (ch == '-")? (short)4: (short)2; break; case 4: // '>' of the comment close if (ch == '>") st = -1; else panic(FAULT); break; default: panic(FAULT); } }
private Pair del(Pair pair)
Deletes an instance of a pair.
param
pair The pair to delete.
return
A reference to the next pair in a chain.
// Remember the successor before the link is reused below.
final Pair successor = pair.next;

// Drop every reference held by the pair.
pair.list = null;
pair.chars = null;
pair.value = null;
pair.name = null;

// Put the pair on top of the chain of deleted pairs (mDltd).
pair.next = mDltd;
mDltd = pair;

return successor;
private void dtd()
Parses the document type declaration.
exception
SAXException
exception
IOException
char ch;
String name = null;
Pair psid = null;
// read 'DOCTYPE'
if ("DOCTYPE".equals(name(false)) != true)
    panic(FAULT);
mSt = 2;  // DTD
for (short st = 0; st >= 0;) {
    ch = next();
    switch (st) {
    case 0:  // read the document type name
        if (chtyp(ch) != ' ') {
            back();
            name = name(mIsNSAware);
            wsskip();
            st = 1;  // read 'PUBLIC' or 'SYSTEM'
        }
        break;

    case 1:  // read 'PUBLIC' or 'SYSTEM'
        switch (chtyp(ch)) {
        case 'A':
            back();
            psid = pubsys(' ');
            st = 2;  // skip spaces before internal subset
            break;

        case '[':
            back();
            st = 2;  // skip spaces before internal subset
            break;

        case '>':
            back();
            st = 3;  // skip spaces after internal subset
            break;

        default:
            panic(FAULT);
        }
        break;

    case 2:  // skip spaces before internal subset
        switch (chtyp(ch)) {
        case '[':
            // Process internal subset
            dtdsub();
            st = 3;  // skip spaces after internal subset
            break;

        case '>':
            // There is no internal subset
            back();
            st = 3;  // skip spaces after internal subset
            break;

        case ' ':
            // skip white spaces
            break;

        default:
            panic(FAULT);
        }
        break;

    case 3:  // skip spaces after internal subset
        switch (chtyp(ch)) {
        case '>':
            if (psid != null) {
                // Report the DTD external subset
                InputSource is = mHand.resolveEntity(psid.name, psid.value);
                if (is != null) {
                    if (mIsSAlone == false) {
                        // Set the end of DTD external subset char
                        back();
                        setch(']');
                        // Set the DTD external subset InputSource
                        push(new Input(BUFFSIZE_READER));
                        setinp(is);
                        mInp.pubid = psid.name;
                        mInp.sysid = psid.value;
                        // Parse the DTD external subset
                        dtdsub();
                    } else {
                        // The subset was resolved but is not read when
                        // mIsSAlone is set; report it as skipped.
                        mHand.skippedEntity("[dtd]");
                        // Release reader and stream
                        if (is.getCharacterStream() != null) {
                            try {
                                is.getCharacterStream().close();
                            } catch (IOException ioe) {
                                // best-effort close; nothing to recover
                            }
                        }
                        if (is.getByteStream() != null) {
                            try {
                                is.getByteStream().close();
                            } catch (IOException ioe) {
                                // best-effort close; nothing to recover
                            }
                        }
                    }
                } else {
                    // Unresolved DTD external subset
                    mHand.skippedEntity("[dtd]");
                }
                del(psid);
            }
            st = -1;  // end of DTD
            break;

        case ' ':
            // skip white spaces
            break;

        default:
            panic(FAULT);
        }
        break;

    default:
        panic(FAULT);
    }
}
mSt = 3;  // misc after DTD
private void dtdatt(Pair elm)
Parses an attribute declaration. The attribute uses the following fields of the Pair object: chars - characters of the qualified name; id - the type identifier of the attribute; list - a pair which holds the default value (chars field).
param
elm An object which represents all defined attributes on an element.
exception
SAXException
exception
IOException
char attqn[] = null; Pair att = null; char ch; for (short st = 0; st >= 0;) { ch = next(); switch (st) { case 0: // the attribute name switch (chtyp(ch)) { case 'a": case 'A": case '_": case 'X": case ':": back(); // Get the attribut from the list or add a new one. attqn = qname(mIsNSAware); att = find(elm.list, attqn); if (att == null) { // New attribute declaration att = pair(elm.list); att.chars = attqn; elm.list = att; } else { // Do not override the attribute declaration [#3.3] att = pair(null); att.chars = attqn; att.id = 'c"; } wsskip(); st = 1; break; case '%": pent(' "); break; case ' ": break; default: panic(FAULT); break; } break; case 1: // the attribute type switch (chtyp(ch)) { case '(": att.id = 'u"; // enumeration type st = 2; // read the first element of the list break; case '%": pent(' "); break; case ' ": break; default: back(); bntok(); // read type id att.id = bkeyword(); switch (att.id) { case 'o": // NOTATION if (wsskip() != '(") panic(FAULT); ch = next(); st = 2; // read the first element of the list break; case 'i": // ID case 'r": // IDREF case 'R": // IDREFS case 'n": // ENTITY case 'N": // ENTITIES case 't": // NMTOKEN case 'T": // NMTOKENS case 'c": // CDATA wsskip(); st = 4; // read default declaration break; default: panic(FAULT); break; } break; } break; case 2: // read the first element of the list switch (chtyp(ch)) { case 'a": case 'A": case 'd": case '.": case ':": case '-": case '_": case 'X": back(); switch (att.id) { case 'u": // enumeration type bntok(); break; case 'o": // NOTATION mBuffIdx = -1; bname(false); break; default: panic(FAULT); break; } wsskip(); st = 3; // read next element of the list break; case '%": pent(' "); break; case ' ": break; default: panic(FAULT); break; } break; case 3: // read next element of the list switch (ch) { case ')": wsskip(); st = 4; // read default declaration break; case '|": wsskip(); switch (att.id) { case 'u": // enumeration type bntok(); break; case 'o": // NOTATION mBuffIdx = -1; 
bname(false); break; default: panic(FAULT); break; } wsskip(); break; case '%": pent(' "); break; default: panic(FAULT); break; } break; case 4: // read default declaration switch (ch) { case '#": bntok(); switch (bkeyword()) { case 'F": // FIXED switch (wsskip()) { case '\"": case '\'": st = 5; // read the default value break; default: st = -1; break; } break; case 'Q": // REQUIRED case 'I": // IMPLIED st = -1; break; default: panic(FAULT); break; } break; case '\"": case '\'": back(); st = 5; // read the default value break; case ' ": case '\n": case '\r": case '\t": break; case '%": pent(' "); break; default: back(); st = -1; break; } break; case 5: // read the default value switch (ch) { case '\"": case '\'": back(); bqstr('d"); // the value in the mBuff now att.list = pair(null); // Create a string like "attqname='value' " att.list.chars = new char[att.chars.length + mBuffIdx + 3]; System.arraycopy( att.chars, 1, att.list.chars, 0, att.chars.length - 1); att.list.chars[att.chars.length - 1] = '="; att.list.chars[att.chars.length] = ch; System.arraycopy( mBuff, 1, att.list.chars, att.chars.length + 1, mBuffIdx); att.list.chars[att.chars.length + mBuffIdx + 1] = ch; att.list.chars[att.chars.length + mBuffIdx + 2] = ' "; st = -1; break; default: panic(FAULT); break; } break; default: panic(FAULT); break; } }
private void dtdattl()
Parses an attribute list declaration. This method parses the declaration up to the closing angle bracket.
exception
SAXException
exception
IOException
char elmqn[] = null;
Pair elm = null;
char ch;
for (short st = 0; st >= 0;) {
    ch = next();
    switch (st) {
    case 0:  // read the element name
        switch (chtyp(ch)) {
        case 'a':
        case 'A':
        case '_':
        case 'X':
        case ':':
            back();
            // Get the element from the list or add a new one.
            elmqn = qname(mIsNSAware);
            elm = find(mAttL, elmqn);
            if (elm == null) {
                elm = pair(mAttL);
                elm.chars = elmqn;
                mAttL = elm;
            }
            st = 1;  // read an attribute declaration
            break;

        case ' ':
            break;

        case '%':
            pent(' ');
            break;

        default:
            panic(FAULT);
            break;
        }
        break;

    case 1:  // read an attribute declaration
        switch (chtyp(ch)) {
        case 'a':
        case 'A':
        case '_':
        case 'X':
        case ':':
            back();
            dtdatt(elm);
            // The closing '>' is left unread for the caller.
            if (wsskip() == '>')
                return;
            break;

        case ' ':
            break;

        case '%':
            pent(' ');
            break;

        default:
            panic(FAULT);
            break;
        }
        break;

    default:
        panic(FAULT);
        break;
    }
}
private void dtdelm()
Parses an element declaration. This method parses the declaration up to the closing angle bracket.
exception
SAXException
exception
IOException
// This is stub implementation which skips an element // declaration. wsskip(); name(mIsNSAware); char ch; while (true) { ch = next(); switch (ch) { case '>": back(); return; case EOS: panic(FAULT); default: break; } }
private void dtdent()
Parses an entity declaration. This method fills the general (mEnt) and parameter (mPEnt) entity look up table.
exception
SAXException
exception
IOException
String str = null; char[] val = null; Input inp = null; Pair ids = null; char ch; for (short st = 0; st >= 0;) { ch = next(); switch (st) { case 0: // skip white spaces before entity name switch (chtyp(ch)) { case ' ": // Skip white spaces break; case '%": // Parameter entity or parameter entity declaration. ch = next(); back(); if (chtyp(ch) == ' ") { // Parameter entity declaration. wsskip(); str = name(false); switch (chtyp(wsskip())) { case 'A": // Read the external identifier ids = pubsys(' "); if (wsskip() == '>") { // External parsed entity if (mPEnt.containsKey(str) == false) { // [#4.2] inp = new Input(); inp.pubid = ids.name; inp.sysid = ids.value; mPEnt.put(str, inp); } } else { panic(FAULT); } del(ids); st = -1; // the end of declaration break; case '\"": case '\'": // Read the parameter entity value bqstr('d"); // Create the parameter entity value val = new char[mBuffIdx + 1]; System.arraycopy(mBuff, 1, val, 1, val.length - 1); // Add surrounding spaces [#4.4.8] val[0] = ' "; // Add the entity to the entity look up table if (mPEnt.containsKey(str) == false) { // [#4.2] inp = new Input(val); inp.pubid = mInp.pubid; inp.sysid = mInp.sysid; mPEnt.put(str, inp); } st = -1; // the end of declaration break; default: panic(FAULT); break; } } else { // Parameter entity reference. 
pent(' "); } break; default: back(); str = name(false); st = 1; // read entity declaration value break; } break; case 1: // read entity declaration value switch (chtyp(ch)) { case '\"": // internal entity case '\'": back(); bqstr('d"); // read a string into the buffer if (mEnt.get(str) == null) { // Create general entity value val = new char[mBuffIdx]; System.arraycopy(mBuff, 1, val, 0, val.length); // Add the entity to the entity look up table if (mEnt.containsKey(str) == false) { // [#4.2] inp = new Input(val); inp.pubid = mInp.pubid; inp.sysid = mInp.sysid; mEnt.put(str, inp); } } st = -1; // the end of declaration break; case 'A": // external entity back(); ids = pubsys(' "); switch (wsskip()) { case '>": // external parsed entity if (mEnt.containsKey(str) == false) { // [#4.2] inp = new Input(); inp.pubid = ids.name; inp.sysid = ids.value; mEnt.put(str, inp); } break; case 'N": // external general unparsed entity if ("NDATA".equals(name(false)) == true) { wsskip(); mHand.unparsedEntityDecl( str, ids.name, ids.value, name(false)); break; } default: panic(FAULT); break; } del(ids); st = -1; // the end of declaration break; case ' ": // Skip white spaces break; default: panic(FAULT); break; } break; default: panic(FAULT); } }
private void dtdnot()
Parses a notation declaration. This method parses the declaration up to the closing angle bracket.
exception
SAXException
exception
IOException
// Read the notation name, then its external identifier, and report the
// declaration to the handler.
wsskip();
String name = name(false);
wsskip();
Pair ids = pubsys('N');
mHand.notationDecl(name, ids.name, ids.value);
del(ids);
private void dtdsub()
Parses the document type declaration subset.
exception
SAXException
exception
IOException
char ch;
for (short st = 0; st >= 0;) {
    ch = next();
    switch (st) {
    case 0:  // skip white spaces before a declaration
        switch (chtyp(ch)) {
        case '<':
            ch = next();
            switch (ch) {
            case '?':
                pi();
                break;

            case '!':
                // Peek at the next character: '-' starts a comment.
                ch = next();
                back();
                if (ch == '-') {
                    comm();
                    break;
                }
                // markup or entity declaration
                bntok();
                switch (bkeyword()) {
                case 'n':
                    dtdent();
                    break;

                case 'a':
                    dtdattl();  // parse attributes declaration
                    break;

                case 'e':
                    dtdelm();   // parse element declaration
                    break;

                case 'o':
                    dtdnot();   // parse notation declaration
                    break;

                default:
                    panic(FAULT);  // unsupported markup declaration
                    break;
                }
                st = 1;  // read the end of declaration
                break;

            default:
                panic(FAULT);
                break;
            }
            break;

        case '%':
            // A parameter entity reference
            pent(' ');
            break;

        case ']':
            // End of DTD subset
            st = -1;
            break;

        case ' ':
            // Skip white spaces
            break;

        case 'Z':
            // End of stream
            if (next() != ']')
                panic(FAULT);
            st = -1;
            break;

        default:
            panic(FAULT);
        }
        break;

    case 1:  // read the end of declaration
        switch (ch) {
        case '>':  // there is no notation
            st = 0;  // skip white spaces before a declaration
            break;

        case ' ':
        case '\n':
        case '\r':
        case '\t':
            // Skip white spaces
            break;

        default:
            panic(FAULT);
            break;
        }
        break;

    default:
        panic(FAULT);
    }
}
private void eappend(char ch)
Recognizes the built-in entities lt, gt, amp, apos, quot. The initial state is 0x100. Any state below 0x100 is a built-in entity replacement character.
param
ch the next character of an entity name.
// One step of the state machine kept in mESt: 0x100 is the start state,
// 0x101..0x109 are partial matches, 0x200 means "not a built-in entity",
// and a value below 0x100 is the replacement character of a full match.
switch (mESt) {
case 0x100:  // "l" or "g" or "a" or "q"
    switch (ch) {
    case 'l':
        mESt = 0x101;
        break;
    case 'g':
        mESt = 0x102;
        break;
    case 'a':
        mESt = 0x103;
        break;
    case 'q':
        mESt = 0x107;
        break;
    default:
        mESt = 0x200;
        break;
    }
    break;

case 0x101:  // "lt"
    mESt = (ch == 't')? '<': (char)0x200;
    break;

case 0x102:  // "gt"
    mESt = (ch == 't')? '>': (char)0x200;
    break;

case 0x103:  // "am" or "ap"
    switch (ch) {
    case 'm':
        mESt = 0x104;
        break;
    case 'p':
        mESt = 0x105;
        break;
    default:
        mESt = 0x200;
        break;
    }
    break;

case 0x104:  // "amp"
    mESt = (ch == 'p')? '&': (char)0x200;
    break;

case 0x105:  // "apo"
    mESt = (ch == 'o')? (char)0x106: (char)0x200;
    break;

case 0x106:  // "apos"
    mESt = (ch == 's')? '\'': (char)0x200;
    break;

case 0x107:  // "qu"
    mESt = (ch == 'u')? (char)0x108: (char)0x200;
    break;

case 0x108:  // "quo"
    mESt = (ch == 'o')? (char)0x109: (char)0x200;
    break;

case 0x109:  // "quot"
    mESt = (ch == 't')? '\"': (char)0x200;
    break;

case '<':   // "lt"
case '>':   // "gt"
case '&':   // "amp"
case '\'':  // "apos"
case '\"':  // "quot"
    // A character after a complete built-in name: no longer a match.
    mESt = 0x200;
    // falls through

default:
    break;
}
private void elm()
Parses an element. This recursive method is responsible for prefix scope control (mPref). When the element is leaving the scope all prefixes defined within the element are removed from the prefix stack.
exception
SAXException
exception
IOException
// Save the current top of the prefix stack Pair pref = mPref; // Read an element name and put it on top of the element stack mElm = pair(mElm); mElm.chars = qname(mIsNSAware); mElm.name = mElm.local(); // Find the list of defined attributs of the current element Pair elm = find(mAttL, mElm.chars); // Read attributes till the end of the element tag mAttrIdx = 0; Pair att = pair(null); att.list = (elm != null)? elm.list: null; // attrs defined on this elm attr(att); del(att); // Read the element and it's content mBuffIdx = -1; char ch; for (short st = 0; st >= 0;) { ch = (mChIdx < mChLen)? mChars[mChIdx++]: next(); switch (st) { case 0: // read the end of the element tag case 1: // read the end of the empty element switch (ch) { case '>": // Report the element if (mIsNSAware == true) { mElm.value = rslv(mElm.chars); mHand.startElement( mElm.value, mElm.name, "", mAttrs); } else { mHand.startElement( "", "", mElm.name, mAttrs); } mItems = null; st = (st == 0)? (short)2: (short)-1; break; case '/": if (st != 0) panic(FAULT); st = 1; break; default: panic(FAULT); } break; case 2: // skip white space between tags switch (ch) { case ' ": case '\t": case '\n": bappend(ch); break; case '\r": // EOL processing [#2.11] if (next() != '\n") back(); bappend('\n"); break; case '<": // Need revisit: With additional info from DTD and xml:space attr [#2.10] // the following call can be supported: // mHand.ignorableWhitespace(mBuff, 0, (mBuffIdx + 1)); bflash(); default: back(); st = 3; break; } break; case 3: // read the text content of the element switch (ch) { case '&": ent('x"); break; case '<": bflash(); switch (next()) { case '/": // the end of the element content // Check element's open/close tags balance mBuffIdx = -1; bname(mIsNSAware); char[] chars = mElm.chars; if (chars.length == (mBuffIdx + 1)) { for (char i = 1; i <= mBuffIdx; i += 1) { if (chars[i] != mBuff[i]) panic(FAULT); } } else { panic(FAULT); } // Skip white spaces before '>' if (wsskip() != '>") panic(FAULT); 
ch = next(); st = -1; break; case '!": // a comment or a CDATA ch = next(); back(); switch (ch) { case '-": // must be a comment comm(); break; case '[": // must be a CDATA section cdat(); break; default: panic(FAULT); } break; case '?": // processing instruction pi(); break; default: // must be the first char of an xml name back(); elm(); // recursive call break; } mBuffIdx = -1; if (st != -1) st = 2; break; case '\r": // EOL processing [#2.11] if (next() != '\n") back(); bappend('\n"); break; case EOS: panic(FAULT); break; default: bappend(ch); break; } break; default: panic(FAULT); } } // Report the end of element if (mIsNSAware == true) mHand.endElement(mElm.value, mElm.name, ""); else mHand.endElement("", "", mElm.name); // Remove the top element tag mElm = del(mElm); // Restore the top of the prefix stack while (mPref != pref) { mHand.endPrefixMapping(mPref.name); mPref = del(mPref); }
private java.io.Reader enc(java.lang.String name, java.io.InputStream is)
Sets up the document reader.
param
name an encoding name.
param
is the document byte input stream.
return
a reader constructed from encoding name and input stream.
exception
UnsupportedEncodingException
// DO NOT CLOSE current reader if any! if (name.equals("UTF-8")) return new ReaderUTF8(is); else if (name.equals("UTF-16LE")) return new ReaderUTF16(is, 'l"); else if (name.equals("UTF-16BE")) return new ReaderUTF16(is, 'b"); else return new InputStreamReader(is, name);
private void ent(char flag)
Resoves an entity. This method resolves built-in and character entity references. It is also reports external entities to the application.
param
flag The 'x' character forces the method to report a skipped entity; 'i' character - indicates non-CDATA normalization.
exception
SAXException
exception
IOException
char ch; int idx = mBuffIdx + 1; Input inp = null; String str = null; mESt = 0x100; // reset the built-in entity recognizer bappend('&"); for (short st = 0; st >= 0;) { ch = (mChIdx < mChLen)? mChars[mChIdx++]: next(); switch (st) { case 0: // the first character of the entity name case 1: // read built-in entity name switch (chtyp(ch)) { case 'd": case '.": case '-": if (st != 1) panic(FAULT); case 'a": case 'A": case '_": case 'X": bappend(ch); eappend(ch); st = 1; break; case ':": if (mIsNSAware != false) panic(FAULT); bappend(ch); eappend(ch); st = 1; break; case ';": if (mESt < 0x100) { // The entity is a built-in entity mBuffIdx = idx - 1; bappend(mESt); st = -1; break; } else if (mSt == 2) { // In DTD entity declaration has to resolve character // entities and include "as is" others. [#4.4.7] bappend(';"); st = -1; break; } // Convert an entity name to a string str = new String(mBuff, idx + 1, mBuffIdx - idx); inp = (Input)mEnt.get(str); // Restore the buffer offset mBuffIdx = idx - 1; if (inp != null) { if (inp.chars == null) { // External entity InputSource is = mHand.resolveEntity(inp.pubid, inp.sysid); if (is != null) { push(new Input(BUFFSIZE_READER)); setinp(is); mInp.pubid = inp.pubid; mInp.sysid = inp.sysid; } else { // Unresolved external entity bflash(); if (flag != 'x") panic(FAULT); // unknown entity within marckup mHand.skippedEntity(str); } } else { // Internal entity push(inp); } } else { // Unknown or general unparsed entity bflash(); if (flag != 'x") panic(FAULT); // unknown entity within marckup mHand.skippedEntity(str); } st = -1; break; case '#": if (st != 0) panic(FAULT); st = 2; break; default: panic(FAULT); } break; case 2: // read character entity switch (chtyp(ch)) { case 'd": bappend(ch); break; case ';": // Convert the character entity to a character try { int i = Integer.parseInt( new String(mBuff, idx + 1, mBuffIdx - idx), 10); if (i >= 0xffff) panic(FAULT); ch = (char)i; } catch (NumberFormatException nfe) { panic(FAULT); } // 
Restore the buffer offset mBuffIdx = idx - 1; if (ch == ' " || mInp.next != null) bappend(ch, flag); else bappend(ch); st = -1; break; case 'a": // If the entity buffer is empty and ch == 'x' if ((mBuffIdx == idx) && (ch == 'x")) { st = 3; break; } default: panic(FAULT); } break; case 3: // read hex character entity switch (chtyp(ch)) { case 'A": case 'a": case 'd": bappend(ch); break; case ';": // Convert the character entity to a character try { int i = Integer.parseInt( new String(mBuff, idx + 1, mBuffIdx - idx), 16); if (i >= 0xffff) panic(FAULT); ch = (char)i; } catch (NumberFormatException nfe) { panic(FAULT); } // Restore the buffer offset mBuffIdx = idx - 1; if (ch == ' " || mInp.next != null) bappend(ch, flag); else bappend(ch); st = -1; break; default: panic(FAULT); } break; default: panic(FAULT); } }
private java.lang.String eqstr(char flag)
Reads an attribute value. The grammar which this method can read is:
eqstr := S "=" qstr
qstr := S ("'" string "'") | ('"' string '"')
This method resolves entities inside a string unless the parser parses DTD.
param
flag The '=' character forces the method to accept the '=' character before quoted string.
return
The name has been read.
exception
SAXException
exception
IOException
if (flag == '=") { wsskip(); if (next() != '=") panic(FAULT); } bqstr('-"); return new String(mBuff, 1, mBuffIdx);
private Pair find(Pair chain, char[] qname)
Finds a pair in the pair chain by a qualified name.
param
chain The first element of the chain of pairs.
param
qname The qualified name.
return
A pair with the specified qualified name or null.
for (Pair pair = chain; pair != null; pair = pair.next) { if (pair.eqname(qname) == true) return pair; } return null;
public int getColumnNumber()
Return the column number where the current document event ends.
return
Always returns -1 indicating the column number is not available.
see
#getLineNumber
return -1;
public int getLineNumber()
Return the line number where the current document event ends.
return
Always returns -1 indicating the line number is not available.
see
#getColumnNumber
return -1;
public java.lang.String getPublicId()
Return the public identifier for the current document event.
The return value is the public identifier of the document entity or of the external parsed entity in which the markup triggering the event appears.
return
A string containing the public identifier, or null if none is available.
see
#getSystemId
return (mInp != null)? mInp.pubid: null;
public java.lang.String getSystemId()
Return the system identifier for the current document event.
The return value is the system identifier of the document entity or of the external parsed entity in which the markup triggering the event appears.

If the system identifier is a URL, the parser must resolve it fully before passing it to the application.
return
A string containing the system identifier, or null if none is available.
see
#getPublicId
return (mInp != null)? mInp.sysid: null;
public boolean isNamespaceAware()
Indicates whether or not this parser is configured to understand namespaces.
return
true if this parser is configured to understand namespaces; false otherwise.
return mIsNSAware;
public boolean isValidating()
Indicates whether or not this parser is configured to validate XML documents.
return
true if this parser is configured to validate XML documents; false otherwise.
return false;
private boolean isdecl(Pair name, java.lang.String value)
Recognizes and handles a namespace declaration. This method identifies a type of namespace declaration if any and puts new mapping on top of prefix stack.
param
name The attribute qualified name (name.value is a String object which represents the attribute prefix).
param
value The attribute value.
return
true if a namespace declaration is recognized.
if (name.chars[0] == 0) { if ("xmlns".equals(name.name) == true) { // New default namespace declaration mPref = pair(mPref); mPref.value = value; mPref.name = ""; mPref.chars = NONS; return true; } } else { if (name.eqpref(XMLNS) == true) { // New prefix declaration int len = name.name.length(); mPref = pair(mPref); mPref.value = value; mPref.name = name.name; mPref.chars = new char[len + 1]; mPref.chars[0] = (char)(len + 1); name.name.getChars(0, len, mPref.chars, 1); return true; } } return false;
private java.lang.String name(boolean ns)
Reads a xml name. The xml name must conform "Namespaces in XML" specification. Therefore the ':' character is not allowed in the name. This method should be used for PI and entity names which may not have a namespace according to the specification mentioned above.
param
ns The true value turns namespace conformance on.
return
The name has been read.
exception
SAXException When incorrect character appear in the name.
exception
IOException
mBuffIdx = -1; bname(ns); return new String(mBuff, 1, mBuffIdx);
private char next()
Retrives the next character in the document.
return
The next character in the document.
if (mChIdx >= mChLen) { if (mInp.src == null) { pop(); // remove internal entity return next(); } // Read new portion of the document characters int Num = mInp.src.read(mChars, 0, mChars.length); if (Num < 0) { if (mInp != mDoc) { pop(); // restore the previous input return next(); } else { mChars[0] = EOS; mChLen = 1; } } else mChLen = Num; mChIdx = 0; } return mChars[mChIdx++];
private Pair pair(Pair next)
Provedes an instance of a pair.
param
next The reference to a next pair.
return
An instance of a pair.
Pair pair; if (mDltd != null) { pair = mDltd; mDltd = pair.next; } else { pair = new Pair(); } pair.next = next; return pair;
private void panic(java.lang.String msg)
Notifies the handler about fatal parsing error.
param
msg The problem description message.
SAXParseException spe = new SAXParseException(msg, this); mHand.fatalError(spe); throw spe; // [#1.2] fatal error definition
private void parse(org.xml.sax.helpers.DefaultHandler handler)
Parse the XML document content using the specified {@link org.xml.sax.helpers.DefaultHandler}.
param
handler The SAX DefaultHandler to use.
exception
IOException If any IO errors occur.
exception
SAXException If the underlying parser throws a SAXException while parsing.
see
org.xml.sax.helpers.DefaultHandler
try { // Initialize the parser mPEnt = new Hashtable(); mEnt = new Hashtable(); mDoc = mInp; // current input is document entity mChars = mInp.chars; // use document entity buffer // Parse an xml document char ch; mHand.setDocumentLocator(this); mHand.startDocument(); mSt = 1; while ((ch = next()) != EOS) { switch (chtyp(ch)) { case '<": ch = next(); switch (ch) { case '?": pi(); break; case '!": ch = next(); back(); if (ch == '-") comm(); else dtd(); break; default: // must be the first char of an xml name if (mSt == 5) // misc after document's element panic(FAULT); // Document's element. back(); mSt = 4; // document's element elm(); mSt = 5; // misc after document's element break; } break; case ' ": // Skip white spaces break; default: panic(FAULT); } } if (mSt != 5) // misc after document's element panic(FAULT); } finally { mHand.endDocument(); while (mAttL != null) { while (mAttL.list != null) { if (mAttL.list.list != null) del(mAttL.list.list); mAttL.list = del(mAttL.list); } mAttL = del(mAttL); } while (mElm != null) mElm = del(mElm); while (mPref != mXml) mPref = del(mPref); while (mInp != null) pop(); if ((mDoc != null) && (mDoc.src != null)) { try { mDoc.src.close(); } catch (IOException ioe) {} } mPEnt = null; mEnt = null; mDoc = null; mHand = null; mSt = 0; }
public void parse(java.io.InputStream src, org.xml.sax.helpers.DefaultHandler handler)
Parse the content of the given {@link java.io.InputStream} instance as XML using the specified {@link org.xml.sax.helpers.DefaultHandler}.
param
src InputStream containing the content to be parsed.
param
handler The SAX DefaultHandler to use.
exception
IOException If any IO errors occur.
exception
IllegalArgumentException If the given InputStream or handler is null.
exception
SAXException If the underlying parser throws a SAXException while parsing.
see
org.xml.sax.helpers.DefaultHandler
if ((src == null) || (handler == null)) throw new IllegalArgumentException(""); parse(new InputSource(src), handler);
public void parse(org.xml.sax.InputSource is, org.xml.sax.helpers.DefaultHandler handler)
Parse the content given {@link org.xml.sax.InputSource} as XML using the specified {@link org.xml.sax.helpers.DefaultHandler}.
param
is The InputSource containing the content to be parsed.
param
handler The SAX DefaultHandler to use.
exception
IOException If any IO errors occur.
exception
IllegalArgumentException If the InputSource or handler is null.
exception
SAXException If the underlying parser throws a SAXException while parsing.
see
org.xml.sax.helpers.DefaultHandler
if ((is == null) || (handler == null)) throw new IllegalArgumentException(""); // Set up the handler mHand = handler; // Set up the document mInp = new Input(BUFFSIZE_READER); setinp(is); parse(handler);
private void pent(char flag)
Resoves a parameter entity. This method resolves a parameter entity references. It is also reports external entities to the application.
param
flag The '-' instruct the method to do not set up surrounding spaces [#4.4.8].
exception
SAXException
exception
IOException
char ch; int idx = mBuffIdx + 1; Input inp = null; String str = null; bappend('%"); if (mSt != 2) // the DTD internal subset return; // Not Recognized [#4.4.1] // Read entity name bname(false); str = new String(mBuff, idx + 2, mBuffIdx - idx - 1); if (next() != ';") panic(FAULT); inp = (Input)mPEnt.get(str); // Restore the buffer offset mBuffIdx = idx - 1; if (inp != null) { if (inp.chars == null) { // External parameter entity InputSource is = mHand.resolveEntity(inp.pubid, inp.sysid); if (is != null) { if (flag != '-") bappend(' "); // tail space push(new Input(BUFFSIZE_READER)); // Need revisit: there is no leading space! [#4.4.8] setinp(is); mInp.pubid = inp.pubid; mInp.sysid = inp.sysid; } else { // Unresolved external parameter entity mHand.skippedEntity("%" + str); } } else { // Internal parameter entity if (flag == '-") { // No surrounding spaces inp.chIdx = 1; } else { // Insert surrounding spaces bappend(' "); // tail space inp.chIdx = 0; } push(inp); } } else { // Unknown parameter entity mHand.skippedEntity("%" + str); }
private void pi()
Parses a processing instruction.
exception
SAXException
exception
IOException
char ch; String str = null; mBuffIdx = -1; for (short st = 0; st >= 0;) { ch = next(); switch (st) { case 0: // read the PI target name switch (chtyp(ch)) { case 'a": case 'A": case '_": case ':": case 'X": back(); str = name(false); // PI target name may not be empty string [#2.6] // PI target name 'XML' is reserved [#2.6] if ((str.length() == 0) || (mXml.name.equals(str.toLowerCase()) == true)) panic(FAULT); // This is processing instruction if (mSt == 0) // the begining of the document mSt = 1; // misc before DTD wsskip(); // skip spaces after the PI target name st = 1; // accumulate the PI body mBuffIdx = -1; break; case 'Z": // EOS panic(FAULT); break; default: panic(FAULT); } break; case 1: // accumulate the PI body switch (ch) { case '?": st = 2; // end of the PI body break; case EOS: panic(FAULT); break; default: bappend(ch); break; } break; case 2: // end of the PI body switch (ch) { case '>": // PI has been read. mHand.processingInstruction( str, new String(mBuff, 0, mBuffIdx + 1)); st = -1; break; case '?": bappend('?"); break; case EOS: panic(FAULT); break; default: bappend('?"); bappend(ch); st = 1; // accumulate the PI body break; } break; default: panic(FAULT); } }
private void pop()
Restores previous input on the top of the input stack.
if (mInp.src != null) { try { mInp.src.close(); } catch (IOException ioe) {} mInp.src = null; } mInp = mInp.next; if (mInp != null) { mChars = mInp.chars; mChLen = mInp.chLen; mChIdx = mInp.chIdx; } else { mChars = null; mChLen = 0; mChIdx = 0; }
private void pubsys(Input inp)
Reads the public or/and system identifiers.
param
inp The input object.
exception
SAXException
exception
IOException
Pair pair = pubsys(' "); inp.pubid = pair.name; inp.sysid = pair.value; del(pair);
private Pair pubsys(char flag)
Reads the public or/and system identifiers.
param
flag The 'N' allows public id be without system id.
return
The public or/and system identifiers pair.
exception
SAXException
exception
IOException
Pair ids = pair(null); String str = name(false); if ("PUBLIC".equals(str) == true) { bqstr('i"); // non-CDATA normalization [#4.2.2] ids.name = new String(mBuff, 1, mBuffIdx); switch (wsskip()) { case '\"": case '\'": bqstr(' "); ids.value = new String(mBuff, 1, mBuffIdx); break; default: if (flag != 'N") // [#4.7] panic(FAULT); ids.value = null; break; } return ids; } else if ("SYSTEM".equals(str) == true) { ids.name = null; bqstr(' "); ids.value = new String(mBuff, 1, mBuffIdx); return ids; } panic(FAULT); return null;
private void push(Input inp)
Sets up current input on the top of the input stack.
param
inp A new input to set up.
mInp.chLen = mChLen; mInp.chIdx = mChIdx; inp.next = mInp; mInp = inp; mChars = inp.chars; mChLen = inp.chLen; mChIdx = inp.chIdx;
private char[] qname(boolean ns)
Reads a qualified xml name. The characters of a qualified name is an array of characters. The first (chars[0]) character is the index of the colon character which separates the prefix from the local name. If the index is zero, the name does not contain separator or the parser works in the namespace unaware mode. The length of qualified name is the length of the array minus one.
param
ns The true value turns namespace conformance on.
return
The characters of a qualified name.
exception
SAXException When incorrect character appear in the name.
exception
IOException
mBuffIdx = -1; bname(ns); char chars[] = new char[mBuffIdx + 1]; System.arraycopy(mBuff, 0, chars, 0, mBuffIdx + 1); return chars;
private java.lang.String rslv(char[] qname)
Resolves a prefix.
return
The namespace assigned to the prefix.
exception
SAXException When mapping for specified prefix is not found.
for (Pair pref = mPref; pref != null; pref = pref.next) { if (pref.eqpref(qname) == true) return pref.value; } if (qname[0] == 1) { // QNames like ':local' for (Pair pref = mPref; pref != null; pref = pref.next) { if (pref.chars[0] == 0) return pref.value; } } panic(FAULT); return null;
private void setch(char ch)
Sets the current character.
param
ch The character to set.
mChars[mChIdx] = ch;
private void setinp(org.xml.sax.InputSource is)
Sets up a new input source on the top of the input stack. Note, the first byte returned by the entity's byte stream has to be the first byte in the entity. However, the parser does not expect the byte order mask in both cases when encoding is provided by the input source.
param
is A new input source to set up.
exception
IOException If any IO errors occur.
exception
SAXException If the input source cannot be read.
Reader reader = null; mChIdx = 0; mChLen = 0; mChars = mInp.chars; mInp.src = null; if (mSt == 0) mIsSAlone = false; // default [#2.9] if (is.getCharacterStream() != null) { // Ignore encoding in the xml text decl. reader = is.getCharacterStream(); xml(reader); } else if (is.getByteStream() != null) { String expenc; if (is.getEncoding() != null) { // Ignore encoding in the xml text decl. expenc = is.getEncoding().toUpperCase(); if (expenc.equals("UTF-16")) reader = bom(is.getByteStream(), 'U"); // UTF-16 [#4.3.3] else reader = enc(expenc, is.getByteStream()); xml(reader); } else { // Get encoding from BOM or the xml text decl. reader = bom(is.getByteStream(), ' "); if (reader == null) { // Encoding is defined by the xml text decl. reader = enc("UTF-8", is.getByteStream()); expenc = xml(reader); if (expenc.startsWith("UTF-16")) panic(FAULT); // UTF-16 must have BOM [#4.3.3] reader = enc(expenc, is.getByteStream()); } else { // Encoding is defined by the BOM. xml(reader); } } } else { // There is no support for public/system identifiers. panic(FAULT); } mInp.src = reader; mInp.pubid = is.getPublicId(); mInp.sysid = is.getSystemId();
private char wsskip()
Skips xml white space characters. This method skips white space characters (' ', '\t', '\n', '\r') and looks ahead not white space character.
return
The first not white space look ahead character.
exception
SAXException When End Of Stream character typed.
exception
IOException
char ch; char type; while (true) { // Read next character ch = (mChIdx < mChLen)? mChars[mChIdx++]: next(); type = (char)0; // [X] if (ch < 0x80) { type = (char)nmttyp[ch]; } else if (ch == EOS) { panic(FAULT); } if (type != 3) { // [ \t\n\r] mChIdx--; // back(); return ch; } }
private java.lang.String xml(java.io.Reader reader)
Parses the xml text declaration. This method gets encoding from the xml text declaration [#4.3.1] if any. The method assumes the buffer (mChars) is big enough to accomodate whole xml text declaration.
param
reader is entity reader.
return
The xml text declaration encoding or default UTF-8 encoding.
exception
SAXException
exception
IOException
String str = null; String enc = "UTF-8"; char ch; int val; short st; // Read the xml text declaration into the buffer if (mChIdx != 0) { // The bom method have read ONE char into the buffer. st = (short)((mChars[0] == '<")? 1: -1); } else { st = 0; } while (st >= 0 && mChIdx < mChars.length) { ch = ((val = reader.read()) >= 0)? (char)val: EOS; mChars[mChIdx++] = ch; switch (st) { case 0: // read '<' of xml declaration switch (ch) { case '<": st = 1; break; case 0xfeff: // the byte order mask ch = ((val = reader.read()) >= 0)? (char)val: EOS; mChars[mChIdx - 1] = ch; st = (short)((ch == '<")? 1: -1); break; default: st = -1; break; } break; case 1: // read '?' of xml declaration [#4.3.1] st = (short)((ch == '?")? 2: -1); break; case 2: // read 'x' of xml declaration [#4.3.1] st = (short)((ch == 'x")? 3: -1); break; case 3: // read 'm' of xml declaration [#4.3.1] st = (short)((ch == 'm")? 4: -1); break; case 4: // read 'l' of xml declaration [#4.3.1] st = (short)((ch == 'l")? 5: -1); break; case 5: // read white space after 'xml' switch (ch) { case ' ": case '\t": case '\r": case '\n": st = 6; break; default: st = -1; break; } break; case 6: // read content of xml declaration switch (ch) { case '?": st = 7; break; case EOS: st = -2; break; default: break; } break; case 7: // read '>' after '?' of xml declaration switch (ch) { case '>": case EOS: st = -2; break; default: st = 6; break; } break; default: panic(FAULT); break; } } mChLen = mChIdx; mChIdx = 0; // If there is no xml text declaration, the encoding is default. 
if (st == -1) { return enc; } mChIdx = 5; // the first white space after "<?xml" // Parse the xml text declaration for (st = 0; st >= 0;) { ch = next(); switch (st) { case 0: // skip spaces after the xml declaration name if (chtyp(ch) != ' ") { back(); st = 1; } break; case 1: // read xml declaration version case 2: // read xml declaration encoding or standalone case 3: // read xml declaration standalone switch (chtyp(ch)) { case 'a": case 'A": case '_": back(); str = name(false).toLowerCase(); if ("version".equals(str) == true) { if (st != 1) panic(FAULT); if ("1.0".equals(eqstr('=")) != true) panic(FAULT); st = 2; } else if ("encoding".equals(str) == true) { if (st != 2) panic(FAULT); enc = eqstr('=").toUpperCase(); st = 3; } else if ("standalone".equals(str) == true) { if ((st == 1) || (mSt != 0)) // [#4.3.1] panic(FAULT); str = eqstr('=").toLowerCase(); // Check the 'standalone' value and use it if (str.equals("yes") == true) { mIsSAlone = true; } else if (str.equals("no") == true) { mIsSAlone = false; } else { panic(FAULT); } st = 4; } else { panic(FAULT); } break; case ' ": break; case '?": if (st == 1) panic(FAULT); back(); st = 4; break; default: panic(FAULT); } break; case 4: // end of xml declaration switch (chtyp(ch)) { case '?": if (next() != '>") panic(FAULT); if (mSt == 0) // the begining of the document mSt = 1; // misc before DTD st = -1; break; case ' ": break; default: panic(FAULT); } break; default: panic(FAULT); } } return enc;