File Doc Category Size Date Package
ParserForXMLSchema.java API Doc Apache Xerces 3.0.1 23128 Fri Sep 14 20:33:52 BST 2007 org.apache.xerces.impl.xpath.regex

ParserForXMLSchema

java.lang.Object
- RegexParser

public class ParserForXMLSchema extends RegexParser

A regular expression parser for the XML Schema.

xerces.internal
author: TAMURA Kent <kent@trl.ibm.co.jp>
version: $Id: ParserForXMLSchema.java 571468 2007-08-31 14:01:58Z knoaman $

Fields Summary
private static Hashtable
ranges
private static Hashtable
ranges2
private static final String
SPACES
private static final String
NAMECHARS
private static final String
LETTERS
private static final String
DIGITS
Constructors Summary
/**
 * Creates a parser. The body performs no work of its own: the only
 * statement, a {@code setLocale} call, is commented out.
 */
public ParserForXMLSchema() {
    // this.setLocale(Locale.getDefault());
}
/**
 * Creates a parser, accepting a locale argument.
 *
 * NOTE(review): {@code locale} is not used by the visible body (the
 * {@code setLocale} call is commented out) -- confirm whether it is meant
 * to be forwarded to the superclass.
 *
 * @param locale currently unused by this constructor body
 */
public ParserForXMLSchema(Locale locale) {
    // this.setLocale(locale);
}
Methods Summary
boolean checkQuestion(int off)
return false;
int decodeEscaped()
if (this.read() != T_BACKSOLIDUS) throw ex("parser.next.1", this.offset-1); int c = this.chardata; switch (c) { case 'n": c = '\n"; break; // LINE FEED U+000A case 'r": c = '\r"; break; // CRRIAGE RETURN U+000D case 't": c = '\t"; break; // HORIZONTAL TABULATION U+0009 case '\\": case '|": case '.": case '^": case '-": case '?": case '*": case '+": case '{": case '}": case '(": case ')": case '[": case ']": break; // return actucal char default: throw ex("parser.process.1", this.offset-2); } return c;
protected static synchronized RangeToken getRange(java.lang.String name, boolean positive)
if (ranges == null) { ranges = new Hashtable(); ranges2 = new Hashtable(); Token tok = Token.createRange(); setupRange(tok, SPACES); ranges.put("xml:isSpace", tok); ranges2.put("xml:isSpace", Token.complementRanges(tok)); tok = Token.createRange(); setupRange(tok, DIGITS); ranges.put("xml:isDigit", tok); ranges2.put("xml:isDigit", Token.complementRanges(tok)); tok = Token.createRange(); setupRange(tok, DIGITS); ranges.put("xml:isDigit", tok); ranges2.put("xml:isDigit", Token.complementRanges(tok)); tok = Token.createRange(); setupRange(tok, LETTERS); tok.mergeRanges((Token)ranges.get("xml:isDigit")); ranges.put("xml:isWord", tok); ranges2.put("xml:isWord", Token.complementRanges(tok)); tok = Token.createRange(); setupRange(tok, NAMECHARS); ranges.put("xml:isNameChar", tok); ranges2.put("xml:isNameChar", Token.complementRanges(tok)); tok = Token.createRange(); setupRange(tok, LETTERS); tok.addRange('_", '_"); tok.addRange(':", ':"); ranges.put("xml:isInitialNameChar", tok); ranges2.put("xml:isInitialNameChar", Token.complementRanges(tok)); } RangeToken tok = positive ? (RangeToken)ranges.get(name) : (RangeToken)ranges2.get(name); return tok;
Token getTokenForShorthand(int ch)
switch (ch) { case 'd": return ParserForXMLSchema.getRange("xml:isDigit", true); case 'D": return ParserForXMLSchema.getRange("xml:isDigit", false); case 'w": return ParserForXMLSchema.getRange("xml:isWord", true); case 'W": return ParserForXMLSchema.getRange("xml:isWord", false); case 's": return ParserForXMLSchema.getRange("xml:isSpace", true); case 'S": return ParserForXMLSchema.getRange("xml:isSpace", false); case 'c": return ParserForXMLSchema.getRange("xml:isNameChar", true); case 'C": return ParserForXMLSchema.getRange("xml:isNameChar", false); case 'i": return ParserForXMLSchema.getRange("xml:isInitialNameChar", true); case 'I": return ParserForXMLSchema.getRange("xml:isInitialNameChar", false); default: throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16)); }
protected RangeToken parseCharacterClass(boolean useNrange)
Parses a character-class-expression, not a character-class-escape. c-c-expression ::= '[' c-group ']' c-group ::= positive-c-group | negative-c-group | c-c-subtraction positive-c-group ::= (c-range | c-c-escape)+ negative-c-group ::= '^' positive-c-group c-c-subtraction ::= (positive-c-group | negative-c-group) subtraction subtraction ::= '-' c-c-expression c-range ::= single-range | from-to-range single-range ::= multi-c-escape | category-c-escape | block-c-escape | <any XML char> cc-normal-c ::= <any character except [, ], \> from-to-range ::= cc-normal-c '-' cc-normal-c
param
useNrage Ignored.
return
This returns no NrageToken.
this.setContext(S_INBRACKETS); this.next(); // '[' boolean nrange = false; boolean wasDecoded = false; // used to detect if the last - was escaped. RangeToken base = null; RangeToken tok; if (this.read() == T_CHAR && this.chardata == '^") { nrange = true; this.next(); // '^' base = Token.createRange(); base.addRange(0, Token.UTF16_MAX); tok = Token.createRange(); } else { tok = Token.createRange(); } int type; boolean firstloop = true; while ((type = this.read()) != T_EOF) { // Don't use 'cotinue' for this loop. wasDecoded = false; // single-range | from-to-range | subtraction if (type == T_CHAR && this.chardata == ']" && !firstloop) { if (nrange) { base.subtractRanges(tok); tok = base; } break; } int c = this.chardata; boolean end = false; if (type == T_BACKSOLIDUS) { switch (c) { case 'd": case 'D": case 'w": case 'W": case 's": case 'S": tok.mergeRanges(this.getTokenForShorthand(c)); end = true; break; case 'i": case 'I": case 'c": case 'C": c = this.processCIinCharacterClass(tok, c); if (c < 0) end = true; break; case 'p": case 'P": int pstart = this.offset; RangeToken tok2 = this.processBacksolidus_pP(c); if (tok2 == null) throw this.ex("parser.atom.5", pstart); tok.mergeRanges(tok2); end = true; break; case '-": c = this.decodeEscaped(); wasDecoded = true; break; default: c = this.decodeEscaped(); } // \ + c } // backsolidus else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) { // Subraction if (nrange) { base.subtractRanges(tok); tok = base; } RangeToken range2 = this.parseCharacterClass(false); tok.subtractRanges(range2); if (this.read() != T_CHAR || this.chardata != ']") throw this.ex("parser.cc.5", this.offset); break; // Exit this loop } this.next(); if (!end) { // if not shorthands... 
if (type == T_CHAR) { if (c == '[") throw this.ex("parser.cc.6", this.offset-2); if (c == ']") throw this.ex("parser.cc.7", this.offset-2); if (c == '-" && this.chardata != ']" && !firstloop) throw this.ex("parser.cc.8", this.offset-2); // if regex = '[-]' then invalid } if (this.read() != T_CHAR || this.chardata != '-" || c == '-" && firstloop) { // Here is no '-'. tok.addRange(c, c); } else { // Found '-' // Is this '-' is a from-to token?? this.next(); // Skips '-' if ((type = this.read()) == T_EOF) throw this.ex("parser.cc.2", this.offset); // c '-' ']' -> '-' is a single-range. if(type == T_CHAR && this.chardata == ']") { // if - is at the last position of the group tok.addRange(c, c); tok.addRange('-", '-"); } else if (type == T_XMLSCHEMA_CC_SUBTRACTION) { throw this.ex("parser.cc.8", this.offset-1); } else { int rangeend = this.chardata; if (type == T_CHAR) { if (rangeend == '[") throw this.ex("parser.cc.6", this.offset-1); if (rangeend == ']") throw this.ex("parser.cc.7", this.offset-1); if (rangeend == '-") throw this.ex("parser.cc.8", this.offset-2); } else if (type == T_BACKSOLIDUS) rangeend = this.decodeEscaped(); this.next(); if (c > rangeend) throw this.ex("parser.ope.3", this.offset-1); tok.addRange(c, rangeend); } } } firstloop = false; } if (this.read() == T_EOF) throw this.ex("parser.cc.2", this.offset); tok.sortRanges(); tok.compactRanges(); //tok.dumpRanges(); this.setContext(S_NORMAL); this.next(); // Skips ']' return tok;
protected RangeToken parseSetOperations()
throw this.ex("parser.process.1", this.offset);
Token processBackreference()
throw ex("parser.process.1", this.offset-4);
Token processBacksolidus_A()
throw ex("parser.process.1", this.offset);
Token processBacksolidus_B()
throw ex("parser.process.1", this.offset);
Token processBacksolidus_C()
this.next(); return this.getTokenForShorthand('C");
Token processBacksolidus_I()
this.next(); return this.getTokenForShorthand('I");
Token processBacksolidus_X()
throw ex("parser.process.1", this.offset-2);
Token processBacksolidus_Z()
throw ex("parser.process.1", this.offset);
Token processBacksolidus_b()
throw ex("parser.process.1", this.offset);
Token processBacksolidus_c()
this.next(); return this.getTokenForShorthand('c");
Token processBacksolidus_g()
throw this.ex("parser.process.1", this.offset-2);
Token processBacksolidus_gt()
throw ex("parser.process.1", this.offset);
Token processBacksolidus_i()
this.next(); return this.getTokenForShorthand('i");
Token processBacksolidus_lt()
throw ex("parser.process.1", this.offset);
Token processBacksolidus_z()
throw ex("parser.process.1", this.offset);
int processCIinCharacterClass(RangeToken tok, int c)
tok.mergeRanges(this.getTokenForShorthand(c)); return -1;
Token processCaret()
this.next(); return Token.createChar('^");
Token processCondition()
throw ex("parser.process.1", this.offset);
Token processDollar()
this.next(); return Token.createChar('$");
Token processIndependent()
throw ex("parser.process.1", this.offset);
Token processLookahead()
throw ex("parser.process.1", this.offset);
Token processLookbehind()
throw ex("parser.process.1", this.offset);
Token processModifiers()
throw ex("parser.process.1", this.offset);
Token processNegativelookahead()
throw ex("parser.process.1", this.offset);
Token processNegativelookbehind()
throw ex("parser.process.1", this.offset);
Token processParen()
this.next(); Token tok = Token.createParen(this.parseRegex(), 0); if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1); this.next(); // Skips ')' return tok;
Token processParen2()
throw ex("parser.process.1", this.offset);
Token processPlus(Token tok)
// X+ -> XX* this.next(); return Token.createConcat(tok, Token.createClosure(tok));
Token processQuestion(Token tok)
// X? -> X| this.next(); Token par = Token.createUnion(); par.addChild(tok); par.addChild(Token.createEmpty()); return par;
Token processStar(Token tok)
this.next(); return Token.createClosure(tok);
static void setupRange(Token range, java.lang.String src)
int len = src.length(); for (int i = 0; i < len; i += 2) range.addRange(src.charAt(i), src.charAt(i+1));