Methods Summary |
---|
boolean | checkQuestion(int off)
return false;
|
int | decodeEscaped()
// Decodes the escape sequence at the current token and returns the character it denotes.
// The current token must be T_BACKSOLIDUS, otherwise ex("parser.next.1", ...) is thrown.
// \n, \r and \t map to their control characters; the listed metacharacters map to
// themselves; any other escaped character is rejected with ex("parser.process.1", ...).
if (this.read() != T_BACKSOLIDUS) {
    throw ex("parser.next.1", this.offset-1);
}
int c = this.chardata;
switch (c) {
    case 'n': c = '\n'; break; // LINE FEED U+000A
    case 'r': c = '\r'; break; // CARRIAGE RETURN U+000D
    case 't': c = '\t'; break; // HORIZONTAL TABULATION U+0009
    case '\\':
    case '|':
    case '.':
    case '^':
    case '-':
    case '?':
    case '*':
    case '+':
    case '{':
    case '}':
    case '(':
    case ')':
    case '[':
    case ']':
        break; // return the actual char
    default:
        throw ex("parser.process.1", this.offset-2);
}
return c;
|
protected static synchronized RangeToken | getRange(java.lang.String name, boolean positive)
// Looks up a named range, building both lookup tables on first use.
// ranges holds the positive ranges and ranges2 holds their complements, under the same keys.
// name is one of the "xml:is..." keys registered below; positive selects ranges (true)
// or ranges2 (false). Returns null when name was never registered (Hashtable.get).
if (ranges == null) {
    ranges = new Hashtable();
    ranges2 = new Hashtable();

    Token tok = Token.createRange();
    setupRange(tok, SPACES);
    ranges.put("xml:isSpace", tok);
    ranges2.put("xml:isSpace", Token.complementRanges(tok));

    // "xml:isDigit" is registered once; the original built and stored the identical range twice.
    tok = Token.createRange();
    setupRange(tok, DIGITS);
    ranges.put("xml:isDigit", tok);
    ranges2.put("xml:isDigit", Token.complementRanges(tok));

    // Word characters are LETTERS merged with the digit range registered just above.
    tok = Token.createRange();
    setupRange(tok, LETTERS);
    tok.mergeRanges((Token)ranges.get("xml:isDigit"));
    ranges.put("xml:isWord", tok);
    ranges2.put("xml:isWord", Token.complementRanges(tok));

    tok = Token.createRange();
    setupRange(tok, NAMECHARS);
    ranges.put("xml:isNameChar", tok);
    ranges2.put("xml:isNameChar", Token.complementRanges(tok));

    // Initial name characters are LETTERS plus '_' and ':'.
    tok = Token.createRange();
    setupRange(tok, LETTERS);
    tok.addRange('_', '_');
    tok.addRange(':', ':');
    ranges.put("xml:isInitialNameChar", tok);
    ranges2.put("xml:isInitialNameChar", Token.complementRanges(tok));
}
RangeToken tok = positive ? (RangeToken)ranges.get(name)
                          : (RangeToken)ranges2.get(name);
return tok;
|
Token | getTokenForShorthand(int ch)
// Maps a shorthand escape letter to its range via ParserForXMLSchema.getRange.
// Lower-case letters (d, w, s, c, i) request the positive range; the matching
// upper-case letters request the complement. Any other value of ch raises a
// RuntimeException whose message carries ch in hexadecimal.
switch (ch) {
    case 'd':
        return ParserForXMLSchema.getRange("xml:isDigit", true);
    case 'D':
        return ParserForXMLSchema.getRange("xml:isDigit", false);
    case 'w':
        return ParserForXMLSchema.getRange("xml:isWord", true);
    case 'W':
        return ParserForXMLSchema.getRange("xml:isWord", false);
    case 's':
        return ParserForXMLSchema.getRange("xml:isSpace", true);
    case 'S':
        return ParserForXMLSchema.getRange("xml:isSpace", false);
    case 'c':
        return ParserForXMLSchema.getRange("xml:isNameChar", true);
    case 'C':
        return ParserForXMLSchema.getRange("xml:isNameChar", false);
    case 'i':
        return ParserForXMLSchema.getRange("xml:isInitialNameChar", true);
    case 'I':
        return ParserForXMLSchema.getRange("xml:isInitialNameChar", false);
    default:
        throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));
}
|
protected RangeToken | parseCharacterClass(boolean useNrange)Parses a character-class-expression, not a character-class-escape.
c-c-expression ::= '[' c-group ']'
c-group ::= positive-c-group | negative-c-group | c-c-subtraction
positive-c-group ::= (c-range | c-c-escape)+
negative-c-group ::= '^' positive-c-group
c-c-subtraction ::= (positive-c-group | negative-c-group) subtraction
subtraction ::= '-' c-c-expression
c-range ::= single-range | from-to-range
single-range ::= multi-c-escape | category-c-escape | block-c-escape | <any XML char>
cc-normal-c ::= <any character except [, ], \>
from-to-range ::= cc-normal-c '-' cc-normal-c
// Collects the members of the bracket expression into a range token, then sorts and
// compacts it before returning. For a negated class (leading '^') the collected ranges
// are subtracted from a base range covering 0..Token.UTF16_MAX. A subtraction part is
// parsed by a recursive call and subtracted from the result.
// Throws ex("parser.cc.2", ...) when input ends before the closing ']'.
// NOTE: the useNrange argument is not read anywhere in this body.
this.setContext(S_INBRACKETS);
this.next(); // '['
boolean nrange = false;
boolean wasDecoded = false; // set when the last '-' was escaped; only written, never read, in this body.
RangeToken base = null;
RangeToken tok;
if (this.read() == T_CHAR && this.chardata == '^') {
    nrange = true;
    this.next(); // '^'
    base = Token.createRange();
    base.addRange(0, Token.UTF16_MAX);
    tok = Token.createRange();
} else {
    tok = Token.createRange();
}
int type;
boolean firstloop = true;
// Don't use 'continue' in this loop: firstloop is cleared at the bottom of the body.
while ((type = this.read()) != T_EOF) {
    wasDecoded = false;
    // single-range | from-to-range | subtraction
    if (type == T_CHAR && this.chardata == ']' && !firstloop) {
        if (nrange) {
            base.subtractRanges(tok);
            tok = base;
        }
        break;
    }
    int c = this.chardata;
    boolean end = false; // true once a shorthand or property range has been merged as a whole
    if (type == T_BACKSOLIDUS) {
        switch (c) {
            case 'd': case 'D':
            case 'w': case 'W':
            case 's': case 'S':
                tok.mergeRanges(this.getTokenForShorthand(c));
                end = true;
                break;
            case 'i': case 'I':
            case 'c': case 'C':
                c = this.processCIinCharacterClass(tok, c);
                if (c < 0) end = true;
                break;
            case 'p':
            case 'P':
                int pstart = this.offset;
                RangeToken tok2 = this.processBacksolidus_pP(c);
                if (tok2 == null) throw this.ex("parser.atom.5", pstart);
                tok.mergeRanges(tok2);
                end = true;
                break;
            case '-':
                c = this.decodeEscaped();
                wasDecoded = true;
                break;
            default:
                c = this.decodeEscaped();
        } // \ + c
    } // backsolidus
    else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
        // Subtraction
        if (nrange) {
            base.subtractRanges(tok);
            tok = base;
        }
        RangeToken range2 = this.parseCharacterClass(false);
        tok.subtractRanges(range2);
        if (this.read() != T_CHAR || this.chardata != ']')
            throw this.ex("parser.cc.5", this.offset);
        break; // Exit this loop
    }
    this.next();
    if (!end) { // if not shorthands...
        if (type == T_CHAR) {
            if (c == '[') throw this.ex("parser.cc.6", this.offset-2);
            if (c == ']') throw this.ex("parser.cc.7", this.offset-2);
            if (c == '-' && this.chardata != ']' && !firstloop) throw this.ex("parser.cc.8", this.offset-2); // if regex = '[-]' then invalid
        }
        if (this.read() != T_CHAR || this.chardata != '-' || c == '-' && firstloop) { // Here is no '-'.
            tok.addRange(c, c);
        } else { // Found '-'
            // Is this '-' part of a from-to range?
            this.next(); // Skips '-'
            if ((type = this.read()) == T_EOF) throw this.ex("parser.cc.2", this.offset);
            // c '-' ']' -> '-' is a single-range.
            if (type == T_CHAR && this.chardata == ']') { // if - is at the last position of the group
                tok.addRange(c, c);
                tok.addRange('-', '-');
            }
            else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
                throw this.ex("parser.cc.8", this.offset-1);
            } else {
                int rangeend = this.chardata;
                if (type == T_CHAR) {
                    if (rangeend == '[') throw this.ex("parser.cc.6", this.offset-1);
                    if (rangeend == ']') throw this.ex("parser.cc.7", this.offset-1);
                    if (rangeend == '-') throw this.ex("parser.cc.8", this.offset-2);
                }
                else if (type == T_BACKSOLIDUS)
                    rangeend = this.decodeEscaped();
                this.next();
                if (c > rangeend) throw this.ex("parser.ope.3", this.offset-1);
                tok.addRange(c, rangeend);
            }
        }
    }
    firstloop = false;
}
if (this.read() == T_EOF)
    throw this.ex("parser.cc.2", this.offset);
tok.sortRanges();
tok.compactRanges();
//tok.dumpRanges();
this.setContext(S_NORMAL);
this.next(); // Skips ']'
return tok;
|
protected RangeToken | parseSetOperations()
throw this.ex("parser.process.1", this.offset);
|
Token | processBackreference()
throw ex("parser.process.1", this.offset-4);
|
Token | processBacksolidus_A()
throw ex("parser.process.1", this.offset);
|
Token | processBacksolidus_B()
throw ex("parser.process.1", this.offset);
|
Token | processBacksolidus_C()
// Advances past the current token, then returns the range registered for the 'C' shorthand
// (getTokenForShorthand maps 'C' to the complement of "xml:isNameChar").
this.next();
return this.getTokenForShorthand('C');
|
Token | processBacksolidus_I()
// Advances past the current token, then returns the range registered for the 'I' shorthand
// (getTokenForShorthand maps 'I' to the complement of "xml:isInitialNameChar").
this.next();
return this.getTokenForShorthand('I');
|
Token | processBacksolidus_X()
throw ex("parser.process.1", this.offset-2);
|
Token | processBacksolidus_Z()
throw ex("parser.process.1", this.offset);
|
Token | processBacksolidus_b()
throw ex("parser.process.1", this.offset);
|
Token | processBacksolidus_c()
// Advances past the current token, then returns the range registered for the 'c' shorthand
// (getTokenForShorthand maps 'c' to "xml:isNameChar").
this.next();
return this.getTokenForShorthand('c');
|
Token | processBacksolidus_g()
throw this.ex("parser.process.1", this.offset-2);
|
Token | processBacksolidus_gt()
throw ex("parser.process.1", this.offset);
|
Token | processBacksolidus_i()
// Advances past the current token, then returns the range registered for the 'i' shorthand
// (getTokenForShorthand maps 'i' to "xml:isInitialNameChar").
this.next();
return this.getTokenForShorthand('i');
|
Token | processBacksolidus_lt()
throw ex("parser.process.1", this.offset);
|
Token | processBacksolidus_z()
throw ex("parser.process.1", this.offset);
|
int | processCIinCharacterClass(RangeToken tok, int c)
tok.mergeRanges(this.getTokenForShorthand(c));
return -1;
|
Token | processCaret()
// Advances past the current token and returns a plain character token for '^'.
this.next();
return Token.createChar('^');
|
Token | processCondition()
throw ex("parser.process.1", this.offset);
|
Token | processDollar()
// Advances past the current token and returns a plain character token for '$'.
this.next();
return Token.createChar('$');
|
Token | processIndependent()
throw ex("parser.process.1", this.offset);
|
Token | processLookahead()
throw ex("parser.process.1", this.offset);
|
Token | processLookbehind()
throw ex("parser.process.1", this.offset);
|
Token | processModifiers()
throw ex("parser.process.1", this.offset);
|
Token | processNegativelookahead()
throw ex("parser.process.1", this.offset);
|
Token | processNegativelookbehind()
throw ex("parser.process.1", this.offset);
|
Token | processParen()
// Advances past the current token, parses the enclosed regex and wraps it with
// Token.createParen(..., 0). The token that follows must be T_RPAREN; it is consumed
// before returning, otherwise ex("parser.factor.1", ...) is thrown.
this.next();
final Token group = Token.createParen(this.parseRegex(), 0);
if (this.read() == T_RPAREN) {
    this.next(); // Skips ')'
    return group;
}
throw this.ex("parser.factor.1", this.offset-1);
|
Token | processParen2()
throw ex("parser.process.1", this.offset);
|
Token | processPlus(Token tok)
// X+ -> XX*
this.next();
return Token.createConcat(tok, Token.createClosure(tok));
|
Token | processQuestion(Token tok)
// Rewrites X? as a union of X and the empty token: advances past the current
// token, then builds the union with tok as first child and an empty token as second.
this.next();
final Token optional = Token.createUnion();
optional.addChild(tok);
final Token nothing = Token.createEmpty();
optional.addChild(nothing);
return optional;
|
Token | processStar(Token tok)
this.next();
return Token.createClosure(tok);
|
static void | setupRange(Token range, java.lang.String src)
int len = src.length();
for (int i = 0; i < len; i += 2)
range.addRange(src.charAt(i), src.charAt(i+1));
|