File | Doc | Category | Size | Date | Package
ParserForXMLSchema.java | API Doc | Java SE 5 API | 23975 | Fri Aug 26 14:55:48 BST 2005 | com.sun.org.apache.xerces.internal.impl.xpath.regex

ParserForXMLSchema

public class ParserForXMLSchema extends RegexParser
A regular expression parser for XML Schema.
author
TAMURA Kent <kent@trl.ibm.co.jp>
version
$Id: ParserForXMLSchema.java,v 1.5 2003/03/24 23:37:55 sandygao Exp $

Fields Summary
private static Hashtable
ranges
private static Hashtable
ranges2
private static final String
SPACES
private static final String
NAMECHARS
private static final String
LETTERS
private static final String
DIGITS
Constructors Summary
public ParserForXMLSchema()

        // No work is performed here: the only statement, a call that would apply
        // the default locale, is commented out.
        //this.setLocale(Locale.getDefault());
    
public ParserForXMLSchema(Locale locale)

        // No work is performed here: the setLocale call is commented out, so the
        // `locale` argument is currently unused.
        //this.setLocale(locale);
    
Methods Summary
booleancheckQuestion(int off)

        return false;
    
intdecodeEscaped()

        // Decodes the escape sequence at the current token and returns the
        // character it stands for. Throws (via ex) when the current token is not a
        // backslash escape, or when the escaped character is not in the list below.
        if (this.read() != T_BACKSOLIDUS)  throw ex("parser.next.1", this.offset-1);
        int c = this.chardata;
        switch (c) {
          case 'n':  c = '\n';  break; // LINE FEED U+000A
          case 'r':  c = '\r';  break; // CARRIAGE RETURN U+000D
          case 't':  c = '\t';  break; // HORIZONTAL TABULATION U+0009
          case '\\':
          case '|':
          case '.':
          case '^':
          case '-':
          case '?':
          case '*':
          case '+':
          case '{':
          case '}':
          case '(':
          case ')':
          case '[':
          case ']':
            break; // return the actual char
          default:
            // Any other escaped character is rejected.
            throw ex("parser.process.1", this.offset-2);
        }
        return c;
    
protected static synchronized com.sun.org.apache.xerces.internal.impl.xpath.regex.RangeTokengetRange(java.lang.String name, boolean positive)

        // Looks up the range registered under `name`: the range itself when
        // `positive` is true, otherwise its complement. Returns null when no range
        // was registered under that name.
        // Both lookup tables are built on the first call; the method is
        // synchronized, so that one-time setup cannot run concurrently.
        if (ranges == null) {
            ranges = new Hashtable();
            ranges2 = new Hashtable();

            Token tok = Token.createRange();
            setupRange(tok, SPACES);
            ranges.put("xml:isSpace", tok);
            ranges2.put("xml:isSpace", Token.complementRanges(tok));

            // Registered once; the original built this identical entry twice.
            tok = Token.createRange();
            setupRange(tok, DIGITS);
            ranges.put("xml:isDigit", tok);
            ranges2.put("xml:isDigit", Token.complementRanges(tok));

            // Word characters = LETTERS plus the digit range registered above.
            tok = Token.createRange();
            setupRange(tok, LETTERS);
            tok.mergeRanges((Token)ranges.get("xml:isDigit"));
            ranges.put("xml:isWord", tok);
            ranges2.put("xml:isWord", Token.complementRanges(tok));

            tok = Token.createRange();
            setupRange(tok, NAMECHARS);
            ranges.put("xml:isNameChar", tok);
            ranges2.put("xml:isNameChar", Token.complementRanges(tok));

            // Initial name characters = LETTERS plus '_' and ':'.
            tok = Token.createRange();
            setupRange(tok, LETTERS);
            tok.addRange('_', '_');
            tok.addRange(':', ':');
            ranges.put("xml:isInitialNameChar", tok);
            ranges2.put("xml:isInitialNameChar", Token.complementRanges(tok));
        }
        RangeToken tok = positive ? (RangeToken)ranges.get(name)
            : (RangeToken)ranges2.get(name);
        return tok;
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokengetTokenForShorthand(int ch)

        // Maps a shorthand escape letter to its named range. A lower-case letter
        // selects the range itself; the matching upper-case letter selects its
        // complement. Any other letter raises a RuntimeException.
        switch (ch) {
          case 'd':
            return ParserForXMLSchema.getRange("xml:isDigit", true);
          case 'D':
            return ParserForXMLSchema.getRange("xml:isDigit", false);
          case 'w':
            return ParserForXMLSchema.getRange("xml:isWord", true);
          case 'W':
            return ParserForXMLSchema.getRange("xml:isWord", false);
          case 's':
            return ParserForXMLSchema.getRange("xml:isSpace", true);
          case 'S':
            return ParserForXMLSchema.getRange("xml:isSpace", false);
          case 'c':
            return ParserForXMLSchema.getRange("xml:isNameChar", true);
          case 'C':
            return ParserForXMLSchema.getRange("xml:isNameChar", false);
          case 'i':
            return ParserForXMLSchema.getRange("xml:isInitialNameChar", true);
          case 'I':
            return ParserForXMLSchema.getRange("xml:isInitialNameChar", false);
          default:
            // The message reports the unexpected character as a hex code.
            throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));
        }
    
protected com.sun.org.apache.xerces.internal.impl.xpath.regex.RangeTokenparseCharacterClass(boolean useNrange)
Parses a character-class-expression, not a character-class-escape. c-c-expression ::= '[' c-group ']' c-group ::= positive-c-group | negative-c-group | c-c-subtraction positive-c-group ::= (c-range | c-c-escape)+ negative-c-group ::= '^' positive-c-group c-c-subtraction ::= (positive-c-group | negative-c-group) subtraction subtraction ::= '-' c-c-expression c-range ::= single-range | from-to-range single-range ::= multi-c-escape | category-c-escape | block-c-escape | cc-normal-c ::= from-to-range ::= cc-normal-c '-' cc-normal-c

param
useNrage Ignored.
return
This returns no NrageToken.

        this.setContext(S_INBRACKETS);
        this.next();                            // '['
        boolean nrange = false;
        RangeToken base = null;
        RangeToken tok;
        if (this.read() == T_CHAR && this.chardata == '^") {
            nrange = true;
            this.next();                        // '^'
            base = Token.createRange();
            base.addRange(0, Token.UTF16_MAX);
            tok = Token.createRange();
        } else {
            tok = Token.createRange();
        }
        int type;
        boolean firstloop = true;
        while ((type = this.read()) != T_EOF) { // Don't use 'cotinue' for this loop.
            // single-range | from-to-range | subtraction
            if (type == T_CHAR && this.chardata == ']" && !firstloop) {
                if (nrange) {
                    base.subtractRanges(tok);
                    tok = base;
                }
                break;
            }
            int c = this.chardata;
            boolean end = false;
            if (type == T_BACKSOLIDUS) {
                switch (c) {
                  case 'd":  case 'D":
                  case 'w":  case 'W":
                  case 's":  case 'S":
                    tok.mergeRanges(this.getTokenForShorthand(c));
                    end = true;
                    break;

                  case 'i":  case 'I":
                  case 'c":  case 'C":
                    c = this.processCIinCharacterClass(tok, c);
                    if (c < 0)  end = true;
                    break;
                    
                  case 'p":
                  case 'P":
                    int pstart = this.offset;
                    RangeToken tok2 = this.processBacksolidus_pP(c);
                    if (tok2 == null)  throw this.ex("parser.atom.5", pstart);
                    tok.mergeRanges(tok2);
                    end = true;
                    break;

                  default:
                    c = this.decodeEscaped();
                } // \ + c
            } // backsolidus
            else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
                                                // Subraction
                if (nrange) {
                    base.subtractRanges(tok);
                    tok = base;
                }
                RangeToken range2 = this.parseCharacterClass(false);
                tok.subtractRanges(range2);
                if (this.read() != T_CHAR || this.chardata != ']")
                    throw this.ex("parser.cc.5", this.offset);
                break;                          // Exit this loop
            }
            this.next();
            if (!end) {                         // if not shorthands...
                if (type == T_CHAR) {
                    if (c == '[")  throw this.ex("parser.cc.6", this.offset-2);
                    if (c == ']")  throw this.ex("parser.cc.7", this.offset-2);
                    if (c == '-")  throw this.ex("parser.cc.8", this.offset-2);
                }
                if (this.read() != T_CHAR || this.chardata != '-") { // Here is no '-'.
                    tok.addRange(c, c);
                } else {                        // Found '-'
                                                // Is this '-' is a from-to token??
                    this.next(); // Skips '-'
                    if ((type = this.read()) == T_EOF)  throw this.ex("parser.cc.2", this.offset);
                                                // c '-' ']' -> '-' is a single-range.
                    if ((type == T_CHAR && this.chardata == ']")
                        || type == T_XMLSCHEMA_CC_SUBTRACTION) {
                        throw this.ex("parser.cc.8", this.offset-1);
                    } else {
                        int rangeend = this.chardata;
                        if (type == T_CHAR) {
                            if (rangeend == '[")  throw this.ex("parser.cc.6", this.offset-1);
                            if (rangeend == ']")  throw this.ex("parser.cc.7", this.offset-1);
                            if (rangeend == '-")  throw this.ex("parser.cc.8", this.offset-2);
                        }
                        else if (type == T_BACKSOLIDUS)
                            rangeend = this.decodeEscaped();
                        this.next();

                        if (c > rangeend)  throw this.ex("parser.ope.3", this.offset-1);
                        tok.addRange(c, rangeend);
                    }
                }
            }
            firstloop = false;
        }
        if (this.read() == T_EOF)
            throw this.ex("parser.cc.2", this.offset);
        tok.sortRanges();
        tok.compactRanges();
        //tok.dumpRanges();
        this.setContext(S_NORMAL);
        this.next();                    // Skips ']'

        return tok;
    
protected com.sun.org.apache.xerces.internal.impl.xpath.regex.RangeTokenparseSetOperations()

        throw this.ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBackreference()

        throw ex("parser.process.1", this.offset-4);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_A()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_B()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_C()

        // Advances the parser, then returns the shorthand token for 'C'
        // (getTokenForShorthand maps it to the complement of "xml:isNameChar").
        this.next();
        return this.getTokenForShorthand('C');
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_I()

        // Advances the parser, then returns the shorthand token for 'I'
        // (getTokenForShorthand maps it to the complement of "xml:isInitialNameChar").
        this.next();
        return this.getTokenForShorthand('I');
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_X()

        throw ex("parser.process.1", this.offset-2);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_Z()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_b()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_c()

        // Advances the parser, then returns the shorthand token for 'c'
        // (getTokenForShorthand maps it to the "xml:isNameChar" range).
        this.next();
        return this.getTokenForShorthand('c');
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_g()

        throw this.ex("parser.process.1", this.offset-2);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_gt()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_i()

        // Advances the parser, then returns the shorthand token for 'i'
        // (getTokenForShorthand maps it to the "xml:isInitialNameChar" range).
        this.next();
        return this.getTokenForShorthand('i');
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_lt()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessBacksolidus_z()

        throw ex("parser.process.1", this.offset);
    
intprocessCIinCharacterClass(com.sun.org.apache.xerces.internal.impl.xpath.regex.RangeToken tok, int c)

        tok.mergeRanges(this.getTokenForShorthand(c));
        return -1;
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessCaret()

        // Advances the parser and returns a character token for a literal '^'.
        this.next();
        return Token.createChar('^');
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessCondition()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessDollar()

        // Advances the parser and returns a character token for a literal '$'.
        this.next();
        return Token.createChar('$');
     
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessIndependent()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessLookahead()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessLookbehind()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessModifiers()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessNegativelookahead()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessNegativelookbehind()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessParen()

        // Parses a parenthesized group: advances the parser, parses the enclosed
        // regex, wraps it in a paren token created with 0 as its second argument,
        // and requires a closing ')' to follow.
        this.next();
        final Token group = Token.createParen(this.parseRegex(), 0);
        final boolean closed = (this.read() == super.T_RPAREN);
        if (!closed) {
            throw ex("parser.factor.1", this.offset-1);
        }
        this.next();                            // consume the ')'
        return group;
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessParen2()

        throw ex("parser.process.1", this.offset);
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessPlus(com.sun.org.apache.xerces.internal.impl.xpath.regex.Token tok)

        // X+ -> XX*
        this.next();
        return Token.createConcat(tok, Token.createClosure(tok));
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessQuestion(com.sun.org.apache.xerces.internal.impl.xpath.regex.Token tok)

        // Rewrites X? as the union (X | empty): advances the parser, then returns
        // a union token whose children are `tok` and an empty token, in that order.
        this.next();
        final Token alternatives = Token.createUnion();
        alternatives.addChild(tok);
        final Token nothing = Token.createEmpty();
        alternatives.addChild(nothing);
        return alternatives;
    
com.sun.org.apache.xerces.internal.impl.xpath.regex.TokenprocessStar(com.sun.org.apache.xerces.internal.impl.xpath.regex.Token tok)

        this.next();
        return Token.createClosure(tok);
    
static voidsetupRange(com.sun.org.apache.xerces.internal.impl.xpath.regex.Token range, java.lang.String src)

        int len = src.length();
        for (int i = 0;  i < len;  i += 2)
            range.addRange(src.charAt(i), src.charAt(i+1));