FileDocCategorySizeDatePackage
LexerCore.javaAPI DocphoneME MR2 API (J2ME)23663Wed May 02 18:00:42 BST 2007gov.nist.core

LexerCore

public abstract class LexerCore extends StringTokenizer
A lexical analyzer that is used by all parsers in this implementation.

Fields Summary
public static final int
START
START token.
public static final int
END
END token.
public static final int
ID
ID token.
public static final int
WHITESPACE
WHITESPACE token.
public static final int
DIGIT
DIGIT (numeric) token.
public static final int
ALPHA
ALPHA (alphabetic) token.
public static final int
BACKSLASH
BACKSLASH (escaping) token.
public static final int
QUOTE
Single QUOTE token.
public static final int
AT
AT sign token.
public static final int
SP
SPACE token.
public static final int
HT
HT (Horizontal tab) token.
public static final int
COLON
COLON token.
public static final int
STAR
STAR (asterisk) token.
public static final int
DOLLAR
DOLLAR token.
public static final int
PLUS
PLUS token.
public static final int
POUND
POUND token.
public static final int
MINUS
MINUS token.
public static final int
DOUBLEQUOTE
DOUBLEQUOTE token.
public static final int
TILDE
TILDE token.
public static final int
BACK_QUOTE
BACK_QUOTE token.
public static final int
NULL
NULL token.
public static final int
EQUALS
EQUALS (equals sign) token.
public static final int
SEMICOLON
SEMICOLON token.
public static final int
SLASH
Forward SLASH token.
public static final int
L_SQUARE_BRACKET
L_SQUARE_BRACKET (left square bracket) token.
public static final int
R_SQUARE_BRACKET
R_SQUARE_BRACKET (right square bracket) token.
public static final int
R_CURLY
R_CURLY (right curly bracket) token.
public static final int
L_CURLY
L_CURLY (left curly bracket) token.
public static final int
HAT
HAT (caret) token.
public static final int
BAR
Vertical BAR token.
public static final int
DOT
DOT (period) token.
public static final int
EXCLAMATION
EXCLAMATION token.
public static final int
LPAREN
LPAREN (left paren) token.
public static final int
RPAREN
RPAREN (right paren) token.
public static final int
GREATER_THAN
GREATER_THAN token.
public static final int
LESS_THAN
LESS_THAN token.
public static final int
PERCENT
PERCENT token.
public static final int
QUESTION
QUESTION mark token.
public static final int
AND
AND (ampersand) token.
public static final int
UNDERSCORE
UNDERSCORE token.
protected static Hashtable
globalSymbolTable
Global symbol table for intermediate elements.
protected static Hashtable
lexerTables
Lexical rules tables.
protected Hashtable
currentLexer
Current elements of current lexing operation.
protected String
currentLexerName
Name of the current Lexer.
protected Token
currentMatch
Current matched token.
Constructors Summary
protected LexerCore()
Default constructor. Allocates a new hashtable and labels the Lexer as "charLexer".

        // Default setup: an empty keyword table registered under the
        // name "charLexer".
        currentLexerName = "charLexer";
        currentLexer = new Hashtable();
    
public LexerCore(String lexerName)
Constructs a new lexer by name.

param
lexerName the name for the lexer

        // selectLexer is abstract in this class, so the concrete subclass
        // decides what selecting the named lexer involves.
        this.selectLexer(lexerName);
    
public LexerCore(String lexerName, String buffer)
Initialize the lexer with a buffer.

param
lexerName the requested lexer
param
buffer initial buffer to process

        // Select the named lexer through the single-argument constructor
        // (must be the first statement of this constructor).
        this(lexerName);
        // Install the text that subsequent lexing calls will read from.
        this.buffer = buffer;
    
Methods Summary
public voidSPorHT()
Checks for space or horizontal tab. The tokens are consumed if present. Any parsing errors are ignored.

        // Skip the run of spaces and horizontal tabs (if any) that starts
        // at the current read position.
        try {
            char next = lookAhead(0);
            while (next == ' ' || next == '\t') {
                consume(1);
                next = lookAhead(0);
            }
        } catch (ParseException ex) {
            // Intentionally ignored: skipping white space is best-effort
            // and this method is documented to swallow parsing errors.
        }
    
protected voidaddKeyword(java.lang.String name, int value)
Adds a new keyword and value pair.

param
name the name of the keyword
param
value the content of the keyword


                   
     
        // Allocate the shared tables only when they are still missing.
        // Reallocating them unconditionally on every call would discard
        // every previously registered lexer table and symbol name.
        // NOTE(review): the unconditional allocations in the original
        // listing look like the class's static initializer displaced into
        // this method by the documentation tool -- confirm against the
        // real source file.
        if (globalSymbolTable == null) {
            globalSymbolTable = new Hashtable();
        }
        if (lexerTables == null) {
            lexerTables = new Hashtable();
        }

        // Register name -> value in the table of the currently selected lexer.
        Integer val = new Integer(value);
        currentLexer.put(name, val);
        // Keep the reverse mapping (value -> name) used by lookupToken();
        // the first name registered for a given value is the one retained.
        if (!globalSymbolTable.containsKey(val)) {
            globalSymbolTable.put(val, name);
        }
    
protected java.util.HashtableaddLexer(java.lang.String lexerName)
Adds a new Lexer. If the named lexer does not exist, a new hashtable is allocated.

param
lexerName the lexer name
return
the current lexer Hashtable

        currentLexer = (Hashtable) lexerTables.get(lexerName);
        if (currentLexer == null) {
            currentLexer = new Hashtable();
            lexerTables.put(lexerName, currentLexer);
        }
        return currentLexer;
    
public java.lang.StringbyteStringNoComma()
Gets a token up to the next comma or end of line. The end of line or terminating comma are not consumed. If a parsing exception occurs, the consumed characters are returned.

return
the next token without embedded commas

        // Accumulate characters until a comma, a newline or the NUL
        // end marker is seen; the terminator itself is left unconsumed.
        StringBuffer retval = new StringBuffer();
        try {
            char next;
            while ((next = lookAhead(0)) != '\0') {
                if (next == '\n' || next == ',') {
                    break;
                }
                consume(1);
                retval.append(next);
            }
        } catch (ParseException ex) {
            // Intentionally ignored: on a parsing error the characters
            // consumed so far are returned, as documented.
        }
        return retval.toString();
    
public java.lang.StringbyteStringNoSemicolon()
Gets a token up to the next semicolon or end of line. The end of line or terminating semicolon are not consumed. If a parsing exception occurs, the consumed characters are returned.

return
the next token without embedded semicolons

        // Accumulate characters until a semicolon, a newline or the NUL
        // end marker is seen; the terminator itself is left unconsumed.
        StringBuffer retval = new StringBuffer();
        try {
            char next;
            while ((next = lookAhead(0)) != '\0') {
                if (next == '\n' || next == ';') {
                    break;
                }
                consume(1);
                retval.append(next);
            }
        } catch (ParseException ex) {
            // Intentionally ignored: on a parsing error the characters
            // consumed so far are returned, as documented.
        }
        return retval.toString();
    
public static java.lang.StringcharAsString(char ch)
Converts a character to a string.

param
ch the character to enclose
return
a string containing the single character

        // Produce the one-character string for ch.
        return String.valueOf(ch);
    
public java.lang.StringcharAsString(int nchars)
Lookahead in the inputBuffer for n chars and return as a string. Do not consume the input. In the event of a parsing error return the characters that could be consumed.

param
nchars the number of characters to look ahead
return
a string containing the designated characters


        // Gather up to nchars characters of lookahead; the read pointer
        // is not advanced because only lookAhead() is used.
        StringBuffer peeked = new StringBuffer();
        try {
            int offset = 0;
            while (offset < nchars) {
                peeked.append(lookAhead(offset));
                offset++;
            }
        } catch (ParseException ex) {
            // Fall through: return whatever was gathered before the error.
        }
        return peeked.toString();
    
public java.lang.Stringcomment()
Gets a comment string. Consumes all characters between left and right parens. Back slashed escaped characters are preserved.

return
the comment string, both starting and ending parens are consumed.
exception
ParseException if any parsing errors occur, or if the comment is not properly closed

        // A comment must start with '('; otherwise nothing is consumed
        // and null is returned.
        StringBuffer retval = new StringBuffer();
        if (lookAhead(0) != '(') {
            return null;
        }
        consume(1);
        while (true) {
            char next = getNextChar();
            if (next == ')') {
                // Closing paren: consumed but not part of the result.
                break;
            } else if (next == '\0') {
                // NUL is treated as end of input: the comment was never closed.
                throw new ParseException(this.buffer + " :unexpected EOL",
                        this.ptr);
            } else if (next == '\\') {
                // Keep the backslash and the character it escapes verbatim.
                retval.append(next);
                next = getNextChar();
                if (next == '\0') {
                    throw new ParseException(this.buffer +
                            " : unexpected EOL", this.ptr);
                }
                retval.append(next);
            } else {
                retval.append(next);
            }
        }
        return retval.toString();
    
public ParseExceptioncreateParseException()
Creates a parse exception.

return
an exception with the current buffer and offset in the exception contents

        return new ParseException(this.buffer, this.ptr);
    
public java.lang.StringgetBuffer()
Gets the buffer.

return
the parsing buffer

 // Hand back the text being tokenized, unchanged.
 return buffer;
public java.lang.StringgetNextId()
Gets the next id.

return
textual ID of the next token

        return ttoken();
    
public TokengetNextToken()
Gets the next token.

return
the next token

        // No scanning happens here: this returns whatever is currently
        // stored in currentMatch (match() assigns it in this listing).
        return currentMatch;

    
public intgetPtr()
Gets the read pointer.

return
offset in the buffer

 // Current read offset into the buffer.
 return ptr;
public java.lang.StringgetRest()
Gets the rest of the string buffer.

return
the remaining text in the buffer, or null if the buffer has been consumed.

        if (ptr >= buffer.length())
            return null;
        else
            return buffer.substring(ptr);
    
public java.lang.StringgetString(char c)
Gets the sub-String until the requested character is encountered.

param
c the character to match
return
the string up to the separator character
exception
ParseException if a parsing error occurs

        // Remember where the scan started so that a failed scan leaves the
        // read pointer where it was. (Named startPtr rather than savedPtr
        // to avoid shadowing the savedPtr member assigned by peekNextId().)
        int startPtr = ptr;
        StringBuffer retval = new StringBuffer();
        while (true) {
            char next = lookAhead(0);

            if (next == '\0') {
                // Ran out of input before finding c. Build the exception
                // first so it records the failure offset, then rewind.
                ParseException exception = new ParseException
                        (this.buffer +
                        "unexpected EOL", this.ptr);
                ptr = startPtr;
                throw exception;
            } else if (next == c) {
                // Terminator found: consume it, but do not return it.
                consume(1);
                break;
            } else if (next == '\\') {
                // Escape: drop the backslash and keep the escaped character.
                consume(1);
                char nextchar = lookAhead(0);
                if (nextchar == '\0') {
                    ParseException exception =
                            new ParseException(this.buffer +
                            "unexpected EOL", this.ptr);
                    ptr = startPtr;
                    throw exception;
                } else {
                    consume(1);
                    retval.append(nextchar);
                }
            } else {
                consume(1);
                retval.append(next);
            }
        }
        return retval.toString();
    
public java.lang.StringlookupToken(int value)
Looks up a requested token.

param
value the token to find
return
the value of the token

        // Values up to and including START are treated as plain character
        // codes; anything larger is looked up by value in the global
        // symbol table (null if never registered).
        if (value <= START) {
            return String.valueOf((char) value);
        }
        return (String) globalSymbolTable.get(new Integer(value));
    
public intmarkInputPosition()
Mark the position for backtracking.

return
the current pointer in the parsed content

        return ptr;
    
public Tokenmatch(int tok)
Match the given token or throw an exception, if no such token can be matched.

param
tok the token to be checked
return
the matched token
exception
ParseException if an error occurs during parsing

        // Trace the requested token type when the configured report level
        // admits INFORMATION messages.
        if (Logging.REPORT_LEVEL <= Logging.INFORMATION) {
            Logging.report(Logging.INFORMATION, LogChannels.LC_JSR180,
                "match " + tok);
        }

        // Token types strictly between START and END: the generic ID
        // token or a keyword registered in the current lexer table.
        if (tok > START && tok < END) {
            if (tok == ID) {
                // Generic ID sought.
                // Fail before consuming anything if the input cannot
                // start an id.
                if (!startsId())
                    throw new ParseException(buffer + "\nID expected", ptr);
                String id = getNextId();
                this.currentMatch = new Token();
                this.currentMatch.tokenValue = id;
                this.currentMatch.tokenType = ID;
            } else {
                // Keyword sought: read the next id (this consumes it) and
                // look it up, upper-cased, in the current lexer table.
                String nexttok = getNextId();
                Integer cur =
                        (Integer) currentLexer.get(nexttok.toUpperCase());

                // Unknown word, or a keyword of a different type. Note the
                // id has already been consumed when this is thrown.
                if (cur == null || cur.intValue() != tok)
                    throw new ParseException
                            (buffer + "\nUnexpected Token : "+
                            nexttok, ptr);
                this.currentMatch = new Token();
                this.currentMatch.tokenValue = nexttok;
                this.currentMatch.tokenType = tok;
            }
        } else if (tok > END) {
            // Character classes.
            // Only DIGIT and ALPHA are handled below.
            // NOTE(review): any other value above END falls through
            // without throwing and currentMatch keeps its previous
            // value -- confirm this is intended.
            char next = lookAhead(0);
            if (tok == DIGIT) {
                if (! isDigit(next))
                    throw new
                            ParseException(buffer + "\nExpecting DIGIT", ptr);
                this.currentMatch = new Token();
                this.currentMatch.tokenValue =
                        new StringBuffer().append(next).toString();
                this.currentMatch.tokenType = tok;
                consume(1);

            } else if (tok == ALPHA) {
                if (! isAlpha(next))
                    throw new ParseException
                            (buffer + "\nExpecting ALPHA", ptr);
                this.currentMatch = new Token();
                this.currentMatch.tokenValue =
                        new StringBuffer().append(next).toString();
                this.currentMatch.tokenType = tok;
                consume(1);

            }

        } else {
            // This is a direct character spec.
            // Reached for tok <= START and also for tok == END; the token
            // value is interpreted as the character code to expect.
            Character ch = new Character((char)tok);
            char next = lookAhead(0);
            if (next == ch.charValue()) {
                this.currentMatch = new Token();
                this.currentMatch.tokenValue =
                        new StringBuffer().append(ch.charValue()).toString();
                this.currentMatch.tokenType = tok;
                consume(1);
            } else throw new
                    ParseException(buffer + "\nExpecting " +
                                   ch.charValue(), ptr);
        }
        // The token stored by whichever branch matched (or the previous
        // match if an unhandled character class fell through above).
        return this.currentMatch;
    
public java.lang.Stringnumber()
Gets and consumes the next number. Only digits are included in the returned string.

return
the parsed number as a string
exception
ParseException if any parsing errors occur


        // A number needs at least one digit; otherwise fail without
        // consuming anything.
        StringBuffer digits = new StringBuffer();
        char next = lookAhead(0);
        if (!isDigit(next)) {
            throw new ParseException
                    (buffer + ": Unexpected token at " + next, ptr);
        }
        // Consume the leading digit and every digit that follows it.
        do {
            digits.append(next);
            consume(1);
            next = lookAhead(0);
        } while (isDigit(next));
        return digits.toString();
    
public java.lang.StringpeekNextId()
Peeks at the next id, but doesn't move the buffer pointer forward.

return
the textual ID of the next token

        // Scan the id as usual, then put the read pointer back.
        final int start = ptr;
        final String id = ttoken();
        // Record how far the scan went before rewinding.
        savedPtr = ptr;
        ptr = start;
        return id;
    
public TokenpeekNextToken()
Looks ahead for one token.

return
the next token
exception
ParseException if an error occurs during parsing

        return (Token) peekNextToken(1).elementAt(0);
    
public java.util.VectorpeekNextToken(int ntokens)
Peeks at the next token.

param
ntokens the number of tokens to look ahead
return
a list of next tokens
exception
ParseException if an error occurs during parsing

        // Scan ntokens tokens ahead, then restore the read pointer so the
        // peek leaves the input position unchanged.
        int old = ptr;
        Vector retval = new Vector();
        try {
            for (int i = 0; i < ntokens; i++) {
                Token tok = new Token();
                if (startsId()) {
                    // Textual token: a keyword of the current lexer if its
                    // upper-cased form is registered, otherwise a generic ID.
                    String id = ttoken();
                    tok.tokenValue = id;
                    Integer type =
                            (Integer) currentLexer.get(id.toUpperCase());
                    tok.tokenType = (type != null) ? type.intValue() : ID;
                } else {
                    // Single-character token.
                    char nextChar = getNextChar();
                    tok.tokenValue = String.valueOf(nextChar);
                    if (isAlpha(nextChar)) {
                        tok.tokenType = ALPHA;
                    } else if (isDigit(nextChar)) {
                        tok.tokenType = DIGIT;
                    } else {
                        // Any other character is its own token type.
                        tok.tokenType = (int) nextChar;
                    }
                }
                retval.addElement(tok);
            }
            // Record how far the look-ahead reached (successful peeks only).
            savedPtr = ptr;
        } finally {
            // Rewind even when scanning throws part-way through, so a
            // failed peek does not leave the input partially consumed.
            ptr = old;
        }
        return retval;
    
public java.lang.StringquotedString()
Gets a quoted string. Read all the characters between double quotes into the next textual token. Preserve all back slash escaped characters.

return
the contents of the quoted string, both starting and ending double quote characters are consumed.
exception
ParseException if any parsing errors occur

        // A quoted string must start with '"'; otherwise nothing is
        // consumed and null is returned.
        StringBuffer retval = new StringBuffer();
        if (lookAhead(0) != '\"') {
            return null;
        }
        consume(1);
        while (true) {
            char next = getNextChar();
            if (next == '\"') {
                // Closing quote: consumed but not part of the result.
                break;
            } else if (next == '\0') {
                // Treat NUL as end of input, mirroring comment(): the
                // string was never closed.
                throw new ParseException(this.buffer + " :unexpected EOL",
                        this.ptr);
            } else if (next == '\\') {
                // Keep the backslash and the character it escapes verbatim.
                retval.append(next);
                next = getNextChar();
                if (next == '\0') {
                    throw new ParseException(this.buffer +
                            " : unexpected EOL", this.ptr);
                }
                retval.append(next);
            } else {
                retval.append(next);
            }
        }
        return retval.toString();
    
public voidrewindInputPosition(int position)
Rewinds the input pointer to the marked position.

param
position the desired parsing location

        // Move the read pointer to the given offset; no bounds check is
        // performed here.
        ptr = position;
    
public abstract voidselectLexer(java.lang.String lexerName)
Selects a specific lexer by name.

param
lexerName the requested lexer

public booleanstartsId()
Checks whether the input at the current position can start an ID.

return
true if next char is alphanumeric or begins with appropriate punctuation characters.

        try {
            char nextChar = lookAhead(0);
            return isValidChar(nextChar);
        } catch (ParseException ex) {
            return false;
        }
    
public java.lang.Stringttoken()
Gets the next textual token.

return
the next token as a string

        // Consume the longest run of characters accepted by isValidChar().
        StringBuffer text = new StringBuffer();
        try {
            while (hasMoreChars()) {
                char candidate = lookAhead(0);
                if (!isValidChar(candidate)) {
                    break;
                }
                consume(1);
                text.append(candidate);
            }
        } catch (ParseException ex) {
            // Fall through: return what was collected before the error.
        }
        return text.toString();
    
public java.lang.StringttokenAllowSpace()
Gets the next textual token including embedded white space

return
the next text token as a string with embedded space and tab characters

        // Punctuation and white space accepted inside the token, in
        // addition to letters and digits.
        final String extraChars = "_+-!`'~. \t*";
        StringBuffer nextId = new StringBuffer();
        try {
            while (hasMoreChars()) {
                char nextChar = lookAhead(0);
                if (isAlpha(nextChar) ||
                        isDigit(nextChar) ||
                        extraChars.indexOf(nextChar) >= 0) {
                    nextId.append(nextChar);
                    consume(1);
                } else {
                    break;
                }
            }
        } catch (ParseException ex) {
            // Fall through: return what was collected before the error.
        }
        return nextId.toString();