File | Doc | Category | Size | Date | Package
LexerCore.java | API Doc | phoneME MR2 API (J2ME) | 23663 | Wed May 02 18:00:42 BST 2007 | gov.nist.core

LexerCore.java

/*
 * Portions Copyright  2000-2007 Sun Microsystems, Inc. All Rights
 * Reserved.  Use is subject to license terms.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 only, as published by the Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is
 * included at /legal/license.txt).
 * 
 * You should have received a copy of the GNU General Public License
 * version 2 along with this work; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 * 
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 or visit www.sun.com if you need additional
 * information or have any questions.
 */
package gov.nist.core;

import java.util.Hashtable;
import java.util.Vector;

import com.sun.midp.log.Logging;
import com.sun.midp.log.LogChannels;

/**
 * A lexical analyzer that is used by all parsers in this implementation.
 */
public abstract class LexerCore extends StringTokenizer {


    // IMPORTANT - All keyword matches should be between START and END
    /** START token: lower bound (exclusive) of the keyword token range. */
    public static final int START = 2048;
    /** END token: upper bound (exclusive) of the keyword token range. */
    public static final int END = START + 2048;
    // IMPORTANT -- This should be < END
    /** ID token (generic identifier). */
    public static final int ID = END - 1;
    // Individual token classes (values above END).
    /** WHITESPACE token. */
    public static final int WHITESPACE = END + 1;
    /** DIGIT (numeric) token. */
    public static final int DIGIT = END + 2;
    /** ALPHA (alphabetic) token. */
    public static final int ALPHA = END + 3;
    /** BACKSLASH (escaping) token. */
    public static final int BACKSLASH = (int) '\\';
    /** Single QUOTE token. */
    public static final int QUOTE = (int) '\'';
    /** AT sign token. */
    public static final int AT = (int) '@';
    /** SPACE token. */
    public static final int SP = (int) ' ';
    /** HT (Horizontal tab) token. */
    public static final int HT = (int) '\t';
    /** COLON token. */
    public static final int COLON = (int) ':';
    /** STAR (asterisk) token. */
    public static final int STAR = (int) '*';
    /** DOLLAR token. */
    public static final int DOLLAR = (int) '$';
    /** PLUS token. */
    public static final int PLUS = (int) '+';
    /** POUND token. */
    public static final int POUND = (int) '#';
    /** MINUS token. */
    public static final int MINUS = (int) '-';
    /** DOUBLEQUOTE token. */
    public static final int DOUBLEQUOTE = (int) '\"';
    /** TILDE token. */
    public static final int TILDE = (int) '~';
    /** BACK_QUOTE token. */
    public static final int BACK_QUOTE = (int) '`';
    /** NULL token. */
    public static final int NULL = (int) '\0';
    /** EQUALS (equals sign) token. */
    public static final int EQUALS = (int) '=';
    /** SEMICOLON token. */
    public static final int SEMICOLON = (int) ';';
    /** Forward SLASH token. */
    public static final int SLASH = (int) '/';
    /** L_SQUARE_BRACKET (left square bracket) token. */
    public static final int L_SQUARE_BRACKET = (int) '[';
    /** R_SQUARE_BRACKET (right square bracket) token. */
    public static final int R_SQUARE_BRACKET = (int) ']';
    /** R_CURLY (right curly bracket) token. */
    public static final int R_CURLY = (int) '}';
    /** L_CURLY (left curly bracket) token. */
    public static final int L_CURLY = (int) '{';
    /** HAT (caret) token. */
    public static final int HAT = (int) '^';
    /** Vertical BAR token. */
    public static final int BAR = (int) '|';
    /** DOT (period) token. */
    public static final int DOT = (int) '.';
    /** EXCLAMATION token. */
    public static final int EXCLAMATION = (int) '!';
    /** LPAREN (left paren) token. */
    public static final int LPAREN = (int) '(';
    /** RPAREN (right paren) token. */
    public static final int RPAREN = (int) ')';
    /** GREATER_THAN token. */
    public static final int GREATER_THAN = (int) '>';
    /** LESS_THAN token. */
    public static final int LESS_THAN = (int) '<';
    /** PERCENT token. */
    public static final int PERCENT = (int) '%';
    /** QUESTION mark token. */
    public static final int QUESTION = (int) '?';
    /** AND (ampersand)  token. */
    public static final int AND = (int) '&';
    /** UNDERSCORE token. */
    public static final int UNDERSCORE = (int) '_';

    /** Global symbol table for intermediate elements. */
    protected static Hashtable globalSymbolTable;
    /** Lexical rules tables. */
    protected static Hashtable lexerTables;
    /** Current elements of current lexing operation. */
    protected Hashtable currentLexer;
    /** Name of the  current Lexer. */
    protected String currentLexerName;
    /** Current matched token. */
    protected Token currentMatch;

    /**
     * Initializes the hash tables on first
     * loading of the class.
     */
    static {
        globalSymbolTable = new Hashtable();
        lexerTables = new Hashtable();
    }

    /**
     * Registers a keyword in the current lexer table.
     * The keyword is also recorded in the global symbol table, unless
     * a name is already registered there for the same token value.
     * @param name the name of the keyword
     * @param value the token value assigned to the keyword
     */
    protected void addKeyword(String name, int value) {
        Integer key = new Integer(value);
        currentLexer.put(name, key);
        // The first name registered for a token value wins globally.
        if (globalSymbolTable.containsKey(key)) {
            return;
        }
        globalSymbolTable.put(key, name);
    }

    /**
     * Looks up the textual form of a token value.
     * Values above START are resolved through the global symbol table;
     * any other value is treated as a character code.
     * @param value the token to find
     * @return the registered keyword name (null if none is registered)
     * for values above START, otherwise a one character string
     */
    public String lookupToken(int value) {
        if (value <= START) {
            // Direct character token: the value is the character itself.
            return String.valueOf((char) value);
        }
        return (String) globalSymbolTable.get(new Integer(value));
    }

    /**
     * Makes the named lexer table the current one.
     * If no table is registered under the given name, a new
     * hashtable is allocated and registered.
     * @param lexerName the lexer name
     * @return the current lexer Hashtable
     */
    protected Hashtable addLexer(String lexerName) {
        Hashtable table = (Hashtable) lexerTables.get(lexerName);
        if (table == null) {
            table = new Hashtable();
            lexerTables.put(lexerName, table);
        }
        currentLexer = table;
        return table;
    }


    /**
     * Selects a specific lexer by name.
     * Implemented by subclasses; it is invoked by the
     * {@link #LexerCore(String)} constructor.
     * @param lexerName the requested lexer
     */
    public abstract void selectLexer(String lexerName);

    /**
     * Default constructor.
     * Names the lexer "charLexer" and gives it a new, empty
     * keyword table.
     */
    protected LexerCore() {
        currentLexerName = "charLexer";
        currentLexer = new Hashtable();
    }


    /**
     * Constructs a new lexer by name.
     * All set-up is delegated to {@link #selectLexer(String)}.
     * @param lexerName the name for the lexer
     */
    public LexerCore(String lexerName) {
        // selectLexer is abstract, so the subclass implementation runs
        // here, during construction.
        selectLexer(lexerName);
    }


    /**
     * Initializes the lexer with a buffer.
     * The named lexer is selected first, then the text to be
     * processed is stored.
     * @param lexerName the requested lexer
     * @param buffer initial buffer to process
     */
    public LexerCore(String lexerName, String buffer) {
        this(lexerName);
        this.buffer = buffer;
    }

    /**
     * Peeks at the next id without moving the buffer pointer forward.
     * The position reached while scanning the id is recorded in
     * savedPtr before the pointer is put back.
     * @return the textual ID of the next token
     */
    public String peekNextId() {
        int startPos = ptr;
        String id = ttoken();
        // Remember where the scan stopped, then undo the consumption.
        savedPtr = ptr;
        ptr = startPos;
        return id;
    }


    /**
     * Gets and consumes the next id.
     * This simply delegates to {@link #ttoken()}.
     * @return textual ID of the next token
     */
    public String getNextId() {
        String id = ttoken();
        return id;
    }

    /**
     * Gets the current matched token.
     * No input is read here: the value returned is the token stored
     * by the latest successful call to match, so call this after match.
     * @return the current matched token
     */
    public Token getNextToken() {
        return currentMatch;
    }

    /**
     * Looks ahead for one token without consuming it.
     * @return the next token
     * @exception ParseException if an error occurs during parsing
     */
    public Token peekNextToken() throws ParseException {
        Vector lookahead = peekNextToken(1);
        return (Token) lookahead.elementAt(0);
    }


    /**
     * Peeks at the next tokens without moving the buffer pointer
     * forward.
     * A run of id characters becomes a single token whose type is the
     * keyword value registered in the current lexer (matched on the
     * upper-cased text), or ID when it is not a keyword. Any other
     * character becomes a one character token typed ALPHA, DIGIT or
     * the character code itself.
     * The buffer pointer is restored to its starting position whether
     * the peek succeeds or fails; on success the position reached by
     * the scan is recorded in savedPtr.
     * @param ntokens the number of tokens to look ahead
     * @return a list of next tokens
     * @exception ParseException if an error occurs during parsing
     */
    public Vector peekNextToken(int ntokens) throws ParseException {
        int old = ptr;
        Vector retval = new Vector();
        try {
            for (int i = 0; i < ntokens; i++) {
                Token tok = new Token();
                if (startsId()) {
                    String id = ttoken();
                    tok.tokenValue = id;
                    // Hashtable cannot hold null values, so a single
                    // lookup tells us whether the id is a keyword.
                    Integer type =
                            (Integer) currentLexer.get(id.toUpperCase());
                    tok.tokenType = (type != null) ? type.intValue() : ID;
                } else {
                    char nextChar = getNextChar();
                    tok.tokenValue = String.valueOf(nextChar);
                    if (isAlpha(nextChar)) {
                        tok.tokenType = ALPHA;
                    } else if (isDigit(nextChar)) {
                        tok.tokenType = DIGIT;
                    } else {
                        tok.tokenType = (int) nextChar;
                    }
                }
                retval.addElement(tok);
            }
            savedPtr = ptr;
        } finally {
            // A peek must never move the read position, even when the
            // input runs out after some tokens were already scanned.
            ptr = old;
        }
        return retval;
    }

    /**
     * Matches the given token against the input, consuming it, or
     * throws an exception if the input does not match.
     * <ul>
     * <li>Values strictly between START and END are ids: ID matches
     * any id, any other value must be the keyword registered for that
     * value in the current lexer (compared on the upper-cased text).
     * The id text is read before the keyword check, so it has already
     * been consumed when a keyword mismatch is reported.</li>
     * <li>DIGIT and ALPHA match one character of that class.</li>
     * <li>Any value not above START, or equal to END, is matched as
     * a literal character.</li>
     * </ul>
     * NOTE(review): a value above END other than DIGIT or ALPHA (for
     * instance WHITESPACE) is handled by no branch; nothing is
     * consumed and the previously matched token is returned. Confirm
     * whether callers rely on this.
     * @param tok the token to be checked
     * @return the matched token
     * @exception ParseException if an error occurs during parsing
     */
    public Token match(int tok) throws ParseException {
        if (Logging.REPORT_LEVEL <= Logging.INFORMATION) {
            Logging.report(Logging.INFORMATION, LogChannels.LC_JSR180,
                "match " + tok);
        }

        if (tok > START && tok < END) {
            if (tok == ID) {
                // Generic ID sought.
                if (!startsId()) {
                    throw new ParseException(buffer + "\nID expected", ptr);
                }
                String id = getNextId();
                Token matched = new Token();
                matched.tokenValue = id;
                matched.tokenType = ID;
                currentMatch = matched;
            } else {
                // A specific keyword is sought.
                String word = getNextId();
                Integer type = (Integer) currentLexer.get(word.toUpperCase());
                if (type == null || type.intValue() != tok) {
                    throw new ParseException(
                            buffer + "\nUnexpected Token : " + word, ptr);
                }
                Token matched = new Token();
                matched.tokenValue = word;
                matched.tokenType = tok;
                currentMatch = matched;
            }
        } else if (tok > END) {
            // Character classes.
            char next = lookAhead(0);
            if (tok == DIGIT) {
                if (!isDigit(next)) {
                    throw new ParseException(buffer + "\nExpecting DIGIT", ptr);
                }
                acceptSingleChar(next, tok);
            } else if (tok == ALPHA) {
                if (!isAlpha(next)) {
                    throw new ParseException(buffer + "\nExpecting ALPHA", ptr);
                }
                acceptSingleChar(next, tok);
            }
        } else {
            // This is a direct character spec.
            char expected = (char) tok;
            char next = lookAhead(0);
            if (next != expected) {
                throw new ParseException(
                        buffer + "\nExpecting " + expected, ptr);
            }
            acceptSingleChar(expected, tok);
        }
        return currentMatch;
    }

    /**
     * Records a one character token as the current match and then
     * consumes that character from the input.
     * @param ch the matched character
     * @param type the token type to record
     * @exception ParseException if consuming the character fails
     */
    private void acceptSingleChar(char ch, int type) throws ParseException {
        Token matched = new Token();
        matched.tokenValue = String.valueOf(ch);
        matched.tokenType = type;
        currentMatch = matched;
        consume(1);
    }

    /**
     * Skips any run of space or horizontal tab characters.
     * The characters are consumed if present; a parsing error simply
     * ends the skipping and is not reported.
     */
    public void SPorHT() {
        try {
            for (char c = lookAhead(0);
                 c == ' ' || c == '\t';
                 c = lookAhead(0)) {
                consume(1);
            }
        } catch (ParseException ex) {
            // Ignore
        }
    }

    /**
     * Checks whether an id starts at the current position.
     * @return true if the next character is accepted by isValidChar,
     * false if it is not or if looking ahead fails
     */
    public boolean startsId() {
        try {
            return isValidChar(lookAhead(0));
        } catch (ParseException ex) {
            return false;
        }
    }

    /**
     * Gets the next textual token.
     * Characters are consumed for as long as isValidChar accepts
     * them. If a parsing error occurs, the characters gathered so far
     * are returned.
     * @return the next token as a string (possibly empty)
     */
    public String ttoken() {
        StringBuffer id = new StringBuffer();
        try {
            while (hasMoreChars()) {
                char c = lookAhead(0);
                if (!isValidChar(c)) {
                    break;
                }
                consume(1);
                id.append(c);
            }
        } catch (ParseException ex) {
            // Fall through and hand back what was collected.
        }
        return id.toString();
    }

    /**
     * Gets the next textual token including embedded white space.
     * Letters, digits, space, tab and the characters
     * <code>_ + - ! ` ' ~ . *</code> are consumed; scanning stops at
     * the first other character. If a parsing error occurs, the
     * characters gathered so far are returned.
     * @return the next text token as a string with embedded space and
     * tab characters
     */
    public String ttokenAllowSpace() {
        StringBuffer id = new StringBuffer();
        try {
            while (hasMoreChars()) {
                char c = lookAhead(0);
                boolean accepted;
                switch (c) {
                    case '_':
                    case '+':
                    case '-':
                    case '!':
                    case '`':
                    case '\'':
                    case '~':
                    case '.':
                    case ' ':
                    case '\t':
                    case '*':
                        accepted = true;
                        break;
                    default:
                        accepted = isAlpha(c) || isDigit(c);
                        break;
                }
                if (!accepted) {
                    break;
                }
                id.append(c);
                consume(1);
            }
        } catch (ParseException ex) {
            // Fall through and hand back what was collected.
        }
        return id.toString();
    }


    // Assume the cursor is at a quote.
    /**
     * Gets a quoted string.
     * Reads all the characters between double quotes. A back slash
     * and the character that follows it are both copied to the
     * result, so an escaped double quote does not end the string.
     * @return the contents of the quoted string (both the starting
     * and ending double quote characters are consumed but not
     * returned), or null if the next character is not a double quote
     * @exception ParseException if any parsing errors occur
     */
    public String quotedString() throws ParseException {
        if (lookAhead(0) != '\"') {
            return null;
        }
        consume(1);
        StringBuffer text = new StringBuffer();
        for (char c = getNextChar(); c != '\"'; c = getNextChar()) {
            text.append(c);
            if (c == '\\') {
                // Keep the escaped character verbatim, whatever it is.
                text.append(getNextChar());
            }
        }
        return text.toString();
    }

    // Assume the cursor is at a "("
    /**
     * Gets a comment string.
     * Consumes all characters from the left paren up to the first
     * right paren that is not back slash escaped. A back slash and the
     * character that follows it are both copied to the result.
     * @return the comment string (both the starting and ending parens
     * are consumed but not returned), or null if the next character is
     * not a left paren
     * @exception  ParseException if any parsing errors occur, or if a
     * NUL character is read before the comment is closed
     */
    public String comment() throws ParseException {
        if (lookAhead(0) != '(') {
            return null;
        }
        consume(1);
        StringBuffer text = new StringBuffer();
        char c;
        while ((c = getNextChar()) != ')') {
            if (c == '\0') {
                throw new ParseException(this.buffer + " :unexpected EOL",
                        this.ptr);
            }
            text.append(c);
            if (c == '\\') {
                char escaped = getNextChar();
                if (escaped == '\0') {
                    throw new ParseException(this.buffer +
                            " : unexpected EOL", this.ptr);
                }
                text.append(escaped);
            }
        }
        return text.toString();
    }


    /**
     * Gets a token up to the next semicolon, end of line or NUL
     * character.
     * The terminating character is not consumed. If a parsing
     * exception occurs, the consumed characters are returned.
     * @return the next token without embedded semicolons
     */
    public String byteStringNoSemicolon() {
        StringBuffer text = new StringBuffer();
        try {
            for (char c = lookAhead(0);
                 c != '\0' && c != '\n' && c != ';';
                 c = lookAhead(0)) {
                consume(1);
                text.append(c);
            }
        } catch (ParseException ex) {
            // Fall through and hand back what was collected.
        }
        return text.toString();
    }

    /**
     * Gets a token up to the next comma, end of line or NUL
     * character.
     * The terminating character is not consumed. If a parsing
     * exception occurs, the consumed characters are returned.
     * @return the next token without embedded commas
     */
    public String byteStringNoComma() {
        StringBuffer text = new StringBuffer();
        try {
            while (true) {
                char c = lookAhead(0);
                boolean terminator = (c == '\0' || c == '\n' || c == ',');
                if (terminator) {
                    break;
                }
                consume(1);
                text.append(c);
            }
        } catch (ParseException ex) {
            // Fall through and hand back what was collected.
        }
        return text.toString();
    }


    /**
     * Converts a character to a string.
     * @param ch the character to convert
     * @return a string of length one holding the character
     */
    public static String charAsString(char ch) {
        return String.valueOf(ch);
    }

    /**
     * Looks ahead in the input buffer for n chars and returns them as
     * a string, without consuming any input.
     * In the event of a parsing error the characters read up to that
     * point are returned.
     * @param nchars the number of characters to look ahead
     * @return a string containing the designated characters
     */
    public String charAsString(int nchars) {
        StringBuffer chars = new StringBuffer();
        try {
            int offset = 0;
            while (offset < nchars) {
                chars.append(lookAhead(offset));
                offset++;
            }
        } catch (ParseException ex) {
            // Fall through and hand back what was collected.
        }
        return chars.toString();
    }

    /**
     * Gets and consumes the next number.
     * At least one digit must be present; all the consecutive digits
     * that follow are consumed as well. Only digits are included in
     * the returned string.
     * @return the parsed number as a string
     * @exception ParseException if the next character is not a digit
     * or any other parsing error occurs
     */
    public String number() throws ParseException {
        char first = lookAhead(0);
        if (!isDigit(first)) {
            throw new ParseException(
                    buffer + ": Unexpected token at " + first, ptr);
        }
        StringBuffer digits = new StringBuffer();
        digits.append(first);
        consume(1);
        for (char c = lookAhead(0); isDigit(c); c = lookAhead(0)) {
            digits.append(c);
            consume(1);
        }
        return digits.toString();
    }

    /**
     * Marks the current position for later backtracking.
     * Pass the returned value to {@link #rewindInputPosition(int)} to
     * come back to it.
     * @return the current pointer in the parsed content
     */
    public int markInputPosition() {
        return this.ptr;
    }

    /**
     * Moves the input pointer to the given position.
     * The position is stored as is, without any bounds checking.
     * @param position the desired parsing location
     */
    public void rewindInputPosition(int position) {
        ptr = position;
    }

    /**
     * Gets the rest of the string buffer, without consuming it.
     * @return the text from the current position to the end of the
     * buffer, or null if the buffer has been consumed
     */
    public String getRest() {
        return (ptr < buffer.length()) ? buffer.substring(ptr) : null;
    }

    /**
     * Gets the sub-String up to the requested character.
     * The terminating character is consumed but not returned. A back
     * slash is dropped and the character after it is copied verbatim,
     * so an escaped terminator does not end the string. If a NUL
     * character is seen first, the read pointer is put back where it
     * was on entry and an exception is thrown.
     * @param  c the character to match
     * @return the string up to the separator character
     * @exception ParseException if a parsing error occurs
     */
    public String getString(char c) throws ParseException {
        int startPos = ptr;
        StringBuffer text = new StringBuffer();
        while (true) {
            char next = lookAhead(0);
            if (next == '\0') {
                throw unexpectedEol(startPos);
            }
            if (next == c) {
                consume(1);
                return text.toString();
            }
            consume(1);
            if (next == '\\') {
                // Skip the back slash and take the escaped character.
                next = lookAhead(0);
                if (next == '\0') {
                    throw unexpectedEol(startPos);
                }
                consume(1);
            }
            text.append(next);
        }
    }

    /**
     * Builds the "unexpected EOL" exception for the current position
     * and then rewinds the read pointer.
     * @param restoreTo the position to put the read pointer back to
     * @return the exception to be thrown by the caller
     */
    private ParseException unexpectedEol(int restoreTo) {
        // The exception must capture the failing offset before rewinding.
        ParseException exception = new ParseException
                (this.buffer +
                "unexpected EOL", this.ptr);
        ptr = restoreTo;
        return exception;
    }






    /**
     * Gets the read pointer.
     * @return the current offset in the buffer
     */
    public int getPtr() {
        return ptr;
    }

    /**
     * Gets the buffer.
     * @return the whole parsing buffer, including any part already
     * consumed
     */
    public String getBuffer() {
        return buffer;
    }

    /**
     * Creates a parse exception describing the current state.
     * The exception is only created here; it is up to the caller to
     * throw it.
     * @return an exception built from the current buffer and offset
     */
    public ParseException createParseException() {
        ParseException exception = new ParseException(buffer, ptr);
        return exception;
    }
}