/*
* Portions Copyright 2000-2007 Sun Microsystems, Inc. All Rights
* Reserved. Use is subject to license terms.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 only, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is
* included at /legal/license.txt).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
* Clara, CA 95054 or visit www.sun.com if you need additional
* information or have any questions.
*/
package gov.nist.core;
import java.util.Hashtable;
import java.util.Vector;
import com.sun.midp.log.Logging;
import com.sun.midp.log.LogChannels;
/**
* A lexical analyzer that is used by all parsers in this implementation.
*/
public abstract class LexerCore extends StringTokenizer {
// Token type numbering used by this lexer:
//   - single-character tokens use the character's own code as their type;
//   - keyword tokens are expected to lie strictly between START and END;
//   - character-class tokens (WHITESPACE, DIGIT, ALPHA) lie above END.
// IMPORTANT - All keyword matches should be between START and END
/** START token. */
public static final int START = 2048;
/** END token. */
public static final int END = START + 2048;
// IMPORTANT -- This should be < END
/** ID token. */
public static final int ID = END - 1;
// Individual token classes.
/** WHITESPACE token. */
public static final int WHITESPACE = END + 1;
/** DIGIT (numeric) token. */
public static final int DIGIT = END + 2;
/** ALPHA (alphabetic) token. */
public static final int ALPHA = END + 3;
/** BACKSLASH (escaping) token. */
public static final int BACKSLASH = (int) '\\';
/** Single QUOTE token. */
public static final int QUOTE = (int) '\'';
/** AT sign token. */
public static final int AT = (int) '@';
/** SPACE token. */
public static final int SP = (int) ' ';
/** HT (Horizontal tab) token. */
public static final int HT = (int) '\t';
/** COLON token. */
public static final int COLON = (int) ':';
/** STAR (asterisk) token. */
public static final int STAR = (int) '*';
/** DOLLAR token. */
public static final int DOLLAR = (int) '$';
/** PLUS token. */
public static final int PLUS = (int) '+';
/** POUND token. */
public static final int POUND = (int) '#';
/** MINUS token. */
public static final int MINUS = (int) '-';
/** DOUBLEQUOTE token. */
public static final int DOUBLEQUOTE = (int) '\"';
/** TILDE token. */
public static final int TILDE = (int) '~';
/** BACK_QUOTE token. */
public static final int BACK_QUOTE = (int) '`';
/** NULL token. */
public static final int NULL = (int) '\0';
/** EQUALS (equals sign) token. */
public static final int EQUALS = (int) '=';
/** SEMICOLON token. */
public static final int SEMICOLON = (int) ';';
/** Forward SLASH token. */
public static final int SLASH = (int) '/';
/** L_SQUARE_BRACKET (left square bracket) token. */
public static final int L_SQUARE_BRACKET = (int) '[';
/** R_SQUARE_BRACKET (right square bracket) token. */
public static final int R_SQUARE_BRACKET = (int) ']';
/** R_CURLY (right curly bracket) token. */
public static final int R_CURLY = (int) '}';
/** L_CURLY (left curly bracket) token. */
public static final int L_CURLY = (int) '{';
/** HAT (caret) token. */
public static final int HAT = (int) '^';
/** Vertical BAR token. */
public static final int BAR = (int) '|';
/** DOT (period) token. */
public static final int DOT = (int) '.';
/** EXCLAMATION token. */
public static final int EXCLAMATION = (int) '!';
/** LPAREN (left paren) token. */
public static final int LPAREN = (int) '(';
/** RPAREN (right paren) token. */
public static final int RPAREN = (int) ')';
/** GREATER_THAN token. */
public static final int GREATER_THAN = (int) '>';
/** LESS_THAN token. */
public static final int LESS_THAN = (int) '<';
/** PERCENT token. */
public static final int PERCENT = (int) '%';
/** QUESTION mark token. */
public static final int QUESTION = (int) '?';
/** AND (ampersand) token. */
public static final int AND = (int) '&';
/** UNDERSCORE token. */
public static final int UNDERSCORE = (int) '_';
/**
 * Global symbol table for intermediate elements.
 * Maps a token value (Integer) to the keyword name (String) first
 * registered for it; shared by all lexer instances.
 */
protected static Hashtable globalSymbolTable;
/**
 * Lexical rules tables.
 * Maps a lexer name (String) to that lexer's keyword table (Hashtable);
 * shared by all lexer instances.
 */
protected static Hashtable lexerTables;
/**
 * Current elements of current lexing operation.
 * Maps a keyword name (String) to its token value (Integer).
 */
protected Hashtable currentLexer;
/** Name of the current Lexer. */
protected String currentLexerName;
/** Current matched token, i.e. the token set by the last successful match. */
protected Token currentMatch;
/**
* Initializes the hash tables on first
* loading of the class.
*/
static {
globalSymbolTable = new Hashtable();
lexerTables = new Hashtable();
}
/**
 * Registers a keyword in the currently selected lexer table.
 * The name is mapped to its token value in the current table. The
 * reverse mapping (value to name) is recorded in the global symbol
 * table only if that value has no name registered there yet.
 * @param name the name of the keyword
 * @param value the token value assigned to the keyword
 */
protected void addKeyword(String name, int value) {
    final Integer key = new Integer(value);
    currentLexer.put(name, key);
    if (globalSymbolTable.containsKey(key)) {
        // The first name registered for a value wins.
        return;
    }
    globalSymbolTable.put(key, name);
}
/**
 * Returns the textual form of a token value.
 * Values above START are looked up in the global symbol table; all
 * other values are treated as a character code and returned as a
 * one-character string.
 * @param value the token value to look up
 * @return the keyword name registered for the value (null if none is
 * registered), or the single character denoted by the value
 */
public String lookupToken(int value) {
    if (value <= START) {
        // Plain character token: the value is the character code.
        return String.valueOf((char) value);
    }
    return (String) globalSymbolTable.get(new Integer(value));
}
/**
 * Makes the named lexer table the current one, creating and
 * registering a new empty table if none exists under that name.
 * @param lexerName the lexer name
 * @return the keyword table that is now current
 */
protected Hashtable addLexer(String lexerName) {
    Hashtable table = (Hashtable) lexerTables.get(lexerName);
    if (table == null) {
        table = new Hashtable();
        lexerTables.put(lexerName, table);
    }
    currentLexer = table;
    return table;
}
/**
 * Selects a specific lexer by name.
 * Implemented by subclasses; it is also invoked from the
 * {@code LexerCore(String)} constructor of this class.
 * @param lexerName the requested lexer
 */
public abstract void selectLexer(String lexerName);
/**
 * Default constructor.
 * Starts with a fresh, empty keyword table that is not registered in
 * the shared lexer tables, and names the lexer "charLexer".
 */
protected LexerCore() {
    currentLexerName = "charLexer";
    currentLexer = new Hashtable();
}
/**
 * Constructs a new lexer by name.
 * NOTE(review): this calls the abstract method selectLexer from the
 * constructor, so the subclass implementation runs before the
 * subclass's own field initializers and constructor body; subclass
 * implementations must not rely on their instance fields being set.
 * @param lexerName the name for the lexer
 */
public LexerCore(String lexerName) {
selectLexer(lexerName);
}
/**
 * Initialize the lexer with a buffer.
 * Selects the named lexer first, then stores the text to be scanned.
 * @param lexerName the requested lexer
 * @param buffer initial buffer to process
 */
public LexerCore(String lexerName, String buffer) {
this(lexerName);
this.buffer = buffer;
}
/**
 * Reads the next id without advancing the read pointer.
 * The position just past the peeked id is remembered in savedPtr.
 * @return the text of the next id
 */
public String peekNextId() {
    final int mark = ptr;
    final String id = ttoken();
    // Remember where the id ended, then step back to where we started.
    savedPtr = ptr;
    ptr = mark;
    return id;
}
/**
 * Reads and consumes the next id.
 * @return the text of the next id, as produced by ttoken()
 */
public String getNextId() {
    final String id = ttoken();
    return id;
}
// Call this after you call match.
/**
 * Returns the token recorded by the most recent successful call to
 * match. No input is read or consumed by this method.
 * @return the current match, or null if nothing has been matched yet
 */
public Token getNextToken() {
    return currentMatch;
}
/**
 * Looks ahead one token without consuming it.
 * @return the next token
 * @exception ParseException if an error occurs during parsing
 */
public Token peekNextToken() throws ParseException {
    final Vector lookahead = peekNextToken(1);
    return (Token) lookahead.elementAt(0);
}
/**
 * Looks ahead the given number of tokens without consuming them.
 * Each token is either an id (classified as a keyword of the current
 * lexer, compared case-insensitively via upper-casing, or as a generic
 * ID) or a single character classified as ALPHA, DIGIT or the
 * character's own code.
 * <p>
 * The read pointer is always restored to its starting position, even
 * when a ParseException is thrown part-way through the look-ahead.
 * On success the position just past the last peeked token is
 * remembered in savedPtr.
 * @param ntokens the number of tokens to look ahead
 * @return a list of the next tokens (Token elements), in input order
 * @exception ParseException if an error occurs during parsing
 */
public Vector peekNextToken(int ntokens) throws ParseException {
    final int start = ptr;
    Vector retval = new Vector();
    try {
        for (int i = 0; i < ntokens; i++) {
            Token tok = new Token();
            if (startsId()) {
                String id = ttoken();
                tok.tokenValue = id;
                // Hashtable never stores null values, so a null result
                // means "not a keyword of the current lexer".
                Integer type =
                    (Integer) currentLexer.get(id.toUpperCase());
                tok.tokenType = (type != null) ? type.intValue() : ID;
            } else {
                char nextChar = getNextChar();
                tok.tokenValue = String.valueOf(nextChar);
                if (isAlpha(nextChar)) {
                    tok.tokenType = ALPHA;
                } else if (isDigit(nextChar)) {
                    tok.tokenType = DIGIT;
                } else {
                    tok.tokenType = (int) nextChar;
                }
            }
            retval.addElement(tok);
        }
        savedPtr = ptr;
    } finally {
        // A peek must never move the read pointer, including on failure.
        ptr = start;
    }
    return retval;
}
/**
 * Matches the given token at the current position and records it as
 * the current match, or throws an exception if it cannot be matched.
 * <ul>
 * <li>ID: the next id is consumed and recorded as a generic ID.</li>
 * <li>Other values strictly between START and END: the next id is
 * consumed and must be a keyword of the current lexer (compared in
 * upper case) whose value equals tok.</li>
 * <li>DIGIT / ALPHA: the next character must belong to that class and
 * is consumed.</li>
 * <li>Any value not above START and not above END otherwise: treated
 * as a character code; the next character must equal it and is
 * consumed.</li>
 * </ul>
 * NOTE(review): for values above END other than DIGIT and ALPHA (for
 * example WHITESPACE) nothing is matched or consumed and the previous
 * match is returned unchanged.
 * @param tok the token to be checked
 * @return the matched token
 * @exception ParseException if an error occurs during parsing
 */
public Token match(int tok) throws ParseException {
    if (Logging.REPORT_LEVEL <= Logging.INFORMATION) {
        Logging.report(Logging.INFORMATION, LogChannels.LC_JSR180,
                       "match " + tok);
    }

    if (tok > START && tok < END) {
        if (tok == ID) {
            // Generic ID sought.
            if (!startsId()) {
                throw new ParseException(buffer + "\nID expected", ptr);
            }
            final String id = getNextId();
            Token matched = new Token();
            matched.tokenValue = id;
            matched.tokenType = ID;
            currentMatch = matched;
        } else {
            // Specific keyword sought.
            final String word = getNextId();
            final Integer type =
                (Integer) currentLexer.get(word.toUpperCase());
            if (type == null || type.intValue() != tok) {
                throw new ParseException(
                    buffer + "\nUnexpected Token : " + word, ptr);
            }
            Token matched = new Token();
            matched.tokenValue = word;
            matched.tokenType = tok;
            currentMatch = matched;
        }
    } else if (tok > END) {
        // Character classes.
        final char next = lookAhead(0);
        if (tok == DIGIT || tok == ALPHA) {
            final boolean digitWanted = (tok == DIGIT);
            final boolean ok = digitWanted ? isDigit(next) : isAlpha(next);
            if (!ok) {
                throw new ParseException(
                    buffer + (digitWanted ? "\nExpecting DIGIT"
                                          : "\nExpecting ALPHA"), ptr);
            }
            Token matched = new Token();
            matched.tokenValue = String.valueOf(next);
            matched.tokenType = tok;
            currentMatch = matched;
            consume(1);
        }
    } else {
        // This is a direct character spec.
        final char expected = (char) tok;
        if (lookAhead(0) != expected) {
            throw new ParseException(
                buffer + "\nExpecting " + expected, ptr);
        }
        Token matched = new Token();
        matched.tokenValue = String.valueOf(expected);
        matched.tokenType = tok;
        currentMatch = matched;
        consume(1);
    }
    return currentMatch;
}
/**
 * Skips any run of space and horizontal tab characters at the
 * current position.
 * Parsing errors raised while skipping are ignored.
 */
public void SPorHT() {
    try {
        for (;;) {
            final char c = lookAhead(0);
            if (c != ' ' && c != '\t') {
                break;
            }
            consume(1);
        }
    } catch (ParseException ex) {
        // Ignore
    }
}
/**
 * Checks whether an id starts at the current position.
 * @return true if the next character is accepted by isValidChar;
 * false otherwise, or if looking ahead raises a parsing error
 */
public boolean startsId() {
    boolean result;
    try {
        result = isValidChar(lookAhead(0));
    } catch (ParseException ex) {
        result = false;
    }
    return result;
}
/**
 * Reads and consumes the next textual token: the longest run of
 * characters accepted by isValidChar starting at the current position.
 * If a parsing error occurs, the characters collected so far are
 * returned.
 * @return the next token as a string (possibly empty)
 */
public String ttoken() {
    final StringBuffer text = new StringBuffer();
    try {
        while (hasMoreChars()) {
            final char c = lookAhead(0);
            if (!isValidChar(c)) {
                break;
            }
            consume(1);
            text.append(c);
        }
    } catch (ParseException ex) {
        // Fall through and return what has been collected.
    }
    return text.toString();
}
/**
 * Reads and consumes the next textual token, allowing embedded
 * white space.
 * Accepted characters are letters, digits, space, tab and the
 * punctuation characters _ + - ! ` ' ~ . and *.
 * If a parsing error occurs, the characters collected so far are
 * returned.
 * @return the next text token as a string with embedded space and
 * tab characters
 */
public String ttokenAllowSpace() {
    // Non-alphanumeric characters that may appear inside the token.
    final String extras = "_+-!`'~. \t*";
    final StringBuffer text = new StringBuffer();
    try {
        while (hasMoreChars()) {
            final char c = lookAhead(0);
            final boolean accepted =
                isAlpha(c) || isDigit(c) || extras.indexOf(c) >= 0;
            if (!accepted) {
                break;
            }
            text.append(c);
            consume(1);
        }
    } catch (ParseException ex) {
        // Fall through and return what has been collected.
    }
    return text.toString();
}
// Assume the cursor is at a quote.
/**
 * Reads a double-quoted string.
 * If the current character is not a double quote, nothing is consumed
 * and null is returned. Otherwise the opening quote, the contents and
 * the closing quote are consumed. Backslash escapes are preserved
 * verbatim (the backslash and the character after it are both kept),
 * so an escaped double quote does not end the string.
 * @return the contents of the quoted string without the surrounding
 * quotes, or null if the input does not start with a double quote
 * @exception ParseException if any parsing errors occur
 */
public String quotedString() throws ParseException {
    if (lookAhead(0) != '\"') {
        return null;
    }
    consume(1);
    final StringBuffer text = new StringBuffer();
    for (char c = getNextChar(); c != '\"'; c = getNextChar()) {
        text.append(c);
        if (c == '\\') {
            // Keep the escaped character, whatever it is.
            text.append(getNextChar());
        }
    }
    return text.toString();
}
// Assume the cursor is at a "("
/**
 * Reads a parenthesized comment.
 * If the current character is not a left paren, nothing is consumed
 * and null is returned. Otherwise everything up to the first
 * unescaped right paren is consumed, parens included. Backslash
 * escapes are preserved verbatim (the backslash and the character
 * after it are both kept).
 * @return the comment text without the surrounding parens, or null
 * if the input does not start with a left paren
 * @exception ParseException if any parsing errors occur, or if a NUL
 * character is read before the comment is closed
 */
public String comment() throws ParseException {
    if (lookAhead(0) != '(') {
        return null;
    }
    consume(1);
    final StringBuffer text = new StringBuffer();
    for (;;) {
        final char c = getNextChar();
        if (c == ')') {
            break;
        }
        if (c == '\0') {
            throw new ParseException(this.buffer + " :unexpected EOL",
                                     this.ptr);
        }
        text.append(c);
        if (c == '\\') {
            final char escaped = getNextChar();
            if (escaped == '\0') {
                throw new ParseException(this.buffer +
                                         " : unexpected EOL", this.ptr);
            }
            text.append(escaped);
        }
    }
    return text.toString();
}
/**
 * Reads and consumes characters up to, but not including, the next
 * semicolon, newline or NUL character.
 * If a parsing exception occurs, the characters consumed so far are
 * returned.
 * @return the next token without embedded semicolons
 */
public String byteStringNoSemicolon() {
    final StringBuffer text = new StringBuffer();
    try {
        for (char c = lookAhead(0);
             c != '\0' && c != '\n' && c != ';';
             c = lookAhead(0)) {
            consume(1);
            text.append(c);
        }
    } catch (ParseException ex) {
        // Fall through and return what has been collected.
    }
    return text.toString();
}
/**
 * Reads and consumes characters up to, but not including, the next
 * comma, newline or NUL character.
 * If a parsing exception occurs, the characters consumed so far are
 * returned.
 * @return the next token without embedded commas
 */
public String byteStringNoComma() {
    final StringBuffer text = new StringBuffer();
    try {
        for (;;) {
            final char c = lookAhead(0);
            if (c == '\0' || c == '\n' || c == ',') {
                break;
            }
            consume(1);
            text.append(c);
        }
    } catch (ParseException ex) {
        // Deliberately ignored: return what has been collected.
    }
    return text.toString();
}
/**
 * Converts a character to a one-character string.
 * @param ch the character to convert
 * @return a string containing only the given character
 */
public static String charAsString(char ch) {
    return String.valueOf(ch);
}
/**
 * Returns the next nchars characters of the input as a string
 * without consuming them.
 * If looking ahead raises a parsing error, the characters gathered
 * up to that point are returned.
 * @param nchars the number of characters to look ahead
 * @return a string containing the designated characters
 */
public String charAsString(int nchars) {
    final StringBuffer out = new StringBuffer();
    try {
        int offset = 0;
        while (offset < nchars) {
            out.append(lookAhead(offset));
            offset++;
        }
    } catch (ParseException ex) {
        // Fall through and return what has been collected.
    }
    return out.toString();
}
/**
 * Reads and consumes the run of digits at the current position.
 * At least one digit is required.
 * @return the digits as a string
 * @exception ParseException if the current character is not a digit,
 * or if any other parsing error occurs
 */
public String number() throws ParseException {
    final char first = lookAhead(0);
    if (!isDigit(first)) {
        throw new ParseException(
            buffer + ": Unexpected token at " + first, ptr);
    }
    final StringBuffer digits = new StringBuffer();
    digits.append(first);
    consume(1);
    for (char c = lookAhead(0); isDigit(c); c = lookAhead(0)) {
        digits.append(c);
        consume(1);
    }
    return digits.toString();
}
/**
 * Returns the current read position so that it can later be passed
 * to rewindInputPosition for backtracking.
 * @return the current pointer in the parsed content
 */
public int markInputPosition() {
    final int mark = ptr;
    return mark;
}
/**
 * Sets the read pointer to the given position. The value is stored
 * as-is; no range check is performed here.
 * @param position the desired parsing location, typically a value
 * obtained from markInputPosition
 */
public void rewindInputPosition(int position) {
    ptr = position;
}
/**
 * Returns the unread remainder of the buffer without consuming it.
 * @return the text from the read pointer to the end of the buffer,
 * or null if the read pointer is at or past the end
 */
public String getRest() {
    return (ptr < buffer.length()) ? buffer.substring(ptr) : null;
}
/**
 * Reads and consumes the input up to and including the next unescaped
 * occurrence of the given character.
 * A backslash escapes the character that follows it: the backslash
 * itself is dropped and the following character is kept. If a NUL
 * character is seen before the separator, the read pointer is put
 * back where it was on entry and an exception is thrown.
 * @param c the character to match
 * @return the text before the separator character (separator excluded)
 * @exception ParseException if a parsing error occurs
 */
public String getString(char c) throws ParseException {
    final int start = ptr;
    final StringBuffer text = new StringBuffer();
    for (;;) {
        char cur = lookAhead(0);
        if (cur == '\0') {
            // Build the exception first so it reports the failing offset.
            ParseException failure = new ParseException
                (this.buffer +
                 "unexpected EOL", this.ptr);
            ptr = start;
            throw failure;
        }
        if (cur == c) {
            consume(1);
            return text.toString();
        }
        consume(1);
        if (cur == '\\') {
            // Escaped character: drop the backslash, keep what follows.
            cur = lookAhead(0);
            if (cur == '\0') {
                ParseException failure =
                    new ParseException(this.buffer +
                                       "unexpected EOL", this.ptr);
                ptr = start;
                throw failure;
            }
            consume(1);
        }
        text.append(cur);
    }
}
/**
 * Returns the current read pointer.
 * @return offset in the buffer
 */
public int getPtr() {
    return ptr;
}
/**
 * Returns the text being scanned.
 * @return the parsing buffer
 */
public String getBuffer() {
    return buffer;
}
/**
 * Creates (but does not throw) a parse exception describing the
 * current lexer state.
 * @return an exception constructed from the current buffer and the
 * current read offset
 */
public ParseException createParseException() {
    final ParseException failure = new ParseException(buffer, ptr);
    return failure;
}
}
|