Methods Summary |
---|
private void | beginNewToken()
ensureChars();
if (!eof) {
tokenStart = p;
tokenColumn = column;
tokenLine = line;
}
|
protected abstract void | createBuffer(int bufferSize)Create the {@link #text} buffer to use for parsing. This method may
put text in the buffer, but it is not required to. In either case, it
should set {@link #numChars} appropriately. This method will be called
once, before tokenizing begins.
|
private void | ensureChars()
if (text == null) {
createBuffer(maximumTokenLength); // create text[], set numChars
p = tokenStart = tokenEnd = 0; // initialize other state
if (trackPosition) line = column = 1;
}
if (!eof && p >= numChars) // Fill the text[] buffer if needed
eof = !fillBuffer();
// Make sure our class invariants hold true before we start a token
assert text != null && 0 <= tokenStart && tokenStart <= tokenEnd &&
tokenEnd <= p && (p < numChars || (p == numChars && eof)) &&
numChars <= text.length;
|
protected abstract boolean | fillBuffer()Fill or refill the {@link #text} buffer and adjust related fields.
This method will be called when the tokenizer needs more characters to
tokenize. Concrete subclasses must implement this method to put
characters into the @{link #text} buffer, blocking if necessary to wait
for characters to become available. This method may make room in the
buffer by shifting the contents down to remove any characters before
tokenStart. It must preserve any characters after {@link #tokenStart}
and before {@link #numChars}, however. After such a shift, it must
adjust {@link #tokenStart}, {@link #tokenEnd} and {@link #p}
appropriately. After the optional shift, the method should add as many
new characters as possible to {@link #text} (and always at least 1) and
adjust {@link #numChars} appropriately.
|
public Tokenizer | keywords(java.lang.String[] keywords)
if (keywords != null) {
keywordMap = new HashMap(keywords.length);
for(int i = 0; i < keywords.length; i++)
keywordMap.put(keywords[i], new Integer(i));
}
else keywordMap = null;
return this;
|
public Tokenizer | maximumTokenLength(int size)
if (size < 1) throw new IllegalArgumentException();
if (text != null) throw new IllegalStateException();
maximumTokenLength = size;
return this;
|
public int | next()
int quoteindex;
beginNewToken();
if (eof) return tokenType = EOF;
char c = text[p];
if ((skipSpaces||tokenizeSpaces) && Character.isWhitespace(c)) {
tokenType = SPACE;
do {
if (trackPosition) updatePosition(text[p]);
p++;
if (p >= numChars) eof = !fillBuffer();
} while(!eof && Character.isWhitespace(text[p]));
// If we don't return space tokens then recursively call
// this method to find another token. Note that the next character
// is not space, so we will not get into infinite recursion
if (skipSpaces) return next();
tokenEnd = p;
}
else if (tokenizeNumbers && Character.isDigit(c)) {
tokenType = NUMBER;
do {
if (trackPosition) column++;
p++;
if (p >= numChars) eof = !fillBuffer();
} while(!eof && Character.isDigit(text[p]));
tokenEnd = p;
}
else if (tokenizeWords &&
(wordRecognizer!=null
?wordRecognizer.isWordStart(c)
:Character.isJavaIdentifierStart(c))) {
tokenType = WORD;
do {
if (trackPosition) column++;
p++;
if (p >= numChars) eof = !fillBuffer();
} while(!eof &&
(wordRecognizer!=null
?wordRecognizer.isWordPart(text[p], c)
:Character.isJavaIdentifierPart(text[p])));
if (keywordMap != null) {
String ident = new String(text,tokenStart,p-tokenStart);
Integer index = (Integer) keywordMap.get(ident);
if (index != null) {
tokenType = KEYWORD;
tokenKeyword = index.intValue();
}
}
tokenEnd = p;
}
else if (testquotes && (quoteindex = openquotes.indexOf(c)) != -1) {
// Notes: we do not recognize any escape characters.
// We do not include the opening or closing quote.
// We do not report an error on EOF or OVERFLOW.
if (trackPosition) column++;
p++;
// Scan until we find a matching quote, but do not include
// the opening or closing quote. Set the token type to the
// opening delimiter
char closequote = closequotes.charAt(quoteindex);
scan(closequote, false, false, true);
tokenType = c;
// the call to scan set tokenEnd, so we don't have to
}
else {
// Otherwise, the character itself is the token
if (trackPosition) updatePosition(text[p]);
tokenType = text[p];
p++;
tokenEnd = p;
}
// Check the invariants before returning
assert text != null && 0 <= tokenStart && tokenStart <= tokenEnd &&
tokenEnd <= p && p <= numChars && numChars <= text.length;
return tokenType;
|
public int | nextChar()
beginNewToken();
if (eof) return tokenType = EOF;
tokenType = text[p];
if (trackPosition) updatePosition(text[p]);
tokenEnd = ++p;
// Check the invariants before returning
assert text != null && 0 <= tokenStart && tokenStart <= tokenEnd &&
tokenEnd <= p && p <= numChars && numChars <= text.length;
return tokenType;
|
public Tokenizer | quotes(java.lang.String openquotes, java.lang.String closequotes)
if (openquotes == null || closequotes == null)
throw new NullPointerException("arguments must be non-null");
if (openquotes.length() != closequotes.length())
throw new IllegalArgumentException("argument lengths differ");
this.openquotes = openquotes;
this.closequotes = closequotes;
this.testquotes = openquotes.length() > 0;
return this;
|
public int | scan(char delimiter, boolean extendCurrentToken, boolean includeDelimiter, boolean skipDelimiter)
return scan(new char[] { delimiter }, false,
extendCurrentToken, includeDelimiter, skipDelimiter);
|
public int | scan(java.lang.String delimiter, boolean matchall, boolean extendCurrentToken, boolean includeDelimiter, boolean skipDelimiter)
return scan(delimiter.toCharArray(), matchall,
extendCurrentToken, includeDelimiter, skipDelimiter);
|
protected int | scan(char[] delimiter, boolean matchall, boolean extendCurrentToken, boolean includeDelimiter, boolean skipDelimiter)
if (matchall && !includeDelimiter && !skipDelimiter)
throw new IllegalArgumentException("must include or skip " +
"delimiter when matchall is true");
if (extendCurrentToken) ensureChars();
else beginNewToken();
tokenType = TEXT; // Even if return value differs
if (eof) return EOF;
int delimiterMatchIndex = 0;
String delimString = null;
if (!matchall && delimiter.length > 0)
delimString = new String(delimiter);
while(!eof) {
// See if we've found the delimiter. There are 3 cases here:
// 1) single-character delimiter
// 2) multi-char delimiter, and all must be matched sequentially
// 3) multi-char delimiter, must match any one of them.
if (delimiter.length == 1) {
if (text[p] == delimiter[0]) break;
}
else if (matchall) {
if (text[p] == delimiter[delimiterMatchIndex]) {
delimiterMatchIndex++;
if (delimiterMatchIndex == delimiter.length) break;
}
else delimiterMatchIndex = 0;
}
else {
if (delimString.indexOf(text[p]) != -1) break;
}
if (trackPosition) updatePosition(text[p]);
p++;
if (p >= numChars) { // Do we need more text?
if (tokenStart > 0) // Do we have room for more?
eof = !fillBuffer(); // Yes, so go get some
else { // No room for more characters
tokenEnd = p; // so report an overflow
return OVERFLOW;
}
}
}
if (eof) {
tokenEnd = p;
return EOF;
}
if (includeDelimiter) {
if (trackPosition) updatePosition(text[p]);
p++;
tokenEnd = p;
}
else if (skipDelimiter) {
if (trackPosition) updatePosition(text[p]);
p++;
if (matchall) tokenEnd = p - delimiter.length;
else tokenEnd = p - 1;
}
else {
// we know the delimiter length is 1 in this case
tokenEnd = p;
}
// Check the invariants before returning
assert text != null && 0 <= tokenStart && tokenStart <= tokenEnd &&
tokenEnd <= p && p <= numChars && numChars <= text.length;
return TEXT;
|
public Tokenizer | skipSpaces(boolean skip)
skipSpaces = skip;
return this;
|
public int | tokenColumn()
if (trackPosition && tokenStart < numChars) return tokenColumn;
else return 0;
|
public int | tokenKeyword()
if (tokenType == KEYWORD) return tokenKeyword;
else return -1;
|
public int | tokenLine()
if (trackPosition && tokenStart < numChars) return tokenLine;
else return 0;
|
public java.lang.String | tokenText()
if (text == null || tokenStart >= numChars) return null;
return new String(text, tokenStart, tokenEnd-tokenStart);
|
public int | tokenType() return tokenType;
|
public Tokenizer | tokenizeNumbers(boolean tokenize)
tokenizeNumbers = tokenize;
return this;
|
public Tokenizer | tokenizeSpaces(boolean tokenize)
tokenizeSpaces = tokenize;
return this;
|
public Tokenizer | tokenizeWords(boolean tokenize)
tokenizeWords = tokenize;
return this;
|
public Tokenizer | trackPosition(boolean track)
if (text != null) throw new IllegalStateException();
trackPosition = track;
return this;
|
private void | updatePosition(char c)
if (c == '\n") {
line++;
column = 1;
}
else column++;
|
public Tokenizer | wordRecognizer(Tokenizer.WordRecognizer wordRecognizer)
this.wordRecognizer = wordRecognizer;
return this;
|