Fields Summary |
---|
public double | nvalContains a number if the current token is a number ({@code ttype} ==
{@code TT_NUMBER}). |
public String | svalContains a string if the current token is a word ({@code ttype} ==
{@code TT_WORD}). |
public static final int | TT_EOFThe constant representing the end of the stream. |
public static final int | TT_EOLThe constant representing the end of the line. |
public static final int | TT_NUMBERThe constant representing a number token. |
public static final int | TT_WORDThe constant representing a word token. |
private static final int | TT_UNKNOWNInternal representation of unknown state. |
public int | ttypeAfter calling {@code nextToken()}, {@code ttype} contains the type of
token that has been read. When a single character is read, its value
converted to an integer is stored in {@code ttype}. For a quoted string,
the value is the quoted character. Otherwise, its value is one of the
following:
- {@code TT_WORD} - the token is a word.
- {@code TT_NUMBER} - the token is a number.
- {@code TT_EOL} - the end of line has been reached. Depends on
whether {@code eolIsSignificant} is {@code true}.
- {@code TT_EOF} - the end of the stream has been reached.
|
private byte[] | tokenTypesInternal character meanings, 0 implies TOKEN_ORDINARY |
private static final byte | TOKEN_COMMENT |
private static final byte | TOKEN_QUOTE |
private static final byte | TOKEN_WHITE |
private static final byte | TOKEN_WORD |
private static final byte | TOKEN_DIGIT |
private int | lineNumber |
private boolean | forceLowercase |
private boolean | isEOLSignificant |
private boolean | slashStarComments |
private boolean | slashSlashComments |
private boolean | pushBackToken |
private boolean | lastCr |
private InputStream | inStream |
private Reader | inReader |
private int | peekChar |
Constructors Summary |
---|
private StreamTokenizer()Private constructor to initialize the default values according to the
specification.
/*
 * Initialize the default state per specification. All byte values 'A'
 * through 'Z', 'a' through 'z', and '\u00A0' through '\u00FF' are
 * considered to be alphabetic.
 */
wordChars('A', 'Z');
wordChars('a', 'z');
wordChars(160, 255);
/*
 * All byte values '\u0000' through '\u0020' are considered to be white
 * space.
 */
whitespaceChars(0, 32);
/*
 * '/' is a comment character. Single quote '\'' and double quote '"'
 * are string quote characters.
 */
commentChar('/');
quoteChar('"');
quoteChar('\'');
/*
 * Numbers are parsed: this marks '0'-'9', '.' and '-' as numeric.
 */
parseNumbers();
/*
 * Ends of lines are treated as white space, not as separate tokens.
 * C-style and C++-style comments are not recognized. These are the
 * field defaults, so nothing has to be set here for them.
 */
|
public StreamTokenizer(InputStream is)Constructs a new {@code StreamTokenizer} with {@code is} as source input
stream. This constructor is deprecated; instead, the constructor that
takes a {@code Reader} as an argument should be used.
// Install the default syntax table first.
this();
// A missing source is rejected immediately; otherwise it becomes the
// byte-oriented input consulted by read().
if (is != null) {
    this.inStream = is;
} else {
    throw new NullPointerException();
}
|
public StreamTokenizer(Reader r)Constructs a new {@code StreamTokenizer} with {@code r} as source reader.
The tokenizer's initial state is as follows:
- All byte values 'A' through 'Z', 'a' through 'z', and '\u00A0'
through '\u00FF' are considered to be alphabetic.
- All byte values '\u0000' through '\u0020' are considered to
be white space. '/' is a comment character.
- Single quote '\'' and double quote '"' are string quote characters.
- Numbers are parsed.
- End of lines are considered to be white space rather than separate
tokens.
- C-style and C++-style comments are not recognized.
// Install the default syntax table first.
this();
// A missing source is rejected immediately; otherwise it becomes the
// character-oriented input consulted by read().
if (r != null) {
    this.inReader = r;
} else {
    throw new NullPointerException();
}
|
Methods Summary |
---|
public void | commentChar(int ch)Specifies that the character {@code ch} shall be treated as a comment
character.
// Characters that fall outside the lookup table are silently ignored.
if (ch < 0 || ch >= tokenTypes.length) {
    return;
}
// Replaces (does not OR into) any previous classification of ch.
tokenTypes[ch] = TOKEN_COMMENT;
|
public void | eolIsSignificant(boolean flag)Specifies whether the end of a line is significant and should be returned
as {@code TT_EOL} in {@code ttype} by this tokenizer.
// Consulted by nextToken() whenever it meets '\r' or '\n'.
this.isEOLSignificant = flag;
|
public int | lineno()Returns the current line number.
// The counter is advanced by nextToken() as line terminators are consumed.
return this.lineNumber;
|
public void | lowerCaseMode(boolean flag)Specifies whether word tokens should be converted to lower case when they
are stored in {@code sval}.
// Consulted by nextToken() when it stores a word token into sval.
this.forceLowercase = flag;
|
public int | nextToken()Parses the next token from this tokenizer's source stream or reader. The
type of the token is stored in the {@code ttype} field, additional
information may be stored in the {@code nval} or {@code sval} fields.
// A pushed-back token is handed out again without touching the input,
// unless no token has been read yet.
if (pushBackToken) {
    pushBackToken = false;
    if (ttype != TT_UNKNOWN) {
        return ttype;
    }
}
sval = null; // Always reset sval to null
// peekChar == -2 means that no look-ahead character is buffered.
int currentChar = peekChar == -2 ? read() : peekChar;
// The previous call reported a '\r' as TT_EOL. Only the character that
// immediately follows may be the '\n' of a "\r\n" pair, so the flag is
// cleared unconditionally; otherwise a later, unrelated '\n' would be
// swallowed and its TT_EOL lost.
if (lastCr) {
    lastCr = false;
    if (currentChar == '\n') {
        currentChar = read();
    }
}
if (currentChar == -1) {
    return (ttype = TT_EOF);
}
// Characters beyond the lookup table are always treated as word characters.
byte currentType = currentChar > 255 ? TOKEN_WORD
        : tokenTypes[currentChar];
while ((currentType & TOKEN_WHITE) != 0) {
    /*
     * Skip over white space until we hit a new line or a real token
     */
    if (currentChar == '\r') {
        lineNumber++;
        if (isEOLSignificant) {
            lastCr = true;
            peekChar = -2;
            return (ttype = TT_EOL);
        }
        // Treat "\r\n" as a single line terminator.
        if ((currentChar = read()) == '\n') {
            currentChar = read();
        }
    } else if (currentChar == '\n') {
        lineNumber++;
        if (isEOLSignificant) {
            peekChar = -2;
            return (ttype = TT_EOL);
        }
        currentChar = read();
    } else {
        // Advance over this white space character and try again.
        currentChar = read();
    }
    if (currentChar == -1) {
        return (ttype = TT_EOF);
    }
    currentType = currentChar > 255 ? TOKEN_WORD
            : tokenTypes[currentChar];
}
/*
 * Check for digits before checking for words since digits can be
 * contained within words.
 */
if ((currentType & TOKEN_DIGIT) != 0) {
    StringBuilder digits = new StringBuilder(20);
    boolean haveDecimal = false, checkJustNegative = currentChar == '-';
    while (true) {
        if (currentChar == '.') {
            haveDecimal = true;
        }
        digits.append((char) currentChar);
        currentChar = read();
        // Stop at the first non-digit; a '.' is accepted only once.
        if ((currentChar < '0' || currentChar > '9')
                && (haveDecimal || currentChar != '.')) {
            break;
        }
    }
    peekChar = currentChar;
    if (checkJustNegative && digits.length() == 1) {
        // Didn't get any other digits other than '-'
        return (ttype = '-');
    }
    try {
        nval = Double.valueOf(digits.toString()).doubleValue();
    } catch (NumberFormatException e) {
        // Unparseable text such as "." or "-." is reported as the number 0.
        nval = 0;
    }
    return (ttype = TT_NUMBER);
}
// Check for words
if ((currentType & TOKEN_WORD) != 0) {
    StringBuilder word = new StringBuilder(20);
    while (true) {
        word.append((char) currentChar);
        currentChar = read();
        // A word continues over word and digit characters, and over any
        // character that lies beyond the lookup table.
        if (currentChar == -1
                || (currentChar < 256 && (tokenTypes[currentChar] & (TOKEN_WORD | TOKEN_DIGIT)) == 0)) {
            break;
        }
    }
    peekChar = currentChar;
    sval = forceLowercase ? word.toString().toLowerCase() : word
            .toString();
    return (ttype = TT_WORD);
}
// Check for quoted character
if (currentType == TOKEN_QUOTE) {
    int matchQuote = currentChar;
    StringBuilder quoteString = new StringBuilder();
    int peekOne = read();
    // The string ends at the matching quote, at a line terminator or at EOF.
    while (peekOne >= 0 && peekOne != matchQuote && peekOne != '\r'
            && peekOne != '\n') {
        boolean readPeek = true;
        if (peekOne == '\\') {
            int c1 = read();
            // Check for quoted octal IE: \377
            if (c1 <= '7' && c1 >= '0') {
                int digitValue = c1 - '0';
                c1 = read();
                if (c1 > '7' || c1 < '0') {
                    readPeek = false;
                } else {
                    digitValue = digitValue * 8 + (c1 - '0');
                    c1 = read();
                    // limit the digit value to a byte
                    if (digitValue > 037 || c1 > '7' || c1 < '0') {
                        readPeek = false;
                    } else {
                        digitValue = digitValue * 8 + (c1 - '0');
                    }
                }
                if (!readPeek) {
                    // We've consumed one too many
                    quoteString.append((char) digitValue);
                    peekOne = c1;
                } else {
                    peekOne = digitValue;
                }
            } else {
                switch (c1) {
                    case 'a':
                        peekOne = 0x7;
                        break;
                    case 'b':
                        peekOne = 0x8;
                        break;
                    case 'f':
                        peekOne = 0xc;
                        break;
                    case 'n':
                        peekOne = 0xA;
                        break;
                    case 'r':
                        peekOne = 0xD;
                        break;
                    case 't':
                        peekOne = 0x9;
                        break;
                    case 'v':
                        peekOne = 0xB;
                        break;
                    default:
                        peekOne = c1;
                }
            }
        }
        if (readPeek) {
            quoteString.append((char) peekOne);
            peekOne = read();
        }
    }
    // Consume the closing quote; any other terminator stays buffered.
    if (peekOne == matchQuote) {
        peekOne = read();
    }
    peekChar = peekOne;
    ttype = matchQuote;
    sval = quoteString.toString();
    return ttype;
}
// Do comments, both "//" and "/*stuff*/"
if (currentChar == '/' && (slashSlashComments || slashStarComments)) {
    if ((currentChar = read()) == '*' && slashStarComments) {
        int peekOne = read();
        while (true) {
            currentChar = peekOne;
            peekOne = read();
            if (currentChar == -1) {
                peekChar = -1;
                return (ttype = TT_EOF);
            }
            if (currentChar == '\r') {
                if (peekOne == '\n') {
                    peekOne = read();
                }
                lineNumber++;
            } else if (currentChar == '\n') {
                lineNumber++;
            } else if (currentChar == '*' && peekOne == '/') {
                peekChar = read();
                return nextToken();
            }
        }
    } else if (currentChar == '/' && slashSlashComments) {
        // Skip to EOF or new line then return the next token
        while ((currentChar = read()) >= 0 && currentChar != '\r'
                && currentChar != '\n') {
            // Intentionally empty
        }
        peekChar = currentChar;
        return nextToken();
    } else if (currentType != TOKEN_COMMENT) {
        // Was just a slash by itself
        peekChar = currentChar;
        return (ttype = '/');
    }
}
// Check for comment character
if (currentType == TOKEN_COMMENT) {
    // Skip to EOF or new line then return the next token
    while ((currentChar = read()) >= 0 && currentChar != '\r'
            && currentChar != '\n') {
        // Intentionally empty
    }
    peekChar = currentChar;
    return nextToken();
}
// Ordinary character: it is its own token type.
peekChar = read();
return (ttype = currentChar);
|
public void | ordinaryChar(int ch)Specifies that the character {@code ch} shall be treated as an ordinary
character by this tokenizer. That is, it has no special meaning as a
comment character, word component, white space, string delimiter or
number.
// Characters that fall outside the lookup table are silently ignored.
if (ch < 0 || ch >= tokenTypes.length) {
    return;
}
// 0 clears every attribute bit for ch.
tokenTypes[ch] = 0;
|
public void | ordinaryChars(int low, int hi)Specifies that the characters in the range from {@code low} to {@code hi}
shall be treated as an ordinary character by this tokenizer. That is,
they have no special meaning as a comment character, word component,
white space, string delimiter or number.
// Clamp the requested range to the bounds of the lookup table.
if (low < 0) {
    low = 0;
}
// ">=" rather than ">": hi equal to the table length would otherwise pass
// through unclamped and index one element past the end of the table.
if (hi >= tokenTypes.length) {
    hi = tokenTypes.length - 1;
}
// 0 clears every attribute bit; an empty range (hi < low) is a no-op.
for (int i = low; i <= hi; i++) {
    tokenTypes[i] = 0;
}
|
public void | parseNumbers()Specifies that this tokenizer shall parse numbers.
// The digit flag is OR-ed in, so any existing classification bits of these
// characters are kept.
for (int i = '0'; i <= '9'; i++) {
    tokenTypes[i] |= TOKEN_DIGIT;
}
// '.' and '-' may also start or continue a number.
tokenTypes['.'] |= TOKEN_DIGIT;
tokenTypes['-'] |= TOKEN_DIGIT;
|
public void | pushBack()Indicates that the current token should be pushed back and returned again
the next time {@code nextToken()} is called.
// nextToken() checks and clears this flag on its next invocation.
this.pushBackToken = true;
|
public void | quoteChar(int ch)Specifies that the character {@code ch} shall be treated as a quote
character.
// Characters that fall outside the lookup table are silently ignored.
if (ch < 0 || ch >= tokenTypes.length) {
    return;
}
// Replaces (does not OR into) any previous classification of ch.
tokenTypes[ch] = TOKEN_QUOTE;
|
private int | read()
// Call the read for the appropriate stream
if (inStream == null) {
return inReader.read();
}
return inStream.read();
|
public void | resetSyntax()Specifies that all characters shall be treated as ordinary characters.
for (int i = 0; i < 256; i++) {
tokenTypes[i] = 0;
}
|
public void | slashSlashComments(boolean flag)Specifies whether "slash-slash" (C++-style) comments shall be recognized.
This kind of comment ends at the end of the line.
// Consulted by nextToken() when it encounters a '/' character.
this.slashSlashComments = flag;
|
public void | slashStarComments(boolean flag)Specifies whether "slash-star" (C-style) comments shall be recognized.
Slash-star comments cannot be nested and end when a star-slash
combination is found.
// Consulted by nextToken() when it encounters a '/' character.
this.slashStarComments = flag;
|
public java.lang.String | toString()Returns the state of this tokenizer in a readable format.
// Values determined through experimentation
StringBuilder result = new StringBuilder();
result.append("Token["); //$NON-NLS-1$
switch (ttype) {
    case TT_EOF:
        result.append("EOF"); //$NON-NLS-1$
        break;
    case TT_EOL:
        result.append("EOL"); //$NON-NLS-1$
        break;
    case TT_NUMBER:
        result.append("n="); //$NON-NLS-1$
        result.append(nval);
        break;
    case TT_WORD:
        result.append(sval);
        break;
    default:
        // BEGIN android-changed
        // copied from a newer version of harmony
        // ttype is a public field, so it may hold a value outside the
        // lookup table; guard the index instead of letting toString() throw.
        boolean inTable = ttype >= 0 && ttype < tokenTypes.length;
        if (ttype == TT_UNKNOWN || (inTable && tokenTypes[ttype] == TOKEN_QUOTE)) {
            // No token yet, or a quoted string: show the string value.
            result.append(sval);
        } else {
            // Single ordinary character: show it between single quotes.
            result.append('\'');
            result.append((char) ttype);
            result.append('\'');
        }
        // END android-changed
}
result.append("], line "); //$NON-NLS-1$
result.append(lineNumber);
return result.toString();
|
public void | whitespaceChars(int low, int hi)Specifies that the characters in the range from {@code low} to {@code hi}
shall be treated as whitespace characters by this tokenizer.
// Clamp the requested range to the bounds of the lookup table.
if (low < 0) {
    low = 0;
}
// ">=" rather than ">": hi equal to the table length would otherwise pass
// through unclamped and index one element past the end of the table.
if (hi >= tokenTypes.length) {
    hi = tokenTypes.length - 1;
}
// Assignment (not OR) replaces any previous classification of each
// character; an empty range (hi < low) is a no-op.
for (int i = low; i <= hi; i++) {
    tokenTypes[i] = TOKEN_WHITE;
}
|
public void | wordChars(int low, int hi)Specifies that the characters in the range from {@code low} to {@code hi}
shall be treated as word characters by this tokenizer. A word consists of
a word character followed by zero or more word or number characters.
// Clamp the requested range to the bounds of the lookup table.
if (low < 0) {
    low = 0;
}
// ">=" rather than ">": hi equal to the table length would otherwise pass
// through unclamped and index one element past the end of the table.
if (hi >= tokenTypes.length) {
    hi = tokenTypes.length - 1;
}
// The word flag is OR-ed in, so existing bits (for example the digit flag)
// are kept; an empty range (hi < low) is a no-op.
for (int i = low; i <= hi; i++) {
    tokenTypes[i] |= TOKEN_WORD;
}
|