HeaderTokenizer
public class HeaderTokenizer extends Object
This class tokenizes RFC822 and MIME headers into the basic
symbols specified by RFC822 and MIME.
This class handles folded headers (i.e., headers with embedded
CRLF SPACE sequences). The folds are removed in the returned
tokens. |
Fields Summary |
---|
private String | string | private boolean | skipComments | private String | delimiters | private int | currentPos | private int | maxPos | private int | nextPos | private int | peekPos | public static final String | RFC822 - RFC822 specials | public static final String | MIME - MIME specials | private static final Token | EOFToken |
Constructors Summary |
---|
/**
 * Constructor that takes a rfc822 style header.
 *
 * @param header       the header text to tokenize; {@code null} is
 *                     treated as the empty string
 * @param delimiters   the characters that terminate an ATOM and are
 *                     themselves returned as single-character tokens
 * @param skipComments if {@code true}, comments are skipped and not
 *                     returned as tokens
 */
public HeaderTokenizer(String header, String delimiters, boolean skipComments) {
    // Defensive: a null header is tokenized as if it were empty.
    if (header == null) {
        string = "";
    } else {
        string = header;
    }
    this.delimiters = delimiters;
    this.skipComments = skipComments;

    // All three cursors start at the beginning of the header.
    peekPos = 0;
    nextPos = 0;
    currentPos = 0;
    maxPos = string.length();
}
/**
 * Constructor. Comments are ignored and not returned as tokens.
 *
 * @param header     the header text to tokenize
 * @param delimiters the characters that delimit ATOMs
 */
public HeaderTokenizer(String header, String delimiters) {
    // Delegate with skipComments switched on.
    this(header, delimiters, true);
}
/**
 * Constructor. The RFC822 defined delimiters - RFC822 - are
 * used to delimit ATOMS. Also comments are skipped and not
 * returned as tokens.
 *
 * @param header the header text to tokenize
 */
public HeaderTokenizer(String header) {
    // Delegate using the RFC822 specials as the delimiter set.
    this(header, RFC822);
}
|
Methods Summary |
---|
private static java.lang.String | filterToken(java.lang.String s, int start, int end)
StringBuffer sb = new StringBuffer();
char c;
boolean gotEscape = false;
boolean gotCR = false;
for (int i = start; i < end; i++) {
c = s.charAt(i);
if (c == '\n" && gotCR) {
// This LF is part of an unescaped
// CRLF sequence (i.e, LWSP). Skip it.
gotCR = false;
continue;
}
gotCR = false;
if (!gotEscape) {
// Previous character was NOT '\'
if (c == '\\") // skip this character
gotEscape = true;
else if (c == '\r") // skip this character
gotCR = true;
else // append this character
sb.append(c);
} else {
// Previous character was '\'. So no need to
// bother with any special processing, just
// append this character
sb.append(c);
gotEscape = false;
}
}
return sb.toString();
| private javax.mail.internet.HeaderTokenizer$Token | getNext()
// If we're already at end of string, return EOF
if (currentPos >= maxPos)
return EOFToken;
// Skip white-space, position currentPos beyond the space
if (skipWhiteSpace() == Token.EOF)
return EOFToken;
char c;
int start;
boolean filter = false;
c = string.charAt(currentPos);
// Check or Skip comments and position currentPos
// beyond the comment
while (c == '(") {
// Parsing comment ..
int nesting;
for (start = ++currentPos, nesting = 1;
nesting > 0 && currentPos < maxPos;
currentPos++) {
c = string.charAt(currentPos);
if (c == '\\") { // Escape sequence
currentPos++; // skip the escaped character
filter = true;
} else if (c == '\r")
filter = true;
else if (c == '(")
nesting++;
else if (c == ')")
nesting--;
}
if (nesting != 0)
throw new ParseException("Unbalanced comments");
if (!skipComments) {
// Return the comment, if we are asked to.
// Note that the comment start & end markers are ignored.
String s;
if (filter) // need to go thru the token again.
s = filterToken(string, start, currentPos-1);
else
s = string.substring(start,currentPos-1);
return new Token(Token.COMMENT, s);
}
// Skip any whitespace after the comment.
if (skipWhiteSpace() == Token.EOF)
return EOFToken;
c = string.charAt(currentPos);
}
// Check for quoted-string and position currentPos
// beyond the terminating quote
if (c == '"") {
for (start = ++currentPos; currentPos < maxPos; currentPos++) {
c = string.charAt(currentPos);
if (c == '\\") { // Escape sequence
currentPos++;
filter = true;
} else if (c == '\r")
filter = true;
else if (c == '"") {
currentPos++;
String s;
if (filter)
s = filterToken(string, start, currentPos-1);
else
s = string.substring(start,currentPos-1);
return new Token(Token.QUOTEDSTRING, s);
}
}
throw new ParseException("Unbalanced quoted string");
}
// Check for SPECIAL or CTL
if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
currentPos++; // re-position currentPos
char ch[] = new char[1];
ch[0] = c;
return new Token((int)c, new String(ch));
}
// Check for ATOM
for (start = currentPos; currentPos < maxPos; currentPos++) {
c = string.charAt(currentPos);
// ATOM is delimited by either SPACE, CTL, "(", <">
// or the specified SPECIALS
if (c < 040 || c >= 0177 || c == '(" || c == ' " ||
c == '"" || delimiters.indexOf(c) >= 0)
break;
}
return new Token(Token.ATOM, string.substring(start, currentPos));
| public java.lang.String | getRemainder()Return the rest of the Header.
return string.substring(nextPos);
| public javax.mail.internet.HeaderTokenizer$Token | next()Parses the next token from this String.
Clients sit in a loop calling next() to parse successive
tokens until an EOF Token is returned.
Token tk;
currentPos = nextPos; // setup currentPos
tk = getNext();
nextPos = peekPos = currentPos; // update currentPos and peekPos
return tk;
| public javax.mail.internet.HeaderTokenizer$Token | peek()Peek at the next token, without actually removing the token
from the parse stream. Invoking this method multiple times
will return successive tokens, until next() is
called.
Token tk;
currentPos = peekPos; // setup currentPos
tk = getNext();
peekPos = currentPos; // update peekPos
return tk;
| private int | skipWhiteSpace()
char c;
for (; currentPos < maxPos; currentPos++)
if (((c = string.charAt(currentPos)) != ' ") &&
(c != '\t") && (c != '\r") && (c != '\n"))
return currentPos;
return Token.EOF;
|
|