File Doc Category Size Date Package
HeaderTokenizer.java API Doc JavaMail 1.4.3 13052 Tue Nov 17 10:38:12 GMT 2009 javax.mail.internet

HeaderTokenizer

java.lang.Object

public class HeaderTokenizer extends Object

This class tokenizes RFC822 and MIME headers into the basic symbols specified by RFC822 and MIME.

This class handles folded headers (i.e., headers with embedded CRLF SPACE sequences). The folds are removed in the returned tokens.

author: John Mani

Fields Summary
private String
string
private boolean
skipComments
private String
delimiters
private int
currentPos
private int
maxPos
private int
nextPos
private int
peekPos
public static final String
RFC822
RFC822 specials
public static final String
MIME
MIME specials
private static final Token
EOFToken
Constructors Summary
/**
 * Constructor that takes an RFC822-style header.
 *
 * @param header       the RFC822 header to be tokenized; {@code null} is
 *                     treated as the empty string
 * @param delimiters   set of delimiter characters used to delimit ATOMs;
 *                     these are usually {@code RFC822} or {@code MIME}
 * @param skipComments if true, comments are skipped and not returned as
 *                     tokens
 */
public HeaderTokenizer(String header, String delimiters, boolean skipComments) {
    this.delimiters = delimiters;
    this.skipComments = skipComments;

    // Paranoia: a missing header is tokenized as if it were empty.
    if (header == null) {
        string = "";
    } else {
        string = header;
    }
    maxPos = string.length();

    // All three cursors start at the beginning of the header.
    currentPos = 0;
    nextPos = 0;
    peekPos = 0;
}
/**
 * Constructor. Comments are ignored and not returned as tokens.
 *
 * @param header     the header that is tokenized
 * @param delimiters the delimiters to be used
 */
public HeaderTokenizer(String header, String delimiters) {
    // Delegate to the three-argument constructor with skipComments = true.
    this(header, delimiters, true);
}
/**
 * Constructor. The RFC822-defined delimiters ({@code RFC822}) are used to
 * delimit ATOMs. Comments are skipped and not returned as tokens.
 *
 * @param header the header that is tokenized
 */
public HeaderTokenizer(String header) {
    // Delegate to the two-argument constructor, which skips comments.
    this(header, RFC822);
}
Methods Summary
private javax.mail.internet.HeaderTokenizer$Token collectString(char eos)
int start; boolean filter = false; for (start = currentPos; currentPos < maxPos; currentPos++) { char c = string.charAt(currentPos); if (c == '\\") { // Escape sequence currentPos++; filter = true; } else if (c == '\r") filter = true; else if (c == eos) { currentPos++; String s; if (filter) s = filterToken(string, start, currentPos-1); else s = string.substring(start, currentPos-1); if (c != '"") { // not a real quoted string s = trimWhiteSpace(s); currentPos--; // back up before the eos char } return new Token(Token.QUOTEDSTRING, s); } } // ran off the end of the string // if we're looking for a matching quote, that's an error if (eos == '"") throw new ParseException("Unbalanced quoted string"); // otherwise, just return whatever's left String s; if (filter) s = filterToken(string, start, currentPos); else s = string.substring(start, currentPos); s = trimWhiteSpace(s); return new Token(Token.QUOTEDSTRING, s);
private static java.lang.String filterToken(java.lang.String s, int start, int end)
StringBuffer sb = new StringBuffer(); char c; boolean gotEscape = false; boolean gotCR = false; for (int i = start; i < end; i++) { c = s.charAt(i); if (c == '\n" && gotCR) { // This LF is part of an unescaped // CRLF sequence (i.e, LWSP). Skip it. gotCR = false; continue; } gotCR = false; if (!gotEscape) { // Previous character was NOT '\' if (c == '\\") // skip this character gotEscape = true; else if (c == '\r") // skip this character gotCR = true; else // append this character sb.append(c); } else { // Previous character was '\'. So no need to // bother with any special processing, just // append this character sb.append(c); gotEscape = false; } } return sb.toString();
private javax.mail.internet.HeaderTokenizer$Token getNext(char endOfAtom)
// If we're already at end of string, return EOF if (currentPos >= maxPos) return EOFToken; // Skip white-space, position currentPos beyond the space if (skipWhiteSpace() == Token.EOF) return EOFToken; char c; int start; boolean filter = false; c = string.charAt(currentPos); // Check or Skip comments and position currentPos // beyond the comment while (c == '(") { // Parsing comment .. int nesting; for (start = ++currentPos, nesting = 1; nesting > 0 && currentPos < maxPos; currentPos++) { c = string.charAt(currentPos); if (c == '\\") { // Escape sequence currentPos++; // skip the escaped character filter = true; } else if (c == '\r") filter = true; else if (c == '(") nesting++; else if (c == ')") nesting--; } if (nesting != 0) throw new ParseException("Unbalanced comments"); if (!skipComments) { // Return the comment, if we are asked to. // Note that the comment start & end markers are ignored. String s; if (filter) // need to go thru the token again. s = filterToken(string, start, currentPos-1); else s = string.substring(start,currentPos-1); return new Token(Token.COMMENT, s); } // Skip any whitespace after the comment. 
if (skipWhiteSpace() == Token.EOF) return EOFToken; c = string.charAt(currentPos); } // Check for quoted-string and position currentPos // beyond the terminating quote if (c == '"") { currentPos++; // skip initial quote return collectString('""); } // Check for SPECIAL or CTL if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) { if (endOfAtom > 0 && c != endOfAtom) { // not expecting a special character here, // pretend it's a quoted string return collectString(endOfAtom); } currentPos++; // re-position currentPos char ch[] = new char[1]; ch[0] = c; return new Token((int)c, new String(ch)); } // Check for ATOM for (start = currentPos; currentPos < maxPos; currentPos++) { c = string.charAt(currentPos); // ATOM is delimited by either SPACE, CTL, "(", <"> // or the specified SPECIALS if (c < 040 || c >= 0177 || c == '(" || c == ' " || c == '"" || delimiters.indexOf(c) >= 0) { if (endOfAtom > 0 && c != endOfAtom) { // not the expected atom after all; // back up and pretend it's a quoted string currentPos = start; return collectString(endOfAtom); } break; } } return new Token(Token.ATOM, string.substring(start, currentPos));
public java.lang.String getRemainder()
Return the rest of the Header.
return
String rest of header. null is returned if we are already at end of header
return string.substring(nextPos);
public javax.mail.internet.HeaderTokenizer$Token next()
Parses the next token from this String.
Clients sit in a loop calling next() to parse successive tokens until an EOF Token is returned.
return
the next Token
exception
ParseException if the parse fails
Token tk; currentPos = nextPos; // setup currentPos tk = getNext('\0"); nextPos = peekPos = currentPos; // update currentPos and peekPos return tk;
javax.mail.internet.HeaderTokenizer$Token next(char endOfAtom)
Token tk; currentPos = nextPos; // setup currentPos tk = getNext(endOfAtom); nextPos = peekPos = currentPos; // update currentPos and peekPos return tk;
public javax.mail.internet.HeaderTokenizer$Token peek()
Peek at the next token, without actually removing the token from the parse stream. Invoking this method multiple times will return successive tokens, until next() is called.
return
the next Token
exception
ParseException if the parse fails
Token tk; currentPos = peekPos; // setup currentPos tk = getNext('\0"); peekPos = currentPos; // update peekPos return tk;
private int skipWhiteSpace()
char c; for (; currentPos < maxPos; currentPos++) if (((c = string.charAt(currentPos)) != ' ") && (c != '\t") && (c != '\r") && (c != '\n")) return currentPos; return Token.EOF;
private static java.lang.String trimWhiteSpace(java.lang.String s)
char c; int i; for (i = s.length() - 1; i >= 0; i--) { if (((c = s.charAt(i)) != ' ") && (c != '\t") && (c != '\r") && (c != '\n")) break; } if (i <= 0) return ""; else return s.substring(0, i + 1);