File Doc Category Size Date Package
HeaderTokenizer.java API Doc Glassfish v2 API 11459 Mon May 14 15:28:48 BST 2007 javax.mail.internet

HeaderTokenizer

java.lang.Object

public class HeaderTokenizer extends Object

This class tokenizes RFC822 and MIME headers into the basic symbols specified by RFC822 and MIME.

This class handles folded headers (i.e., headers with embedded CRLF SPACE sequences). The folds are removed from the returned tokens.

version: 1.11, 07/05/04
author: John Mani

Fields Summary
private String
string
private boolean
skipComments
private String
delimiters
private int
currentPos
private int
maxPos
private int
nextPos
private int
peekPos
public static final String
RFC822
RFC822 specials
public static final String
MIME
MIME specials
private static final Token
EOFToken
Constructors Summary
/**
 * Creates a tokenizer over an RFC822-style header.
 *
 * @param header       the RFC822 header to be tokenized; a {@code null}
 *                     header is treated as the empty string
 * @param delimiters   set of delimiter characters used to delimit ATOMS;
 *                     these are usually {@code RFC822} or {@code MIME}
 * @param skipComments if true, comments are skipped and not returned
 *                     as tokens
 */
public HeaderTokenizer(String header, String delimiters, boolean skipComments) {
    // Defensive: substitute an empty header so later charAt/length calls are safe.
    if (header == null) {
        this.string = "";
    } else {
        this.string = header;
    }
    this.skipComments = skipComments;
    this.delimiters = delimiters;

    // All three cursors start at the beginning of the header.
    this.peekPos = 0;
    this.nextPos = 0;
    this.currentPos = 0;
    this.maxPos = this.string.length();
}
/**
 * Creates a tokenizer that skips comments: they are ignored and never
 * returned as tokens.
 *
 * @param header     the header that is tokenized
 * @param delimiters the delimiters to be used
 */
public HeaderTokenizer(String header, String delimiters) {
    // Delegate with skipComments = true.
    this(header, delimiters, true);
}
/**
 * Creates a tokenizer that uses the RFC822-defined delimiters
 * ({@code RFC822}) to delimit ATOMS. Comments are skipped and not
 * returned as tokens.
 *
 * @param header the header that is tokenized
 */
public HeaderTokenizer(String header) {
    // Delegate to the two-argument form, which in turn skips comments.
    this(header, RFC822);
}
Methods Summary
private static java.lang.String filterToken(java.lang.String s, int start, int end)
StringBuffer sb = new StringBuffer(); char c; boolean gotEscape = false; boolean gotCR = false; for (int i = start; i < end; i++) { c = s.charAt(i); if (c == '\n" && gotCR) { // This LF is part of an unescaped // CRLF sequence (i.e, LWSP). Skip it. gotCR = false; continue; } gotCR = false; if (!gotEscape) { // Previous character was NOT '\' if (c == '\\") // skip this character gotEscape = true; else if (c == '\r") // skip this character gotCR = true; else // append this character sb.append(c); } else { // Previous character was '\'. So no need to // bother with any special processing, just // append this character sb.append(c); gotEscape = false; } } return sb.toString();
private javax.mail.internet.HeaderTokenizer$Token getNext()
// If we're already at end of string, return EOF if (currentPos >= maxPos) return EOFToken; // Skip white-space, position currentPos beyond the space if (skipWhiteSpace() == Token.EOF) return EOFToken; char c; int start; boolean filter = false; c = string.charAt(currentPos); // Check or Skip comments and position currentPos // beyond the comment while (c == '(") { // Parsing comment .. int nesting; for (start = ++currentPos, nesting = 1; nesting > 0 && currentPos < maxPos; currentPos++) { c = string.charAt(currentPos); if (c == '\\") { // Escape sequence currentPos++; // skip the escaped character filter = true; } else if (c == '\r") filter = true; else if (c == '(") nesting++; else if (c == ')") nesting--; } if (nesting != 0) throw new ParseException("Unbalanced comments"); if (!skipComments) { // Return the comment, if we are asked to. // Note that the comment start & end markers are ignored. String s; if (filter) // need to go thru the token again. s = filterToken(string, start, currentPos-1); else s = string.substring(start,currentPos-1); return new Token(Token.COMMENT, s); } // Skip any whitespace after the comment. 
if (skipWhiteSpace() == Token.EOF) return EOFToken; c = string.charAt(currentPos); } // Check for quoted-string and position currentPos // beyond the terminating quote if (c == '"") { for (start = ++currentPos; currentPos < maxPos; currentPos++) { c = string.charAt(currentPos); if (c == '\\") { // Escape sequence currentPos++; filter = true; } else if (c == '\r") filter = true; else if (c == '"") { currentPos++; String s; if (filter) s = filterToken(string, start, currentPos-1); else s = string.substring(start,currentPos-1); return new Token(Token.QUOTEDSTRING, s); } } throw new ParseException("Unbalanced quoted string"); } // Check for SPECIAL or CTL if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) { currentPos++; // re-position currentPos char ch[] = new char[1]; ch[0] = c; return new Token((int)c, new String(ch)); } // Check for ATOM for (start = currentPos; currentPos < maxPos; currentPos++) { c = string.charAt(currentPos); // ATOM is delimited by either SPACE, CTL, "(", <"> // or the specified SPECIALS if (c < 040 || c >= 0177 || c == '(" || c == ' " || c == '"" || delimiters.indexOf(c) >= 0) break; } return new Token(Token.ATOM, string.substring(start, currentPos));
public java.lang.String getRemainder()
Return the rest of the Header.
return
String rest of header. null is returned if we are already at end of header
return string.substring(nextPos);
public javax.mail.internet.HeaderTokenizer$Token next()
Parses the next token from this String.
Clients sit in a loop calling next() to parse successive tokens until an EOF Token is returned.
return
the next Token
exception
ParseException if the parse fails
Token tk; currentPos = nextPos; // setup currentPos tk = getNext(); nextPos = peekPos = currentPos; // update currentPos and peekPos return tk;
public javax.mail.internet.HeaderTokenizer$Token peek()
Peek at the next token, without actually removing the token from the parse stream. Invoking this method multiple times will return successive tokens, until next() is called.
return
the next Token
exception
ParseException if the parse fails
Token tk; currentPos = peekPos; // setup currentPos tk = getNext(); peekPos = currentPos; // update peekPos return tk;
private int skipWhiteSpace()
char c; for (; currentPos < maxPos; currentPos++) if (((c = string.charAt(currentPos)) != ' ") && (c != '\t") && (c != '\r") && (c != '\n")) return currentPos; return Token.EOF;