File | Doc Category | Size | Date | Package
HeaderTokenizer.java | API Doc | JavaMail 1.4.3 | 13052 | Tue Nov 17 10:38:12 GMT 2009 | javax.mail.internet

HeaderTokenizer

public class HeaderTokenizer extends Object
This class tokenizes RFC822 and MIME headers into the basic symbols specified by RFC822 and MIME.

This class handles folded headers (i.e., headers with embedded CRLF SPACE sequences). The folds are removed in the returned tokens.

author
John Mani

Fields Summary
private String
string
private boolean
skipComments
private String
delimiters
private int
currentPos
private int
maxPos
private int
nextPos
private int
peekPos
public static final String
RFC822
RFC822 specials
public static final String
MIME
MIME specials
private static final Token
EOFToken
Constructors Summary
/**
 * Constructor that takes a rfc822 style header.
 *
 * @param header       The rfc822 header to be tokenized. A null header
 *                     is treated as the empty string.
 * @param delimiters   Set of delimiter characters to be used to delimit
 *                     ATOMS. These are usually RFC822 or MIME
 * @param skipComments If true, comments are skipped and not returned as
 *                     tokens
 */
public HeaderTokenizer(String header, String delimiters, boolean skipComments) {
    string = (header == null) ? "" : header; // paranoia ?!
    this.skipComments = skipComments;
    this.delimiters = delimiters;
    // All three cursors start at the beginning of the header.
    currentPos = nextPos = peekPos = 0;
    maxPos = string.length();
}
/**
 * Constructor. Comments are ignored and not returned as tokens.
 *
 * @param header     The header that is tokenized
 * @param delimiters The delimiters to be used
 */
public HeaderTokenizer(String header, String delimiters) {
    this(header, delimiters, true);
}
/**
 * Constructor. The RFC822 defined delimiters - RFC822 - are used to
 * delimit ATOMS. Also comments are skipped and not returned as tokens.
 *
 * @param header The header that is tokenized
 */
public HeaderTokenizer(String header) {
    this(header, RFC822);
}
Methods Summary
private javax.mail.internet.HeaderTokenizer$TokencollectString(char eos)

	int start;
	boolean filter = false;
	for (start = currentPos; currentPos < maxPos; currentPos++) {
	    char c = string.charAt(currentPos);
	    if (c == '\\") { // Escape sequence
		currentPos++;
		filter = true;
	    } else if (c == '\r")
		filter = true;
	    else if (c == eos) {
		currentPos++;
		String s;

		if (filter)
		    s = filterToken(string, start, currentPos-1);
		else
		    s = string.substring(start, currentPos-1);

		if (c != '"") {		// not a real quoted string
		    s = trimWhiteSpace(s);
		    currentPos--;	// back up before the eos char
		}

		return new Token(Token.QUOTEDSTRING, s);
	    }
	}

	// ran off the end of the string

	// if we're looking for a matching quote, that's an error
	if (eos == '"")
	    throw new ParseException("Unbalanced quoted string");

	// otherwise, just return whatever's left
	String s;
	if (filter)
	    s = filterToken(string, start, currentPos);
	else
	    s = string.substring(start, currentPos);
	s = trimWhiteSpace(s);
	return new Token(Token.QUOTEDSTRING, s);
    
private static java.lang.StringfilterToken(java.lang.String s, int start, int end)

	StringBuffer sb = new StringBuffer();
	char c;
	boolean gotEscape = false;
	boolean gotCR = false;

	for (int i = start; i < end; i++) {
	    c = s.charAt(i);
	    if (c == '\n" && gotCR) {
		// This LF is part of an unescaped 
		// CRLF sequence (i.e, LWSP). Skip it.
		gotCR = false;
		continue;
	    }

	    gotCR = false;
	    if (!gotEscape) {
		// Previous character was NOT '\'
		if (c == '\\") // skip this character
		    gotEscape = true;
		else if (c == '\r") // skip this character
		    gotCR = true;
		else // append this character
		    sb.append(c);
	    } else {
		// Previous character was '\'. So no need to 
		// bother with any special processing, just 
		// append this character
		sb.append(c);
		gotEscape = false;
	    }
	}
	return sb.toString();
    
private javax.mail.internet.HeaderTokenizer$TokengetNext(char endOfAtom)

	// If we're already at end of string, return EOF
	if (currentPos >= maxPos)
	    return EOFToken;

	// Skip white-space, position currentPos beyond the space
	if (skipWhiteSpace() == Token.EOF)
	    return EOFToken;

	char c; 
	int start; 
	boolean filter = false;
	
	c = string.charAt(currentPos);

	// Check or Skip comments and position currentPos
	// beyond the comment
	while (c == '(") {
	    // Parsing comment ..
	    int nesting;
	    for (start = ++currentPos, nesting = 1; 
		 nesting > 0 && currentPos < maxPos;
		 currentPos++) {
		c = string.charAt(currentPos);
		if (c == '\\") {  // Escape sequence
		    currentPos++; // skip the escaped character
		    filter = true;
		} else if (c == '\r")
		    filter = true;
		else if (c == '(")
		    nesting++;
		else if (c == ')")
		    nesting--;
	    }
	    if (nesting != 0)
		throw new ParseException("Unbalanced comments");

	    if (!skipComments) {
		// Return the comment, if we are asked to.
		// Note that the comment start & end markers are ignored.
		String s;
		if (filter) // need to go thru the token again.
		    s = filterToken(string, start, currentPos-1);
		else
		    s = string.substring(start,currentPos-1);

		return new Token(Token.COMMENT, s);
	    }

	    // Skip any whitespace after the comment.
	    if (skipWhiteSpace() == Token.EOF)
		return EOFToken;
	    c = string.charAt(currentPos);
	}

	// Check for quoted-string and position currentPos 
	//  beyond the terminating quote
	if (c == '"") {
	    currentPos++;	// skip initial quote
	    return collectString('"");
	}
	
	// Check for SPECIAL or CTL
	if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
	    if (endOfAtom > 0 && c != endOfAtom) {
		// not expecting a special character here,
		// pretend it's a quoted string
		return collectString(endOfAtom);
	    }
	    currentPos++; // re-position currentPos
	    char ch[] = new char[1];
	    ch[0] = c;
	    return new Token((int)c, new String(ch));
	}

	// Check for ATOM
	for (start = currentPos; currentPos < maxPos; currentPos++) {
	    c = string.charAt(currentPos);
	    // ATOM is delimited by either SPACE, CTL, "(", <"> 
	    // or the specified SPECIALS
	    if (c < 040 || c >= 0177 || c == '(" || c == ' " ||
			c == '"" || delimiters.indexOf(c) >= 0) {
		if (endOfAtom > 0 && c != endOfAtom) {
		    // not the expected atom after all;
		    // back up and pretend it's a quoted string
		    currentPos = start;
		    return collectString(endOfAtom);
		}
		break;
	    }
	}
	return new Token(Token.ATOM, string.substring(start, currentPos));
    
public java.lang.StringgetRemainder()
Return the rest of the Header.

return
String rest of header. null is returned if we are already at end of header

	return string.substring(nextPos);
    
public javax.mail.internet.HeaderTokenizer$Tokennext()
Parses the next token from this String.

Clients sit in a loop calling next() to parse successive tokens until an EOF Token is returned.

return
the next Token
exception
ParseException if the parse fails

 
	Token tk;

	currentPos = nextPos; // setup currentPos
	tk = getNext('\0");
	nextPos = peekPos = currentPos; // update currentPos and peekPos
	return tk;
    
javax.mail.internet.HeaderTokenizer$Tokennext(char endOfAtom)

 
	Token tk;

	currentPos = nextPos; // setup currentPos
	tk = getNext(endOfAtom);
	nextPos = peekPos = currentPos; // update currentPos and peekPos
	return tk;
    
public javax.mail.internet.HeaderTokenizer$Tokenpeek()
Peek at the next token, without actually removing the token from the parse stream. Invoking this method multiple times will return successive tokens, until next() is called.

return
the next Token
exception
ParseException if the parse fails

	Token tk;

	currentPos = peekPos; // setup currentPos
	tk = getNext('\0");
	peekPos = currentPos; // update peekPos
	return tk;
    
private intskipWhiteSpace()

	char c;
	for (; currentPos < maxPos; currentPos++)
	    if (((c = string.charAt(currentPos)) != ' ") && 
		(c != '\t") && (c != '\r") && (c != '\n"))
		return currentPos;
	return Token.EOF;
    
private static java.lang.StringtrimWhiteSpace(java.lang.String s)

	char c;
	int i;
	for (i = s.length() - 1; i >= 0; i--) {
	    if (((c = s.charAt(i)) != ' ") && 
		(c != '\t") && (c != '\r") && (c != '\n"))
		break;
	}
	if (i <= 0)
	    return "";
	else
	    return s.substring(0, i + 1);