File | Doc | Category | Size | Date | Package
MailcapTokenizer.java | API Doc | Glassfish v2 API | 9317 | Mon May 14 15:29:52 BST 2007 | com.sun.activation.registries

MailcapTokenizer

public class MailcapTokenizer extends Object
A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ". Useful for parsing MIME content types.

Fields Summary
public static final int
UNKNOWN_TOKEN
public static final int
START_TOKEN
public static final int
STRING_TOKEN
public static final int
EOI_TOKEN
public static final int
SLASH_TOKEN
public static final int
SEMICOLON_TOKEN
public static final int
EQUALS_TOKEN
private String
data
private int
dataIndex
private int
dataLength
private int
currentToken
private String
currentTokenValue
private boolean
isAutoquoting
private char
autoquoteChar
Constructors Summary
/**
 * Creates a tokenizer positioned at the start of the given string.
 * Auto-quoting starts out disabled and the auto-quote terminator is
 * {@code ';'}.
 *
 * @param inputString the string to tokenize
 * @throws NullPointerException if {@code inputString} is {@code null}
 *         (its length is read immediately)
 */
public MailcapTokenizer(String inputString) {
    data = inputString;
    dataIndex = 0;
    dataLength = inputString.length();

    // no token has been read yet
    currentToken = START_TOKEN;
    currentTokenValue = "";

    isAutoquoting = false;
    autoquoteChar = ';';
}
    
Methods Summary
private static java.lang.StringfixEscapeSequences(java.lang.String inputString)

	int inputLength = inputString.length();
	StringBuffer buffer = new StringBuffer();
	buffer.ensureCapacity(inputLength);

	for (int i = 0; i < inputLength; ++i) {
	    char currentChar = inputString.charAt(i);
	    if (currentChar != '\\") {
		buffer.append(currentChar);
	    } else {
		if (i < inputLength - 1) {
		    char nextChar = inputString.charAt(i + 1);
		    buffer.append(nextChar);

		    //  force a skip over the next character too
		    ++i;
		} else {
		    buffer.append(currentChar);
		}
	    }
	}

	return buffer.toString();
    
public intgetCurrentToken()
Retrieve current token.

returns
The current token value

	return currentToken;
    
public java.lang.StringgetCurrentTokenValue()

	return currentTokenValue;
    
private static booleanisControlChar(char c)

	return Character.isISOControl(c);
    
private static booleanisSpecialChar(char c)

	boolean lAnswer = false;

	switch(c) {
	    case '(":
	    case ')":
	    case '<":
	    case '>":
	    case '@":
	    case ',":
	    case ';":
	    case ':":
	    case '\\":
	    case '"":
	    case '/":
	    case '[":
	    case ']":
	    case '?":
	    case '=":
		lAnswer = true;
		break;
	}

	return lAnswer;
    
private static booleanisStringTokenChar(char c)

	return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c);
    
private static booleanisWhiteSpaceChar(char c)

	return Character.isWhitespace(c);
    
public static java.lang.StringnameForToken(int token)

	String name = "really unknown";

	switch(token) {
	    case UNKNOWN_TOKEN:
		name = "unknown";
		break;
	    case START_TOKEN:
		name = "start";
		break;
	    case STRING_TOKEN:
		name = "string";
		break;
	    case EOI_TOKEN:
		name = "EOI";
		break;
	    case SLASH_TOKEN:
		name = "'/'";
		break;
	    case SEMICOLON_TOKEN:
		name = "';'";
		break;
	    case EQUALS_TOKEN:
		name = "'='";
		break;
	}

	return name;
    
public intnextToken()

	if (dataIndex < dataLength) {
	    //  skip white space
	    while ((dataIndex < dataLength) &&
		    (isWhiteSpaceChar(data.charAt(dataIndex)))) {
		++dataIndex;
	    }

	    if (dataIndex < dataLength) {
		//  examine the current character and see what kind of token we have
		char c = data.charAt(dataIndex);
		if (isAutoquoting) {
		    if (c == ';" || c == '=") {
			currentToken = c;
			currentTokenValue = new Character(c).toString();
			++dataIndex;
		    } else {
			processAutoquoteToken();
		    }
		} else {
		    if (isStringTokenChar(c)) {
			processStringToken();
		    } else if ((c == '/") || (c == ';") || (c == '=")) {
			currentToken = c;
			currentTokenValue = new Character(c).toString();
			++dataIndex;
		    } else {
			currentToken = UNKNOWN_TOKEN;
			currentTokenValue = new Character(c).toString();
			++dataIndex;
		    }
		}
	    } else {
		currentToken = EOI_TOKEN;
		currentTokenValue = null;
	    }
	} else {
	    currentToken = EOI_TOKEN;
	    currentTokenValue = null;
	}

	return currentToken;
    
private voidprocessAutoquoteToken()

	//  capture the initial index
	int initialIndex = dataIndex;

	//  now skip to the 1st non-escaped autoquote termination character
	//  XXX - doesn't actually consider escaping
	boolean foundTerminator = false;
	while ((dataIndex < dataLength) && !foundTerminator) {
	    char c = data.charAt(dataIndex);
	    if (c != autoquoteChar) {
		++dataIndex;
	    } else {
		foundTerminator = true;
	    }
	}

	currentToken = STRING_TOKEN;
	currentTokenValue =
	    fixEscapeSequences(data.substring(initialIndex, dataIndex));
    
private voidprocessStringToken()

	//  capture the initial index
	int initialIndex = dataIndex;

	//  skip to 1st non string token character
	while ((dataIndex < dataLength) &&
		isStringTokenChar(data.charAt(dataIndex))) {
	    ++dataIndex;
	}

	currentToken = STRING_TOKEN;
	currentTokenValue = data.substring(initialIndex, dataIndex);
    
public voidsetIsAutoquoting(boolean value)
Set whether auto-quoting is on or off. Auto-quoting means that all characters after the first non-whitespace, non-control character up to the auto-quote terminator character or EOI (minus any whitespace immediatley preceeding it) is considered a token. This is required for handling command strings in a mailcap entry.

	isAutoquoting = value;