File Doc Category Size Date Package
CSSParser.java API Doc Java SE 5 API 20967 Fri Aug 26 14:58:18 BST 2005 javax.swing.text.html

CSSParser

java.lang.Object

public class CSSParser extends Object

A CSS parser. This works by way of a delegate that implements the CSSParserCallback interface. The delegate is notified of the following events:

Import statement: handleImport
Selectors handleSelector. This is invoked for each string. For example if the Reader contained p, bar , a {}, the delegate would be notified 4 times, for 'p,' 'bar' ',' and 'a'.
When a rule starts, startRule
Properties in the rule via the handleProperty. This is invoked once per property/value key, e.g. font size: foo;, would cause the delegate to be notified once with a value of 'font size'.
Values in the rule via the handleValue, this is notified for the total value.
When a rule ends, endRule

This will parse much more than CSS 1, and loosely implements the recommendation for Forward-compatible parsing in section 7.1 of the CSS spec found at: http://www.w3.org/TR/REC-CSS1. If an error results in parsing, a RuntimeException will be thrown.

This will preserve case. If the callback wishes to treat certain portions case insensitively (such as selectors), it should use toLowerCase, or something similar.

author: Scott Violet
version: 1.8 12/19/03

Fields Summary
private static final int
IDENTIFIER
private static final int
BRACKET_OPEN
private static final int
BRACKET_CLOSE
private static final int
BRACE_OPEN
private static final int
BRACE_CLOSE
private static final int
PAREN_OPEN
private static final int
PAREN_CLOSE
private static final int
END
private static final char[]
charMapping
private boolean
didPushChar
Set to true if one character has been read ahead.
private int
pushedChar
The read ahead character.
private StringBuffer
unitBuffer
Temporary place to hold identifiers.
private int[]
unitStack
Used to indicate blocks.
private int
stackCount
Number of valid blocks.
private Reader
reader
Holds the incoming CSS rules.
private boolean
encounteredRuleSet
Set to true when the first non @ rule is encountered.
private CSSParserCallback
callback
Notified of state.
private char[]
tokenBuffer
nextToken() inserts the string here.
private int
tokenBufferLength
Current number of chars in tokenBuffer.
private boolean
readWS
Set to true if any whitespace is read.
Constructors Summary
/**
 * Creates a parser and allocates its working storage: a two-slot block
 * stack, an 80-character token buffer and an empty unit buffer.
 */
CSSParser() {
    // NOTE(review): the remarks below describe the delegate (callback)
    // interface rather than this constructor; they are kept here because
    // the listing attaches them to the constructor body.
    //
    // The delegate interface.
    // There is currently no way to distinguish between '"foo,"' and
    // 'foo,'. But this generally isn't valid CSS. If it becomes
    // a problem, handleSelector will have to be told if the string is
    // quoted.
    // Property names are mapped to lower case before being passed to
    // the delegate.
    unitStack = new int[2];
    tokenBuffer = new char[80];
    unitBuffer = new StringBuffer();
}
Methods Summary
private void append(char character)
if (tokenBufferLength == tokenBuffer.length) { char[] newBuffer = new char[tokenBuffer.length * 2]; System.arraycopy(tokenBuffer, 0, newBuffer, 0, tokenBuffer.length); tokenBuffer = newBuffer; } tokenBuffer[tokenBufferLength++] = character;
private void endBlock(int endToken)
Called when an end block is encountered )]}
int startToken; switch (endToken) { case BRACKET_CLOSE: startToken = BRACKET_OPEN; break; case BRACE_CLOSE: startToken = BRACE_OPEN; break; case PAREN_CLOSE: startToken = PAREN_OPEN; break; default: // Will never happen. startToken = -1; break; } if (stackCount > 0 && unitStack[stackCount - 1] == startToken) { stackCount--; } else { // Invalid state, should do something. throw new RuntimeException("Unmatched block"); }
private boolean getIdentifier(char stopChar)
Gets an identifier, returning true if the length of the string is greater than 0, stopping when stopChar, whitespace, or one of {}()[] is hit.
boolean lastWasEscape = false; boolean done = false; int escapeCount = 0; int escapeChar = 0; int nextChar; int intStopChar = (int)stopChar; // 1 for '\', 2 for valid escape char [0-9a-fA-F], 3 for // stop character (white space, ()[]{}) 0 otherwise short type; int escapeOffset = 0; tokenBufferLength = 0; while (!done) { nextChar = readChar(); switch (nextChar) { case '\\": type = 1; break; case '0": case '1": case '2": case '3": case '4": case '5": case '6": case '7": case '8": case '9": type = 2; escapeOffset = nextChar - '0"; break; case 'a": case 'b": case 'c": case 'd": case 'e": case 'f": type = 2; escapeOffset = nextChar - 'a" + 10; break; case 'A": case 'B": case 'C": case 'D": case 'E": case 'F": type = 2; escapeOffset = nextChar - 'A" + 10; break; case '\'": case '"": case '[": case ']": case '{": case '}": case '(": case ')": case ' ": case '\n": case '\t": case '\r": type = 3; break; case '/": type = 4; break; case -1: // Reached the end done = true; type = 0; break; default: type = 0; break; } if (lastWasEscape) { if (type == 2) { // Continue with escape. escapeChar = escapeChar * 16 + escapeOffset; if (++escapeCount == 4) { lastWasEscape = false; append((char)escapeChar); } } else { // no longer escaped lastWasEscape = false; if (escapeCount > 0) { append((char)escapeChar); // Make this simpler, reprocess the character. pushChar(nextChar); } else if (!done) { append((char)nextChar); } } } else if (!done) { if (type == 1) { lastWasEscape = true; escapeChar = escapeCount = 0; } else if (type == 3) { done = true; pushChar(nextChar); } else if (type == 4) { // Potential comment nextChar = readChar(); if (nextChar == '*") { done = true; readComment(); readWS = true; } else { append('/"); if (nextChar == -1) { done = true; } else { pushChar(nextChar); } } } else { append((char)nextChar); if (nextChar == intStopChar) { done = true; } } } } return (tokenBufferLength > 0);
private boolean getNextStatement()
Gets the next statement, returning false if the end is reached. A statement is either an @rule, or a ruleset.
unitBuffer.setLength(0); int token = nextToken((char)0); switch (token) { case IDENTIFIER: if (tokenBufferLength > 0) { if (tokenBuffer[0] == '@") { parseAtRule(); } else { encounteredRuleSet = true; parseRuleSet(); } } return true; case BRACKET_OPEN: case BRACE_OPEN: case PAREN_OPEN: parseTillClosed(token); return true; case BRACKET_CLOSE: case BRACE_CLOSE: case PAREN_CLOSE: // Shouldn't happen... throw new RuntimeException("Unexpected top level block close"); case END: return false; } return true;
private boolean inBlock()
return
true if currently in a block.
return (stackCount > 0);
private int nextToken(char idChar)
Fetches the next token.
readWS = false; int nextChar = readWS(); switch (nextChar) { case '\'": readTill('\'"); if (tokenBufferLength > 0) { tokenBufferLength--; } return IDENTIFIER; case '"": readTill('""); if (tokenBufferLength > 0) { tokenBufferLength--; } return IDENTIFIER; case '[": return BRACKET_OPEN; case ']": return BRACKET_CLOSE; case '{": return BRACE_OPEN; case '}": return BRACE_CLOSE; case '(": return PAREN_OPEN; case ')": return PAREN_CLOSE; case -1: return END; default: pushChar(nextChar); getIdentifier(idChar); return IDENTIFIER; }
void parse(java.io.Reader reader, javax.swing.text.html.CSSParser$CSSParserCallback callback, boolean inRule)
this.callback = callback; stackCount = tokenBufferLength = 0; this.reader = reader; encounteredRuleSet = false; try { if (inRule) { parseDeclarationBlock(); } else { while (getNextStatement()); } } finally { callback = null; reader = null; }
private void parseAtRule()
Parses an @ rule, stopping at a matching brace pair, or ;.
// PENDING: make this more effecient. boolean done = false; boolean isImport = (tokenBufferLength == 7 && tokenBuffer[0] == '@" && tokenBuffer[1] == 'i" && tokenBuffer[2] == 'm" && tokenBuffer[3] == 'p" && tokenBuffer[4] == 'o" && tokenBuffer[5] == 'r" && tokenBuffer[6] == 't"); unitBuffer.setLength(0); while (!done) { int nextToken = nextToken(';"); switch (nextToken) { case IDENTIFIER: if (tokenBufferLength > 0 && tokenBuffer[tokenBufferLength - 1] == ';") { --tokenBufferLength; done = true; } if (tokenBufferLength > 0) { if (unitBuffer.length() > 0 && readWS) { unitBuffer.append(' "); } unitBuffer.append(tokenBuffer, 0, tokenBufferLength); } break; case BRACE_OPEN: if (unitBuffer.length() > 0 && readWS) { unitBuffer.append(' "); } unitBuffer.append(charMapping[nextToken]); parseTillClosed(nextToken); done = true; // Skip a tailing ';', not really to spec. { int nextChar = readWS(); if (nextChar != -1 && nextChar != ';") { pushChar(nextChar); } } break; case BRACKET_OPEN: case PAREN_OPEN: unitBuffer.append(charMapping[nextToken]); parseTillClosed(nextToken); break; case BRACKET_CLOSE: case BRACE_CLOSE: case PAREN_CLOSE: throw new RuntimeException("Unexpected close in @ rule"); case END: done = true; break; } } if (isImport && !encounteredRuleSet) { callback.handleImport(unitBuffer.toString()); }
private int parseDeclaration()
Parses a single declaration, which is an identifier a : and another identifier. This returns the last token seen.
int token; if ((token = parseIdentifiers(':", false)) != IDENTIFIER) { return token; } // Make the property name to lowercase for (int counter = unitBuffer.length() - 1; counter >= 0; counter--) { unitBuffer.setCharAt(counter, Character.toLowerCase (unitBuffer.charAt(counter))); } callback.handleProperty(unitBuffer.toString()); token = parseIdentifiers(';", true); callback.handleValue(unitBuffer.toString()); return token;
private void parseDeclarationBlock()
Parses a declaration block. Which a number of declarations followed by a })].
for (;;) { int token = parseDeclaration(); switch (token) { case END: case BRACE_CLOSE: return; case BRACKET_CLOSE: case PAREN_CLOSE: // Bail throw new RuntimeException("Unexpected close in declaration block"); case IDENTIFIER: break; } }
private int parseIdentifiers(char extraChar, boolean wantsBlocks)
Parses identifiers until extraChar is encountered, returning the ending token, which will be IDENTIFIER if extraChar is found.
int nextToken; int ubl; unitBuffer.setLength(0); for (;;) { nextToken = nextToken(extraChar); switch (nextToken) { case IDENTIFIER: if (tokenBufferLength > 0) { if (tokenBuffer[tokenBufferLength - 1] == extraChar) { if (--tokenBufferLength > 0) { if (readWS && unitBuffer.length() > 0) { unitBuffer.append(' "); } unitBuffer.append(tokenBuffer, 0, tokenBufferLength); } return IDENTIFIER; } if (readWS && unitBuffer.length() > 0) { unitBuffer.append(' "); } unitBuffer.append(tokenBuffer, 0, tokenBufferLength); } break; case BRACKET_OPEN: case BRACE_OPEN: case PAREN_OPEN: ubl = unitBuffer.length(); if (wantsBlocks) { unitBuffer.append(charMapping[nextToken]); } parseTillClosed(nextToken); if (!wantsBlocks) { unitBuffer.setLength(ubl); } break; case BRACE_CLOSE: // No need to throw for these two, we return token and // caller can do whatever. case BRACKET_CLOSE: case PAREN_CLOSE: case END: // Hit the end return nextToken; } }
private void parseRuleSet()
Parses the next rule set, which is a selector followed by a declaration block.
if (parseSelectors()) { callback.startRule(); parseDeclarationBlock(); callback.endRule(); }
private boolean parseSelectors()
Parses a set of selectors, returning false if the end of the stream is reached.
// Parse the selectors int nextToken; if (tokenBufferLength > 0) { callback.handleSelector(new String(tokenBuffer, 0, tokenBufferLength)); } unitBuffer.setLength(0); for (;;) { while ((nextToken = nextToken((char)0)) == IDENTIFIER) { if (tokenBufferLength > 0) { callback.handleSelector(new String(tokenBuffer, 0, tokenBufferLength)); } } switch (nextToken) { case BRACE_OPEN: return true; case BRACKET_OPEN: case PAREN_OPEN: parseTillClosed(nextToken); // Not too sure about this, how we handle this isn't very // well spec'd. unitBuffer.setLength(0); break; case BRACKET_CLOSE: case BRACE_CLOSE: case PAREN_CLOSE: throw new RuntimeException("Unexpected block close in selector"); case END: // Prematurely hit end. return false; } }
private void parseTillClosed(int openToken)
Parses till a matching block close is encountered. This is only appropriate to be called at the top level (no nesting).
int nextToken; boolean done = false; startBlock(openToken); while (!done) { nextToken = nextToken((char)0); switch (nextToken) { case IDENTIFIER: if (unitBuffer.length() > 0 && readWS) { unitBuffer.append(' "); } if (tokenBufferLength > 0) { unitBuffer.append(tokenBuffer, 0, tokenBufferLength); } break; case BRACKET_OPEN: case BRACE_OPEN: case PAREN_OPEN: if (unitBuffer.length() > 0 && readWS) { unitBuffer.append(' "); } unitBuffer.append(charMapping[nextToken]); startBlock(nextToken); break; case BRACKET_CLOSE: case BRACE_CLOSE: case PAREN_CLOSE: if (unitBuffer.length() > 0 && readWS) { unitBuffer.append(' "); } unitBuffer.append(charMapping[nextToken]); endBlock(nextToken); if (!inBlock()) { done = true; } break; case END: // Prematurely hit end. throw new RuntimeException("Unclosed block"); } }
private void pushChar(int tempChar)
Supports one character look ahead, this will throw if called twice in a row.
if (didPushChar) { // Should never happen. throw new RuntimeException("Can not handle look ahead of more than one character"); } didPushChar = true; pushedChar = tempChar;
private int readChar()
Reads a character from the stream.
if (didPushChar) { didPushChar = false; return pushedChar; } return reader.read(); // Uncomment the following to do case insensitive parsing. /* if (retValue != -1) { return (int)Character.toLowerCase((char)retValue); } return retValue; */
private void readComment()
Parses a comment block.
int nextChar; for(;;) { nextChar = readChar(); switch (nextChar) { case -1: throw new RuntimeException("Unclosed comment"); case '*": nextChar = readChar(); if (nextChar == '/") { return; } else if (nextChar == -1) { throw new RuntimeException("Unclosed comment"); } else { pushChar(nextChar); } break; default: break; } }
private void readTill(char stopChar)
Reads till a stopChar is encountered, escaping characters as necessary.
boolean lastWasEscape = false; int escapeCount = 0; int escapeChar = 0; int nextChar; boolean done = false; int intStopChar = (int)stopChar; // 1 for '\', 2 for valid escape char [0-9a-fA-F], 0 otherwise short type; int escapeOffset = 0; tokenBufferLength = 0; while (!done) { nextChar = readChar(); switch (nextChar) { case '\\": type = 1; break; case '0": case '1": case '2": case '3": case '4":case '5": case '6": case '7": case '8": case '9": type = 2; escapeOffset = nextChar - '0"; break; case 'a": case 'b": case 'c": case 'd": case 'e": case 'f": type = 2; escapeOffset = nextChar - 'a" + 10; break; case 'A": case 'B": case 'C": case 'D": case 'E": case 'F": type = 2; escapeOffset = nextChar - 'A" + 10; break; case -1: // Prematurely reached the end! throw new RuntimeException("Unclosed " + stopChar); default: type = 0; break; } if (lastWasEscape) { if (type == 2) { // Continue with escape. escapeChar = escapeChar * 16 + escapeOffset; if (++escapeCount == 4) { lastWasEscape = false; append((char)escapeChar); } } else { // no longer escaped if (escapeCount > 0) { append((char)escapeChar); if (type == 1) { lastWasEscape = true; escapeChar = escapeCount = 0; } else { if (nextChar == intStopChar) { done = true; } append((char)nextChar); lastWasEscape = false; } } else { append((char)nextChar); lastWasEscape = false; } } } else if (type == 1) { lastWasEscape = true; escapeChar = escapeCount = 0; } else { if (nextChar == intStopChar) { done = true; } append((char)nextChar); } }
private int readWS()
Skips any white space, returning the character after the white space.
int nextChar; while ((nextChar = readChar()) != -1 && Character.isWhitespace((char)nextChar)) { readWS = true; } return nextChar;
private void startBlock(int startToken)
Called when a block start is encountered ({[.
if (stackCount == unitStack.length) { int[] newUS = new int[stackCount * 2]; System.arraycopy(unitStack, 0, newUS, 0, stackCount); unitStack = newUS; } unitStack[stackCount++] = startToken;