Methods Summary |
---|
private void | addToken(com.google.android.util.AbstractMessageParser$Token token)Adds the given token to the parsed output.
tokens.add(token);
|
private void | addURLToken(java.lang.String url, java.lang.String text)Adds the appropriate token for the given URL. This might be a simple
link or it might be a recognized media type.
addToken(tokenForUrl(url, text));
|
private void | buildParts(java.lang.String meText)Builds the parts list.
for (int i = 0; i < tokens.size(); ++i) {
Token token = tokens.get(i);
if (token.isMedia() || (parts.size() == 0) || lastPart().isMedia()) {
parts.add(new Part());
}
lastPart().add(token);
}
// The first part inherits the meText of the line.
if (parts.size() > 0) {
parts.get(0).setMeText(meText);
}
|
private int | getCharClass(int index)Returns the class for the character at the given index.
if ((index < 0) || (text.length() <= index)) {
return 0;
}
char ch = text.charAt(index);
if (Character.isWhitespace(ch)) {
return 1;
} else if (Character.isLetter(ch)) {
return 2;
} else if (Character.isDigit(ch)) {
return 3;
} else if (isPunctuation(ch)) {
// For punctuation, we return a unique value every time so that they are
// always different from any other character. Punctuation should always
// be considered a possible word break.
return ++nextClass;
} else {
return 4;
}
|
public final com.google.android.util.AbstractMessageParser$Part | getPart(int index)Return the part at the given index. return parts.get(index);
|
public final int | getPartCount()Return the number of parts. return parts.size();
|
public final java.util.List | getParts()Return the list of parts from the parsed text return parts;
|
public final java.lang.String | getRawText()Returns the raw text being parsed. return text;
|
protected abstract com.google.android.util.AbstractMessageParser$Resources | getResources()Subclasses must define the schemes, domains, smileys and acronyms
that are necessary for parsing
|
private boolean | isDomainChar(char c)Determines if this is an allowable domain character.
return c == '-" || Character.isLetter(c) || Character.isDigit(c);
|
private static boolean | isFormatChar(char ch)Determines whether the given character is the beginning or end of a
section with special formatting.
switch (ch) {
case '*": case '_": case '^":
return true;
default:
return false;
}
|
private static boolean | isPunctuation(char ch)Determines whether the given character is punctuation.
switch (ch) {
case '.": case ',": case '"": case ':": case ';":
case '?": case '!": case '(": case ')":
return true;
default:
return false;
}
|
private boolean | isSmileyBreak(int index)Determines whether the given index could be a possible smiley break.
if (index > 0 && index < text.length()) {
if (isSmileyBreak(text.charAt(index - 1), text.charAt(index))) {
return true;
}
}
return false;
|
private static boolean | isSmileyBreak(char c1, char c2)Returns true if c1 could be the last character of
a smiley and c2 could be the first character of
a different smiley, if {@link #isWordBreak} would not already
recognize that this is possible.
switch (c1) {
/*
* These characters can end smileys, but don't normally end words.
*/
case '$": case '&": case '*": case '+": case '-":
case '/": case '<": case '=": case '>": case '@":
case '[": case '\\": case ']": case '^": case '|":
case '}": case '~":
switch (c2) {
/*
* These characters can begin smileys, but don't normally
* begin words.
*/
case '#": case '$": case '%": case '*": case '/":
case '<": case '=": case '>": case '@": case '[":
case '\\": case '^": case '~":
return true;
}
}
return false;
|
private boolean | isURLBreak(int index)Verifies that the character before the given index is end of line,
whitespace, or punctuation.
switch (getCharClass(index - 1)) {
case 2:
case 3:
case 4:
return false;
case 0:
case 1:
default:
return true;
}
|
private boolean | isValidDomain(java.lang.String domain)Determines if the given string is a valid domain.
// For hostnames, check that it ends with a known domain suffix
if (matches(getResources().getDomainSuffixes(), reverse(domain))) {
return true;
}
return false;
|
private boolean | isWordBreak(int index)Determines whether the given index could be a possible word break.
return getCharClass(index - 1) != getCharClass(index);
|
private com.google.android.util.AbstractMessageParser$Part | lastPart()Returns the last part in the list. return parts.get(parts.size() - 1);
|
private static com.google.android.util.AbstractMessageParser$TrieNode | longestMatch(com.google.android.util.AbstractMessageParser$TrieNode root, com.google.android.util.AbstractMessageParser p, int start)Returns the longest substring of the given string, starting at the given
index, that exists in the trie.
return longestMatch(root, p, start, false);
|
private static com.google.android.util.AbstractMessageParser$TrieNode | longestMatch(com.google.android.util.AbstractMessageParser$TrieNode root, com.google.android.util.AbstractMessageParser p, int start, boolean smiley)Returns the longest substring of the given string, starting at the given
index, that exists in the trie, with a special tokenizing case for
smileys if specified.
int index = start;
TrieNode bestMatch = null;
while (index < p.getRawText().length()) {
root = root.getChild(p.getRawText().charAt(index++));
if (root == null) {
break;
} else if (root.exists()) {
if (p.isWordBreak(index)) {
bestMatch = root;
} else if (smiley && p.isSmileyBreak(index)) {
bestMatch = root;
}
}
}
return bestMatch;
|
private static boolean | matches(com.google.android.util.AbstractMessageParser$TrieNode root, java.lang.String str)Determines whether the given string is in the given trie.
int index = 0;
while (index < str.length()) {
root = root.getChild(str.charAt(index++));
if (root == null) {
break;
} else if (root.exists()) {
return true;
}
}
return false;
|
public void | parse()Parses the text string into an internal representation.
// Look for music track (of which there would be only one and it'll be the
// first token)
if (parseMusicTrack()) {
buildParts(null);
return;
}
// Look for me commands.
String meText = null;
if (parseMeText && text.startsWith("/me") && (text.length() > 3) &&
Character.isWhitespace(text.charAt(3))) {
meText = text.substring(0, 4);
text = text.substring(4);
}
// Break the text into tokens.
boolean wasSmiley = false;
while (nextChar < text.length()) {
if (!isWordBreak(nextChar)) {
if (!wasSmiley || !isSmileyBreak(nextChar)) {
throw new AssertionError("last chunk did not end at word break");
}
}
if (parseSmiley()) {
wasSmiley = true;
} else {
wasSmiley = false;
if (!parseAcronym() && !parseURL() && !parseFormatting()) {
parseText();
}
}
}
// Trim the whitespace before and after media components.
for (int i = 0; i < tokens.size(); ++i) {
if (tokens.get(i).isMedia()) {
if ((i > 0) && (tokens.get(i - 1) instanceof Html)) {
((Html)tokens.get(i - 1)).trimLeadingWhitespace();
}
if ((i + 1 < tokens.size()) && (tokens.get(i + 1) instanceof Html)) {
((Html)tokens.get(i + 1)).trimTrailingWhitespace();
}
}
}
// Remove any empty html tokens.
for (int i = 0; i < tokens.size(); ++i) {
if (tokens.get(i).isHtml() &&
(tokens.get(i).toHtml(true).length() == 0)) {
tokens.remove(i);
--i; // visit this index again
}
}
buildParts(meText);
|
private boolean | parseAcronym()Looks for acronyms (e.g., "lol") in the text.
if(!parseAcronyms) {
return false;
}
TrieNode match = longestMatch(getResources().getAcronyms(), this, nextChar);
if (match == null) {
return false;
} else {
addToken(new Acronym(match.getText(), match.getValue()));
nextChar += match.getText().length();
return true;
}
|
private boolean | parseFormatting()Deal with formatting characters.
Parsing is as follows:
- Treat all contiguous strings of formatting characters as one block.
(This method processes one block.)
- Only a single instance of a particular format character within a block
is used to determine whether to turn on/off that type of formatting;
other instances simply print the character itself.
- If the format is to be turned on, we use the _first_ instance; if it
is to be turned off, we use the _last_ instance (by appending the
format.)
Example:
**string** turns into *string*
if(!parseFormatting) {
return false;
}
int endChar = nextChar;
while ((endChar < text.length()) && isFormatChar(text.charAt(endChar))) {
endChar += 1;
}
if ((endChar == nextChar) || !isWordBreak(endChar)) {
return false;
}
// Keeps track of whether we've seen a character (in map if we've seen it)
// and whether we should append a closing format token (if value in
// map is TRUE). Linked hashmap for consistent ordering.
LinkedHashMap<Character, Boolean> seenCharacters =
new LinkedHashMap<Character, Boolean>();
for (int index = nextChar; index < endChar; ++index) {
char ch = text.charAt(index);
Character key = Character.valueOf(ch);
if (seenCharacters.containsKey(key)) {
// Already seen this character, just append an unmatched token, which
// will print plaintext character
addToken(new Format(ch, false));
} else {
Format start = formatStart.get(key);
if (start != null) {
// Match the start token, and ask an end token to be appended
start.setMatched(true);
formatStart.remove(key);
seenCharacters.put(key, Boolean.TRUE);
} else {
// Append start token
start = new Format(ch, true);
formatStart.put(key, start);
addToken(start);
seenCharacters.put(key, Boolean.FALSE);
}
}
}
// Append any necessary end tokens
for (Character key : seenCharacters.keySet()) {
if (seenCharacters.get(key) == Boolean.TRUE) {
Format end = new Format(key.charValue(), false);
end.setMatched(true);
addToken(end);
}
}
nextChar = endChar;
return true;
|
private boolean | parseMusicTrack()Looks for a music track (\u266B is first character, everything else is
track info).
if (parseMusic && text.startsWith(musicNote)) {
addToken(new MusicTrack(text.substring(musicNote.length())));
nextChar = text.length();
return true;
}
return false;
|
private boolean | parseSmiley()Looks for smileys (e.g., ":)") in the text. The set of known smileys is
loaded from a file into a trie at server start.
if(!parseSmilies) {
return false;
}
TrieNode match = longestMatch(getResources().getSmileys(), this, nextChar,
true);
if (match == null) {
return false;
} else {
int previousCharClass = getCharClass(nextChar - 1);
int nextCharClass = getCharClass(nextChar + match.getText().length());
if ((previousCharClass == 2 || previousCharClass == 3)
&& (nextCharClass == 2 || nextCharClass == 3)) {
return false;
}
addToken(new Smiley(match.getText()));
nextChar += match.getText().length();
return true;
}
|
private void | parseText()Consumes all of the text in the next word .
StringBuilder buf = new StringBuilder();
int start = nextChar;
do {
char ch = text.charAt(nextChar++);
switch (ch) {
case '<": buf.append("<"); break;
case '>": buf.append(">"); break;
case '&": buf.append("&"); break;
case '"": buf.append("""); break;
case '\'": buf.append("'"); break;
case '\n": buf.append("<br>"); break;
default: buf.append(ch); break;
}
} while (!isWordBreak(nextChar));
addToken(new Html(text.substring(start, nextChar), buf.toString()));
|
private boolean | parseURL()Looks for a URL in two possible forms: either a proper URL with a known
scheme or a domain name optionally followed by a path, query, or query.
// Make sure this is a valid place to start a URL.
if (!parseUrls || !isURLBreak(nextChar)) {
return false;
}
int start = nextChar;
// Search for the first block of letters.
int index = start;
while ((index < text.length()) && isDomainChar(text.charAt(index))) {
index += 1;
}
String url = "";
boolean done = false;
if (index == text.length()) {
return false;
} else if (text.charAt(index) == ':") {
// Make sure this is a known scheme.
String scheme = text.substring(nextChar, index);
if (!getResources().getSchemes().contains(scheme)) {
return false;
}
} else if (text.charAt(index) == '.") {
// Search for the end of the domain name.
while (index < text.length()) {
char ch = text.charAt(index);
if ((ch != '.") && !isDomainChar(ch)) {
break;
} else {
index += 1;
}
}
// Make sure the domain name has a valid suffix. Since tries look for
// prefix matches, we reverse all the strings to get suffix comparisons.
String domain = text.substring(nextChar, index);
if (!isValidDomain(domain)) {
return false;
}
// Search for a port. We deal with this specially because a colon can
// also be a punctuation character.
if ((index + 1 < text.length()) && (text.charAt(index) == ':")) {
char ch = text.charAt(index + 1);
if (Character.isDigit(ch)) {
index += 1;
while ((index < text.length()) &&
Character.isDigit(text.charAt(index))) {
index += 1;
}
}
}
// The domain name should be followed by end of line, whitespace,
// punctuation, or a colon, slash, question, or hash character. The
// tricky part here is that some URL characters are also punctuation, so
// we need to distinguish them. Since we looked for ports above, a colon
// is always punctuation here. To distinguish '?' cases, we look at the
// character that follows it.
if (index == text.length()) {
done = true;
} else {
char ch = text.charAt(index);
if (ch == '?") {
// If the next character is whitespace or punctuation (or missing),
// then this question mark looks like punctuation.
if (index + 1 == text.length()) {
done = true;
} else {
char ch2 = text.charAt(index + 1);
if (Character.isWhitespace(ch2) || isPunctuation(ch2)) {
done = true;
}
}
} else if (isPunctuation(ch)) {
done = true;
} else if (Character.isWhitespace(ch)) {
done = true;
} else if ((ch == '/") || (ch == '#")) {
// In this case, the URL is not done. We will search for the end of
// it below.
} else {
return false;
}
}
// We will assume the user meant HTTP. (One weird case is where they
// type a port of 443. That could mean HTTPS, but they might also want
// HTTP. We'll let them specify if they don't want HTTP.)
url = "http://";
} else {
return false;
}
// If the URL is not done, search for the end, which is just before the
// next whitespace character.
if (!done) {
while ((index < text.length()) &&
!Character.isWhitespace(text.charAt(index))) {
index += 1;
}
}
String urlText = text.substring(start, index);
url += urlText;
// Figure out the appropriate token type.
addURLToken(url, urlText);
nextChar = index;
return true;
|
protected static java.lang.String | reverse(java.lang.String str)Returns the reverse of the given string.
StringBuilder buf = new StringBuilder();
for (int i = str.length() - 1; i >= 0; --i) {
buf.append(str.charAt(i));
}
return buf.toString();
|
public java.lang.String | toHtml()Converts the entire message into a single HTML display string.
StringBuilder html = new StringBuilder();
for (Part part : parts) {
boolean caps = false;
html.append("<p>");
for (Token token : part.getTokens()) {
if (token.isHtml()) {
html.append(token.toHtml(caps));
} else {
switch (token.getType()) {
case LINK:
html.append("<a href=\"");
html.append(((Link)token).getURL());
html.append("\">");
html.append(token.getRawText());
html.append("</a>");
break;
case SMILEY:
// TODO: link to an appropriate image
html.append(token.getRawText());
break;
case ACRONYM:
html.append(token.getRawText());
break;
case MUSIC:
// TODO: include a music glyph
html.append(((MusicTrack)token).getTrack());
break;
case GOOGLE_VIDEO:
// TODO: include a Google Video icon
html.append("<a href=\"");
html.append(((Video)token).getURL(((Video)token).getDocID()));
html.append("\">");
html.append(token.getRawText());
html.append("</a>");
break;
case YOUTUBE_VIDEO:
// TODO: include a YouTube icon
html.append("<a href=\"");
html.append(((YouTubeVideo)token).getURL(
((YouTubeVideo)token).getDocID()));
html.append("\">");
html.append(token.getRawText());
html.append("</a>");
break;
case PHOTO: {
// TODO: include a Picasa Web icon
html.append("<a href=\"");
html.append(Photo.getAlbumURL(
((Photo)token).getUser(), ((Photo)token).getAlbum()));
html.append("\">");
html.append(token.getRawText());
html.append("</a>");
break;
}
case FLICKR:
// TODO: include a Flickr icon
Photo p = (Photo) token;
html.append("<a href=\"");
html.append(((FlickrPhoto)token).getUrl());
html.append("\">");
html.append(token.getRawText());
html.append("</a>");
break;
default:
throw new AssertionError("unknown token type: " + token.getType());
}
}
if (token.controlCaps()) {
caps = token.setCaps();
}
}
html.append("</p>\n");
}
return html.toString();
|
public static com.google.android.util.AbstractMessageParser$Token | tokenForUrl(java.lang.String url, java.lang.String text)Get a the appropriate Token for a given URL
if(url == null) {
return null;
}
//Look for video links
Video video = Video.matchURL(url, text);
if (video != null) {
return video;
}
// Look for video links.
YouTubeVideo ytVideo = YouTubeVideo.matchURL(url, text);
if (ytVideo != null) {
return ytVideo;
}
// Look for photo links.
Photo photo = Photo.matchURL(url, text);
if (photo != null) {
return photo;
}
// Look for photo links.
FlickrPhoto flickrPhoto = FlickrPhoto.matchURL(url, text);
if (flickrPhoto != null) {
return flickrPhoto;
}
//Not media, so must be a regular URL
return new Link(url, text);
|