FileDocCategorySizeDatePackage
VCardParser_V21.javaAPI DocAndroid 1.5 API25061Wed May 06 22:41:56 BST 2009android.syncml.pim.vcard

VCardParser_V21

public class VCardParser_V21 extends Object
This class is used to parse vcard. Please refer to vCard Specification 2.1

Fields Summary
private static final HashSet
sKnownTypeSet
Store the known-type
private static final HashSet
sKnownValueSet
Store the known-value
private static final HashSet
sAvailablePropertyNameV21
Store the property name available in vCard 2.1
private static final HashSet
sAvailableEncodingV21
private String
mPreviousLine
protected android.syncml.pim.VBuilder
mBuilder
The builder to build parsed data
protected String
mEncoding
The encoding type
protected final String
sDefaultEncoding
protected BufferedReader
mReader
Constructors Summary
public VCardParser_V21()
Create a new VCard parser.

    
              
      
        // Only the superclass constructor runs here; mReader and mBuilder are assigned later, in parse().
        super();
    
Methods Summary
protected java.lang.StringgetBase64(java.lang.String firstString)

        // Collects a BASE64 value that spans several lines: the first chunk is
        // given, every following line is appended as-is, and an empty line
        // terminates the value. Running out of input first is an error.
        StringBuilder base64 = new StringBuilder();
        base64.append(firstString);

        String chunk;
        while ((chunk = getLine()) != null) {
            if (chunk.length() == 0) {
                // The blank line marks the end of the binary data.
                return base64.toString();
            }
            base64.append(chunk);
        }
        throw new VCardException(
                "File ended during parsing BASE64 binary");
    
protected java.lang.StringgetLine()

return
String. It may be null, or its length may be 0
throws
IOException

        return mReader.readLine();
    
protected java.lang.StringgetNonEmptyLine()

return
String with its length > 0
throws
IOException
throws
VCardException when the end of the stream is reached

        // Skips lines that are empty or whitespace-only and returns the first
        // line with visible content (untrimmed). A null line before any such
        // content is reported as a VCardException.
        String candidate = getLine();
        while (candidate != null) {
            if (candidate.trim().length() > 0) {
                return candidate;
            }
            candidate = getLine();
        }
        throw new VCardException("Reached end of buffer.");
    
protected java.lang.StringgetQuotedPrintable(java.lang.String firstString)

        // Specifically, there may be some padding between = and CRLF.
        // See the following:
        //
        // qp-line := *(qp-segment transport-padding CRLF)
        //            qp-part transport-padding
        // qp-segment := qp-section *(SPACE / TAB) "="
        //             ; Maximum length of 76 characters
        //
        // e.g. (from RFC 2045)
        // Now's the time =
        // for all folk to come=
        //  to the aid of their country.
        //
        // Returns firstString unchanged when it does not end (ignoring trailing
        // whitespace) with the soft line break "=". Otherwise joins it with the
        // following lines, each soft-broken line cut right after its "=" and
        // followed by "\r\n", up to and including the first line that does not
        // end with "=". Throws VCardException when the input ends first.
        if (!firstString.trim().endsWith("=")) {
            return firstString;
        }

        StringBuilder builder = new StringBuilder();
        String line = firstString;
        while (true) {
            // Remove "transport-padding". The trimmed line ends with '=', so
            // everything after the last '=' is whitespace and lastIndexOf finds
            // the soft break directly. (The previous hand-written scan never
            // moved its index and looped forever when padding was present.)
            builder.append(line.substring(0, line.lastIndexOf('=') + 1));
            builder.append("\r\n");

            line = getLine();
            if (line == null) {
                throw new VCardException(
                        "File ended during parsing quoted-printable String");
            }
            if (!line.trim().endsWith("=")) {
                // Last physical line of the value: appended untouched.
                builder.append(line);
                return builder.toString();
            }
        }
    
protected java.lang.StringgetVersion()

        return "2.1";
    
protected voidhandleAgent(java.lang.String propertyValue)
vCard 2.1 specifies AGENT allows one vcard entry. It is not encoded at all.

        // The AGENT value is an embedded vCard, so it has to start with
        // "BEGIN:VCARD" (case-insensitive, surrounding whitespace ignored).
        String[] strArray = propertyValue.split(":", 2);
        // All three conditions must hold. The length check comes first so that
        // strArray[1] is never read when the value contains no ':'.
        if (!(strArray.length == 2 &&
                strArray[0].trim().equalsIgnoreCase("BEGIN") &&
                strArray[1].trim().equalsIgnoreCase("VCARD"))) {
            throw new VCardException("BEGIN:VCARD != \"" + propertyValue + "\"");
        }
        // Parse the nested card's items, then validate its END:VCARD line.
        parseItems();
        readEndVCard();
    
protected voidhandleAnyParam(java.lang.String paramName, java.lang.String paramValue)
Mainly for "X-" type. This accepts any kind of type without check.

        // Forwards the parameter to the builder without any validation.
        // Nothing to do when no builder is attached.
        if (mBuilder == null) {
            return;
        }
        mBuilder.propertyParamType(paramName);
        mBuilder.propertyParamValue(paramValue);
    
protected voidhandleCharset(java.lang.String charsetval)
The vCard specification only allows us-ascii and iso-8859-xxx (see RFC 1521), but some vCards contain other charsets, so we allow them.

        // The charset value is not checked; it is reported to the builder
        // (when one is attached) as a "CHARSET" parameter.
        if (mBuilder == null) {
            return;
        }
        mBuilder.propertyParamType("CHARSET");
        mBuilder.propertyParamValue(charsetval);
    
protected voidhandleEncoding(java.lang.String pencodingval)
pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word

        if (isValidEncoding(pencodingval) ||
                pencodingval.startsWith("X-")) {
            if (mBuilder != null) {
                mBuilder.propertyParamType("ENCODING");
                mBuilder.propertyParamValue(pencodingval);
            }
            mEncoding = pencodingval;
        } else {
            throw new VCardException("Unknown encoding \"" + pencodingval + "\"");
        }
    
protected voidhandleLanguage(java.lang.String langval)
See also Section 7.1 of RFC 1521

        // A language value must consist of exactly two "-"-separated parts,
        // each made only of ASCII letters; otherwise VCardException is thrown.
        final String[] subtags = langval.split("-");
        if (subtags.length != 2) {
            throw new VCardException("Invalid Language: \"" + langval + "\"");
        }
        for (String subtag : subtags) {
            final int subtagLength = subtag.length();
            for (int idx = 0; idx < subtagLength; idx++) {
                if (!isLetter(subtag.charAt(idx))) {
                    throw new VCardException("Invalid Language: \"" + langval + "\"");
                }
            }
        }

        // Valid: report it to the builder, if there is one.
        if (mBuilder == null) {
            return;
        }
        mBuilder.propertyParamType("LANGUAGE");
        mBuilder.propertyParamValue(langval);
    
protected voidhandleMultiplePropertyValue(java.lang.String propertyName, java.lang.String propertyValue)
Mainly for "ADR", "ORG", and "N". We do not care about the number of strnosemi here. addressparts = 0*6(strnosemi ";") strnosemi ; PO Box, Extended Addr, Street, Locality, Region, Postal Code, Country Name orgparts = *(strnosemi ";") strnosemi ; First is Organization Name, remainder are Organization Units. nameparts = 0*4(strnosemi ";") strnosemi ; Family, Given, Middle, Prefix, Suffix. ; Example:Public;John;Q.;Reverend Dr.;III, Esq. strnosemi = *(*nonsemi ("\;" / "\" CRLF)) *nonsemi ; To include a semicolon in this string, it must be escaped ; with a "\" character. We are not sure whether we should add "\" CRLF to each value. For now, we exclude them.

        // Splits a structured value (ADR, ORG, N) on unescaped ';' and reports
        // the unescaped parts to the builder as one list.

        // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some data have it.
        // The constant-first comparison treats a null mEncoding as "not
        // quoted-printable" (handlePropertyValue also tolerates null) instead of
        // failing with a NullPointerException.
        if ("QUOTED-PRINTABLE".equalsIgnoreCase(mEncoding)) {
            propertyValue = getQuotedPrintable(propertyValue);
        }

        // A trailing backslash means the value continues on the next non-empty
        // line. The backslash is dropped and the lines are concatenated
        // directly; no line break is inserted between them.
        if (propertyValue.endsWith("\\")) {
            StringBuilder builder = new StringBuilder();
            builder.append(propertyValue.substring(0, propertyValue.length() - 1));
            try {
                String line;
                while (true) {
                    line = getNonEmptyLine();
                    if (!line.endsWith("\\")) {
                        builder.append(line);
                        break;
                    } else {
                        builder.append(line.substring(0, line.length() - 1));
                    }
                }
            } catch (IOException e) {
                throw new VCardException(
                        "IOException is throw during reading propertyValue" + e);
            }
            propertyValue = builder.toString();
        }

        if (mBuilder != null) {
            // In String#replaceAll() and Pattern class, "\\\\" means single slash. 

            final String IMPOSSIBLE_STRING = "\0";
            // First replace two backslashes with impossible strings, so that an
            // escaped backslash directly before ';' is not mistaken for "\;".
            propertyValue = propertyValue.replaceAll("\\\\\\\\", IMPOSSIBLE_STRING);

            // Now, split propertyValue with ; whose previous char is not back slash.
            Pattern pattern = Pattern.compile("(?<!\\\\);");
            // TODO: limit should be set in accordance with propertyName?
            // Limit -1 keeps trailing empty components.
            String[] strArray = pattern.split(propertyValue, -1); 
            ArrayList<String> arrayList = new ArrayList<String>();
            for (String str : strArray) {
                // Replace impossible strings with original two backslashes
                arrayList.add(
                        unescapeText(str.replaceAll(IMPOSSIBLE_STRING, "\\\\\\\\")));
            }
            mBuilder.propertyValues(arrayList);
        }
    
protected voidhandleParams(java.lang.String params)
params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param / param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws] pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "=" [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "=" [ws] word / knowntype

        // Dispatches one parameter, either "NAME=value" or a bare known type,
        // to the matching handler. An unrecognised name raises VCardException.
        String[] strArray = params.split("=", 2);
        if (strArray.length == 2) {
            String paramName = strArray[0].trim();
            String paramValue = strArray[1].trim();
            if (paramName.equals("TYPE")) {
                handleType(paramValue);
            } else if (paramName.equals("VALUE")) {
                handleValue(paramValue);
            } else if (paramName.equals("ENCODING")) {
                handleEncoding(paramValue);
            } else if (paramName.equals("CHARSET")) {
                handleCharset(paramValue);
            } else if (paramName.equals("LANGUAGE")) {
                handleLanguage(paramValue);
            } else if (paramName.startsWith("X-")) {
                handleAnyParam(paramName, paramValue);
            } else {
                throw new VCardException("Unknown type \"" + paramName + "\"");
            }
        } else {
            // No '=': the parameter is a bare known type. The grammar permits
            // whitespace after ';', so trim it here as the NAME=value branch
            // does; otherwise "TEL; HOME" is rejected as an unknown type.
            handleType(strArray[0].trim());
        }
    
protected voidhandlePropertyValue(java.lang.String propertyName, java.lang.String propertyValue)

        if (mEncoding == null || mEncoding.equalsIgnoreCase("7BIT")
                || mEncoding.equalsIgnoreCase("8BIT")
                || mEncoding.toUpperCase().startsWith("X-")) {
            if (mBuilder != null) {
                ArrayList<String> v = new ArrayList<String>();
                v.add(maybeUnescapeText(propertyValue));
                mBuilder.propertyValues(v);
            }
        } else if (mEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) {
            String result = getQuotedPrintable(propertyValue);
            if (mBuilder != null) {
                ArrayList<String> v = new ArrayList<String>();
                v.add(result);
                mBuilder.propertyValues(v);
            }
        } else if (mEncoding.equalsIgnoreCase("BASE64") ||
                mEncoding.equalsIgnoreCase("B")) {
            String result = getBase64(propertyValue);
            if (mBuilder != null) {
                ArrayList<String> v = new ArrayList<String>();
                v.add(result);
                mBuilder.propertyValues(v);
            }            
        } else {
            throw new VCardException("Unknown encoding: \"" + mEncoding + "\"");
        }
    
protected voidhandleType(java.lang.String ptypeval)
typeval = knowntype / "X-" word

        if (sKnownTypeSet.contains(ptypeval.toUpperCase()) ||
                ptypeval.startsWith("X-")) {
            if (mBuilder != null) {
                mBuilder.propertyParamType("TYPE");
                mBuilder.propertyParamValue(ptypeval.toUpperCase());
            }
        } else {
            throw new VCardException("Unknown type: \"" + ptypeval + "\"");
        }        
    
protected voidhandleValue(java.lang.String pvalueval)
pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word

        // Accepts a value kind that is in the known-value set (compared in
        // upper case) or that starts with "X-"; anything else raises
        // VCardException. The builder receives the value exactly as given.
        //
        // The upper-casing uses a fixed locale: with the default locale a
        // Turkish environment maps 'i' to dotted capital 'İ', so "inline" or
        // "cid" would never match their upper-case forms.
        if (sKnownValueSet.contains(pvalueval.toUpperCase(java.util.Locale.US)) ||
                pvalueval.startsWith("X-")) {
            if (mBuilder != null) {
                mBuilder.propertyParamType("VALUE");
                mBuilder.propertyParamValue(pvalueval);
            }
        } else {
            throw new VCardException("Unknown value \"" + pvalueval + "\"");
        }
    
private booleanisLetter(char ch)

        if ((ch >= 'a" && ch <= 'z") || (ch >= 'A" && ch <= 'Z")) {
            return true;
        }
        return false;
    
protected booleanisValidEncoding(java.lang.String encoding)

return
true when the encoding is a valid encoding.

        // Looks the name up in upper case. A fixed locale is used because the
        // default-locale mapping of 'i' under a Turkish locale would stop
        // "quoted-printable" from matching its upper-case form.
        return sAvailableEncodingV21.contains(encoding.toUpperCase(java.util.Locale.US));
    
protected booleanisValidPropertyName(java.lang.String propertyName)

return
true when the propertyName is a valid property name.

        // Looks the name up in upper case. A fixed locale is used because the
        // default-locale mapping of 'i' under a Turkish locale would stop names
        // such as "title" or "email" from matching their upper-case forms.
        return sAvailablePropertyNameV21.contains(propertyName.toUpperCase(java.util.Locale.US));
    
protected java.lang.StringmaybeUnescapeText(java.lang.String text)
For vCard 3.0.

        return text;
    
public booleanparse(java.io.InputStream is, java.lang.String charset, android.syncml.pim.VBuilder builder)
Parses the given stream and constructs a VCardDataBuilder object. Note that the vCard 2.1 specification allows the "CHARSET" parameter, and some carriers set a local encoding in it. For example, a Japanese phone carrier uses Shift_JIS, which is not formally allowed in the vCard specification. As a result, there are cases where the encoding given here does not work well with the "CHARSET". To avoid such cases, it may be fine to use "ISO-8859-1" as the encoding, and to encode each localized String afterward. RFC 2426 "recommends" (does not force) the use of UTF-8, so it may be OK to use UTF-8 as the encoding when parsing vCard 3.0. But note that some Japanese phones use Shift_JIS as a charset (e.g. W61SH), and others use "CHARSET=SHIFT_JIS", which is explicitly prohibited in the vCard 3.0 specification (e.g. W53K).

param
is The source to parse.
param
charset The charset.
param
builder The v builder which used to construct data.
return
Return true for success, otherwise false.
throws
IOException

        // TODO: If we really need to allow only CRLF as line break,
        // we will have to develop our own BufferedReader().
        mReader = new BufferedReader(new InputStreamReader(is, charset));
        
        mBuilder = builder;

        if (mBuilder != null) {
            mBuilder.start();
        }
        parseVCardFile();
        if (mBuilder != null) {
            mBuilder.end();
        }
        return true;
    
protected booleanparseItem()
item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR" [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."] "AGENT" [params] ":" vcard CRLF

        // Parses one property line. Returns true when the line is an END
        // property (the line is kept in mPreviousLine for readEndVCard) and
        // false after any other property has been handled. Throws
        // VCardException for malformed lines and unknown property names.

        // Every property starts from the default encoding; an ENCODING
        // parameter on this line may replace it via handleEncoding().
        mEncoding = sDefaultEncoding;

        // params    = ";" [ws] paramlist
        String line = getNonEmptyLine();
        String[] strArray = line.split(":", 2);
        if (strArray.length < 2) {
            throw new VCardException("Invalid line(\":\" does not exist): " + line);
        }
        String propertyValue = strArray[1];
        String[] groupNameParamsArray = strArray[0].split(";");
        // String#split drops trailing empty strings, so a name part made only
        // of separators (";:x") yields an empty array. Report that as a parse
        // error instead of failing with ArrayIndexOutOfBoundsException.
        if (groupNameParamsArray.length == 0) {
            throw new VCardException(
                    "Invalid line(property name does not exist): " + line);
        }
        String groupAndName = groupNameParamsArray[0].trim();
        String[] groupNameArray = groupAndName.split("\\.");
        int length = groupNameArray.length;
        // Same situation for a name part made only of dots (".:x").
        if (length == 0) {
            throw new VCardException(
                    "Invalid line(property name does not exist): " + line);
        }
        // The last dot-separated component is the name; the others are groups.
        String propertyName = groupNameArray[length - 1];
        if (mBuilder != null) {
            mBuilder.propertyName(propertyName);
            for (int i = 0; i < length - 1; i++) {
                mBuilder.propertyGroup(groupNameArray[i]);
            }
        }
        if (propertyName.equalsIgnoreCase("END")) {
            mPreviousLine = line;
            return true;
        }
        
        length = groupNameParamsArray.length;
        for (int i = 1; i < length; i++) {
            handleParams(groupNameParamsArray[i]);
        }
        
        // Property names are matched case-insensitively throughout, in line
        // with isValidPropertyName() and the END check above. With a
        // case-sensitive comparison a lower-case "version" line skipped the
        // compatibility check below.
        if (isValidPropertyName(propertyName) ||
                propertyName.startsWith("X-")) {
            if (propertyName.equalsIgnoreCase("VERSION") &&
                    !propertyValue.equals(getVersion())) {
                throw new VCardVersionException("Incompatible version: " + 
                        propertyValue + " != " + getVersion());
            }
            handlePropertyValue(propertyName, propertyValue);
            return false;
        } else if (propertyName.equalsIgnoreCase("ADR") ||
                propertyName.equalsIgnoreCase("ORG") ||
                propertyName.equalsIgnoreCase("N")) {
            handleMultiplePropertyValue(propertyName, propertyValue);
            return false;
        } else if (propertyName.equalsIgnoreCase("AGENT")) {
            handleAgent(propertyValue);
            return false;
        }
        
        throw new VCardException("Unknown property name: \"" + 
                propertyName + "\"");
    
protected voidparseItems()
items = *CRLF item / item

        /* items *CRLF item / item */
        // At least one item is always parsed; parsing stops once parseItem()
        // reports the END property. Each item is bracketed by startProperty()/
        // endProperty() on the builder, and endProperty() is issued even when
        // parseItem() throws.
        boolean reachedEnd;
        do {
            if (mBuilder != null) {
                mBuilder.startProperty();
            }
            try {
                reachedEnd = parseItem();
            } finally {
                if (mBuilder != null) {
                    mBuilder.endProperty();
                }
            }
        } while (!reachedEnd);
    
private booleanparseOneVCard()
vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"

        if (!readBeginVCard()) {
            return false;
        }
        parseItems();
        readEndVCard();
        return true;
    
protected voidparseVCardFile()
Parse the file at the given position vcard_file = [wsls] vcard [wsls]

        while (parseOneVCard()) {
        }
    
protected booleanreadBeginVCard()

return
True when successful. False when the end of the stream is reached
throws
IOException
throws
VCardException

        // Skip empty and whitespace-only lines in front of the card.
        String line = getLine();
        while (line != null && line.trim().length() == 0) {
            line = getLine();
        }
        if (line == null) {
            // Input exhausted before any content: no further card.
            return false;
        }

        // Though vCard specification does not allow lower cases,
        // some data may have them, so we allow it.
        final String[] parts = line.split(":", 2);
        final boolean isBegin = parts.length == 2
                && parts[0].trim().equalsIgnoreCase("BEGIN")
                && parts[1].trim().equalsIgnoreCase("VCARD");
        if (!isBegin) {
            throw new VCardException("BEGIN:VCARD != \"" + line + "\"");
        }

        if (mBuilder != null) {
            mBuilder.startRecord("VCARD");
        }
        return true;
    
protected voidreadEndVCard()

        // Validates the line remembered by parseItem() as "END:VCARD" and, when
        // it matches, closes the current record on the builder.
        //
        // Though vCard specification does not allow lower cases,
        // some data may have them, so we allow it.
        final String[] parts = mPreviousLine.split(":", 2);
        final boolean isEnd = parts.length == 2
                && parts[0].trim().equalsIgnoreCase("END")
                && parts[1].trim().equalsIgnoreCase("VCARD");
        if (!isEnd) {
            throw new VCardException("END:VCARD != \"" + mPreviousLine + "\"");
        }

        if (mBuilder == null) {
            return;
        }
        mBuilder.endRecord();
    
protected java.lang.StringunescapeText(java.lang.String text)
Convert escaped text into unescaped text.

        // Original vCard 2.1 specification does not allow transformation
        // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous implementation of
        // this class allowed them, so keep it as is.
        // In String#replaceAll(), "\\\\" means single slash. 
        return text.replaceAll("\\\\;", ";")
            .replaceAll("\\\\:", ":")
            .replaceAll("\\\\,", ",")
            .replaceAll("\\\\\\\\", "\\\\");