// File: VCardParser_V21.java | Doc: API Doc | Category: Android 1.5 API | Size: 25061 | Date: Wed May 06 22:41:56 BST 2009 | Package: android.syncml.pim.vcard

// VCardParser_V21.java

/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.syncml.pim.vcard;

import android.syncml.pim.VBuilder;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Parser for vCard data. The grammar fragments quoted in the method comments
 * are taken from the vCard Specification 2.1; please refer to it for details.
 */
public class VCardParser_V21 {

    /**
     * Type names accepted by handleType(). The candidate is upper-cased
     * before the lookup, so every entry is stored in upper case.
     */
    private static final HashSet<String> sKnownTypeSet = new HashSet<String>(
            Arrays.asList("DOM", "INTL", "POSTAL", "PARCEL", "HOME", "WORK",
                    "PREF", "VOICE", "FAX", "MSG", "CELL", "PAGER", "BBS",
                    "MODEM", "CAR", "ISDN", "VIDEO", "AOL", "APPLELINK",
                    "ATTMAIL", "CIS", "EWORLD", "INTERNET", "IBMMAIL",
                    "MCIMAIL", "POWERSHARE", "PRODIGY", "TLX", "X400", "GIF",
                    "CGM", "WMF", "BMP", "MET", "PMB", "DIB", "PICT", "TIFF",
                    "PDF", "PS", "JPEG", "QTIME", "MPEG", "MPEG2", "AVI",
                    "WAVE", "AIFF", "PCM", "X509", "PGP"));

    /** Values accepted for the VALUE parameter by handleValue(). */
    private static final HashSet<String> sKnownValueSet = new HashSet<String>(
            Arrays.asList("INLINE", "URL", "CONTENT-ID", "CID"));

    /**
     * Property names accepted by isValidPropertyName(). ADR, ORG, N and AGENT
     * are not listed because parseItem() dispatches them separately.
     */
    // NICKNAME is not supported in vCard 2.1, but some vCard data contain it.
    private static final HashSet<String> sAvailablePropertyNameV21 =
        new HashSet<String>(Arrays.asList(
                "LOGO", "PHOTO", "LABEL", "FN", "TITLE", "SOUND",
                "VERSION", "TEL", "EMAIL", "TZ", "GEO", "NOTE", "URL",
                "BDAY", "ROLE", "REV", "UID", "KEY", "MAILER",
                "NICKNAME"));

    /** Encoding names accepted by isValidEncoding(). */
    // Though the vCard 2.1 specification does not allow "B" encoding, some data may have it.
    // We allow it for safety...
    private static final HashSet<String> sAvailableEncodingV21 =
        new HashSet<String>(Arrays.asList(
                "7BIT", "8BIT", "QUOTED-PRINTABLE", "BASE64", "B"));
    
    // Used only for parsing END:VCARD. parseItem() stores the raw line whose
    // property name is END here, and readEndVCard() validates it afterwards.
    private String mPreviousLine;
    
    /**
     * The builder to build parsed data. May be null, in which case the
     * parser reports no events.
     */
    protected VBuilder mBuilder = null;

    /**
     * The encoding type of the property currently being parsed. parseItem()
     * resets it to sDefaultEncoding and handleEncoding() overwrites it.
     */
    protected String mEncoding = null;
    
    /** Encoding assumed for a property until an ENCODING parameter says otherwise. */
    protected final String sDefaultEncoding = "8BIT";
    
    // Should not directly read a line from this. Use getLine() instead.
    protected BufferedReader mReader;
    
    /**
     * Create a new VCard parser.
     */
    public VCardParser_V21() {
        super();
    }

    /**
     * Parses vCard entries one after another until parseOneVCard() reports
     * that no further entry could be started.
     * vcard_file   = [wsls] vcard [wsls]
     */
    protected void parseVCardFile() throws IOException, VCardException {
        boolean parsedEntry;
        do {
            parsedEntry = parseOneVCard();
        } while (parsedEntry);
    }

    /**
     * @return the version string that parseItem() requires as the value of a
     *         VERSION property; any other value makes it throw
     *         VCardVersionException.
     */
    protected String getVersion() {
        return "2.1";
    }
    
    /**
     * Checks a property name against sAvailablePropertyNameV21, ignoring case.
     *
     * The name is upper-cased with a fixed locale so that the result does not
     * depend on the default locale (under a Turkish default locale,
     * "title".toUpperCase() does not produce "TITLE").
     *
     * @param propertyName the name to check; must not be null.
     * @return true when the propertyName is a valid property name.
     */
    protected boolean isValidPropertyName(String propertyName) {
        return sAvailablePropertyNameV21.contains(propertyName.toUpperCase(Locale.US));
    }

    /**
     * Checks an encoding name against sAvailableEncodingV21, ignoring case.
     *
     * The name is upper-cased with a fixed locale so that the result does not
     * depend on the default locale (under a Turkish default locale,
     * "8bit".toUpperCase() does not produce "8BIT").
     *
     * @param encoding the encoding name to check; must not be null.
     * @return true when the encoding is a valid encoding.
     */
    protected boolean isValidEncoding(String encoding) {
        return sAvailableEncodingV21.contains(encoding.toUpperCase(Locale.US));
    }
    
    /**
     * Reads the next line from mReader. This is the single point through
     * which the parser consumes input.
     *
     * @return String. It may be null (whatever BufferedReader#readLine()
     *         returns), or its length may be 0
     * @throws IOException
     */
    protected String getLine() throws IOException {
        return mReader.readLine();
    }
    
    /**
     * Skips lines that are empty or consist only of whitespace and returns
     * the first line that has other content.
     *
     * @return a line whose trimmed length is greater than 0
     * @throws IOException
     * @throws VCardException when the input ends before such a line is found
     */
    protected String getNonEmptyLine() throws IOException, VCardException {
        for (String candidate = getLine(); candidate != null; candidate = getLine()) {
            if (candidate.trim().length() > 0) {
                return candidate;
            }
        }
        throw new VCardException("Reached end of buffer.");
    }
    
    /**
     * Parses a single vCard entry.
     *
     *  vcard        = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF
     *                 items *CRLF
     *                 "END" [ws] ":" [ws] "VCARD"
     *
     * @return true when an entry was parsed, false when readBeginVCard()
     *         found no further entry.
     */
    private boolean parseOneVCard() throws IOException, VCardException {
        if (readBeginVCard()) {
            parseItems();
            readEndVCard();
            return true;
        }
        return false;
    }
    
    /**
     * Consumes blank lines and then the "BEGIN:VCARD" line, and notifies the
     * builder that a record has started.
     *
     * @return True when successful. False when the input ended before any
     *         non-blank line was found.
     * @throws IOException
     * @throws VCardException when the first non-blank line is not BEGIN:VCARD
     */
    protected boolean readBeginVCard() throws IOException, VCardException {
        // Skip blank lines; running out of input here just means that there
        // is no further entry.
        String line = getLine();
        while (line != null && line.trim().length() == 0) {
            line = getLine();
        }
        if (line == null) {
            return false;
        }

        // Though vCard specification does not allow lower cases,
        // some data may have them, so we allow it.
        final String[] nameAndValue = line.split(":", 2);
        final boolean isBegin = nameAndValue.length == 2
                && nameAndValue[0].trim().equalsIgnoreCase("BEGIN")
                && nameAndValue[1].trim().equalsIgnoreCase("VCARD");
        if (!isBegin) {
            throw new VCardException("BEGIN:VCARD != \"" + line + "\"");
        }

        if (mBuilder != null) {
            mBuilder.startRecord("VCARD");
        }
        return true;
    }
    
    /**
     * Validates the line remembered in mPreviousLine as "END:VCARD" and
     * notifies the builder that the record has ended. No input is read here;
     * parseItem() has already consumed the line.
     *
     * @throws VCardException when the remembered line is not END:VCARD
     */
    protected void readEndVCard() throws VCardException {
        final String endLine = mPreviousLine;
        final String[] nameAndValue = endLine.split(":", 2);

        // Though vCard specification does not allow lower cases,
        // some data may have them, so we allow it.
        boolean isEnd = false;
        if (nameAndValue.length == 2) {
            isEnd = nameAndValue[0].trim().equalsIgnoreCase("END")
                    && nameAndValue[1].trim().equalsIgnoreCase("VCARD");
        }
        if (!isEnd) {
            throw new VCardException("END:VCARD != \"" + endLine + "\"");
        }

        if (mBuilder != null) {
            mBuilder.endRecord();
        }
    }
    
    /**
     * Parses items until parseItem() reports the END line. Each parseItem()
     * call is bracketed by startProperty()/endProperty() on the builder, and
     * endProperty() is issued even when parseItem() throws.
     *
     * items = *CRLF item
     *       / item
     */
    protected void parseItems() throws IOException, VCardException {
        boolean reachedEnd = false;
        do {
            if (mBuilder != null) {
                mBuilder.startProperty();
            }
            try {
                reachedEnd = parseItem();
            } finally {
                if (mBuilder != null) {
                    mBuilder.endProperty();
                }
            }
        } while (!reachedEnd);
    }

    /**
     * Parses one item (one property line plus any continuation lines its
     * value handler consumes).
     *
     * item      = [groups "."] name    [params] ":" value CRLF
     *           / [groups "."] "ADR"   [params] ":" addressparts CRLF
     *           / [groups "."] "ORG"   [params] ":" orgparts CRLF
     *           / [groups "."] "N"     [params] ":" nameparts CRLF
     *           / [groups "."] "AGENT" [params] ":" vcard CRLF
     *
     * Known property names are matched case-insensitively throughout, so that
     * e.g. "version" is subject to the same version check as "VERSION" and
     * "adr" is dispatched like "ADR". The "X-" prefix is still matched as
     * written.
     *
     * @return true when the line was an END line (remembered in
     *         mPreviousLine for readEndVCard()), false for any other item.
     * @throws IOException
     * @throws VCardException when the line has no ":", has no property name,
     *         or names an unknown property
     * @throws VCardVersionException when a VERSION property does not match
     *         getVersion()
     */
    protected boolean parseItem() throws IOException, VCardException {
        mEncoding = sDefaultEncoding;

        // params    = ";" [ws] paramlist
        String line = getNonEmptyLine();
        String[] strArray = line.split(":", 2);
        if (strArray.length < 2) {
            throw new VCardException("Invalid line(\":\" does not exist): " + line);
        }
        String propertyValue = strArray[1];
        String[] groupNameParamsArray = strArray[0].split(";");
        // String#split() drops trailing empty strings, so a left-hand side
        // consisting only of ';' yields an empty array.
        if (groupNameParamsArray.length == 0) {
            throw new VCardException(
                    "Invalid line(property name does not exist): " + line);
        }
        String groupAndName = groupNameParamsArray[0].trim();
        String[] groupNameArray = groupAndName.split("\\.");
        int length = groupNameArray.length;
        // Likewise, a name consisting only of '.' yields an empty array.
        if (length == 0) {
            throw new VCardException(
                    "Invalid line(property name does not exist): " + line);
        }
        String propertyName = groupNameArray[length - 1];
        if (mBuilder != null) {
            mBuilder.propertyName(propertyName);
            for (int i = 0; i < length - 1; i++) {
                mBuilder.propertyGroup(groupNameArray[i]);
            }
        }
        if (propertyName.equalsIgnoreCase("END")) {
            mPreviousLine = line;
            return true;
        }

        length = groupNameParamsArray.length;
        for (int i = 1; i < length; i++) {
            handleParams(groupNameParamsArray[i]);
        }

        if (isValidPropertyName(propertyName) ||
                propertyName.startsWith("X-")) {
            if (propertyName.equalsIgnoreCase("VERSION") &&
                    !propertyValue.equals(getVersion())) {
                throw new VCardVersionException("Incompatible version: " +
                        propertyValue + " != " + getVersion());
            }
            handlePropertyValue(propertyName, propertyValue);
            return false;
        } else if (propertyName.equalsIgnoreCase("ADR") ||
                propertyName.equalsIgnoreCase("ORG") ||
                propertyName.equalsIgnoreCase("N")) {
            handleMultiplePropertyValue(propertyName, propertyValue);
            return false;
        } else if (propertyName.equalsIgnoreCase("AGENT")) {
            handleAgent(propertyValue);
            return false;
        }

        throw new VCardException("Unknown property name: \"" +
                propertyName + "\"");
    }

    /**
     * Handles one parameter of a property, i.e. one ";"-separated segment
     * found before the ":" of an item.
     *
     * params      = ";" [ws] paramlist
     * paramlist   = paramlist [ws] ";" [ws] param
     *             / param
     * param       = "TYPE" [ws] "=" [ws] ptypeval
     *             / "VALUE" [ws] "=" [ws] pvalueval
     *             / "ENCODING" [ws] "=" [ws] pencodingval
     *             / "CHARSET" [ws] "=" [ws] charsetval
     *             / "LANGUAGE" [ws] "=" [ws] langval
     *             / "X-" word [ws] "=" [ws] word
     *             / knowntype
     *
     * The five known parameter names are matched case-insensitively, in line
     * with how their values are matched. A segment without "=" is treated as
     * a bare knowntype and is trimmed first, since the grammar allows
     * whitespace around it.
     *
     * @param params the raw parameter text, without the leading ";".
     * @throws VCardException when the parameter name is unknown or the
     *         corresponding handler rejects the value
     */
    protected void handleParams(String params) throws VCardException {
        String[] strArray = params.split("=", 2);
        if (strArray.length != 2) {
            // Bare knowntype, e.g. the HOME in "TEL;HOME:...".
            handleType(strArray[0].trim());
            return;
        }

        String paramName = strArray[0].trim();
        String paramValue = strArray[1].trim();
        if (paramName.equalsIgnoreCase("TYPE")) {
            handleType(paramValue);
        } else if (paramName.equalsIgnoreCase("VALUE")) {
            handleValue(paramValue);
        } else if (paramName.equalsIgnoreCase("ENCODING")) {
            handleEncoding(paramValue);
        } else if (paramName.equalsIgnoreCase("CHARSET")) {
            handleCharset(paramValue);
        } else if (paramName.equalsIgnoreCase("LANGUAGE")) {
            handleLanguage(paramValue);
        } else if (paramName.startsWith("X-")) {
            handleAnyParam(paramName, paramValue);
        } else {
            throw new VCardException("Unknown type \"" + paramName + "\"");
        }
    }
    
    /**
     * Validates a type value and reports it to the builder as a "TYPE"
     * parameter whose value is the upper-cased type.
     *
     * typeval  = knowntype / "X-" word
     *
     * @param ptypeval the type value; must not be null.
     * @throws VCardException when the value is neither in sKnownTypeSet
     *         (ignoring case) nor starts with "X-"
     */
    protected void handleType(String ptypeval) throws VCardException {
        // Upper-case with a fixed locale: under a Turkish default locale
        // "gif".toUpperCase() does not produce "GIF", which would both fail
        // the lookup and leak a locale-specific value to the builder.
        final String upperType = ptypeval.toUpperCase(Locale.US);
        if (sKnownTypeSet.contains(upperType) ||
                ptypeval.startsWith("X-")) {
            if (mBuilder != null) {
                mBuilder.propertyParamType("TYPE");
                mBuilder.propertyParamValue(upperType);
            }
        } else {
            throw new VCardException("Unknown type: \"" + ptypeval + "\"");
        }
    }
    
    /**
     * Validates a VALUE parameter and reports it to the builder unchanged.
     *
     * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word
     *
     * @param pvalueval the parameter value; must not be null.
     * @throws VCardException when the value is neither in sKnownValueSet
     *         (ignoring case) nor starts with "X-"
     */
    protected void handleValue(String pvalueval) throws VCardException {
        // Upper-case with a fixed locale so that e.g. "inline" and "cid"
        // match regardless of the default locale (Turkish maps 'i' to a
        // dotted capital I).
        if (sKnownValueSet.contains(pvalueval.toUpperCase(Locale.US)) ||
                pvalueval.startsWith("X-")) {
            if (mBuilder != null) {
                mBuilder.propertyParamType("VALUE");
                mBuilder.propertyParamValue(pvalueval);
            }
        } else {
            throw new VCardException("Unknown value \"" + pvalueval + "\"");
        }
    }
    
    /**
     * Validates an ENCODING parameter, reports it to the builder and records
     * it in mEncoding for the value handlers.
     *
     * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word
     *
     * @throws VCardException when the encoding is neither accepted by
     *         isValidEncoding() nor starts with "X-"
     */
    protected void handleEncoding(String pencodingval) throws VCardException {
        final boolean accepted =
                isValidEncoding(pencodingval) || pencodingval.startsWith("X-");
        if (!accepted) {
            throw new VCardException("Unknown encoding \"" + pencodingval + "\"");
        }

        if (mBuilder != null) {
            mBuilder.propertyParamType("ENCODING");
            mBuilder.propertyParamValue(pencodingval);
        }
        mEncoding = pencodingval;
    }
    
    /**
     * Reports a CHARSET parameter to the builder without validating it.
     *
     * vCard specification only allows us-ascii and iso-8859-xxx (See RFC 1521),
     * but some vCard data contain other charsets, so we allow them.
     */
    protected void handleCharset(String charsetval) {
        if (mBuilder == null) {
            return;
        }
        mBuilder.propertyParamType("CHARSET");
        mBuilder.propertyParamValue(charsetval);
    }
    
    /**
     * Validates a LANGUAGE parameter and reports it to the builder. The value
     * must split on "-" into exactly two parts, and every character of both
     * parts must be an ASCII letter.
     *
     * See also Section 7.1 of RFC 1521
     *
     * @throws VCardException when the value does not have that shape
     */
    protected void handleLanguage(String langval) throws VCardException {
        final String[] subtags = langval.split("-");
        if (subtags.length != 2) {
            throw new VCardException("Invalid Language: \"" + langval + "\"");
        }
        for (String subtag : subtags) {
            for (int i = 0; i < subtag.length(); i++) {
                if (!isLetter(subtag.charAt(i))) {
                    throw new VCardException("Invalid Language: \"" + langval + "\"");
                }
            }
        }

        if (mBuilder != null) {
            mBuilder.propertyParamType("LANGUAGE");
            mBuilder.propertyParamValue(langval);
        }
    }

    /**
     * Reports an arbitrary parameter to the builder without any check.
     * Mainly for "X-" parameters.
     */
    protected void handleAnyParam(String paramName, String paramValue) {
        if (mBuilder == null) {
            return;
        }
        mBuilder.propertyParamType(paramName);
        mBuilder.propertyParamValue(paramValue);
    }
    
    /**
     * Decodes a single-valued property according to mEncoding and reports the
     * result to the builder as a one-element list.
     *
     * For QUOTED-PRINTABLE and BASE64/B the continuation lines are consumed
     * even when there is no builder, so that the reader stays positioned at
     * the next item.
     *
     * @param propertyName the property name (not used by this implementation).
     * @param propertyValue the text after the first ":" of the item's line.
     * @throws IOException
     * @throws VCardException when mEncoding is not a handled encoding or the
     *         input ends inside a multi-line value
     */
    protected void handlePropertyValue(
            String propertyName, String propertyValue) throws
            IOException, VCardException {
        final String decoded;
        if (mEncoding == null || mEncoding.equalsIgnoreCase("7BIT")
                || mEncoding.equalsIgnoreCase("8BIT")
                || mEncoding.toUpperCase().startsWith("X-")) {
            // Plain text needs no extra input, so without a builder there is
            // nothing to do at all.
            if (mBuilder == null) {
                return;
            }
            decoded = maybeUnescapeText(propertyValue);
        } else if (mEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) {
            decoded = getQuotedPrintable(propertyValue);
        } else if (mEncoding.equalsIgnoreCase("BASE64") ||
                mEncoding.equalsIgnoreCase("B")) {
            decoded = getBase64(propertyValue);
        } else {
            throw new VCardException("Unknown encoding: \"" + mEncoding + "\"");
        }

        if (mBuilder != null) {
            ArrayList<String> values = new ArrayList<String>();
            values.add(decoded);
            mBuilder.propertyValues(values);
        }
    }
    
    /**
     * Collects a quoted-printable value that may span several lines. A line
     * whose trimmed form ends with "=" is continued on the next line; such
     * lines are joined with "\r\n", each keeping its trailing "=" but losing
     * any padding after it. The text is not decoded here.
     *
     * Specifically, there may be some padding between = and CRLF.
     * See the following:
     *
     * qp-line := *(qp-segment transport-padding CRLF)
     *            qp-part transport-padding
     * qp-segment := qp-section *(SPACE / TAB) "="
     *             ; Maximum length of 76 characters
     *
     * e.g. (from RFC 2045)
     * Now's the time =
     * for all folk to come=
     *  to the aid of their country.
     *
     * @param firstString the part of the value found on the property's own line.
     * @return the joined raw text, or firstString itself when it is not continued.
     * @throws IOException
     * @throws VCardException when the input ends while a continuation line is expected
     */
    protected String getQuotedPrintable(String firstString) throws IOException, VCardException {
        if (!firstString.trim().endsWith("=")) {
            return firstString;
        }

        StringBuilder builder = new StringBuilder();
        builder.append(stripTransportPadding(firstString));
        builder.append("\r\n");
        while (true) {
            String line = getLine();
            if (line == null) {
                throw new VCardException(
                        "File ended during parsing quoted-printable String");
            }
            if (line.trim().endsWith("=")) {
                builder.append(stripTransportPadding(line));
                builder.append("\r\n");
            } else {
                builder.append(line);
                break;
            }
        }
        return builder.toString();
    }

    /**
     * Returns the line up to and including its last "=", dropping the
     * "transport-padding" that follows. The caller guarantees that the
     * line contains "=".
     */
    private static String stripTransportPadding(String line) {
        // The previous implementation scanned backwards in a loop that never
        // moved its index, and so never terminated when padding was present.
        return line.substring(0, line.lastIndexOf('=') + 1);
    }
    
    /**
     * Collects a BASE64 value: firstString followed by every subsequent line
     * up to (not including) the first line of length 0. The lines are
     * concatenated without separators and are not decoded.
     *
     * @param firstString the part of the value found on the property's own line.
     * @return the concatenated raw text.
     * @throws IOException
     * @throws VCardException when the input ends before the terminating empty line
     */
    protected String getBase64(String firstString) throws IOException, VCardException {
        final StringBuilder encoded = new StringBuilder();
        encoded.append(firstString);

        String line = getLine();
        while (line != null && line.length() != 0) {
            encoded.append(line);
            line = getLine();
        }
        if (line == null) {
            throw new VCardException(
                    "File ended during parsing BASE64 binary");
        }

        return encoded.toString();
    }
    
    /**
     * Handles a structured value: joins continuation lines, splits the value
     * on unescaped ";" and reports the unescaped parts to the builder.
     * Mainly for "ADR", "ORG", and "N".
     * We do not care about the number of strnosemi here.
     *
     * addressparts = 0*6(strnosemi ";") strnosemi
     *              ; PO Box, Extended Addr, Street, Locality, Region,
     *                Postal Code, Country Name
     * orgparts     = *(strnosemi ";") strnosemi
     *              ; First is Organization Name,
     *                remainder are Organization Units.
     * nameparts    = 0*4(strnosemi ";") strnosemi
     *              ; Family, Given, Middle, Prefix, Suffix.
     *              ; Example:Public;John;Q.;Reverend Dr.;III, Esq.
     * strnosemi    = *(*nonsemi ("\;" / "\" CRLF)) *nonsemi
     *              ; To include a semicolon in this string, it must be escaped
     *              ; with a "\" character.
     *
     * We are not sure whether we should add "\" CRLF to each value.
     * For now, we exclude them.
     *
     * @param propertyName the property name (not used by this implementation).
     * @param propertyValue the text after the first ":" of the item's line.
     */
    protected void handleMultiplePropertyValue(
            String propertyName, String propertyValue) throws IOException, VCardException {
        String rawValue = propertyValue;

        // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some data have it.
        if (mEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) {
            rawValue = getQuotedPrintable(rawValue);
        }

        if (rawValue.endsWith("\\")) {
            // A trailing backslash continues the value on the next non-empty
            // line; the backslash itself and the line break are dropped.
            final StringBuilder joined = new StringBuilder();
            joined.append(rawValue.substring(0, rawValue.length() - 1));
            try {
                boolean continued = true;
                while (continued) {
                    final String next = getNonEmptyLine();
                    continued = next.endsWith("\\");
                    if (continued) {
                        joined.append(next.substring(0, next.length() - 1));
                    } else {
                        joined.append(next);
                    }
                }
            } catch (IOException e) {
                throw new VCardException(
                        "IOException is throw during reading propertyValue" + e);
            }
            // Now, the value may contain "\r\n"
            rawValue = joined.toString();
        }

        if (mBuilder == null) {
            return;
        }

        // In String#replaceAll() and Pattern class, "\\\\" means single slash.

        final String IMPOSSIBLE_STRING = "\0";
        // Hide escaped backslashes behind a placeholder so that they cannot
        // be mistaken for the escape of a following ";".
        final String shielded = rawValue.replaceAll("\\\\\\\\", IMPOSSIBLE_STRING);

        // Split on every ";" whose previous char is not a backslash.
        // TODO: limit should be set in accordance with propertyName?
        final String[] parts = Pattern.compile("(?<!\\\\);").split(shielded, -1);

        final ArrayList<String> values = new ArrayList<String>();
        for (int i = 0; i < parts.length; i++) {
            // Put the two backslashes back before unescaping.
            final String restored = parts[i].replaceAll(IMPOSSIBLE_STRING, "\\\\\\\\");
            values.add(unescapeText(restored));
        }
        mBuilder.propertyValues(values);
    }
    
    /**
     * Parses the vCard embedded in an AGENT property. The property value must
     * be the "BEGIN:VCARD" line of the embedded entry; its items and its END
     * line are then read from the following lines.
     *
     * vCard 2.1 specifies AGENT allows one vcard entry. It is not encoded at all.
     *
     * @param propertyValue the text after the first ":" of the AGENT line.
     * @throws IOException
     * @throws VCardException when propertyValue is not BEGIN:VCARD, or the
     *         embedded entry is malformed
     */
    protected void handleAgent(String propertyValue) throws IOException, VCardException {
        String[] strArray = propertyValue.split(":", 2);
        // All three conditions must hold. The length check was previously
        // joined with "||", which accepted any value containing ":" and
        // indexed past the array for a bare "BEGIN".
        if (!(strArray.length == 2 &&
                strArray[0].trim().equalsIgnoreCase("BEGIN") &&
                strArray[1].trim().equalsIgnoreCase("VCARD"))) {
            throw new VCardException("BEGIN:VCARD != \"" + propertyValue + "\"");
        }
        // NOTE(review): readEndVCard() calls mBuilder.endRecord(), but no
        // startRecord() is issued for the embedded entry here, unlike in
        // readBeginVCard(). Confirm whether builders expect that.
        parseItems();
        readEndVCard();
    }
    
    /**
     * Hook applied by handlePropertyValue() to plain (7BIT / 8BIT / "X-")
     * property values before they are passed to the builder. This
     * implementation returns the text unchanged.
     *
     * For vCard 3.0 (presumably overridden by a 3.0 parser subclass, which
     * is not visible in this file).
     */
    protected String maybeUnescapeText(String text) {
        return text;
    }
    
    /**
     * Convert escaped text into unescaped text: "\;" becomes ";", "\:"
     * becomes ":", "\," becomes "," and "\\" becomes "\". The replacements
     * are applied one after another, in that order.
     */
    protected String unescapeText(String text) {
        // Original vCard 2.1 specification does not allow transformation
        // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous implementation of
        // this class allowed them, so keep it as is.
        // In String#replaceAll(), "\\\\" means single slash.
        String unescaped = text.replaceAll("\\\\;", ";");
        unescaped = unescaped.replaceAll("\\\\:", ":");
        unescaped = unescaped.replaceAll("\\\\,", ",");
        unescaped = unescaped.replaceAll("\\\\\\\\", "\\\\");
        return unescaped;
    }
    
    /**
     * Parses the given stream and reports the parsed data to the given builder.
     * Note that vCard 2.1 specification allows "CHARSET" parameter, and some carriers set
     * a local encoding to it. For example, a Japanese phone carrier uses Shift_JIS, which
     * is not formally allowed in vCard specification.
     * As a result, there is a case where the encoding given here does not do well with
     * the "CHARSET".
     *
     * In order to avoid such cases, it may be fine to use "ISO-8859-1" as an encoding,
     * and to encode each localized String afterward.
     *
     * RFC 2426 "recommends" (not forces) to use UTF-8, so it may be OK to use
     * UTF-8 as an encoding when parsing vCard 3.0. But note that some Japanese
     * phones use Shift_JIS as a charset (e.g. W61SH), and others use
     * "CHARSET=SHIFT_JIS", which is explicitly prohibited in vCard 3.0 specification
     * (e.g. W53K).
     *
     * @param is
     *            The source to parse.
     * @param charset
     *            The charset used to decode the stream.
     * @param builder
     *            The v builder which is used to construct data. May be null,
     *            in which case the input is parsed but nothing is reported.
     * @return Always true; failures are reported through exceptions.
     * @throws IOException
     * @throws VCardException
     */
    public boolean parse(InputStream is, String charset, VBuilder builder)
            throws IOException, VCardException {
        // TODO: If we really need to allow only CRLF as line break,
        // we will have to develop our own BufferedReader().
        final InputStreamReader decoder = new InputStreamReader(is, charset);
        mReader = new BufferedReader(decoder);
        mBuilder = builder;

        if (mBuilder != null) {
            mBuilder.start();
        }

        parseVCardFile();

        if (mBuilder != null) {
            mBuilder.end();
        }
        return true;
    }
    
    /**
     * @return true when ch is an ASCII letter ('a'-'z' or 'A'-'Z').
     */
    private boolean isLetter(char ch) {
        final boolean lowerCase = ch >= 'a' && ch <= 'z';
        final boolean upperCase = ch >= 'A' && ch <= 'Z';
        return lowerCase || upperCase;
    }
}