FileDocCategorySizeDatePackage
CSVConfiguration.javaAPI DocApache Xerces 3.0.19101Fri Sep 14 20:33:56 BST 2007xni.parser

CSVConfiguration.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package xni.parser;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.StringTokenizer;

import org.apache.xerces.util.NamespaceSupport;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.util.XMLStringBuffer;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLDTDContentModelHandler;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLInputSource;

/**
 * This example is a very simple parser configuration that can 
 * parse files with comma-separated values (CSV) to generate XML
 * events. For example, the following CSV document:
 * <pre>
 * Andy Clark,16 Jan 1973,Cincinnati
 * </pre>
 * produces the following XML "document" as represented by the 
 * XNI streaming document information: 
 * <pre>
 * <?xml version='1.0' encoding='UTF-8' standalone='true'?>
 * <!DOCTYPE csv [
 * <!ELEMENT csv (row)*>
 * <!ELEMENT row (col)*>
 * <!ELEMENT col (#PCDATA)>
 * ]>
 * <csv>
 *  <row>
 *   <col>Andy Clark</col>
 *   <col>16 Jan 1973</col>
 *   <col>Cincinnati</col>
 *  </row>
 * </csv>
 * </pre>
 * 
 * @author Andy Clark, IBM
 *
 * @version $Id: CSVConfiguration.java 447690 2006-09-19 02:41:53Z mrglavas $
 */
public class CSVConfiguration
    extends AbstractConfiguration {

    //
    // Constants
    //

    /** A QName for the <csv> element name. */
    protected static final QName CSV = new QName(null, null, "csv", null);

    /** A QName for the <row> element name. */
    protected static final QName ROW = new QName(null, null, "row", null);

    /** A QName for the <col> element name. */
    protected static final QName COL = new QName(null, null, "col", null);
    
    /** An empty list of attributes. */
    protected static final XMLAttributes EMPTY_ATTRS = new XMLAttributesImpl();

    /** A newline XMLString. */
    private final XMLString NEWLINE = new XMLStringBuffer("\n");

    /** A newline + one space XMLString. */
    private final XMLString NEWLINE_ONE_SPACE = new XMLStringBuffer("\n ");

    /** A newline + two spaces XMLString. */
    private final XMLString NEWLINE_TWO_SPACES = new XMLStringBuffer("\n  ");

    //
    // Data
    //

    /** 
     * A string buffer for use in copying string into an XMLString
     * object for passing to the characters method.
     */
    private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    //
    // XMLParserConfiguration methods
    //

    /**
     * Parse an XML document.
     * <p>
     * The parser can use this method to instruct this configuration
     * to begin parsing an XML document from any valid input source
     * (a character stream, a byte stream, or a URI).
     * <p>
     * Parsers may not invoke this method while a parse is in progress.
     * Once a parse is complete, the parser may then parse another XML
     * document.
     * <p>
     * This method is synchronous: it will not return until parsing
     * has ended.  If a client application wants to terminate 
     * parsing early, it should throw an exception.
     *
     * @param source The input source for the top-level of the
     *               XML document.
     *
     * @exception XNIException Any XNI exception, possibly wrapping 
     *                         another exception.
     * @exception IOException  An IO exception from the parser, possibly
     *                         from a byte stream or character stream
     *                         supplied by the parser.
     */
    public void parse(XMLInputSource source) 
        throws IOException, XNIException {

        // get reader
        openInputSourceStream(source);
        Reader reader = source.getCharacterStream();
        if (reader == null) {
            InputStream stream = source.getByteStream();
            reader = new InputStreamReader(stream);
        }
        BufferedReader bufferedReader = new BufferedReader(reader);

        // start document
        if (fDocumentHandler != null) {
            fDocumentHandler.startDocument(null, "UTF-8", new NamespaceSupport(), null);
            fDocumentHandler.xmlDecl("1.0", "UTF-8", "true", null);
            fDocumentHandler.doctypeDecl("csv", null, null, null);
        }
        if (fDTDHandler != null) {
            fDTDHandler.startDTD(null, null);
            fDTDHandler.elementDecl("csv", "(row)*", null);
            fDTDHandler.elementDecl("row", "(col)*", null);
            fDTDHandler.elementDecl("col", "(#PCDATA)", null);
        }
        if (fDTDContentModelHandler != null) {
            fDTDContentModelHandler.startContentModel("csv", null);
            fDTDContentModelHandler.startGroup(null);
            fDTDContentModelHandler.element("row", null);
            fDTDContentModelHandler.endGroup(null);
            short csvOccurs = XMLDTDContentModelHandler.OCCURS_ZERO_OR_MORE;
            fDTDContentModelHandler.occurrence(csvOccurs, null);
            fDTDContentModelHandler.endContentModel(null);
            
            fDTDContentModelHandler.startContentModel("row", null);
            fDTDContentModelHandler.startGroup(null);
            fDTDContentModelHandler.element("col", null);
            fDTDContentModelHandler.endGroup(null);
            short rowOccurs = XMLDTDContentModelHandler.OCCURS_ZERO_OR_MORE;
            fDTDContentModelHandler.occurrence(rowOccurs, null);
            fDTDContentModelHandler.endContentModel(null);
        
            fDTDContentModelHandler.startContentModel("col", null);
            fDTDContentModelHandler.startGroup(null);
            fDTDContentModelHandler.pcdata(null);
            fDTDContentModelHandler.endGroup(null);
            fDTDContentModelHandler.endContentModel(null);
        }
        if (fDTDHandler != null) {
            fDTDHandler.endDTD(null);
        }
        if (fDocumentHandler != null) {
            fDocumentHandler.startElement(CSV, EMPTY_ATTRS, null);
        }

        // read lines
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            if (fDocumentHandler != null) {
                fDocumentHandler.ignorableWhitespace(NEWLINE_ONE_SPACE, null);
                fDocumentHandler.startElement(ROW, EMPTY_ATTRS, null);
                StringTokenizer tokenizer = new StringTokenizer(line, ",");
                while (tokenizer.hasMoreTokens()) {
                    fDocumentHandler.ignorableWhitespace(NEWLINE_TWO_SPACES, null);
                    fDocumentHandler.startElement(COL, EMPTY_ATTRS, null);
                    String token = tokenizer.nextToken();
                    fStringBuffer.clear();
                    fStringBuffer.append(token);
                    fDocumentHandler.characters(fStringBuffer, null);
                    fDocumentHandler.endElement(COL, null);
                }
                fDocumentHandler.ignorableWhitespace(NEWLINE_ONE_SPACE, null);
                fDocumentHandler.endElement(ROW, null);
            }
        }
        bufferedReader.close();

        // end document
        if (fDocumentHandler != null) {
            fDocumentHandler.ignorableWhitespace(NEWLINE, null);
            fDocumentHandler.endElement(CSV, null);
            fDocumentHandler.endDocument(null);
        }

    } // parse(XMLInputSource)
    
    // NOTE: The following methods are overloaded to ignore setting
    //       of parser state so that this configuration does not
    //       throw configuration exceptions for features and properties
    //       that it doesn't care about.

    public void setFeature(String featureId, boolean state) {}
    public boolean getFeature(String featureId) { return false; }
    public void setProperty(String propertyId, Object value) {}
    public Object getProperty(String propertyId) { return null; }

} // class CSVConfiguration