XLS2CSVmrapublic class XLS2CSVmra extends Object implements HSSFListenerA XLS -> CSV processor, that uses the MissingRecordAware
EventModel code to ensure it outputs all columns and rows. |
Fields Summary |
---|
private int | minColumns | private POIFSFileSystem | fs | private PrintStream | output | private int | lastRowNumber | private int | lastColumnNumber | private boolean | outputFormulaValuesShould we output the formula, or the value it has? | private SSTRecord | sstRecord | private Map | customFormatRecords | private List | xfRecords |
Constructors Summary |
---|
public XLS2CSVmra(POIFSFileSystem fs, PrintStream output, int minColumns)Creates a new XLS -> CSV converter
this.fs = fs;
this.output = output;
this.minColumns = minColumns;
| public XLS2CSVmra(String filename, int minColumns)Creates a new XLS -> CSV converter
this(
new POIFSFileSystem(new FileInputStream(filename)),
System.out, minColumns
);
|
Methods Summary |
---|
private java.lang.String | formatNumberDateCell(org.apache.poi.hssf.record.CellValueRecordInterface cell, double value)Formats a number or date cell, be that a real number, or the
answer to a formula
// Get the built in format, if there is one
ExtendedFormatRecord xfr = (ExtendedFormatRecord)
xfRecords.get(cell.getXFIndex());
if(xfr == null) {
System.err.println("Cell " + cell.getRow() + "," + cell.getColumn() + " uses XF with index " + cell.getXFIndex() + ", but we don't have that");
return Double.toString(value);
} else {
int formatIndex = xfr.getFormatIndex();
String format;
if(formatIndex >= HSSFDataFormat.getNumberOfBuiltinBuiltinFormats()) {
FormatRecord tfr = (FormatRecord)customFormatRecords.get(new Integer(formatIndex));
format = tfr.getFormatString();
} else {
format = HSSFDataFormat.getBuiltinFormat(xfr.getFormatIndex());
}
// Is it a date?
if(HSSFDateUtil.isADateFormat(formatIndex,format) &&
HSSFDateUtil.isValidExcelDate(value)) {
// Java wants M not m for month
format = format.replace('m",'M");
// Change \- into -, if it's there
format = format.replaceAll("\\\\-","-");
// Format as a date
Date d = HSSFDateUtil.getJavaDate(value);
DateFormat df = new SimpleDateFormat(format);
return df.format(d);
} else {
if(format == "General") {
// Some sort of wierd default
return Double.toString(value);
}
// Format as a number
DecimalFormat df = new DecimalFormat(format);
return df.format(value);
}
}
| public static void | main(java.lang.String[] args)
if(args.length < 1) {
System.err.println("Use:");
System.err.println(" XLS2CSVmra <xls file> [min columns]");
System.exit(1);
}
int minColumns = -1;
if(args.length >= 2) {
minColumns = Integer.parseInt(args[1]);
}
XLS2CSVmra xls2csv = new XLS2CSVmra(args[0], minColumns);
xls2csv.process();
| public void | process()Initiates the processing of the XLS file to CSV
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
request.addListenerForAllRecords(listener);
factory.processWorkbookEvents(request, fs);
| public void | processRecord(org.apache.poi.hssf.record.Record record)Main HSSFListener method, processes events, and outputs the
CSV as the file is processed.
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
switch (record.getSid())
{
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case FormatRecord.sid:
FormatRecord fr = (FormatRecord) record;
customFormatRecords.put(new Integer(fr.getIndexCode()), fr);
break;
case ExtendedFormatRecord.sid:
ExtendedFormatRecord xr = (ExtendedFormatRecord) record;
xfRecords.add(xr);
break;
case BlankRecord.sid:
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "";
break;
case BoolErrRecord.sid:
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = "";
break;
case FormulaRecord.sid:
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if(outputFormulaValues) {
thisStr = formatNumberDateCell(frec, frec.getValue());
} else {
// TODO: Output the formula string
thisStr = '"" + frec.toString() + '"";
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
thisStr = '"" + lrec.getValue() + '"";
break;
case LabelSSTRecord.sid:
LabelSSTRecord lsrec = (LabelSSTRecord) record;
thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if(sstRecord == null) {
thisStr = '"" + "(No SST Record, can't identify string)" + '"";
} else {
thisStr = '"" + sstRecord.getString(lsrec.getSSTIndex()).toString() + '"";
}
break;
case NoteRecord.sid:
NoteRecord nrec = (NoteRecord) record;
thisRow = nrec.getRow();
thisColumn = nrec.getColumn();
// TODO: Find object to match nrec.getShapeId()
thisStr = '"" + "(TODO)" + '"";
break;
case NumberRecord.sid:
NumberRecord numrec = (NumberRecord) record;
thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
// Format
thisStr = formatNumberDateCell(numrec, numrec.getValue());
break;
case RKRecord.sid:
RKRecord rkrec = (RKRecord) record;
thisRow = rkrec.getRow();
thisColumn = rkrec.getColumn();
thisStr = '"" + "(TODO)" + '"";
break;
default:
break;
}
// Handle new row
if(thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
// Handle missing column
if(record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord)record;
thisRow = mc.getRow();
thisColumn = mc.getColumn();
thisStr = "";
}
// If we got something to print out, do so
if(thisStr != null) {
if(thisColumn > 0) {
output.print(',");
}
output.print(thisStr);
}
// Update column and row count
if(thisRow > -1)
lastRowNumber = thisRow;
if(thisColumn > -1)
lastColumnNumber = thisColumn;
// Handle end of row
if(record instanceof LastCellOfRowDummyRecord) {
// Print out any missing commas if needed
if(minColumns > 0) {
// Columns are 0 based
if(lastColumnNumber == -1) { lastColumnNumber = 0; }
for(int i=lastColumnNumber; i<(minColumns); i++) {
output.print(',");
}
}
// We're onto a new row
lastColumnNumber = -1;
// End the row
output.println();
}
|
|