package com.ronsoft.books.nio.charset;
import java.nio.*;
import java.nio.charset.*;
import java.nio.channels.*;
import java.io.*;
/**
* Test charset decoding. Example code from the Java New I/O book.
* Created and tested: Dec, 2001
* @author Ron Hitchens (ron@ronsoft.com)
* @version $Id: CharsetDecode.java,v 1.4 2002/05/20 07:24:31 ron Exp $
*/
public class CharsetDecode
{
    /**
     * Test charset decoding in the general case, detecting and handling
     * buffer under/overflow and flushing the decoder state at end of
     * input.
     * This code reads from stdin and decodes the byte stream to chars
     * using ISO-8859-1 by default. The decoded chars are written to
     * stdout. This is effectively a 'cat' for text files; another
     * charset encoding can be used by naming it as the first command
     * line argument.
     *
     * @param argv Optional; argv[0] is the name of the charset to decode with.
     * @throws IOException If reading stdin or writing stdout fails.
     */
    public static void main (String [] argv)
        throws IOException
    {
        // default charset is ISO-8859-1 (Latin-1), a superset of ASCII
        String charsetName = "ISO-8859-1";

        // charset name can be specified on the command line
        if (argv.length > 0) {
            charsetName = argv [0];
        }

        // Wrap a Channel around stdin, wrap a Writer around stdout,
        // find the named Charset and pass them to the decode method.
        // If the named charset is not valid, Charset.forName() throws
        // an unchecked exception (e.g. UnsupportedCharsetException).
        decodeChannel (Channels.newChannel (System.in),
            new OutputStreamWriter (System.out),
            Charset.forName (charsetName));
    }

    /**
     * General purpose static method which reads bytes from a Channel,
     * decodes them according to the given Charset and writes the
     * resulting chars to a Writer. Malformed or unmappable input is
     * replaced with the decoder's replacement string rather than
     * reported as an error. The source channel is closed before this
     * method returns (normally or exceptionally); the writer is
     * flushed but not closed.
     *
     * @param source A ReadableByteChannel object which will be read to
     *  EOF as a source of encoded bytes.
     * @param writer A Writer object to which decoded chars will be written.
     * @param charset A Charset object, whose CharsetDecoder will be used
     *  to do the character set decoding.
     * @throws IOException If reading the channel or writing to the
     *  writer fails.
     */
    public static void decodeChannel (ReadableByteChannel source,
        Writer writer, Charset charset)
        throws UnsupportedCharsetException, IOException
    {
        try {
            // get a decoder instance from the Charset
            CharsetDecoder decoder = charset.newDecoder();

            // tell decoder to replace bad chars with default marker
            decoder.onMalformedInput (CodingErrorAction.REPLACE);
            decoder.onUnmappableCharacter (CodingErrorAction.REPLACE);

            // allocate radically different input and output buffer sizes
            // for testing purposes
            ByteBuffer bb = ByteBuffer.allocateDirect (16 * 1024);
            CharBuffer cb = CharBuffer.allocate (57);

            // Start with the input buffer in "drain" state and empty
            // (position 0, limit 0) so the first compact() below leaves
            // the whole buffer available for filling.
            bb.flip();

            // buffer starts empty, indicate input is needed
            CoderResult result = CoderResult.UNDERFLOW;
            boolean eof = false;

            while (true) {
                // input buffer underflow, decoder wants more input
                if (result.isUnderflow()) {
                    if (eof) {
                        // all input was offered with endOfInput == true
                        // and the decoder has nothing left to consume
                        break;
                    }

                    // Underflow does not imply the buffer is empty: the
                    // leading bytes of a multi-byte sequence may still be
                    // pending. compact() keeps them at the front of the
                    // buffer, where clear() would throw them away.
                    bb.compact();

                    // fill the input buffer, watch for EOF
                    eof = (source.read (bb) == -1);

                    // prepare the buffer for reading by decoder
                    bb.flip();
                }

                // decode input bytes to output chars, pass EOF flag
                result = decoder.decode (bb, cb, eof);

                if (result.isOverflow()) {
                    // output buffer is full, drain it and decode again
                    drainCharBuf (cb, writer);
                } else if (result.isError()) {
                    // not expected with REPLACE actions; fail loudly
                    // rather than spin forever on the same input
                    result.throwException();
                }
            }

            // flush any remaining state from the decoder, being careful
            // to detect output buffer overflow(s).
            while (decoder.flush (cb).isOverflow()) {
                drainCharBuf (cb, writer);
            }

            // drain any chars remaining in the output buffer
            drainCharBuf (cb, writer);

            // push out any buffered data to the underlying sink
            writer.flush();
        } finally {
            // close the channel even if decoding or writing failed
            source.close();
        }
    }

    /**
     * Helper method to drain the char buffer and write its content to
     * the given Writer object. Upon return, the buffer is empty and
     * ready to be refilled.
     *
     * @param cb A CharBuffer containing chars to be written.
     * @param writer A Writer object to consume the chars in cb.
     * @throws IOException If writing to the writer fails.
     */
    static void drainCharBuf (CharBuffer cb, Writer writer)
        throws IOException
    {
        cb.flip();      // prepare buffer for draining

        // This writes the chars contained in the CharBuffer but
        // doesn't actually modify the state of the buffer.
        // If the char buffer was being drained by calls to get(),
        // a loop might be needed here.
        if (cb.hasRemaining()) {
            writer.write (cb.toString());
        }

        cb.clear();     // prepare buffer to be filled again
    }
}
|