File Doc Category Size Date Package
UTF_8_Reader.java API Doc phoneME MR2 API (J2ME) 8619 Wed May 02 18:00:02 BST 2007 com.sun.cdc.i18n.j2me

UTF_8_Reader

java.lang.Object
- com.sun.cdc.i18n.StreamReader

public class UTF_8_Reader extends com.sun.cdc.i18n.StreamReader

Reader for UTF-8 encoded input streams.

Fields Summary
private static final int
NO_BYTE
signals that no byte is available, but not the end of stream
private int[]
readAhead
read-ahead buffer that holds the bytes of a partially read char left over from the last read
private boolean
newRead
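true until the first byte has been read from the underlying stream during the current read() call; while it is true, the stream is read without first checking available(), so that read() blocks until it gets at least one byte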
Constructors Summary
public UTF_8_Reader()
Constructs a UTF-8 reader.
readAhead = new int[3]; prepareForNextChar();
Methods Summary
private int getByteOfCurrentChar(int byteOfChar)
Get one of the raw bytes for the current character to be converted from look ahead buffer.
param
byteOfChar which raw byte to get: 0 for the first, 2 for the last
return
a byte value, NO_BYTE for no byte available or -1 for end of stream
exception
IOException if an I/O error occurs.
if (readAhead[byteOfChar] != NO_BYTE) { return readAhead[byteOfChar]; } /* * Our read method must block until it gets one char so don't call * available on the first real stream for each new read(). */ if (!newRead && in.available() <= 0) { return NO_BYTE; } readAhead[byteOfChar] = in.read(); /* * since we have read from the input stream, * this not a new read any more */ newRead = false; return readAhead[byteOfChar];
public void mark(int readAheadLimit)
Marking a read-ahead position is not supported for UTF-8 readers.
param
readAheadLimit number of characters to buffer ahead
exception
IOException is thrown for all calls to this method, because marking is not supported for UTF-8 readers
/* Unconditional: this reader never supports marking, whatever the limit. */
throw new IOException("mark() not supported");
public boolean markSupported()
Tell whether this reader supports the mark() operation. The UTF-8 implementation always returns false because it does not support mark().
return
false
/*
 * For readers, mark() is measured in characters. UTF-8 characters are
 * variable length, so the request cannot simply be forwarded to the
 * underlying byte InputStream the way other readers do.
 * This reader therefore does not support mark at this time.
 */
return false;
private void prepareForNextChar()
Prepare the reader for the next character by clearing the look ahead buffer.
/* Mark every slot of the look-ahead buffer as empty. */
for (int slot = 0; slot < readAhead.length; slot++) {
    readAhead[slot] = NO_BYTE;
}
public int read(char[] cbuf, int off, int len)
Read a block of UTF8 characters.
param
cbuf output buffer for converted characters read
param
off initial offset into the provided buffer
param
len maximum number of characters to read into the buffer
return
the number of converted characters, or -1 if the end of the stream is reached before any character is converted
exception
IOException is thrown if the input stream could not be read for the raw unconverted character
if (len == 0) {
    return 0;
}

/* The first access to the underlying stream in this call may block. */
newRead = true;

int produced = 0;
while (produced < len) {
    final int lead = getByteOfCurrentChar(0);
    if (lead < 0) {
        /*
         * Negative means end of stream (-1) or no byte available right
         * now. End of stream is only reported when nothing has been
         * converted in this call; otherwise hand back what we have.
         */
        return (lead == -1 && produced == 0) ? -1 : produced;
    }

    /* The high nibble of the lead byte determines the sequence length. */
    final int highNibble = lead >> 4;
    final int trailing;
    int decoded;
    if (highNibble <= 7) {
        /* 7 bits: 0xxxxxxx */
        trailing = 0;
        decoded = lead;
    } else if (highNibble == 12 || highNibble == 13) {
        /* 11 bits: 110x xxxx 10xx xxxx */
        trailing = 1;
        decoded = lead & 0x1F;
    } else if (highNibble == 14) {
        /* 16 bits: 1110 xxxx 10xx xxxx 10xx xxxx */
        trailing = 2;
        decoded = lead & 0x0F;
    } else {
        /* we do not handle characters greater than 16 bits */
        throw new UTFDataFormatException("invalid first byte " +
            Integer.toBinaryString(lead));
    }

    for (int index = 1; index <= trailing; index++) {
        final int next = getByteOfCurrentChar(index);

        if (next == NO_BYTE) {
            /*
             * Rest of the char has not arrived yet. The bytes seen so
             * far stay in the look-ahead buffer for the next call.
             */
            return produced;
        }

        if (next == -1) {
            /* end of stream in the middle of a char */
            throw new UTFDataFormatException("partial character");
        }

        if ((next & 0xC0) != 0x80) {
            /* continuation bytes must look like 10xx xxxx */
            throw new UTFDataFormatException("invalid byte " +
                Integer.toBinaryString(next));
        }

        /* each continuation byte contributes 6 more bits of the char */
        decoded = (decoded << 6) | (next & 0x3F);
    }

    cbuf[off + produced] = (char) decoded;
    produced++;
    prepareForNextChar();
}

return produced;
public void reset()
Resetting to a mark is not supported for UTF-8 readers.
exception
IOException is thrown for all calls to this method, because marking is not supported for UTF-8 readers
/* Unconditional: without mark support there is never a position to reset to. */
throw new IOException("reset() not supported");
public int sizeOf(byte[] array, int offset, int length)
Get the size in chars of an array of bytes.
param
array Source buffer
param
offset Offset at which to start counting characters
param
length number of bytes to use for counting
return
number of characters that would be converted
final int limit = offset + length;
int chars = 0;
int pos = offset;

while (pos < limit) {
    /* Every lead byte examined counts as one character. */
    chars++;

    final int highNibble = (array[pos] & 0xFF) >> 4;
    if (highNibble <= 7) {
        /* 0xxxxxxx */
        pos += 1;
    } else if (highNibble == 12 || highNibble == 13) {
        /* 110x xxxx 10xx xxxx */
        pos += 2;
    } else if (highNibble == 14) {
        /* 1110 xxxx 10xx xxxx 10xx xxxx */
        pos += 3;
    } else {
        /*
         * we do not support characters greater than 16 bits
         * return the current count, the reader will catch this
         */
        return chars;
    }
}

return chars;