UTF_8_Writer: public class UTF_8_Writer extends com.sun.cdc.i18n.StreamWriter. Writer for UTF-8 encoded output streams. NOTE: The UTF-8 writer only
supports UCS-2, or Unicode, to UTF-8 conversion. There is no support
for UTF-16 encoded characters outside of the Basic Multilingual Plane
(BMP). These are encoded in UTF-16 using previously reserved values
between U+D800 and U+DFFF. Additionally, the UTF-8 writer does not
support any character that requires 4 or more UTF-8 encoded bytes. |
Methods Summary |
---|
public int | sizeOf(char[] array, int offset, int length) Get the size in bytes of the UTF-8 encoding of a portion of an array of chars.
// Counts the UTF-8 bytes needed to encode the chars array[offset] through
// array[offset + length - 1]: 1 byte below 0x80, 2 bytes below 0x800,
// otherwise 3 bytes.
//
// 'length' is a number of chars, not an end index, so the scan must stop
// at offset + length. Comparing offset against length directly (as this
// code previously did) under-counts whenever offset is non-zero.
int outputSize = 0;
final int end = offset + length;
for (int index = offset; index < end; index++) {
    final char inputChar = array[index];
    if (inputChar < 0x80) {
        outputSize++;
    } else if (inputChar < 0x800) {
        outputSize += 2;
    } else {
        // Every remaining char value, surrogates included, is counted as
        // a 3-byte sequence; no 4-byte encodings are produced.
        outputSize += 3;
    }
}
return outputSize;
| public void | write(char[] cbuf, int off, int len) Write a portion of an array of characters.
// Encodes the chars cbuf[off] through cbuf[off + len - 1] as UTF-8 and
// passes each char's encoded bytes to 'out' in its own write call.
final byte[] encoded = new byte[3]; // scratch buffer: at most 3 bytes per char
for (int index = 0; index < len; index++) {
    final char ch = cbuf[off + index];
    final int encodedLength;
    if (ch < 0x80) {
        // Single byte: 0xxxxxxx
        encoded[0] = (byte) ch;
        encodedLength = 1;
    } else if (ch < 0x800) {
        // Two bytes: 110xxxxx 10xxxxxx
        encoded[0] = (byte) (0xc0 | ((ch >> 6) & 0x1f));
        encoded[1] = (byte) (0x80 | (ch & 0x3f));
        encodedLength = 2;
    } else {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        encoded[0] = (byte) (0xe0 | ((ch >> 12) & 0x0f));
        encoded[1] = (byte) (0x80 | ((ch >> 6) & 0x3f));
        encoded[2] = (byte) (0x80 | (ch & 0x3f));
        encodedLength = 3;
    }
    out.write(encoded, 0, encodedLength);
}
|
|