FileDocCategorySizeDatePackage
CstUtf8.javaAPI DocAndroid 1.5 API12113Wed May 06 22:41:02 BST 2009com.android.dx.rop.cst

CstUtf8

public final class CstUtf8 extends Constant
Constants of type CONSTANT_Utf8_info.

Fields Summary
public static final CstUtf8
EMPTY_STRING
non-null; instance representing "", that is, the empty string
private final String
string
non-null; the UTF-8 value as a string
private final com.android.dx.util.ByteArray
bytes
non-null; the UTF-8 value as bytes
Constructors Summary
public CstUtf8(String string)
Constructs an instance from a String.

param
string non-null; the UTF-8 value as a string

        if (string == null) {
            throw new NullPointerException("string == null");
        }

        this.string = string.intern();
        this.bytes = new ByteArray(stringToUtf8Bytes(string));
    
public CstUtf8(com.android.dx.util.ByteArray bytes)
Constructs an instance from some UTF-8 bytes.

param
bytes non-null; array of the UTF-8 bytes

        if (bytes == null) {
            throw new NullPointerException("bytes == null");
        }

        this.bytes = bytes;
        this.string = utf8BytesToString(bytes).intern();
    
Methods Summary
protected int compareTo0(Constant other)
{@inheritDoc}

        // Orders by String.compareTo() on the decoded values. The cast is
        // unchecked here, so a non-CstUtf8 argument fails with
        // ClassCastException.
        return string.compareTo(((CstUtf8) other).string);
    
public boolean equals(java.lang.Object other)
{@inheritDoc}

        if (!(other instanceof CstUtf8)) {
            return false;
        }

        return string.equals(((CstUtf8) other).string);
    
public com.android.dx.util.ByteArray getBytes()
Gets the UTF-8 value as UTF-8 encoded bytes.

return
non-null; an array of the UTF-8 bytes

        // Returns the stored ByteArray reference itself; no copy is made.
        return bytes;
    
public java.lang.String getString()
Gets the UTF-8 value as a string. The returned string is always already interned.

return
non-null; the UTF-8 value as a string

        // Both constructors store the result of intern(), so this reference
        // is the interned instance.
        return string;
    
public int getUtf16Size()
Gets the size of this instance in UTF-16 code units. That is, get the number of 16-bit chars in the UTF-16 encoding of this instance. This is the same as the length of the Java String representation of this instance.

return
>= 0; the UTF-16 size

        // String.length() counts 16-bit chars (UTF-16 code units), not
        // code points.
        return string.length();
    
public int getUtf8Size()
Gets the size of this instance in UTF-8 code units. That is, get the number of bytes in the UTF-8 encoding of this instance.

return
>= 0; the UTF-8 size

        // Size of the stored encoded form, as reported by the ByteArray.
        return bytes.size();
    
public int hashCode()
{@inheritDoc}

        // Hashes the same field that equals() compares, keeping the two
        // methods consistent.
        return string.hashCode();
    
public boolean isCategory2()
{@inheritDoc}

        // Unconditionally false for this constant type.
        return false;
    
public static byte[] stringToUtf8Bytes(java.lang.String string)
Converts a string into its Java-style UTF-8 form. Java-style UTF-8 differs from normal UTF-8 in the handling of character '\0' and surrogate pairs.

param
string non-null; the string to convert
return
non-null; the UTF-8 bytes for it


                                               
         
        final int charCount = string.length();
        // Each char encodes to at most three bytes, so this scratch buffer
        // never has to grow.
        byte[] scratch = new byte[charCount * 3];
        int used = 0;

        for (int index = 0; index < charCount; index++) {
            final char ch = string.charAt(index);

            if (ch >= 0x800) {
                // 1110XXXX 10XXXXXX 10XXXXXX -- three-byte encoding. Each
                // surrogate half of a pair is encoded separately this way.
                scratch[used++] = (byte) (0xe0 | ((ch >> 12) & 0x0f));
                scratch[used++] = (byte) (0x80 | ((ch >> 6) & 0x3f));
                scratch[used++] = (byte) (0x80 | (ch & 0x3f));
            } else if ((ch == 0) || (ch >= 0x80)) {
                // 110XXXXX 10XXXXXX -- two-byte encoding. Character 0 is
                // deliberately routed here rather than to the one-byte form.
                scratch[used++] = (byte) (0xc0 | ((ch >> 6) & 0x1f));
                scratch[used++] = (byte) (0x80 | (ch & 0x3f));
            } else {
                // 0XXXXXXX -- one-byte encoding for 0x01..0x7f.
                scratch[used++] = (byte) ch;
            }
        }

        // Trim the scratch buffer down to the bytes actually written.
        byte[] result = new byte[used];
        System.arraycopy(scratch, 0, result, 0, used);
        return result;
    
private static java.lang.String throwBadUtf8(int value, int offset)
Helper for {@link #utf8BytesToString}, which throws the right exception for a bogus utf-8 byte.

param
value the byte value
param
offset the file offset
return
never
throws
IllegalArgumentException always thrown

        throw new IllegalArgumentException("bad utf-8 byte " + Hex.u1(value) +
                                           " at offset " + Hex.u4(offset));
    
public java.lang.String toHuman()
{@inheritDoc}

        /*
         * Renders the string using only printable ASCII. Quotes and
         * backslashes get a backslash prefix; newline, carriage return and
         * tab use their mnemonic escapes; remaining characters up to 0x7f
         * use octal escapes; everything above 0x7f is written as a
         * backslash-u escape with four hex digits.
         */
        int len = string.length();
        // Leave some headroom for escapes so typical strings rarely regrow.
        StringBuilder sb = new StringBuilder(len * 3 / 2);

        for (int i = 0; i < len; i++) {
            char c = string.charAt(i);
            if ((c >= ' ') && (c < 0x7f)) {
                // Printable ASCII; only quotes and backslash need a prefix.
                if ((c == '\'') || (c == '\"') || (c == '\\')) {
                    sb.append('\\');
                }
                sb.append(c);
            } else if (c <= 0x7f) {
                // Control characters below space, plus DEL (0x7f).
                switch (c) {
                    case '\n': sb.append("\\n"); break;
                    case '\r': sb.append("\\r"); break;
                    case '\t': sb.append("\\t"); break;
                    default: {
                        /*
                         * Represent the character as an octal escape.
                         * If the next character is a valid octal
                         * digit, disambiguate by using the
                         * three-digit form.
                         */
                        char nextChar =
                            (i < (len - 1)) ? string.charAt(i + 1) : 0;
                        boolean displayZero =
                            (nextChar >= '0') && (nextChar <= '7');
                        sb.append('\\');
                        // Emit digits most-significant first; leading
                        // zeros are skipped unless displayZero is set.
                        for (int shift = 6; shift >= 0; shift -= 3) {
                            char outChar = (char) (((c >> shift) & 7) + '0');
                            if ((outChar != '0') || displayZero) {
                                sb.append(outChar);
                                displayZero = true;
                            }
                        }
                        if (! displayZero) {
                            // Ironic edge case: The original value was 0.
                            sb.append('0');
                        }
                        break;
                    }
                }
            } else {
                // Anything above 0x7f: four hex digits, high nibble first.
                sb.append("\\u");
                sb.append(Character.forDigit(c >> 12, 16));
                sb.append(Character.forDigit((c >> 8) & 0x0f, 16));
                sb.append(Character.forDigit((c >> 4) & 0x0f, 16));
                sb.append(Character.forDigit(c & 0x0f, 16));
            }
        }

        return sb.toString();
    
public java.lang.String toQuoted()
Gets the value as a human-oriented string, surrounded by double quotes.

return
non-null; the quoted string

        return '\"" + toHuman() + '\"";
    
public java.lang.String toQuoted(int maxLength)
Gets the value as a human-oriented string, surrounded by double quotes, but ellipsizes the result if it is longer than the given maximum length.

param
maxLength >= 5; the maximum length of the string to return
return
non-null; the quoted string

        // Note: this local deliberately shadows the field; it holds the
        // escaped form, not the raw value.
        String string = toHuman();
        int length = string.length();
        String ellipses;

        if (length <= (maxLength - 2)) {
            // Fits within maxLength once the two quote characters are added.
            ellipses = "";
        } else {
            // Budget: two quotes plus three dots, leaving maxLength - 5
            // characters of content. The cut is made on the escaped form,
            // so it may land inside an escape sequence.
            string = string.substring(0, maxLength - 5);
            ellipses = "...";
        }

        return '\"' + string + ellipses + '\"';
    
public java.lang.String toString()
{@inheritDoc}

        // Wraps the escaped form from toHuman() in a utf8{"..."} envelope.
        return "utf8{\"" + toHuman() + "\"}";
    
public java.lang.String typeName()
{@inheritDoc}

        // Fixed label for this constant type.
        return "utf8";
    
public static java.lang.String utf8BytesToString(com.android.dx.util.ByteArray bytes)
Converts an array of UTF-8 bytes into a string.

param
bytes non-null; the bytes to convert
return
non-null; the converted string

        /*
         * Decodes Java-style UTF-8: one-, two- and three-byte forms only.
         * Any malformed, truncated or non-minimal sequence is rejected via
         * throwBadUtf8(), which always throws IllegalArgumentException.
         * "length" counts the bytes still to be consumed and "at" is the
         * read position.
         */
        int length = bytes.size();
        // Every encoded form yields one char from at least one byte, so the
        // output can never be longer than the input.
        char[] chars = new char[length]; // This is sized to avoid a realloc.
        int outAt = 0;

        for (int at = 0; length > 0; /*at*/) {
            int v0 = bytes.getUnsignedByte(at);
            char out;
            // The high nibble of the lead byte selects the encoding length.
            switch (v0 >> 4) {
                case 0x00: case 0x01: case 0x02: case 0x03:
                case 0x04: case 0x05: case 0x06: case 0x07: {
                    // 0XXXXXXX -- single-byte encoding
                    length--;
                    if (v0 == 0) {
                        // A single zero byte is illegal.
                        return throwBadUtf8(v0, at);
                    }
                    out = (char) v0;
                    at++;
                    break;
                }
                case 0x0c: case 0x0d: {
                    // 110XXXXX -- two-byte encoding
                    length -= 2;
                    if (length < 0) {
                        // Truncated: the continuation byte is missing.
                        return throwBadUtf8(v0, at);
                    }
                    int v1 = bytes.getUnsignedByte(at + 1);
                    if ((v1 & 0xc0) != 0x80) {
                        return throwBadUtf8(v1, at + 1);
                    }
                    int value = ((v0 & 0x1f) << 6) | (v1 & 0x3f);
                    // Zero is exempt: it is legitimately encoded in two
                    // bytes in this format.
                    if ((value != 0) && (value < 0x80)) {
                        /*
                         * This should have been represented with
                         * one-byte encoding.
                         */
                        return throwBadUtf8(v1, at + 1);
                    }
                    out = (char) value;
                    at += 2;
                    break;
                }
                case 0x0e: {
                    // 1110XXXX -- three-byte encoding
                    length -= 3;
                    if (length < 0) {
                        // Truncated: continuation bytes are missing.
                        return throwBadUtf8(v0, at);
                    }
                    int v1 = bytes.getUnsignedByte(at + 1);
                    if ((v1 & 0xc0) != 0x80) {
                        return throwBadUtf8(v1, at + 1);
                    }
                    int v2 = bytes.getUnsignedByte(at + 2);
                    // Validate the THIRD byte here (this previously
                    // re-tested v1, letting a bad v2 through).
                    if ((v2 & 0xc0) != 0x80) {
                        return throwBadUtf8(v2, at + 2);
                    }
                    int value = ((v0 & 0x0f) << 12) | ((v1 & 0x3f) << 6) |
                        (v2 & 0x3f);
                    if (value < 0x800) {
                        /*
                         * This should have been represented with one- or
                         * two-byte encoding.
                         */
                        return throwBadUtf8(v2, at + 2);
                    }
                    out = (char) value;
                    at += 3;
                    break;
                }
                default: {
                    // 10XXXXXX, 1111XXXX -- illegal
                    return throwBadUtf8(v0, at);
                }
            }
            chars[outAt] = out;
            outAt++;
        }

        return new String(chars, 0, outAt);