FileDocCategorySizeDatePackage
Charset.javaAPI DocAndroid 1.5 API32188Wed May 06 22:41:04 BST 2009java.nio.charset

Charset

public abstract class Charset extends Object implements Comparable
A charset defines a mapping between a Unicode character sequence and a byte sequence. It facilitates the encoding from a Unicode character sequence into a byte sequence, and the decoding from a byte sequence into a Unicode character sequence.

A charset has a canonical name, which is usually in uppercase. Typically it also has one or more aliases. The name string can only consist of the following characters: '0' - '9', 'A' - 'Z', 'a' - 'z', '.', ':', '-' and '_'. The first character of the name must be a digit or a letter.

The following charsets should be supported by any Java platform: US-ASCII, ISO-8859-1, UTF-8, UTF-16BE, UTF-16LE, UTF-16.

Additional charsets can be made available by configuring one or more charset providers through provider configuration files. Such files are always named as "java.nio.charset.spi.CharsetProvider" and located in the "META-INF/services" sub folder of one or more classpaths. The files should be encoded in "UTF-8". Each line of their content specifies the class name of a charset provider which extends java.nio.charset.spi.CharsetProvider. A line should end with '\r', '\n' or '\r\n'. Leading and trailing whitespaces are trimmed. Blank lines, and lines (after trimming) starting with "#" which are regarded as comments, are both ignored. Duplicates of names already found are also ignored. Both the configuration files and the provider classes will be loaded using the thread context class loader.

This class is thread-safe.

see
java.nio.charset.spi.CharsetProvider
since
Android 1.0

Fields Summary
private static final String
PROVIDER_CONFIGURATION_FILE_NAME
private static final String
PROVIDER_CONFIGURATION_FILE_ENCODING
private static final String
PROVIDER_CONFIGURATION_FILE_COMMENT
private static ClassLoader
systemClassLoader
private static com.ibm.icu4jni.charset.CharsetProviderICU
_builtInProvider
private static TreeMap
_builtInCharsets
private final String
canonicalName
private final HashSet
aliasesSet
private static HashMap
cachedCharsetTable
private static HashMap
cachedCharsetDecoderTable
private static HashMap
cachedCharsetEncoderTable
Constructors Summary
protected Charset(String canonicalName, String[] aliases)
Constructs a Charset object. Duplicated aliases are ignored.

param
canonicalName the canonical name of the charset.
param
aliases an array containing all aliases of the charset. May be null.
throws
IllegalCharsetNameException on an illegal value being supplied for either canonicalName or for any element of aliases.
since
Android 1.0


    /*
     * -------------------------------------------------------------------
     * Global initialization
     * -------------------------------------------------------------------
     */
     
        /*
         * The built-in provider is instantiated inside a privileged action so
         * that it gets created even when the calling code has no privilege to
         * access charset providers.
         */
        PrivilegedAction<CharsetProviderICU> providerFactory =
                new PrivilegedAction<CharsetProviderICU>() {
                    public CharsetProviderICU run() {
                        return new CharsetProviderICU();
                    }
                };
        _builtInProvider = AccessController.doPrivileged(providerFactory);
    
        // A null canonical name is rejected with a NullPointerException.
        if (null == canonicalName) {
            throw new NullPointerException();
        }
        // Reject canonical names that are empty or contain illegal characters.
        checkCharsetName(canonicalName);
        this.canonicalName = canonicalName;
        // Validate every alias before storing it; the set drops duplicates.
        this.aliasesSet = new HashSet<String>();
        if (null != aliases) {
            for (String alias : aliases) {
                checkCharsetName(alias);
                this.aliasesSet.add(alias);
            }
        }
    
Methods Summary
private static voidaddCharsets(java.nio.charset.spi.CharsetProvider cp, java.util.TreeMap charsets)

        // Walk every charset the provider offers; a name that is already
        // present in the map keeps its existing mapping.
        for (Iterator<Charset> offered = cp.charsets(); offered.hasNext();) {
            Charset candidate = offered.next();
            String key = candidate.name();
            if (charsets.containsKey(key)) {
                continue;
            }
            charsets.put(key, candidate);
        }
    
public final java.util.Setaliases()
Gets the set of this charset's aliases.

return
an unmodifiable set of this charset's aliases.
since
Android 1.0

        return Collections.unmodifiableSet(this.aliasesSet);
    
public static java.util.SortedMapavailableCharsets()
Gets a map of all available charsets supported by the runtime.

The returned map contains mappings from canonical names to corresponding instances of Charset. The canonical names can be considered as case-insensitive.

return
an unmodifiable map of all available charsets supported by the runtime.
since
Android 1.0

        // Start from a private copy of the built-in charsets. The shared
        // cache is filled through a local map and only published once it is
        // complete, and it is always read under the class lock, so no thread
        // can ever clone a partially populated map.
        TreeMap<String, Charset> charsets;
        synchronized (Charset.class) {
            if (null == _builtInCharsets) {
                TreeMap<String, Charset> builtIns = new TreeMap<String, Charset>(
                        IgnoreCaseComparator.getInstance());
                _builtInProvider.putCharsets(builtIns);
                _builtInCharsets = builtIns;
            }
            charsets = (TreeMap<String, Charset>) _builtInCharsets.clone();
        }

        // Collect all charsets provided by charset providers
        ClassLoader contextClassLoader = getContextClassLoader();
        Enumeration<URL> e = null;
        try {
            if (null != contextClassLoader) {
                e = contextClassLoader
                        .getResources(PROVIDER_CONFIGURATION_FILE_NAME);
            } else {
                // No context class loader: fall back to the system one
                getSystemClassLoader();
                e = systemClassLoader
                        .getResources(PROVIDER_CONFIGURATION_FILE_NAME);
            }
            // Examine each configuration file
            while (e.hasMoreElements()) {
                loadConfiguredCharsets(e.nextElement(), contextClassLoader,
                        charsets);
            }
        } catch (IOException ex) {
            // Unexpected ClassLoader exception, ignore
        }
        // Callers only ever see a read-only view of the private copy
        return Collections.unmodifiableSortedMap(charsets);
    
private static voidcacheCharset(java.nio.charset.Charset cs)

        // Register the charset under its canonical name ...
        cachedCharsetTable.put(cs.name(), cs);
        // ... and under each of its aliases, so either spelling hits the cache.
        Set<String> aliasNames = cs.aliases();
        if (null == aliasNames) {
            return;
        }
        for (String aliasName : aliasNames) {
            cachedCharsetTable.put(aliasName, cs);
        }
    
public booleancanEncode()
Returns true if this charset supports encoding, false otherwise.

return
true if this charset supports encoding, false otherwise.
since
Android 1.0

        // Default implementation: always reports that encoding is supported.
        return true;
    
private static voidcheckCharsetName(java.lang.String name)

        // An empty string is illegal charset name
        if (name.length() == 0) {
            throw new IllegalCharsetNameException(name);
        }
        // The first character must be a letter or a digit
        // This is related to HARMONY-68 (won't fix)
        // char first = name.charAt(0);
        // if (!isLetter(first) && !isDigit(first)) {
        // throw new IllegalCharsetNameException(name);
        // }
        // Check the remaining characters
        int length = name.length();
        for (int i = 0; i < length; i++) {
            char c = name.charAt(i);
            if (!isLetter(c) && !isDigit(c) && !isSpecial(c)) {
                throw new IllegalCharsetNameException(name);
            }
        }
    
public final intcompareTo(java.nio.charset.Charset charset)
Compares this charset with the given charset. This comparison is based on the case-insensitive canonical names of the charsets.

param
charset the given object to be compared with.
return
a negative integer if less than the given object, a positive integer if larger than it, or 0 if equal to it.
since
Android 1.0

        return this.canonicalName.compareToIgnoreCase(charset.canonicalName);
    
public abstract booleancontains(java.nio.charset.Charset charset)
Determines whether this charset is a super set of the given charset.

param
charset a given charset.
return
true if this charset is a super set of the given charset, false if it's unknown or this charset is not a superset of the given charset.
since
Android 1.0

public final java.nio.CharBufferdecode(java.nio.ByteBuffer buffer)
Decodes the content of the specified byte buffer and writes it to a character buffer that is to be returned.

The default action in case of decoding errors is CodingErrorAction.REPLACE.

param
buffer the byte buffer containing the content to be decoded.
return
a character buffer containing the output of the decoding.
since
Android 1.0

        final CharsetDecoder decoder = getCachedCharsetDecoder(canonicalName);
        // The cached decoder is shared, so it is only used while holding its
        // monitor.
        synchronized (decoder) {
            try {
                return decoder.decode(buffer);
            } catch (CharacterCodingException ex) {
                // Surface a coding failure as an Error that keeps the cause.
                throw new Error(ex.getMessage(), ex);
            }
        }
    
public static java.nio.charset.CharsetdefaultCharset()
Gets the system default charset from the virtual machine.

return
the default charset.
since
Android 1.0

        // Read the "file.encoding" system property inside a privileged action.
        String encoding = AccessController
                .doPrivileged(new PrivilegedAction<String>() {
                    public String run() {
                        return System.getProperty("file.encoding"); //$NON-NLS-1$
                    }
                });
        try {
            return Charset.forName(encoding);
        } catch (IllegalArgumentException e) {
            // Fall back to UTF-8 whenever the property cannot be used: it is
            // unset (a null name is rejected with IllegalArgumentException),
            // it is not a legal charset name (IllegalCharsetNameException), or
            // it names a charset this runtime lacks
            // (UnsupportedCharsetException). Both of the latter are subclasses
            // of IllegalArgumentException.
            return Charset.forName("UTF-8"); //$NON-NLS-1$
        }
    
public java.lang.StringdisplayName()
Gets the name of this charset for the default locale. This is the default implementation of this method which always returns the canonical name of this charset. Subclasses overriding this method may return a display name that was localized.

return
the name of this charset for the default locale.
since
Android 1.0

        // Default implementation: the display name is the canonical name.
        return name();
    
public java.lang.StringdisplayName(java.util.Locale l)
Gets the name of this charset for the specified locale. This is the default implementation of this method which always returns the canonical name of this charset. Subclasses overriding this method may return a display name that was localized.

param
l a certain locale
return
the name of this charset for the specified locale.
since
Android 1.0

        // Default implementation: the locale is ignored and the canonical
        // name is returned.
        return name();
    
public final synchronized java.nio.ByteBufferencode(java.nio.CharBuffer buffer)
Encodes the content of the given character buffer and outputs to a byte buffer that is to be returned.

The default action in case of encoding errors is CodingErrorAction.REPLACE.

param
buffer the character buffer containing the content to be encoded.
return
the result of the encoding.
since
Android 1.0

        final CharsetEncoder encoder = getCachedCharsetEncoder(canonicalName);
        // The cached encoder is shared, so it is only used while holding its
        // monitor.
        synchronized (encoder) {
            try {
                return encoder.encode(buffer);
            } catch (CharacterCodingException ex) {
                // Surface a coding failure as an Error that keeps the cause.
                throw new Error(ex.getMessage(), ex);
            }
        }
    
public final java.nio.ByteBufferencode(java.lang.String s)
Encodes a string and outputs to a byte buffer that is to be returned.

The default action in case of encoding errors is CodingErrorAction.REPLACE.

param
s the string to be encoded.
return
the result of the encoding.
since
Android 1.0

        return encode(CharBuffer.wrap(s));
    
public final booleanequals(java.lang.Object obj)
Determines whether this charset is equal to the given object. They are considered to be equal if they have the same canonical name.

param
obj the given object to be compared with.
return
true if they have the same canonical name, otherwise false.
since
Android 1.0

        if (obj instanceof Charset) {
            Charset that = (Charset) obj;
            return this.canonicalName.equals(that.canonicalName);
        }
        return false;
    
public static java.nio.charset.CharsetforName(java.lang.String charsetName)
Gets a Charset instance for the specified charset name.

param
charsetName the canonical name of the charset or an alias.
return
a Charset instance for the specified charset name.
throws
IllegalCharsetNameException if the specified charset name is illegal.
throws
UnsupportedCharsetException if the desired charset is not supported by this runtime.
since
Android 1.0

        final Charset found = forNameInternal(charsetName);
        if (null != found) {
            return found;
        }
        // The internal lookup returns null when no provider knows the name.
        throw new UnsupportedCharsetException(charsetName);
    
private static java.nio.charset.CharsetforNameInternal(java.lang.String charsetName)

        // Looks a charset up by canonical name or alias: first in the cache,
        // then in the built-in provider, then in the configured providers.
        // Returns null when nothing matches. A null name is rejected with
        // IllegalArgumentException; a syntactically illegal name is rejected
        // by checkCharsetName.
        if (null == charsetName) {
            throw new IllegalArgumentException();
        }
        checkCharsetName(charsetName);
        // The cache lookup and every cache update happen under the class lock
        synchronized (Charset.class) {
            // Try to get Charset from cachedCharsetTable
            Charset cs = getCachedCharset(charsetName);
            if (null != cs) {
                return cs;
            }
            // Try built-in charsets
            cs = _builtInProvider.charsetForName(charsetName);
            if (null != cs) {
                cacheCharset(cs);
                return cs;
            }

            // Collect all charsets provided by charset providers
            ClassLoader contextClassLoader = getContextClassLoader();
            Enumeration<URL> e = null;
            try {
                if (null != contextClassLoader) {
                    e = contextClassLoader
                            .getResources(PROVIDER_CONFIGURATION_FILE_NAME);
                } else {
                    // No context class loader: fall back to the system one
                    getSystemClassLoader();
                    e = systemClassLoader
                            .getResources(PROVIDER_CONFIGURATION_FILE_NAME);
                }
                // Examine each configuration file
                while (e.hasMoreElements()) {
                    cs = searchConfiguredCharsets(charsetName,
                            contextClassLoader, e.nextElement());
                    if (null != cs) {
                        // First provider that knows the name wins
                        cacheCharset(cs);
                        return cs;
                    }
                }
            } catch (IOException ex) {
                // Unexpected ClassLoader exception, ignore
            }
        }
        // No cache entry, built-in charset or configured provider matched
        return null;
    
private static java.nio.charset.CharsetgetCachedCharset(java.lang.String name)

        return cachedCharsetTable.get(name);
    
private java.nio.charset.CharsetDecodergetCachedCharsetDecoder(java.lang.String name)

        synchronized (cachedCharsetDecoderTable) {
            CharsetDecoder d = cachedCharsetDecoderTable
                    .get(name);
            if (null == d) {
                d = this.newDecoder();
                d.onMalformedInput(CodingErrorAction.REPLACE);
                d.onUnmappableCharacter(CodingErrorAction.REPLACE);
                cachedCharsetDecoderTable.put(name, d);
            }
            return d;
        }
    
private java.nio.charset.CharsetEncodergetCachedCharsetEncoder(java.lang.String name)

        synchronized (cachedCharsetEncoderTable) {
            CharsetEncoder e = cachedCharsetEncoderTable
                    .get(name);
            if (null == e) {
                e = this.newEncoder();
                e.onMalformedInput(CodingErrorAction.REPLACE);
                e.onUnmappableCharacter(CodingErrorAction.REPLACE);
                cachedCharsetEncoderTable.put(name, e);
            }
            return e;
        }
    
private static java.lang.ClassLoadergetContextClassLoader()

        final Thread current = Thread.currentThread();
        // Read the calling thread's context class loader inside a privileged
        // action.
        PrivilegedAction<ClassLoader> readLoader =
                new PrivilegedAction<ClassLoader>() {
                    public ClassLoader run() {
                        return current.getContextClassLoader();
                    }
                };
        return AccessController.doPrivileged(readLoader);
    
private static voidgetSystemClassLoader()

        // Already resolved on an earlier call: nothing to do.
        if (null != systemClassLoader) {
            return;
        }
        // Resolve the system class loader inside a privileged action and keep
        // it in the static field for later calls.
        PrivilegedAction<ClassLoader> readLoader =
                new PrivilegedAction<ClassLoader>() {
                    public ClassLoader run() {
                        return ClassLoader.getSystemClassLoader();
                    }
                };
        systemClassLoader = AccessController.doPrivileged(readLoader);
    
public final inthashCode()
Gets the hash code of this charset.

return
the hash code of this charset.
since
Android 1.0

        // The hash is derived from the canonical name only, matching equals().
        return name().hashCode();
    
private static booleanisDigit(char c)

        return ('0" <= c && c <= '9");
    
private static booleanisLetter(char c)

        return ('a" <= c && c <= 'z") || ('A" <= c && c <= 'Z");
    
public final booleanisRegistered()
Indicates whether this charset is known to be registered in the IANA Charset Registry.

return
true if the charset is known to be registered, otherwise returns false.
since
Android 1.0

        return !canonicalName.startsWith("x-") //$NON-NLS-1$
                && !canonicalName.startsWith("X-"); //$NON-NLS-1$
    
private static booleanisSpecial(char c)

        return ('-" == c || '." == c || ':" == c || '_" == c);
    
public static booleanisSupported(java.lang.String charsetName)
Determines whether the specified charset is supported by this runtime.

param
charsetName the name of the charset.
return
true if the specified charset is supported, otherwise false.
throws
IllegalCharsetNameException if the specified charset name is illegal.
since
Android 1.0

        Charset cs = forNameInternal(charsetName);
        return (null != cs);
    
private static voidloadConfiguredCharsets(java.net.URL configFile, java.lang.ClassLoader contextClassLoader, java.util.TreeMap charsets)

        // Reads one provider configuration file and adds the charsets of
        // every provider it lists to the given map.
        BufferedReader reader = null;
        InputStream is = null;
        try {
            is = configFile.openStream();
            // Read each line for charset provider class names
            // BEGIN android-modified
            reader = new BufferedReader(new InputStreamReader(is,
                            PROVIDER_CONFIGURATION_FILE_ENCODING), 8192);
            // END android-modified
            String providerClassName = reader.readLine();
            while (null != providerClassName) {
                providerClassName = trimClassName(providerClassName);
                // Skip comments and blank lines
                if (providerClassName.length() > 0) { // Non empty string
                    // Load the charset provider
                    Object cp = null;
                    try {
                        Class<?> c = Class.forName(providerClassName, true,
                                contextClassLoader);
                        cp = c.newInstance();
                    } catch (Exception ex) {
                        // try to use system classloader when context
                        // classloader failed to load config file.
                        try {
                            getSystemClassLoader();
                            Class<?> c = Class.forName(providerClassName, true,
                                    systemClassLoader);
                            cp = c.newInstance();
                        } catch (SecurityException e) {
                            // Not allowed to load this provider: skip it, the
                            // same way searchConfiguredCharsets does, rather
                            // than failing the whole listing.
                        } catch (Exception e) {
                            throw new Error(e.getMessage(), e);
                        }
                    }
                    if (cp != null) {
                        // Put the charsets supported by this provider into
                        // the map
                        addCharsets((CharsetProvider) cp, charsets);
                    }
                }
                // Read the next line of the config file
                providerClassName = reader.readLine();
            }
        } catch (IOException ex) {
            // Can't read this configuration file, ignore
        } finally {
            try {
                if (null != reader) {
                    // Closing the reader also closes the wrapped stream
                    reader.close();
                } else if (null != is) {
                    // The reader was never created: close the raw stream so
                    // it does not leak
                    is.close();
                }
            } catch (IOException ex) {
                // Ignore closing exception
            }
        }
    
public final java.lang.Stringname()
Gets the canonical name of this charset.

return
this charset's name in canonical form.
since
Android 1.0

        // The canonical name is fixed at construction time (final field).
        return this.canonicalName;
    
public abstract java.nio.charset.CharsetDecodernewDecoder()
Gets a new instance of a decoder for this charset.

return
a new instance of a decoder for this charset.
since
Android 1.0

public abstract java.nio.charset.CharsetEncodernewEncoder()
Gets a new instance of an encoder for this charset.

return
a new instance of an encoder for this charset.
since
Android 1.0

private static java.nio.charset.CharsetsearchConfiguredCharsets(java.lang.String charsetName, java.lang.ClassLoader contextClassLoader, java.net.URL configFile)

        // Reads one provider configuration file and asks each provider it
        // lists for the named charset. Returns the first match, or null when
        // no provider in this file knows the name or the file cannot be read.
        BufferedReader reader = null;
        InputStream is = null;
        try {
            is = configFile.openStream();
            // Read each line for charset provider class names
            // BEGIN android-modified
            reader = new BufferedReader(new InputStreamReader(is,
                            PROVIDER_CONFIGURATION_FILE_ENCODING), 8192);
            // END android-modified
            String providerClassName = reader.readLine();
            while (null != providerClassName) {
                providerClassName = trimClassName(providerClassName);
                if (providerClassName.length() > 0) { // Non empty string
                    // Load the charset provider
                    Object cp = null;
                    try {
                        Class<?> c = Class.forName(providerClassName, true,
                                contextClassLoader);
                        cp = c.newInstance();
                    } catch (Exception ex) {
                        // try to use system classloader when context
                        // classloader failed to load config file.
                        try {
                            getSystemClassLoader();
                            Class<?> c = Class.forName(providerClassName, true,
                                    systemClassLoader);
                            cp = c.newInstance();
                        } catch (SecurityException e) {
                            // BEGIN android-changed
                            // ignore
                            // END android-changed
                        } catch (Exception e) {
                            throw new Error(e.getMessage(), e);
                        }
                    }
                    // BEGIN android-changed
                    if (cp != null) {
                        // Try to get the desired charset from this provider
                        Charset cs = ((CharsetProvider) cp)
                                .charsetForName(charsetName);
                        if (null != cs) {
                            return cs;
                        }
                    }
                    // END android-changed
                }
                // Read the next line of the config file
                providerClassName = reader.readLine();
            }
            return null;
        } catch (IOException ex) {
            // Can't read this configuration file
            return null;
        } finally {
            try {
                if (null != reader) {
                    // Closing the reader also closes the wrapped stream
                    reader.close();
                } else if (null != is) {
                    // The reader was never created: close the raw stream so
                    // it does not leak
                    is.close();
                }
            } catch (IOException ex) {
                // Ignore closing exception
            }
        }
    
public final java.lang.StringtoString()
Gets a string representation of this charset. Usually this contains the canonical name of the charset.

return
a string representation of this charset.
since
Android 1.0

        return "Charset[" + this.canonicalName + "]"; //$NON-NLS-1$//$NON-NLS-2$
    
private static java.lang.StringtrimClassName(java.lang.String name)

        // Cut the line at the first comment marker, if there is one, then
        // strip the surrounding whitespace.
        final int commentStart = name.indexOf(PROVIDER_CONFIGURATION_FILE_COMMENT);
        final String withoutComment = (commentStart == -1)
                ? name
                : name.substring(0, commentStart);
        return withoutComment.trim();