File | Doc | Category | Size | Date | Package
CodeSetConversion.java | API Doc | Java SE 5 API | 26420 | Fri Aug 26 14:54:20 BST 2005 | com.sun.corba.se.impl.encoding

CodeSetConversion

public class CodeSetConversion extends Object
Collection of classes, interfaces, and factory methods for CORBA code set conversion. This is mainly used to shield other code from the sun.io converters which might change, as well as provide some basic translation from conversion to CORBA error exceptions. Some extra work is required here to facilitate the way CORBA says it uses UTF-16 as of the 00-11-03 spec. REVISIT - Since the nio.Charset and nio.Charset.Encoder/Decoder use NIO ByteBuffer and NIO CharBuffer, the interaction and interface between this class and the CDR streams should be looked at more closely for optimizations to avoid unnecessary copying of data between char[] & CharBuffer and byte[] & ByteBuffer, especially DirectByteBuffers.

Fields Summary
private static CodeSetConversion
implementation
private static final int
FALLBACK_CODESET
private CodeSetCache
cache
Constructors Summary
private CodeSetConversion()

Methods Summary
public com.sun.corba.se.impl.encoding.CodeSetConversion$BTCConverter getBTCConverter(OSFCodeSetRegistry.Entry codeset)
BTCConverter factory for single byte or variable width encodings.

        return new JavaBTCConverter(codeset);
    
public com.sun.corba.se.impl.encoding.CodeSetConversion$BTCConverter getBTCConverter(OSFCodeSetRegistry.Entry codeset, boolean defaultToLittleEndian)
BTCConverter factory for fixed width multibyte encodings.


        if (codeset == OSFCodeSetRegistry.UTF_16 ||
            codeset == OSFCodeSetRegistry.UCS_2) {

            return new UTF16BTCConverter(defaultToLittleEndian);
        } else {
            return new JavaBTCConverter(codeset);
        }
    
public com.sun.corba.se.impl.encoding.CodeSetConversion$CTBConverter getCTBConverter(OSFCodeSetRegistry.Entry codeset)
CTB converter factory for single byte or variable length encodings.

        // Fixed width encodings align on their maximum bytes per char;
        // anything else aligns on a single byte boundary.
        final int alignment;
        if (codeset.isFixedWidth()) {
            alignment = codeset.getMaxBytesPerChar();
        } else {
            alignment = 1;
        }

        return new JavaCTBConverter(codeset, alignment);
    
public com.sun.corba.se.impl.encoding.CodeSetConversion$CTBConverter getCTBConverter(OSFCodeSetRegistry.Entry codeset, boolean littleEndian, boolean useByteOrderMarkers)
CTB converter factory for multibyte (mainly fixed) encodings. Because of the awkwardness with byte order markers and the possibility of using UCS-2, you must specify both the endianness of the stream as well as whether or not to use byte order markers if applicable. UCS-2 has no byte order markers. UTF-16 has optional markers. If you select useByteOrderMarkers, there is no guarantee that the encoding will use the endianness specified.


        // UCS2 doesn't have byte order markers, and we're encoding it
        // as UTF-16 since UCS2 isn't available in all Java platforms.
        // They should be identical with only minor differences in
        // negative cases.
        if (codeset == OSFCodeSetRegistry.UCS_2)
            return new UTF16CTBConverter(littleEndian);

        // We can write UTF-16 with or without a byte order marker.
        if (codeset == OSFCodeSetRegistry.UTF_16) {
            if (useByteOrderMarkers)
                return new UTF16CTBConverter();
            else
                return new UTF16CTBConverter(littleEndian);
        }

        // Everything else uses the generic JavaCTBConverter.
        //
        // Variable width encodings are aligned on 1 byte boundaries.
        // A fixed width encoding with a max. of 4 bytes/char should
        // align on a 4 byte boundary.  Note that UTF-16 is a special
        // case because of the optional byte order marker, so it's
        // handled above.
        //
        // This doesn't matter for GIOP 1.2 wchars and wstrings
        // since the encoded bytes are treated as an encapsulation.
        int alignment = (!codeset.isFixedWidth() ?
                         1 :
                         codeset.getMaxBytesPerChar());
        
        return new JavaCTBConverter(codeset, alignment);
    
public static final com.sun.corba.se.impl.encoding.CodeSetConversion impl()
CodeSetConversion is a singleton, and this is the access point.

    

                   
         
	return CodeSetConversionHolder.csc ;
    
public CodeSetComponentInfo.CodeSetContext negotiate(CodeSetComponentInfo client, CodeSetComponentInfo server)
Perform the code set negotiation algorithm and come up with the two encodings to use.

        // Negotiate the char encoding first; when no agreement is
        // reached, UTF-8 is used instead.
        final int negotiatedChar =
            selectEncoding(client.getCharComponent(),
                           server.getCharComponent());
        final int charCodeSet =
            (negotiatedChar == CodeSetConversion.FALLBACK_CODESET)
                ? OSFCodeSetRegistry.UTF_8.getNumber()
                : negotiatedChar;

        // Then negotiate the wchar encoding; when no agreement is
        // reached, UTF-16 is used instead.
        final int negotiatedWChar =
            selectEncoding(client.getWCharComponent(),
                           server.getWCharComponent());
        final int wcharCodeSet =
            (negotiatedWChar == CodeSetConversion.FALLBACK_CODESET)
                ? OSFCodeSetRegistry.UTF_16.getNumber()
                : negotiatedWChar;

        return new CodeSetComponentInfo.CodeSetContext(charCodeSet,
                                                       wcharCodeSet);
    
private int selectEncoding(CodeSetComponentInfo.CodeSetComponent client, CodeSetComponentInfo.CodeSetComponent server)
Follows the code set negotiation algorithm in CORBA formal 99-10-07 13.7.2. Returns the proper negotiated OSF character encoding number or CodeSetConversion.FALLBACK_CODESET.


        // A zero ("null") native code set means the server chose not to
        // indicate one.  That is taken to mean it wants the first entry
        // of its conversion list; if its conversion list is empty as
        // well, the fallback code set is the only option.
        int serverNative = server.nativeCodeSet;

        if (serverNative == 0) {
            if (server.conversionCodeSets.length == 0) {
                return CodeSetConversion.FALLBACK_CODESET;
            }
            serverNative = server.conversionCodeSets[0];
        }

        // Best case: both sides share a native code set, so neither
        // has to convert.
        final int clientNative = client.nativeCodeSet;
        if (clientNative == serverNative) {
            return serverNative;
        }

        // Next: the client can convert to the server's native code set.
        for (int clientSupported : client.conversionCodeSets) {
            if (clientSupported == serverNative) {
                return serverNative;
            }
        }

        // Next: the server can convert to the client's native code set.
        for (int serverSupported : server.conversionCodeSets) {
            if (serverSupported == clientNative) {
                return clientNative;
            }
        }

        // Next: any code set both sides can convert to.  Conversion
        // lists run from most to least desired, and the server's list
        // drives the outer loop so its preference wins.
        for (int serverSupported : server.conversionCodeSets) {
            for (int clientSupported : client.conversionCodeSets) {
                if (serverSupported == clientSupported) {
                    return serverSupported;
                }
            }
        }

        // The spec calls for a compatibility check on the native code
        // sets before falling back, throwing CODESET_INCOMPATIBLE when
        // they are incompatible.  That check is deliberately skipped: it
        // doesn't make sense because loss free communication is always
        // possible with UTF8 and UTF16, the fallback code sets, and it
        // is also a lot of work to implement.

        // Nothing matched, so report the fallback.
        return CodeSetConversion.FALLBACK_CODESET;