File | Doc | Category | Size | Date | Package
MIME2Java.java | API Doc | Apache Tomcat 6.0.14 | 19482 | Fri Jul 20 04:20:36 BST 2007 | org.apache.catalina.util

MIME2Java

public class MIME2Java extends Object
MIME2Java is a convenience class which handles conversions between MIME charset names and Java encoding names.

The supported XML encodings are the intersection of XML-supported code sets and those supported in JDK 1.1.

MIME charset names are used on xmlEncoding parameters to methods such as TXDocument#setEncoding and DTD#setEncoding.

Java encoding names are used on encoding parameters to methods such as TXDocument#printWithFormat and DTD#printExternal.

Common Name

Use this name in XML files

Name Type

Xerces converts to this Java Encoder Name

8 bit Unicode

UTF-8

IANA

UTF8

ISO Latin 1

ISO-8859-1

MIME

ISO-8859-1

ISO Latin 2

ISO-8859-2

MIME

ISO-8859-2

ISO Latin 3

ISO-8859-3

MIME

ISO-8859-3

ISO Latin 4

ISO-8859-4

MIME

ISO-8859-4

ISO Latin Cyrillic

ISO-8859-5

MIME

ISO-8859-5

ISO Latin Arabic

ISO-8859-6

MIME

ISO-8859-6

ISO Latin Greek

ISO-8859-7

MIME

ISO-8859-7

ISO Latin Hebrew

ISO-8859-8

MIME

ISO-8859-8

ISO Latin 5

ISO-8859-9

MIME

ISO-8859-9

EBCDIC: US

ebcdic-cp-us

IANA

cp037

EBCDIC: Canada

ebcdic-cp-ca

IANA

cp037

EBCDIC: Netherlands

ebcdic-cp-nl

IANA

cp037

EBCDIC: Denmark

ebcdic-cp-dk

IANA

cp277

EBCDIC: Norway

ebcdic-cp-no

IANA

cp277

EBCDIC: Finland

ebcdic-cp-fi

IANA

cp278

EBCDIC: Sweden

ebcdic-cp-se

IANA

cp278

EBCDIC: Italy

ebcdic-cp-it

IANA

cp280

EBCDIC: Spain, Latin America

ebcdic-cp-es

IANA

cp284

EBCDIC: Great Britain

ebcdic-cp-gb

IANA

cp285

EBCDIC: France

ebcdic-cp-fr

IANA

cp297

EBCDIC: Arabic

ebcdic-cp-ar1

IANA

cp420

EBCDIC: Hebrew

ebcdic-cp-he

IANA

cp424

EBCDIC: Switzerland

ebcdic-cp-ch

IANA

cp500

EBCDIC: Roece

ebcdic-cp-roece

IANA

cp870

EBCDIC: Yugoslavia

ebcdic-cp-yu

IANA

cp870

EBCDIC: Iceland

ebcdic-cp-is

IANA

cp871

EBCDIC: Urdu

ebcdic-cp-ar2

IANA

cp918

Chinese for PRC, mixed 1/2 byte

gb2312

MIME

GB2312

Extended Unix Code, packed for Japanese

euc-jp

MIME

eucjis

Japanese: iso-2022-jp

iso-2022-jp

MIME

JIS

Japanese: Shift JIS

Shift_JIS

MIME

SJIS

Chinese: Big5

Big5

MIME

Big5

Extended Unix Code, packed for Korean

euc-kr

MIME

KSC5601

Cyrillic

koi8-r

MIME

koi8-r

version
$Revision: 467222 $ $Date: 2006-10-24 05:17:11 +0200 (mar., 24 oct. 2006) $
author
TAMURA Kent <kent@trl.ibm.co.jp>

Fields Summary
private static Hashtable
s_enchash
private static Hashtable
s_revhash
Constructors Summary
private MIME2Java()

        // Forward table: <preferred MIME name, upper case> -> <Java encoding name>.
        // Keys are stored in upper case because convert() upper-cases its argument
        // before the lookup.
        s_enchash = new Hashtable();
        s_enchash.put("UTF-8", "UTF8");
        s_enchash.put("US-ASCII",        "8859_1");    // ?
        s_enchash.put("ISO-8859-1",      "8859_1");
        s_enchash.put("ISO-8859-2",      "8859_2");
        s_enchash.put("ISO-8859-3",      "8859_3");
        s_enchash.put("ISO-8859-4",      "8859_4");
        s_enchash.put("ISO-8859-5",      "8859_5");
        s_enchash.put("ISO-8859-6",      "8859_6");
        s_enchash.put("ISO-8859-7",      "8859_7");
        s_enchash.put("ISO-8859-8",      "8859_8");
        s_enchash.put("ISO-8859-9",      "8859_9");
        s_enchash.put("ISO-2022-JP",     "JIS");
        s_enchash.put("SHIFT_JIS",       "SJIS");
        s_enchash.put("EUC-JP",          "EUCJIS");
        s_enchash.put("GB2312",          "GB2312");
        s_enchash.put("BIG5",            "Big5");
        s_enchash.put("EUC-KR",          "KSC5601");
        s_enchash.put("ISO-2022-KR",     "ISO2022KR");
        s_enchash.put("KOI8-R",          "KOI8_R");

        // EBCDIC names: several national MIME names share one Java code page.
        s_enchash.put("EBCDIC-CP-US",    "CP037");
        s_enchash.put("EBCDIC-CP-CA",    "CP037");
        s_enchash.put("EBCDIC-CP-NL",    "CP037");
        s_enchash.put("EBCDIC-CP-DK",    "CP277");
        s_enchash.put("EBCDIC-CP-NO",    "CP277");
        s_enchash.put("EBCDIC-CP-FI",    "CP278");
        s_enchash.put("EBCDIC-CP-SE",    "CP278");
        s_enchash.put("EBCDIC-CP-IT",    "CP280");
        s_enchash.put("EBCDIC-CP-ES",    "CP284");
        s_enchash.put("EBCDIC-CP-GB",    "CP285");
        s_enchash.put("EBCDIC-CP-FR",    "CP297");
        s_enchash.put("EBCDIC-CP-AR1",   "CP420");
        s_enchash.put("EBCDIC-CP-HE",    "CP424");
        s_enchash.put("EBCDIC-CP-CH",    "CP500");
        s_enchash.put("EBCDIC-CP-ROECE", "CP870");
        s_enchash.put("EBCDIC-CP-YU",    "CP870");
        s_enchash.put("EBCDIC-CP-IS",    "CP871");
        s_enchash.put("EBCDIC-CP-AR2",   "CP918");

                                                // j:CNS11643 -> EUC-TW?
                                                // ISO-2022-CN? ISO-2022-CN-EXT?

        // Reverse table: <Java encoding name, upper case> -> <MIME name>.
        // A Hashtable keeps a single value per key, so where several MIME names
        // share one Java encoding only one of them can be returned by reverse().
        s_revhash = new Hashtable();
        s_revhash.put("UTF8", "UTF-8");
        // US-ASCII has no reverse entry: 8859_1 reverses to ISO-8859-1.
        s_revhash.put("8859_1", "ISO-8859-1");
        s_revhash.put("8859_2", "ISO-8859-2");
        s_revhash.put("8859_3", "ISO-8859-3");
        s_revhash.put("8859_4", "ISO-8859-4");
        s_revhash.put("8859_5", "ISO-8859-5");
        s_revhash.put("8859_6", "ISO-8859-6");
        s_revhash.put("8859_7", "ISO-8859-7");
        s_revhash.put("8859_8", "ISO-8859-8");
        s_revhash.put("8859_9", "ISO-8859-9");
        s_revhash.put("JIS", "ISO-2022-JP");
        s_revhash.put("SJIS", "Shift_JIS");
        s_revhash.put("EUCJIS", "EUC-JP");
        s_revhash.put("GB2312", "GB2312");
        s_revhash.put("BIG5", "Big5");
        s_revhash.put("KSC5601", "EUC-KR");
        s_revhash.put("ISO2022KR", "ISO-2022-KR");
        s_revhash.put("KOI8_R", "KOI8-R");

        // EBCDIC code pages. CP037, CP277, CP278 and CP870 each serve several MIME
        // names in the forward table. This list used to put() every alias in turn,
        // and each put() silently replaced the previous value for the same key.
        // Only the entry that actually survived (the last alias registered) is kept,
        // so every lookup returns exactly what it returned before.
        s_revhash.put("CP037", "EBCDIC-CP-NL");     // shared with EBCDIC-CP-US, EBCDIC-CP-CA
        s_revhash.put("CP277", "EBCDIC-CP-NO");     // shared with EBCDIC-CP-DK
        s_revhash.put("CP278", "EBCDIC-CP-SE");     // shared with EBCDIC-CP-FI
        s_revhash.put("CP280", "EBCDIC-CP-IT");
        s_revhash.put("CP284", "EBCDIC-CP-ES");
        s_revhash.put("CP285", "EBCDIC-CP-GB");
        s_revhash.put("CP297", "EBCDIC-CP-FR");
        s_revhash.put("CP420", "EBCDIC-CP-AR1");
        s_revhash.put("CP424", "EBCDIC-CP-HE");
        s_revhash.put("CP500", "EBCDIC-CP-CH");
        s_revhash.put("CP870", "EBCDIC-CP-YU");     // shared with EBCDIC-CP-ROECE
        s_revhash.put("CP871", "EBCDIC-CP-IS");
        s_revhash.put("CP918", "EBCDIC-CP-AR2");
    
    
Methods Summary
public static java.lang.String convert(java.lang.String mimeCharsetName)
Convert a MIME charset name, also known as an XML encoding name, to a Java encoding name.

param
mimeCharsetName Case insensitive MIME charset name: UTF-8, US-ASCII, ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS, EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R, EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK, EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT, EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1, EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU, EBCDIC-CP-IS and EBCDIC-CP-AR2.
return
Java encoding name, or null if mimeCharsetName is unknown.
see
#reverse

        // Upper-case with a fixed locale: the lookup keys are ASCII upper case, and the
        // default-locale toUpperCase() turns 'i' into a dotted capital I under Turkish
        // or Azeri locales, which made names such as "iso-8859-1" miss the table.
        // Unknown names yield null; a null argument still throws NullPointerException.
        return (String)s_enchash.get(mimeCharsetName.toUpperCase(java.util.Locale.ENGLISH));
    
public static java.lang.String reverse(java.lang.String encoding)
Convert a Java encoding name to MIME charset name. Available values of encoding are "UTF8", "8859_1", "8859_2", "8859_3", "8859_4", "8859_5", "8859_6", "8859_7", "8859_8", "8859_9", "JIS", "SJIS", "EUCJIS", "GB2312", "BIG5", "KSC5601", "ISO2022KR", "KOI8_R", "CP037", "CP277", "CP278", "CP280", "CP284", "CP285", "CP297", "CP420", "CP424", "CP500", "CP870", "CP871" and "CP918".

param
encoding Case insensitive Java encoding name: UTF8, 8859_1, 8859_2, 8859_3, 8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, EUCJIS, GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278, CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871 and CP918.
return
MIME charset name, or null if encoding is unknown.
see
#convert

        // Upper-case with a fixed locale: the lookup keys are ASCII upper case, and the
        // default-locale toUpperCase() turns 'i' into a dotted capital I under Turkish
        // or Azeri locales, which made names such as "sjis" or "big5" miss the table.
        // Unknown names yield null; a null argument still throws NullPointerException.
        return (String)s_revhash.get(encoding.toUpperCase(java.util.Locale.ENGLISH));