// File:     Escape.java
// Doc:      API Doc
// Category: Glassfish v2 API
// Size:     42913
// Date:     Fri May 04 22:34:48 BST 2007
// Package:  com.sun.enterprise.diagnostics.report.html
//
// Escape.java

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 * 
 * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
 * 
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common Development
 * and Distribution License("CDDL") (collectively, the "License").  You
 * may not use this file except in compliance with the License. You can obtain
 * a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html
 * or glassfish/bootstrap/legal/LICENSE.txt.  See the License for the specific
 * language governing permissions and limitations under the License.
 * 
 * When distributing the software, include this License Header Notice in each
 * file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.
 * Sun designates this particular file as subject to the "Classpath" exception
 * as provided by Sun in the GPL Version 2 section of the License file that
 * accompanied this code.  If applicable, add the following below the License
 * Header, with the fields enclosed by brackets [] replaced by your own
 * identifying information: "Portions Copyrighted [year]
 * [name of copyright owner]"
 * 
 * Contributor(s):
 * 
 * If you wish your version of this file to be governed by only the CDDL or
 * only the GPL Version 2, indicate your decision by adding "[Contributor]
 * elects to include this software in this distribution under the [CDDL or GPL
 * Version 2] license."  If you don't indicate a single choice of license, a
 * recipient has the option to distribute your version of this file under
 * either the CDDL, the GPL Version 2 or to extend the choice of license to
 * its licensees as provided above.  However, if you add GPL Version 2 code
 * and therefore, elected the GPL Version 2 license, then the option applies
 * only if the new code is made subject to such option by the copyright
 * holder.
 */
package com.sun.enterprise.diagnostics.report.html;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * Implement HTML escapes.  Additional escapes can be added.
 * <p>
 * This class is a singleton.  If you subclass and override the
 * <code>escape</code> methods, use <code>setInstance</code> to
 * install your handler.
 */
public class Escape {
    
    /**
     * A value to signal an undefined entity.  The value is -1, which lies
     * outside the range of any <code>char</code>.
     */
    public static final int UNDEFINED = -1;
    
    /**
     * The instance to use.  Starts out <code>null</code>;
     * {@link #getInstance()} creates a default <code>Escape</code> the first
     * time it finds this field unset, and {@link #setInstance(Escape)}
     * replaces it.
     */
    private static Escape instance = null;
    
    /**
     * If true, use hexadecimal character references.  If false,
     * use decimal character references.  Defaults to false.
     * <p>
     * NOTE(review): {@link #encodeAsEntity(char)} does not consult this
     * flag; it always writes decimal references.
     */
    private boolean useHex = false;
    
    /**
     * These are the entities which are always replaced on output.  Add
     * entities which should always be recognized on input and always
     * replaced on output here.
     * <p>
     * Each key is the literal character; each value is the entity name, or
     * a <code>#</code>-prefixed numeric reference, without the surrounding
     * ampersand and semicolon.
     */
    private final Map<Character,String> alwaysReplace =
        new HashMap<Character,String>();
    {
        // Character.valueOf is used instead of the boxing constructor,
        // which is deprecated; map lookups behave the same either way.
        alwaysReplace.put(Character.valueOf('&'), "amp");
        alwaysReplace.put(Character.valueOf('<'), "lt");
        alwaysReplace.put(Character.valueOf('>'), "gt");
        alwaysReplace.put(Character.valueOf('"'), "quot");
        // The apostrophe has no named entity in this table, so it is
        // written as a numeric reference.
        alwaysReplace.put(Character.valueOf('\''), "#039");
        alwaysReplace.put(Character.valueOf('\u00A0'), "nbsp");
    }
    
    /**
     * Maps an entity name (without the surrounding ampersand and
     * semicolon) to the character it stands for.  This is the table
     * {@link #decodeAsEntity(String)} consults for named entities.
     * <p>
     * According to the original note on this field, the map is filled as
     * the reverse mapping by {@link #setEntity(String, char)} -- verify
     * against that method.
     */
    private final Map<String,Character> entityToChar =
        new HashMap<String,Character>();
    
    /**
     * Maps a character to its entity name (without the surrounding
     * ampersand and semicolon).  Add entities which should be recognized
     * on input but not (necessarily) generated on output here.
     * <p>
     * This set was automatically generated from the HTML 4.01 character
     * entity specification.  You can find it online at:
     * <a href="http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html"
     * >http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html</a>.
     * <p>
     * Note that this map is initialized using the
     * {@link #setEntity(String, char)} method, plus two direct
     * <code>put</code> calls at the end of the initializer block.
     */
    private final Map<Character,String> charToEntity = new HashMap<Character,String>();
    {
        /*
         * The following are the basic escapes in the ISO 8859-1
         * range.
         */
        /** no-break space = non-breaking space, U+00A0 ISOnum */
        setEntity("nbsp", (char) 160);
        /** inverted exclamation mark, U+00A1 ISOnum */
        setEntity("iexcl", (char) 161);
        /** cent sign, U+00A2 ISOnum */
        setEntity("cent", (char) 162);
        /** pound sign, U+00A3 ISOnum */
        setEntity("pound", (char) 163);
        /** currency sign, U+00A4 ISOnum */
        setEntity("curren", (char) 164);
        /** yen sign = yuan sign, U+00A5 ISOnum */
        setEntity("yen", (char) 165);
        /** broken bar = broken vertical bar, U+00A6 ISOnum */
        setEntity("brvbar", (char) 166);
        /** section sign, U+00A7 ISOnum */
        setEntity("sect", (char) 167);
        /** diaeresis = spacing diaeresis, U+00A8 ISOdia */
        setEntity("uml", (char) 168);
        /** copyright sign, U+00A9 ISOnum */
        setEntity("copy", (char) 169);
        /** feminine ordinal indicator, U+00AA ISOnum */
        setEntity("ordf", (char) 170);
        /** left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum */
        setEntity("laquo", (char) 171);
        /** not sign, U+00AC ISOnum */
        setEntity("not", (char) 172);
        /** soft hyphen = discretionary hyphen, U+00AD ISOnum */
        setEntity("shy", (char) 173);
        /** registered sign = registered trade mark sign, U+00AE ISOnum */
        setEntity("reg", (char) 174);
        /** macron = spacing macron = overline = APL overbar, U+00AF ISOdia */
        setEntity("macr", (char) 175);
        /** degree sign, U+00B0 ISOnum */
        setEntity("deg", (char) 176);
        /** plus-minus sign = plus-or-minus sign, U+00B1 ISOnum */
        setEntity("plusmn", (char) 177);
        /** superscript two = superscript digit two = squared, U+00B2 ISOnum */
        setEntity("sup2", (char) 178);
        /** superscript three = superscript digit three = cubed, U+00B3 ISOnum */
        setEntity("sup3", (char) 179);
        /** acute accent = spacing acute, U+00B4 ISOdia */
        setEntity("acute", (char) 180);
        /** micro sign, U+00B5 ISOnum */
        setEntity("micro", (char) 181);
        /** pilcrow sign = paragraph sign, U+00B6 ISOnum */
        setEntity("para", (char) 182);
        /** middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum */
        setEntity("middot", (char) 183);
        /** cedilla = spacing cedilla, U+00B8 ISOdia */
        setEntity("cedil", (char) 184);
        /** superscript one = superscript digit one, U+00B9 ISOnum */
        setEntity("sup1", (char) 185);
        /** masculine ordinal indicator, U+00BA ISOnum */
        setEntity("ordm", (char) 186);
        /** right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum */
        setEntity("raquo", (char) 187);
        /** vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum */
        setEntity("frac14", (char) 188);
        /** vulgar fraction one half = fraction one half, U+00BD ISOnum */
        setEntity("frac12", (char) 189);
        /** vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum */
        setEntity("frac34", (char) 190);
        /** inverted question mark = turned question mark, U+00BF ISOnum */
        setEntity("iquest", (char) 191);
        /** latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1 */
        setEntity("Agrave", (char) 192);
        /** latin capital letter A with acute, U+00C1 ISOlat1 */
        setEntity("Aacute", (char) 193);
        /** latin capital letter A with circumflex, U+00C2 ISOlat1 */
        setEntity("Acirc", (char) 194);
        /** latin capital letter A with tilde, U+00C3 ISOlat1 */
        setEntity("Atilde", (char) 195);
        /** latin capital letter A with diaeresis, U+00C4 ISOlat1 */
        setEntity("Auml", (char) 196);
        /** latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1 */
        setEntity("Aring", (char) 197);
        /** latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 */
        setEntity("AElig", (char) 198);
        /** latin capital letter C with cedilla, U+00C7 ISOlat1 */
        setEntity("Ccedil", (char) 199);
        /** latin capital letter E with grave, U+00C8 ISOlat1 */
        setEntity("Egrave", (char) 200);
        /** latin capital letter E with acute, U+00C9 ISOlat1 */
        setEntity("Eacute", (char) 201);
        /** latin capital letter E with circumflex, U+00CA ISOlat1 */
        setEntity("Ecirc", (char) 202);
        /** latin capital letter E with diaeresis, U+00CB ISOlat1 */
        setEntity("Euml", (char) 203);
        /** latin capital letter I with grave, U+00CC ISOlat1 */
        setEntity("Igrave", (char) 204);
        /** latin capital letter I with acute, U+00CD ISOlat1 */
        setEntity("Iacute", (char) 205);
        /** latin capital letter I with circumflex, U+00CE ISOlat1 */
        setEntity("Icirc", (char) 206);
        /** latin capital letter I with diaeresis, U+00CF ISOlat1 */
        setEntity("Iuml", (char) 207);
        /** latin capital letter ETH, U+00D0 ISOlat1 */
        setEntity("ETH", (char) 208);
        /** latin capital letter N with tilde, U+00D1 ISOlat1 */
        setEntity("Ntilde", (char) 209);
        /** latin capital letter O with grave, U+00D2 ISOlat1 */
        setEntity("Ograve", (char) 210);
        /** latin capital letter O with acute, U+00D3 ISOlat1 */
        setEntity("Oacute", (char) 211);
        /** latin capital letter O with circumflex, U+00D4 ISOlat1 */
        setEntity("Ocirc", (char) 212);
        /** latin capital letter O with tilde, U+00D5 ISOlat1 */
        setEntity("Otilde", (char) 213);
        /** latin capital letter O with diaeresis, U+00D6 ISOlat1 */
        setEntity("Ouml", (char) 214);
        /** multiplication sign, U+00D7 ISOnum */
        setEntity("times", (char) 215);
        /** latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1 */
        setEntity("Oslash", (char) 216);
        /** latin capital letter U with grave, U+00D9 ISOlat1 */
        setEntity("Ugrave", (char) 217);
        /** latin capital letter U with acute, U+00DA ISOlat1 */
        setEntity("Uacute", (char) 218);
        /** latin capital letter U with circumflex, U+00DB ISOlat1 */
        setEntity("Ucirc", (char) 219);
        /** latin capital letter U with diaeresis, U+00DC ISOlat1 */
        setEntity("Uuml", (char) 220);
        /** latin capital letter Y with acute, U+00DD ISOlat1 */
        setEntity("Yacute", (char) 221);
        /** latin capital letter THORN, U+00DE ISOlat1 */
        setEntity("THORN", (char) 222);
        /** latin small letter sharp s = ess-zed, U+00DF ISOlat1 */
        setEntity("szlig", (char) 223);
        /** latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1 */
        setEntity("agrave", (char) 224);
        /** latin small letter a with acute, U+00E1 ISOlat1 */
        setEntity("aacute", (char) 225);
        /** latin small letter a with circumflex, U+00E2 ISOlat1 */
        setEntity("acirc", (char) 226);
        /** latin small letter a with tilde, U+00E3 ISOlat1 */
        setEntity("atilde", (char) 227);
        /** latin small letter a with diaeresis, U+00E4 ISOlat1 */
        setEntity("auml", (char) 228);
        /** latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1 */
        setEntity("aring", (char) 229);
        /** latin small letter ae = latin small ligature ae, U+00E6 ISOlat1 */
        setEntity("aelig", (char) 230);
        /** latin small letter c with cedilla, U+00E7 ISOlat1 */
        setEntity("ccedil", (char) 231);
        /** latin small letter e with grave, U+00E8 ISOlat1 */
        setEntity("egrave", (char) 232);
        /** latin small letter e with acute, U+00E9 ISOlat1 */
        setEntity("eacute", (char) 233);
        /** latin small letter e with circumflex, U+00EA ISOlat1 */
        setEntity("ecirc", (char) 234);
        /** latin small letter e with diaeresis, U+00EB ISOlat1 */
        setEntity("euml", (char) 235);
        /** latin small letter i with grave, U+00EC ISOlat1 */
        setEntity("igrave", (char) 236);
        /** latin small letter i with acute, U+00ED ISOlat1 */
        setEntity("iacute", (char) 237);
        /** latin small letter i with circumflex, U+00EE ISOlat1 */
        setEntity("icirc", (char) 238);
        /** latin small letter i with diaeresis, U+00EF ISOlat1 */
        setEntity("iuml", (char) 239);
        /** latin small letter eth, U+00F0 ISOlat1 */
        setEntity("eth", (char) 240);
        /** latin small letter n with tilde, U+00F1 ISOlat1 */
        setEntity("ntilde", (char) 241);
        /** latin small letter o with grave, U+00F2 ISOlat1 */
        setEntity("ograve", (char) 242);
        /** latin small letter o with acute, U+00F3 ISOlat1 */
        setEntity("oacute", (char) 243);
        /** latin small letter o with circumflex, U+00F4 ISOlat1 */
        setEntity("ocirc", (char) 244);
        /** latin small letter o with tilde, U+00F5 ISOlat1 */
        setEntity("otilde", (char) 245);
        /** latin small letter o with diaeresis, U+00F6 ISOlat1 */
        setEntity("ouml", (char) 246);
        /** division sign, U+00F7 ISOnum */
        setEntity("divide", (char) 247);
        /** latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1 */
        setEntity("oslash", (char) 248);
        /** latin small letter u with grave, U+00F9 ISOlat1 */
        setEntity("ugrave", (char) 249);
        /** latin small letter u with acute, U+00FA ISOlat1 */
        setEntity("uacute", (char) 250);
        /** latin small letter u with circumflex, U+00FB ISOlat1 */
        setEntity("ucirc", (char) 251);
        /** latin small letter u with diaeresis, U+00FC ISOlat1 */
        setEntity("uuml", (char) 252);
        /** latin small letter y with acute, U+00FD ISOlat1 */
        setEntity("yacute", (char) 253);
        /** latin small letter thorn, U+00FE ISOlat1 */
        setEntity("thorn", (char) 254);
        /** latin small letter y with diaeresis, U+00FF ISOlat1 */
        setEntity("yuml", (char) 255);

        /*
         * The following are the greek and mathematical symbols.
         */
        /** latin small f with hook = function = florin, U+0192 ISOtech */
        setEntity("fnof", (char) 402);
        /** greek capital letter alpha, U+0391 */
        setEntity("Alpha", (char) 913);
        /** greek capital letter beta, U+0392 */
        setEntity("Beta", (char) 914);
        /** greek capital letter gamma, U+0393 ISOgrk3 */
        setEntity("Gamma", (char) 915);
        /** greek capital letter delta, U+0394 ISOgrk3 */
        setEntity("Delta", (char) 916);
        /** greek capital letter epsilon, U+0395 */
        setEntity("Epsilon", (char) 917);
        /** greek capital letter zeta, U+0396 */
        setEntity("Zeta", (char) 918);
        /** greek capital letter eta, U+0397 */
        setEntity("Eta", (char) 919);
        /** greek capital letter theta, U+0398 ISOgrk3 */
        setEntity("Theta", (char) 920);
        /** greek capital letter iota, U+0399 */
        setEntity("Iota", (char) 921);
        /** greek capital letter kappa, U+039A */
        setEntity("Kappa", (char) 922);
        /** greek capital letter lambda, U+039B ISOgrk3 */
        setEntity("Lambda", (char) 923);
        /** greek capital letter mu, U+039C */
        setEntity("Mu", (char) 924);
        /** greek capital letter nu, U+039D */
        setEntity("Nu", (char) 925);
        /** greek capital letter xi, U+039E ISOgrk3 */
        setEntity("Xi", (char) 926);
        /** greek capital letter omicron, U+039F */
        setEntity("Omicron", (char) 927);
        /** greek capital letter pi, U+03A0 ISOgrk3 */
        setEntity("Pi", (char) 928);
        /** greek capital letter rho, U+03A1 */
        setEntity("Rho", (char) 929);
        /** greek capital letter sigma, U+03A3 ISOgrk3 */
        setEntity("Sigma", (char) 931);
        /** greek capital letter tau, U+03A4 */
        setEntity("Tau", (char) 932);
        /** greek capital letter upsilon, U+03A5 ISOgrk3 */
        setEntity("Upsilon", (char) 933);
        /** greek capital letter phi, U+03A6 ISOgrk3 */
        setEntity("Phi", (char) 934);
        /** greek capital letter chi, U+03A7 */
        setEntity("Chi", (char) 935);
        /** greek capital letter psi, U+03A8 ISOgrk3 */
        setEntity("Psi", (char) 936);
        /** greek capital letter omega, U+03A9 ISOgrk3 */
        setEntity("Omega", (char) 937);
        /** greek small letter alpha, U+03B1 ISOgrk3 */
        setEntity("alpha", (char) 945);
        /** greek small letter beta, U+03B2 ISOgrk3 */
        setEntity("beta", (char) 946);
        /** greek small letter gamma, U+03B3 ISOgrk3 */
        setEntity("gamma", (char) 947);
        /** greek small letter delta, U+03B4 ISOgrk3 */
        setEntity("delta", (char) 948);
        /** greek small letter epsilon, U+03B5 ISOgrk3 */
        setEntity("epsilon", (char) 949);
        /** greek small letter zeta, U+03B6 ISOgrk3 */
        setEntity("zeta", (char) 950);
        /** greek small letter eta, U+03B7 ISOgrk3 */
        setEntity("eta", (char) 951);
        /** greek small letter theta, U+03B8 ISOgrk3 */
        setEntity("theta", (char) 952);
        /** greek small letter iota, U+03B9 ISOgrk3 */
        setEntity("iota", (char) 953);
        /** greek small letter kappa, U+03BA ISOgrk3 */
        setEntity("kappa", (char) 954);
        /** greek small letter lambda, U+03BB ISOgrk3 */
        setEntity("lambda", (char) 955);
        /** greek small letter mu, U+03BC ISOgrk3 */
        setEntity("mu", (char) 956);
        /** greek small letter nu, U+03BD ISOgrk3 */
        setEntity("nu", (char) 957);
        /** greek small letter xi, U+03BE ISOgrk3 */
        setEntity("xi", (char) 958);
        /** greek small letter omicron, U+03BF NEW */
        setEntity("omicron", (char) 959);
        /** greek small letter pi, U+03C0 ISOgrk3 */
        setEntity("pi", (char) 960);
        /** greek small letter rho, U+03C1 ISOgrk3 */
        setEntity("rho", (char) 961);
        /** greek small letter final sigma, U+03C2 ISOgrk3 */
        setEntity("sigmaf", (char) 962);
        /** greek small letter sigma, U+03C3 ISOgrk3 */
        setEntity("sigma", (char) 963);
        /** greek small letter tau, U+03C4 ISOgrk3 */
        setEntity("tau", (char) 964);
        /** greek small letter upsilon, U+03C5 ISOgrk3 */
        setEntity("upsilon", (char) 965);
        /** greek small letter phi, U+03C6 ISOgrk3 */
        setEntity("phi", (char) 966);
        /** greek small letter chi, U+03C7 ISOgrk3 */
        setEntity("chi", (char) 967);
        /** greek small letter psi, U+03C8 ISOgrk3 */
        setEntity("psi", (char) 968);
        /** greek small letter omega, U+03C9 ISOgrk3 */
        setEntity("omega", (char) 969);
        /** greek small letter theta symbol, U+03D1 NEW */
        setEntity("thetasym", (char) 977);
        /** greek upsilon with hook symbol, U+03D2 NEW */
        setEntity("upsih", (char) 978);
        /** greek pi symbol, U+03D6 ISOgrk3 */
        setEntity("piv", (char) 982);
        /** bullet = black small circle, U+2022 ISOpub */
        setEntity("bull", (char) 8226);
        /** horizontal ellipsis = three dot leader, U+2026 ISOpub */
        setEntity("hellip", (char) 8230);
        /** prime = minutes = feet, U+2032 ISOtech */
        setEntity("prime", (char) 8242);
        /** double prime = seconds = inches, U+2033 ISOtech */
        setEntity("Prime", (char) 8243);
        /** overline = spacing overscore, U+203E NEW */
        setEntity("oline", (char) 8254);
        /** fraction slash, U+2044 NEW */
        setEntity("frasl", (char) 8260);
        /** script capital P = power set = Weierstrass p, U+2118 ISOamso */
        setEntity("weierp", (char) 8472);
        /** blackletter capital I = imaginary part, U+2111 ISOamso */
        setEntity("image", (char) 8465);
        /** blackletter capital R = real part symbol, U+211C ISOamso */
        setEntity("real", (char) 8476);
        /** trade mark sign, U+2122 ISOnum */
        setEntity("trade", (char) 8482);
        /** alef symbol = first transfinite cardinal, U+2135 NEW */
        setEntity("alefsym", (char) 8501);
        /** leftwards arrow, U+2190 ISOnum */
        setEntity("larr", (char) 8592);
        /** upwards arrow, U+2191 ISOnum */
        setEntity("uarr", (char) 8593);
        /** rightwards arrow, U+2192 ISOnum */
        setEntity("rarr", (char) 8594);
        /** downwards arrow, U+2193 ISOnum */
        setEntity("darr", (char) 8595);
        /** left right arrow, U+2194 ISOamsa */
        setEntity("harr", (char) 8596);
        /** downwards arrow with corner leftwards = carriage return, U+21B5 NEW */
        setEntity("crarr", (char) 8629);
        /** leftwards double arrow, U+21D0 ISOtech */
        setEntity("lArr", (char) 8656);
        /** upwards double arrow, U+21D1 ISOamsa */
        setEntity("uArr", (char) 8657);
        /** rightwards double arrow, U+21D2 ISOtech */
        setEntity("rArr", (char) 8658);
        /** downwards double arrow, U+21D3 ISOamsa */
        setEntity("dArr", (char) 8659);
        /** left right double arrow, U+21D4 ISOamsa */
        setEntity("hArr", (char) 8660);
        /** for all, U+2200 ISOtech */
        setEntity("forall", (char) 8704);
        /** partial differential, U+2202 ISOtech */
        setEntity("part", (char) 8706);
        /** there exists, U+2203 ISOtech */
        setEntity("exist", (char) 8707);
        /** empty set = null set = diameter, U+2205 ISOamso */
        setEntity("empty", (char) 8709);
        /** nabla = backward difference, U+2207 ISOtech */
        setEntity("nabla", (char) 8711);
        /** element of, U+2208 ISOtech */
        setEntity("isin", (char) 8712);
        /** not an element of, U+2209 ISOtech */
        setEntity("notin", (char) 8713);
        /** contains as member, U+220B ISOtech */
        setEntity("ni", (char) 8715);
        /** n-ary product = product sign, U+220F ISOamsb */
        setEntity("prod", (char) 8719);
        /** n-ary summation, U+2211 ISOamsb */
        setEntity("sum", (char) 8721);
        /** minus sign, U+2212 ISOtech */
        setEntity("minus", (char) 8722);
        /** asterisk operator, U+2217 ISOtech */
        setEntity("lowast", (char) 8727);
        /** square root = radical sign, U+221A ISOtech */
        setEntity("radic", (char) 8730);
        /** proportional to, U+221D ISOtech */
        setEntity("prop", (char) 8733);
        /** infinity, U+221E ISOtech */
        setEntity("infin", (char) 8734);
        /** angle, U+2220 ISOamso */
        setEntity("ang", (char) 8736);
        /** logical and = wedge, U+2227 ISOtech */
        setEntity("and", (char) 8743);
        /** logical or = vee, U+2228 ISOtech */
        setEntity("or", (char) 8744);
        /** intersection = cap, U+2229 ISOtech */
        setEntity("cap", (char) 8745);
        /** union = cup, U+222A ISOtech */
        setEntity("cup", (char) 8746);
        /** integral, U+222B ISOtech */
        setEntity("int", (char) 8747);
        /** therefore, U+2234 ISOtech */
        setEntity("there4", (char) 8756);
        /** tilde operator = varies with = similar to, U+223C ISOtech */
        setEntity("sim", (char) 8764);
        /** approximately equal to, U+2245 ISOtech */
        setEntity("cong", (char) 8773);
        /** almost equal to = asymptotic to, U+2248 ISOamsr */
        setEntity("asymp", (char) 8776);
        /** not equal to, U+2260 ISOtech */
        setEntity("ne", (char) 8800);
        /** identical to, U+2261 ISOtech */
        setEntity("equiv", (char) 8801);
        /** less-than or equal to, U+2264 ISOtech */
        setEntity("le", (char) 8804);
        /** greater-than or equal to, U+2265 ISOtech */
        setEntity("ge", (char) 8805);
        /** subset of, U+2282 ISOtech */
        setEntity("sub", (char) 8834);
        /** superset of, U+2283 ISOtech */
        setEntity("sup", (char) 8835);
        /** not a subset of, U+2284 ISOamsn */
        setEntity("nsub", (char) 8836);
        /** subset of or equal to, U+2286 ISOtech */
        setEntity("sube", (char) 8838);
        /** superset of or equal to, U+2287 ISOtech */
        setEntity("supe", (char) 8839);
        /** circled plus = direct sum, U+2295 ISOamsb */
        setEntity("oplus", (char) 8853);
        /** circled times = vector product, U+2297 ISOamsb */
        setEntity("otimes", (char) 8855);
        /** up tack = orthogonal to = perpendicular, U+22A5 ISOtech */
        setEntity("perp", (char) 8869);
        /** dot operator, U+22C5 ISOamsb */
        setEntity("sdot", (char) 8901);
        /** left ceiling = apl upstile, U+2308 ISOamsc */
        setEntity("lceil", (char) 8968);
        /** right ceiling, U+2309 ISOamsc */
        setEntity("rceil", (char) 8969);
        /** left floor = apl downstile, U+230A ISOamsc */
        setEntity("lfloor", (char) 8970);
        /** right floor, U+230B ISOamsc */
        setEntity("rfloor", (char) 8971);
        /** left-pointing angle bracket = bra, U+2329 ISOtech */
        setEntity("lang", (char) 9001);
        /** right-pointing angle bracket = ket, U+232A ISOtech */
        setEntity("rang", (char) 9002);
        /** lozenge, U+25CA ISOpub */
        setEntity("loz", (char) 9674);
        /** black spade suit, U+2660 ISOpub */
        setEntity("spades", (char) 9824);
        /** black club suit = shamrock, U+2663 ISOpub */
        setEntity("clubs", (char) 9827);
        /** black heart suit = valentine, U+2665 ISOpub */
        setEntity("hearts", (char) 9829);
        /** black diamond suit, U+2666 ISOpub */
        setEntity("diams", (char) 9830);

        /*
         * The following are other special symbols included in the
         * list of HTML character entities for 4.01.
         */
        /** quotation mark = APL quote, U+0022 ISOnum */
        setEntity("quot", (char) 34);
        /** ampersand, U+0026 ISOnum */
        setEntity("amp", (char) 38);
        /** less-than sign, U+003C ISOnum */
        setEntity("lt", (char) 60);
        /** greater-than sign, U+003E ISOnum */
        setEntity("gt", (char) 62);
        /** latin capital ligature OE, U+0152 ISOlat2 */
        setEntity("OElig", (char) 338);
        /** latin small ligature oe, U+0153 ISOlat2 */
        setEntity("oelig", (char) 339);
        /** latin capital letter S with caron, U+0160 ISOlat2 */
        setEntity("Scaron", (char) 352);
        /** latin small letter s with caron, U+0161 ISOlat2 */
        setEntity("scaron", (char) 353);
        /** latin capital letter Y with diaeresis, U+0178 ISOlat2 */
        setEntity("Yuml", (char) 376);
        /** modifier letter circumflex accent, U+02C6 ISOpub */
        setEntity("circ", (char) 710);
        /** small tilde, U+02DC ISOdia */
        setEntity("tilde", (char) 732);
        /** en space, U+2002 ISOpub */
        setEntity("ensp", (char) 8194);
        /** em space, U+2003 ISOpub */
        setEntity("emsp", (char) 8195);
        /** thin space, U+2009 ISOpub */
        setEntity("thinsp", (char) 8201);
        /** zero width non-joiner, U+200C NEW RFC 2070 */
        setEntity("zwnj", (char) 8204);
        /** zero width joiner, U+200D NEW RFC 2070 */
        setEntity("zwj", (char) 8205);
        /** left-to-right mark, U+200E NEW RFC 2070 */
        setEntity("lrm", (char) 8206);
        /** right-to-left mark, U+200F NEW RFC 2070 */
        setEntity("rlm", (char) 8207);
        /** en dash, U+2013 ISOpub */
        setEntity("ndash", (char) 8211);
        /** em dash, U+2014 ISOpub */
        setEntity("mdash", (char) 8212);
        /** left single quotation mark, U+2018 ISOnum */
        setEntity("lsquo", (char) 8216);
        /** right single quotation mark, U+2019 ISOnum */
        setEntity("rsquo", (char) 8217);
        /** single low-9 quotation mark, U+201A NEW */
        setEntity("sbquo", (char) 8218);
        /** left double quotation mark, U+201C ISOnum */
        setEntity("ldquo", (char) 8220);
        /** right double quotation mark, U+201D ISOnum */
        setEntity("rdquo", (char) 8221);
        /** double low-9 quotation mark, U+201E NEW */
        setEntity("bdquo", (char) 8222);
        /** dagger, U+2020 ISOpub */
        setEntity("dagger", (char) 8224);
        /** double dagger, U+2021 ISOpub */
        setEntity("Dagger", (char) 8225);
        /** per mille sign, U+2030 ISOtech */
        setEntity("permil", (char) 8240);
        /** single left-pointing angle quotation mark, U+2039 ISO proposed */
        setEntity("lsaquo", (char) 8249);
        /** single right-pointing angle quotation mark, U+203A ISO proposed */
        setEntity("rsaquo", (char) 8250);
        /** euro sign, U+20AC NEW */
        setEntity("euro", (char) 8364);

        // The two entries below are written straight into charToEntity
        // instead of going through setEntity.
        // NOTE(review): "semi" is not part of the HTML 4.01 entity list
        // referenced above, yet encodeAsEntity(';') will emit "&semi;"
        // because of this entry -- confirm this is intended.
        charToEntity.put(new Character(';'), "semi");
        // NOTE(review): presumably redundant with setEntity("nbsp", ...) at
        // the top of this block; setEntity is defined outside this part of
        // the file -- confirm.
        charToEntity.put(new Character('\u00A0'), "nbsp");
    }
    
    
    /**
     * This field holds the list of non-alphanumeric characters to
     * preserve as-is in URLs.  The three literals concatenate to the
     * single string {@code _-!.~#()*,;:$&+=?/[]@}.
     */
    private String preserve = "_-!.~#()*" + ",;:$&+=" + "?/[]@";
    
    
    /**
     * Make a new escape instance.  This method is protected since only
     * subclasses should use it.  Do not create instances of this class
     * directly; use {@link #getInstance()} to get the correct
     * <code>Escape</code> instance to use.
     */
    protected Escape() {
        super();
    }
    
    
    /**
     * Return the escape handler currently in effect, creating a default
     * <code>Escape</code> first if none has been installed yet.  This
     * method performs no synchronization.
     * @return  The instance to use.
     * @see #setInstance(Escape)
     */
    public static final Escape getInstance() {
        if (instance != null) {
            return instance;
        }
        // First call with nothing installed: fall back to the base
        // implementation.
        instance = new Escape();
        return instance;
    }
    
    
    /**
     * Install the escape handler that {@link #getInstance()} will hand
     * out from now on.
     * @param escape    The instance to use.  Must not be null.
     * @return  The instance to use.
     * @throws NullPointerException  If <code>escape</code> is null.
     * @see #getInstance()
     */
    public static final Escape setInstance(Escape escape) {
        if (escape != null) {
            instance = escape;
            return instance;
        }
        throw new NullPointerException("Escape instance is null.");
    }
    
    
    /**
     * Encode a single character as an entity reference.  If the character
     * has a named entity, that name is used; otherwise a decimal numeric
     * character reference is produced, zero-padded to at least three
     * digits.
     * @param ch    The character to encode.
     * @return  The encoded string, including the leading ampersand and
     *          the trailing semicolon.
     */
    public String encodeAsEntity(char ch) {
        String entityName = charToEntity.get(Character.valueOf(ch));
        if (entityName != null) {
            return "&" + entityName + ";";
        }

        // No named entity: write the code unit in decimal, left-padded
        // with zeros to a minimum width of three.
        StringBuilder digits = new StringBuilder(Integer.toString((int) ch));
        while (digits.length() < 3) {
            digits.insert(0, '0');
        }
        return "&#" + digits + ";";
    }
    
    
    /**
     * Decode an entity or numeric character reference, and return the
     * appropriate character.  Entity names are case-sensitive.
     * <p>
     * Numeric character references may be decimal
     * (<code>&amp;#233;</code>) or hexadecimal (<code>&amp;#xE9;</code>);
     * the hexadecimal marker is accepted as either <code>x</code> or
     * <code>X</code>.  Code points above U+FFFF are returned as a
     * surrogate pair.
     * @param name  An entity or numeric character reference.  It can
     *              include the ampersand and semicolon, or not.
     * @return  The referenced character as a string.  If the reference
     *          cannot be resolved (unknown entity name, malformed number,
     *          or a value that is not a Unicode code point), the reference
     *          is returned decorated with ampersand and semicolon.
     * @throws NullPointerException  If <code>name</code> is null.
     */
    public String decodeAsEntity(String name) {
        if (name == null) {
            throw new NullPointerException("Entity name is null.");
        }

        // This should just be the entity name.  If the entity is
        // decorated, remove the decorations.
        if (name.startsWith("&") && name.endsWith(";")) {
            name = name.substring(1, name.length() - 1);
        }

        // See if this is a numeric character reference (ISO 10646).
        // If the entity name starts with a hash mark, it is.  The
        // next character determines if this is hex or decimal.  If
        // the next character is an x (in either case), then this is hex.
        // Section 5.3.1
        if (name.startsWith("#")) {
            String digits = name.substring(1);
            int radix = 10;
            if (digits.startsWith("x") || digits.startsWith("X")) {
                digits = digits.substring(1);
                radix = 16;
            }
            // Require a leading digit so that signed input such as "#-5"
            // is rejected instead of being handed to parseInt.
            if (digits.length() > 0
                    && Character.digit(digits.charAt(0), radix) >= 0) {
                try {
                    int codePoint = Integer.parseInt(digits, radix);
                    if (Character.isValidCodePoint(codePoint)) {
                        // Return the character itself, not the digits of
                        // its code point.
                        return new String(Character.toChars(codePoint));
                    }
                } catch (NumberFormatException nfe) {
                    // Malformed or out-of-range number: fall through and
                    // hand the reference back unchanged.
                }
            }
            // "name" still carries the '#' (and any 'x'), so the caller
            // gets back exactly the reference that was passed in.
            return "&" + name + ";";
        }

        // Get the entity's value, if it is defined.
        Character value = entityToChar.get(name);
        if (value == null) {
            return "&" + name + ";";
        }
        return String.valueOf(value.charValue());
    }
    
    
    /**
     * Encode a string by replacing characters with entity references
     * or, if there is no named entity, numeric character references.
     * <p>
     * A character is replaced when any of the following holds:
     * <ul>
     * <li>it is in this class's always-replace table (documented as
     *     {@code &}, {@code <}, {@code >}, {@code "}, {@code '} and
     *     the non-breaking space);</li>
     * <li>it is outside the 7-bit ASCII range (code 128 or above);</li>
     * <li>it occurs in <code>characters</code>.</li>
     * </ul>
     * A valid surrogate pair is encoded as a single numeric character
     * reference to the supplementary code point it represents, in
     * hexadecimal or decimal according to
     * {@link #setUseHex(boolean)}.  An unpaired surrogate is passed
     * to {@link #encodeAsEntity(char)} like any other character.
     * @param cdata			The string to encode.
     * @param characters	Additional characters which should be encoded.
     * @return	The encoded string.
     * @throws NullPointerException	If either argument is
     * 								<code>null</code>.
     * @see #encodeAsEntity(char)
     */
    public String encodeEntities(String cdata, String characters) {
        if (cdata == null) {
            throw new NullPointerException("The character data to " +
                    "encode is null.");
        }
        if (characters == null) {
            throw new NullPointerException("The list of additional " +
                    "characters to encode is null.");
        }
        
        // Traverse the string.  Just replace the characters indicated
        // in the argument, and any additional characters which should
        // always be encoded.
        int length = cdata.length();
        StringBuffer buf = new StringBuffer(length);
        for (int i = 0; i < length; i++) {
            char ch = cdata.charAt(i);
            
            // Two surrogate references are not a valid encoding of a
            // supplementary character, so a pair must be emitted as
            // one reference to the combined code point.
            if (Character.isHighSurrogate(ch) && i + 1 < length
                    && Character.isLowSurrogate(cdata.charAt(i + 1))) {
                int codePoint =
                    Character.toCodePoint(ch, cdata.charAt(i + 1));
                if (useHex) {
                    buf.append("&#x");
                    buf.append(Integer.toHexString(codePoint));
                } else {
                    buf.append("&#");
                    buf.append(codePoint);
                }
                buf.append(';');
                i++; // Skip the low surrogate just consumed.
                continue;
            }
            
            if (ch >= 128 ||
                    alwaysReplace.containsKey(Character.valueOf(ch)) ||
                    characters.indexOf(ch) >= 0) {
                buf.append(encodeAsEntity(ch));
            } else {
                buf.append(ch);
            }
        } // Traverse the string.
        
        // Done.
        return buf.toString();
    }
    
    
    /**
     * Decode all entity references in the provided string.  Every
     * span of the form {@code &name;} is handed, without the
     * ampersand and semicolon, to {@link #decodeAsEntity(String)},
     * and replaced by whatever that method returns.
     * <p>
     * An ampersand which is followed by another ampersand before any
     * semicolon cannot start a reference; it is copied literally, so
     * that in {@code AT&T &amp; Co} the {@code &amp;} is still found
     * and decoded.  An ampersand with no semicolon after it is also
     * copied literally, together with the rest of the string.
     * @param cdata	The string to decode.
     * @return	The decoded string.
     * @throws NullPointerException	If <code>cdata</code> is
     * 								<code>null</code>.
     * @see #decodeAsEntity(String)
     */
    public String decodeEntities(String cdata) {
        if (cdata == null) {
            throw new NullPointerException("The character data to " +
                    "decode is null.");
        }
        
        // Traverse the string by index, copying literal text and
        // replacing each reference as it is found.
        int length = cdata.length();
        StringBuffer buf = new StringBuffer(length);
        int pos = 0;
        while (pos < length) {
            // Find the next ampersand.
            int amp = cdata.indexOf('&', pos);
            if (amp < 0) {
                buf.append(cdata, pos, length);
                break;
            }
            
            // Copy the literal text before the ampersand.
            buf.append(cdata, pos, amp);
            
            // Find what ends this candidate reference: a semicolon
            // closes it, another ampersand means the first one was
            // only a stray character.
            int end = amp + 1;
            while (end < length) {
                char c = cdata.charAt(end);
                if (c == ';' || c == '&') {
                    break;
                }
                end++;
            }
            if (end >= length) {
                // No terminator at all; the rest is literal text.
                buf.append(cdata, amp, length);
                break;
            }
            if (cdata.charAt(end) == '&') {
                // Stray ampersand; keep it and rescan from the next.
                buf.append(cdata, amp, end);
                pos = end;
                continue;
            }
            
            // Convert the entity to a character, if possible.  An
            // unresolved entity comes back in its decorated form, so
            // the original text is preserved.
            buf.append(decodeAsEntity(cdata.substring(amp + 1, end)));
            pos = end + 1;
        } // Construct decoded string.
        
        // Done.
        return buf.toString();
    }
    
    
    /**
     * Record the preference between hexadecimal character references
     * of the form {@code &#xN;} (N being the hex character code) and
     * decimal character references of the form {@code &#N;} (N being
     * the decimal character code).
     * @param flag	<code>true</code> to prefer hexadecimal,
     * 				<code>false</code> to prefer decimal.
     * @return	This escape, so that calls can be chained.
     * @see #encodeAsEntity(char)
     */
    public Escape setUseHex(boolean flag) {
        this.useHex = flag;
        return this;
    }
    
    
    /**
     * Add a new entity to this escape.  The mapping is registered in
     * both directions, so the entity name is used when encoding
     * <code>value</code>, and <code>value</code> is produced when
     * decoding the entity name.  An existing mapping for the same
     * name or the same character is replaced.
     * @param entity	The entity name.  There can be an ampersand at
     * 					the start and a semicolon at the end, but these
     * 					are optional; each is removed if present.
     * @param value		The value of the entity, as a single character.
     * @return	This escape.
     * @throws NullPointerException	If <code>entity</code> is
     * 								<code>null</code>.
     */
    public Escape setEntity(String entity, char value) {
        if (entity == null) {
            throw new NullPointerException("The entity name is null.");
        }
        
        // Strip the optional decorations, each independently.
        String name = entity;
        if (name.startsWith("&")) {
            name = name.substring(1);
        }
        if (name.endsWith(";")) {
            name = name.substring(0, name.length()-1);
        }
        
        // Box once, through the caching factory rather than the
        // deprecated constructor, and share the key between both maps.
        Character boxed = Character.valueOf(value);
        charToEntity.put(boxed, name);
        entityToChar.put(name, boxed);
        return this;
    }
    
    
    /**
     * Convert a character to a sequence of hex URL escapes, one
     * {@code %hh} triplet (lowercase hex digits) per encoded byte.
     * <p>
     * The character is converted to bytes in the platform default
     * character encoding, so a multibyte character yields more than
     * one escape.
     * @param ch	The character to encode.
     * @return	The hex encoding, which may consist of more than
     * 			one byte, and which is performed in the default
     * 			character encoding.
     */
    public String hexEncode(char ch) {
        // Going through a one-character string is the simplest way to
        // obtain the (possibly multibyte) encoded form.
        byte[] encoded = String.valueOf(ch).getBytes();
        StringBuffer out = new StringBuffer(encoded.length * 3);
        for (int i = 0; i < encoded.length; i++) {
            // Java bytes are signed; masking yields the unsigned
            // value 0..255 and avoids sign extension.
            int unsigned = encoded[i] & 0xFF;
            out.append('%');
            out.append(Character.forDigit(unsigned >>> 4, 16));
            out.append(Character.forDigit(unsigned & 0x0F, 16));
        } // Traversing the bytes.
        
        return out.toString();
    }
    
    
    /**
     * Traverse the input string and hex encode its characters, except
     * for those which are to be kept as they are.  A character is
     * kept when
     * <ul>
     * <li>it is an ASCII character (code below 128) which is a letter
     *     or digit, or which occurs in <code>characters</code>; or</li>
     * <li>it occurs in this class's <code>preserve</code> set.</li>
     * </ul>
     * Every other character is replaced by the result of
     * {@link #hexEncode(char)}.
     * @param text			The text to encode.
     * @param characters	Characters to preserve, unencoded.
     * @return	The encoded string.
     */
    public String hexEncode(String text, String characters) {
        StringBuffer out = new StringBuffer();
        char[] input = text.toCharArray();
        for (int i = 0; i < input.length; i++) {
            char ch = input[i];
            
            // Decide whether the character passes through.  The
            // tests are made in this order, and later ones are
            // skipped as soon as the outcome is known.
            boolean keep;
            if (ch < 128 && (Character.isLetterOrDigit(ch)
                    || characters.indexOf(ch) >= 0)) {
                keep = true;
            } else {
                keep = preserve.indexOf(ch) >= 0;
            }
            
            if (!keep) {
                out.append(hexEncode(ch));
            } else {
                out.append(ch);
            }
        } // Loop over input string.
        
        return out.toString();
    }
    
    
    /**
     * Convert all URL hex escapes in the string to characters.  This is
     * complicated by the need to handle multibyte characters: the
     * escapes are turned into bytes, and the complete byte sequence is
     * converted to a string at the end.
     * <p>
     * An escape is a percent sign followed by exactly two hexadecimal
     * digits.  A percent sign which is not followed by two hexadecimal
     * digits (including one followed by a sign character, such as
     * {@code %+1} or {@code %-1}) is not an escape and is copied
     * unchanged.
     * <p>
     * Multibyte characters are handled in the default character encoding.
     * @param text	The text to decode.
     * @return	The decoded text.
     * @throws NullPointerException	If <code>text</code> is
     * 								<code>null</code>.
     */
    public String hexDecode(String text) {
        if (text == null) {
            throw new NullPointerException("The text to hex decode is null.");
        }
        
        // Traverse the string and decode any hex escapes.  Literal
        // text is collected in runs and converted to bytes a whole run
        // at a time, so that surrogate pairs are never split.
        int length = text.length();
        ByteArrayOutputStream baos = new ByteArrayOutputStream(length);
        StringBuffer literal = new StringBuffer();
        int index = 0;
        while (index < length) {
            char ch = text.charAt(index);
            
            // An escape needs two more characters after the percent
            // sign, and both must be hexadecimal digits.
            if (ch == '%' && index + 2 < length) {
                int high = Character.digit(text.charAt(index + 1), 16);
                int low = Character.digit(text.charAt(index + 2), 16);
                if (high >= 0 && low >= 0) {
                    appendLiteralBytes(literal, baos);
                    baos.write((high << 4) | low);
                    index += 3;
                    continue;
                }
            }
            
            // Just add the character as-is.
            literal.append(ch);
            index++;
        } // Loop over input string.
        appendLiteralBytes(literal, baos);
        
        // Return the result, in the default encoding.
        return baos.toString();
    }
    
    
    /**
     * Write the pending literal text to the byte sequence, in the
     * default character encoding, and empty the pending text.
     * @param literal	The pending literal text; cleared on return.
     * @param baos		The byte sequence to append to.
     */
    private static void appendLiteralBytes(StringBuffer literal,
            ByteArrayOutputStream baos) {
        if (literal.length() == 0) {
            return;
        }
        byte[] bytes = literal.toString().getBytes();
        // This overload does not declare IOException.
        baos.write(bytes, 0, bytes.length);
        literal.setLength(0);
    }
}