File | Doc | Category | Size | Date | Package
Escape.java | API Doc | Glassfish v2 API | 42913 | Fri May 04 22:34:48 BST 2007 | com.sun.enterprise.diagnostics.report.html

Escape

public class Escape extends Object
Implement HTML escapes. Additional escapes can be added.

This class is a singleton. If you subclass and override the escape methods, use setInstance to install your handler.

Fields Summary
public static final int
UNDEFINED
A value to signal an undefined entity.
private static Escape
instance
The instance to use.
private boolean
useHex
If true, use hexadecimal character references. If false, use decimal character references.
private final Map
alwaysReplace
These are the entities which are always replaced on output. Add entities which should always be recognized on input and always replaced on output here.
private final Map
entityToChar
This maps each entity name to its character. It is the reverse of the charToEntity map, and both maps are filled in together by the {@link #setEntity(String, char)} method.
private final Map
charToEntity
This holds all entities. Add entities which should be recognized on input but not (necessarily) generated on output here.

This set was automatically generated from the HTML 4.01 character entity specification. You can find it online at: http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html.

Note that this map is populated using the {@link #setEntity(String, char)} method.

private String
preserve
This field holds the list of non-alphanumeric characters to preserve as-is in URLs.
Constructors Summary
protected Escape()
Make a new escape instance. This constructor is protected since only subclasses should use it. Do not create instances of this class directly; use {@link #getInstance()} to get the correct Escape instance to use.

    
    
                                             
      
        // Explicitly invoke the superclass (Object) constructor; this body
        // performs no other initialization.
        super();
    
Methods Summary
public java.lang.String decodeAsEntity(java.lang.String name)
Decode an entity or numeric character reference, and return the appropriate character. Entity names are case-sensitive.

param
name An entity or numeric character reference. It can include the ampersand and semicolon, or not.
return
Either the character referenced, or the input string.

        if (name == null) {
            throw new NullPointerException("Entity name is null.");
        }
        
        // This should just be the entity name.  If the entity is
        // decorated, remove the decorations.
        if (name.startsWith("&") && name.endsWith(";")) {
            name = name.substring(1, name.length()-1);
        }
        
        // See if this is a numeric character reference (ISO 10646).
        // If the entity name starts with a hash mark, it is.  The
        // next character determines if this is hex or decimal.  If
        // the next character is an x, then this is hex.
        // Section 5.3.1
        if (name.startsWith("#")) {
            try {
                name = name.substring(1);
                if (name.startsWith("X")) {
                    name = name.substring(1);
                    return "" + Integer.parseInt(name, 16);
                } else {
                    return "" + Integer.parseInt(name);
                }
            } catch (NumberFormatException nfe) {
                return "&" + name + ";";
            }
        }
        
        // Get the entity's value, if it is defined.
        Character value = entityToChar.get(name);
        if (value == null) {
            return "&" + name + ";";
        } else {
            return "" + value.charValue();
        }
    
public java.lang.String decodeEntities(java.lang.String cdata)
Decode all entity references in the provided string. This also decodes any numeric character references of the form &#N;, where N is a decimal number, or &#xN;, where N is a hex number.

param
cdata The string to decode.
return
The decoded string.
see
#decodeAsEntity(String)

        // Replace every "&...;" reference in cdata with whatever
        // decodeAsEntity returns for it; all other text is copied through.
        if (cdata == null) {
            throw new NullPointerException("The character data to " +
                    "decode is null.");
        }

        // Walk the string by index instead of repeatedly cutting the
        // front off it, so the remaining text is not re-copied each pass.
        StringBuilder buf = new StringBuilder(cdata.length());
        int length = cdata.length();
        int pos = 0;
        while (pos < length) {
            // Find the next ampersand.
            int amp = cdata.indexOf('&', pos);
            if (amp < 0) {
                break;
            }

            // Find the ending semicolon.
            int semi = cdata.indexOf(';', amp);
            if (semi < 0) {
                break;
            }

            // An entity name never contains an ampersand, so the reference
            // really starts at the last ampersand before the semicolon.
            // Any earlier ampersand is literal text ("a & b &amp; c").
            amp = cdata.lastIndexOf('&', semi);

            // Copy the prefix, then the decoded reference.
            buf.append(cdata, pos, amp);
            buf.append(decodeAsEntity(cdata.substring(amp + 1, semi)));
            pos = semi + 1;
        } // Construct decoded string.

        // Whatever is left holds no complete reference; copy it verbatim.
        buf.append(cdata, pos, length);

        // Done.
        return buf.toString();
    
public java.lang.String encodeAsEntity(char ch)
Given a character, return the appropriate entity if there is an entity representation for this character. Otherwise return a numeric character reference.

param
ch The character to encode.
return
The encoded string.

        // Prefer a named entity when one is registered for this character.
        String replacement = charToEntity.get(Character.valueOf(ch));
        if (replacement != null) {
            return "&" + replacement + ";";
        }

        // No named entity: fall back to a numeric character reference in
        // the notation selected through the useHex field.
        if (useHex) {
            return "&#x" + Integer.toHexString(ch) + ";";
        }

        // Decimal form, padded with zeros to at least three digits.
        StringBuilder digits = new StringBuilder(Integer.toString(ch));
        while (digits.length() < 3) {
            digits.insert(0, '0');
        }
        return "&#" + digits + ";";
    
public java.lang.String encodeEntities(java.lang.String cdata, java.lang.String characters)
Encode a string by replacing characters with entity references or numeric character references, if there is no named entity.

The characters which will always be replaced are:

  • &amp; (&)
  • &lt; (<)
  • &gt; (>)
  • &quot; (")
  • &#039; (')
  • &nbsp; ( )
Additionally, every character with a code of 128 or above (that is, anything outside 7-bit ASCII, which includes the upper half of ISO 8859-1) will be encoded. From what I've read, this is a good idea.

param
cdata The string to encode.
param
characters Additional characters which should be encoded.
return
The encoded string.
see
#encodeAsEntity(char)

        if (cdata == null) {
            throw new NullPointerException("The character data to " +
            		"encode is null.");
        }
        if (characters == null) {
            throw new NullPointerException("The list of additional " +
            		"characters to encode is null.");
        }
        
        // Traverse the string.  Just replace the characters indicated
        // in the argument, and any additional characters which should
        // always be encoded.
        StringBuffer buf = new StringBuffer();
        for (char ch : cdata.toCharArray()) {
            if (ch >= 128 ||
                    alwaysReplace.containsKey(new Character(ch)) ||
                    characters.indexOf(ch) >= 0) {
                buf.append(encodeAsEntity(ch));
            } else {
                buf.append(ch);
            }
        } // Traverse the string.
        
        // Done.
        return buf.toString();        
    
public static final com.sun.enterprise.diagnostics.report.html.Escape getInstance()
Get the escape instance to use to escape strings.

return
The instance to use.
see
#setInstance(Escape)

        // Hand back the installed handler if there is one; otherwise
        // create the default Escape on first use and remember it.
        if (instance != null) {
            return instance;
        }
        instance = new Escape();
        return instance;
    
public java.lang.String hexDecode(java.lang.String text)
Convert all URL hex escapes in the string to characters. This is complicated by the need to handle multibyte characters.

Multibyte characters are handled in the default character encoding.

param
text The text to decode.
return
The decoded text.

        // Turn every "%HH" escape into the byte it names, pass all other
        // text through as bytes, and decode the resulting byte sequence in
        // the default character encoding.
        if (text == null) {
            throw new NullPointerException("The text to hex decode is null.");
        }

        // Only the write(int) and write(byte[], int, int) methods of
        // ByteArrayOutputStream are used; neither declares IOException,
        // so no catch block is needed.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] percent = "%".getBytes();
        int length = text.length();
        int index = 0;
        while (index < length) {
            if (text.charAt(index) == '%') {
                // A valid escape needs two more characters, both of which
                // must be hex digits.  Checking each digit separately
                // rejects signed pairs such as "-1" or "+1".
                int value = -1;
                if (index + 2 < length) {
                    int high = Character.digit(text.charAt(index + 1), 16);
                    int low = Character.digit(text.charAt(index + 2), 16);
                    if (high >= 0 && low >= 0) {
                        value = (high << 4) | low;
                    }
                }

                if (value >= 0) {
                    baos.write(value);
                    index += 3;
                } else {
                    // Not an escape after all; keep the percent sign.
                    baos.write(percent, 0, percent.length);
                    index++;
                }
            } else {
                // Copy the whole run up to the next percent sign in one
                // go, so that surrogate pairs are encoded together rather
                // than one half at a time.
                int next = text.indexOf('%', index);
                if (next < 0) {
                    next = length;
                }
                byte[] raw = text.substring(index, next).getBytes();
                baos.write(raw, 0, raw.length);
                index = next;
            }
        } // Loop over input string.

        // Return the result, in the default encoding.
        return baos.toString();
    
public java.lang.String hexEncode(char ch)
Convert a character to a sequence of hex URL escapes.

Multibyte characters are handled in the default character encoding.

param
ch The character to encode.
return
The hex encoding, which may consist of more than one byte, and which is performed in the default character encoding.

        // Some characters occupy more than one byte (multibyte).
        // To account for this, convert the character to a string
        // and then get the bytes for the string in the default
        // character encoding.
        byte[] bytes = String.valueOf(ch).getBytes();
        StringBuffer buf = new StringBuffer();
        for (byte bt : bytes) {
            // Java bytes are signed, so mask to get the unsigned value
            // (0-255); otherwise a negative byte would sign-extend into
            // eight hex digits.
            int ibt = bt & 0xFF;
            buf.append('%');
            String hex = Integer.toHexString(ibt);
            if (hex.length() < 2) {
                // Always emit two hex digits per byte.
                buf.append('0');
            }
            buf.append(hex);
        } // Traversing the bytes.

        // Now return the encoded string.
        return buf.toString();
    
public java.lang.String hexEncode(java.lang.String text, java.lang.String characters)
Traverse the input string, and hex encode non-alphanumeric characters in the string, other than those in the provided set. Note that all non-ascii characters are encoded here.

param
text The text to encode.
param
characters Characters to preserve, unencoded.
return
The encoded string.

        // Hex encode every character of text except ASCII letters and
        // digits, the caller's characters, and the preserve list.
        // characters is dereferenced below and so must not be null.
        if (text == null) {
            throw new NullPointerException("The text to hex encode is null.");
        }

        // Traverse the string and encode characters.
        // NOTE(review): the loop works on individual UTF-16 units, so a
        // surrogate pair reaches hexEncode(char) as two separate chars.
        StringBuffer buf = new StringBuffer();
        for (char ch : text.toCharArray()) {
            // The ASCII test guards all three alternatives, so a non-ASCII
            // character is always encoded even if it appears in one of
            // the preserve lists.
            boolean keep = ch < 128
                    && (Character.isLetterOrDigit(ch)
                            || characters.indexOf(ch) >= 0
                            || preserve.indexOf(ch) >= 0);
            if (keep) {
                buf.append(ch);
            } else {
                buf.append(hexEncode(ch));
            }
        } // Loop over input string.

        // Done.
        return buf.toString();
    
public com.sun.enterprise.diagnostics.report.html.Escape setEntity(java.lang.String entity, char value)
Add a new entity to this escape.

param
entity The entity name. There can be an ampersand at the start and a semicolon at the end, but these are optional.
param
value The value of the entity, as a single character.
return
This escape.

        // A name is required.
        if (entity == null) {
            throw new NullPointerException("The entity name is null.");
        }

        // Strip the optional leading ampersand and trailing semicolon so
        // that only the bare entity name is stored.
        String bareName = entity;
        if (bareName.startsWith("&")) {
            bareName = bareName.substring(1);
        }
        if (bareName.endsWith(";")) {
            bareName = bareName.substring(0, bareName.length() - 1);
        }

        // Register the mapping in both directions and allow chaining.
        Character boxed = Character.valueOf(value);
        charToEntity.put(boxed, bareName);
        entityToChar.put(bareName, boxed);
        return this;
    
public static final com.sun.enterprise.diagnostics.report.html.Escape setInstance(com.sun.enterprise.diagnostics.report.html.Escape escape)
Set the instance to use to escape strings.

param
escape The instance to use.
return
The instance to use.
see
#getInstance()

        // Install the supplied handler as the shared instance and return
        // it; a null handler is rejected.
        if (escape != null) {
            instance = escape;
            return instance;
        }
        throw new NullPointerException("Escape instance is null.");
    
public com.sun.enterprise.diagnostics.report.html.Escape setUseHex(boolean flag)
Specify whether to use hexadecimal character references of the form &#xN;, where N is the hex character code. The alternative is decimal character references of the form &#N;, where N is the decimal character code.

param
flag The setting.
return
This escape.
see
#encodeAsEntity(char)

        // Record the requested reference style in the useHex field, then
        // return this instance so that calls can be chained.
        useHex = flag;
        return this;