Escapepublic class Escape extends Object Implement HTML escapes. Additional escapes can be added.
This class is a singleton. If you subclass and override the
escape methods, use setInstance to
install your handler. |
Fields Summary |
---|
public static final int | UNDEFINEDA value to signal an undefined entity. | private static Escape | instanceThe instance to use. | private boolean | useHexIf true, use hexadecimal character references. If false,
use decimal character references. | private final Map | alwaysReplaceThese are the entities which are always replaced on output. Add
entities which should always be recognized on input and always
replaced on output here. | private final Map | entityToCharThis holds all entities. The map is generated by reversing
the {@link #setEntity(String, char)} method. | private final Map | charToEntityThis holds all entities. Add entities which should be recognized
on input but not (necessarily) generated on output here.
This set was automatically generated from the HTML 4.01 character
entity specification. You can find it online at:
http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html.
Note that this method is initialized using the
{@link #setEntity(String, char)} method. | private String | preserveThis field holds the list of non-alphanumeric characters to
preserve as-is in URLs. |
Constructors Summary |
---|
protected Escape()Make a new escape instance. This method is protected since only
subclasses should use it. Do not create instances of this class
directly; use {@link #getInstance()} to get the correct
Escape instance to use.
// The only statement in this constructor: an explicit call to the
// superclass (Object) constructor.
super();
|
Methods Summary |
---|
public java.lang.String | decodeAsEntity(java.lang.String name)Decode an entity or numeric character reference, and return the
appropriate character. Entity names are case-sensitive.
if (name == null) {
throw new NullPointerException("Entity name is null.");
}
// This should just be the entity name. If the entity is
// decorated, remove the decorations.
if (name.startsWith("&") && name.endsWith(";")) {
name = name.substring(1, name.length()-1);
}
// See if this is a numeric character reference (ISO 10646).
// If the entity name starts with a hash mark, it is. The
// next character determines if this is hex or decimal. If
// the next character is an x, then this is hex.
// Section 5.3.1
if (name.startsWith("#")) {
try {
name = name.substring(1);
if (name.startsWith("X")) {
name = name.substring(1);
return "" + Integer.parseInt(name, 16);
} else {
return "" + Integer.parseInt(name);
}
} catch (NumberFormatException nfe) {
return "&" + name + ";";
}
}
// Get the entity's value, if it is defined.
Character value = entityToChar.get(name);
if (value == null) {
return "&" + name + ";";
} else {
return "" + value.charValue();
}
| public java.lang.String | decodeEntities(java.lang.String cdata)Decode all entity references in the provided string. This also
decodes any numeric character references of the form &#N;,
where N is a decimal number, or &#xN;, where N is a hex
number.
// Decodes every entity reference and numeric character reference in cdata
// and returns the resulting string. Each "&...;" span is passed (without
// the ampersand and semicolon) to decodeAsEntity; text outside such spans
// is copied through unchanged.
//
// Throws NullPointerException if cdata is null.
if (cdata == null) {
    throw new NullPointerException("The character data to " +
                                   "decode is null.");
}

// Walk the string by index rather than repeatedly re-slicing it, so the
// remainder is not copied on every iteration.
final int length = cdata.length();
StringBuilder decoded = new StringBuilder(length);
int pos = 0;
while (pos < length) {
    // Find the next ampersand; everything before it is literal text.
    int amp = cdata.indexOf('&', pos);
    if (amp < 0) {
        decoded.append(cdata, pos, length);
        break;
    }
    decoded.append(cdata, pos, amp);

    // Find the terminating semicolon. Without one, the rest of the
    // input is literal text.
    int semi = cdata.indexOf(';', amp + 1);
    if (semi < 0) {
        decoded.append(cdata, amp, length);
        break;
    }

    // If another ampersand appears before the semicolon, this one is a
    // stray (as in "AT&T &amp; Co"). Emit it literally and restart at the
    // later ampersand so the real reference is still decoded.
    int nextAmp = cdata.indexOf('&', amp + 1);
    if (nextAmp >= 0 && nextAmp < semi) {
        decoded.append(cdata, amp, nextAmp);
        pos = nextAmp;
        continue;
    }

    // Convert the entity to a character, if possible, and add it.
    decoded.append(decodeAsEntity(cdata.substring(amp + 1, semi)));
    pos = semi + 1;
}

// Done.
return decoded.toString();
| public java.lang.String | encodeAsEntity(char ch)Given a character, return the appropriate entity if there
is an entity representation for this character. Otherwise
return a numeric character reference.
// Returns the reference that represents ch in HTML output.
//
// If charToEntity holds a named entity for the character, "&name;" is
// returned. Otherwise a numeric character reference is produced:
// hexadecimal ("&#xN;") when the useHex flag is set, decimal ("&#N;",
// zero-padded to at least three digits) when it is not.
String replacement = charToEntity.get(Character.valueOf(ch));
if (replacement != null) {
    return "&" + replacement + ";";
}

if (useHex) {
    // Honor the flag configured through setUseHex.
    return "&#x" + Integer.toHexString(ch) + ";";
}

// Pad with zeros to length three.
StringBuilder value = new StringBuilder(Integer.toString(ch));
while (value.length() < 3) {
    value.insert(0, '0');
}
// The "&#" prefix is what makes this a numeric character reference.
return "&#" + value + ";";
| public java.lang.String | encodeEntities(java.lang.String cdata, java.lang.String characters)Encode a string by replacing characters with entity references
or numeric character references, if there is no named entity.
The characters which will always be replaced are:
- & (&)
- < (<)
- > (>)
- " (")
- ' (')
- ( )
Additionally, anything outside of the ISO 8859-1 range will be
encoded. From what I've read, this is a good idea.
if (cdata == null) {
throw new NullPointerException("The character data to " +
"encode is null.");
}
if (characters == null) {
throw new NullPointerException("The list of additional " +
"characters to encode is null.");
}
// Traverse the string. Just replace the characters indicated
// in the argument, and any additional characters which should
// always be encoded.
StringBuffer buf = new StringBuffer();
for (char ch : cdata.toCharArray()) {
if (ch >= 128 ||
alwaysReplace.containsKey(new Character(ch)) ||
characters.indexOf(ch) >= 0) {
buf.append(encodeAsEntity(ch));
} else {
buf.append(ch);
}
} // Traverse the string.
// Done.
return buf.toString();
| public static final com.sun.enterprise.diagnostics.report.html.Escape | getInstance()Get the escape instance to use to escape strings.
// Returns the shared Escape instance, creating a default one on first use
// if none has been installed yet.
if (instance != null) {
    return instance;
}
instance = new Escape();
return instance;
| public java.lang.String | hexDecode(java.lang.String text)Convert all URL hex escapes in the string to characters. This is
complicated by the need to handle multibyte characters.
Multibyte characters are handled in the default character encoding.
// Converts every URL hex escape ("%HH") in text back into the byte it
// denotes and returns the resulting bytes decoded as a string in the
// platform default character encoding. A percent sign that is not followed
// by two hexadecimal digits is kept literally.
//
// Throws NullPointerException if text is null.
if (text == null) {
    throw new NullPointerException("The text to hex decode is null.");
}

// Decoded escapes and literal characters are both collected as bytes, so
// that multibyte characters spread over several escapes are reassembled
// when the buffer is converted back to a string.
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
final int length = text.length();
int pos = 0;
while (pos < length) {
    char ch = text.charAt(pos);

    // A hex escape needs the percent sign plus two more characters.
    if (ch == '%' && pos + 2 < length) {
        // Character.digit returns -1 for anything that is not a hex
        // digit. Unlike Integer.parseInt it does not accept a sign, so
        // "%-1" and "%+1" are correctly treated as literal text.
        int high = Character.digit(text.charAt(pos + 1), 16);
        int low = Character.digit(text.charAt(pos + 2), 16);
        if (high >= 0 && low >= 0) {
            bytes.write((high << 4) | low);
            pos += 3;
            continue;
        }
    }

    // Literal character (including a '%' that does not start a valid
    // escape). Keep a surrogate pair together so a supplementary
    // character is encoded as one unit instead of two broken halves.
    int end = pos + 1;
    if (Character.isHighSurrogate(ch) && end < length
            && Character.isLowSurrogate(text.charAt(end))) {
        end++;
    }
    byte[] literal = text.substring(pos, end).getBytes();
    // The three-argument write on ByteArrayOutputStream declares no
    // IOException, so no try/catch is required.
    bytes.write(literal, 0, literal.length);
    pos = end;
}

// Return the result, in the default encoding.
return bytes.toString();
| public java.lang.String | hexEncode(char ch)Convert a character to a sequence of hex URL escapes.
Multibyte characters are handled in the default character encoding.
// Converts a single character into a sequence of URL hex escapes, one
// "%hh" (lowercase, two digits) per byte of the character in the platform
// default character encoding.

// Some characters occupy more than one byte (multibyte). To account for
// this, convert the character to a string and then get the bytes for the
// string.
byte[] encoded = String.valueOf(ch).getBytes();
StringBuilder escaped = new StringBuilder(encoded.length * 3);
for (byte b : encoded) {
    // Java bytes are signed; masking with 0xFF yields the unsigned value
    // (0-255) and prevents sign extension from producing a run of leading
    // 'f' digits in the hex string.
    int unsigned = b & 0xFF;
    escaped.append('%');
    if (unsigned < 0x10) {
        // Always emit two hex digits.
        escaped.append('0');
    }
    escaped.append(Integer.toHexString(unsigned));
}
// Now return the encoded string.
return escaped.toString();
| public java.lang.String | hexEncode(java.lang.String text, java.lang.String characters)Traverse the input string, and hex encode non-alphanumeric
characters in the string, other than those in the provided set.
Note that all non-ascii characters are encoded here.
// Hex encodes the characters of text and returns the result. A character
// is copied through unchanged when it is below 128 and is either a letter
// or digit or occurs in "characters", or when it occurs in the preserve
// field. Every other character is replaced by hexEncode(char).
//
// Throws NullPointerException if either argument is null.
if (text == null) {
    throw new NullPointerException("The text to hex encode is null.");
}
if (characters == null) {
    throw new NullPointerException("The list of additional " +
                                   "characters to preserve is null.");
}

// Traverse the string and encode characters.
StringBuilder buf = new StringBuilder(text.length());
for (char ch : text.toCharArray()) {
    // The grouping is spelled out explicitly; it matches how the original
    // unparenthesized && / || mix was evaluated.
    boolean asciiSafe = ch < 128
            && (Character.isLetterOrDigit(ch)
                || characters.indexOf(ch) >= 0);
    if (asciiSafe || preserve.indexOf(ch) >= 0) {
        buf.append(ch);
    } else {
        buf.append(hexEncode(ch));
    }
} // Loop over input string.

// Done.
return buf.toString();
| public com.sun.enterprise.diagnostics.report.html.Escape | setEntity(java.lang.String entity, char value)Add a new entity to this escape.
// Registers a named entity for a character in both lookup maps and returns
// this instance so calls can be chained. A leading "&" and a trailing ";"
// on the supplied name are dropped before it is stored.
//
// Throws NullPointerException if entity is null.
if (entity == null) {
    throw new NullPointerException("The entity name is null.");
}

String bareName = entity;
if (bareName.startsWith("&")) {
    bareName = bareName.substring(1);
}
if (bareName.endsWith(";")) {
    bareName = bareName.substring(0, bareName.length() - 1);
}

// Record the mapping in both directions.
Character boxed = Character.valueOf(value);
charToEntity.put(boxed, bareName);
entityToChar.put(bareName, boxed);
return this;
| public static final com.sun.enterprise.diagnostics.report.html.Escape | setInstance(com.sun.enterprise.diagnostics.report.html.Escape escape)Set the instance to use to escape strings.
// Installs the given Escape as the shared instance and returns it.
// Throws NullPointerException if escape is null.
if (escape != null) {
    instance = escape;
    return instance;
}
throw new NullPointerException("Escape instance is null.");
| public com.sun.enterprise.diagnostics.report.html.Escape | setUseHex(boolean flag)Specify whether to use hexadecimal character references of the
form &#xN; , where N is the hex character code.
The alternative is decimal character references of the form
&#N; , where N is the decimal character code.
// Stores the flag in the useHex field and returns this instance so calls
// can be chained.
this.useHex = flag;
return this;
|
|