UEncoder: public final class UEncoder extends Object. Efficient implementation for encoders.
This class is not thread safe - you need one encoder per thread.
The encoder will save and recycle the internal objects, avoiding
garbage.
You can add extra characters that you want preserved, for example
while encoding a URL you can add "/". |
Fields Summary |
---|
private static org.apache.juli.logging.Log | log | private BitSet | safeChars | private C2BConverter | c2b | private ByteChunk | bb | private String | encoding | private static final int | debug |
Constructors Summary |
---|
public UEncoder()
// Creates an encoder and populates the default safe-character set
// via initSafeChars(), so the instance is usable right away.
initSafeChars();
|
Methods Summary |
---|
public void | addSafeCharacter(char c)
// Marks c as safe: urlEncode(Writer, String) writes characters whose bit
// is set in safeChars unchanged instead of percent-encoding them.
safeChars.set( c );
| public java.lang.String | encodeURL(java.lang.String uri)Utility function to re-encode the URL.
Still has problems with charset, since UEncoder mostly
ignores it.
// Returns the encoded form of uri, or null when encoding fails with an
// IOException. The null return is kept so existing callers see the same
// contract as before.
String outUri=null;
try {
    // XXX optimize - recycle, etc
    CharArrayWriter out = new CharArrayWriter();
    urlEncode(out, uri);
    outUri=out.toString();
} catch (IOException iex) {
    // This failure used to be swallowed silently; report it through the
    // class's debug log helper so a null result can be diagnosed.
    log("encodeURL failed for '" + uri + "': " + iex);
}
return outUri;
| private void | init()
// NOTE(review): no statements are shown for init() in this listing; it
// appears to be an empty placeholder - confirm against the real source.
| private void | initSafeChars()
// Builds the default set of characters that urlEncode(Writer, String)
// writes through unchanged: ASCII letters, digits, and a small set of
// punctuation marks. Replaces any previously configured safeChars set.
safeChars=new BitSet(128);
int i;
for (i = 'a'; i <= 'z'; i++) {
    safeChars.set(i);
}
for (i = 'A'; i <= 'Z'; i++) {
    safeChars.set(i);
}
for (i = '0'; i <= '9'; i++) {
    safeChars.set(i);
}
//safe
safeChars.set('$');
safeChars.set('-');
safeChars.set('_');
safeChars.set('.');
// Dangerous: someone may treat this as " "
// RFC1738 does allow it, it's not reserved
// safeChars.set('+');
//extra
safeChars.set('!');
safeChars.set('*');
safeChars.set('\'');
safeChars.set('(');
safeChars.set(')');
safeChars.set(',');
| private static void | log(java.lang.String s)
if (log.isDebugEnabled())
log.debug("Encoder: " + s );
| public void | setEncoding(java.lang.String s)
// Stores the encoding name that urlEncode(Writer, String) passes to the
// C2BConverter when it first creates one.
// NOTE(review): the converter is created lazily and is not rebuilt here,
// so a call made after the first urlEncode presumably has no effect on
// the existing converter - confirm whether that is intended.
encoding=s;
| public void | urlEncode(java.io.Writer buf, java.lang.String s)URL Encode string, using a specified encoding.
if( c2b==null ) {
bb=new ByteChunk(16); // small enough.
c2b=new C2BConverter( bb, encoding );
}
for (int i = 0; i < s.length(); i++) {
int c = (int) s.charAt(i);
if( safeChars.get( c ) ) {
if( debug > 0 ) log("Safe: " + (char)c);
buf.write((char)c);
} else {
if( debug > 0 ) log("Unsafe: " + (char)c);
c2b.convert( (char)c );
// "surrogate" - UTF is _not_ 16 bit, but 21 !!!!
// ( while UCS is 31 ). Amazing...
if (c >= 0xD800 && c <= 0xDBFF) {
if ( (i+1) < s.length()) {
int d = (int) s.charAt(i+1);
if (d >= 0xDC00 && d <= 0xDFFF) {
if( debug > 0 ) log("Unsafe: " + c);
c2b.convert( (char)d);
i++;
}
}
}
c2b.flushBuffer();
urlEncode( buf, bb.getBuffer(), bb.getOffset(),
bb.getLength() );
bb.recycle();
}
}
| public void | urlEncode(java.io.Writer buf, byte[] bytes, int off, int len)
for( int j=off; j< len; j++ ) {
buf.write( '%" );
char ch = Character.forDigit((bytes[j] >> 4) & 0xF, 16);
if( debug > 0 ) log("Encode: " + ch);
buf.write(ch);
ch = Character.forDigit(bytes[j] & 0xF, 16);
if( debug > 0 ) log("Encode: " + ch);
buf.write(ch);
}
|
|