File | Doc | Category | Size | Date | Package
UEncoder.java | API Doc | Apache Tomcat 6.0.14 | 4893 | Fri Jul 20 04:20:36 BST 2007 | org.apache.tomcat.util.buf

UEncoder

public final class UEncoder extends Object
Efficient implementation for encoders. This class is not thread safe - you need one encoder per thread. The encoder will save and recycle the internal objects, avoiding garbage. You can add extra characters that you want preserved, for example while encoding a URL you can add "/".
author
Costin Manolache

Fields Summary
private static org.apache.juli.logging.Log
log
private BitSet
safeChars
private C2BConverter
c2b
private ByteChunk
bb
private String
encoding
private static final int
debug
Constructors Summary
/**
 * Creates an encoder and populates its default safe-character set by
 * calling {@code initSafeChars()}.
 */
public UEncoder() {
    initSafeChars();
}
    
Methods Summary
public voidaddSafeCharacter(char c)

	safeChars.set( c );
    
public java.lang.StringencodeURL(java.lang.String uri)
Utility funtion to re-encode the URL. Still has problems with charset, since UEncoder mostly ignores it.

	String outUri=null;
	try {
	    // XXX optimize - recycle, etc
	    CharArrayWriter out = new CharArrayWriter();
	    urlEncode(out, uri);
	    outUri=out.toString();
	} catch (IOException iex) {
	}
	return outUri;
    
private voidinit()

	
    
private voidinitSafeChars()

	safeChars=new BitSet(128);
	int i;
	for (i = 'a"; i <= 'z"; i++) {
	    safeChars.set(i);
	}
	for (i = 'A"; i <= 'Z"; i++) {
	    safeChars.set(i);
	}
	for (i = '0"; i <= '9"; i++) {
	    safeChars.set(i);
	}
	//safe
	safeChars.set('$");
	safeChars.set('-");
	safeChars.set('_");
	safeChars.set('.");

	// Dangerous: someone may treat this as " "
	// RFC1738 does allow it, it's not reserved
	//    safeChars.set('+');
	//extra
	safeChars.set('!");
	safeChars.set('*");
	safeChars.set('\'");
	safeChars.set('(");
	safeChars.set(')");
	safeChars.set(',");	
    
private static voidlog(java.lang.String s)

        if (log.isDebugEnabled())
            log.debug("Encoder: " + s );
    
public voidsetEncoding(java.lang.String s)

	encoding=s;
    
public voidurlEncode(java.io.Writer buf, java.lang.String s)
URL Encode string, using a specified encoding.

param
buf The writer
param
s string to be encoded
throws
IOException If an I/O error occurs

	if( c2b==null ) {
	    bb=new ByteChunk(16); // small enough.
	    c2b=new C2BConverter( bb, encoding );
	}

	for (int i = 0; i < s.length(); i++) {
	    int c = (int) s.charAt(i);
	    if( safeChars.get( c ) ) {
		if( debug > 0 ) log("Safe: " + (char)c);
		buf.write((char)c);
	    } else {
		if( debug > 0 ) log("Unsafe:  " + (char)c);
		c2b.convert( (char)c );
		
		// "surrogate" - UTF is _not_ 16 bit, but 21 !!!!
		// ( while UCS is 31 ). Amazing...
		if (c >= 0xD800 && c <= 0xDBFF) {
		    if ( (i+1) < s.length()) {
			int d = (int) s.charAt(i+1);
			if (d >= 0xDC00 && d <= 0xDFFF) {
			    if( debug > 0 ) log("Unsafe:  " + c);
			    c2b.convert( (char)d);
			    i++;
			}
		    }
		}

		c2b.flushBuffer();
		
		urlEncode( buf, bb.getBuffer(), bb.getOffset(),
			   bb.getLength() );
		bb.recycle();
	    }
	}
    
public voidurlEncode(java.io.Writer buf, byte[] bytes, int off, int len)

	for( int j=off; j< len; j++ ) {
	    buf.write( '%" );
	    char ch = Character.forDigit((bytes[j] >> 4) & 0xF, 16);
	    if( debug > 0 ) log("Encode:  " + ch);
	    buf.write(ch);
	    ch = Character.forDigit(bytes[j] & 0xF, 16);
	    if( debug > 0 ) log("Encode:  " + ch);
	    buf.write(ch);
	}