File | Doc | Category | Size | Date | Package
UEncoder.java | API Doc | Glassfish v2 API | 5970 | Fri May 04 22:33:14 BST 2007 | org.apache.tomcat.util.buf

UEncoder

public final class UEncoder extends Object
Efficient implementation for encoders. This class is not thread safe - you need one encoder per thread. The encoder will save and recycle the internal objects, avoiding garbage. You can add extra characters that you want preserved, for example while encoding a URL you can add "/".
author
Costin Manolache

Fields Summary
private static com.sun.org.apache.commons.logging.Log
log
private BitSet
safeChars
private C2BConverter
c2b
private ByteChunk
bb
private String
encoding
private static final int
debug
Constructors Summary
/**
 * Creates an encoder whose safe-character set is populated by
 * {@code initSafeChars()}.
 */
public UEncoder() {
    this.initSafeChars();
}
    
Methods Summary
public voidaddSafeCharacter(char c)

	safeChars.set( c );
    
public java.lang.StringencodeURL(java.lang.String uri)
Utility funtion to re-encode the URL. Still has problems with charset, since UEncoder mostly ignores it.

	String outUri=null;
	try {
	    // XXX optimize - recycle, etc
	    CharArrayWriter out = new CharArrayWriter();
	    urlEncode(out, uri);
	    outUri=out.toString();
	} catch (IOException iex) {
	}
	return outUri;
    
private voidinit()

	
    
private voidinitSafeChars()

	safeChars=new BitSet(128);
	int i;
	for (i = 'a"; i <= 'z"; i++) {
	    safeChars.set(i);
	}
	for (i = 'A"; i <= 'Z"; i++) {
	    safeChars.set(i);
	}
	for (i = '0"; i <= '9"; i++) {
	    safeChars.set(i);
	}
	//safe
	safeChars.set('$");
	safeChars.set('-");
	safeChars.set('_");
	safeChars.set('.");

	// Dangerous: someone may treat this as " "
	// RFC1738 does allow it, it's not reserved
	//    safeChars.set('+');
	//extra
	safeChars.set('!");
	safeChars.set('*");
	safeChars.set('\'");
	safeChars.set('(");
	safeChars.set(')");
	safeChars.set(',");	
    
private static voidlog(java.lang.String s)

        if (log.isDebugEnabled())
	    log.debug("Encoder: " + s );
    
public voidsetEncoding(java.lang.String s)

	encoding=s;
    
public voidurlEncode(java.io.Writer buf, java.lang.String s)
URL Encode string, using a specified encoding.

param
s string to be encoded
param
enc character encoding, for chars >%80 ( use UTF8 if not set, as recommended in RFCs)
param
reserved extra characters to preserve ( "/" - if s is a URL )

	if( c2b==null ) {
	    bb=new ByteChunk(16); // small enough.
	    c2b=C2BConverter.getInstance( bb, encoding );
	}

	for (int i = 0; i < s.length(); i++) {
	    int c = (int) s.charAt(i);
	    if( safeChars.get( c ) ) {
		if( debug > 0 ) log("Safe: " + (char)c);
		buf.write((char)c);
	    } else {
		if( debug > 0 ) log("Unsafe:  " + (char)c);
		c2b.convert( (char)c );
		
		// "surrogate" - UTF is _not_ 16 bit, but 21 !!!!
		// ( while UCS is 31 ). Amazing...
		if (c >= 0xD800 && c <= 0xDBFF) {
		    if ( (i+1) < s.length()) {
			int d = (int) s.charAt(i+1);
			if (d >= 0xDC00 && d <= 0xDFFF) {
			    if( debug > 0 ) log("Unsafe:  " + c);
			    c2b.convert( (char)d);
			    i++;
			}
		    }
		}

		urlEncode( buf, bb.getBuffer(), bb.getOffset(),
			   bb.getLength() );
		bb.recycle();
	    }
	}
    
public voidurlEncode(java.io.Writer buf, byte[] bytes, int off, int len)

	for( int j=off; j< len; j++ ) {
	    buf.write( '%" );
	    char ch = Character.forDigit((bytes[j] >> 4) & 0xF, 16);
	    if( debug > 0 ) log("Encode:  " + ch);
	    buf.write(ch);
	    ch = Character.forDigit(bytes[j] & 0xF, 16);
	    if( debug > 0 ) log("Encode:  " + ch);
	    buf.write(ch);
	}