File | Doc | Category | Size | Date | Package
StringUtil.java | API Doc | Apache Poi 3.0.1 | 12049 | Mon Jan 01 12:39:42 GMT 2007 | org.apache.poi.util

StringUtil

public class StringUtil extends Object
Title: String Utility Description: Collection of string handling utilities
author
Andrew C. Oliver
author
Sergei Kozello (sergeikozello at mail.ru)
author
Toshiaki Kamoshida (kamoshida.toshiaki at future dot co dot jp)
since
May 10, 2002
version
1.0

Fields Summary
private static final String
ENCODING
Constructors Summary
private StringUtil()
Constructor for the StringUtil object

	                	 
	  
	
Methods Summary
public static java.lang.String format(java.lang.String message, java.lang.Object[] params)
Apply printf() like formatting to a string. Primarily used for logging.

param
message the string with embedded formatting info eg. "This is a test %2.2"
param
params array of values to format into the string
return
The formatted string

		// Index of the next entry of params to substitute for a '%' marker.
		int currentParamNumber = 0;
		// A StringBuffer is required here because matchOptionalFormatting
		// takes a StringBuffer to append the formatted number to.
		StringBuffer formattedMessage = new StringBuffer();
		for (int i = 0; i < message.length(); i++) {
			char current = message.charAt(i);
			if (current == '%') {
				if (currentParamNumber >= params.length) {
					// More markers than values: make the gap visible in the output.
					formattedMessage.append("?missing data?");
				} else if (
					(params[currentParamNumber] instanceof Number)
						&& (i + 1 < message.length())) {
					// Numeric value with text after the marker: the helper
					// appends the number and reports how many characters
					// following the '%' the loop must skip.
					i
						+= matchOptionalFormatting(
							(Number) params[currentParamNumber++],
							message.substring(i + 1),
							formattedMessage);
				} else {
					// String.valueOf renders a null entry as "null" rather
					// than throwing a NullPointerException.
					formattedMessage.append(
						String.valueOf(params[currentParamNumber++]));
				}
			} else if ((current == '\\')
				&& (i + 1 < message.length())
				&& (message.charAt(i + 1) == '%')) {
				// A backslash directly before '%' escapes it: emit a literal
				// '%' and skip both characters.
				formattedMessage.append('%');
				i++;
			} else {
				formattedMessage.append(current);
			}
		}
		return formattedMessage.toString();
	
public static java.lang.String getFromCompressedUnicode(byte[] string, int offset, int len)
Read 8 bit data (in ISO-8859-1 codepage) into a (unicode) Java String and return. (In Excel terms, read compressed 8 bit unicode as a string)

param
string byte array to read
param
offset the offset into the byte array at which reading starts
param
len the number of bytes to read from the byte array
return
the String generated by reading the byte array

		try {
			return new String(string, offset, len, "ISO-8859-1");
		} catch (UnsupportedEncodingException e) {
			throw new InternalError(); /* unreachable */
		}
	
public static java.lang.String getFromUnicodeBE(byte[] string, int offset, int len)
Given a byte array of 16-bit unicode characters in big endian format (most significant byte first), return a Java String representation of it. For example, { 0x00, 0x16 } yields the character 0x16.

param
string the byte array to be converted
param
offset the initial offset into the byte array. it is assumed that string[ offset ] and string[ offset + 1 ] contain the first 16-bit unicode character
param
len the length of the final string
return
the converted string
exception
ArrayIndexOutOfBoundsException if offset is out of bounds for the byte array (i.e., is negative or is greater than or equal to string.length)
exception
IllegalArgumentException if len is too large (i.e., there is not enough data in string to create a String of that length)

		if ((offset < 0) || (offset >= string.length)) {
			throw new ArrayIndexOutOfBoundsException("Illegal offset");
		}
		if ((len < 0) || (((string.length - offset) / 2) < len)) {
			throw new IllegalArgumentException("Illegal length");
		}
		try {
			return new String(string, offset, len * 2, "UTF-16BE");
		} catch (UnsupportedEncodingException e) {
			throw new InternalError(); /*unreachable*/
		}
	
public static java.lang.String getFromUnicodeBE(byte[] string)
Given a byte array of 16-bit unicode characters in big endian format (most significant byte first), return a Java String representation of it. For example, { 0x00, 0x16 } yields the character 0x16.

param
string the byte array to be converted
return
the converted string

		if(string.length == 0) { return ""; }
		return getFromUnicodeBE(string, 0, string.length / 2);
	
public static java.lang.String getFromUnicodeLE(byte[] string, int offset, int len)
Given a byte array of 16-bit unicode characters in little endian format (most significant byte last), return a Java String representation of it. For example, { 0x16, 0x00 } yields the character 0x16.

param
string the byte array to be converted
param
offset the initial offset into the byte array. it is assumed that string[ offset ] and string[ offset + 1 ] contain the first 16-bit unicode character
param
len the length of the final string
return
the converted string
exception
ArrayIndexOutOfBoundsException if offset is out of bounds for the byte array (i.e., is negative or is greater than or equal to string.length)
exception
IllegalArgumentException if len is too large (i.e., there is not enough data in string to create a String of that length)

		if ((offset < 0) || (offset >= string.length)) {
			throw new ArrayIndexOutOfBoundsException("Illegal offset");
		}
		if ((len < 0) || (((string.length - offset) / 2) < len)) {
			throw new IllegalArgumentException("Illegal length");
		}

		try {
			return new String(string, offset, len * 2, "UTF-16LE");
		} catch (UnsupportedEncodingException e) {
			throw new InternalError(); /*unreachable*/
		}
	
public static java.lang.String getFromUnicodeLE(byte[] string)
Given a byte array of 16-bit unicode characters in little endian format (most significant byte last), return a Java String representation of it. For example, { 0x16, 0x00 } yields the character 0x16.

param
string the byte array to be converted
return
the converted string

		if(string.length == 0) { return ""; }
		return getFromUnicodeLE(string, 0, string.length / 2);
	
public static java.lang.String getPreferredEncoding()

return
the encoding we want to use, currently hardcoded to ISO-8859-1

		// Hands back the class's private ENCODING constant unchanged.
		return ENCODING;
	
public static boolean hasMultibyte(java.lang.String value)
Checks whether the parameter contains at least one multibyte character.

param
value string to check
return
true if the string has at least one multibyte character, false otherwise (including when the string is null)

	    if( value == null )return false;
	    for(int i = 0 ; i < value.length() ; i++ ){
	        char c = value.charAt(i);
	        if(c > 0xFF )return true;
	    }
	    return false;
	
public static boolean isUnicodeString(java.lang.String value)
Checks to see if a given String needs to be represented as Unicode

param
value
return
true if string needs Unicode to be represented.

	    try {
	      return !value.equals(new String(value.getBytes("ISO-8859-1"), "ISO-8859-1"));
	    } catch (UnsupportedEncodingException e) {
	      return true;
	    }
	  
private static int matchOptionalFormatting(java.lang.Number number, java.lang.String formatting, java.lang.StringBuffer outputTo)

		// Appends number to outputTo, applying an optional one-digit width
		// and/or one-digit precision read from the start of formatting
		// (the text that follows a '%' marker). Recognized forms are "d",
		// "d.d" and ".d", where d is a single digit.
		// Returns how many characters of formatting the caller should skip.
		NumberFormat numberFormat = NumberFormat.getInstance();
		// NOTE(review): when no width/precision is recognized this stays 1,
		// so the caller still skips the character right after the '%' -
		// confirm that is intended.
		int consumed = 1;
		if (formatting.length() > 0) {
			char first = formatting.charAt(0);
			if (Character.isDigit(first)) {
				// Leading digit: minimum number of integer digits.
				numberFormat.setMinimumIntegerDigits(
					Character.digit(first, 10));
				if ((formatting.length() > 2)
					&& (formatting.charAt(1) == '.')
					&& Character.isDigit(formatting.charAt(2))) {
					// "d.d": the second digit caps the fraction digits.
					numberFormat.setMaximumFractionDigits(
						Character.digit(formatting.charAt(2), 10));
					consumed = 3;
				}
			} else if ((first == '.')
				&& (formatting.length() > 1)
				&& Character.isDigit(formatting.charAt(1))) {
				// ".d": only a cap on the fraction digits.
				numberFormat.setMaximumFractionDigits(
					Character.digit(formatting.charAt(1), 10));
				consumed = 2;
			}
		}
		numberFormat.format(number, outputTo, new FieldPosition(0));
		return consumed;
	
public static void putCompressedUnicode(java.lang.String input, byte[] output, int offset)
Takes a unicode (java) string, and writes it as 8 bit data (in ISO-8859-1 codepage) into the supplied byte array. (In Excel terms, write compressed 8 bit unicode)

param
input the String containing the data to be written
param
output the byte array to which the data is to be written
param
offset the offset into the byte array at which writing of the data starts

		try {
			byte[] bytes = input.getBytes("ISO-8859-1");
			System.arraycopy(bytes, 0, output, offset, bytes.length);
		} catch (UnsupportedEncodingException e) {
			throw new InternalError(); /*unreachable*/
		}
	
public static void putUnicodeBE(java.lang.String input, byte[] output, int offset)
Takes a unicode string, and writes it as big endian (most significant byte first) bytes into the supplied byte array. (In Excel terms, write uncompressed unicode)

param
input the String containing the unicode data to be written
param
output the byte array to hold the uncompressed unicode, should be twice the length of the String
param
offset the offset to start writing into the byte array

		try {
			byte[] bytes = input.getBytes("UTF-16BE");
			System.arraycopy(bytes, 0, output, offset, bytes.length);
		} catch (UnsupportedEncodingException e) {
			throw new InternalError(); /*unreachable*/
		}
	
public static void putUnicodeLE(java.lang.String input, byte[] output, int offset)
Takes a unicode string, and writes it as little endian (most significant byte last) bytes into the supplied byte array. (In Excel terms, write uncompressed unicode)

param
input the String containing the unicode data to be written
param
output the byte array to hold the uncompressed unicode, should be twice the length of the String
param
offset the offset to start writing into the byte array

		try {
			byte[] bytes = input.getBytes("UTF-16LE");
			System.arraycopy(bytes, 0, output, offset, bytes.length);
		} catch (UnsupportedEncodingException e) {
			throw new InternalError(); /*unreachable*/
		}