File Doc Category Size Date Package
StringNormalizer.java API Doc phoneME MR2 API (J2ME) 7372 Wed May 02 18:00:46 BST 2007 com.sun.j2me.global

StringNormalizer

java.lang.Object

public final class StringNormalizer extends Object implements StringDecomposer

A string normalizer is responsible for decomposing strings into their canonically decomposed equivalents (Normalization Form D).

Fields Summary
private static final int
CAPACITY_INCREMENT
The capacity increment value of the internal buffers.
private int[]
decomposition
Internal decomposition buffer.
private int
decOffset
Decomposition offset.
private int
decLength
Decomposition length.
private int
strOffset
String offset.
private int
strLength
String length.
private int
strInitOffset
String initial offset.
private int
maxDecomposition
Max decomposition length.
private String
source
The string being decomposed.
private NormalizationTable
table
A lookup table which is used during the normalization.
Constructors Summary
public StringNormalizer(NormalizationTable table)
Creates a new instance of StringNormalizer.
param
table a lookup table for the normalization
this.table = table; this.maxDecomposition = table.getMaxDecompositionLength();
/**
 * Creates a new instance of StringNormalizer for the given string.
 * Delegates to the single-argument constructor and then records the
 * string together with its length.
 *
 * @param s a string for the normalization
 * @param table a lookup table for the normalization
 */
public StringNormalizer(String s, NormalizationTable table) {
    this(table);

    this.source = s;
    this.strLength = s.length();
}
Methods Summary
public int getNextElement()
Returns the next encoded code point value from the normalized input string. The methods of the NormalizationTable class can be used to inspect the returned value. Returns EOF_ELEMENT if the end of string is reached.
return
the next encoded code point value from the normalized input string or EOF_ELEMENT if the end of string is reached
see
NormalizationTable
if (decOffset < decLength) { return decomposition[decOffset++]; } int value = nextUTF32(); if (value == EOF_ELEMENT) { return EOF_ELEMENT; } value = table.getCanonicalDecomposition(decomposition, 0, value); if (NormalizationTable.isSingleCodePoint(value)) { if (NormalizationTable.isStable(value)) { return value; } decomposition[0] = value; decLength = 1; } else { decLength = value; } decOffset = 0; // decompose till we get a stable code point value = nextUTF32(); while (value != -1) { if ((decLength + maxDecomposition) > decomposition.length) { int[] newDecomposition = new int[decomposition.length + CAPACITY_INCREMENT]; System.arraycopy(decomposition, 0, newDecomposition, 0, decLength); decomposition = newDecomposition; } value = table.getCanonicalDecomposition(decomposition, decLength, value); if (NormalizationTable.isSingleCodePoint(value)) { decomposition[decLength++] = value; if (NormalizationTable.isStable(value)) { break; } } else { decLength += value; } value = nextUTF32(); } // order the code points according to their combining classes boolean checkOrder; do { checkOrder = false; for (int i = 1; i < decLength; ++i) { int cp1 = decomposition[i - 1]; int cp2 = decomposition[i]; int cc1 = NormalizationTable.getCombiningClass(cp1); int cc2 = NormalizationTable.getCombiningClass(cp2); if ((cc1 > cc2) && (cc2 != 0)) { decomposition[i - 1] = cp2; decomposition[i] = cp1; checkOrder = true; } } } while (checkOrder); if (decLength > 0) { return decomposition[decOffset++]; } return EOF_ELEMENT;
public final int nextUTF32()
Returns the next code point value from the source string. It expects the input string to be UTF-16 encoded.
return
the next code point value
if (strOffset >= strLength) { return EOF_ELEMENT; } int cp = (int)source.charAt(strOffset++); if (((cp & 0xfc00) == 0xd800) && (strOffset < strLength)) { // is a high surrogate cp int cp2 = (int)source.charAt(strOffset); if ((cp2 & 0xfc00) == 0xdc00) { // we have got suplementary low surrogate // so construct the final code point int wwww = (cp >> 6) & 0xf; cp = ((wwww + 1) << 16) | ((cp & 0x3f) << 10) | (cp2 & 0x3ff); ++strOffset; } } return cp;
public final void reset()
Restarts the decomposition.
decOffset = 0; decLength = 0; strOffset = strInitOffset;
public final void setSource(java.lang.String s)
Sets the string for the normalization.
param
s the string
source = s; strLength = s.length(); strInitOffset = 0; reset();
public final void setSource(java.lang.String s, int offset)
Sets the string for the normalization.
param
s the string
param
offset the offset to start the normalization from
source = s; strLength = s.length(); strInitOffset = offset; reset();