File | Doc | Category | Size | Date | Package
StringNormalizer.java | API Doc | phoneME MR2 API (J2ME) | 7372 | Wed May 02 18:00:46 BST 2007 | com.sun.j2me.global

StringNormalizer

public final class StringNormalizer extends Object implements StringDecomposer
A string normalizer is responsible for decomposing strings into their canonically decomposed equivalents (Normalization Form D).

Fields Summary
private static final int
CAPACITY_INCREMENT
The capacity increment value of the internal buffers.
private int[]
decomposition
Internal decomposition buffer.
private int
decOffset
Decomposition offset.
private int
decLength
Decomposition length.
private int
strOffset
String offset.
private int
strLength
String length.
private int
strInitOffset
String initial offset.
private int
maxDecomposition
Max decomposition length.
private String
source
The string being decomposed.
private NormalizationTable
table
A lookup table which is used during the normalization.
Constructors Summary
public StringNormalizer(NormalizationTable table)
Creates a new instance of StringNormalizer.

param
table a lookup table for the normalization

    
                        
       
        // Keep the lookup table, then cache the maximum decomposition length
        // it reports.
        this.table = table;
        this.maxDecomposition = this.table.getMaxDecompositionLength();
    
public StringNormalizer(String s, NormalizationTable table)
Creates a new instance of StringNormalizer.

param
s a string for the normalization
param
table a lookup table for the normalization

        // The single-argument constructor stores the table and its limits.
        this(table);
        // Remember the input and its length in UTF-16 code units.
        this.source = s;
        this.strLength = this.source.length();
    
Methods Summary
public int getNextElement()
Returns the next encoded code point value from the normalized input string. The methods of the NormalizationTable class can be used to inspect the returned value. Returns EOF_ELEMENT if the end of string is reached.

return
the next encoded code point value from the normalized input string or EOF_ELEMENT if the end of string is reached
see
NormalizationTable

        // Hand out code points still buffered from an earlier call first.
        if (decOffset < decLength) {
            return decomposition[decOffset++];
        }
        
        int value = nextUTF32();
        if (value == EOF_ELEMENT) {
            return EOF_ELEMENT;
        }

        // Judging by how the result is used below, the table returns either
        // a single encoded code point or the number of elements it wrote
        // into the buffer starting at the given offset.
        value = table.getCanonicalDecomposition(decomposition, 0, 
                value);
        
        if (NormalizationTable.isSingleCodePoint(value)) {
            if (NormalizationTable.isStable(value)) {
                // Fast path: nothing to buffer and nothing to reorder.
                return value;
            }
            decomposition[0] = value;
            decLength = 1;
        } else {
            decLength = value;
        }
        
        decOffset = 0;
        
        // Decompose till we get a stable code point. nextUTF32() reports the
        // end of the input with EOF_ELEMENT, so the loop tests that constant
        // rather than a bare literal.
        value = nextUTF32();
        while (value != EOF_ELEMENT) {
            int required = decLength + maxDecomposition;
            if (required > decomposition.length) {
                // Grow by the usual increment, but never to less than what
                // the next decomposition may need; a fixed increment alone
                // could be too small if maxDecomposition exceeds it.
                int newCapacity = decomposition.length + CAPACITY_INCREMENT;
                if (newCapacity < required) {
                    newCapacity = required;
                }
                int[] newDecomposition = new int[newCapacity];
                System.arraycopy(decomposition, 0, newDecomposition, 0, 
                        decLength);
                decomposition = newDecomposition;
            }
            
            value = table.getCanonicalDecomposition(decomposition, decLength, 
                    value);
            
            if (NormalizationTable.isSingleCodePoint(value)) {
                decomposition[decLength++] = value;
                if (NormalizationTable.isStable(value)) {
                    // The stable code point stays in the buffer and ends
                    // this run.
                    break;
                }
            } else {
                decLength += value;
            }
            
            value = nextUTF32();
        }
      
        // Order the code points according to their combining classes.
        // Repeated adjacent-swap passes run until a pass makes no swap;
        // an element whose combining class is 0 is never moved backwards.
        boolean checkOrder;
        do {
            checkOrder = false;
            
            for (int i = 1; i < decLength; ++i) {
                int cp1 = decomposition[i - 1];
                int cp2 = decomposition[i];

                int cc1 = NormalizationTable.getCombiningClass(cp1);
                int cc2 = NormalizationTable.getCombiningClass(cp2);
                
                if ((cc1 > cc2) && (cc2 != 0)) {
                    decomposition[i - 1] = cp2;
                    decomposition[i] = cp1;
                    checkOrder = true;
                }
            }
        } while (checkOrder);
        
        // Return the first buffered element; later calls drain the rest.
        if (decLength > 0) {
            return decomposition[decOffset++];
        }
        
        return EOF_ELEMENT;
    
public final int nextUTF32()
Returns the next code point value from the source string. It expects the input string to be UTF-16 encoded.

return
the next code point value

        if (strOffset >= strLength) {
            return EOF_ELEMENT;
        }

        int cp = (int)source.charAt(strOffset++);
        if (((cp & 0xfc00) == 0xd800) && (strOffset < strLength)) {
            // is a high surrogate cp
            int cp2 = (int)source.charAt(strOffset);
            if ((cp2 & 0xfc00) == 0xdc00) {
                // we have got suplementary low surrogate
                // so construct the final code point
                int wwww = (cp >> 6) & 0xf;                    
                cp = ((wwww + 1) << 16) | ((cp & 0x3f) << 10) | 
                        (cp2 & 0x3ff);

                ++strOffset;
            }
        }
        
        return cp;
    
public final void reset()
Restarts the decomposition.

        // Rewind the read position to the initial offset of the source.
        strOffset = strInitOffset;
        // Drop whatever is still held in the decomposition buffer.
        decLength = 0;
        decOffset = 0;
    
public final void setSource(java.lang.String s)
Sets the string for the normalization.

param
s the string

        source = s;
        strLength = s.length();
        strInitOffset = 0;
        reset();        
    
public final void setSource(java.lang.String s, int offset)
Sets the string for the normalization.

param
s the string
param
offset the offset to start the normalization from

        // Install the new input and record its length in UTF-16 code units.
        this.source = s;
        this.strLength = this.source.length();
        // Remember where to start; reset() rewinds to this offset and
        // discards any buffered decomposition.
        this.strInitOffset = offset;
        reset();