File | Doc | Category | Size | Date | Package
ConditionalSpecialCasing.java | API Doc | Java SE 6 API | 13904 | Tue Jun 10 00:25:36 BST 2008 | java.lang

ConditionalSpecialCasing

public final class ConditionalSpecialCasing extends Object
This is a utility class for String.toLowerCase() and String.toUpperCase() that handles special casing with conditions. In other words, it handles the mappings with conditions that are defined in the Special Casing Properties file.

Note that the unconditional case mappings (including 1:M mappings) are handled in Character.toLower/UpperCase().

Fields Summary
static final int
FINAL_CASED
static final int
AFTER_SOFT_DOTTED
static final int
MORE_ABOVE
static final int
AFTER_I
static final int
NOT_BEFORE_DOT
static final int
COMBINING_CLASS_ABOVE
static Entry[]
entry
static Hashtable
entryTable
Constructors Summary
Methods Summary
private static boolean isAfterI(java.lang.String src, int index)
Implements the "After_I" condition. Specification: The last preceding base character was an uppercase I, and there is no intervening combining character class 230 (ABOVE). Regular Expression: Before C: [I]([{cc!=230}&{cc!=0}])*

	// Scans backwards from index, one code point at a time, and reports
	// whether an uppercase 'I' is reached before any character that ends
	// the context (combining class 0 or COMBINING_CLASS_ABOVE).
	//
	// src   - the string being case-mapped
	// index - position in src of the character under consideration
	// Returns true only when 'I' is found first; false otherwise,
	// including when the start of the string is reached.
	int ch;
	int cc;

	// Look for the last preceding base character
	for (int i = index; i > 0; i -= Character.charCount(ch)) {

	    ch = src.codePointBefore(i);

	    if (ch == 'I') {
		return true;
	    } else {
		cc = Normalizer.getCombiningClass(ch);
		// Any other base character (cc == 0) or an ABOVE mark between
		// 'I' and the current position breaks the "After_I" context.
		if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
		    return false;
		}
	    }
	}

	// Ran off the beginning of the string without meeting 'I'.
	return false;
    
private static boolean isAfterSoftDotted(java.lang.String src, int index)
Implements the "After_Soft_Dotted" condition. Specification: The last preceding character with combining class of zero before C was Soft_Dotted, and there is no intervening combining character class 230 (ABOVE). Regular Expression: Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*

	// Step backwards through src by whole code points, starting just
	// before index. The first Soft_Dotted character satisfies the
	// condition; a character of combining class 0 or
	// COMBINING_CLASS_ABOVE met first defeats it.
	int pos = index;

	while (pos > 0) {
	    final int cp = src.codePointBefore(pos);

	    if (isSoftDotted(cp)) {
		return true;
	    }

	    final int combiningClass = Normalizer.getCombiningClass(cp);
	    if (combiningClass == 0 || combiningClass == COMBINING_CLASS_ABOVE) {
		return false;
	    }

	    // Skip over this code point (one or two chars) and keep looking.
	    pos -= Character.charCount(cp);
	}

	// Beginning of the string reached without a Soft_Dotted character.
	return false;
    
private static boolean isBeforeDot(java.lang.String src, int index)
Implements the "Before_Dot" condition. Specification: C is followed by U+0307 COMBINING DOT ABOVE. Any sequence of characters with a combining class that is neither 0 nor 230 may intervene between the current character and the combining dot above. Regular Expression: After C: ([{cc!=230}&{cc!=0}])*[\u0307]

	// Scans forward from the code point after index and reports whether
	// U+0307 is reached before any character that ends the context
	// (combining class 0 or COMBINING_CLASS_ABOVE).
	//
	// src   - the string being case-mapped
	// index - position in src of the character under consideration
	// Returns true only when U+0307 is found first; false otherwise,
	// including when the end of the string is reached.
	int ch;
	int cc;
	int len = src.length();

	// Look for a following COMBINING DOT ABOVE
	for (int i = index + Character.charCount(src.codePointAt(index));
		i < len; i += Character.charCount(ch)) {
	    
	    ch = src.codePointAt(i);

	    if (ch == '\u0307') {
		return true;
	    } else {
		cc = Normalizer.getCombiningClass(ch);
		// A base character (cc == 0) or another ABOVE mark before the
		// dot breaks the "Before_Dot" context.
		if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {
		    return false;
		}
	    }
	}

	// End of string reached without meeting U+0307.
	return false;
    
private static boolean isCased(int ch)
Examines whether a character is 'cased'. A character C is defined to be 'cased' if and only if at least one of the following is true for C: uppercase==true, or lowercase==true, or general_category==titlecase_letter. The uppercase and lowercase property values are specified in the data file DerivedCoreProperties.txt in the Unicode Character Database.

	// Letters whose general category is lowercase, uppercase or
	// titlecase are cased outright.
	switch (Character.getType(ch)) {
	case Character.LOWERCASE_LETTER:
	case Character.UPPERCASE_LETTER:
	case Character.TITLECASE_LETTER:
	    return true;
	default:
	    break;
	}

	// Otherwise check the hard-coded Other_Lowercase and Other_Uppercase
	// ranges.
	return
	    // MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
	    ((ch >= 0x02B0) && (ch <= 0x02B8))
	    // MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP
	    || ((ch >= 0x02C0) && (ch <= 0x02C1))
	    // MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
	    || ((ch >= 0x02E0) && (ch <= 0x02E4))
	    // COMBINING GREEK YPOGEGRAMMENI
	    || (ch == 0x0345)
	    // GREEK YPOGEGRAMMENI
	    || (ch == 0x037A)
	    // MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
	    || ((ch >= 0x1D2C) && (ch <= 0x1D61))
	    // ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND
	    // SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND
	    || ((ch >= 0x2160) && (ch <= 0x217F))
	    // CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z
	    // CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
	    || ((ch >= 0x24B6) && (ch <= 0x24E9));
    
private static boolean isConditionMet(java.lang.String src, int index, java.util.Locale locale, int condition)

	// Dispatch on the condition code to the matching context predicate.
	if (condition == FINAL_CASED) {
	    return isFinalCased(src, index, locale);
	}
	if (condition == AFTER_SOFT_DOTTED) {
	    return isAfterSoftDotted(src, index);
	}
	if (condition == MORE_ABOVE) {
	    return isMoreAbove(src, index);
	}
	if (condition == AFTER_I) {
	    return isAfterI(src, index);
	}
	if (condition == NOT_BEFORE_DOT) {
	    // This condition is the negation of "Before_Dot".
	    return !isBeforeDot(src, index);
	}

	// Any other condition value is treated as satisfied.
	return true;
    
private static boolean isFinalCased(java.lang.String src, int index, java.util.Locale locale)
Implements the "Final_Cased" condition. Specification: Within the closest word boundaries containing C, there is a cased letter before C, and there is no cased letter after C. Regular Expression: Before C: [{cased==true}][{wordBoundary!=true}]*; After C: !([{wordBoundary!=true}]*[{cased}])

	// Word boundaries are taken from a locale-specific word BreakIterator
	// over the whole source string.
	final BreakIterator words = BreakIterator.getWordInstance(locale);
	words.setText(src);

	// Phase 1: walk backwards (stopping at a word boundary) until a
	// 'cased' letter is found before the index.
	boolean casedBefore = false;
	int pos = index;
	while ((pos >= 0) && !words.isBoundary(pos)) {
	    final int prev = src.codePointBefore(pos);
	    if (isCased(prev)) {
		casedBefore = true;
		break;
	    }
	    pos -= Character.charCount(prev);
	}

	if (!casedBefore) {
	    return false;
	}

	// Phase 2: walk forwards from the code point after the index
	// (stopping at a word boundary); any 'cased' letter there means the
	// current character is not final.
	final int end = src.length();
	pos = index + Character.charCount(src.codePointAt(index));
	while ((pos < end) && !words.isBoundary(pos)) {
	    final int next = src.codePointAt(pos);
	    if (isCased(next)) {
		return false;
	    }
	    pos += Character.charCount(next);
	}

	return true;
    
private static boolean isMoreAbove(java.lang.String src, int index)
Implements the "More_Above" condition. Specification: C is followed by one or more characters of combining class 230 (ABOVE) in the combining character sequence. Regular Expression: After C: [{cc!=0}]*[{cc==230}]

	// Scan forward from the code point after index. A character of
	// combining class COMBINING_CLASS_ABOVE satisfies the condition;
	// a character of combining class 0 met first defeats it.
	final int end = src.length();
	int pos = index + Character.charCount(src.codePointAt(index));

	while (pos < end) {
	    final int cp = src.codePointAt(pos);
	    final int combiningClass = Normalizer.getCombiningClass(cp);

	    if (combiningClass == COMBINING_CLASS_ABOVE) {
		return true;
	    }
	    if (combiningClass == 0) {
		return false;
	    }

	    // Any other combining mark may intervene; keep scanning.
	    pos += Character.charCount(cp);
	}

	// End of string reached without an ABOVE mark.
	return false;
    
private static boolean isSoftDotted(int ch)

	switch (ch) {
	case 0x0069: // Soft_Dotted # L&       LATIN SMALL LETTER I
	case 0x006A: // Soft_Dotted # L&       LATIN SMALL LETTER J
	case 0x012F: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH OGONEK
	case 0x0268: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH STROKE
	case 0x0456: // Soft_Dotted # L&       CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
	case 0x0458: // Soft_Dotted # L&       CYRILLIC SMALL LETTER JE
	case 0x1D62: // Soft_Dotted # L&       LATIN SUBSCRIPT SMALL LETTER I
	case 0x1E2D: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH TILDE BELOW
	case 0x1ECB: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH DOT BELOW
	case 0x2071: // Soft_Dotted # L&       SUPERSCRIPT LATIN SMALL LETTER I
	    return true;
	default:
	    return false;
	}
    
private static char[] lookUpTable(java.lang.String src, int index, java.util.Locale locale, boolean bLowerCasing)

	// Fetch the set of conditional entries registered for the code point
	// at index; no set means no conditional mapping.
	HashSet candidates = (HashSet)entryTable.get(new Integer(src.codePointAt(index)));
	if (candidates == null) {
	    return null;
	}

	String localeLang = locale.getLanguage();
	for (Iterator it = candidates.iterator(); it.hasNext(); ) {
	    Entry candidate = (Entry)it.next();

	    // An entry with a null language applies to every locale.
	    String requiredLang = candidate.getLanguage();
	    boolean langMatches = (requiredLang == null) || requiredLang.equals(localeLang);

	    if (langMatches && isConditionMet(src, index, locale, candidate.getCondition())) {
		// First matching entry wins; pick the mapping for the
		// requested direction.
		if (bLowerCasing) {
		    return candidate.getLowerCase();
		}
		return candidate.getUpperCase();
	    }
	}

	// No entry matched both the language and the context condition.
	return null;
    
static char[] toLowerCaseCharArray(java.lang.String src, int index, java.util.Locale locale)

        return lookUpTable(src, index, locale, true);
    
static int toLowerCaseEx(java.lang.String src, int index, java.util.Locale locale)

     
	// NOTE(review): as shown here, the loop below repopulates the shared
	// entryTable on every call. It looks like class-level initialization
	// that the page extraction merged into this method -- confirm
	// against the upstream source before relying on this placement.
	//
	// create hashtable from the entry
	// (groups the entries by code point: each key maps to the set of
	// entries that share that code point)
	for (int i = 0; i < entry.length; i ++) {
	    Entry cur = entry[i];
	    Integer cp = new Integer(cur.getCodePoint());
	    HashSet set = (HashSet)entryTable.get(cp);
	    if (set == null) {
		// first entry seen for this code point
		set = new HashSet();
	    }
	    set.add(cur);
	    entryTable.put(cp, set);
	}
    
	// Look up the conditional lower-case mapping for the code point at
	// index (null when no entry applies).
        char[] result = lookUpTable(src, index, locale, true);

	if (result != null) {
	    if (result.length == 1) {
		// single-char mapping: return it directly
		return result[0];
	    } else {
		// any other mapping length cannot be returned as one value,
		// so Character.ERROR is returned instead
		return Character.ERROR;
	    }
	} else {
	    // default to Character class' one
	    return Character.toLowerCase(src.codePointAt(index));
	}
    
static char[] toUpperCaseCharArray(java.lang.String src, int index, java.util.Locale locale)

        // Prefer a conditional upper-case mapping when one applies;
        // otherwise fall back to the Character class' mapping.
        final char[] conditional = lookUpTable(src, index, locale, false);
	return (conditional != null)
		? conditional
		: Character.toUpperCaseCharArray(src.codePointAt(index));
    
static int toUpperCaseEx(java.lang.String src, int index, java.util.Locale locale)

        // Look up the conditional upper-case mapping for the code point at
        // index (null when no entry applies).
        final char[] mapping = lookUpTable(src, index, locale, false);

	if (mapping == null) {
	    // default to Character class' one
	    return Character.toUpperCaseEx(src.codePointAt(index));
	}

	// Only a single-char mapping can be returned as one value; any
	// other length yields Character.ERROR.
	if (mapping.length != 1) {
	    return Character.ERROR;
	}
	return mapping[0];