File Doc Category Size Date Package
ConditionalSpecialCasing.java API Doc Java SE 6 API 13904 Tue Jun 10 00:25:36 BST 2008 java.lang

ConditionalSpecialCasing

java.lang.Object

public final class ConditionalSpecialCasing extends Object

This is a utility class for String.toLowerCase() and String.toUpperCase() that handles special casing with conditions. In other words, it handles the conditional mappings that are defined in the Special Casing Properties file.

Note that the unconditional case mappings (including 1:M mappings) are handled in Character.toLower/UpperCase().

Fields Summary
static final int
FINAL_CASED
static final int
AFTER_SOFT_DOTTED
static final int
MORE_ABOVE
static final int
AFTER_I
static final int
NOT_BEFORE_DOT
static final int
COMBINING_CLASS_ABOVE
static Entry[]
entry
static Hashtable
entryTable
Constructors Summary
Methods Summary
private static boolean isAfterI(java.lang.String src, int index)
Implements the "After_I" condition Specification: The last preceding base character was an uppercase I, and there is no intervening combining character class 230 (ABOVE). Regular Expression: Before C: [I]([{cc!=230}&{cc!=0}])*
int ch; int cc; // Look for the last preceding base character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (ch == 'I") { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false;
private static boolean isAfterSoftDotted(java.lang.String src, int index)
Implements the "After_Soft_Dotted" condition Specification: The last preceding character with combining class of zero before C was Soft_Dotted, and there is no intervening combining character class 230 (ABOVE). Regular Expression: Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*
int ch; int cc; // Look for the last preceding character for (int i = index; i > 0; i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (isSoftDotted(ch)) { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false;
private static boolean isBeforeDot(java.lang.String src, int index)
Implements the "Before_Dot" condition Specification: C is followed by U+0307 COMBINING DOT ABOVE. Any sequence of characters with a combining class that is neither 0 nor 230 may intervene between the current character and the combining dot above. Regular Expression: After C: ([{cc!=230}&{cc!=0}])*[\u0307]
int ch; int cc; int len = src.length(); // Look for a following COMBINING DOT ABOVE for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); if (ch == '\u0307") { return true; } else { cc = Normalizer.getCombiningClass(ch); if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) { return false; } } } return false;
private static boolean isCased(int ch)
Examines whether a character is 'cased'. A character C is defined to be 'cased' if and only if at least one of following are true for C: uppercase==true, or lowercase==true, or general_category==titlecase_letter. The uppercase and lowercase property values are specified in the data file DerivedCoreProperties.txt in the Unicode Character Database.
int type = Character.getType(ch); if (type == Character.LOWERCASE_LETTER || type == Character.UPPERCASE_LETTER || type == Character.TITLECASE_LETTER) { return true; } else { // Check for Other_Lowercase and Other_Uppercase // if ((ch >= 0x02B0) && (ch <= 0x02B8)) { // MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y return true; } else if ((ch >= 0x02C0) && (ch <= 0x02C1)) { // MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP return true; } else if ((ch >= 0x02E0) && (ch <= 0x02E4)) { // MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP return true; } else if (ch == 0x0345) { // COMBINING GREEK YPOGEGRAMMENI return true; } else if (ch == 0x037A) { // GREEK YPOGEGRAMMENI return true; } else if ((ch >= 0x1D2C) && (ch <= 0x1D61)) { // MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI return true; } else if ((ch >= 0x2160) && (ch <= 0x217F)) { // ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND // SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND return true; } else if ((ch >= 0x24B6) && (ch <= 0x24E9)) { // CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z // CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z return true; } else { return false; } }
private static boolean isConditionMet(java.lang.String src, int index, java.util.Locale locale, int condition)
switch (condition) { case FINAL_CASED: return isFinalCased(src, index, locale); case AFTER_SOFT_DOTTED: return isAfterSoftDotted(src, index); case MORE_ABOVE: return isMoreAbove(src, index); case AFTER_I: return isAfterI(src, index); case NOT_BEFORE_DOT: return !isBeforeDot(src, index); default: return true; }
private static boolean isFinalCased(java.lang.String src, int index, java.util.Locale locale)
Implements the "Final_Cased" condition Specification: Within the closest word boundaries containing C, there is a cased letter before C, and there is no cased letter after C. Regular Expression: Before C: [{cased==true}][{wordBoundary!=true}]* After C: !([{wordBoundary!=true}]*[{cased}])
BreakIterator wordBoundary = BreakIterator.getWordInstance(locale); wordBoundary.setText(src); int ch; // Look for a preceding 'cased' letter for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i); i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (isCased(ch)) { int len = src.length(); // Check that there is no 'cased' letter after the index for (i = index + Character.charCount(src.codePointAt(index)); (i < len) && !wordBoundary.isBoundary(i); i += Character.charCount(ch)) { ch = src.codePointAt(i); if (isCased(ch)) { return false; } } return true; } } return false;
private static boolean isMoreAbove(java.lang.String src, int index)
Implements the "More_Above" condition Specification: C is followed by one or more characters of combining class 230 (ABOVE) in the combining character sequence. Regular Expression: After C: [{cc!=0}]*[{cc==230}]
int ch; int cc; int len = src.length(); // Look for a following ABOVE combining class character for (int i = index + Character.charCount(src.codePointAt(index)); i < len; i += Character.charCount(ch)) { ch = src.codePointAt(i); cc = Normalizer.getCombiningClass(ch); if (cc == COMBINING_CLASS_ABOVE) { return true; } else if (cc == 0) { return false; } } return false;
private static boolean isSoftDotted(int ch)
switch (ch) { case 0x0069: // Soft_Dotted # L& LATIN SMALL LETTER I case 0x006A: // Soft_Dotted # L& LATIN SMALL LETTER J case 0x012F: // Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK case 0x0268: // Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE case 0x0456: // Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I case 0x0458: // Soft_Dotted # L& CYRILLIC SMALL LETTER JE case 0x1D62: // Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER I case 0x1E2D: // Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW case 0x1ECB: // Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW case 0x2071: // Soft_Dotted # L& SUPERSCRIPT LATIN SMALL LETTER I return true; default: return false; }
private static char[] lookUpTable(java.lang.String src, int index, java.util.Locale locale, boolean bLowerCasing)
HashSet set = (HashSet)entryTable.get(new Integer(src.codePointAt(index))); if (set != null) { Iterator iter = set.iterator(); String currentLang = locale.getLanguage(); while (iter.hasNext()) { Entry entry = (Entry)iter.next(); String conditionLang= entry.getLanguage(); if (((conditionLang == null) || (conditionLang.equals(currentLang))) && isConditionMet(src, index, locale, entry.getCondition())) { return (bLowerCasing ? entry.getLowerCase() : entry.getUpperCase()); } } } return null;
static char[] toLowerCaseCharArray(java.lang.String src, int index, java.util.Locale locale)
return lookUpTable(src, index, locale, true);
static int toLowerCaseEx(java.lang.String src, int index, java.util.Locale locale)
// create hashtable from the entry for (int i = 0; i < entry.length; i ++) { Entry cur = entry[i]; Integer cp = new Integer(cur.getCodePoint()); HashSet set = (HashSet)entryTable.get(cp); if (set == null) { set = new HashSet(); } set.add(cur); entryTable.put(cp, set); } char[] result = lookUpTable(src, index, locale, true); if (result != null) { if (result.length == 1) { return result[0]; } else { return Character.ERROR; } } else { // default to Character class' one return Character.toLowerCase(src.codePointAt(index)); }
static char[] toUpperCaseCharArray(java.lang.String src, int index, java.util.Locale locale)
char[] result = lookUpTable(src, index, locale, false); if (result != null) { return result; } else { return Character.toUpperCaseCharArray(src.codePointAt(index)); }
static int toUpperCaseEx(java.lang.String src, int index, java.util.Locale locale)
char[] result = lookUpTable(src, index, locale, false); if (result != null) { if (result.length == 1) { return result[0]; } else { return Character.ERROR; } } else { // default to Character class' one return Character.toUpperCaseEx(src.codePointAt(index)); }