Fields Summary |
---|
public static final int | MIN_RADIXThe minimum radix available for conversion to and from strings.
The constant value of this field is the smallest value permitted
for the radix argument in radix-conversion methods such as the
digit method, the forDigit
method, and the toString method of class
Integer . |
public static final int | MAX_RADIXThe maximum radix available for conversion to and from strings.
The constant value of this field is the largest value permitted
for the radix argument in radix-conversion methods such as the
digit method, the forDigit
method, and the toString method of class
Integer . |
public static final char | MIN_VALUEThe constant value of this field is the smallest value of type
char , '\u0000' . |
public static final char | MAX_VALUEThe constant value of this field is the largest value of type
char , '\uFFFF' . |
public static final Class | TYPEThe Class instance representing the primitive type
char . |
public static final byte | UNASSIGNEDGeneral category "Cn" in the Unicode specification. |
public static final byte | UPPERCASE_LETTERGeneral category "Lu" in the Unicode specification. |
public static final byte | LOWERCASE_LETTERGeneral category "Ll" in the Unicode specification. |
public static final byte | TITLECASE_LETTERGeneral category "Lt" in the Unicode specification. |
public static final byte | MODIFIER_LETTERGeneral category "Lm" in the Unicode specification. |
public static final byte | OTHER_LETTERGeneral category "Lo" in the Unicode specification. |
public static final byte | NON_SPACING_MARKGeneral category "Mn" in the Unicode specification. |
public static final byte | ENCLOSING_MARKGeneral category "Me" in the Unicode specification. |
public static final byte | COMBINING_SPACING_MARKGeneral category "Mc" in the Unicode specification. |
public static final byte | DECIMAL_DIGIT_NUMBERGeneral category "Nd" in the Unicode specification. |
public static final byte | LETTER_NUMBERGeneral category "Nl" in the Unicode specification. |
public static final byte | OTHER_NUMBERGeneral category "No" in the Unicode specification. |
public static final byte | SPACE_SEPARATORGeneral category "Zs" in the Unicode specification. |
public static final byte | LINE_SEPARATORGeneral category "Zl" in the Unicode specification. |
public static final byte | PARAGRAPH_SEPARATORGeneral category "Zp" in the Unicode specification. |
public static final byte | CONTROLGeneral category "Cc" in the Unicode specification. |
public static final byte | FORMATGeneral category "Cf" in the Unicode specification. |
public static final byte | PRIVATE_USEGeneral category "Co" in the Unicode specification. |
public static final byte | SURROGATEGeneral category "Cs" in the Unicode specification. |
public static final byte | DASH_PUNCTUATIONGeneral category "Pd" in the Unicode specification. |
public static final byte | START_PUNCTUATIONGeneral category "Ps" in the Unicode specification. |
public static final byte | END_PUNCTUATIONGeneral category "Pe" in the Unicode specification. |
public static final byte | CONNECTOR_PUNCTUATIONGeneral category "Pc" in the Unicode specification. |
public static final byte | OTHER_PUNCTUATIONGeneral category "Po" in the Unicode specification. |
public static final byte | MATH_SYMBOLGeneral category "Sm" in the Unicode specification. |
public static final byte | CURRENCY_SYMBOLGeneral category "Sc" in the Unicode specification. |
public static final byte | MODIFIER_SYMBOLGeneral category "Sk" in the Unicode specification. |
public static final byte | OTHER_SYMBOLGeneral category "So" in the Unicode specification. |
public static final byte | INITIAL_QUOTE_PUNCTUATIONGeneral category "Pi" in the Unicode specification. |
public static final byte | FINAL_QUOTE_PUNCTUATIONGeneral category "Pf" in the Unicode specification. |
static final int | ERRORError flag. Use int (code point) to avoid confusion with U+FFFF. |
public static final byte | DIRECTIONALITY_UNDEFINEDUndefined bidirectional character type. Undefined char
values have undefined directionality in the Unicode specification. |
public static final byte | DIRECTIONALITY_LEFT_TO_RIGHTStrong bidirectional character type "L" in the Unicode specification. |
public static final byte | DIRECTIONALITY_RIGHT_TO_LEFTStrong bidirectional character type "R" in the Unicode specification. |
public static final byte | DIRECTIONALITY_RIGHT_TO_LEFT_ARABICStrong bidirectional character type "AL" in the Unicode specification. |
public static final byte | DIRECTIONALITY_EUROPEAN_NUMBERWeak bidirectional character type "EN" in the Unicode specification. |
public static final byte | DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATORWeak bidirectional character type "ES" in the Unicode specification. |
public static final byte | DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATORWeak bidirectional character type "ET" in the Unicode specification. |
public static final byte | DIRECTIONALITY_ARABIC_NUMBERWeak bidirectional character type "AN" in the Unicode specification. |
public static final byte | DIRECTIONALITY_COMMON_NUMBER_SEPARATORWeak bidirectional character type "CS" in the Unicode specification. |
public static final byte | DIRECTIONALITY_NONSPACING_MARKWeak bidirectional character type "NSM" in the Unicode specification. |
public static final byte | DIRECTIONALITY_BOUNDARY_NEUTRALWeak bidirectional character type "BN" in the Unicode specification. |
public static final byte | DIRECTIONALITY_PARAGRAPH_SEPARATORNeutral bidirectional character type "B" in the Unicode specification. |
public static final byte | DIRECTIONALITY_SEGMENT_SEPARATORNeutral bidirectional character type "S" in the Unicode specification. |
public static final byte | DIRECTIONALITY_WHITESPACENeutral bidirectional character type "WS" in the Unicode specification. |
public static final byte | DIRECTIONALITY_OTHER_NEUTRALSNeutral bidirectional character type "ON" in the Unicode specification. |
public static final byte | DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDINGStrong bidirectional character type "LRE" in the Unicode specification. |
public static final byte | DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDEStrong bidirectional character type "LRO" in the Unicode specification. |
public static final byte | DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDINGStrong bidirectional character type "RLE" in the Unicode specification. |
public static final byte | DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDEStrong bidirectional character type "RLO" in the Unicode specification. |
public static final byte | DIRECTIONALITY_POP_DIRECTIONAL_FORMATWeak bidirectional character type "PDF" in the Unicode specification. |
public static final char | MIN_HIGH_SURROGATEThe minimum value of a Unicode high-surrogate code unit in the
UTF-16 encoding. A high-surrogate is also known as a
leading-surrogate. |
public static final char | MAX_HIGH_SURROGATEThe maximum value of a Unicode high-surrogate code unit in the
UTF-16 encoding. A high-surrogate is also known as a
leading-surrogate. |
public static final char | MIN_LOW_SURROGATEThe minimum value of a Unicode low-surrogate code unit in the
UTF-16 encoding. A low-surrogate is also known as a
trailing-surrogate. |
public static final char | MAX_LOW_SURROGATEThe maximum value of a Unicode low-surrogate code unit in the
UTF-16 encoding. A low-surrogate is also known as a
trailing-surrogate. |
public static final char | MIN_SURROGATEThe minimum value of a Unicode surrogate code unit in the UTF-16 encoding. |
public static final char | MAX_SURROGATEThe maximum value of a Unicode surrogate code unit in the UTF-16 encoding. |
public static final int | MIN_SUPPLEMENTARY_CODE_POINTThe minimum value of a supplementary code point. |
public static final int | MIN_CODE_POINTThe minimum value of a Unicode code point. |
public static final int | MAX_CODE_POINTThe maximum value of a Unicode code point. |
private final char | valueThe value of the Character . |
private static final long | serialVersionUIDuse serialVersionUID from JDK 1.0.2 for interoperability |
private static final int | FAST_PATH_MAX |
public static final int | SIZEThe number of bits used to represent a char value in unsigned
binary form. |
Methods Summary |
---|
public static int | charCount(int codePoint)Determines the number of char values needed to
represent the specified character (Unicode code point). If the
specified character is equal to or greater than 0x10000, then
the method returns 2. Otherwise, the method returns 1.
This method doesn't validate the specified character to be a
valid Unicode code point. The caller must validate the
character value using {@link #isValidCodePoint(int) isValidCodePoint}
if necessary.
return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT? 2 : 1;
|
public char | charValue()Returns the value of this Character object.
return value;
|
public static int | codePointAt(java.lang.CharSequence seq, int index)Returns the code point at the given index of the
CharSequence . If the char value at
the given index in the CharSequence is in the
high-surrogate range, the following index is less than the
length of the CharSequence , and the
char value at the following index is in the
low-surrogate range, then the supplementary code point
corresponding to this surrogate pair is returned. Otherwise,
the char value at the given index is returned.
char c1 = seq.charAt(index++);
if (isHighSurrogate(c1)) {
if (index < seq.length()) {
char c2 = seq.charAt(index);
if (isLowSurrogate(c2)) {
return toCodePoint(c1, c2);
}
}
}
return c1;
|
public static int | codePointAt(char[] a, int index)Returns the code point at the given index of the
char array. If the char value at
the given index in the char array is in the
high-surrogate range, the following index is less than the
length of the char array, and the
char value at the following index is in the
low-surrogate range, then the supplementary code point
corresponding to this surrogate pair is returned. Otherwise,
the char value at the given index is returned.
return codePointAtImpl(a, index, a.length);
|
public static int | codePointAt(char[] a, int index, int limit)Returns the code point at the given index of the
char array, where only array elements with
index less than limit can be used. If
the char value at the given index in the
char array is in the high-surrogate range, the
following index is less than the limit , and the
char value at the following index is in the
low-surrogate range, then the supplementary code point
corresponding to this surrogate pair is returned. Otherwise,
the char value at the given index is returned.
if (index >= limit || limit < 0 || limit > a.length) {
throw new IndexOutOfBoundsException();
}
return codePointAtImpl(a, index, limit);
|
static int | codePointAtImpl(char[] a, int index, int limit)
char c1 = a[index++];
if (isHighSurrogate(c1)) {
if (index < limit) {
char c2 = a[index];
if (isLowSurrogate(c2)) {
return toCodePoint(c1, c2);
}
}
}
return c1;
|
public static int | codePointBefore(java.lang.CharSequence seq, int index)Returns the code point preceding the given index of the
CharSequence . If the char value at
(index - 1) in the CharSequence is in
the low-surrogate range, (index - 2) is not
negative, and the char value at (index -
2) in the CharSequence is in the
high-surrogate range, then the supplementary code point
corresponding to this surrogate pair is returned. Otherwise,
the char value at (index - 1) is
returned.
char c2 = seq.charAt(--index);
if (isLowSurrogate(c2)) {
if (index > 0) {
char c1 = seq.charAt(--index);
if (isHighSurrogate(c1)) {
return toCodePoint(c1, c2);
}
}
}
return c2;
|
public static int | codePointBefore(char[] a, int index)Returns the code point preceding the given index of the
char array. If the char value at
(index - 1) in the char array is in
the low-surrogate range, (index - 2) is not
negative, and the char value at (index -
2) in the char array is in the
high-surrogate range, then the supplementary code point
corresponding to this surrogate pair is returned. Otherwise,
the char value at (index - 1) is
returned.
return codePointBeforeImpl(a, index, 0);
|
public static int | codePointBefore(char[] a, int index, int start)Returns the code point preceding the given index of the
char array, where only array elements with
index greater than or equal to start
can be used. If the char value at (index -
1) in the char array is in the
low-surrogate range, (index - 2) is not less than
start , and the char value at
(index - 2) in the char array is in
the high-surrogate range, then the supplementary code point
corresponding to this surrogate pair is returned. Otherwise,
the char value at (index - 1) is
returned.
if (index <= start || start < 0 || start >= a.length) {
throw new IndexOutOfBoundsException();
}
return codePointBeforeImpl(a, index, start);
|
static int | codePointBeforeImpl(char[] a, int index, int start)
char c2 = a[--index];
if (isLowSurrogate(c2)) {
if (index > start) {
char c1 = a[--index];
if (isHighSurrogate(c1)) {
return toCodePoint(c1, c2);
}
}
}
return c2;
|
public static int | codePointCount(java.lang.CharSequence seq, int beginIndex, int endIndex)Returns the number of Unicode code points in the text range of
the specified char sequence. The text range begins at the
specified beginIndex and extends to the
char at index endIndex - 1 . Thus the
length (in char s) of the text range is
endIndex-beginIndex . Unpaired surrogates within
the text range count as one code point each.
int length = seq.length();
if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
throw new IndexOutOfBoundsException();
}
int n = 0;
for (int i = beginIndex; i < endIndex; ) {
n++;
if (isHighSurrogate(seq.charAt(i++))) {
if (i < endIndex && isLowSurrogate(seq.charAt(i))) {
i++;
}
}
}
return n;
|
public static int | codePointCount(char[] a, int offset, int count)Returns the number of Unicode code points in a subarray of the
char array argument. The offset
argument is the index of the first char of the
subarray and the count argument specifies the
length of the subarray in char s. Unpaired
surrogates within the subarray count as one code point each.
if (count > a.length - offset || offset < 0 || count < 0) {
throw new IndexOutOfBoundsException();
}
return codePointCountImpl(a, offset, count);
|
static int | codePointCountImpl(char[] a, int offset, int count)
int endIndex = offset + count;
int n = 0;
for (int i = offset; i < endIndex; ) {
n++;
if (isHighSurrogate(a[i++])) {
if (i < endIndex && isLowSurrogate(a[i])) {
i++;
}
}
}
return n;
|
public int | compareTo(java.lang.Character anotherCharacter)Compares two Character objects numerically.
return this.value - anotherCharacter.value;
|
public static int | digit(char ch, int radix)Returns the numeric value of the character ch in the
specified radix.
If the radix is not in the range MIN_RADIX <=
radix <= MAX_RADIX or if the
value of ch is not a valid digit in the specified
radix, -1 is returned. A character is a valid digit
if at least one of the following is true:
- The method
isDigit is true of the character
and the Unicode decimal digit value of the character (or its
single-character decomposition) is less than the specified radix.
In this case the decimal digit value is returned.
- The character is one of the uppercase Latin letters
'A' through 'Z' and its code is less than
radix + 'A' - 10 .
In this case, ch - 'A' + 10
is returned.
- The character is one of the lowercase Latin letters
'a' through 'z' and its code is less than
radix + 'a' - 10 .
In this case, ch - 'a' + 10
is returned.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #digit(int, int)} method.
return digit((int)ch, radix);
|
public static int | digit(int codePoint, int radix)Returns the numeric value of the specified character (Unicode
code point) in the specified radix.
If the radix is not in the range MIN_RADIX <=
radix <= MAX_RADIX or if the
character is not a valid digit in the specified
radix, -1 is returned. A character is a valid digit
if at least one of the following is true:
- The method {@link #isDigit(int) isDigit(codePoint)} is
true of the character
and the Unicode decimal digit value of the character (or its
single-character decomposition) is less than the specified radix.
In this case the decimal digit value is returned.
- The character is one of the uppercase Latin letters
'A' through 'Z' and its code is less than
radix + 'A' - 10 .
In this case, codePoint - 'A' + 10
is returned.
- The character is one of the lowercase Latin letters
'a' through 'z' and its code is less than
radix + 'a' - 10 .
In this case, codePoint - 'a' + 10
is returned.
int digit = -1;
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
digit = CharacterDataLatin1.digit(codePoint, radix);
} else {
int plane = getPlane(codePoint);
switch(plane) {
case(0):
digit = CharacterData00.digit(codePoint, radix);
break;
case(1):
digit = CharacterData01.digit(codePoint, radix);
break;
case(2):
digit = CharacterData02.digit(codePoint, radix);
break;
case(3): // Undefined
case(4): // Undefined
case(5): // Undefined
case(6): // Undefined
case(7): // Undefined
case(8): // Undefined
case(9): // Undefined
case(10): // Undefined
case(11): // Undefined
case(12): // Undefined
case(13): // Undefined
digit = CharacterDataUndefined.digit(codePoint, radix);
break;
case(14):
digit = CharacterData0E.digit(codePoint, radix);
break;
case(15): // Private Use
case(16): // Private Use
digit = CharacterDataPrivateUse.digit(codePoint, radix);
break;
default:
// the argument's plane is invalid, and thus is an invalid codepoint
// digit remains -1;
break;
}
}
return digit;
|
public boolean | equals(java.lang.Object obj)Compares this object against the specified object.
The result is true if and only if the argument is not
null and is a Character object that
represents the same char value as this object.
if (obj instanceof Character) {
return value == ((Character)obj).charValue();
}
return false;
|
public static char | forDigit(int digit, int radix)Determines the character representation for a specific digit in
the specified radix. If the value of radix is not a
valid radix, or the value of digit is not a valid
digit in the specified radix, the null character
('\u0000' ) is returned.
The radix argument is valid if it is greater than or
equal to MIN_RADIX and less than or equal to
MAX_RADIX . The digit argument is valid if
0 <=digit < radix .
If the digit is less than 10, then
'0' + digit is returned. Otherwise, the value
'a' + digit - 10 is returned.
if ((digit >= radix) || (digit < 0)) {
return '\0';
}
if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
return '\0';
}
if (digit < 10) {
return (char)('0' + digit);
}
return (char)('a' - 10 + digit);
|
public static byte | getDirectionality(char ch)Returns the Unicode directionality property for the given
character. Character directionality is used to calculate the
visual ordering of text. The directionality value of undefined
char values is DIRECTIONALITY_UNDEFINED .
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #getDirectionality(int)} method.
return getDirectionality((int)ch);
|
public static byte | getDirectionality(int codePoint)Returns the Unicode directionality property for the given
character (Unicode code point). Character directionality is
used to calculate the visual ordering of text. The
directionality value of an undefined character is {@link
#DIRECTIONALITY_UNDEFINED}.
byte directionality = Character.DIRECTIONALITY_UNDEFINED;
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
directionality = CharacterDataLatin1.getDirectionality(codePoint);
} else {
int plane = getPlane(codePoint);
switch(plane) {
case(0):
directionality = CharacterData00.getDirectionality(codePoint);
break;
case(1):
directionality = CharacterData01.getDirectionality(codePoint);
break;
case(2):
directionality = CharacterData02.getDirectionality(codePoint);
break;
case(3): // Undefined
case(4): // Undefined
case(5): // Undefined
case(6): // Undefined
case(7): // Undefined
case(8): // Undefined
case(9): // Undefined
case(10): // Undefined
case(11): // Undefined
case(12): // Undefined
case(13): // Undefined
directionality = CharacterDataUndefined.getDirectionality(codePoint);
break;
case(14):
directionality = CharacterData0E.getDirectionality(codePoint);
break;
case(15): // Private Use
case(16): // Private Use
directionality = CharacterDataPrivateUse.getDirectionality(codePoint);
break;
default:
// the argument's plane is invalid, and thus is an invalid codepoint
// directionality remains DIRECTIONALITY_UNDEFINED
break;
}
}
return directionality;
|
public static int | getNumericValue(char ch)Returns the int value that the specified Unicode
character represents. For example, the character
'\u216C' (the Roman numeral fifty) will return
an int with a value of 50.
The letters A-Z in their uppercase ('\u0041' through
'\u005A' ), lowercase
('\u0061' through '\u007A' ), and
full width variant ('\uFF21' through
'\uFF3A' and '\uFF41' through
'\uFF5A' ) forms have numeric values from 10
through 35. This is independent of the Unicode specification,
which does not assign numeric values to these char
values.
If the character does not have a numeric value, then -1 is returned.
If the character has a numeric value that cannot be represented as a
nonnegative integer (for example, a fractional value), then -2
is returned.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #getNumericValue(int)} method.
return getNumericValue((int)ch);
|
public static int | getNumericValue(int codePoint)Returns the int value that the specified
character (Unicode code point) represents. For example, the character
'\u216C' (the Roman numeral fifty) will return
an int with a value of 50.
The letters A-Z in their uppercase ('\u0041' through
'\u005A' ), lowercase
('\u0061' through '\u007A' ), and
full width variant ('\uFF21' through
'\uFF3A' and '\uFF41' through
'\uFF5A' ) forms have numeric values from 10
through 35. This is independent of the Unicode specification,
which does not assign numeric values to these char
values.
If the character does not have a numeric value, then -1 is returned.
If the character has a numeric value that cannot be represented as a
nonnegative integer (for example, a fractional value), then -2
is returned.
int numericValue = -1;
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
numericValue = CharacterDataLatin1.getNumericValue(codePoint);
} else {
int plane = getPlane(codePoint);
switch(plane) {
case(0):
numericValue = CharacterData00.getNumericValue(codePoint);
break;
case(1):
numericValue = CharacterData01.getNumericValue(codePoint);
break;
case(2):
numericValue = CharacterData02.getNumericValue(codePoint);
break;
case(3): // Undefined
case(4): // Undefined
case(5): // Undefined
case(6): // Undefined
case(7): // Undefined
case(8): // Undefined
case(9): // Undefined
case(10): // Undefined
case(11): // Undefined
case(12): // Undefined
case(13): // Undefined
numericValue = CharacterDataUndefined.getNumericValue(codePoint);
break;
case(14):
numericValue = CharacterData0E.getNumericValue(codePoint);
break;
case(15): // Private Use
case(16): // Private Use
numericValue = CharacterDataPrivateUse.getNumericValue(codePoint);
break;
default:
// the argument's plane is invalid, and thus is an invalid codepoint
// numericValue remains -1
break;
}
}
return numericValue;
|
private static int | getPlane(int ch)Provide the character plane to which this codepoint belongs.
return (ch >>> 16);
|
public static int | getType(char ch)Returns a value indicating a character's general category.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #getType(int)} method.
return getType((int)ch);
|
public static int | getType(int codePoint)Returns a value indicating a character's general category.
int type = Character.UNASSIGNED;
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
type = CharacterDataLatin1.getType(codePoint);
} else {
int plane = getPlane(codePoint);
switch(plane) {
case(0):
type = CharacterData00.getType(codePoint);
break;
case(1):
type = CharacterData01.getType(codePoint);
break;
case(2):
type = CharacterData02.getType(codePoint);
break;
case(3): // Undefined
case(4): // Undefined
case(5): // Undefined
case(6): // Undefined
case(7): // Undefined
case(8): // Undefined
case(9): // Undefined
case(10): // Undefined
case(11): // Undefined
case(12): // Undefined
case(13): // Undefined
type = CharacterDataUndefined.getType(codePoint);
break;
case(14):
type = CharacterData0E.getType(codePoint);
break;
case(15): // Private Use
case(16): // Private Use
type = CharacterDataPrivateUse.getType(codePoint);
break;
default:
// the argument's plane is invalid, and thus is an invalid codepoint
// type remains UNASSIGNED
break;
}
}
return type;
|
public int | hashCode()Returns a hash code for this Character .
return (int)value;
|
public static boolean | isDefined(char ch)Determines if a character is defined in Unicode.
A character is defined if at least one of the following is true:
- It has an entry in the UnicodeData file.
- It has a value in a range defined by the UnicodeData file.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isDefined(int)} method.
return isDefined((int)ch);
|
public static boolean | isDefined(int codePoint)Determines if a character (Unicode code point) is defined in Unicode.
A character is defined if at least one of the following is true:
- It has an entry in the UnicodeData file.
- It has a value in a range defined by the UnicodeData file.
boolean bDefined = false;
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
bDefined = CharacterDataLatin1.isDefined(codePoint);
} else {
int plane = getPlane(codePoint);
switch(plane) {
case(0):
bDefined = CharacterData00.isDefined(codePoint);
break;
case(1):
bDefined = CharacterData01.isDefined(codePoint);
break;
case(2):
bDefined = CharacterData02.isDefined(codePoint);
break;
case(3): // Undefined
case(4): // Undefined
case(5): // Undefined
case(6): // Undefined
case(7): // Undefined
case(8): // Undefined
case(9): // Undefined
case(10): // Undefined
case(11): // Undefined
case(12): // Undefined
case(13): // Undefined
bDefined = CharacterDataUndefined.isDefined(codePoint);
break;
case(14):
bDefined = CharacterData0E.isDefined(codePoint);
break;
case(15): // Private Use
case(16): // Private Use
bDefined = CharacterDataPrivateUse.isDefined(codePoint);
break;
default:
// the argument's plane is invalid, and thus is an invalid codepoint
// bDefined remains false;
break;
}
}
return bDefined;
|
public static boolean | isDigit(char ch)Determines if the specified character is a digit.
A character is a digit if its general category type, provided
by Character.getType(ch) , is
DECIMAL_DIGIT_NUMBER .
Some Unicode character ranges that contain digits:
'\u0030' through '\u0039' ,
ISO-LATIN-1 digits ('0' through '9' )
'\u0660' through '\u0669' ,
Arabic-Indic digits
'\u06F0' through '\u06F9' ,
Extended Arabic-Indic digits
'\u0966' through '\u096F' ,
Devanagari digits
'\uFF10' through '\uFF19' ,
Fullwidth digits
Many other character ranges contain digits as well.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isDigit(int)} method.
return isDigit((int)ch);
|
public static boolean | isDigit(int codePoint)Determines if the specified character (Unicode code point) is a digit.
A character is a digit if its general category type, provided
by {@link Character#getType(int) getType(codePoint)}, is
DECIMAL_DIGIT_NUMBER .
Some Unicode character ranges that contain digits:
'\u0030' through '\u0039' ,
ISO-LATIN-1 digits ('0' through '9' )
'\u0660' through '\u0669' ,
Arabic-Indic digits
'\u06F0' through '\u06F9' ,
Extended Arabic-Indic digits
'\u0966' through '\u096F' ,
Devanagari digits
'\uFF10' through '\uFF19' ,
Fullwidth digits
Many other character ranges contain digits as well.
boolean bDigit = false;
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
bDigit = CharacterDataLatin1.isDigit(codePoint);
} else {
int plane = getPlane(codePoint);
switch(plane) {
case(0):
bDigit = CharacterData00.isDigit(codePoint);
break;
case(1):
bDigit = CharacterData01.isDigit(codePoint);
break;
case(2):
bDigit = CharacterData02.isDigit(codePoint);
break;
case(3): // Undefined
case(4): // Undefined
case(5): // Undefined
case(6): // Undefined
case(7): // Undefined
case(8): // Undefined
case(9): // Undefined
case(10): // Undefined
case(11): // Undefined
case(12): // Undefined
case(13): // Undefined
bDigit = CharacterDataUndefined.isDigit(codePoint);
break;
case(14):
bDigit = CharacterData0E.isDigit(codePoint);
break;
case(15): // Private Use
case(16): // Private Use
bDigit = CharacterDataPrivateUse.isDigit(codePoint);
break;
default:
// the argument's plane is invalid, and thus is an invalid codepoint
// bDigit remains false;
break;
}
}
return bDigit;
|
public static boolean | isHighSurrogate(char ch)Determines if the given char value is a
high-surrogate code unit (also known as leading-surrogate
code unit). Such values do not represent characters by
themselves, but are used in the representation of supplementary characters in the
UTF-16 encoding.
This method returns true if and only if
ch >= '\uD800' && ch <= '\uDBFF'
is true .
return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
|
public static boolean | isISOControl(char ch)Determines if the specified character is an ISO control
character. A character is considered to be an ISO control
character if its code is in the range '\u0000'
through '\u001F' or in the range
'\u007F' through '\u009F' .
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isISOControl(int)} method.
return isISOControl((int)ch);
|
public static boolean | isISOControl(int codePoint)Determines if the referenced character (Unicode code point) is an ISO control
character. A character is considered to be an ISO control
character if its code is in the range '\u0000'
through '\u001F' or in the range
'\u007F' through '\u009F' .
return (codePoint >= 0x0000 && codePoint <= 0x001F) ||
(codePoint >= 0x007F && codePoint <= 0x009F);
|
public static boolean | isIdentifierIgnorable(char ch)Determines if the specified character should be regarded as
an ignorable character in a Java identifier or a Unicode identifier.
The following Unicode characters are ignorable in a Java identifier
or a Unicode identifier:
- ISO control characters that are not whitespace
'\u0000' through '\u0008'
'\u000E' through '\u001B'
'\u007F' through '\u009F'
- all characters that have the
FORMAT general
category value
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isIdentifierIgnorable(int)} method.
return isIdentifierIgnorable((int)ch);
|
public static boolean | isIdentifierIgnorable(int codePoint)Determines if the specified character (Unicode code point) should be regarded as
an ignorable character in a Java identifier or a Unicode identifier.
The following Unicode characters are ignorable in a Java identifier
or a Unicode identifier:
- ISO control characters that are not whitespace
'\u0000' through '\u0008'
'\u000E' through '\u001B'
'\u007F' through '\u009F'
- all characters that have the
FORMAT general
category value
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isIdentifierIgnorable(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isIdentifierIgnorable(codePoint);
    case 1:
        return CharacterData01.isIdentifierIgnorable(codePoint);
    case 2:
        return CharacterData02.isIdentifierIgnorable(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isIdentifierIgnorable(codePoint);
    case 14:
        return CharacterData0E.isIdentifierIgnorable(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isIdentifierIgnorable(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static boolean | isJavaIdentifierPart(char ch)Determines if the specified character may be part of a Java
identifier as other than the first character.
A character may be part of a Java identifier if any of the following
are true:
- it is a letter
- it is a currency symbol (such as
'$' )
- it is a connecting punctuation character (such as
'_' )
- it is a digit
- it is a numeric letter (such as a Roman numeral character)
- it is a combining mark
- it is a non-spacing mark
-
isIdentifierIgnorable returns
true for the character
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isJavaIdentifierPart(int)} method.
return isJavaIdentifierPart((int)ch);
|
public static boolean | isJavaIdentifierPart(int codePoint)Determines if the character (Unicode code point) may be part of a Java
identifier as other than the first character.
A character may be part of a Java identifier if any of the following
are true:
- it is a letter
- it is a currency symbol (such as
'$' )
- it is a connecting punctuation character (such as
'_' )
- it is a digit
- it is a numeric letter (such as a Roman numeral character)
- it is a combining mark
- it is a non-spacing mark
- {@link #isIdentifierIgnorable(int)
isIdentifierIgnorable(codePoint)} returns
true for
the character
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isJavaIdentifierPart(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isJavaIdentifierPart(codePoint);
    case 1:
        return CharacterData01.isJavaIdentifierPart(codePoint);
    case 2:
        return CharacterData02.isJavaIdentifierPart(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isJavaIdentifierPart(codePoint);
    case 14:
        return CharacterData0E.isJavaIdentifierPart(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isJavaIdentifierPart(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static boolean | isJavaIdentifierStart(char ch)Determines if the specified character is
permissible as the first character in a Java identifier.
A character may start a Java identifier if and only if
one of the following conditions is true:
- {@link #isLetter(char) isLetter(ch)} returns
true
- {@link #getType(char) getType(ch)} returns
LETTER_NUMBER
- ch is a currency symbol (such as "$")
- ch is a connecting punctuation character (such as "_").
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isJavaIdentifierStart(int)} method.
return isJavaIdentifierStart((int)ch);
|
public static boolean | isJavaIdentifierStart(int codePoint)Determines if the character (Unicode code point) is
permissible as the first character in a Java identifier.
A character may start a Java identifier if and only if
one of the following conditions is true:
- {@link #isLetter(int) isLetter(codePoint)}
returns
true
- {@link #getType(int) getType(codePoint)}
returns
LETTER_NUMBER
- the referenced character is a currency symbol (such as "$")
- the referenced character is a connecting punctuation character
(such as "_").
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isJavaIdentifierStart(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isJavaIdentifierStart(codePoint);
    case 1:
        return CharacterData01.isJavaIdentifierStart(codePoint);
    case 2:
        return CharacterData02.isJavaIdentifierStart(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isJavaIdentifierStart(codePoint);
    case 14:
        return CharacterData0E.isJavaIdentifierStart(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isJavaIdentifierStart(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static boolean | isJavaLetter(char ch)Determines if the specified character is permissible as the first
character in a Java identifier.
A character may start a Java identifier if and only if
one of the following is true:
- {@link #isLetter(char) isLetter(ch)} returns
true
- {@link #getType(char) getType(ch)} returns
LETTER_NUMBER
- ch is a currency symbol (such as "$")
- ch is a connecting punctuation character (such as "_").
return isJavaIdentifierStart(ch);
|
public static boolean | isJavaLetterOrDigit(char ch)Determines if the specified character may be part of a Java
identifier as other than the first character.
A character may be part of a Java identifier if and only if any
of the following are true:
- it is a letter
- it is a currency symbol (such as
'$' )
- it is a connecting punctuation character (such as
'_' )
- it is a digit
- it is a numeric letter (such as a Roman numeral character)
- it is a combining mark
- it is a non-spacing mark
-
isIdentifierIgnorable returns
true for the character.
return isJavaIdentifierPart(ch);
|
public static boolean | isLetter(char ch)Determines if the specified character is a letter.
A character is considered to be a letter if its general
category type, provided by Character.getType(ch) ,
is any of the following:
-
UPPERCASE_LETTER
-
LOWERCASE_LETTER
-
TITLECASE_LETTER
-
MODIFIER_LETTER
-
OTHER_LETTER
Not all letters have case. Many characters are
letters but are neither uppercase nor lowercase nor titlecase.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isLetter(int)} method.
return isLetter((int)ch);
|
public static boolean | isLetter(int codePoint)Determines if the specified character (Unicode code point) is a letter.
A character is considered to be a letter if its general
category type, provided by {@link Character#getType(int) getType(codePoint)},
is any of the following:
-
UPPERCASE_LETTER
-
LOWERCASE_LETTER
-
TITLECASE_LETTER
-
MODIFIER_LETTER
-
OTHER_LETTER
Not all letters have case. Many characters are
letters but are neither uppercase nor lowercase nor titlecase.
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isLetter(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isLetter(codePoint);
    case 1:
        return CharacterData01.isLetter(codePoint);
    case 2:
        return CharacterData02.isLetter(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isLetter(codePoint);
    case 14:
        return CharacterData0E.isLetter(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isLetter(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static boolean | isLetterOrDigit(char ch)Determines if the specified character is a letter or digit.
A character is considered to be a letter or digit if either
Character.isLetter(char ch) or
Character.isDigit(char ch) returns
true for the character.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isLetterOrDigit(int)} method.
return isLetterOrDigit((int)ch);
|
public static boolean | isLetterOrDigit(int codePoint)Determines if the specified character (Unicode code point) is a letter or digit.
A character is considered to be a letter or digit if either
{@link #isLetter(int) isLetter(codePoint)} or
{@link #isDigit(int) isDigit(codePoint)} returns
true for the character.
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isLetterOrDigit(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isLetterOrDigit(codePoint);
    case 1:
        return CharacterData01.isLetterOrDigit(codePoint);
    case 2:
        return CharacterData02.isLetterOrDigit(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isLetterOrDigit(codePoint);
    case 14:
        // Plane 14 has its own data class (CharacterData0E); it is not one
        // of the undefined planes.
        return CharacterData0E.isLetterOrDigit(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isLetterOrDigit(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static boolean | isLowSurrogate(char ch)Determines if the given char value is a
low-surrogate code unit (also known as trailing-surrogate code
unit). Such values do not represent characters by themselves,
but are used in the representation of supplementary characters in the UTF-16 encoding.
This method returns true if and only if
ch >= '\uDC00' && ch <= '\uDFFF'
is true .
// Anything below the low-surrogate range is rejected immediately.
if (ch < MIN_LOW_SURROGATE) {
    return false;
}
// Otherwise the value qualifies only if it does not exceed the upper bound.
return ch <= MAX_LOW_SURROGATE;
|
public static boolean | isLowerCase(char ch)Determines if the specified character is a lowercase character.
A character is lowercase if its general category type, provided
by Character.getType(ch) , is
LOWERCASE_LETTER .
The following are examples of lowercase characters:
a b c d e f g h i j k l m n o p q r s t u v w x y z
'\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
'\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
'\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
'\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
Many other Unicode characters are lowercase too.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isLowerCase(int)} method.
return isLowerCase((int)ch);
|
public static boolean | isLowerCase(int codePoint)Determines if the specified character (Unicode code point) is a
lowercase character.
A character is lowercase if its general category type, provided
by {@link Character#getType getType(codePoint)}, is
LOWERCASE_LETTER .
The following are examples of lowercase characters:
a b c d e f g h i j k l m n o p q r s t u v w x y z
'\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
'\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
'\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
'\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
Many other Unicode characters are lowercase too.
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isLowerCase(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isLowerCase(codePoint);
    case 1:
        return CharacterData01.isLowerCase(codePoint);
    case 2:
        return CharacterData02.isLowerCase(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isLowerCase(codePoint);
    case 14:
        return CharacterData0E.isLowerCase(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isLowerCase(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static boolean | isMirrored(char ch)Determines whether the character is mirrored according to the
Unicode specification. Mirrored characters should have their
glyphs horizontally mirrored when displayed in text that is
right-to-left. For example, '\u0028' LEFT
PARENTHESIS is semantically defined to be an opening
parenthesis. This will appear as a "(" in text that is
left-to-right but as a ")" in text that is right-to-left.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isMirrored(int)} method.
return isMirrored((int)ch);
|
public static boolean | isMirrored(int codePoint)Determines whether the specified character (Unicode code point)
is mirrored according to the Unicode specification. Mirrored
characters should have their glyphs horizontally mirrored when
displayed in text that is right-to-left. For example,
'\u0028' LEFT PARENTHESIS is semantically
defined to be an opening parenthesis. This will appear
as a "(" in text that is left-to-right but as a ")" in text
that is right-to-left.
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isMirrored(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isMirrored(codePoint);
    case 1:
        return CharacterData01.isMirrored(codePoint);
    case 2:
        return CharacterData02.isMirrored(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isMirrored(codePoint);
    case 14:
        return CharacterData0E.isMirrored(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isMirrored(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static boolean | isSpace(char ch)Determines if the specified character is ISO-LATIN-1 white space.
This method returns true for the following five
characters only:
'\t' | '\u0009' |
HORIZONTAL TABULATION |
'\n' | '\u000A' |
NEW LINE |
'\f' | '\u000C' |
FORM FEED |
'\r' | '\u000D' |
CARRIAGE RETURN |
' ' | '\u0020' |
SPACE |
return (ch <= 0x0020) &&
(((((1L << 0x0009) |
(1L << 0x000A) |
(1L << 0x000C) |
(1L << 0x000D) |
(1L << 0x0020)) >> ch) & 1L) != 0);
|
public static boolean | isSpaceChar(char ch)Determines if the specified character is a Unicode space character.
A character is considered to be a space character if and only if
it is specified to be a space character by the Unicode standard. This
method returns true if the character's general category type is any of
the following:
-
SPACE_SEPARATOR
-
LINE_SEPARATOR
-
PARAGRAPH_SEPARATOR
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isSpaceChar(int)} method.
return isSpaceChar((int)ch);
|
public static boolean | isSpaceChar(int codePoint)Determines if the specified character (Unicode code point) is a
Unicode space character. A character is considered to be a
space character if and only if it is specified to be a space
character by the Unicode standard. This method returns true if
the character's general category type is any of the following:
- {@link #SPACE_SEPARATOR}
- {@link #LINE_SEPARATOR}
- {@link #PARAGRAPH_SEPARATOR}
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isSpaceChar(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isSpaceChar(codePoint);
    case 1:
        return CharacterData01.isSpaceChar(codePoint);
    case 2:
        return CharacterData02.isSpaceChar(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isSpaceChar(codePoint);
    case 14:
        return CharacterData0E.isSpaceChar(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isSpaceChar(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static boolean | isSupplementaryCodePoint(int codePoint)Determines whether the specified character (Unicode code point)
is in the supplementary character range. The method call is
equivalent to the expression:
codePoint >= 0x10000 && codePoint <= 0x10FFFF
// Supplementary means neither below the first supplementary code point
// nor above the largest code point.
return !(codePoint < MIN_SUPPLEMENTARY_CODE_POINT || codePoint > MAX_CODE_POINT);
|
public static boolean | isSurrogatePair(char high, char low)Determines whether the specified pair of char
values is a valid surrogate pair. This method is equivalent to
the expression:
isHighSurrogate(high) && isLowSurrogate(low)
// The first unit must be a high surrogate; if it is not, the second unit
// is never inspected.
if (!isHighSurrogate(high)) {
    return false;
}
return isLowSurrogate(low);
|
public static boolean | isTitleCase(char ch)Determines if the specified character is a titlecase character.
A character is a titlecase character if its general
category type, provided by Character.getType(ch) ,
is TITLECASE_LETTER .
Some characters look like pairs of Latin letters. For example, there
is an uppercase letter that looks like "LJ" and has a corresponding
lowercase letter that looks like "lj". A third form, which looks like "Lj",
is the appropriate form to use when rendering a word in lowercase
with initial capitals, as for a book title.
These are some of the Unicode characters for which this method returns
true :
LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
LATIN CAPITAL LETTER L WITH SMALL LETTER J
LATIN CAPITAL LETTER N WITH SMALL LETTER J
LATIN CAPITAL LETTER D WITH SMALL LETTER Z
Many other Unicode characters are titlecase too.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isTitleCase(int)} method.
return isTitleCase((int)ch);
|
public static boolean | isTitleCase(int codePoint)Determines if the specified character (Unicode code point) is a titlecase character.
A character is a titlecase character if its general
category type, provided by {@link Character#getType(int) getType(codePoint)},
is TITLECASE_LETTER .
Some characters look like pairs of Latin letters. For example, there
is an uppercase letter that looks like "LJ" and has a corresponding
lowercase letter that looks like "lj". A third form, which looks like "Lj",
is the appropriate form to use when rendering a word in lowercase
with initial capitals, as for a book title.
These are some of the Unicode characters for which this method returns
true :
LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
LATIN CAPITAL LETTER L WITH SMALL LETTER J
LATIN CAPITAL LETTER N WITH SMALL LETTER J
LATIN CAPITAL LETTER D WITH SMALL LETTER Z
Many other Unicode characters are titlecase too.
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isTitleCase(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isTitleCase(codePoint);
    case 1:
        return CharacterData01.isTitleCase(codePoint);
    case 2:
        return CharacterData02.isTitleCase(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isTitleCase(codePoint);
    case 14:
        return CharacterData0E.isTitleCase(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isTitleCase(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static boolean | isUnicodeIdentifierPart(char ch)Determines if the specified character may be part of a Unicode
identifier as other than the first character.
A character may be part of a Unicode identifier if and only if
one of the following statements is true:
- it is a letter
- it is a connecting punctuation character (such as
'_' )
- it is a digit
- it is a numeric letter (such as a Roman numeral character)
- it is a combining mark
- it is a non-spacing mark
-
isIdentifierIgnorable returns
true for this character.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isUnicodeIdentifierPart(int)} method.
return isUnicodeIdentifierPart((int)ch);
|
public static boolean | isUnicodeIdentifierPart(int codePoint)Determines if the specified character (Unicode code point) may be part of a Unicode
identifier as other than the first character.
A character may be part of a Unicode identifier if and only if
one of the following statements is true:
- it is a letter
- it is a connecting punctuation character (such as
'_' )
- it is a digit
- it is a numeric letter (such as a Roman numeral character)
- it is a combining mark
- it is a non-spacing mark
-
isIdentifierIgnorable returns
true for this character.
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isUnicodeIdentifierPart(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isUnicodeIdentifierPart(codePoint);
    case 1:
        return CharacterData01.isUnicodeIdentifierPart(codePoint);
    case 2:
        return CharacterData02.isUnicodeIdentifierPart(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isUnicodeIdentifierPart(codePoint);
    case 14:
        return CharacterData0E.isUnicodeIdentifierPart(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isUnicodeIdentifierPart(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static boolean | isUnicodeIdentifierStart(char ch)Determines if the specified character is permissible as the
first character in a Unicode identifier.
A character may start a Unicode identifier if and only if
one of the following conditions is true:
- {@link #isLetter(char) isLetter(ch)} returns
true
- {@link #getType(char) getType(ch)} returns
LETTER_NUMBER .
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isUnicodeIdentifierStart(int)} method.
return isUnicodeIdentifierStart((int)ch);
|
public static boolean | isUnicodeIdentifierStart(int codePoint)Determines if the specified character (Unicode code point) is permissible as the
first character in a Unicode identifier.
A character may start a Unicode identifier if and only if
one of the following conditions is true:
- {@link #isLetter(int) isLetter(codePoint)}
returns
true
- {@link #getType(int) getType(codePoint)}
returns
LETTER_NUMBER .
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isUnicodeIdentifierStart(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isUnicodeIdentifierStart(codePoint);
    case 1:
        return CharacterData01.isUnicodeIdentifierStart(codePoint);
    case 2:
        return CharacterData02.isUnicodeIdentifierStart(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isUnicodeIdentifierStart(codePoint);
    case 14:
        return CharacterData0E.isUnicodeIdentifierStart(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isUnicodeIdentifierStart(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static boolean | isUpperCase(char ch)Determines if the specified character is an uppercase character.
A character is uppercase if its general category type, provided by
Character.getType(ch) , is UPPERCASE_LETTER .
The following are examples of uppercase characters:
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
'\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
'\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
'\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
'\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
Many other Unicode characters are uppercase too.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isUpperCase(int)} method.
return isUpperCase((int)ch);
|
public static boolean | isUpperCase(int codePoint)Determines if the specified character (Unicode code point) is an uppercase character.
A character is uppercase if its general category type, provided by
{@link Character#getType(int) getType(codePoint)}, is UPPERCASE_LETTER .
The following are examples of uppercase characters:
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
'\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
'\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
'\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
'\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
Many other Unicode characters are uppercase too.
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isUpperCase(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isUpperCase(codePoint);
    case 1:
        return CharacterData01.isUpperCase(codePoint);
    case 2:
        return CharacterData02.isUpperCase(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isUpperCase(codePoint);
    case 14:
        return CharacterData0E.isUpperCase(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isUpperCase(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static boolean | isValidCodePoint(int codePoint)Determines whether the specified code point is a valid Unicode
code point value in the range of 0x0000 to
0x10FFFF inclusive. This method is equivalent to
the expression:
codePoint >= 0x0000 && codePoint <= 0x10FFFF
// Valid means neither below the smallest nor above the largest code point.
return !(codePoint < MIN_CODE_POINT || codePoint > MAX_CODE_POINT);
|
public static boolean | isWhitespace(char ch)Determines if the specified character is white space according to Java.
A character is a Java whitespace character if and only if it satisfies
one of the following criteria:
- It is a Unicode space character (
SPACE_SEPARATOR ,
LINE_SEPARATOR , or PARAGRAPH_SEPARATOR )
but is not also a non-breaking space ('\u00A0' ,
'\u2007' , '\u202F' ).
- It is
'\u0009' , HORIZONTAL TABULATION.
- It is
'\u000A' , LINE FEED.
- It is
'\u000B' , VERTICAL TABULATION.
- It is
'\u000C' , FORM FEED.
- It is
'\u000D' , CARRIAGE RETURN.
- It is
'\u001C' , FILE SEPARATOR.
- It is
'\u001D' , GROUP SEPARATOR.
- It is
'\u001E' , RECORD SEPARATOR.
- It is
'\u001F' , UNIT SEPARATOR.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #isWhitespace(int)} method.
return isWhitespace((int)ch);
|
public static boolean | isWhitespace(int codePoint)Determines if the specified character (Unicode code point) is
white space according to Java. A character is a Java
whitespace character if and only if it satisfies one of the
following criteria:
- It is a Unicode space character ({@link #SPACE_SEPARATOR},
{@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
but is not also a non-breaking space (
'\u00A0' ,
'\u2007' , '\u202F' ).
- It is
'\u0009' , HORIZONTAL TABULATION.
- It is
'\u000A' , LINE FEED.
- It is
'\u000B' , VERTICAL TABULATION.
- It is
'\u000C' , FORM FEED.
- It is
'\u000D' , CARRIAGE RETURN.
- It is
'\u001C' , FILE SEPARATOR.
- It is
'\u001D' , GROUP SEPARATOR.
- It is
'\u001E' , RECORD SEPARATOR.
- It is
'\u001F' , UNIT SEPARATOR.
// Code points within the fast-path range are answered by CharacterDataLatin1.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.isWhitespace(codePoint);
}
// Everything else is routed to the data class for the code point's plane.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.isWhitespace(codePoint);
    case 1:
        return CharacterData01.isWhitespace(codePoint);
    case 2:
        return CharacterData02.isWhitespace(codePoint);
    case 3: case 4: case 5: case 6: case 7: case 8:
    case 9: case 10: case 11: case 12: case 13:
        // Planes 3 through 13 are undefined.
        return CharacterDataUndefined.isWhitespace(codePoint);
    case 14:
        return CharacterData0E.isWhitespace(codePoint);
    case 15: case 16:
        // Planes 15 and 16 are private use.
        return CharacterDataPrivateUse.isWhitespace(codePoint);
    default:
        // No such plane, so the argument is not a valid code point.
        return false;
}
|
public static int | offsetByCodePoints(java.lang.CharSequence seq, int index, int codePointOffset)Returns the index within the given char sequence that is offset
from the given index by codePointOffset
code points. Unpaired surrogates within the text range given by
index and codePointOffset count as
one code point each.
// The starting index may equal the length (one past the last char) but not exceed it.
final int length = seq.length();
if (index < 0 || index > length) {
    throw new IndexOutOfBoundsException();
}
int pos = index;
if (codePointOffset >= 0) {
    // Move forward, consuming one code point per iteration.
    int remaining = codePointOffset;
    while (pos < length && remaining > 0) {
        final char unit = seq.charAt(pos);
        pos++;
        // A high surrogate directly followed by a low surrogate is a single code point.
        if (isHighSurrogate(unit) && pos < length && isLowSurrogate(seq.charAt(pos))) {
            pos++;
        }
        remaining--;
    }
    // The sequence ended before the requested number of code points was consumed.
    if (remaining > 0) {
        throw new IndexOutOfBoundsException();
    }
} else {
    // Move backward; remaining counts up from the negative offset towards zero.
    int remaining = codePointOffset;
    while (pos > 0 && remaining < 0) {
        pos--;
        // A low surrogate directly preceded by a high surrogate is a single code point.
        if (isLowSurrogate(seq.charAt(pos)) && pos > 0 && isHighSurrogate(seq.charAt(pos - 1))) {
            pos--;
        }
        remaining++;
    }
    // The start of the sequence was reached before the offset was exhausted.
    if (remaining < 0) {
        throw new IndexOutOfBoundsException();
    }
}
return pos;
|
public static int | offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset)Returns the index within the given char subarray
that is offset from the given index by
codePointOffset code points. The
start and count arguments specify a
subarray of the char array. Unpaired surrogates
within the text range given by index and
codePointOffset count as one code point each.
if (count > a.length-start || start < 0 || count < 0
|| index < start || index > start+count) {
throw new IndexOutOfBoundsException();
}
return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
|
static int | offsetByCodePointsImpl(char[] a, int start, int count, int index, int codePointOffset)
// Walks codePointOffset code points away from index, staying inside the subarray
// that begins at start and holds count chars. No argument validation is done here.
int pos = index;
if (codePointOffset >= 0) {
    final int end = start + count;
    // Move forward, consuming one code point per iteration.
    int remaining = codePointOffset;
    while (pos < end && remaining > 0) {
        final char unit = a[pos];
        pos++;
        // A high surrogate directly followed by a low surrogate is a single code point.
        if (isHighSurrogate(unit) && pos < end && isLowSurrogate(a[pos])) {
            pos++;
        }
        remaining--;
    }
    // The subarray ended before the requested number of code points was consumed.
    if (remaining > 0) {
        throw new IndexOutOfBoundsException();
    }
} else {
    // Move backward; remaining counts up from the negative offset towards zero.
    int remaining = codePointOffset;
    while (pos > start && remaining < 0) {
        pos--;
        // A low surrogate directly preceded by a high surrogate is a single code point.
        if (isLowSurrogate(a[pos]) && pos > start && isHighSurrogate(a[pos - 1])) {
            pos--;
        }
        remaining++;
    }
    // The start of the subarray was reached before the offset was exhausted.
    if (remaining < 0) {
        throw new IndexOutOfBoundsException();
    }
}
return pos;
|
public static char | reverseBytes(char ch)Returns the value obtained by reversing the order of the bytes in the
specified char value.
return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
|
public static int | toChars(int codePoint, char[] dst, int dstIndex)Converts the specified character (Unicode code point) to its
UTF-16 representation. If the specified code point is a BMP
(Basic Multilingual Plane or Plane 0) value, the same value is
stored in dst[dstIndex] , and 1 is returned. If the
specified code point is a supplementary character, its
surrogate values are stored in dst[dstIndex]
(high-surrogate) and dst[dstIndex+1]
(low-surrogate), and 2 is returned.
if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
throw new IllegalArgumentException();
}
if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
dst[dstIndex] = (char) codePoint;
return 1;
}
toSurrogates(codePoint, dst, dstIndex);
return 2;
|
public static char[] | toChars(int codePoint)Converts the specified character (Unicode code point) to its
UTF-16 representation stored in a char array. If
the specified code point is a BMP (Basic Multilingual Plane or
Plane 0) value, the resulting char array has
the same value as codePoint . If the specified code
point is a supplementary code point, the resulting
char array has the corresponding surrogate pair.
if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
throw new IllegalArgumentException();
}
if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
return new char[] { (char) codePoint };
}
char[] result = new char[2];
toSurrogates(codePoint, result, 0);
return result;
|
public static int | toCodePoint(char high, char low)Converts the specified surrogate pair to its supplementary code
point value. This method does not validate the specified
surrogate pair. The caller must validate it using {@link
#isSurrogatePair(char, char) isSurrogatePair} if necessary.
return ((high - MIN_HIGH_SURROGATE) << 10)
+ (low - MIN_LOW_SURROGATE) + MIN_SUPPLEMENTARY_CODE_POINT;
|
public static char | toLowerCase(char ch)Converts the character argument to lowercase using case
mapping information from the UnicodeData file.
Note that
Character.isLowerCase(Character.toLowerCase(ch))
does not always return true for some ranges of
characters, particularly those that are symbols or ideographs.
In general, {@link java.lang.String#toLowerCase()} should be used to map
characters to lowercase. String case mapping methods
have several benefits over Character case mapping methods.
String case mapping methods can perform locale-sensitive
mappings, context-sensitive mappings, and 1:M character mappings, whereas
the Character case mapping methods cannot.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #toLowerCase(int)} method.
return (char)toLowerCase((int)ch);
|
public static int | toLowerCase(int codePoint)Converts the character (Unicode code point) argument to
lowercase using case mapping information from the UnicodeData
file.
Note that
Character.isLowerCase(Character.toLowerCase(codePoint))
does not always return true for some ranges of
characters, particularly those that are symbols or ideographs.
In general, {@link java.lang.String#toLowerCase()} should be used to map
characters to lowercase. String case mapping methods
have several benefits over Character case mapping methods.
String case mapping methods can perform locale-sensitive
mappings, context-sensitive mappings, and 1:M character mappings, whereas
the Character case mapping methods cannot.
// Code points in [MIN_CODE_POINT, FAST_PATH_MAX] are mapped by the Latin-1 table directly.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.toLowerCase(codePoint);
}
// Everything else is dispatched to the data class for the plane the code point lies in.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.toLowerCase(codePoint);
    case 1:
        return CharacterData01.toLowerCase(codePoint);
    case 2:
        return CharacterData02.toLowerCase(codePoint);
    // Planes 3 through 13 are undefined.
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
    case 8:
    case 9:
    case 10:
    case 11:
    case 12:
    case 13:
        return CharacterDataUndefined.toLowerCase(codePoint);
    case 14:
        return CharacterData0E.toLowerCase(codePoint);
    // Planes 15 and 16 are private use.
    case 15:
    case 16:
        return CharacterDataPrivateUse.toLowerCase(codePoint);
    default:
        // No such plane: the argument is not a valid code point and is returned unchanged.
        return codePoint;
}
|
public java.lang.String | toString()Returns a String object representing this
Character 's value. The result is a string of
length 1 whose sole component is the primitive
char value represented by this
Character object.
char buf[] = {value};
return String.valueOf(buf);
|
public static java.lang.String | toString(char c)Returns a String object representing the
specified char . The result is a string of length
1 consisting solely of the specified char .
return String.valueOf(c);
|
static void | toSurrogates(int codePoint, char[] dst, int index)
// Distance of the code point from the first supplementary code point.
final int offset = codePoint - MIN_SUPPLEMENTARY_CODE_POINT;
// Low surrogate carries the bottom 10 bits, high surrogate the bits above them.
final char lowSurrogate = (char) ((offset & 0x3ff) + MIN_LOW_SURROGATE);
final char highSurrogate = (char) ((offset >>> 10) + MIN_HIGH_SURROGATE);
// Store into index + 1 first: if that slot is past the end of dst, the exception
// is raised before dst[index] has been touched.
dst[index + 1] = lowSurrogate;
dst[index] = highSurrogate;
|
public static char | toTitleCase(char ch)Converts the character argument to titlecase using case mapping
information from the UnicodeData file. If a character has no
explicit titlecase mapping and is not itself a titlecase char
according to UnicodeData, then the uppercase mapping is
returned as an equivalent titlecase mapping. If the
char argument is already a titlecase
char , the same char value will be
returned.
Note that
Character.isTitleCase(Character.toTitleCase(ch))
does not always return true for some ranges of
characters.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #toTitleCase(int)} method.
return (char)toTitleCase((int)ch);
|
public static int | toTitleCase(int codePoint)Converts the character (Unicode code point) argument to titlecase using case mapping
information from the UnicodeData file. If a character has no
explicit titlecase mapping and is not itself a titlecase char
according to UnicodeData, then the uppercase mapping is
returned as an equivalent titlecase mapping. If the
character argument is already a titlecase
character, the same character value will be
returned.
Note that
Character.isTitleCase(Character.toTitleCase(codePoint))
does not always return true for some ranges of
characters.
// Code points in [MIN_CODE_POINT, FAST_PATH_MAX] are mapped by the Latin-1 table directly.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.toTitleCase(codePoint);
}
// Everything else is dispatched to the data class for the plane the code point lies in.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.toTitleCase(codePoint);
    case 1:
        return CharacterData01.toTitleCase(codePoint);
    case 2:
        return CharacterData02.toTitleCase(codePoint);
    // Planes 3 through 13 are undefined.
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
    case 8:
    case 9:
    case 10:
    case 11:
    case 12:
    case 13:
        return CharacterDataUndefined.toTitleCase(codePoint);
    case 14:
        return CharacterData0E.toTitleCase(codePoint);
    // Planes 15 and 16 are private use.
    case 15:
    case 16:
        return CharacterDataPrivateUse.toTitleCase(codePoint);
    default:
        // No such plane: the argument is not a valid code point and is returned unchanged.
        return codePoint;
}
|
public static char | toUpperCase(char ch)Converts the character argument to uppercase using case mapping
information from the UnicodeData file.
Note that
Character.isUpperCase(Character.toUpperCase(ch))
does not always return true for some ranges of
characters, particularly those that are symbols or ideographs.
In general, {@link java.lang.String#toUpperCase()} should be used to map
characters to uppercase. String case mapping methods
have several benefits over Character case mapping methods.
String case mapping methods can perform locale-sensitive
mappings, context-sensitive mappings, and 1:M character mappings, whereas
the Character case mapping methods cannot.
Note: This method cannot handle supplementary characters. To support
all Unicode characters, including supplementary characters, use
the {@link #toUpperCase(int)} method.
return (char)toUpperCase((int)ch);
|
public static int | toUpperCase(int codePoint)Converts the character (Unicode code point) argument to
uppercase using case mapping information from the UnicodeData
file.
Note that
Character.isUpperCase(Character.toUpperCase(codePoint))
does not always return true for some ranges of
characters, particularly those that are symbols or ideographs.
In general, {@link java.lang.String#toUpperCase()} should be used to map
characters to uppercase. String case mapping methods
have several benefits over Character case mapping methods.
String case mapping methods can perform locale-sensitive
mappings, context-sensitive mappings, and 1:M character mappings, whereas
the Character case mapping methods cannot.
// Code points in [MIN_CODE_POINT, FAST_PATH_MAX] are mapped by the Latin-1 table directly.
if (codePoint >= MIN_CODE_POINT && codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.toUpperCase(codePoint);
}
// Everything else is dispatched to the data class for the plane the code point lies in.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.toUpperCase(codePoint);
    case 1:
        return CharacterData01.toUpperCase(codePoint);
    case 2:
        return CharacterData02.toUpperCase(codePoint);
    // Planes 3 through 13 are undefined.
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
    case 8:
    case 9:
    case 10:
    case 11:
    case 12:
    case 13:
        return CharacterDataUndefined.toUpperCase(codePoint);
    case 14:
        return CharacterData0E.toUpperCase(codePoint);
    // Planes 15 and 16 are private use.
    case 15:
    case 16:
        return CharacterDataPrivateUse.toUpperCase(codePoint);
    default:
        // No such plane: the argument is not a valid code point and is returned unchanged.
        return codePoint;
}
|
static char[] | toUpperCaseCharArray(int codePoint)Converts the character (Unicode code point) argument to uppercase using case
mapping information from the SpecialCasing file in the Unicode
specification. If a character has no explicit uppercase
mapping, then the char itself is returned in the
char[] .
// As of Unicode 4.0, 1:M uppercasings only happen in the BMP, so callers are
// expected to pass a valid, non-supplementary code point.
assert isValidCodePoint(codePoint) &&
    !isSupplementaryCodePoint(codePoint);
// Above the fast-path limit the plane-0 table is consulted ...
if (codePoint > FAST_PATH_MAX) {
    return CharacterData00.toUpperCaseCharArray(codePoint);
}
// ... otherwise the Latin-1 table answers.
return CharacterDataLatin1.toUpperCaseCharArray(codePoint);
|
static int | toUpperCaseEx(int codePoint)Converts the character (Unicode code point) argument to uppercase using
information from the UnicodeData file.
assert isValidCodePoint(codePoint);
// Code points up to FAST_PATH_MAX are mapped by the Latin-1 table directly.
if (codePoint <= FAST_PATH_MAX) {
    return CharacterDataLatin1.toUpperCaseEx(codePoint);
}
// Everything else is dispatched by plane. Only plane 0 is queried through
// toUpperCaseEx; every other plane goes through the plain toUpperCase lookup.
switch (getPlane(codePoint)) {
    case 0:
        return CharacterData00.toUpperCaseEx(codePoint);
    case 1:
        return CharacterData01.toUpperCase(codePoint);
    case 2:
        return CharacterData02.toUpperCase(codePoint);
    // Planes 3 through 13 are undefined.
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
    case 8:
    case 9:
    case 10:
    case 11:
    case 12:
    case 13:
        return CharacterDataUndefined.toUpperCase(codePoint);
    case 14:
        return CharacterData0E.toUpperCase(codePoint);
    // Planes 15 and 16 are private use.
    case 15:
    case 16:
        return CharacterDataPrivateUse.toUpperCase(codePoint);
    default:
        // No such plane: the argument is not a valid code point and is returned unchanged.
        return codePoint;
}
|
public static java.lang.Character | valueOf(char c)Returns a Character instance representing the specified
char value.
If a new Character instance is not required, this method
should generally be used in preference to the constructor
{@link #Character(char)}, as this method is likely to yield
significantly better space and time performance by caching
frequently requested values.
// NOTE(review): the loop below writes to an unqualified `cache`, while the lookup
// further down reads `CharacterCache.cache`. It looks like an initializer for that
// cache array that ended up inside this listing rather than part of valueOf itself
// -- confirm against the full source before relying on it.
for(int i = 0; i < cache.length; i++)
cache[i] = new Character((char)i);
// Values up to 127 are served from the cache array, indexed by the char value.
if(c <= 127) { // must cache
return CharacterCache.cache[(int)c];
}
// Anything above 127 gets a newly constructed instance.
return new Character(c);
|