GreekCharsets — public class GreekCharsets extends Object. The GreekCharsets class contains encoding schemes (charsets) and a toLowerCase() method implementation
for Greek characters in Unicode, ISO-8859-7 and Microsoft Windows CP1253.
Each encoding scheme contains lowercase (positions 0-35) and uppercase (positions 36-68) characters,
including accented ones. One should be able to add other encoding schemes (see RFC 1947) by adding
the definition of a new charset as well as the required logic in the toLowerCase() method. |
Fields Summary |
---|
public static char[] | UnicodeGreek | public static char[] | ISO | public static char[] | CP1253 |
Methods Summary |
---|
public static char | toLowerCase(char letter, char[] charset)
if (charset == UnicodeGreek) {
// First deal with lower case, not accented letters
if (letter >= '\u03B1" && letter <= '\u03C9")
{
// Special case 'small final sigma', where we return 'small sigma'
if (letter == '\u03C2") {
return '\u03C3";
} else {
return letter;
}
}
// Then deal with lower case, accented letters
// alpha with acute
if (letter == '\u03AC") {
return '\u03B1";
}
// epsilon with acute
if (letter == '\u03AD") {
return '\u03B5";
}
// eta with acute
if (letter == '\u03AE") {
return '\u03B7";
}
// iota with acute, iota with diaeresis, iota with acute and diaeresis
if (letter == '\u03AF" || letter == '\u03CA" || letter == '\u0390") {
return '\u03B9";
}
// upsilon with acute, upsilon with diaeresis, upsilon with acute and diaeresis
if (letter == '\u03CD" || letter == '\u03CB" || letter == '\u03B0") {
return '\u03C5";
}
// omicron with acute
if (letter == '\u03CC") {
return '\u03BF";
}
// omega with acute
if (letter == '\u03CE") {
return '\u03C9";
}
// After that, deal with upper case, not accented letters
if (letter >= '\u0391" && letter <= '\u03A9")
{
return (char) (letter + 32);
}
// Finally deal with upper case, accented letters
// alpha with acute
if (letter == '\u0386") {
return '\u03B1";
}
// epsilon with acute
if (letter == '\u0388") {
return '\u03B5";
}
// eta with acute
if (letter == '\u0389") {
return '\u03B7";
}
// iota with acute, iota with diaeresis
if (letter == '\u038A" || letter == '\u03AA") {
return '\u03B9";
}
// upsilon with acute, upsilon with diaeresis
if (letter == '\u038E" || letter == '\u03AB") {
return '\u03C5";
}
// omicron with acute
if (letter == '\u038C") {
return '\u03BF";
}
// omega with acute
if (letter == '\u038F") {
return '\u03C9";
}
} else if (charset == ISO) {
// First deal with lower case, not accented letters
if (letter >= 0xe1 && letter <= 0xf9)
{
// Special case 'small final sigma', where we return 'small sigma'
if (letter == 0xf2) {
return 0xf3;
} else {
return letter;
}
}
// Then deal with lower case, accented letters
// alpha with acute
if (letter == 0xdc) {
return 0xe1;
}
// epsilon with acute
if (letter == 0xdd) {
return 0xe5;
}
// eta with acute
if (letter == 0xde) {
return 0xe7;
}
// iota with acute, iota with diaeresis, iota with acute and diaeresis
if (letter == 0xdf || letter == 0xfa || letter == 0xc0) {
return '\u03B9";
}
// upsilon with acute, upsilon with diaeresis, upsilon with acute and diaeresis
if (letter == 0xfd || letter == 0xfb || letter == 0xe0) {
return 0xf5;
}
// omicron with acute
if (letter == 0xfc) {
return 0xef;
}
// omega with acute
if (letter == 0xfe) {
return 0xf9;
}
// After that, deal with upper case, not accented letters
if (letter >= 0xc1 && letter <= 0xd9) {
return (char) (letter + 32);
}
// Finally deal with upper case, accented letters
// alpha with acute
if (letter == 0xb6) {
return 0xe1;
}
// epsilon with acute
if (letter == 0xb8) {
return 0xe5;
}
// eta with acute
if (letter == 0xb9) {
return 0xe7;
}
// iota with acute, iota with diaeresis
if (letter == 0xba || letter == 0xda) {
return 0xe9;
}
// upsilon with acute, upsilon with diaeresis
if (letter == 0xbe || letter == 0xdb) {
return 0xf5;
}
// omicron with acute
if (letter == 0xbc) {
return 0xef;
}
// omega with acute
if (letter == 0xbf) {
return 0xf9;
}
} else if (charset == CP1253) {
// First deal with lower case, not accented letters
if (letter >= 0xe1 && letter <= 0xf9)
{
// Special case 'small final sigma', where we return 'small sigma'
if (letter == 0xf2) {
return 0xf3;
} else {
return letter;
}
}
// Then deal with lower case, accented letters
// alpha with acute
if (letter == 0xdc) {
return 0xe1;
}
// epsilon with acute
if (letter == 0xdd) {
return 0xe5;
}
// eta with acute
if (letter == 0xde) {
return 0xe7;
}
// iota with acute, iota with diaeresis, iota with acute and diaeresis
if (letter == 0xdf || letter == 0xfa || letter == 0xc0) {
return '\u03B9";
}
// upsilon with acute, upsilon with diaeresis, upsilon with acute and diaeresis
if (letter == 0xfd || letter == 0xfb || letter == 0xe0) {
return 0xf5;
}
// omicron with acute
if (letter == 0xfc) {
return 0xef;
}
// omega with acute
if (letter == 0xfe) {
return 0xf9;
}
// After that, deal with upper case, not accented letters
if (letter >= 0xc1 && letter <= 0xd9) {
return (char) (letter + 32);
}
// Finally deal with upper case, accented letters
// alpha with acute
if (letter == 0xa2) {
return 0xe1;
}
// epsilon with acute
if (letter == 0xb8) {
return 0xe5;
}
// eta with acute
if (letter == 0xb9) {
return 0xe7;
}
// iota with acute, iota with diaeresis
if (letter == 0xba || letter == 0xda) {
return 0xe9;
}
// upsilon with acute, upsilon with diaeresis
if (letter == 0xbe || letter == 0xdb) {
return 0xf5;
}
// omicron with acute
if (letter == 0xbc) {
return 0xef;
}
// omega with acute
if (letter == 0xbf) {
return 0xf9;
}
}
return Character.toLowerCase(letter);
|
|