/*
*
*
* Copyright 1990-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 only, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is
* included at /legal/license.txt).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
* Clara, CA 95054 or visit www.sun.com if you need additional
* information or have any questions.
*/
package com.sun.j2me.global;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.Vector;
import javax.microedition.io.Connector;
import com.sun.midp.security.ImplicitlyTrustedClass;
import com.sun.midp.security.SecurityToken;
import com.sun.midp.jsr238.SecurityInitializer;
import com.sun.midp.io.j2me.storage.File;
import com.sun.midp.io.j2me.storage.RandomAccessStream;
import com.sun.midp.log.LogChannels;
import com.sun.midp.log.Logging;
import com.sun.midp.main.Configuration;
import com.sun.midp.configurator.Constants;
import javax.microedition.global.UnsupportedLocaleException;
/**
* An emulator-specific subclass of <code>CollationElementTable</code> which
* reads its collation data from binary table files.
*/
public final class CollationElementTableImpl extends CollationElementTable {
/**
* Marker class passed to <code>SecurityInitializer.requestToken</code>.
* This class has a different security domain than the MIDlet suite.
*/
static private class SecurityTrusted
implements ImplicitlyTrustedClass {};
/**
* Security token requested for this class. It is handed to
* <code>RandomAccessStream</code> when a collation file is opened.
*/
private static SecurityToken classSecurityToken =
SecurityInitializer.requestToken(new SecurityTrusted());
/**
* Array of collation table files, one entry per file name listed in the
* <code>microedition.global.collation</code> property. Stays
* <code>null</code> when that property is missing or empty.
*/
private static CollationFile[] collationFiles;
/**
* Collation table instances for supported locales. Indexed by the values
* stored in <code>localeToTable</code>.
*/
private static CollationElementTableImpl[] collationTables;
/**
* Array of locales for which collation elements exist. Never
* <code>null</code> after class initialization; empty when nothing is
* configured.
*/
private static String[] locales;
/**
* Array for converting from a locale string to the collation table index:
* <code>localeToTable[i]</code> is the index into
* <code>collationTables</code> of the table serving <code>locales[i]</code>.
*/
private static int[] localeToTable;
/**
* This is used to prevent loading of all collation element tables at once,
* which is very memory consuming. <code>CollationFile.run()</code> holds
* this monitor while it reads and converts a file.
*/
private static Object loadingMutex = new Object();
/**
 * Class representing a file with collation tables data.
 * <p>
 * An instance is a <code>Runnable</code>: running it reads
 * <code>&lt;config root&gt;&lt;fileName&gt;.bin</code>, converts the raw
 * bytes into the typed lookup arrays and then publishes the outcome through
 * {@link #loadingState}, waking every thread waiting on this object.
 * </p>
 */
private static class CollationFile implements Runnable {
    /**
     * The name of the file without an extension.
     */
    private final String fileName;
    /**
     * The max contraction for that file.
     */
    public final int maxContraction;
    /**
     * Loading state of the file. Written only while holding this object's
     * monitor.
     *
     * @see #STATE_UNINITIALIZED
     * @see #STATE_LOAD_FINISHED
     * @see #STATE_LOAD_FAILED
     */
    public int loadingState = STATE_UNINITIALIZED;
    /** Collation file data: first block of the file, kept as raw bytes. */
    public byte[] offsets0;
    /** Collation file data: second block, converted to shorts. */
    public short[] offsets1;
    /** Collation file data: third block, converted to shorts. */
    public short[] offsets2;
    /** Collation file data: fourth block, converted to ints. */
    public int[] data;
    /** Collation file data: fifth block, converted to ints. */
    public int[] data2;

    /**
     * Creates a new collation file with the given name and maximum
     * contraction.
     *
     * @param fileName the file name
     * @param maxContraction the maximum contraction
     */
    public CollationFile(String fileName, int maxContraction) {
        this.fileName = fileName;
        this.maxContraction = maxContraction;
    }

    /**
     * Implements loading of the table data from the input file.
     * <p>
     * The whole load runs under <code>loadingMutex</code> so that only one
     * file is read and converted at a time. If the file has already been
     * loaded (or has already failed) by an earlier run, the method returns
     * without touching the file again. The final state is always
     * published, even when loading terminates with an unchecked error, so
     * that waiting threads are never left blocked.
     * </p>
     */
    public void run() {
        synchronized (loadingMutex) {
            synchronized (this) {
                if (loadingState != STATE_UNINITIALIZED) {
                    // A previous run already produced a final state.
                    return;
                }
            }
            int newState = STATE_LOAD_FAILED;
            try {
                if (loadTables()) {
                    newState = STATE_LOAD_FINISHED;
                }
            } finally {
                // Publish while still holding loadingMutex so that a
                // second loader thread cannot slip in and reload the file.
                synchronized (this) {
                    loadingState = newState;
                    notifyAll();
                }
            }
        }
    }

    /**
     * Reads the five length-prefixed blocks of the file and converts them
     * into the table arrays. The stream and the storage connection are
     * released on every path.
     *
     * @return <code>true</code> if all blocks were read and converted,
     *      <code>false</code> if opening or reading the file failed
     */
    private boolean loadTables() {
        byte[] raw0 = null;
        byte[] raw1 = null;
        byte[] raw2 = null;
        byte[] raw3 = null;
        byte[] raw4 = null;
        boolean complete = false;
        String storageName = null;
        try {
            RandomAccessStream storage =
                new RandomAccessStream(classSecurityToken);
            storageName = File.getConfigRoot(Constants.INTERNAL_STORAGE_ID)
                + fileName + ".bin";
            storage.connect(storageName, Connector.READ);
            try {
                DataInputStream ds =
                    new DataInputStream(storage.openInputStream());
                try {
                    raw0 = readBlock(ds, 0);    // bytes
                    raw1 = readBlock(ds, 1);    // shorts
                    raw2 = readBlock(ds, 1);    // shorts
                    raw3 = readBlock(ds, 2);    // ints
                    raw4 = readBlock(ds, 2);    // ints
                    complete = true;
                } catch (IOException e) {
                    if (Logging.REPORT_LEVEL <= Logging.ERROR) {
                        Logging.report(Logging.ERROR, LogChannels.LC_JSR238,
                            "Failed to read collation table: " + storageName +
                            "\nException: " + e.toString());
                    }
                } finally {
                    ds.close();
                }
            } finally {
                storage.disconnect();
            }
        } catch (IOException e) {
            if (Logging.REPORT_LEVEL <= Logging.ERROR) {
                Logging.report(Logging.ERROR, LogChannels.LC_JSR238,
                    "Failed to open collation table: " + storageName +
                    "\nException: " + e.toString());
            }
        }
        if (!complete) {
            return false;
        }
        // Drop each raw buffer as soon as it has been converted to keep
        // the peak memory usage low.
        offsets0 = raw0;
        offsets1 = LocaleHelpers.byteArrayToShortArray(raw1);
        raw1 = null;
        offsets2 = LocaleHelpers.byteArrayToShortArray(raw2);
        raw2 = null;
        data = LocaleHelpers.byteArrayToIntArray(raw3);
        raw3 = null;
        data2 = LocaleHelpers.byteArrayToIntArray(raw4);
        raw4 = null;
        return true;
    }

    /**
     * Reads one block: an unsigned 16-bit element count followed by that
     * many elements.
     *
     * @param in the stream positioned at the element count
     * @param shift log2 of the element size in bytes (0, 1 or 2)
     * @return the raw bytes of the block
     * @throws IOException if the stream ends early or cannot be read
     */
    private static byte[] readBlock(DataInputStream in, int shift)
            throws IOException {
        int length = in.readUnsignedShort() << shift;
        byte[] block = new byte[length];
        in.readFully(block, 0, length);
        return block;
    }
}
/**
 * Initialization of static members.
 * <p>
 * Reads the <code>microedition.global.collation</code> property (a comma
 * separated list of collation file names) and, for every file, the
 * <code>...&lt;file&gt;.maxcontr</code> and
 * <code>...&lt;file&gt;.locales</code> properties. The latter is a
 * semicolon separated list of <code>locale[,locale...]=index</code>
 * entries; malformed entries are skipped.
 * </p>
 */
static {
    String fileList =
        Configuration.getProperty("microedition.global.collation");
    if ((fileList == null) || (fileList.length() == 0)) {
        // nothing configured: no supported locales
        locales = new String[0];
    } else {
        // receives the two sides of "<left part> = <right part>"
        String[] pair = new String[2];
        String[] names = LocaleHelpers.splitString(fileList, ",", -1);
        Vector tables = new Vector();
        Vector localeNames = new Vector();
        Vector tableIndices = new Vector();

        collationFiles = new CollationFile[names.length];
        for (int f = 0; f < names.length; ++f) {
            String prefix = "microedition.global.collation." +
                names[f] + ".";

            // maximum contraction length, 2 unless configured otherwise
            int maxContr = 2;
            String contrText = Configuration.getProperty(prefix + "maxcontr");
            if ((contrText != null) && (contrText.length() != 0)) {
                try {
                    maxContr = Integer.parseInt(contrText);
                } catch (NumberFormatException ignored) {
                    // keep the default
                }
            }

            CollationFile file = new CollationFile(names[f], maxContr);
            collationFiles[f] = file;

            // locales served by this file
            String localeSpec = Configuration.getProperty(prefix + "locales");
            if ((localeSpec == null) || (localeSpec.length() == 0)) {
                continue;
            }
            String[] entries = LocaleHelpers.splitString(localeSpec, ";", -1);
            for (int n = 0; n < entries.length; ++n) {
                boolean wellFormed =
                    (LocaleHelpers.splitString(
                        pair, entries[n], "=", 2) == 2) &&
                    (pair[1].length() != 0);
                if (!wellFormed) {
                    continue;
                }

                int tableLocaleIndex;
                try {
                    tableLocaleIndex = Integer.parseInt(pair[1]);
                } catch (NumberFormatException ignored) {
                    continue;
                }

                // every locale name on the left side maps to the table
                // which is about to be appended
                Integer slot = new Integer(tables.size());
                String[] aliases =
                    LocaleHelpers.splitString(pair[0], ",", -1);
                for (int a = 0; a < aliases.length; ++a) {
                    localeNames.addElement(aliases[a]);
                    tableIndices.addElement(slot);
                }
                tables.addElement(
                    new CollationElementTableImpl(tableLocaleIndex, file));
            }
        }

        // freeze the collected vectors into arrays
        collationTables = new CollationElementTableImpl[tables.size()];
        tables.copyInto(collationTables);

        locales = new String[localeNames.size()];
        localeNames.copyInto(locales);

        localeToTable = new int[tableIndices.size()];
        for (int t = 0; t < localeToTable.length; ++t) {
            Integer boxed = (Integer)tableIndices.elementAt(t);
            localeToTable[t] = boxed.intValue();
        }
    }
}
/**
* Before loading of the table data. The state keeps this value until the
* loader has finished.
*/
private static final int STATE_UNINITIALIZED = 0;
/** After loading of the table data. */
private static final int STATE_LOAD_FINISHED = 1;
/** The table is inconsistent and can't be used. */
private static final int STATE_LOAD_FAILED = 2;
/** Min value of the L2 weight value of an encoded collation. */
private static final int MIN_L2 = 1;
/** Min value of the L3 weight value of an encoded collation. */
private static final int MIN_L3 = 1;
/**
* The mask of the Sequence flag of a <code>data</code> entry. Consecutive
* entries carrying the same value of this bit are read as one sequence of
* collation elements.
*/
private static final int SEQUENCE_FLAG = 0x80000000;
/**
* The mask of the Operation flag of a <code>data</code> entry. When set,
* the code point is added to the L1 part of the entry.
*/
private static final int OPERATION_FLAG = 0x40000000;
/** The mask of the Bookmark offset (an index into <code>data2</code>). */
private static final int BOOKMARK_OFFSET_MASK = 0x7fff0000;
/** The mask of the Bookmark code (the code point kept in a bookmark). */
private static final int BOOKMARK_CODEPT_MASK = 0x0000ffff;
/** The shift of the Bookmark offset. */
private static final int BOOKMARK_OFFSET_SHIFT = 16;
/**
* The mask of the data entry type flag of a <code>data2</code> entry.
* Clear: the entry uses the <code>DA2E0_*</code> layout (locale bits and
* offset). Set: the entry uses the <code>DA2E1_*</code> layout (offset and
* code point).
*/
private static final int DATA2_ENTRY_TYPE_FLAG = 0x80000000;
/**
* The mask of the data sequence flag. Consecutive <code>data2</code>
* entries carrying the same value of this bit are scanned as one group.
*/
private static final int DATA2_SEQUENCE_FLAG = 0x40000000;
/**
* The mask of the data locale (type 0 entry). The extracted bits are
* tested against <code>localeFlag</code>.
*/
private static final int DA2E0_LOCALE_MASK = 0x0fff0000;
/**
* The mask of the data offset (type 0 entry): an index into
* <code>data</code>.
*/
private static final int DA2E0_OFFSET_MASK = 0x00007fff;
/**
* The mask of the data offset (type 1 entry): added to the current
* <code>data2</code> index to form the offset of the child bookmark.
*/
private static final int DA2E1_OFFSET_MASK = 0x3ff00000;
/** The mask of the data code (type 1 entry): the code point to match. */
private static final int DA2E1_CODEPT_MASK = 0x000fffff;
/** The shift of the data locale. */
private static final int DA2E0_LOCALE_SHIFT = 16;
/** The shift of the data offset (type 1 entry). */
private static final int DA2E1_OFFSET_SHIFT = 20;
/**
* Array of offsets used in the getCollationElements function; first lookup
* level, indexed by bits of <code>cp &gt;&gt; 8</code>.
*/
private byte[] offsets0;
/**
* Array of offsets used in the getCollationElements function; second
* lookup level.
*/
private short[] offsets1;
/**
* Array of offsets used in the getCollationElements function; third
* lookup level.
*/
private short[] offsets2;
/**
* Array of offsets used in the getCollationData and
* getCollationElements functions. A <code>null</code> value is used by
* getCollationElements as the "not loaded yet" indicator.
*/
private int[] data;
/**
* Array of offsets used in the getCollationDataOffset and
* getChildBookmark functions.
*/
private int[] data2;
/** The assigned collation data file for this table. */
private final CollationFile collationFile;
/** The locale index. */
private int localeIndex;
/** The locale flag, <code>1 &lt;&lt; localeIndex</code>. */
private int localeFlag;
/**
 * Builds the table object serving one locale of a collation file. No table
 * data is loaded here.
 *
 * @param index the locale index; also determines the single-bit locale
 *      flag <code>1 &lt;&lt; index</code>
 * @param file the CollationFile instance holding the shared table data
 */
private CollationElementTableImpl(int index, CollationFile file) {
    this.collationFile = file;
    this.localeIndex = index;
    this.localeFlag = 1 << index;
}
/**
 * Looks up the table instance registered for the given locale and makes
 * sure a background load of its collation file has been requested.
 * <p>
 * A loader thread is started whenever the file is still in the
 * uninitialized state; since that state lasts until loading completes,
 * more than one loader thread may be started for the same file.
 * </p>
 *
 * @param locale the locale
 * @return the instance
 * @throws UnsupportedLocaleException if no table is registered for
 *      <code>locale</code>
 */
public static synchronized CollationElementTableImpl getInstance(
        String locale) {
    int found = -1;
    for (int pos = 0; (pos < locales.length) && (found < 0); ++pos) {
        if (locales[pos].equals(locale)) {
            found = pos;
        }
    }
    if (found < 0) {
        // not supported
        throw new UnsupportedLocaleException("The locale " + locale +
            " is not supported by the string comparator");
    }

    CollationElementTableImpl table = collationTables[localeToTable[found]];
    CollationFile file = table.collationFile;
    synchronized (file) {
        if (file.loadingState == STATE_UNINITIALIZED) {
            // Start loading of the data immediately
            Thread loader = new Thread(file);
            loader.start();
        }
    }
    return table;
}
/**
 * Blocks until all table data is loaded from the file and then copies the
 * references to the loaded arrays into this instance.
 * <p>
 * The wait is performed in a loop, so a spurious wakeup does not end it
 * while the loader is still running. An interrupt ends the wait; if the
 * data is not available at that moment the call fails.
 * </p>
 *
 * @throws IllegalStateException if the loading has failed or the wait was
 *      interrupted before the data became available
 */
private void initializeData() {
    synchronized (collationFile) {
        while (collationFile.loadingState == STATE_UNINITIALIZED) {
            try {
                collationFile.wait();
            } catch (InterruptedException e) {
                // Give up waiting; the state check below decides whether
                // the data can be used.
                break;
            }
        }
        if (collationFile.loadingState != STATE_LOAD_FINISHED) {
            throw new IllegalStateException(
                "Failed to load the collation element table data");
        }
        offsets0 = collationFile.offsets0;
        offsets1 = collationFile.offsets1;
        offsets2 = collationFile.offsets2;
        data2 = collationFile.data2;
        // "data" doubles as the "already initialized" marker checked by
        // getCollationElements, so it is assigned after all other arrays.
        data = collationFile.data;
    }
}
/**
 * Derives the two implicit collation elements of a code point which has no
 * entry in the table and writes them to <code>buffer[offset]</code> and
 * <code>buffer[offset + 1]</code>.
 * <p>
 * The first element carries the minimal L2 and L3 weights and an L1 weight
 * of <code>base + (cp &gt;&gt; 15)</code>; the second one carries only an
 * L1 weight of <code>(cp &amp; 0x7fff) | 0x8000</code>. The base depends
 * on the block of the code point.
 * </p>
 *
 * @param buffer the buffer for the collation elements
 * @param offset the offset into <code>buffer</code>
 * @param cp the code point
 * @return the number of calculated collation elements, always 2
 */
private static final int calculateImplicitWeights(int[] buffer, int offset,
        int cp) {
    // CJK Unified Ideographs
    final boolean unified = (cp >= 0x4e00) && (cp <= 0x9fbf);
    // CJK Unified Ideographs Extension A and Extension B
    final boolean extension =
        ((cp >= 0x3400) && (cp <= 0x4dbf)) ||
        ((cp >= 0x20000) && (cp <= 0x2a6df));

    int base;
    if (unified) {
        base = 0xfb40;
    } else if (extension) {
        base = 0xfb80;
    } else {
        // TODO: other ranges?
        base = 0xfbc0;
    }

    int leadingL1 = (base + (cp >> 15)) & L1_MASK;
    int trailingL1 = ((cp & 0x7fff) | 0x8000) & L1_MASK;

    buffer[offset] = (MIN_L3 << L3_SHIFT) | (MIN_L2 << L2_SHIFT) | leadingL1;
    buffer[offset + 1] = trailingL1;
    return 2;
}
/**
 * Reads the collation elements which start at <code>data[index]</code>.
 * <p>
 * Entries belong together as long as their sequence flag has the same
 * value as that of the first entry. If the run consists of one entry only,
 * that entry is returned directly (marked with
 * <code>SINGLE_CE_FLAG</code>, with <code>BOOKMARK_FLAG</code> cleared)
 * and the buffer is left untouched. Otherwise all entries of the run are
 * written to the buffer. Entries with the operation flag set get the code
 * point added to their L1 part.
 * </p>
 *
 * @param buffer the buffer for collation elements
 * @param offset the offset into <code>buffer</code>
 * @param cp the code point
 * @param index the data table index
 * @return the single encoded element, or the number of elements stored in
 *      the buffer
 */
private final int getCollationData(int[] buffer, int offset, int cp,
        int index) {
    int first = data[index];
    final int runFlag = first & SEQUENCE_FLAG;
    if ((first & OPERATION_FLAG) != 0) {
        first = (first & ~L1_MASK) | (((first & L1_MASK) + cp) & L1_MASK);
    }

    if ((data[index + 1] & SEQUENCE_FLAG) != runFlag) {
        // the run has a single entry
        return (first | SINGLE_CE_FLAG) & ~BOOKMARK_FLAG;
    }

    buffer[offset] = first;
    int count = 1;
    for (int next = data[index + 1];
            (next & SEQUENCE_FLAG) == runFlag;
            next = data[index + count]) {
        if ((next & OPERATION_FLAG) != 0) {
            next = (next & ~L1_MASK) | (((next & L1_MASK) + cp) & L1_MASK);
        }
        buffer[offset + count] = next;
        ++count;
    }
    return count;
}
/**
 * Resolves a bookmark to an index into the <code>data</code> table.
 * <p>
 * Scans the group of <code>data2</code> entries the bookmark points to and
 * picks the first type 0 entry whose locale bits contain the locale flag
 * of this table.
 * </p>
 *
 * @param bookmark the bookmark
 * @return the data table index, or -1 if the group has no entry for the
 *      locale of this table
 */
private final int getCollationDataOffset(int bookmark) {
    int pos = (bookmark & BOOKMARK_OFFSET_MASK) >>> BOOKMARK_OFFSET_SHIFT;
    int entry = data2[pos];
    final int groupFlag = entry & DATA2_SEQUENCE_FLAG;

    while (true) {
        if ((entry & DATA2_ENTRY_TYPE_FLAG) == 0) {
            int localeBits =
                (entry & DA2E0_LOCALE_MASK) >>> DA2E0_LOCALE_SHIFT;
            if ((localeBits & localeFlag) != 0) {
                return entry & DA2E0_OFFSET_MASK;
            }
        }
        entry = data2[++pos];
        if ((entry & DATA2_SEQUENCE_FLAG) != groupFlag) {
            // left the group without a match
            return -1;
        }
    }
}
/**
 * This method can be used to traverse the contractions. The traversing
 * starts when the {@link #getCollationElements} method returns a bookmark
 * instead of collation elements. The returned bookmark, which represents
 * a code point sequence consisting only of one code point, can be further
 * tested if it's extensible by various other code points.
 * <p>
 * If a partial match is found, the method returns another bookmark which
 * represents the new sequence. The new bookmark can be further "refined"
 * as well. To get the collation elements for the sequence, the sequence
 * has to be terminated by the
 * <code>getChildBookmark(bookmark, TERMINAL_CODE_POINT)</code> call.
 * If the call returns a valid bookmark, it is guaranteed, that the
 * <code>getCollationElements</code> method will return the collation
 * elements for this final bookmark.
 * </p><p>
 * If no match can be found for the given bookmark and the code point
 * value, the method returns <code>INVALID_BOOKMARK_VALUE</code>.
 * </p><p>
 * If the table data has not been loaded into this instance yet, the call
 * blocks until it is available.
 * </p>
 *
 * @param bookmark the bookmark
 * @param cp a code point value or <code>TERMINAL_CODE_POINT</code>
 * @return the new bookmark for the new code point sequence if a match is
 *      found or <code>INVALID_BOOKMARK_VALUE</code> if no match can be
 *      found in the table
 * @throws IllegalStateException if the table data could not be loaded
 * @see #getCollationElements
 */
public int getChildBookmark(int bookmark, int cp) {
    if (bookmark == INVALID_BOOKMARK_VALUE) {
        return INVALID_BOOKMARK_VALUE;
    }
    if (data2 == null) {
        // The tables are loaded lazily; do not rely on
        // getCollationElements having been called on this instance first.
        initializeData();
    }

    final boolean terminal = (cp == TERMINAL_CODE_POINT);
    int index = (bookmark & BOOKMARK_OFFSET_MASK) >>> BOOKMARK_OFFSET_SHIFT;
    int value = data2[index];
    int sequenceFlag = value & DATA2_SEQUENCE_FLAG;
    int i = 0;
    do {
        if (terminal) {
            // looking for a type 0 entry of our locale
            if (((value & DATA2_ENTRY_TYPE_FLAG) == 0) &&
                    ((((value & DA2E0_LOCALE_MASK) >>> DA2E0_LOCALE_SHIFT) &
                        localeFlag) != 0)) {
                // we have found an entry for our locale
                return bookmark;
            }
        } else if (((value & DATA2_ENTRY_TYPE_FLAG) != 0) &&
                ((value & DA2E1_CODEPT_MASK) == cp)) {
            // construct a new bookmark
            // replace the old offset with a new one, which is stored
            // relative to the start of the current group
            int childOffset =
                ((value & DA2E1_OFFSET_MASK) >>> DA2E1_OFFSET_SHIFT) + index;
            bookmark &= ~BOOKMARK_OFFSET_MASK;
            bookmark |= (childOffset << BOOKMARK_OFFSET_SHIFT) &
                BOOKMARK_OFFSET_MASK;
            return bookmark;
        }
        ++i;
        value = data2[index + i];
    } while ((value & DATA2_SEQUENCE_FLAG) == sequenceFlag);
    return INVALID_BOOKMARK_VALUE;
}
/**
* Returns the collation element/elements for the given code point/points.
* Each returned collation element is encoded in a single integer value,
* which can be further decoded by the static methods of this class.
* <p>
* There are three types of possible return value and two types of the
* input values.
* </p><p>
* If the parameters are an integer buffer, an offset to this buffer and
* a single code point, the method can return:
* </p><p>
* <ol>
* <li>
* A single encoded collation element value, when the code point
* decomposes into one collation element and it isn't a starting
* code point of any contraction. In this case nothing is written
* into the buffer.
* </li>
* <li>
* The number of encoded collation elements, when the code point
* decomposes into more than one collation elements and it isn't
* a starting code point of any contraction. The encoded collation
* elements are written to the buffer on the given offset.
* </li>
* <li>
* A bookmark value, when the given code point is a starting code
* point of a contraction. Nothing is written into the buffer.
* </li>
* </ol>
* </p><p>
* If the parameters are an integer buffer, an offset to this buffer and
* a bookmark, the method can return:
* </p><p>
* <ol>
* <li>
* A single encoded collation element value, when the code point
* sequence behind the bookmark decomposes into one collation
* element. Nothing is written into the buffer.
* </li>
* <li>
* The number of encoded collation elements, when the code point
* sequence behind the bookmark decomposes into more than one
* collation elements. The encoded collation elements are written
* to the buffer on the given offset.
* </li>
* <li>
* A zero value, when the given bookmark is invalid or it doesn't
* target the complete (terminated) code point sequence.
* </li>
* </ol>
* </p>
*
* @param buffer the array for the decomposition
* @param offset the offset from the beginning of the array, where to place
* the collation elements
* @param cp a code point or a bookmark
* @return a single encoded collation element or the number of returned
* collation elements or a bookmark or zero when the given bookmark
* is invalid or doesn't target a terminated code point sequence
* @see #isBookmark
* @see #isSingleCollationEl
* @see #getChildBookmark
*/
public int getCollationElements(int[] buffer, int offset, int cp) {
    // the table data is bound to this instance on first use
    if (data == null) {
        initializeData();
    }

    if ((cp & BOOKMARK_FLAG) != 0) {
        // cp is a bookmark, not a code point
        if (cp == INVALID_BOOKMARK_VALUE) {
            return 0;
        }
        int dataIndex = getCollationDataOffset(cp);
        return (dataIndex == -1)
            ? 0
            : getCollationData(buffer, offset, cp & BOOKMARK_CODEPT_MASK,
                dataIndex);
    }

    // level 0: selected by the bits above the low byte
    int page = (cp >> 8) & 0x1fff;
    if (page >= offsets0.length) {
        return calculateImplicitWeights(buffer, offset, cp);
    }
    byte level0 = offsets0[page];
    if (level0 == -1) {
        return calculateImplicitWeights(buffer, offset, cp);
    }

    // level 1: selected by bits 4-7
    int row = ((level0 & 0xff) << 4) + ((cp >> 4) & 0xf);
    short level1 = offsets1[row];
    if (level1 == -1) {
        return calculateImplicitWeights(buffer, offset, cp);
    }
    if ((level1 & 0x8000) != 0) {
        // the entry points straight into the data table
        return getCollationData(buffer, offset, cp, level1 & 0x7fff);
    }

    // level 2: selected by bits 0-3
    int cell = ((level1 & 0xfff) << 4) + (cp & 0xf);
    short level2 = offsets2[cell];
    if (level2 == -1) {
        return calculateImplicitWeights(buffer, offset, cp);
    }

    int target = level2 & 0xffff;
    if ((target & 0x8000) != 0) {
        // start of a contraction: hand out a bookmark
        int offsetBits =
            (target << BOOKMARK_OFFSET_SHIFT) & BOOKMARK_OFFSET_MASK;
        int codePointBits = cp & BOOKMARK_CODEPT_MASK;
        return BOOKMARK_FLAG | offsetBits | codePointBits;
    }
    return getCollationData(buffer, offset, cp, target);
}
/**
 * Reports the maximum contraction length configured for the collation file
 * this table is backed by.
 *
 * @return the longest contraction
 */
public int getMaxContractionLength() {
    final CollationFile file = this.collationFile;
    return file.maxContraction;
}
/**
 * Gets the locales for which a <code>StringComparator</code> is available
 * in this implementation. The "empty string" locale is left out of the
 * result. If no locales are supported, the returned array is empty, not
 * <code>null</code>. A new array is returned on every call.
 *
 * @return an array of valid <code>microedition.locale</code> values
 */
public static String[] getSupportedLocales() {
    // first pass: how many locales have a non-empty name
    int named = 0;
    for (int i = 0; i < locales.length; ++i) {
        if (locales[i].length() != 0) {
            ++named;
        }
    }

    // second pass: copy them in their original order
    String[] result = new String[named];
    int next = 0;
    for (int i = 0; i < locales.length; ++i) {
        String candidate = locales[i];
        if (candidate.length() != 0) {
            result[next++] = candidate;
        }
    }
    return result;
}
} |