MakeBinaryDictionarypublic class MakeBinaryDictionary extends Object Compresses a list of words and frequencies into a tree structured binary dictionary. |
Fields Summary |
---|
public static final int | ALPHA_SIZE | public static final String | TAG_WORD | public static final String | ATTR_FREQ | private static final int | FLAG_ADDRESS_MASK | private static final int | FLAG_TERMINAL_MASK | private static final int | ADDRESS_MASK | public static final CharNode | EMPTY_NODE | List | roots | Map | mDictionary | int | mWordCount | byte[] | dict | int | dictSize | static final int | CHAR_WIDTH | static final int | FLAGS_WIDTH | static final int | ADDR_WIDTH | static final int | FREQ_WIDTH_BYTES | static final int | COUNT_WIDTH_BYTES | int | nullChildrenCount | int | notTerminalCount |
Constructors Summary |
---|
public MakeBinaryDictionary(String srcFilename, String destFilename)
populateDictionary(srcFilename);
writeToDict(destFilename);
// Enable the code below to verify that the generated tree is traversable.
if (false) {
traverseDict(0, new char[32], 0);
}
|
Methods Summary |
---|
private void | addCount(int count)
dict[dictSize++] = (byte) (0xFF & count);
| private void | addNode(com.android.tools.dict.MakeBinaryDictionary$CharNode node)
int charData = 0xFFFF & node.data;
if (charData > 254) {
dict[dictSize++] = (byte) 255;
dict[dictSize++] = (byte) ((node.data >> 8) & 0xFF);
dict[dictSize++] = (byte) (node.data & 0xFF);
} else {
dict[dictSize++] = (byte) (0xFF & node.data);
}
if (node.children != null) {
dictSize += 3; // Space for children address
} else {
dictSize += 1; // Space for just the terminal/address flags
}
if ((0xFFFFFF & node.freq) > 255) {
node.freq = 255;
}
if (node.terminal) {
byte freq = (byte) (0xFF & node.freq);
dict[dictSize++] = freq;
}
| private void | addWordRec(com.android.tools.dict.MakeBinaryDictionary$CharNode parent, java.lang.String word, int charAt, int occur)
CharNode child = null;
char data = word.charAt(charAt);
if (parent.children == null) {
parent.children = new ArrayList<CharNode>();
} else {
for (int i = 0; i < parent.children.size(); i++) {
CharNode node = parent.children.get(i);
if (node.data == data) {
child = node;
break;
}
}
}
if (child == null) {
child = new CharNode();
parent.children.add(child);
}
child.data = data;
if (child.freq == 0) child.freq = occur;
if (word.length() > charAt + 1) {
addWordRec(child, word, charAt + 1, occur);
} else {
child.terminal = true;
child.freq = occur;
}
| private void | addWordTop(java.lang.String word, int occur)
if (occur > 255) occur = 255;
char firstChar = word.charAt(0);
int index = indexOf(roots, firstChar);
if (index == -1) {
CharNode newNode = new CharNode();
newNode.data = firstChar;
newNode.freq = occur;
index = roots.size();
roots.add(newNode);
} else {
roots.get(index).freq += occur;
}
if (word.length() > 1) {
addWordRec(roots.get(index), word, 1, occur);
} else {
roots.get(index).terminal = true;
}
| private int | indexOf(java.util.List children, char c)
if (children == null) {
return -1;
}
for (int i = 0; i < children.size(); i++) {
if (children.get(i).data == c) {
return i;
}
}
return -1;
| public static void | main(java.lang.String[] args)
if (args.length < 2) {
usage();
} else {
new MakeBinaryDictionary(args[0], args[1]);
}
| private void | populateDictionary(java.lang.String filename)
roots = new ArrayList<CharNode>();
try {
SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
parser.parse(new File(filename), new DefaultHandler() {
boolean inWord;
int freq;
@Override
public void startElement(String uri, String localName,
String qName, Attributes attributes) {
if (qName.equals("w")) {
inWord = true;
freq = Integer.parseInt(attributes.getValue(0));
}
}
@Override
public void characters(char[] data, int offset, int length) {
// Ignore other whitespace
if (!inWord) return;
// Ignore one letter words
if (length < 2) return;
mWordCount++;
String word = new String(data, offset, length);
addWordTop(word, freq);
}
@Override
public void endElement(String uri, String localName,
String qName) {
if (qName.equals("w")) inWord = false;
}
});
} catch (Exception ioe) {
System.err.println("Exception in parsing\n" + ioe);
ioe.printStackTrace();
}
System.out.println("Nodes = " + CharNode.sNodes);
| void | showWord(char[] word, int size, int freq)
System.out.print(new String(word, 0, size) + " " + freq + "\n");
| void | traverseDict(int pos, char[] word, int depth)
int count = dict[pos++] & 0xFF;
for (int i = 0; i < count; i++) {
char c = (char) (dict[pos++] & 0xFF);
if (c == 0xFF) {
c = (char) (((dict[pos] & 0xFF) << 8) | (dict[pos+1] & 0xFF));
pos += 2;
}
word[depth] = c;
boolean terminal = (dict[pos] & 0x80) > 0;
int address = 0;
if ((dict[pos] & (FLAG_ADDRESS_MASK >> 16)) > 0) {
address =
((dict[pos + 0] & (FLAG_ADDRESS_MASK >> 16)) << 16)
| ((dict[pos + 1] & 0xFF) << 8)
| ((dict[pos + 2] & 0xFF));
pos += 2;
}
pos++;
if (terminal) {
showWord(word, depth + 1, dict[pos] & 0xFF);
pos++;
}
if (address != 0) {
traverseDict(address, word, depth + 1);
}
}
| private void | updateNodeAddress(int nodeAddress, com.android.tools.dict.MakeBinaryDictionary$CharNode node, int childrenAddress)
if ((dict[nodeAddress] & 0xFF) == 0xFF) { // 3 byte character
nodeAddress += 2;
}
childrenAddress = ADDRESS_MASK & childrenAddress;
if (childrenAddress == 0) {
nullChildrenCount++;
} else {
childrenAddress |= FLAG_ADDRESS_MASK;
}
if (node.terminal) {
childrenAddress |= FLAG_TERMINAL_MASK;
} else {
notTerminalCount++;
}
dict[nodeAddress + 1] = (byte) (childrenAddress >> 16);
if ((childrenAddress & FLAG_ADDRESS_MASK) != 0) {
dict[nodeAddress + 2] = (byte) ((childrenAddress & 0xFF00) >> 8);
dict[nodeAddress + 3] = (byte) ((childrenAddress & 0xFF));
}
| public static void | usage()
System.err.println("Usage: makedict <src.xml> <dest.dict>");
System.exit(-1);
| void | writeToDict(java.lang.String dictFilename)
// 4MB max, 22-bit offsets
dict = new byte[4 * 1024 * 1024]; // 4MB upper limit. Actual is probably
// < 1MB in most cases, as there is a limit in the
// resource size in apks.
dictSize = 0;
writeWordsRec(roots);
System.out.println("Dict Size = " + dictSize);
try {
FileOutputStream fos = new FileOutputStream(dictFilename);
fos.write(dict, 0, dictSize);
fos.close();
} catch (IOException ioe) {
System.err.println("Error writing dict file:" + ioe);
}
| void | writeWordsRec(java.util.List children)
if (children == null || children.size() == 0) {
return;
}
final int childCount = children.size();
addCount(childCount);
//int childrenStart = dictSize;
int[] childrenAddresses = new int[childCount];
for (int j = 0; j < childCount; j++) {
CharNode node = children.get(j);
childrenAddresses[j] = dictSize;
addNode(node);
}
for (int j = 0; j < childCount; j++) {
CharNode node = children.get(j);
int nodeAddress = childrenAddresses[j];
int cacheDictSize = dictSize;
writeWordsRec(node.children);
updateNodeAddress(nodeAddress, node, node.children != null
? cacheDictSize : 0);
}
|
|