diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index 9dcffd4e287227dff862a058bcef64351648c569..3d89226c0c79dd1f95932cbe8747857ce917314d 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -159,7 +159,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { // TODO: Create "cache dictionary" to cache fresh words for frequently updated dictionaries, // considering performance regression. protected void addWord(final String word, final int frequency) { - mFusionDictionary.add(word, frequency, null, null); + mFusionDictionary.add(word, frequency, null /* shortcutTargets */); } /** diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index cc98010fb4fa9fdc56ac8ad2d8d858f2f12aa91f..88da7b0d880b1c68bca0e3fe96dabbb5c6734e1f 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -1317,8 +1317,16 @@ public class BinaryDictInputOutput { 0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG), 0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG))); if (null != dict) { - for (Word w : dict) { - newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mBigrams); + for (final Word w : dict) { + newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets); + } + for (final Word w : dict) { + // By construction a binary dictionary may not have bigrams pointing to + // words that are not also registered as unigrams so we don't have to avoid + // them explicitly here. + for (final WeightedString bigram : w.mBigrams) { + newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency); + } } } diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index 40bcfc3aac05f0a94d45ffae44b113f5f889d11e..c293b2ba483239e54ea6fa8c831dbf142a2d63d5 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -286,7 +286,7 @@ public class FusionDictionary implements Iterable<Word> { for (WeightedString word : words) { final CharGroup t = findWordInTree(mRoot, word.mWord); if (null == t) { - add(getCodePoints(word.mWord), 0, null, null); + add(getCodePoints(word.mWord), 0, null); } } } @@ -305,12 +305,8 @@ public class FusionDictionary implements Iterable<Word> { * @param bigrams a list of bigrams, or null. */ public void add(final String word, final int frequency, - final ArrayList<WeightedString> shortcutTargets, - final ArrayList<WeightedString> bigrams) { - if (null != bigrams) { - addNeutralWords(bigrams); - } - add(getCodePoints(word), frequency, shortcutTargets, bigrams); + final ArrayList<WeightedString> shortcutTargets) { + add(getCodePoints(word), frequency, shortcutTargets); } /** @@ -344,7 +340,7 @@ public class FusionDictionary implements Iterable<Word> { final CharGroup charGroup2 = findWordInTree(mRoot, word2); if (charGroup2 == null) { // TODO: refactor with the identical code in addNeutralWords - add(getCodePoints(word2), 0, null, null); + add(getCodePoints(word2), 0, null); } charGroup.addBigram(word2, frequency); } else { @@ -355,17 +351,15 @@ public class FusionDictionary implements Iterable<Word> { /** * Add a word to this dictionary. * - * The shortcuts and bigrams, if any, have to be in the dictionary already. If they aren't, + * The shortcuts, if any, have to be in the dictionary already. If they aren't, * an exception is thrown. * * @param word the word, as an int array. * @param frequency the frequency of the word, in the range [0..255]. * @param shortcutTargets an optional list of shortcut targets for this word (null if none). - * @param bigrams an optional list of bigrams for this word (null if none). */ private void add(final int[] word, final int frequency, - final ArrayList<WeightedString> shortcutTargets, - final ArrayList<WeightedString> bigrams) { + final ArrayList<WeightedString> shortcutTargets) { assert(frequency >= 0 && frequency <= 255); Node currentNode = mRoot; int charIndex = 0; @@ -390,7 +384,7 @@ public class FusionDictionary implements Iterable<Word> { final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]); final CharGroup newGroup = new CharGroup( Arrays.copyOfRange(word, charIndex, word.length), - shortcutTargets, bigrams, frequency); + shortcutTargets, null /* bigrams */, frequency); currentNode.mData.add(insertionIndex, newGroup); checkStack(currentNode); } else { @@ -400,21 +394,21 @@ public class FusionDictionary implements Iterable<Word> { // The new word is a prefix of an existing word, but the node on which it // should end already exists as is. Since the old CharNode was not a terminal, // make it one by filling in its frequency and other attributes - currentGroup.update(frequency, shortcutTargets, bigrams); + currentGroup.update(frequency, shortcutTargets, null); } else { // The new word matches the full old word and extends past it. // We only have to create a new node and add it to the end of this. final CharGroup newNode = new CharGroup( Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), - shortcutTargets, bigrams, frequency); + shortcutTargets, null /* bigrams */, frequency); currentGroup.mChildren = new Node(); currentGroup.mChildren.mData.add(newNode); } } else { if (0 == differentCharIndex) { // Exact same word. Update the frequency if higher. This will also add the - // new bigrams to the existing bigram list if it already exists. - currentGroup.update(frequency, shortcutTargets, bigrams); + // new shortcuts to the existing shortcut list if it already exists. + currentGroup.update(frequency, shortcutTargets, null); } else { // Partial prefix match only. We have to replace the current node with a node // containing the current prefix and create two new ones for the tails. @@ -429,14 +423,14 @@ public class FusionDictionary implements Iterable<Word> { if (charIndex + differentCharIndex >= word.length) { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), - shortcutTargets, bigrams, frequency, newChildren); + shortcutTargets, null /* bigrams */, frequency, newChildren); } else { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), - null, null, -1, newChildren); - final CharGroup newWord = new CharGroup( - Arrays.copyOfRange(word, charIndex + differentCharIndex, - word.length), shortcutTargets, bigrams, frequency); + null /* shortcutTargets */, null /* bigrams */, -1, newChildren); + final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word, + charIndex + differentCharIndex, word.length), + shortcutTargets, null /* bigrams */, frequency); final int addIndex = word[charIndex + differentCharIndex] > currentGroup.mChars[differentCharIndex] ? 1 : 0; newChildren.mData.add(addIndex, newWord); @@ -494,7 +488,8 @@ public class FusionDictionary implements Iterable<Word> { */ private static int findInsertionIndex(final Node node, int character) { final ArrayList<CharGroup> data = node.mData; - final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0); + final CharGroup reference = new CharGroup(new int[] { character }, + null /* shortcutTargets */, null /* bigrams */, 0); int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR); return result >= 0 ? result : -result - 1; } diff --git a/tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java index d1d2a9ca4c6d18b21d9bedbba7028db252ead24b..d86719a1d8f804f3a44909b46628c31616cec696 100644 --- a/tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java +++ b/tools/makedict/src/com/android/inputmethod/latin/makedict/XmlDictInputOutput.java @@ -72,19 +72,15 @@ public class XmlDictInputOutput { int mFreq; // the currently read freq String mWord; // the current word final HashMap<String, ArrayList<WeightedString>> mShortcutsMap; - final HashMap<String, ArrayList<WeightedString>> mBigramsMap; /** * Create the handler. * * @param shortcuts the shortcuts as a map. This may be empty, but may not be null. - * @param bigrams the bigrams as a map. This may be empty, but may not be null. */ - public UnigramHandler(final HashMap<String, ArrayList<WeightedString>> shortcuts, - final HashMap<String, ArrayList<WeightedString>> bigrams) { + public UnigramHandler(final HashMap<String, ArrayList<WeightedString>> shortcuts) { mDictionary = null; mShortcutsMap = shortcuts; - mBigramsMap = bigrams; mWord = ""; mState = START; mFreq = 0; @@ -94,7 +90,6 @@ public class XmlDictInputOutput { final FusionDictionary dict = mDictionary; mDictionary = null; mShortcutsMap.clear(); - mBigramsMap.clear(); mWord = ""; mState = START; mFreq = 0; @@ -143,7 +138,7 @@ public class XmlDictInputOutput { @Override public void endElement(String uri, String localName, String qName) { if (WORD == mState) { - mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), mBigramsMap.get(mWord)); + mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord)); mState = START; } } @@ -191,6 +186,7 @@ public class XmlDictInputOutput { } } + // This may return an empty map, but will never return null. public HashMap<String, ArrayList<WeightedString>> getAssocMap() { return mAssocMap; } @@ -211,6 +207,7 @@ public class XmlDictInputOutput { BIGRAM_FREQ_ATTRIBUTE); } + // As per getAssocMap(), this never returns null. public HashMap<String, ArrayList<WeightedString>> getBigramMap() { return getAssocMap(); } @@ -231,6 +228,7 @@ public class XmlDictInputOutput { TARGET_PRIORITY_ATTRIBUTE); } + // As per getAssocMap(), this never returns null. public HashMap<String, ArrayList<WeightedString>> getShortcutMap() { return getAssocMap(); } @@ -260,10 +258,19 @@ public class XmlDictInputOutput { if (null != shortcuts) parser.parse(shortcuts, shortcutHandler); final UnigramHandler unigramHandler = - new UnigramHandler(shortcutHandler.getShortcutMap(), - bigramHandler.getBigramMap()); + new UnigramHandler(shortcutHandler.getShortcutMap()); parser.parse(unigrams, unigramHandler); - return unigramHandler.getFinalDictionary(); + final FusionDictionary dict = unigramHandler.getFinalDictionary(); + final HashMap<String, ArrayList<WeightedString>> bigramMap = bigramHandler.getBigramMap(); + for (final String firstWord : bigramMap.keySet()) { + if (!dict.hasWord(firstWord)) continue; + final ArrayList<WeightedString> bigramList = bigramMap.get(firstWord); + for (final WeightedString bigram : bigramList) { + if (!dict.hasWord(bigram.mWord)) continue; + dict.setBigram(firstWord, bigram.mWord, bigram.mFrequency); + } + } + return dict; } /** diff --git a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java index 191eb804df4ea162f9b9b4f441e2ea6ebedf5e53..24042f120d1c84f7b82aa63f843bf890cb673fe6 100644 --- a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java +++ b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java @@ -43,11 +43,11 @@ public class BinaryDictInputOutputTest extends TestCase { final FusionDictionary dict = new FusionDictionary(new Node(), new DictionaryOptions(new HashMap<String, String>(), false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */)); - dict.add("foo", 1, null, null); - dict.add("fta", 1, null, null); - dict.add("ftb", 1, null, null); - dict.add("bar", 1, null, null); - dict.add("fool", 1, null, null); + dict.add("foo", 1, null); + dict.add("fta", 1, null); + dict.add("ftb", 1, null); + dict.add("bar", 1, null); + dict.add("fool", 1, null); final ArrayList<Node> result = BinaryDictInputOutput.flattenTree(dict.mRoot); assertEquals(4, result.size()); while (!result.isEmpty()) {