From eec2e51e2cbc9e69739187557846a439ed74325e Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Mon, 26 Dec 2011 19:00:00 +0900
Subject: [PATCH] Add shortcut support to the in-memory format of makedict (B3)

Change-Id: Icb8427c82694e24c8d08a5376157c7d1444af979
---
 .../latin/BinaryDictInputOutput.java          | 16 ++++--
 .../inputmethod/latin/FusionDictionary.java   | 54 +++++++++++++------
 .../inputmethod/latin/XmlDictInputOutput.java |  3 +-
 .../latin/BinaryDictInputOutputTest.java      | 10 ++--
 4 files changed, 56 insertions(+), 27 deletions(-)

diff --git a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
index 92f402d3e8..b1d9cc02d5 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
@@ -44,8 +44,9 @@ public class BinaryDictInputOutput {
      * a |                                     11 = 3 bytes     : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
      * g | has several chars ?         1 bit, 1 = yes, 0 = no   : FLAG_HAS_MULTIPLE_CHARS
      * s | has a terminal ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_TERMINAL
-     *   | reserved                    1 bit, 1 = yes, 0 = no
+     *   | has shortcut targets ?      1 bit, 1 = yes, 0 = no   : FLAG_HAS_SHORTCUT_TARGETS
      *   | has bigrams ?               1 bit, 1 = yes, 0 = no   : FLAG_HAS_BIGRAMS
+     *   | is shortcut only ?          1 bit, 1 = yes, 0 = no   : FLAG_IS_SHORTCUT_ONLY
      *
      * c | IF FLAG_HAS_MULTIPLE_CHARS
      * h |   char, char, char, char    n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
@@ -71,6 +72,8 @@ public class BinaryDictInputOutput {
      * d
      * dress
      *
+     *   | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS
+     *   | shortcut targets address list
      *   | IF FLAG_IS_TERMINAL && FLAG_HAS_BIGRAMS
      *   | bigrams address list
      *
@@ -126,7 +129,9 @@ public class BinaryDictInputOutput {
     private static final int FLAG_HAS_MULTIPLE_CHARS = 0x20;
 
     private static final int FLAG_IS_TERMINAL = 0x10;
+    private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
     private static final int FLAG_HAS_BIGRAMS = 0x04;
+    private static final int FLAG_IS_SHORTCUT_ONLY = 0x02;
 
     private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
     private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
@@ -942,11 +947,13 @@ public class BinaryDictInputOutput {
                     source.seek(currentPosition);
                 }
                 nodeContents.add(
-                        new CharGroup(info.mCharacters, bigrams, info.mFrequency,
+                        // TODO: read and pass the shortcut targets
+                        new CharGroup(info.mCharacters, null, bigrams, info.mFrequency,
                         children));
             } else {
+                // TODO: read and pass the shortcut targets
                 nodeContents.add(
-                        new CharGroup(info.mCharacters, bigrams, info.mFrequency));
+                        new CharGroup(info.mCharacters, null, bigrams, info.mFrequency));
             }
             groupOffset = info.mEndAddress;
         }
@@ -996,7 +1003,8 @@ public class BinaryDictInputOutput {
                 new FusionDictionary.DictionaryOptions());
         if (null != dict) {
             for (Word w : dict) {
-                newDict.add(w.mWord, w.mFrequency, w.mBigrams);
+                // TODO: pass the shortcut targets
+                newDict.add(w.mWord, w.mFrequency, null, w.mBigrams);
             }
         }
 
diff --git a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
index f6220eea2b..2f6b2c3713 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
@@ -68,7 +68,7 @@ public class FusionDictionary implements Iterable<Word> {
     }
 
     /**
-     * A group of characters, with a frequency, shortcuts, bigrams, and children.
+     * A group of characters, with a frequency, shortcut targets, bigrams, and children.
      *
      * This is the central class of the in-memory representation. A CharGroup is what can
      * be seen as a traditional "trie node", except it can hold several characters at the
@@ -82,6 +82,7 @@ public class FusionDictionary implements Iterable<Word> {
     public static class CharGroup {
         public static final int NOT_A_TERMINAL = -1;
         final int mChars[];
+        final ArrayList<WeightedString> mShortcutTargets;
         final ArrayList<WeightedString> mBigrams;
         final int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
         Node mChildren;
@@ -89,18 +90,20 @@ public class FusionDictionary implements Iterable<Word> {
         int mCachedSize;
         int mCachedAddress;
 
-        public CharGroup(final int[] chars,
+        public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
                 final ArrayList<WeightedString> bigrams, final int frequency) {
             mChars = chars;
             mFrequency = frequency;
+            mShortcutTargets = shortcutTargets;
             mBigrams = bigrams;
             mChildren = null;
         }
 
-        public CharGroup(final int[] chars,
+        public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
                 final ArrayList<WeightedString> bigrams, final int frequency, final Node children) {
             mChars = chars;
             mFrequency = frequency;
+            mShortcutTargets = shortcutTargets;
             mBigrams = bigrams;
             mChildren = children;
         }
@@ -165,18 +168,29 @@ public class FusionDictionary implements Iterable<Word> {
      *
      * @param word the word to add.
      * @param frequency the frequency of the word, in the range [0..255].
+     * @param shortcutTargets a list of shortcut targets for this word, or null.
      * @param bigrams a list of bigrams, or null.
      */
-    public void add(String word, int frequency, ArrayList<WeightedString> bigrams) {
+    public void add(final String word, final int frequency,
+            final ArrayList<WeightedString> shortcutTargets,
+            final ArrayList<WeightedString> bigrams) {
+        if (null != shortcutTargets) {
+            for (WeightedString target : shortcutTargets) {
+                final CharGroup t = findWordInTree(mRoot, target.mWord);
+                if (null == t) {
+                    add(getCodePoints(target.mWord), 0, null, null);
+                }
+            }
+        }
         if (null != bigrams) {
             for (WeightedString bigram : bigrams) {
                 final CharGroup t = findWordInTree(mRoot, bigram.mWord);
                 if (null == t) {
-                    add(getCodePoints(bigram.mWord), 0, null);
+                    add(getCodePoints(bigram.mWord), 0, null, null);
                 }
             }
         }
-        add(getCodePoints(word), frequency, bigrams);
+        add(getCodePoints(word), frequency, shortcutTargets, bigrams);
     }
 
     /**
@@ -200,14 +214,17 @@ public class FusionDictionary implements Iterable<Word> {
     /**
      * Add a word to this dictionary.
      *
-     * The bigrams, if any, have to be in the dictionary already. If they aren't,
+     * The shortcut targets and bigrams, if any, must be in the dictionary already. If they aren't,
      * an exception is thrown.
      *
      * @param word the word, as an int array.
      * @param frequency the frequency of the word, in the range [0..255].
+     * @param shortcutTargets an optional list of shortcut targets for this word (null if none).
      * @param bigrams an optional list of bigrams for this word (null if none).
      */
-    private void add(int[] word, int frequency, ArrayList<WeightedString> bigrams) {
+    private void add(final int[] word, final int frequency,
+            final ArrayList<WeightedString> shortcutTargets,
+            final ArrayList<WeightedString> bigrams) {
         assert(frequency >= 0 && frequency <= 255);
         Node currentNode = mRoot;
         int charIndex = 0;
@@ -231,7 +248,8 @@ public class FusionDictionary implements Iterable<Word> {
             // No node at this point to accept the word. Create one.
             final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
             final CharGroup newGroup = new CharGroup(
-                    Arrays.copyOfRange(word, charIndex, word.length), bigrams, frequency);
+                    Arrays.copyOfRange(word, charIndex, word.length),
+                    shortcutTargets, bigrams, frequency);
             currentNode.mData.add(insertionIndex, newGroup);
             checkStack(currentNode);
         } else {
@@ -245,7 +263,7 @@ public class FusionDictionary implements Iterable<Word> {
                                 + new String(word, 0, word.length));
                     } else {
                         final CharGroup newNode = new CharGroup(currentGroup.mChars,
-                                bigrams, frequency, currentGroup.mChildren);
+                                shortcutTargets, bigrams, frequency, currentGroup.mChildren);
                         currentNode.mData.set(nodeIndex, newNode);
                         checkStack(currentNode);
                     }
@@ -254,7 +272,7 @@ public class FusionDictionary implements Iterable<Word> {
                     // We only have to create a new node and add it to the end of this.
                     final CharGroup newNode = new CharGroup(
                             Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
-                                    bigrams, frequency);
+                                    shortcutTargets, bigrams, frequency);
                     currentGroup.mChildren = new Node();
                     currentGroup.mChildren.mData.add(newNode);
                 }
@@ -268,7 +286,8 @@ public class FusionDictionary implements Iterable<Word> {
                                     + new String(word, 0, word.length));
                         }
                         final CharGroup newGroup = new CharGroup(word,
-                                currentGroup.mBigrams, frequency, currentGroup.mChildren);
+                                currentGroup.mShortcutTargets, currentGroup.mBigrams,
+                                frequency, currentGroup.mChildren);
                         currentNode.mData.set(nodeIndex, newGroup);
                     }
                 } else {
@@ -277,7 +296,7 @@ public class FusionDictionary implements Iterable<Word> {
                     Node newChildren = new Node();
                     final CharGroup newOldWord = new CharGroup(
                             Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
-                                    currentGroup.mChars.length),
+                                    currentGroup.mChars.length), currentGroup.mShortcutTargets,
                             currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren);
                     newChildren.mData.add(newOldWord);
 
@@ -285,14 +304,14 @@ public class FusionDictionary implements Iterable<Word> {
                     if (charIndex + differentCharIndex >= word.length) {
                         newParent = new CharGroup(
                                 Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
-                                        bigrams, frequency, newChildren);
+                                        shortcutTargets, bigrams, frequency, newChildren);
                     } else {
                         newParent = new CharGroup(
                                 Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
-                                        null, -1, newChildren);
+                                        null, null, -1, newChildren);
                         final CharGroup newWord = new CharGroup(
                                 Arrays.copyOfRange(word, charIndex + differentCharIndex,
-                                        word.length), bigrams, frequency);
+                                        word.length), shortcutTargets, bigrams, frequency);
                         final int addIndex = word[charIndex + differentCharIndex]
                                 > currentGroup.mChars[differentCharIndex] ? 1 : 0;
                         newChildren.mData.add(addIndex, newWord);
@@ -355,7 +374,7 @@ public class FusionDictionary implements Iterable<Word> {
      */
     private static int findInsertionIndex(final Node node, int character) {
         final List data = node.mData;
-        final CharGroup reference = new CharGroup(new int[] { character }, null, 0);
+        final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0);
         int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
         return result >= 0 ? result : -result - 1;
     }
@@ -573,6 +592,7 @@ public class FusionDictionary implements Iterable<Word> {
                     }
                     if (currentGroup.mFrequency >= 0)
                         return new Word(mCurrentString.toString(), currentGroup.mFrequency,
+                                // TODO: pass the shortcut targets here
                                 currentGroup.mBigrams);
                 } else {
                     mPositions.removeLast();
diff --git a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
index 4720e9d106..19ed9d8d2d 100644
--- a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
@@ -107,7 +107,8 @@ public class XmlDictInputOutput {
         @Override
         public void endElement(String uri, String localName, String qName) {
             if (WORD == mState) {
-                mDictionary.add(mWord, mFreq, mBigramsMap.get(mWord));
+                // TODO: pass the shortcut targets
+                mDictionary.add(mWord, mFreq, null, mBigramsMap.get(mWord));
                 mState = START;
             }
         }
diff --git a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
index 79cf14b2be..6ac046bbf5 100644
--- a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
+++ b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java
@@ -39,11 +39,11 @@ public class BinaryDictInputOutputTest extends TestCase {
     // that it does not contain any duplicates.
     public void testFlattenNodes() {
         final FusionDictionary dict = new FusionDictionary();
-        dict.add("foo", 1, null);
-        dict.add("fta", 1, null);
-        dict.add("ftb", 1, null);
-        dict.add("bar", 1, null);
-        dict.add("fool", 1, null);
+        dict.add("foo", 1, null, null);
+        dict.add("fta", 1, null, null);
+        dict.add("ftb", 1, null, null);
+        dict.add("bar", 1, null, null);
+        dict.add("fool", 1, null, null);
         final ArrayList<Node> result = BinaryDictInputOutput.flattenTree(dict.mRoot);
         assertEquals(4, result.size());
         while (!result.isEmpty()) {
-- 
GitLab