From 2ee70804e92b17016a2f042c4f6b0e94b5d23e88 Mon Sep 17 00:00:00 2001
From: Yuichiro Hanada <yhanada@google.com>
Date: Tue, 25 Sep 2012 20:48:25 +0900
Subject: [PATCH] Add moved char groups.

bug: 6669677

Change-Id: I372f841044fe8e076a50a80ac10b715e5f8fd4eb
---
 .../latin/makedict/BinaryDictIOUtils.java     | 11 +++++++--
 .../latin/makedict/BinaryDictInputOutput.java | 23 +++++++++++++++++--
 .../latin/makedict/FormatSpec.java            | 20 ++++++++++------
 3 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
index 19da5124a5..e2c1254cec 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java
@@ -85,7 +85,10 @@ public class BinaryDictIOUtils {
             }
             p.mPosition++;
 
-            if (info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) { // found word
+            final boolean isMovedGroup = BinaryDictInputOutput.isMovedGroup(info.mFlags,
+                    formatOptions);
+            if (!isMovedGroup
+                    && info.mFrequency != FusionDictionary.CharGroup.NOT_A_TERMINAL) {// found word
                 words.put(info.mOriginalAddress, new String(pushedChars, 0, index));
                 frequencies.put(info.mOriginalAddress, info.mFrequency);
                 if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams);
@@ -109,7 +112,7 @@ public class BinaryDictIOUtils {
                 p.mAddress = buffer.position();
             }
 
-            if (BinaryDictInputOutput.hasChildrenAddress(info.mChildrenAddress)) {
+            if (!isMovedGroup && BinaryDictInputOutput.hasChildrenAddress(info.mChildrenAddress)) {
                 Position childrenPos = new Position(info.mChildrenAddress + headerSize, index);
                 stack.push(childrenPos);
             }
@@ -168,6 +171,10 @@ public class BinaryDictIOUtils {
                     final int charGroupPos = buffer.position();
                     final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer,
                             buffer.position(), header.mFormatOptions);
+                    if (BinaryDictInputOutput.isMovedGroup(currentInfo.mFlags,
+                            header.mFormatOptions)) {
+                        continue;
+                    }
                     boolean same = true;
                     for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
                             p < currentInfo.mCharacters.length;
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index f9339de085..9fc6942181 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -53,6 +53,7 @@ public class BinaryDictInputOutput {
     // If the number of passes exceeds this number, makedict bails with an exception on
     // suspicion that a bug might be causing an infinite loop.
     private static final int MAX_PASSES = 24;
+    private static final int MAX_JUMPS = 12;
 
     public interface FusionDictionaryBufferInterface {
         public int readUnsignedByte();
@@ -394,6 +395,13 @@ public class BinaryDictInputOutput {
         return FormatSpec.NO_CHILDREN_ADDRESS != address;
     }
 
+    /** Tells whether a group is flagged as moved, i.e. its top two flag bits read 01. */
+    public static boolean isMovedGroup(final int flags, final FormatOptions options) {
+        // Isolate both bits: 11 means "not moved", so testing the 0x40 bit alone is not enough.
+        final int movedBits = flags & FormatSpec.MASK_GROUP_ADDRESS_TYPE;
+        return options.mSupportsDynamicUpdate && movedBits == FormatSpec.FLAG_IS_MOVED;
+    }
+
     /**
      * Helper method to check whether the dictionary can be updated dynamically.
      */
@@ -1374,8 +1382,18 @@ public class BinaryDictInputOutput {
         int index = FormatSpec.MAX_WORD_LENGTH - 1;
         // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
         for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
-            buffer.position(currentAddress + headerSize);
-            final CharGroupInfo currentInfo = readCharGroup(buffer, currentAddress, options);
+            CharGroupInfo currentInfo;
+            int loopCounter = 0;
+            do {
+                buffer.position(currentAddress + headerSize);
+                currentInfo = readCharGroup(buffer, currentAddress, options);
+                if (isMovedGroup(currentInfo.mFlags, options)) {
+                    currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
+                }
+                if (DBG && loopCounter++ > MAX_JUMPS) {
+                    MakedictLog.d("Too many jumps - probably a bug");
+                }
+            } while (isMovedGroup(currentInfo.mFlags, options));
             for (int i = 0; i < currentInfo.mCharacters.length; ++i) {
                 sGetWordBuffer[index--] =
                         currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1];
@@ -1457,6 +1475,7 @@ public class BinaryDictInputOutput {
             int groupOffset = nodeHeadPosition + getGroupCountSize(count);
             for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
                 CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
+                if (isMovedGroup(info.mFlags, options)) continue;
                 ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
                 ArrayList<WeightedString> bigrams = null;
                 if (null != info.mBigrams) {
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index cab0661f6e..35311f0c24 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -52,13 +52,18 @@ public final class FormatSpec {
      */
 
     /* Node(CharGroup) layout is as follows:
-     *   | addressType                         xx     : mask with MASK_GROUP_ADDRESS_TYPE
-     *                                 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
-     * f |                                     01 = 1 byte      : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE
-     * l |                                     10 = 2 bytes     : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES
-     * a |                                     11 = 3 bytes     : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
-     * g | has several chars ?         1 bit, 1 = yes, 0 = no   : FLAG_HAS_MULTIPLE_CHARS
-     * s | has a terminal ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_TERMINAL
+     *   | IF !SUPPORTS_DYNAMIC_UPDATE
+     *   |   addressType                         xx     : mask with MASK_GROUP_ADDRESS_TYPE
+     *   |                           2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
+     * f |                                   01 = 1 byte      : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE
+     * l |                                   10 = 2 bytes     : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES
+     * a |                                   11 = 3 bytes     : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
+     * g | ELSE
+     * s |   is moved ?              2 bits, 11 = no
+     *   |                                   01 = yes         : FLAG_IS_MOVED
+     *   |                        the new address is stored in the same place as the parent address
+     *   | has several chars ?         1 bit, 1 = yes, 0 = no   : FLAG_HAS_MULTIPLE_CHARS
+     *   | has a terminal ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_TERMINAL
      *   | has shortcut targets ?      1 bit, 1 = yes, 0 = no   : FLAG_HAS_SHORTCUT_TARGETS
      *   | has bigrams ?               1 bit, 1 = yes, 0 = no   : FLAG_HAS_BIGRAMS
      *   | is not a word ?             1 bit, 1 = yes, 0 = no   : FLAG_IS_NOT_A_WORD
@@ -178,6 +183,7 @@ public final class FormatSpec {
     static final int FLAG_HAS_BIGRAMS = 0x04;
     static final int FLAG_IS_NOT_A_WORD = 0x02;
     static final int FLAG_IS_BLACKLISTED = 0x01;
+    static final int FLAG_IS_MOVED = 0x40;
 
     static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
     static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
-- 
GitLab