From 9168ab60cf08385554a7a8255e40698988ee37f6 Mon Sep 17 00:00:00 2001
From: Akifumi Yoshimoto <akifumi@google.com>
Date: Wed, 10 Sep 2014 18:33:24 +0900
Subject: [PATCH] Include a code point table in the binary dictionary.

Bug:17097992
Change-Id: I677a5eb3a704e4386f6573360e44ca335d81d2df
---
 .../latin/makedict/FormatSpec.java            |   4 +-
 .../BinaryDictDecoderEncoderTests.java        |   2 +-
 .../makedict/BinaryDictDecoderUtils.java      |  58 ++++++----
 .../makedict/BinaryDictEncoderUtils.java      | 101 ++++++++++++------
 .../latin/makedict/DictEncoder.java           |   4 +-
 .../latin/makedict/Ver2DictDecoder.java       |   9 +-
 .../latin/makedict/Ver2DictEncoder.java       |  52 ++++++---
 .../latin/makedict/Ver4DictEncoder.java       |   4 +-
 .../latin/dicttool/DictionaryMaker.java       |   2 +-
 9 files changed, 162 insertions(+), 74 deletions(-)

diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 2661d5d48a..34edfa0da7 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -163,13 +163,15 @@ public final class FormatSpec {
     static final int NOT_A_VERSION_NUMBER = -1;
 
     // These MUST have the same values as the relevant constants in format_utils.h.
-    // From version 4 on, we use version * 100 + revision as a version number. That allows
+    // From version 2.01 on, we use version * 100 + revision as a version number. That allows
     // us to change the format during development while having testing devices remove
     // older files with each upgrade, while still having a readable versioning scheme.
     // When we bump up the dictionary format version, we should update
     // ExpandableDictionary.needsToMigrateDictionary() and
     // ExpandableDictionary.matchesExpectedBinaryDictFormatVersionForThisType().
     public static final int VERSION2 = 2;
+    public static final int VERSION201 = 201;
+    public static final int MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE = VERSION201;
     // Dictionary version used for testing.
     public static final int VERSION4_ONLY_FOR_TESTING = 399;
     public static final int VERSION401 = 401;
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
index 406046a745..fa20d06c70 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
@@ -312,7 +312,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
         final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer);
         for (final String word : sWords) {
             Arrays.fill(buffer, (byte) 0);
-            CharEncoding.writeString(buffer, 0, word);
+            CharEncoding.writeString(buffer, 0, word, null);
             dictBuffer.position(0);
             final String str = CharEncoding.readString(dictBuffer);
             assertEquals(word, str);
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
index 96604a1977..1f3ee19af8 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java
@@ -17,11 +17,11 @@
 package com.android.inputmethod.latin.makedict;
 
 import com.android.inputmethod.annotations.UsedForTesting;
-
 import java.io.File;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
+import java.util.HashMap;
 
 /**
  * Decodes binary files for a FusionDictionary.
@@ -109,15 +109,19 @@ public final class BinaryDictDecoderUtils {
      * A class grouping utility function for our specific character encoding.
      */
     static final class CharEncoding {
-        private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
-        private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF;
 
         /**
          * Helper method to find out whether this code fits on one byte
          */
-        private static boolean fitsOnOneByte(final int character) {
-            return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE
-                    && character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
+        private static boolean fitsOnOneByte(int character,
+                final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+            if (codePointToOneByteCodeMap != null) {
+                if (codePointToOneByteCodeMap.containsKey(character)) {
+                    character = codePointToOneByteCodeMap.get(character);
+                }
+            }
+            return character >= FormatSpec.MINIMAL_ONE_BYTE_CHARACTER_VALUE
+                    && character <= FormatSpec.MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
         }
 
         /**
@@ -137,9 +141,10 @@ public final class BinaryDictDecoderUtils {
          * @param character the character code.
          * @return the size in binary encoded-form, either 1 or 3 bytes.
          */
-        static int getCharSize(final int character) {
+        static int getCharSize(final int character,
+                final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
             // See char encoding in FusionDictionary.java
-            if (fitsOnOneByte(character)) return 1;
+            if (fitsOnOneByte(character, codePointToOneByteCodeMap)) return 1;
             if (FormatSpec.INVALID_CHARACTER == character) return 1;
             return 3;
         }
@@ -147,9 +152,10 @@ public final class BinaryDictDecoderUtils {
         /**
          * Compute the byte size of a character array.
          */
-        static int getCharArraySize(final int[] chars) {
+        static int getCharArraySize(final int[] chars,
+                final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
             int size = 0;
-            for (int character : chars) size += getCharSize(character);
+            for (int character : chars) size += getCharSize(character, codePointToOneByteCodeMap);
             return size;
         }
 
@@ -159,11 +165,19 @@ public final class BinaryDictDecoderUtils {
          * @param codePoints the code point array to write.
          * @param buffer the byte buffer to write to.
          * @param index the index in buffer to write the character array to.
+         * @param codePointToOneByteCodeMap map used to convert code points before writing; may be null.
          * @return the index after the last character.
          */
-        static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) {
+        static int writeCharArray(final int[] codePoints, final byte[] buffer, int index,
+                final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
             for (int codePoint : codePoints) {
-                if (1 == getCharSize(codePoint)) {
+                if (codePointToOneByteCodeMap != null) {
+                    if (codePointToOneByteCodeMap.containsKey(codePoint)) {
+                        // Convert code points
+                        codePoint = codePointToOneByteCodeMap.get(codePoint);
+                    }
+                }
+                if (1 == getCharSize(codePoint, codePointToOneByteCodeMap)) {
                     buffer[index++] = (byte)codePoint;
                 } else {
                     buffer[index++] = (byte)(0xFF & (codePoint >> 16));
@@ -184,12 +198,19 @@ public final class BinaryDictDecoderUtils {
          * @param word the string to write.
          * @return the size written, in bytes.
          */
-        static int writeString(final byte[] buffer, final int origin, final String word) {
+        static int writeString(final byte[] buffer, final int origin, final String word,
+                final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
             final int length = word.length();
             int index = origin;
             for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
-                final int codePoint = word.codePointAt(i);
-                if (1 == getCharSize(codePoint)) {
+                int codePoint = word.codePointAt(i);
+                if (codePointToOneByteCodeMap != null) {
+                    if (codePointToOneByteCodeMap.containsKey(codePoint)) {
+                        // Convert code points
+                        codePoint = codePointToOneByteCodeMap.get(codePoint);
+                    }
+                }
+                if (1 == getCharSize(codePoint, codePointToOneByteCodeMap)) {
                     buffer[index++] = (byte)codePoint;
                 } else {
                     buffer[index++] = (byte)(0xFF & (codePoint >> 16));
@@ -210,12 +231,13 @@ public final class BinaryDictDecoderUtils {
          * @param word the string to write.
          * @return the size written, in bytes.
          */
-        static int writeString(final OutputStream stream, final String word) throws IOException {
+        static int writeString(final OutputStream stream, final String word,
+                final HashMap<Integer, Integer> codePointToOneByteCodeMap) throws IOException {
             final int length = word.length();
             int written = 0;
             for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
                 final int codePoint = word.codePointAt(i);
-                final int charSize = getCharSize(codePoint);
+                final int charSize = getCharSize(codePoint, codePointToOneByteCodeMap);
                 if (1 == charSize) {
                     stream.write((byte) codePoint);
                 } else {
@@ -253,7 +275,7 @@ public final class BinaryDictDecoderUtils {
          */
         static int readChar(final DictBuffer dictBuffer) {
             int character = dictBuffer.readUnsignedByte();
-            if (!fitsOnOneByte(character)) {
+            if (!fitsOnOneByte(character, null)) {
                 if (FormatSpec.PTNODE_CHARACTERS_TERMINATOR == character) {
                     return FormatSpec.INVALID_CHARACTER;
                 }
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
index 12290e6aa4..2d536d822d 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
@@ -61,8 +61,9 @@ public class BinaryDictEncoderUtils {
      * @param characters the character array
      * @return the size of the char array, including the terminator if any
      */
-    static int getPtNodeCharactersSize(final int[] characters) {
-        int size = CharEncoding.getCharArraySize(characters);
+    static int getPtNodeCharactersSize(final int[] characters,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+        int size = CharEncoding.getCharArraySize(characters, codePointToOneByteCodeMap);
         if (characters.length > 1) size += FormatSpec.PTNODE_TERMINATOR_SIZE;
         return size;
     }
@@ -76,8 +77,9 @@ public class BinaryDictEncoderUtils {
      * @param ptNode the PtNode
      * @return the size of the char array, including the terminator if any
      */
-    private static int getPtNodeCharactersSize(final PtNode ptNode) {
-        return getPtNodeCharactersSize(ptNode.mChars);
+    private static int getPtNodeCharactersSize(final PtNode ptNode,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+        return getPtNodeCharactersSize(ptNode.mChars, codePointToOneByteCodeMap);
     }
 
     /**
@@ -92,13 +94,14 @@ public class BinaryDictEncoderUtils {
     /**
      * Compute the size of a shortcut in bytes.
      */
-    private static int getShortcutSize(final WeightedString shortcut) {
+    private static int getShortcutSize(final WeightedString shortcut,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
         int size = FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE;
         final String word = shortcut.mWord;
         final int length = word.length();
         for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
             final int codePoint = word.codePointAt(i);
-            size += CharEncoding.getCharSize(codePoint);
+            size += CharEncoding.getCharSize(codePoint, codePointToOneByteCodeMap);
         }
         size += FormatSpec.PTNODE_TERMINATOR_SIZE;
         return size;
@@ -110,11 +113,12 @@ public class BinaryDictEncoderUtils {
      * This is known in advance and does not change according to position in the file
      * like address lists do.
      */
-    static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) {
+    static int getShortcutListSize(final ArrayList<WeightedString> shortcutList,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
         if (null == shortcutList || shortcutList.isEmpty()) return 0;
         int size = FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
         for (final WeightedString shortcut : shortcutList) {
-            size += getShortcutSize(shortcut);
+            size += getShortcutSize(shortcut, codePointToOneByteCodeMap);
         }
         return size;
     }
@@ -125,14 +129,16 @@ public class BinaryDictEncoderUtils {
      * @param ptNode the PtNode to compute the size of.
      * @return the maximum size of the PtNode.
      */
-    private static int getPtNodeMaximumSize(final PtNode ptNode) {
-        int size = getNodeHeaderSize(ptNode);
+    private static int getPtNodeMaximumSize(final PtNode ptNode,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+        int size = getNodeHeaderSize(ptNode, codePointToOneByteCodeMap);
         if (ptNode.isTerminal()) {
             // If terminal, one byte for the frequency.
             size += FormatSpec.PTNODE_FREQUENCY_SIZE;
         }
         size += FormatSpec.PTNODE_MAX_ADDRESS_SIZE; // For children address
-        size += getShortcutListSize(ptNode.mShortcutTargets);
+        // TODO: Use codePointToOneByteCodeMap for shortcuts.
+        size += getShortcutListSize(ptNode.mShortcutTargets, null /* codePointToOneByteCodeMap */);
         if (null != ptNode.mBigrams) {
             size += (FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE
                     + FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE)
@@ -148,10 +154,11 @@ public class BinaryDictEncoderUtils {
      *
      * @param ptNodeArray the node array to compute the maximum size of.
      */
-    private static void calculatePtNodeArrayMaximumSize(final PtNodeArray ptNodeArray) {
+    private static void calculatePtNodeArrayMaximumSize(final PtNodeArray ptNodeArray,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
         int size = getPtNodeCountSize(ptNodeArray);
         for (PtNode node : ptNodeArray.mData) {
-            final int nodeSize = getPtNodeMaximumSize(node);
+            final int nodeSize = getPtNodeMaximumSize(node, codePointToOneByteCodeMap);
             node.mCachedSize = nodeSize;
             size += nodeSize;
         }
@@ -163,8 +170,10 @@ public class BinaryDictEncoderUtils {
      *
      * @param ptNode the PtNode of which to compute the size of the header
      */
-    private static int getNodeHeaderSize(final PtNode ptNode) {
-        return FormatSpec.PTNODE_FLAGS_SIZE + getPtNodeCharactersSize(ptNode);
+    private static int getNodeHeaderSize(final PtNode ptNode,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+        return FormatSpec.PTNODE_FLAGS_SIZE + getPtNodeCharactersSize(ptNode,
+                codePointToOneByteCodeMap);
     }
 
     /**
@@ -367,7 +376,8 @@ public class BinaryDictEncoderUtils {
      * @return false if none of the cached addresses inside the node array changed, true otherwise.
      */
     private static boolean computeActualPtNodeArraySize(final PtNodeArray ptNodeArray,
-            final FusionDictionary dict) {
+            final FusionDictionary dict,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
         boolean changed = false;
         int size = getPtNodeCountSize(ptNodeArray);
         for (PtNode ptNode : ptNodeArray.mData) {
@@ -375,7 +385,7 @@ public class BinaryDictEncoderUtils {
             if (ptNode.mCachedAddressAfterUpdate != ptNode.mCachedAddressBeforeUpdate) {
                 changed = true;
             }
-            int nodeSize = getNodeHeaderSize(ptNode);
+            int nodeSize = getNodeHeaderSize(ptNode, codePointToOneByteCodeMap);
             if (ptNode.isTerminal()) {
                 nodeSize += FormatSpec.PTNODE_FREQUENCY_SIZE;
             }
@@ -383,7 +393,9 @@ public class BinaryDictEncoderUtils {
                 nodeSize += getByteSize(getOffsetToTargetNodeArrayDuringUpdate(ptNodeArray,
                         nodeSize + size, ptNode.mChildren));
             }
-            nodeSize += getShortcutListSize(ptNode.mShortcutTargets);
+            // TODO: Use codePointToOneByteCodeMap for shortcuts.
+            nodeSize += getShortcutListSize(ptNode.mShortcutTargets,
+                    null /* codePointToOneByteCodeMap */);
             if (null != ptNode.mBigrams) {
                 for (WeightedString bigram : ptNode.mBigrams) {
                     final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
@@ -454,10 +466,11 @@ public class BinaryDictEncoderUtils {
      * @return the same array it was passed. The nodes have been updated for address and size.
      */
     /* package */ static ArrayList<PtNodeArray> computeAddresses(final FusionDictionary dict,
-            final ArrayList<PtNodeArray> flatNodes) {
+            final ArrayList<PtNodeArray> flatNodes,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
         // First get the worst possible sizes and offsets
         for (final PtNodeArray n : flatNodes) {
-            calculatePtNodeArrayMaximumSize(n);
+            calculatePtNodeArrayMaximumSize(n, codePointToOneByteCodeMap);
         }
         final int offset = initializePtNodeArraysCachedAddresses(flatNodes);
 
@@ -472,7 +485,8 @@ public class BinaryDictEncoderUtils {
             for (final PtNodeArray ptNodeArray : flatNodes) {
                 ptNodeArray.mCachedAddressAfterUpdate = ptNodeArrayStartOffset;
                 final int oldNodeArraySize = ptNodeArray.mCachedSize;
-                final boolean changed = computeActualPtNodeArraySize(ptNodeArray, dict);
+                final boolean changed = computeActualPtNodeArraySize(ptNodeArray, dict,
+                        codePointToOneByteCodeMap);
                 final int newNodeArraySize = ptNodeArray.mCachedSize;
                 if (oldNodeArraySize < newNodeArraySize) {
                     throw new RuntimeException("Increased size ?!");
@@ -686,9 +700,10 @@ public class BinaryDictEncoderUtils {
                 + (frequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY);
     }
 
-    /* package */ static final int getChildrenPosition(final PtNode ptNode) {
+    /* package */ static final int getChildrenPosition(final PtNode ptNode,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
         int positionOfChildrenPosField = ptNode.mCachedAddressAfterUpdate
-                + getNodeHeaderSize(ptNode);
+                + getNodeHeaderSize(ptNode, codePointToOneByteCodeMap);
         if (ptNode.isTerminal()) {
             // A terminal node has the frequency.
             // If positionOfChildrenPosField is incorrect, we may crash when jumping to the children
@@ -705,10 +720,12 @@ public class BinaryDictEncoderUtils {
      * @param dict the dictionary the node array is a part of (for relative offsets).
      * @param dictEncoder the dictionary encoder.
      * @param ptNodeArray the node array to write.
+     * @param codePointToOneByteCodeMap the map to convert the code points.
      */
     @SuppressWarnings("unused")
     /* package */ static void writePlacedPtNodeArray(final FusionDictionary dict,
-            final DictEncoder dictEncoder, final PtNodeArray ptNodeArray) {
+            final DictEncoder dictEncoder, final PtNodeArray ptNodeArray,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
         // TODO: Make the code in common with BinaryDictIOUtils#writePtNode
         dictEncoder.setPosition(ptNodeArray.mCachedAddressAfterUpdate);
 
@@ -727,7 +744,7 @@ public class BinaryDictEncoderUtils {
                         + FormatSpec.MAX_TERMINAL_FREQUENCY
                         + " : " + ptNode.mProbabilityInfo.toString());
             }
-            dictEncoder.writePtNode(ptNode, dict);
+            dictEncoder.writePtNode(ptNode, dict, codePointToOneByteCodeMap);
         }
         if (dictEncoder.getPosition() != ptNodeArray.mCachedAddressAfterUpdate
                 + ptNodeArray.mCachedSize) {
@@ -834,12 +851,16 @@ public class BinaryDictEncoderUtils {
         // Write out the options.
         for (final String key : dict.mOptions.mAttributes.keySet()) {
             final String value = dict.mOptions.mAttributes.get(key);
-            CharEncoding.writeString(headerBuffer, key);
-            CharEncoding.writeString(headerBuffer, value);
+            CharEncoding.writeString(headerBuffer, key, null);
+            CharEncoding.writeString(headerBuffer, value, null);
+        }
+        // Write out the code point table if a codePointOccurrenceArray was provided.
+        if (codePointOccurrenceArray != null) {
+            final String codePointTableString =
+                    encodeCodePointTable(codePointOccurrenceArray);
+            CharEncoding.writeString(headerBuffer, DictionaryHeader.CODE_POINT_TABLE_KEY, null);
+            CharEncoding.writeString(headerBuffer, codePointTableString, null);
         }
-
-        // TODO: Write out the code point table.
-
         final int size = headerBuffer.size();
         final byte[] bytes = headerBuffer.toByteArray();
         // Write out the header size.
@@ -857,10 +878,30 @@ public class BinaryDictEncoderUtils {
         final HashMap<Integer, Integer> mCodePointToOneByteCodeMap;
         final ArrayList<Entry<Integer, Integer>> mCodePointOccurrenceArray;
 
+        // Leave the code point table empty for version 2 dictionaries, which are used in tests.
+        CodePointTable() {
+            mCodePointToOneByteCodeMap = null;
+            mCodePointOccurrenceArray = null;
+        }
+
         CodePointTable(final HashMap<Integer, Integer> codePointToOneByteCodeMap,
                 final ArrayList<Entry<Integer, Integer>> codePointOccurrenceArray) {
             mCodePointToOneByteCodeMap = codePointToOneByteCodeMap;
             mCodePointOccurrenceArray = codePointOccurrenceArray;
         }
     }
+
+    private static String encodeCodePointTable(
+            final ArrayList<Entry<Integer, Integer>> codePointOccurrenceArray) {
+        final StringBuilder codePointTableString = new StringBuilder();
+        int currentCodePointTableIndex = FormatSpec.MINIMAL_ONE_BYTE_CHARACTER_VALUE;
+        for (final Entry<Integer, Integer> entry : codePointOccurrenceArray) {
+            // The native code reads the table as a string.
+            codePointTableString.appendCodePoint(entry.getKey());
+            if (FormatSpec.MAXIMAL_ONE_BYTE_CHARACTER_VALUE < ++currentCodePointTableIndex) {
+                break;
+            }
+        }
+        return codePointTableString.toString();
+    }
 }
diff --git a/tests/src/com/android/inputmethod/latin/makedict/DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/DictEncoder.java
index 645fd5c028..10dd003251 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/DictEncoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/DictEncoder.java
@@ -21,6 +21,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
 
 import java.io.IOException;
+import java.util.HashMap;
 
 /**
  * An interface of binary dictionary encoder.
@@ -33,5 +34,6 @@ public interface DictEncoder {
     public void setPosition(final int position);
     public int getPosition();
     public void writePtNodeCount(final int ptNodeCount);
-    public void writePtNode(final PtNode ptNode, final FusionDictionary dict);
+    public void writePtNode(final PtNode ptNode, final FusionDictionary dict,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap);
 }
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
index 65b84d5f76..6fd0203a38 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
@@ -177,7 +177,8 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
         if (header == null) {
             throw new IOException("Cannot read the dictionary header.");
         }
-        if (header.mFormatOptions.mVersion != FormatSpec.VERSION2) {
+        if (header.mFormatOptions.mVersion != FormatSpec.VERSION2 &&
+                header.mFormatOptions.mVersion != FormatSpec.VERSION201) {
             throw new UnsupportedFormatException("File header has a wrong version : "
                     + header.mFormatOptions.mVersion);
         }
@@ -200,19 +201,19 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
         if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
             int index = 0;
             int character = CharEncoding.readChar(mDictBuffer);
-            addressPointer += CharEncoding.getCharSize(character);
+            addressPointer += CharEncoding.getCharSize(character, null);
             while (FormatSpec.INVALID_CHARACTER != character) {
                 // FusionDictionary is making sure that the length of the word is smaller than
                 // MAX_WORD_LENGTH.
                 // So we'll never write past the end of mCharacterBuffer.
                 mCharacterBuffer[index++] = character;
                 character = CharEncoding.readChar(mDictBuffer);
-                addressPointer += CharEncoding.getCharSize(character);
+                addressPointer += CharEncoding.getCharSize(character, null);
             }
             characters = Arrays.copyOfRange(mCharacterBuffer, 0, index);
         } else {
             final int character = CharEncoding.readChar(mDictBuffer);
-            addressPointer += CharEncoding.getCharSize(character);
+            addressPointer += CharEncoding.getCharSize(character, null);
             characters = new int[] { character };
         }
         final ProbabilityInfo probabilityInfo;
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
index c47190190d..eabde46204 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
@@ -124,7 +124,7 @@ public class Ver2DictEncoder implements DictEncoder {
     @Override
     public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
             throws IOException, UnsupportedFormatException {
-        if (formatOptions.mVersion > FormatSpec.VERSION2) {
+        if (formatOptions.mVersion > FormatSpec.VERSION201) {
             throw new UnsupportedFormatException(
                     "The given format options has wrong version number : "
                     + formatOptions.mVersion);
@@ -135,7 +135,13 @@ public class Ver2DictEncoder implements DictEncoder {
         }
 
         // Make code point conversion table ordered by occurrence of code points
-        final CodePointTable codePointTable = makeCodePointTable(dict);
+        // Versions 201 and later have a code point table.
+        final CodePointTable codePointTable;
+        if (formatOptions.mVersion >= FormatSpec.MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE) {
+            codePointTable = makeCodePointTable(dict);
+        } else {
+            codePointTable = new CodePointTable();
+        }
 
         BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions,
                 codePointTable.mCodePointOccurrenceArray);
@@ -152,7 +158,8 @@ public class Ver2DictEncoder implements DictEncoder {
         ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
 
         MakedictLog.i("Computing addresses...");
-        BinaryDictEncoderUtils.computeAddresses(dict, flatNodes);
+        BinaryDictEncoderUtils.computeAddresses(dict, flatNodes,
+                codePointTable.mCodePointToOneByteCodeMap);
         MakedictLog.i("Checking PtNode array...");
         if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
 
@@ -164,7 +171,8 @@ public class Ver2DictEncoder implements DictEncoder {
         MakedictLog.i("Writing file...");
 
         for (PtNodeArray nodeArray : flatNodes) {
-            BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray);
+            BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray,
+                    codePointTable.mCodePointToOneByteCodeMap);
         }
         if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes);
         mOutStream.write(mBuffer, 0, mPosition);
@@ -196,15 +204,19 @@ public class Ver2DictEncoder implements DictEncoder {
                 countSize);
     }
 
-    private void writePtNodeFlags(final PtNode ptNode) {
-        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode);
+    private void writePtNodeFlags(final PtNode ptNode,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode,
+                codePointToOneByteCodeMap);
         mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition,
                 BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos),
                 FormatSpec.PTNODE_FLAGS_SIZE);
     }
 
-    private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars) {
-        mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition);
+    private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+        mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition,
+                codePointToOneByteCodeMap);
         if (hasSeveralChars) {
             mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
         }
@@ -217,8 +229,10 @@ public class Ver2DictEncoder implements DictEncoder {
         }
     }
 
-    private void writeChildrenPosition(final PtNode ptNode) {
-        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode);
+    private void writeChildrenPosition(final PtNode ptNode,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode,
+                codePointToOneByteCodeMap);
         mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
                 childrenPos);
     }
@@ -228,7 +242,8 @@ public class Ver2DictEncoder implements DictEncoder {
      *
      * @param shortcuts the shortcut attributes list.
      */
-    private void writeShortcuts(final ArrayList<WeightedString> shortcuts) {
+    private void writeShortcuts(final ArrayList<WeightedString> shortcuts,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
         if (null == shortcuts || shortcuts.isEmpty()) return;
 
         final int indexOfShortcutByteSize = mPosition;
@@ -241,7 +256,8 @@ public class Ver2DictEncoder implements DictEncoder {
                     target.getProbability());
             mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags,
                     FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
-            final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord);
+            final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord,
+                codePointToOneByteCodeMap);
             mPosition += shortcutShift;
         }
         final int shortcutByteSize = mPosition - indexOfShortcutByteSize;
@@ -281,12 +297,14 @@ public class Ver2DictEncoder implements DictEncoder {
     }
 
     @Override
-    public void writePtNode(final PtNode ptNode, final FusionDictionary dict) {
-        writePtNodeFlags(ptNode);
-        writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
+    public void writePtNode(final PtNode ptNode, final FusionDictionary dict,
+            final HashMap<Integer, Integer> codePointToOneByteCodeMap) {
+        writePtNodeFlags(ptNode, codePointToOneByteCodeMap);
+        writeCharacters(ptNode.mChars, ptNode.hasSeveralChars(), codePointToOneByteCodeMap);
         writeFrequency(ptNode.getProbability());
-        writeChildrenPosition(ptNode);
-        writeShortcuts(ptNode.mShortcutTargets);
+        writeChildrenPosition(ptNode, codePointToOneByteCodeMap);
+        // TODO: Use codePointToOneByteCodeMap for shortcuts.
+        writeShortcuts(ptNode.mShortcutTargets, null /* codePointToOneByteCodeMap */);
         writeBigrams(ptNode.mBigrams, dict);
     }
 }
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index 74da937669..dc9cb6a3f7 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -27,6 +27,7 @@ import com.android.inputmethod.latin.utils.LocaleUtils;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.HashMap;
 
 /**
  * An implementation of DictEncoder for version 4 binary dictionary.
@@ -141,6 +142,7 @@ public class Ver4DictEncoder implements DictEncoder {
     }
 
     @Override
-    public void writePtNode(PtNode ptNode, FusionDictionary dict) {
+    public void writePtNode(PtNode ptNode, FusionDictionary dict,
+            HashMap<Integer, Integer> codePointToOneByteCodeMap) {
     }
 }
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
index 44f9695cf4..5dfb7bf117 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
@@ -158,7 +158,7 @@ public class DictionaryMaker {
             String outputBinary = null;
             String outputXml = null;
             String outputCombined = null;
-            int outputBinaryFormatVersion = 2; // the default version is 2.
+            int outputBinaryFormatVersion = FormatSpec.VERSION201; // the default version is 201.
             // Don't use code point table by default.
             int codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF;
 
-- 
GitLab