From c6a6f6a9905ab98516d944ac85933d016e4147fb Mon Sep 17 00:00:00 2001
From: Keisuke Kuroyanagi <ksk@google.com>
Date: Wed, 1 Oct 2014 11:21:08 +0900
Subject: [PATCH] Introduce NgramProperty on the Java side.

Bug: 14425059
Change-Id: I8b3458ad22730b3dccbe0caea2c5930f5276dc82
---
 .../inputmethod/latin/NgramContext.java       | 10 +++-
 .../latin/makedict/NgramProperty.java         | 26 +++++++++
 .../latin/makedict/WordProperty.java          | 57 +++++++++++++------
 .../latin/utils/CombinedFormatUtils.java      |  7 ++-
 .../latin/BinaryDictionaryTests.java          | 18 +++---
 .../BinaryDictDecoderEncoderTests.java        |  4 +-
 .../latin/makedict/Ver2DictDecoder.java       |  4 +-
 .../latin/makedict/Ver4DictDecoder.java       |  5 +-
 .../latin/makedict/Ver4DictEncoder.java       |  5 +-
 .../inputmethod/latin/dicttool/Diff.java      |  2 +-
 .../inputmethod/latin/dicttool/Info.java      |  4 +-
 .../latin/dicttool/XmlDictInputOutput.java    |  6 +-
 12 files changed, 105 insertions(+), 43 deletions(-)
 create mode 100644 java/src/com/android/inputmethod/latin/makedict/NgramProperty.java

diff --git a/java/src/com/android/inputmethod/latin/NgramContext.java b/java/src/com/android/inputmethod/latin/NgramContext.java
index c35c6e2c83..6d438584fd 100644
--- a/java/src/com/android/inputmethod/latin/NgramContext.java
+++ b/java/src/com/android/inputmethod/latin/NgramContext.java
@@ -169,8 +169,14 @@ public class NgramContext {
 
     @Override
     public int hashCode() {
-        // Just for having equals().
-        return mPrevWordsInfo[0].hashCode();
+        // XOR the hashes of the previous words up to (not including) the first null or
+        // EMPTY_WORD_INFO entry; entries after that point do not contribute to the hash.
+        int hashValue = 0;
+        for (final WordInfo wordInfo : mPrevWordsInfo) {
+            if (wordInfo == null || WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) break;
+            hashValue ^= wordInfo.hashCode();
+        }
+        return hashValue;
     }
 
     @Override
diff --git a/java/src/com/android/inputmethod/latin/makedict/NgramProperty.java b/java/src/com/android/inputmethod/latin/makedict/NgramProperty.java
new file mode 100644
index 0000000000..99e0e273f9
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/NgramProperty.java
@@ -0,0 +1,40 @@
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.NgramContext;
+
+/**
+ * A target word paired with the n-gram context (the previous words) it follows.
+ *
+ * Both fields are final and are assigned once in the constructor. Equality and hashing are
+ * defined by the two fields together.
+ */
+public final class NgramProperty {
+    /** The word this n-gram leads to, with its probability information. */
+    public final WeightedString mTargetWord;
+    /** The previous word(s) that the target word follows. */
+    public final NgramContext mNgramContext;
+
+    /**
+     * @param targetWord the word this n-gram leads to. Must not be null: hashCode() and
+     *        equals() dereference it without a null check.
+     * @param ngramContext the previous word(s) the target follows. Must not be null either.
+     */
+    public NgramProperty(final WeightedString targetWord, final NgramContext ngramContext) {
+        mTargetWord = targetWord;
+        mNgramContext = ngramContext;
+    }
+
+    @Override
+    public int hashCode() {
+        // Mixes exactly the two fields that equals() compares.
+        return mTargetWord.hashCode() ^ mNgramContext.hashCode();
+    }
+
+    @Override
+    public boolean equals(final Object o) {
+        if (o == this) return true;
+        if (!(o instanceof NgramProperty)) return false;
+        final NgramProperty n = (NgramProperty)o;
+        return mTargetWord.equals(n.mTargetWord) && mNgramContext.equals(n.mNgramContext);
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
index cd78e22356..46705f9db1 100644
--- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
+++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java
@@ -18,6 +18,8 @@ package com.android.inputmethod.latin.makedict;
 
 import com.android.inputmethod.annotations.UsedForTesting;
 import com.android.inputmethod.latin.BinaryDictionary;
+import com.android.inputmethod.latin.NgramContext;
+import com.android.inputmethod.latin.NgramContext.WordInfo;
 import com.android.inputmethod.latin.utils.CombinedFormatUtils;
 import com.android.inputmethod.latin.utils.StringUtils;
 
@@ -33,16 +35,17 @@ public final class WordProperty implements Comparable<WordProperty> {
     public final String mWord;
     public final ProbabilityInfo mProbabilityInfo;
     public final ArrayList<WeightedString> mShortcutTargets;
-    public final ArrayList<WeightedString> mBigrams;
+    public final ArrayList<NgramProperty> mNgrams;
     // TODO: Support mIsBeginningOfSentence.
     public final boolean mIsBeginningOfSentence;
     public final boolean mIsNotAWord;
     public final boolean mIsBlacklistEntry;
     public final boolean mHasShortcuts;
-    public final boolean mHasBigrams;
+    public final boolean mHasNgrams;
 
     private int mHashCode = 0;
 
+    // TODO: Support n-gram.
     @UsedForTesting
     public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
             final ArrayList<WeightedString> shortcutTargets,
@@ -51,11 +54,17 @@ public final class WordProperty implements Comparable<WordProperty> {
         mWord = word;
         mProbabilityInfo = probabilityInfo;
         mShortcutTargets = shortcutTargets;
-        mBigrams = bigrams;
+        mNgrams = new ArrayList<>();
+        final NgramContext ngramContext = new NgramContext(new WordInfo(mWord));
+        if (bigrams != null) {
+            for (final WeightedString bigramTarget : bigrams) {
+                mNgrams.add(new NgramProperty(bigramTarget, ngramContext));
+            }
+        }
         mIsBeginningOfSentence = false;
         mIsNotAWord = isNotAWord;
         mIsBlacklistEntry = isBlacklistEntry;
-        mHasBigrams = bigrams != null && !bigrams.isEmpty();
+        mHasNgrams = bigrams != null && !bigrams.isEmpty();
         mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
     }
 
@@ -78,19 +87,24 @@ public final class WordProperty implements Comparable<WordProperty> {
         mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
         mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
         mShortcutTargets = new ArrayList<>();
-        mBigrams = new ArrayList<>();
+        mNgrams = new ArrayList<>();
         mIsBeginningOfSentence = isBeginningOfSentence;
         mIsNotAWord = isNotAWord;
         mIsBlacklistEntry = isBlacklisted;
         mHasShortcuts = hasShortcuts;
-        mHasBigrams = hasBigram;
-
-        final int bigramTargetCount = bigramTargets.size();
-        for (int i = 0; i < bigramTargetCount; i++) {
-            final String bigramTargetString =
+        mHasNgrams = hasBigram;
+
+        final int relatedNgramCount = bigramTargets.size();
+        final WordInfo currentWordInfo =
+                mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE : new WordInfo(mWord);
+        final NgramContext ngramContext = new NgramContext(currentWordInfo);
+        for (int i = 0; i < relatedNgramCount; i++) {
+            final String ngramTargetString =
                     StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
-            mBigrams.add(new WeightedString(bigramTargetString,
-                    createProbabilityInfoFromArray(bigramProbabilityInfo.get(i))));
+            final WeightedString ngramTarget = new WeightedString(ngramTargetString,
+                    createProbabilityInfoFromArray(bigramProbabilityInfo.get(i)));
+            // TODO: Support n-gram.
+            mNgrams.add(new NgramProperty(ngramTarget, ngramContext));
         }
 
         final int shortcutTargetCount = shortcutTargets.size();
@@ -102,6 +116,17 @@ public final class WordProperty implements Comparable<WordProperty> {
         }
     }
 
+    // TODO: Remove
+    // Collects the target words of the n-grams whose context has a previous-word count of 1.
+    public ArrayList<WeightedString> getBigrams() {
+        final ArrayList<WeightedString> bigramTargets = new ArrayList<>();
+        for (final NgramProperty ngramProperty : mNgrams) {
+            if (ngramProperty.mNgramContext.getPrevWordCount() != 1) continue;
+            bigramTargets.add(ngramProperty.mTargetWord);
+        }
+        return bigramTargets;
+    }
+
     public int getProbability() {
         return mProbabilityInfo.mProbability;
     }
@@ -110,8 +135,8 @@ public final class WordProperty implements Comparable<WordProperty> {
         return Arrays.hashCode(new Object[] {
                 word.mWord,
                 word.mProbabilityInfo,
-                word.mShortcutTargets.hashCode(),
-                word.mBigrams.hashCode(),
+                word.mShortcutTargets,
+                word.mNgrams,
                 word.mIsNotAWord,
                 word.mIsBlacklistEntry
         });
@@ -142,9 +167,9 @@ public final class WordProperty implements Comparable<WordProperty> {
         if (!(o instanceof WordProperty)) return false;
         WordProperty w = (WordProperty)o;
         return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
-                && mShortcutTargets.equals(w.mShortcutTargets) && mBigrams.equals(w.mBigrams)
+                && mShortcutTargets.equals(w.mShortcutTargets) && mNgrams.equals(w.mNgrams)
                 && mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry
-                && mHasBigrams == w.mHasBigrams && mHasShortcuts && w.mHasBigrams;
+                && mHasNgrams == w.mHasNgrams && mHasShortcuts == w.mHasShortcuts;
     }
 
     @Override
diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
index 34f59e8bc2..7e8e559906 100644
--- a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
@@ -67,7 +67,7 @@ public class CombinedFormatUtils {
             builder.append("," + BLACKLISTED_TAG + "=true");
         }
         builder.append("\n");
-        if (wordProperty.mShortcutTargets != null) {
+        if (wordProperty.mHasShortcuts) {
             for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
                 builder.append("  " + SHORTCUT_TAG + "=" + shortcutTarget.mWord);
                 builder.append(",");
@@ -75,8 +75,9 @@ public class CombinedFormatUtils {
                 builder.append("\n");
             }
         }
-        if (wordProperty.mBigrams != null) {
-            for (final WeightedString bigram : wordProperty.mBigrams) {
+        if (wordProperty.mHasNgrams) {
+            // TODO: Support ngram.
+            for (final WeightedString bigram : wordProperty.getBigrams()) {
                 builder.append("  " + BIGRAM_TAG + "=" + bigram.mWord);
                 builder.append(",");
                 builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index e6f00b6689..9c7792cf2d 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -1105,7 +1105,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
             assertTrue(wordProperty.isValid());
             assertEquals(isNotAWord, wordProperty.mIsNotAWord);
             assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
-            assertEquals(false, wordProperty.mHasBigrams);
+            assertEquals(false, wordProperty.mHasNgrams);
             assertEquals(false, wordProperty.mHasShortcuts);
             assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
             assertTrue(wordProperty.mShortcutTargets.isEmpty());
@@ -1142,13 +1142,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
             final HashSet<String> bigramWord1s = bigrams.get(word0);
             final WordProperty wordProperty = binaryDictionary.getWordProperty(word0,
                     false /* isBeginningOfSentence */);
-            assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
-            for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
-                final String word1 = wordProperty.mBigrams.get(j).mWord;
+            assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size());
+            // TODO: Support ngram.
+            for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
+                final String word1 = bigramTarget.mWord;
                 assertTrue(bigramWord1s.contains(word1));
                 if (canCheckBigramProbability(formatVersion)) {
                     final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
-                    assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
+                    assertEquals(bigramProbability, bigramTarget.getProbability());
                 }
             }
         }
@@ -1235,13 +1236,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
                     wordProperty.mProbabilityInfo.mProbability);
             wordSet.remove(word0);
             final HashSet<String> bigramWord1s = bigrams.get(word0);
-            for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
-                final String word1 = wordProperty.mBigrams.get(j).mWord;
+            // TODO: Support ngram.
+            for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
+                final String word1 = bigramTarget.mWord;
                 assertTrue(bigramWord1s.contains(word1));
                 final Pair<String, String> bigram = new Pair<>(word0, word1);
                 if (canCheckBigramProbability(formatVersion)) {
                     final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
-                    assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
+                    assertEquals(bigramProbability, bigramTarget.getProbability());
                 }
                 bigramSet.remove(bigram);
             }
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
index 406046a745..f8b68e0ce1 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java
@@ -682,8 +682,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
                 }
                 assertTrue(shortcutList.isEmpty());
             }
-            for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
-                final String word1 = wordProperty.mBigrams.get(j).mWord;
+            for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
+                final String word1 = bigramTarget.mWord;
                 final Pair<String, String> bigram = new Pair<>(word0, word1);
                 assertTrue(bigramSet.contains(bigram));
                 bigramSet.remove(bigram);
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
index 65b84d5f76..18f4bcf5f6 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
@@ -292,11 +292,11 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
         }
         // Insert bigrams into the fusion dictionary.
         for (final WordProperty wordProperty : wordProperties) {
-            if (wordProperty.mBigrams == null) {
+            if (!wordProperty.mHasNgrams) {
                 continue;
             }
             final String word0 = wordProperty.mWord;
-            for (final WeightedString bigram : wordProperty.mBigrams) {
+            for (final WeightedString bigram : wordProperty.getBigrams()) {
                 fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
             }
         }
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index 5e8417ed6e..0da915a752 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -97,12 +97,13 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
             }
         }
         // Insert bigrams into the fusion dictionary.
+        // TODO: Support ngrams.
         for (final WordProperty wordProperty : wordProperties) {
-            if (wordProperty.mBigrams == null) {
+            if (!wordProperty.mHasNgrams) {
                 continue;
             }
             final String word0 = wordProperty.mWord;
-            for (final WeightedString bigram : wordProperty.mBigrams) {
+            for (final WeightedString bigram : wordProperty.getBigrams()) {
                 fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
             }
         }
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index 74da937669..401ffde6d2 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -102,8 +102,9 @@ public class Ver4DictEncoder implements DictEncoder {
             }
         }
         for (final WordProperty word0Property : dict) {
-            if (null == word0Property.mBigrams) continue;
-            for (final WeightedString word1 : word0Property.mBigrams) {
+            if (!word0Property.mHasNgrams) continue;
+            // TODO: Support ngram.
+            for (final WeightedString word1 : word0Property.getBigrams()) {
                 final NgramContext ngramContext =
                         new NgramContext(new NgramContext.WordInfo(word0Property.mWord));
                 if (!binaryDict.addNgramEntry(ngramContext, word1.mWord,
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
index 94d1ae8bb1..c6818ce0c4 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java
@@ -135,7 +135,7 @@ public class Diff extends Dicttool.Command {
                     hasDifferences = true;
                 }
                 hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
-                        "Bigram", word0Property.mBigrams, word1PtNode.getBigrams());
+                        "Bigram", word0Property.getBigrams(), word1PtNode.getBigrams());
                 hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
                         "Shortcut", word0Property.mShortcutTargets,
                         word1PtNode.getShortcutTargets());
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
index 9b2567fd3a..2850e1ff62 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java
@@ -45,8 +45,8 @@ public class Info extends Dicttool.Command {
         int whitelistCount = 0;
         for (final WordProperty wordProperty : dict) {
             ++wordCount;
-            if (null != wordProperty.mBigrams) {
-                bigramCount += wordProperty.mBigrams.size();
+            if (wordProperty.mHasNgrams) {
+                bigramCount += wordProperty.mNgrams.size();
             }
             if (null != wordProperty.mShortcutTargets) {
                 shortcutCount += wordProperty.mShortcutTargets.size();
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
index bdec447611..cd3ce70ebe 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
@@ -353,7 +353,7 @@ public class XmlDictInputOutput {
                     + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability()
                     + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "")
                     + "\">");
-            if (null != wordProperty.mShortcutTargets) {
+            if (wordProperty.mHasShortcuts) {
                 destination.write("\n");
                 for (WeightedString target : wordProperty.mShortcutTargets) {
                     destination.write("    <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\""
@@ -362,9 +362,9 @@ public class XmlDictInputOutput {
                 }
                 destination.write("  ");
             }
-            if (null != wordProperty.mBigrams) {
+            if (wordProperty.mHasNgrams) {
                 destination.write("\n");
-                for (WeightedString bigram : wordProperty.mBigrams) {
+                for (WeightedString bigram : wordProperty.getBigrams()) {
                     destination.write("    <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\""
                             + bigram.getProbability() + "\">" + bigram.mWord
                             + "</" + BIGRAM_TAG + ">\n");
-- 
GitLab