From bd1f59bda5ad0b7028ec06c2de078f1623e76cdd Mon Sep 17 00:00:00 2001
From: Keisuke Kuroyanagi <ksk@google.com>
Date: Wed, 30 Jul 2014 17:26:26 +0900
Subject: [PATCH] Remove PrevWordsInfo.getBigramsIteratorForPrediction().

This changes the behavior of predictions for some rare cases.
For example, consider the case where the prev word is
"ABC", both "ABC" and "abc" are in the dict, "ABC" doesn't
have any bigrams, and "abc" has bigrams.
Without this change, "abc"'s bigrams are used for
prediction. But, with this change, "abc"'s bigrams are not
used for prediction. This strategy is the same as the
current way of bigram probability computation during
traversal (for suggestion).

Bug: 14425059
Change-Id: I857ff39aa70aea265efa651451a63b759bb47d48
---
 .../suggest/core/dictionary/dictionary.cpp    | 16 ++++++-
 .../dictionary_structure_with_buffer_policy.h |  4 +-
 .../suggest/core/session/prev_words_info.h    | 47 -------------------
 .../v402/ver4_patricia_trie_policy.cpp        | 14 +++---
 .../backward/v402/ver4_patricia_trie_policy.h |  5 +-
 .../structure/v2/patricia_trie_policy.cpp     | 14 +++---
 .../structure/v2/patricia_trie_policy.h       |  4 +-
 .../v4/ver4_patricia_trie_policy.cpp          | 14 +++---
 .../structure/v4/ver4_patricia_trie_policy.h  |  4 +-
 9 files changed, 46 insertions(+), 76 deletions(-)

diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 92f5c17136..d625739705 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -92,7 +92,11 @@ void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
     TimeKeeper::setCurrentTime();
     NgramListenerForPrediction listener(prevWordsInfo, outSuggestionResults,
             mDictionaryStructureWithBufferPolicy.get());
-    mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordsInfo, &listener);
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    prevWordsInfo->getPrevWordsTerminalPtNodePos(
+            mDictionaryStructureWithBufferPolicy.get(), prevWordsPtNodePos,
+            true /* tryLowerCaseSearch */);
+    mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordsPtNodePos, &listener);
 }
 
 int Dictionary::getProbability(const int *word, int length) const {
@@ -111,7 +115,15 @@ int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, co
     int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
             length, false /* forceLowerCaseSearch */);
     if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
-    return getDictionaryStructurePolicy()->getProbabilityOfPtNode(prevWordsInfo, nextWordPos);
+    if (!prevWordsInfo) {
+        return getDictionaryStructurePolicy()->getProbabilityOfPtNode(
+                nullptr /* prevWordsPtNodePos */, nextWordPos);
+    }
+    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+    prevWordsInfo->getPrevWordsTerminalPtNodePos(
+            mDictionaryStructureWithBufferPolicy.get(), prevWordsPtNodePos,
+            true /* tryLowerCaseSearch */);
+    return getDictionaryStructurePolicy()->getProbabilityOfPtNode(prevWordsPtNodePos, nextWordPos);
 }
 
 bool Dictionary::addUnigramEntry(const int *const word, const int length,
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index 81e38f78ef..7e3bf3ff66 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -59,10 +59,10 @@ class DictionaryStructureWithBufferPolicy {
     virtual int getProbability(const int unigramProbability,
             const int bigramProbability) const = 0;
 
-    virtual int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
+    virtual int getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
             const int nodePos) const = 0;
 
-    virtual void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
+    virtual void iterateNgramEntries(const int *const prevWordsPtNodePos,
             NgramListener *const listener) const = 0;
 
     virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h
index 76276f5284..e44e876e9c 100644
--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@@ -90,13 +90,6 @@ class PrevWordsInfo {
         }
     }
 
-    BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
-            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
-        return getBigramsIteratorForWordWithTryingLowerCaseSearch(
-                dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0],
-                mIsBeginningOfSentence[0]);
-    }
-
     // n is 1-indexed.
     const int *getNthPrevWordCodePoints(const int n) const {
         if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
@@ -154,46 +147,6 @@ class PrevWordsInfo {
                 codePoints, codePointCount, true /* forceLowerCaseSearch */);
     }
 
-    static BinaryDictionaryBigramsIterator getBigramsIteratorForWordWithTryingLowerCaseSearch(
-            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
-            const int *const wordCodePoints, const int wordCodePointCount,
-            const bool isBeginningOfSentence) {
-        if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) {
-            return BinaryDictionaryBigramsIterator();
-        }
-        int codePoints[MAX_WORD_LENGTH];
-        int codePointCount = wordCodePointCount;
-        memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
-        if (isBeginningOfSentence) {
-            codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
-                    codePointCount, MAX_WORD_LENGTH);
-            if (codePointCount <= 0) {
-                return BinaryDictionaryBigramsIterator();
-            }
-        }
-        BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorForWord(dictStructurePolicy,
-                codePoints, codePointCount, false /* forceLowerCaseSearch */);
-        // getBigramsIteratorForWord returns an empty iterator if this word isn't in the dictionary
-        // or has no bigrams.
-        if (bigramsIt.hasNext()) {
-            return bigramsIt;
-        }
-        // If no bigrams for this exact word, search again in lower case.
-        return getBigramsIteratorForWord(dictStructurePolicy, codePoints,
-                codePointCount, true /* forceLowerCaseSearch */);
-    }
-
-    static BinaryDictionaryBigramsIterator getBigramsIteratorForWord(
-            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
-            const int *wordCodePoints, const int wordCodePointCount,
-            const bool forceLowerCaseSearch) {
-        if (!wordCodePoints || wordCodePointCount <= 0) return BinaryDictionaryBigramsIterator();
-        const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
-                wordCodePoints, wordCodePointCount, forceLowerCaseSearch);
-        if (NOT_A_DICT_POS == terminalPtNodePos) return BinaryDictionaryBigramsIterator();
-        return dictStructurePolicy->getBigramsIteratorOfPtNode(terminalPtNodePos);
-    }
-
     void clear() {
         for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
             mPrevWordCodePointCount[i] = 0;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 4b834a09dc..994c425055 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -132,7 +132,7 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
     }
 }
 
-int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
+int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
         const int ptNodePos) const {
     if (ptNodePos == NOT_A_DICT_POS) {
         return NOT_A_PROBABILITY;
@@ -141,9 +141,9 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
     if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
         return NOT_A_PROBABILITY;
     }
-    if (prevWordsInfo) {
+    if (prevWordsPtNodePos) {
         BinaryDictionaryBigramsIterator bigramsIt =
-                prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */);
+                getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
         while (bigramsIt.hasNext()) {
             bigramsIt.next();
             if (bigramsIt.getBigramPos() == ptNodePos
@@ -156,10 +156,12 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
     return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
 }
 
-void Ver4PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
+void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
         NgramListener *const listener) const {
-    BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
-            this /* dictStructurePolicy */);
+    if (!prevWordsPtNodePos) {
+        return;
+    }
+    BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
     while (bigramsIt.hasNext()) {
         bigramsIt.next();
         listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
index e61c060e80..ff69de7c08 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@@ -90,10 +90,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
 
     int getProbability(const int unigramProbability, const int bigramProbability) const;
 
-    int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
-            const int ptNodePos) const;
+    int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
 
-    void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
+    void iterateNgramEntries(const int *const prevWordsPtNodePos,
             NgramListener *const listener) const;
 
     int getShortcutPositionOfPtNode(const int ptNodePos) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 6f02ff3636..53415aeb60 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -297,7 +297,7 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability,
     }
 }
 
-int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
+int PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
         const int ptNodePos) const {
     if (ptNodePos == NOT_A_DICT_POS) {
         return NOT_A_PROBABILITY;
@@ -310,9 +310,9 @@ int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWo
         // for shortcuts).
         return NOT_A_PROBABILITY;
     }
-    if (prevWordsInfo) {
+    if (prevWordsPtNodePos) {
         BinaryDictionaryBigramsIterator bigramsIt =
-                prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */);
+                getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
         while (bigramsIt.hasNext()) {
             bigramsIt.next();
             if (bigramsIt.getBigramPos() == ptNodePos
@@ -325,10 +325,12 @@ int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWo
     return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
 }
 
-void PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
+void PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
         NgramListener *const listener) const {
-    BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
-            this /* dictStructurePolicy */);
+    if (!prevWordsPtNodePos) {
+        return;
+    }
+    BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
     while (bigramsIt.hasNext()) {
         bigramsIt.next();
         listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index a3b22206c1..07cb72b234 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -63,9 +63,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
 
     int getProbability(const int unigramProbability, const int bigramProbability) const;
 
-    int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const;
+    int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
 
-    void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
+    void iterateNgramEntries(const int *const prevWordsPtNodePos,
             NgramListener *const listener) const;
 
     int getShortcutPositionOfPtNode(const int ptNodePos) const;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 23bbbbde57..22f7e11825 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -122,7 +122,7 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
     }
 }
 
-int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
+int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
         const int ptNodePos) const {
     if (ptNodePos == NOT_A_DICT_POS) {
         return NOT_A_PROBABILITY;
@@ -131,9 +131,9 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
     if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
         return NOT_A_PROBABILITY;
     }
-    if (prevWordsInfo) {
+    if (prevWordsPtNodePos) {
         BinaryDictionaryBigramsIterator bigramsIt =
-                prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */);
+                getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
         while (bigramsIt.hasNext()) {
             bigramsIt.next();
             if (bigramsIt.getBigramPos() == ptNodePos
@@ -146,10 +146,12 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
     return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
 }
 
-void Ver4PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
+void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
         NgramListener *const listener) const {
-    BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
-            this /* dictStructurePolicy */);
+    if (!prevWordsPtNodePos) {
+        return;
+    }
+    BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
     while (bigramsIt.hasNext()) {
         bigramsIt.next();
         listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index 18384546fc..c5b6a80c09 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -72,9 +72,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
 
     int getProbability(const int unigramProbability, const int bigramProbability) const;
 
-    int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const;
+    int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
 
-    void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
+    void iterateNgramEntries(const int *const prevWordsPtNodePos,
             NgramListener *const listener) const;
 
     int getShortcutPositionOfPtNode(const int ptNodePos) const;
-- 
GitLab