From 17e44a72e846d1514c5b2b4d0ad80e3fbbf26fdd Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Thu, 16 Jun 2011 22:51:11 +0900
Subject: [PATCH] New dict format, step 3

Some refactoring, and the addition of a parameter that will be necessary.

Bug: 4392433
Change-Id: I17f001a7efd4f69f4c35f94ee1ca8e97391b81d5
---
 native/src/unigram_dictionary.cpp | 27 +++++++++++++++++----------
 native/src/unigram_dictionary.h   |  5 +++--
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index 91e3c81340..7bcdbb4989 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -289,8 +289,8 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
     // Find the right insertion point
     int insertAt = 0;
     while (insertAt < MAX_WORDS) {
-        if (frequency > mFrequencies[insertAt] || (mFrequencies[insertAt] == frequency
-                && length < Dictionary::wideStrLen(mOutputChars + insertAt * MAX_WORD_LENGTH))) {
+        // TODO: How should we sort words with the same frequency?
+        if (frequency > mFrequencies[insertAt]) {
             break;
         }
         insertAt++;
@@ -371,6 +371,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
     mStackInputIndex[0] = 0;
     mStackDiffs[0] = 0;
     mStackSiblingPos[0] = rootPosition;
+    mStackOutputIndex[0] = 0;
 
     // Depth first search
     while (depth >= 0) {
@@ -381,14 +382,15 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
             int inputIndex = mStackInputIndex[depth];
             int diffs = mStackDiffs[depth];
             int siblingPos = mStackSiblingPos[depth];
+            int outputIndex = mStackOutputIndex[depth];
             int firstChildPos;
             // depth will never be greater than maxDepth because in that case,
             // needsToTraverseChildrenNodes should be false
-            const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth,
+            const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, outputIndex,
                     maxDepth, traverseAllNodes, matchWeight, inputIndex, diffs, skipPos,
                     excessivePos, transposedPos, nextLetters, nextLettersSize, &childCount,
                     &firstChildPos, &traverseAllNodes, &matchWeight, &inputIndex, &diffs,
-                    &siblingPos);
+                    &siblingPos, &outputIndex);
             // Update next sibling pos
             mStackSiblingPos[depth] = siblingPos;
             if (needsToTraverseChildrenNodes) {
@@ -400,6 +402,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
                 mStackInputIndex[depth] = inputIndex;
                 mStackDiffs[depth] = diffs;
                 mStackSiblingPos[depth] = firstChildPos;
+                mStackOutputIndex[depth] = outputIndex;
             }
         } else {
             // Goes to parent sibling node
@@ -582,12 +585,13 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
         int newInputIndex;
         int newDiffs;
         int newSiblingPos;
+        int newOutputIndex;
         const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
                 traverseAllNodes, matchWeight, inputIndex, diffs,
                 skipPos, excessivePos, transposedPos,
                 nextLetters, nextLettersSize,
                 &newCount, &newChildPosition, &newTraverseAllNodes, &newMatchRate,
-                &newInputIndex, &newDiffs, &newSiblingPos);
+                &newInputIndex, &newDiffs, &newSiblingPos, &newOutputIndex);
         siblingPos = newSiblingPos;
 
         if (needsToTraverseChildrenNodes) {
@@ -753,7 +757,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
         const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
         int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
         bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs,
-        int *nextSiblingPosition) {
+        int *nextSiblingPosition, int *nextOutputIndex) {
     if (DEBUG_DICT) {
         int inputCount = 0;
         if (skipPos >= 0) ++inputCount;
@@ -771,6 +775,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
 
     *nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
             &c, &childPosition, &terminal, &freq);
+    *nextOutputIndex = depth + 1;
 
     const bool needsToTraverseChildrenNodes = childPosition != 0;
 
@@ -927,13 +932,15 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh
 // TODO: use uint32_t instead of unsigned short
 bool UnigramDictionary::isValidWord(unsigned short *word, int length) {
     if (IS_LATEST_DICT_VERSION) {
-        return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
+        return (getFrequency(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
     } else {
-        return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD);
+        return (getFrequency(0, word, 0, length) != NOT_VALID_WORD);
     }
 }
 
-int UnigramDictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
+
+// Require strict exact match.
+int UnigramDictionary::getFrequency(int pos, unsigned short *word, int offset, int length) const {
     // returns address of bigram data of that word
     // return -99 if not found
 
@@ -950,7 +957,7 @@ int UnigramDictionary::isValidWordRec(int pos, unsigned short *word, int offset,
                 }
             } else {
                 if (childPos != 0) {
-                    int t = isValidWordRec(childPos, word, offset + 1, length);
+                    int t = getFrequency(childPos, word, offset + 1, length);
                     if (t > 0) {
                         return t;
                     }
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index cf871fffea..b8e4914fab 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -59,7 +59,7 @@ private:
     void getSuggestionCandidates(const int skipPos, const int excessivePos,
             const int transposedPos, int *nextLetters, const int nextLettersSize,
             const int maxDepth);
-    int isValidWordRec(int pos, unsigned short *word, int offset, int length);
+    int getFrequency(int pos, unsigned short *word, int offset, int length) const;
     void getVersionNumber();
     bool checkIfDictVersionIsLatest();
     int getAddress(int *pos);
@@ -100,7 +100,7 @@ private:
             const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
             int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
             bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
-            int *nextSiblingPosition);
+            int *nextSiblingPosition, int *nextOutputIndex);
     int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word);
     // Process a node by considering missing space
     bool processCurrentNodeForExactMatch(const int firstChildPos,
@@ -145,6 +145,7 @@ private:
     int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];
     int mStackDiffs[MAX_WORD_LENGTH_INTERNAL];
     int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
+    int mStackOutputIndex[MAX_WORD_LENGTH_INTERNAL];
     int mNextLettersFrequency[NEXT_LETTERS_SIZE];
 };
 
-- 
GitLab