From 985312e88f11e3ce61f35191df59c6bdf9e80e79 Mon Sep 17 00:00:00 2001
From: satok <satok@google.com>
Date: Fri, 5 Aug 2011 21:21:01 +0900
Subject: [PATCH] Refactor the correction algorithm related to missing
 character correction

Change-Id: If68f2aaea7df48d013aea5401cee4ec0df32111a
---
 native/src/correction_state.cpp | 73 ++++++++++++++++++++++++---------
 native/src/correction_state.h   |  7 +++-
 native/src/proximity_info.cpp   |  9 +---
 native/src/proximity_info.h     |  2 +-
 4 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/native/src/correction_state.cpp b/native/src/correction_state.cpp
index 9000e9e9cc..0de11ce192 100644
--- a/native/src/correction_state.cpp
+++ b/native/src/correction_state.cpp
@@ -30,10 +30,9 @@ namespace latinime {
 //////////////////////
 static const char QUOTE = '\'';
 
-inline bool CorrectionState::needsToSkipCurrentNode(const unsigned short c) {
+inline bool CorrectionState::isQuote(const unsigned short c) {
     const unsigned short userTypedChar = mProximityInfo->getPrimaryCharAt(mInputIndex);
-    // Skip the ' or other letter and continue deeper
-    return (c == QUOTE && userTypedChar != QUOTE) || mSkipPos == mOutputIndex;
+    return (c == QUOTE && userTypedChar != QUOTE);  // c is ' but the typed primary char is not
 }
 
 /////////////////////
@@ -50,6 +49,7 @@ void CorrectionState::initCorrectionState(const ProximityInfo *pi, const int inp
     mInputLength = inputLength;
     mMaxDepth = maxDepth;
     mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
+    mSkippedOutputIndex = -1;
 }
 
 void CorrectionState::setCorrectionParams(const int skipPos, const int excessivePos,
@@ -77,9 +77,8 @@ int CorrectionState::getFreqForSplitTwoWords(const int firstFreq, const int seco
 }
 
 int CorrectionState::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
-    const int outputIndex = mOutputIndex - 1;
-    const int inputIndex = (mCurrentStateType == TRAVERSE_ALL_ON_TERMINAL
-            || mCurrentStateType == TRAVERSE_ALL_NOT_ON_TERMINAL) ? mInputIndex : mInputIndex - 1;
+    const int outputIndex = mTerminalOutputIndex;
+    const int inputIndex = mTerminalInputIndex;
     *wordLength = outputIndex + 1;
     if (mProximityInfo->sameAsTyped(mWord, outputIndex + 1) || outputIndex < MIN_SUGGEST_DEPTH) {
         return -1;
@@ -145,22 +144,36 @@ bool CorrectionState::needsToPrune() const {
             || mDiffs > mMaxEditDistance);
 }
 
+CorrectionState::CorrectionStateType CorrectionState::processSkipChar(
+        const int32_t c, const bool isTerminal) {  // emits c without matching it to input
+    mWord[mOutputIndex] = c;  // write c at the current output position
+    if (needsToTraverseAll() && isTerminal) {
+        mTerminalInputIndex = mInputIndex;  // snapshot indices; getFinalFreq() reads them
+        mTerminalOutputIndex = mOutputIndex;  // taken before the output index advances
+        incrementOutputIndex();
+        return TRAVERSE_ALL_ON_TERMINAL;
+    } else {
+        incrementOutputIndex();  // same advance, but no terminal snapshot is recorded
+        return TRAVERSE_ALL_NOT_ON_TERMINAL;
+    }
+}
+
 CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
         const int32_t c, const bool isTerminal) {
-    mCurrentStateType = NOT_ON_TERMINAL;
+    CorrectionStateType currentStateType = NOT_ON_TERMINAL;
     // This has to be done for each virtual char (this forwards the "inputIndex" which
     // is the index in the user-inputted chars, as read by proximity chars.
     if (mExcessivePos == mOutputIndex && mInputIndex < mInputLength - 1) {
         incrementInputIndex();
     }
 
-    if (mTraverseAllNodes || needsToSkipCurrentNode(c)) {
-        mWord[mOutputIndex] = c;
-        if (needsToTraverseAll() && isTerminal) {
-            mCurrentStateType = TRAVERSE_ALL_ON_TERMINAL;
-        } else {
-            mCurrentStateType = TRAVERSE_ALL_NOT_ON_TERMINAL;
-        }
+    bool skip = false;
+    if (mSkipPos >= 0) {
+        skip = mSkipPos == mOutputIndex;
+    }
+
+    if (mTraverseAllNodes || isQuote(c)) {
+        return processSkipChar(c, isTerminal);
     } else {
         int inputIndexForProximity = mInputIndex;
 
@@ -173,12 +186,30 @@ CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
             }
         }
 
+        const bool checkProximityChars =
+                !(mSkipPos >= 0 || mExcessivePos >= 0 || mTransposedPos >= 0);
         int matchedProximityCharId = mProximityInfo->getMatchedProximityId(
-                inputIndexForProximity, c, this);
-        if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId) {
-            mCurrentStateType = UNRELATED;
-            return mCurrentStateType;
+                inputIndexForProximity, c, checkProximityChars);
+
+        const bool unrelated = ProximityInfo::UNRELATED_CHAR == matchedProximityCharId;
+        if (unrelated) {
+            if (skip) {
+                // Skip this letter and continue deeper
+                mSkippedOutputIndex = mOutputIndex;
+                return processSkipChar(c, isTerminal);
+            } else {
+                return UNRELATED;
+            }
         }
+
+        // No need to skip. Finish traversing and increment skipPos.
+        // TODO: Remove this?
+        if (skip) {
+            mWord[mOutputIndex] = c;
+            incrementOutputIndex();
+            return TRAVERSE_ALL_NOT_ON_TERMINAL;
+        }
+
         mWord[mOutputIndex] = c;
         // If inputIndex is greater than mInputLength, that means there is no
         // proximity chars. So, we don't need to check proximity.
@@ -195,7 +226,9 @@ CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
                         || (mExcessivePos == mInputLength - 1
                                     && getInputIndex() == mInputLength - 2);
         if (isSameAsUserTypedLength && isTerminal) {
-            mCurrentStateType = ON_TERMINAL;
+            mTerminalInputIndex = mInputIndex;
+            mTerminalOutputIndex = mOutputIndex;
+            currentStateType = ON_TERMINAL;
         }
         // Start traversing all nodes after the index exceeds the user typed length
         if (isSameAsUserTypedLength) {
@@ -213,7 +246,7 @@ CorrectionState::CorrectionStateType CorrectionState::processCharAndCalcState(
     // Also, the next char is one "virtual node" depth more than this char.
     incrementOutputIndex();
 
-    return mCurrentStateType;
+    return currentStateType;
 }
 
 CorrectionState::~CorrectionState() {
diff --git a/native/src/correction_state.h b/native/src/correction_state.h
index a548bcb68f..7ea5aa37d3 100644
--- a/native/src/correction_state.h
+++ b/native/src/correction_state.h
@@ -101,6 +101,7 @@ private:
     int mMaxDepth;
     int mInputLength;
     int mSkipPos;
+    int mSkippedOutputIndex;
     int mExcessivePos;
     int mTransposedPos;
     int mSpaceProximityPos;
@@ -109,12 +110,14 @@ private:
     int mMatchedCharCount;
     int mInputIndex;
     int mOutputIndex;
+    int mTerminalInputIndex;
+    int mTerminalOutputIndex;
     int mDiffs;
     bool mTraverseAllNodes;
-    CorrectionStateType mCurrentStateType;
     unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
 
-    inline bool needsToSkipCurrentNode(const unsigned short c);
+    inline bool isQuote(const unsigned short c);
+    inline CorrectionStateType processSkipChar(const int32_t c, const bool isTerminal);
 
     class RankingAlgorithm {
     public:
diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp
index bed92cf9ea..d437e251ae 100644
--- a/native/src/proximity_info.cpp
+++ b/native/src/proximity_info.cpp
@@ -114,10 +114,7 @@ bool ProximityInfo::existsAdjacentProximityChars(const int index) const {
 // in their list. The non-accented version of the character should be considered
 // "close", but not the other keys close to the non-accented version.
 ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(
-        const int index, const unsigned short c, CorrectionState *correctionState) const {
-    const int skipPos = correctionState->getSkipPos();
-    const int excessivePos = correctionState->getExcessivePos();
-    const int transposedPos = correctionState->getTransposedPos();
+        const int index, const unsigned short c, const bool checkProximityChars) const {
     const int *currentChars = getProximityCharsAt(index);
     const unsigned short baseLowerC = Dictionary::toBaseLowerCase(c);
 
@@ -126,9 +123,7 @@ ProximityInfo::ProximityType ProximityInfo::getMatchedProximityId(
     if (currentChars[0] == baseLowerC || currentChars[0] == c)
         return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
 
-    // If one of those is true, we should not check for close characters at all.
-    if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0)
-        return UNRELATED_CHAR;
+    if (!checkProximityChars) return UNRELATED_CHAR;
 
     // If the non-accented, lowercased version of that first character matches c,
     // then we have a non-accented version of the accented character the user
diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h
index b28191d019..a9477e41a0 100644
--- a/native/src/proximity_info.h
+++ b/native/src/proximity_info.h
@@ -44,7 +44,7 @@ public:
     bool existsCharInProximityAt(const int index, const int c) const;
     bool existsAdjacentProximityChars(const int index) const;
     ProximityType getMatchedProximityId(
-            const int index, const unsigned short c, CorrectionState *correctionState) const;
+            const int index, const unsigned short c, const bool checkProximityChars) const;
     bool sameAsTyped(const unsigned short *word, int length) const;
 private:
     int getStartIndexFromCoordinates(const int x, const int y) const;
-- 
GitLab