From 25e8eda9afb5c36703bd50b263ab0dd3a3b38d31 Mon Sep 17 00:00:00 2001
From: Tom Ouyang <ouyang@google.com>
Date: Tue, 2 Apr 2013 17:23:57 -0700
Subject: [PATCH] Add mechanism to handle digraphs in DicNode

This is needed to support German umlaut handling for both gesture and typing input.

Bug: 8493920

Change-Id: I292e2a4911c54aa5566c38411016a220bb35a1f4
---
 native/jni/src/digraph_utils.cpp              | 74 ++++++++++++++-----
 native/jni/src/digraph_utils.h                | 19 ++++-
 .../jni/src/suggest/core/dicnode/dic_node.h   | 19 ++++-
 .../core/dicnode/dic_node_state_scoring.h     | 23 ++++++
 4 files changed, 114 insertions(+), 21 deletions(-)

diff --git a/native/jni/src/digraph_utils.cpp b/native/jni/src/digraph_utils.cpp
index 8781c50770..6a1ab02716 100644
--- a/native/jni/src/digraph_utils.cpp
+++ b/native/jni/src/digraph_utils.cpp
@@ -27,39 +27,47 @@ const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] =
 const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] =
         { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE
         { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE
+const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] =
+        { DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES };
 
 /* static */ bool DigraphUtils::hasDigraphForCodePoint(
         const int dictFlags, const int compositeGlyphCodePoint) {
-    if (DigraphUtils::getDigraphForCodePoint(dictFlags, compositeGlyphCodePoint)) {
+    const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(dictFlags);
+    if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) {
         return true;
     }
     return false;
 }
 
-// Retrieves the set of all digraphs associated with the given dictionary.
-// Returns the size of the digraph array, or 0 if none exist.
-/* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(
-        const int dictFlags, const DigraphUtils::digraph_t **digraphs) {
+// Returns the digraph type associated with the given dictionary.
+/* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary(
+        const int dictFlags) {
     if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & dictFlags) {
-        *digraphs = DigraphUtils::GERMAN_UMLAUT_DIGRAPHS;
-        return NELEMS(DigraphUtils::GERMAN_UMLAUT_DIGRAPHS);
+        return DIGRAPH_TYPE_GERMAN_UMLAUT;
     }
     if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & dictFlags) {
-        *digraphs = DigraphUtils::FRENCH_LIGATURES_DIGRAPHS;
-        return NELEMS(DigraphUtils::FRENCH_LIGATURES_DIGRAPHS);
+        return DIGRAPH_TYPE_FRENCH_LIGATURES;
     }
-    return 0;
+    return DIGRAPH_TYPE_NONE;
+}
+
+// Retrieves the set of all digraphs associated with the given dictionary flags.
+// Returns the size of the digraph array, or 0 if none exist.
+/* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(
+        const int dictFlags, const DigraphUtils::digraph_t **const digraphs) {
+    const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(dictFlags);
+    return getAllDigraphsForDigraphTypeAndReturnSize(digraphType, digraphs);
 }
 
 // Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index
 // (which specifies the first or second codepoint in the digraph).
-/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int dictFlags,
-        const int compositeGlyphCodePoint, const DigraphCodePointIndex digraphCodePointIndex) {
+/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
+        const DigraphCodePointIndex digraphCodePointIndex) {
     if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) {
         return NOT_A_CODE_POINT;
     }
-    const DigraphUtils::digraph_t *digraph =
-            DigraphUtils::getDigraphForCodePoint(dictFlags, compositeGlyphCodePoint);
+    const DigraphUtils::digraph_t *const digraph =
+            DigraphUtils::getDigraphForCodePoint(compositeGlyphCodePoint);
     if (!digraph) {
         return NOT_A_CODE_POINT;
     }
@@ -72,16 +80,48 @@ const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] =
     return NOT_A_CODE_POINT;
 }
 
+// Retrieves the set of all digraphs associated with the given digraph type.
+// Returns the size of the digraph array, or 0 if none exist.
+/* static */ int DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(
+        const DigraphUtils::DigraphType digraphType,
+        const DigraphUtils::digraph_t **const digraphs) {
+    if (digraphType == DigraphUtils::DIGRAPH_TYPE_GERMAN_UMLAUT) {
+        *digraphs = GERMAN_UMLAUT_DIGRAPHS;
+        return NELEMS(GERMAN_UMLAUT_DIGRAPHS);
+    }
+    if (digraphType == DIGRAPH_TYPE_FRENCH_LIGATURES) {
+        *digraphs = FRENCH_LIGATURES_DIGRAPHS;
+        return NELEMS(FRENCH_LIGATURES_DIGRAPHS);
+    }
+    return 0;
+}
+
 /**
  * Returns the digraph for the input composite glyph codepoint, or 0 if none exists.
- * dictFlags: the dictionary flags needed to determine which digraphs are supported.
  * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
  */
 /* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint(
-        const int dictFlags, const int compositeGlyphCodePoint) {
+        const int compositeGlyphCodePoint) {
+    for (size_t i = 0; i < NELEMS(USED_DIGRAPH_TYPES); i++) {
+        const DigraphUtils::digraph_t *const digraph = getDigraphForDigraphTypeAndCodePoint(
+                USED_DIGRAPH_TYPES[i], compositeGlyphCodePoint);
+        if (digraph) {
+            return digraph;
+        }
+    }
+    return 0;
+}
+
+/**
+ * Returns the digraph for the input composite glyph codepoint, or 0 if none exists.
+ * digraphType: selects which digraph table is searched.
+ * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
+ */
+/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint(
+        const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) {
     const DigraphUtils::digraph_t *digraphs = 0;
     const int digraphsSize =
-            DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(dictFlags, &digraphs);
+            DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(digraphType, &digraphs);
     for (int i = 0; i < digraphsSize; i++) {
         if (digraphs[i].compositeGlyph == compositeGlyphCodePoint) {
             return &digraphs[i];
diff --git a/native/jni/src/digraph_utils.h b/native/jni/src/digraph_utils.h
index 6e364b67af..94435228e7 100644
--- a/native/jni/src/digraph_utils.h
+++ b/native/jni/src/digraph_utils.h
@@ -27,21 +27,34 @@ class DigraphUtils {
         SECOND_DIGRAPH_CODEPOINT
     } DigraphCodePointIndex;
 
+    typedef enum {
+        DIGRAPH_TYPE_NONE,
+        DIGRAPH_TYPE_GERMAN_UMLAUT,
+        DIGRAPH_TYPE_FRENCH_LIGATURES
+    } DigraphType;
+
     typedef struct { int first; int second; int compositeGlyph; } digraph_t;
 
     static bool hasDigraphForCodePoint(const int dictFlags, const int compositeGlyphCodePoint);
     static int getAllDigraphsForDictionaryAndReturnSize(
-            const int dictFlags, const digraph_t **digraphs);
+            const int dictFlags, const digraph_t **const digraphs);
     static int getDigraphCodePointForIndex(const int dictFlags, const int compositeGlyphCodePoint,
             const DigraphCodePointIndex digraphCodePointIndex);
+    static int getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
+            const DigraphCodePointIndex digraphCodePointIndex);
 
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils);
-    static const digraph_t *getDigraphForCodePoint(
-            const int dictFlags, const int compositeGlyphCodePoint);
+    static DigraphType getDigraphTypeForDictionary(const int dictFlags);
+    static int getAllDigraphsForDigraphTypeAndReturnSize(
+            const DigraphType digraphType, const digraph_t **const digraphs);
+    static const digraph_t *getDigraphForCodePoint(const int compositeGlyphCodePoint);
+    static const digraph_t *getDigraphForDigraphTypeAndCodePoint(
+            const DigraphType digraphType, const int compositeGlyphCodePoint);
 
     static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
     static const digraph_t FRENCH_LIGATURES_DIGRAPHS[];
+    static const DigraphType USED_DIGRAPH_TYPES[];
 };
 } // namespace latinime
 #endif // DIGRAPH_UTILS_H
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index cde7b99a7d..32faae52ca 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -23,6 +23,7 @@
 #include "dic_node_profiler.h"
 #include "dic_node_properties.h"
 #include "dic_node_release_listener.h"
+#include "digraph_utils.h"
 
 #if DEBUG_DICT
 #define LOGI_SHOW_ADD_COST_PROP \
@@ -399,8 +400,15 @@ class DicNode {
     // TODO: Remove     //
     //////////////////////
     // TODO: Remove once touch path is merged into ProximityInfoState
+    // Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
     int getNodeCodePoint() const {
-        return mDicNodeProperties.getNodeCodePoint();
+        const int codePoint = mDicNodeProperties.getNodeCodePoint();
+        const DigraphUtils::DigraphCodePointIndex digraphIndex =
+                mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
+        if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
+            return codePoint;
+        }
+        return DigraphUtils::getDigraphCodePointForIndex(codePoint, digraphIndex);
     }
 
     ////////////////////////////////
@@ -452,6 +460,15 @@ class DicNode {
         mDicNodeState.mDicNodeStateScoring.setDoubleLetterLevel(doubleLetterLevel);
     }
 
+    bool isInDigraph() const {
+        return mDicNodeState.mDicNodeStateScoring.getDigraphIndex()
+                != DigraphUtils::NOT_A_DIGRAPH_INDEX;
+    }
+
+    void advanceDigraphIndex() {
+        mDicNodeState.mDicNodeStateScoring.advanceDigraphIndex();
+    }
+
     uint8_t getFlags() const {
         return mDicNodeProperties.getFlags();
     }
diff --git a/native/jni/src/suggest/core/dicnode/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/dic_node_state_scoring.h
index 8e816329fc..8902d31229 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node_state_scoring.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node_state_scoring.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 
 #include "defines.h"
+#include "digraph_utils.h"
 
 namespace latinime {
 
@@ -27,6 +28,7 @@ class DicNodeStateScoring {
  public:
     AK_FORCE_INLINE DicNodeStateScoring()
             : mDoubleLetterLevel(NOT_A_DOUBLE_LETTER),
+              mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
               mEditCorrectionCount(0), mProximityCorrectionCount(0),
               mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
               mTotalPrevWordsLanguageCost(0.0f), mRawLength(0.0f) {
@@ -43,6 +45,7 @@ class DicNodeStateScoring {
         mTotalPrevWordsLanguageCost = 0.0f;
         mRawLength = 0.0f;
         mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
+        mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
     }
 
     AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
@@ -54,6 +57,7 @@ class DicNodeStateScoring {
         mTotalPrevWordsLanguageCost = scoring->mTotalPrevWordsLanguageCost;
         mRawLength = scoring->mRawLength;
         mDoubleLetterLevel = scoring->mDoubleLetterLevel;
+        mDigraphIndex = scoring->mDigraphIndex;
     }
 
     void addCost(const float spatialCost, const float languageCost, const bool doNormalization,
@@ -126,6 +130,24 @@ class DicNodeStateScoring {
         }
     }
 
+    DigraphUtils::DigraphCodePointIndex getDigraphIndex() const {
+        return mDigraphIndex;
+    }
+
+    void advanceDigraphIndex() {
+        switch(mDigraphIndex) {
+            case DigraphUtils::NOT_A_DIGRAPH_INDEX:
+                mDigraphIndex = DigraphUtils::FIRST_DIGRAPH_CODEPOINT;
+                break;
+            case DigraphUtils::FIRST_DIGRAPH_CODEPOINT:
+                mDigraphIndex = DigraphUtils::SECOND_DIGRAPH_CODEPOINT;
+                break;
+            case DigraphUtils::SECOND_DIGRAPH_CODEPOINT:
+                mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
+                break;
+        }
+    }
+
     float getTotalPrevWordsLanguageCost() const {
         return mTotalPrevWordsLanguageCost;
     }
@@ -135,6 +157,7 @@ class DicNodeStateScoring {
     // Use a default copy constructor and an assign operator because shallow copies are ok
     // for this class
     DoubleLetterLevel mDoubleLetterLevel;
+    DigraphUtils::DigraphCodePointIndex mDigraphIndex;
 
     int16_t mEditCorrectionCount;
     int16_t mProximityCorrectionCount;
-- 
GitLab