From 20a6dea1cabfd8822824f7dca828d898e5b91cbc Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Wed, 25 Apr 2012 18:49:31 +0900
Subject: [PATCH] Add a flag for bigram presence in the header

This is a cherry-pick of Icb602762 onto jb-dev.

Bug: 6355745
Change-Id: Icb602762bb0d81472f024fa491571062ec1fc4e9
---
 .../latin/makedict/BinaryDictInputOutput.java | 10 +++++--
 .../latin/makedict/FusionDictionary.java      | 28 ++++++++++++++++++-
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index 88da7b0d88..d82d503c45 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -131,6 +131,7 @@ public class BinaryDictInputOutput {
     // These options need to be the same numeric values as the one in the native reading code.
     private static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
     private static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
+    private static final int CONTAINS_BIGRAMS_FLAG = 0x8;
 
     // TODO: Make this value adaptative to content data, store it in the header, and
     // use it in the reading code.
@@ -752,9 +753,12 @@ public class BinaryDictInputOutput {
     /**
      * Makes the 2-byte value for options flags.
      */
-    private static final int makeOptionsValue(final DictionaryOptions options) {
+    private static final int makeOptionsValue(final FusionDictionary dictionary) {
+        final DictionaryOptions options = dictionary.mOptions;
+        final boolean hasBigrams = dictionary.hasBigrams();
         return (options.mFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0)
-                + (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0);
+                + (options.mGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0)
+                + (hasBigrams ? CONTAINS_BIGRAMS_FLAG : 0);
     }
 
     /**
@@ -970,7 +974,7 @@ public class BinaryDictInputOutput {
             headerBuffer.write((byte) (0xFF & version));
         }
         // Options flags
-        final int options = makeOptionsValue(dict.mOptions);
+        final int options = makeOptionsValue(dict);
         headerBuffer.write((byte) (0xFF & (options >> 8)));
         headerBuffer.write((byte) (0xFF & options));
         if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index c293b2ba48..b08702e47a 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -563,7 +563,7 @@ public class FusionDictionary implements Iterable<Word> {
      * Recursively count the number of nodes in a given branch of the trie.
      *
      * @param node the node to count.
-     * @result the number of nodes in this branch.
+     * @return the number of nodes in this branch.
      */
     public static int countNodes(final Node node) {
         int size = 1;
@@ -575,6 +575,32 @@ public class FusionDictionary implements Iterable<Word> {
         return size;
     }
 
+    // Recursively checks whether any char group in the branch rooted at this node has bigrams.
+    // The search returns as soon as it finds one, so it is much cheaper when there are bigrams;
+    // when there are none, every group in the branch is visited before returning false.
+    private static boolean hasBigramsInternal(final Node node) {
+        if (null == node) return false; // No node here, so no bigrams in this branch.
+        for (int i = node.mData.size() - 1; i >= 0; --i) {
+            CharGroup group = node.mData.get(i);
+            if (null != group.mBigrams) return true; // Only nullness of mBigrams is tested.
+            if (hasBigramsInternal(group.mChildren)) return true; // Search below this group.
+        }
+        return false;
+    }
+
+    /**
+     * Finds out whether there are any bigrams in this dictionary, searching down from mRoot.
+     *
+     * @return true if there is any bigram, false otherwise.
+     */
+    // TODO: this is expensive, especially for large dictionaries without any bigrams, because
+    // each call runs the recursive search again. The upside is that the result is always
+    // accurate and nothing extra has to be stored. We should find a more efficient way of
+    // doing this, without compromising too much on memory and ease of use.
+    public boolean hasBigrams() {
+        return hasBigramsInternal(mRoot);
+    }
+
     // Historically, the tails of the words were going to be merged to save space.
     // However, that would prevent the code to search for a specific address in log(n)
     // time so this was abandoned.
-- 
GitLab