From 522a04ea5b249d0af556647d2abcad57e5b99b4f Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Mon, 23 Apr 2012 15:37:07 +0900
Subject: [PATCH] Pass words as int[] to the native code.

We need to get the bigrams during the call to getSuggestions for
bug#6313806. We already give an int[] to getSuggestions and we
wanted to get rid of char[]'s anyway because it doesn't work with
surrogate pairs, so here we go.

Bug: 6313806
Change-Id: I56ce99f1db6b3302cdf42f0527343bded837091e
---
 .../inputmethod/latin/BinaryDictionary.java   | 12 +++++------
 ...oid_inputmethod_latin_BinaryDictionary.cpp | 20 +++++++++----------
 native/jni/src/bigram_dictionary.cpp          |  4 ++--
 native/jni/src/bigram_dictionary.h            |  4 ++--
 native/jni/src/binary_format.h                |  6 +++---
 native/jni/src/dictionary.cpp                 |  2 +-
 native/jni/src/dictionary.h                   |  4 ++--
 native/jni/src/unigram_dictionary.cpp         |  2 +-
 native/jni/src/unigram_dictionary.h           |  2 +-
 9 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 9429ef4115..a644ec0d9f 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -83,11 +83,11 @@ public class BinaryDictionary extends Dictionary {
     private native long openNative(String sourceDir, long dictOffset, long dictSize,
             int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
     private native void closeNative(long dict);
-    private native boolean isValidWordNative(long dict, char[] word, int wordLength);
+    private native boolean isValidWordNative(long dict, int[] word, int wordLength);
     private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
             int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
             boolean useFullEditDistance, char[] outputChars, int[] scores);
-    private native int getBigramsNative(long dict, char[] prevWord, int prevWordLength,
+    private native int getBigramsNative(long dict, int[] prevWord, int prevWordLength,
             int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores,
             int maxWordLength, int maxBigrams);
     private static native double calcNormalizedScoreNative(
@@ -105,7 +105,7 @@ public class BinaryDictionary extends Dictionary {
             final WordCallback callback) {
         if (mNativeDict == 0) return;
 
-        char[] chars = previousWord.toString().toCharArray();
+        int[] codePoints = StringUtils.toCodePointArray(previousWord.toString());
         Arrays.fill(mOutputChars_bigrams, (char) 0);
         Arrays.fill(mBigramScores, 0);
 
@@ -115,8 +115,8 @@ public class BinaryDictionary extends Dictionary {
             mInputCodes[0] = codes.getCodeAt(0);
         }
 
-        int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes, codesSize,
-                mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS);
+        int count = getBigramsNative(mNativeDict, codePoints, codePoints.length, mInputCodes,
+                codesSize, mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS);
         if (count > MAX_BIGRAMS) {
             count = MAX_BIGRAMS;
         }
@@ -200,7 +200,7 @@ public class BinaryDictionary extends Dictionary {
     @Override
     public boolean isValidWord(CharSequence word) {
         if (word == null) return false;
-        char[] chars = word.toString().toCharArray();
+        int[] chars = StringUtils.toCodePointArray(word.toString());
         return isValidWordNative(mNativeDict, chars, chars.length);
     }
 
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 2ef72e1e87..3e72ce6845 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -153,30 +153,30 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object,
 }
 
 static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlong dict,
-        jcharArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize,
+        jintArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize,
         jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams) {
     Dictionary *dictionary = (Dictionary*)dict;
     if (!dictionary) return 0;
-    jchar *prevWord = env->GetCharArrayElements(prevWordArray, 0);
+    jint *prevWord = env->GetIntArrayElements(prevWordArray, 0);
     int *inputCodes = env->GetIntArrayElements(inputArray, 0);
     jchar *outputChars = env->GetCharArrayElements(outputArray, 0);
     int *frequencies = env->GetIntArrayElements(frequencyArray, 0);
-    int count = dictionary->getBigrams((unsigned short*) prevWord, prevWordLength, inputCodes,
+    int count = dictionary->getBigrams(prevWord, prevWordLength, inputCodes,
             inputArraySize, (unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams);
     env->ReleaseIntArrayElements(frequencyArray, frequencies, 0);
     env->ReleaseCharArrayElements(outputArray, outputChars, 0);
     env->ReleaseIntArrayElements(inputArray, inputCodes, JNI_ABORT);
-    env->ReleaseCharArrayElements(prevWordArray, prevWord, JNI_ABORT);
+    env->ReleaseIntArrayElements(prevWordArray, prevWord, JNI_ABORT);
     return count;
 }
 
 static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict,
-        jcharArray wordArray, jint wordLength) {
+        jintArray wordArray, jint wordLength) {
     Dictionary *dictionary = (Dictionary*)dict;
     if (!dictionary) return (jboolean) false;
-    jchar *word = env->GetCharArrayElements(wordArray, 0);
-    jboolean result = dictionary->isValidWord((unsigned short*) word, wordLength);
-    env->ReleaseCharArrayElements(wordArray, word, JNI_ABORT);
+    jint *word = env->GetIntArrayElements(wordArray, 0);
+    jboolean result = dictionary->isValidWord(word, wordLength);
+    env->ReleaseIntArrayElements(wordArray, word, JNI_ABORT);
     return result;
 }
 
@@ -236,8 +236,8 @@ static JNINativeMethod sMethods[] = {
     {"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
     {"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
             (void*)latinime_BinaryDictionary_getSuggestions},
-    {"isValidWordNative", "(J[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
-    {"getBigramsNative", "(J[CI[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
+    {"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
+    {"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
     {"calcNormalizedScoreNative", "([CI[CII)D",
             (void*)latinime_BinaryDictionary_calcNormalizedScore},
     {"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance}
diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index 320b0af68a..927381fdbf 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -96,7 +96,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ
  * and the bigrams are used to boost unigram result scores, it makes little sense to
  * reduce their scope to the ones that match the first letter.
  */
-int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, int *codes,
+int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *codes,
         int codesSize, unsigned short *bigramChars, int *bigramFreq, int maxWordLength,
         int maxBigrams) {
     // TODO: remove unused arguments, and refrain from storing stuff in members of this class
@@ -134,7 +134,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i
 // Returns a pointer to the start of the bigram list.
 // If the word is not found or has no bigrams, this function returns 0.
 int BigramDictionary::getBigramListForWord(const uint8_t* const root,
-        const unsigned short *prevWord, const int prevWordLength) {
+        const int32_t *prevWord, const int prevWordLength) {
     int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
 
     if (NOT_VALID_WORD == pos) return 0;
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index 1612131c46..07e47f0590 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -25,10 +25,10 @@ class Dictionary;
 class BigramDictionary {
  public:
     BigramDictionary(const unsigned char *dict, int maxWordLength, Dictionary *parentDictionary);
-    int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
+    int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
             unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
     int getBigramListForWord(const uint8_t* const root,
-        const unsigned short *prevWord, const int prevWordLength);
+        const int32_t *prevWord, const int prevWordLength);
     ~BigramDictionary();
  private:
     bool addWordBigram(unsigned short *word, int length, int frequency);
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index f59302460c..b8ac95250f 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -62,7 +62,7 @@ class BinaryFormat {
     static bool hasChildrenInFlags(const uint8_t flags);
     static int getAttributeAddressAndForwardPointer(const uint8_t* const dict, const uint8_t flags,
             int *pos);
-    static int getTerminalPosition(const uint8_t* const root, const uint16_t* const inWord,
+    static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord,
             const int length);
     static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
             uint16_t* outWord);
@@ -304,7 +304,7 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t* con
 // This function gets the byte position of the last chargroup of the exact matching word in the
 // dictionary. If no match is found, it returns NOT_VALID_WORD.
 inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
-        const uint16_t* const inWord, const int length) {
+        const int32_t* const inWord, const int length) {
     int pos = 0;
     int wordPos = 0;
 
@@ -313,7 +313,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root,
         // there was no match (or we would have found it).
         if (wordPos > length) return NOT_VALID_WORD;
         int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos);
-        const uint16_t wChar = inWord[wordPos];
+        const int32_t wChar = inWord[wordPos];
         while (true) {
             // If there are no more character groups in this node, it means we could not
             // find a matching character for this depth, therefore there is no match.
diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp
index 90ec207f0b..9dc2072238 100644
--- a/native/jni/src/dictionary.cpp
+++ b/native/jni/src/dictionary.cpp
@@ -54,7 +54,7 @@ Dictionary::~Dictionary() {
     delete mBigramDictionary;
 }
 
-bool Dictionary::isValidWord(unsigned short *word, int length) {
+bool Dictionary::isValidWord(const int32_t *word, int length) {
     return mUnigramDictionary->isValidWord(word, length);
 }
 
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index 66a5c2150d..dea5763d04 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -40,13 +40,13 @@ class Dictionary {
                 codesSize, useFullEditDistance, outWords, frequencies);
     }
 
-    int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
+    int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
             unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams) {
         return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
                 maxWordLength, maxBigrams);
     }
 
-    bool isValidWord(unsigned short *word, int length);
+    bool isValidWord(const int32_t *word, int length);
     void *getDict() { return (void *)mDict; }
     int getDictSize() { return mDictSize; }
     int getMmapFd() { return mMmapFd; }
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index ab8570e6f7..05c124b948 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -730,7 +730,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor
     return maxFreq;
 }
 
-bool UnigramDictionary::isValidWord(const uint16_t* const inWord, const int length) const {
+bool UnigramDictionary::isValidWord(const int32_t* const inWord, const int length) const {
     return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length);
 }
 
diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h
index 4479cd94e3..b09c0006d4 100644
--- a/native/jni/src/unigram_dictionary.h
+++ b/native/jni/src/unigram_dictionary.h
@@ -71,7 +71,7 @@ class UnigramDictionary {
 
     UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
             int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags);
-    bool isValidWord(const uint16_t* const inWord, const int length) const;
+    bool isValidWord(const int32_t* const inWord, const int length) const;
     int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
     int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
             Correction *correction, const int *xcoordinates,
-- 
GitLab