From 5f430e0189f6c0a1a34edb0e00809bc53a525a7b Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Mon, 7 Apr 2014 23:41:29 +0900
Subject: [PATCH] [CB25] Refactor some array copying

Bug: 13406701
Change-Id: I09edd822f5cc2446b324d41c76d835bcff1191e6
---
 .../inputmethod/latin/BinaryDictionary.java   | 13 ++--
 .../inputmethod/latin/WordComposer.java       | 43 ++++++++-----
 .../inputmethod/latin/utils/StringUtils.java  | 33 +++++++++-
 .../latin/utils/StringAndJsonUtilsTests.java  | 64 +++++++++++++++++++
 4 files changed, 130 insertions(+), 23 deletions(-)

diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 7b37777f53..5e36d97032 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -247,7 +247,9 @@ public final class BinaryDictionary extends Dictionary {
             final String prevWord, final ProximityInfo proximityInfo,
             final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
             final int sessionId, final float[] inOutLanguageWeight) {
-        if (!isValidDictionary()) return null;
+        if (!isValidDictionary()) {
+            return null;
+        }
 
         Arrays.fill(mInputCodePoints, Constants.NOT_A_CODE);
         // TODO: toLowerCase in the native code
@@ -257,12 +259,11 @@ public final class BinaryDictionary extends Dictionary {
         final boolean isGesture = composer.isBatchMode();
         final int inputSize;
         if (!isGesture) {
-            final int composerSize = composer.sizeWithoutTrailingSingleQuotes();
-            if (composerSize > MAX_WORD_LENGTH - 1) return null;
-            for (int i = 0; i < composerSize; i++) {
-                mInputCodePoints[i] = composer.getCodeAt(i);
+            inputSize = composer.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount(
+                    mInputCodePoints, MAX_WORD_LENGTH);
+            if (inputSize < 0) {
+                return null;
             }
-            inputSize = composerSize;
         } else {
             inputSize = inputPointers.getPointerSize();
         }
diff --git a/java/src/com/android/inputmethod/latin/WordComposer.java b/java/src/com/android/inputmethod/latin/WordComposer.java
index 81d642ff22..02f18cdd30 100644
--- a/java/src/com/android/inputmethod/latin/WordComposer.java
+++ b/java/src/com/android/inputmethod/latin/WordComposer.java
@@ -131,29 +131,42 @@ public final class WordComposer {
         return mCodePointSize;
     }
 
-    public boolean isSingleLetter() {
-        return size() == 1;
+    /**
+     * Copy the typed word's downcased code points, minus trailing single quotes, to an int array.
+     *
+     * If the array is too small to hold these code points, nothing is copied and -1 is returned.
+     *
+     * @param destination the array of ints.
+     * @param maxSize the size of the array.
+     * @return the number of copied code points, or -1 if there are more than maxSize of them.
+     */
+    public int copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount(
+            final int[] destination, final int maxSize) {
+        int i = mTypedWordCache.length() - 1;
+        while (i >= 0 && mTypedWordCache.charAt(i) == Constants.CODE_SINGLE_QUOTE) {
+            --i;
+        }
+        if (i < 0) {
+            // The string is empty or contains only single quotes.
+            return 0;
+        }
+        // i is the index of the last kept char, so the exclusive end of the range is i + 1.
+        final int codePointSize = Character.codePointCount(mTypedWordCache, 0, i + 1);
+        if (codePointSize > maxSize) {
+            return -1;
+        }
+        return StringUtils.copyCodePointsAndReturnCodePointCount(destination, mTypedWordCache, 0,
+                i + 1, true /* downCase */);
     }
 
-    // When the composition contains trailing quotes, we don't pass them to the suggestion engine.
-    // This is because "'tgis'" should be corrected to "'this'", but we can't afford to consider
-    // single quotes as separators because of their very common use as apostrophes.
-    public int sizeWithoutTrailingSingleQuotes() {
-        return size() - mTrailingSingleQuotesCount;
+    public boolean isSingleLetter() {
+        return size() == 1;
     }
 
     public final boolean isComposingWord() {
         return size() > 0;
     }
 
-    // TODO: make sure that the index should not exceed MAX_WORD_LENGTH
-    public int getCodeAt(int index) {
-        if (index >= MAX_WORD_LENGTH) {
-            return -1;
-        }
-        return mPrimaryKeyCodes[index];
-    }
-
     public InputPointers getInputPointers() {
         return mInputPointers;
     }
diff --git a/java/src/com/android/inputmethod/latin/utils/StringUtils.java b/java/src/com/android/inputmethod/latin/utils/StringUtils.java
index accbc8b7bc..374badc19c 100644
--- a/java/src/com/android/inputmethod/latin/utils/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/StringUtils.java
@@ -191,13 +191,42 @@ public final class StringUtils {
         }
         final int[] codePoints =
                 new int[Character.codePointCount(charSequence, startIndex, endIndex)];
+        copyCodePointsAndReturnCodePointCount(codePoints, charSequence, startIndex, endIndex,
+                false /* downCase */);
+        return codePoints;
+    }
+
+    /**
+     * Copies the code points in a CharSequence to an int array.
+     *
+     * This method assumes there is enough space in the array to store the code points. The size
+     * can be measured with Character#codePointCount(CharSequence, int, int) before passing to this
+     * method. If the int array is too small, an ArrayIndexOutOfBoundsException will be thrown.
+     * Also, this method makes no effort to be thread-safe. Do not modify the CharSequence while
+     * this method is running, or the behavior is undefined.
+     * This method can optionally downcase code points before copying them, but it pays no attention
+     * to locale while doing so.
+     *
+     * @param destination the int array, written to from index 0.
+     * @param charSequence the CharSequence.
+     * @param startIndex the start index inside the CharSequence in java chars, inclusive.
+     * @param endIndex the end index inside the CharSequence in java chars, exclusive.
+     * @param downCase if this is true, code points will be downcased before being copied.
+     * @return the number of copied code points.
+     */
+    public static int copyCodePointsAndReturnCodePointCount(final int[] destination,
+            final CharSequence charSequence, final int startIndex, final int endIndex,
+            final boolean downCase) {
         int destIndex = 0;
         for (int index = startIndex; index < endIndex;
                 index = Character.offsetByCodePoints(charSequence, index, 1)) {
-            codePoints[destIndex] = Character.codePointAt(charSequence, index);
+            final int codePoint = Character.codePointAt(charSequence, index);
+            // TODO: stop using Character#toLowerCase(int), as it's not aware of the locale and
+            // does not always do the right thing.
+            destination[destIndex] = downCase ? Character.toLowerCase(codePoint) : codePoint;
             destIndex++;
         }
-        return codePoints;
+        return destIndex;
     }
 
     public static int[] toSortedCodePointArray(final String string) {
diff --git a/tests/src/com/android/inputmethod/latin/utils/StringAndJsonUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/StringAndJsonUtilsTests.java
index e55c32bd08..2a4ead3837 100644
--- a/tests/src/com/android/inputmethod/latin/utils/StringAndJsonUtilsTests.java
+++ b/tests/src/com/android/inputmethod/latin/utils/StringAndJsonUtilsTests.java
@@ -308,4 +308,68 @@ public class StringAndJsonUtilsTests extends AndroidTestCase {
             assertEquals(objs[i], newObjArray.get(i));
         }
     }
+
+    public void testToCodePointArray() {
+        // The surrogate pair U+D861 U+DED7 must come out as the single code point 0x286D7.
+        final String STR_WITH_SUPPLEMENTARY_CHAR = "abcde\uD861\uDED7fgh\u0000\u2002\u2003\u3000xx";
+        final int[] EXPECTED_RESULT = new int[] { 'a', 'b', 'c', 'd', 'e', 0x286D7, 'f', 'g', 'h',
+                0, 0x2002, 0x2003, 0x3000, 'x', 'x'};
+        final int[] actual = StringUtils.toCodePointArray(STR_WITH_SUPPLEMENTARY_CHAR, 0,
+                STR_WITH_SUPPLEMENTARY_CHAR.length());
+        assertEquals("toCodePointArray, size matches", EXPECTED_RESULT.length, actual.length);
+        for (int i = 0; i < EXPECTED_RESULT.length; ++i) {
+            assertEquals("toCodePointArray position " + i, EXPECTED_RESULT[i], actual[i]);
+        }
+    }
+
+    public void testCopyCodePointsAndReturnCodePointCount() {
+        final String STR_WITH_SUPPLEMENTARY_CHAR = "AbcDE\uD861\uDED7fGh\u0000\u2002\u3000あx";
+        final int[] EXPECTED_RESULT = new int[] { 'A', 'b', 'c', 'D', 'E', 0x286D7,
+                'f', 'G', 'h', 0, 0x2002, 0x3000, 'あ', 'x'};
+        final int[] EXPECTED_RESULT_DOWNCASE = new int[] { 'a', 'b', 'c', 'd', 'e', 0x286D7,
+                'f', 'g', 'h', 0, 0x2002, 0x3000, 'あ', 'x'};
+
+        int[] codePointArray = new int[50]; // Larger than needed for any case below.
+        int codePointCount = StringUtils.copyCodePointsAndReturnCodePointCount(codePointArray,
+                STR_WITH_SUPPLEMENTARY_CHAR, 0,
+                STR_WITH_SUPPLEMENTARY_CHAR.length(), false /* downCase */);
+        assertEquals("copyCodePointsAndReturnCodePointCount, size matches",
+                EXPECTED_RESULT.length, codePointCount);
+        for (int i = 0; i < codePointCount; ++i) {
+            assertEquals("copyCodePointsAndReturnCodePointCount position " + i,
+                    EXPECTED_RESULT[i], codePointArray[i]);
+        }
+
+        codePointCount = StringUtils.copyCodePointsAndReturnCodePointCount(codePointArray,
+                STR_WITH_SUPPLEMENTARY_CHAR, 0,
+                STR_WITH_SUPPLEMENTARY_CHAR.length(), true /* downCase */);
+        assertEquals("copyCodePointsAndReturnCodePointCount downcase, size matches",
+                EXPECTED_RESULT_DOWNCASE.length, codePointCount);
+        for (int i = 0; i < codePointCount; ++i) {
+            assertEquals("copyCodePointsAndReturnCodePointCount position " + i,
+                    EXPECTED_RESULT_DOWNCASE[i], codePointArray[i]);
+        }
+
+        final int JAVA_CHAR_COUNT = 8; // "AbcDE", the surrogate pair (2 chars), then "f".
+        final int CODEPOINT_COUNT = 7; // The surrogate pair counts as one code point.
+        codePointCount = StringUtils.copyCodePointsAndReturnCodePointCount(codePointArray,
+                STR_WITH_SUPPLEMENTARY_CHAR, 0, JAVA_CHAR_COUNT, false /* downCase */);
+        assertEquals("copyCodePointsAndReturnCodePointCount, size matches",
+                CODEPOINT_COUNT, codePointCount);
+        for (int i = 0; i < codePointCount; ++i) {
+            assertEquals("copyCodePointsAndReturnCodePointCount position " + i,
+                    EXPECTED_RESULT[i], codePointArray[i]);
+        }
+
+        boolean exceptionHappened = false;
+        codePointArray = new int[5]; // Too small for the 7 code points requested below.
+        try {
+            StringUtils.copyCodePointsAndReturnCodePointCount(codePointArray,
+                    STR_WITH_SUPPLEMENTARY_CHAR, 0, JAVA_CHAR_COUNT, false /* downCase */);
+        } catch (ArrayIndexOutOfBoundsException e) {
+            exceptionHappened = true;
+        }
+        assertTrue("copyCodePointsAndReturnCodePointCount throws when array is too small",
+                exceptionHappened);
+    }
 }
-- 
GitLab