Fix bugs and add tests

Change-Id: I6b56b91ace57f4a49584b5dceb71b145859f839e

Fix bugs and add tests
c8744949 · Jean Chalard · 7d3836d6 · c8744949 · c8744949
Commit c8744949 authored 11 years ago by Jean Chalard
--- a/java/src/com/android/inputmethod/latin/StringUtils.java
+++ b/java/src/com/android/inputmethod/latin/StringUtils.java
@@ -115,11 +115,12 @@ public final class StringUtils {
        // - This does not work for Greek, because it returns upper case instead of title case.
        // - It does not work for Serbian, because it fails to account for the "lj" character,
        // which should be "Lj" in title case and "LJ" in upper case.
-        // - It does not work for Dutch, because it fails to account for the "ij" digraph, which
+        // - It does not work for Dutch, because it fails to account for the "ij" digraph when it's
-        // are two different characters but both should be capitalized as "IJ" as if they were
+        // written as two separate code points. They are two different characters but both should
-        // a single letter.
+        // be capitalized as "IJ" as if they were a single letter in most words (not all). If the
-        // - It also does not work with unicode surrogate code points.
+        // Unicode character for the ligature is used, however, it works.
-        return s.toUpperCase(locale).charAt(0) + s.substring(1);
+        final int cutoff = s.offsetByCodePoints(0, 1);
+        return s.substring(0, cutoff).toUpperCase(locale) + s.substring(cutoff).toLowerCase(locale);
    }
    private static final int[] EMPTY_CODEPOINTS = {};
@@ -176,17 +177,27 @@ public final class StringUtils {
        return list.toArray(new String[list.size()]);
    }
-    // This method assumes the text is not empty or null.
+    // This method assumes the text is not null. For the empty string, it returns CAPITALIZE_NONE.
    public static int getCapitalizationType(final String text) {
        // Leading non-letter characters are skipped. If the first letter is not uppercase,
        // then the word is either all lower case or camel case, and in either case we return
        // CAPITALIZE_NONE.
-        if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
        final int len = text.length();
+        int index = 0;
+        for (; index < len; index = text.offsetByCodePoints(index, 1)) {
+            if (Character.isLetter(text.codePointAt(index))) {
+                break;
+            }
+        }
+        if (index == len) return CAPITALIZE_NONE;
+        if (!Character.isUpperCase(text.codePointAt(index))) {
+            return CAPITALIZE_NONE;
+        }
        int capsCount = 1;
        int letterCount = 1;
-        for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) {
+        for (index = text.offsetByCodePoints(index, 1); index < len;
+                index = text.offsetByCodePoints(index, 1)) {
            if (1 != capsCount && letterCount != capsCount) break;
-            final int codePoint = text.codePointAt(i);
+            final int codePoint = text.codePointAt(index);
            if (Character.isUpperCase(codePoint)) {
                ++capsCount;
                ++letterCount;

--- a/tests/src/com/android/inputmethod/latin/StringUtilsTests.java
+++ b/tests/src/com/android/inputmethod/latin/StringUtilsTests.java
@@ -19,6 +19,8 @@ package com.android.inputmethod.latin;
 import android.test.AndroidTestCase;
 import android.test.suitebuilder.annotation.SmallTest;
+import java.util.Locale;
 @SmallTest
 public class StringUtilsTests extends AndroidTestCase {
    public void testContainsInArray() {
@@ -90,4 +92,48 @@ public class StringUtilsTests extends AndroidTestCase {
        assertEquals("in 5 elements at position 2,4", "key1,key3,key5",
                StringUtils.removeFromCsvIfExists("key", "key1,key,key3,key,key5"));
    }
+    public void testToTitleCase() {  // Expected toTitleCase output per locale.
+        assertEquals("SSaa",  // German: an initial sharp s is expected to upper-case to "SS".
+                StringUtils.toTitleCase("ßaa", Locale.GERMAN));
+        assertEquals("Aßa",  // German: a sharp s after the first code point is left as is.
+                StringUtils.toTitleCase("aßa", Locale.GERMAN));
+        assertEquals("Iab",  // English: "i" is expected to become the plain capital "I".
+                StringUtils.toTitleCase("iab", Locale.ENGLISH));
+        assertEquals("Camelcase",  // Everything after the first code point is lower-cased.
+                StringUtils.toTitleCase("cAmElCaSe", Locale.ENGLISH));
+        assertEquals("İab",  // Turkish: "i" is expected to become the dotted capital "İ".
+                StringUtils.toTitleCase("iab", new Locale("tr")));
+        assertEquals("Aib",  // Turkish: dotted capital "İ" is expected to lower-case to "i".
+                StringUtils.toTitleCase("AİB", new Locale("tr")));
+        // For one character, toTitleCase returns the string as is. Not sure what the motivation
+        // is, but that's how it works now.
+        assertEquals("a",  // A single lower-case character is expected back unchanged.
+                StringUtils.toTitleCase("a", Locale.ENGLISH));
+        assertEquals("A",  // A single upper-case character is expected back unchanged.
+                StringUtils.toTitleCase("A", Locale.ENGLISH));
+    }
+    public void testGetCapitalizationType() {  // Expected capitalization type per input.
+        assertEquals(StringUtils.CAPITALIZE_NONE,  // All lower case.
+                StringUtils.getCapitalizationType("capitalize"));
+        assertEquals(StringUtils.CAPITALIZE_NONE,  // Mixed case with a lower-case first letter.
+                StringUtils.getCapitalizationType("cApITalize"));
+        assertEquals(StringUtils.CAPITALIZE_NONE,  // Only the last letter is upper case.
+                StringUtils.getCapitalizationType("capitalizE"));
+        assertEquals(StringUtils.CAPITALIZE_NONE,  // First letter after the symbols is lower case.
+                StringUtils.getCapitalizationType("__c a piu$@tali56ze"));
+        assertEquals(StringUtils.CAPITALIZE_FIRST,  // Symbols, spaces and digits after the "A".
+                StringUtils.getCapitalizationType("A__c a piu$@tali56ze"));
+        assertEquals(StringUtils.CAPITALIZE_FIRST,  // Upper-case first letter, rest lower case.
+                StringUtils.getCapitalizationType("Capitalize"));
+        assertEquals(StringUtils.CAPITALIZE_FIRST,  // Leading spaces before the first letter.
+                StringUtils.getCapitalizationType("     Capitalize"));
+        assertEquals(StringUtils.CAPITALIZE_ALL,  // Every letter is upper case.
+                StringUtils.getCapitalizationType("CAPITALIZE"));
+        assertEquals(StringUtils.CAPITALIZE_ALL,  // Leading spaces; digits between the capitals.
+                StringUtils.getCapitalizationType("  PI26LIE"));
+        assertEquals(StringUtils.CAPITALIZE_NONE,  // The empty string contains no letter.
+                StringUtils.getCapitalizationType(""));
+    }
 }