From de4e8dedccc7b6db6df4c3f75d9f2458432c558a Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Thu, 25 Aug 2011 18:04:21 +0900
Subject: [PATCH] Allow sharing dictionaries between similar locales.

Bug: 5058488
Change-Id: Ib12013f58afad957a8205b439f87480cc12ea06f
---
 .../latin/BinaryDictionaryFileDumper.java     |  39 +++--
 .../latin/BinaryDictionaryGetter.java         |  63 +++++--
 .../inputmethod/latin/LocaleUtils.java        | 157 ++++++++++++++++++
 .../inputmethod/latin/WordListInfo.java       |  29 ++++
 4 files changed, 259 insertions(+), 29 deletions(-)
 create mode 100644 java/src/com/android/inputmethod/latin/LocaleUtils.java
 create mode 100644 java/src/com/android/inputmethod/latin/WordListInfo.java

diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java
index 89944407ec..e95172d1f8 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryFileDumper.java
@@ -67,25 +67,34 @@ public class BinaryDictionaryFileDumper {
      * Queries a content provider for the list of word lists for a specific locale
      * available to copy into Latin IME.
      */
-    private static List<String> getWordListIds(final Locale locale, final Context context) {
+    private static List<WordListInfo> getWordListWordListInfos(final Locale locale,
+            final Context context) {
         final ContentResolver resolver = context.getContentResolver();
         final Uri dictionaryPackUri = getProviderUri(locale.toString());
 
         final Cursor c = resolver.query(dictionaryPackUri, DICTIONARY_PROJECTION, null, null, null);
-        if (null == c) return Collections.<String>emptyList();
+        if (null == c) return Collections.<WordListInfo>emptyList();
         if (c.getCount() <= 0 || !c.moveToFirst()) {
             c.close();
-            return Collections.<String>emptyList();
+            return Collections.<WordListInfo>emptyList();
         }
 
-        final List<String> list = new ArrayList<String>();
-        do {
-            final String id = c.getString(0);
-            if (TextUtils.isEmpty(id)) continue;
-            list.add(id);
-        } while (c.moveToNext());
-        c.close();
-        return list;
+        try {
+            final List<WordListInfo> list = new ArrayList<WordListInfo>();
+            do {
+                final String wordListId = c.getString(0);
+                final String wordListLocale = c.getString(1);
+                if (TextUtils.isEmpty(wordListId)) continue;
+                list.add(new WordListInfo(wordListId, wordListLocale));
+            } while (c.moveToNext());
+            return list;
+        } catch (Exception e) {
+            // Never crash on a dictionary pack communication problem: report no word lists.
+            Log.e(TAG, "Exception communicating with the dictionary pack : " + e);
+            return Collections.<WordListInfo>emptyList();
+        } finally {
+            c.close(); // Always release the cursor, even when an exception was thrown.
+        }
     }
 
 
@@ -108,7 +117,7 @@ public class BinaryDictionaryFileDumper {
      * to the cache file name designated by its id and locale, overwriting it if already present
      * and creating it (and its containing directory) if necessary.
      */
-    private static AssetFileAddress cacheWordList(final String id, final Locale locale,
+    private static AssetFileAddress cacheWordList(final String id, final String locale,
             final ContentResolver resolver, final Context context) {
 
         final int COMPRESSED_CRYPTED_COMPRESSED = 0;
@@ -213,10 +222,10 @@ public class BinaryDictionaryFileDumper {
     public static List<AssetFileAddress> cacheWordListsFromContentProvider(final Locale locale,
             final Context context) {
         final ContentResolver resolver = context.getContentResolver();
-        final List<String> idList = getWordListIds(locale, context);
+        final List<WordListInfo> idList = getWordListWordListInfos(locale, context);
         final List<AssetFileAddress> fileAddressList = new ArrayList<AssetFileAddress>();
-        for (String id : idList) {
-            final AssetFileAddress afd = cacheWordList(id, locale, resolver, context);
+        for (WordListInfo id : idList) {
+            final AssetFileAddress afd = cacheWordList(id.mId, id.mLocale, resolver, context);
             if (null != afd) {
                 fileAddressList.add(afd);
             }
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
index 38344300c5..360c944d24 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java
@@ -108,12 +108,19 @@ class BinaryDictionaryGetter {
         return sb.toString();
     }
 
+    /**
+     * Helper method to get the top level cache directory.
+     */
+    private static String getWordListCacheDirectory(final Context context) {
+        return context.getFilesDir() + File.separator + "dicts";
+    }
+
     /**
      * Find out the cache directory associated with a specific locale.
      */
-    private static String getCacheDirectoryForLocale(Locale locale, Context context) {
-        final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toString());
-        final String absoluteDirectoryName = context.getFilesDir() + File.separator
+    private static String getCacheDirectoryForLocale(final String locale, final Context context) {
+        final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
+        final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
                 + relativeDirectoryName;
         final File directory = new File(absoluteDirectoryName);
         if (!directory.exists()) {
@@ -135,11 +142,11 @@ class BinaryDictionaryGetter {
      * named like the locale, except it will also escape characters that look dangerous
      * to some file systems.
      * @param id the id of the dictionary for which to get a file name
-     * @param locale the locale for which to get the file name
+     * @param locale the locale for which to get the file name, as a string
      * @param context the context to use for getting the directory
      * @return the name of the file to be created
      */
-    public static String getCacheFileName(String id, Locale locale, Context context) {
+    public static String getCacheFileName(String id, String locale, Context context) {
         final String fileName = replaceFileNameDangerousCharacters(id);
         return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
     }
@@ -198,26 +205,54 @@ class BinaryDictionaryGetter {
         }
     }
 
+    /**
+     * Helper method to get the list of cache directories, one for each distinct locale.
+     */
+    private static File[] getCachedDirectoryList(final Context context) {
+        return new File(getWordListCacheDirectory(context)).listFiles();
+    }
+
     /**
      * Returns the list of cached files for a specific locale.
      *
-     * @param locale the locale to find the dictionary files for.
+     * @param locale the locale to find the dictionary files for, as a string.
      * @param context the context on which to open the files upon.
      * @return an array of binary dictionary files, which may be empty but may not be null.
      */
-    private static File[] getCachedWordLists(final Locale locale,
+    private static File[] getCachedWordLists(final String locale,
             final Context context) {
-        final String directoryName = getCacheDirectoryForLocale(locale, context);
-        final File[] cacheFiles = new File(directoryName).listFiles();
-        if (null == cacheFiles) return EMPTY_FILE_ARRAY;
-        return cacheFiles;
+        final File[] directoryList = getCachedDirectoryList(context);
+        if (null == directoryList) return EMPTY_FILE_ARRAY;
+        final ArrayList<File> cacheFiles = new ArrayList<File>();
+        for (File directory : directoryList) {
+            if (!directory.isDirectory()) continue;
+            final String dirLocale = getWordListIdFromFileName(directory.getName());
+            if (LocaleUtils.isMatch(LocaleUtils.getMatchLevel(dirLocale, locale))) {
+                final File[] wordLists = directory.listFiles();
+                if (null != wordLists) {
+                    for (File wordList : wordLists) {
+                        cacheFiles.add(wordList);
+                    }
+                }
+            }
+        }
+        if (cacheFiles.isEmpty()) return EMPTY_FILE_ARRAY;
+        return cacheFiles.toArray(EMPTY_FILE_ARRAY);
     }
 
     /**
-     * Returns the id of the main dict for a specified locale.
+     * Returns the id associated with the main word list for a specified locale.
+     *
+     * Word lists stored in Android Keyboard's resources are referred to as the "main"
+     * word lists. Since they can be updated like any other list, we need to assign a
+     * unique ID to them. This ID is just the name of the language (locale-wise) they
+     * are for, and this method returns this ID.
      */
     private static String getMainDictId(final Locale locale) {
-        return locale.toString();
+        // This works because we don't include by default different dictionaries for
+        // different countries. This actually needs to return the id that we would
+        // like to use for word lists included in resources, and the following is okay.
+        return locale.getLanguage();
     }
 
     /**
@@ -239,7 +274,7 @@ class BinaryDictionaryGetter {
         // storage, but we don't really care about what was copied NOW: what we want is the
         // list of everything we ever cached, so we ignore the return value.
         BinaryDictionaryFileDumper.cacheWordListsFromContentProvider(locale, context);
-        final File[] cachedWordLists = getCachedWordLists(locale, context);
+        final File[] cachedWordLists = getCachedWordLists(locale.toString(), context);
 
         final String mainDictId = getMainDictId(locale);
 
diff --git a/java/src/com/android/inputmethod/latin/LocaleUtils.java b/java/src/com/android/inputmethod/latin/LocaleUtils.java
new file mode 100644
index 0000000000..054f1f9b84
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/LocaleUtils.java
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin;
+
+import android.text.TextUtils;
+
+/**
+ * A class to help with handling Locales in string form.
+ *
+ * This file has the same meaning and features (and shares all of its code) with
+ * the one in the dictionary pack. They need to be kept synchronized; for any
+ * update/bugfix to this file, consider also updating/fixing the version in the
+ * dictionary pack.
+ */
+public class LocaleUtils {
+
+    private final static String TAG = LocaleUtils.class.getSimpleName();
+
+    // Locale match level constants.
+    // A higher level of match is guaranteed to have a higher numerical value.
+    // Some room is left between constants to add match cases that may become necessary
+    // in the future, for example differentiating between the case where the countries
+    // are both present and different, and the case where one of the locales does not
+    // specify the countries. This difference is not needed now.
+
+    // Nothing matches.
+    public static final int LOCALE_NO_MATCH = 0;
+    // The languages match, but the countries are different. Or, the reference locale requires a
+    // country and the tested locale does not have one.
+    public static final int LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER = 3;
+    // The language and country match, but the variants are different. Or, the reference locale
+    // requires a variant and the tested locale does not have one.
+    public static final int LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER = 6;
+    // The required locale is null or empty so it will accept anything, and the tested locale
+    // is non-null and non-empty.
+    public static final int LOCALE_ANY_MATCH = 10;
+    // The language matches, and the tested locale specifies a country but the reference locale
+    // does not require one.
+    public static final int LOCALE_LANGUAGE_MATCH = 15;
+    // The language and the country match, and the tested locale specifies a variant but the
+    // reference locale does not require one.
+    public static final int LOCALE_LANGUAGE_AND_COUNTRY_MATCH = 20;
+    // The compared locales are fully identical. This is the best match level.
+    public static final int LOCALE_FULL_MATCH = 30;
+
+    // The level at which a match is "normally" considered a locale match with standard algorithms.
+    // Don't use this directly, use #isMatch to test.
+    private static final int LOCALE_MATCH = LOCALE_ANY_MATCH;
+
+    // Make this match the maximum match level. If this evolves to have more than 2 digits
+    // when written in base 10, also adjust the getMatchLevelSortedString method.
+    private static final int MATCH_LEVEL_MAX = 30;
+
+    /**
+     * Return how well a tested locale matches a reference locale.
+     *
+     * This will check the tested locale against the reference locale and return a measure of how
+     * well it matches the reference. The general idea is that the tested locale has to match
+     * every specified part of the required locale. A full match occurs when they are equal, a
+     * partial match when the tested locale agrees with the reference locale but is more specific,
+     * and a difference when the tested locale does not comply with all requirements from the
+     * reference locale.
+     * In more detail, if the reference locale specifies at least a language and the testedLocale
+     * does not specify one, or specifies a different one, LOCALE_NO_MATCH is returned. If the
+     * reference locale is empty or null, it will match anything - in the form of LOCALE_FULL_MATCH
+     * if the tested locale is empty or null, and LOCALE_ANY_MATCH otherwise. If the reference and
+     * tested locale agree on the language, but not on the country,
+     * LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER is returned if the reference locale specifies a country,
+     * and LOCALE_LANGUAGE_MATCH otherwise.
+     * If they agree on both the language and the country, but not on the variant,
+     * LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER is returned if the reference locale
+     * specifies a variant, and LOCALE_LANGUAGE_AND_COUNTRY_MATCH otherwise. If everything matches,
+     * LOCALE_FULL_MATCH is returned.
+     * Examples:
+     * en <=> en_US  => LOCALE_LANGUAGE_MATCH
+     * en_US <=> en => LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER
+     * en_US_POSIX <=> en_US_Android  =>  LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER
+     * en_US <=> en_US_Android => LOCALE_LANGUAGE_AND_COUNTRY_MATCH
+     * sp_US <=> en_US  =>  LOCALE_NO_MATCH
+     * de <=> de  => LOCALE_FULL_MATCH
+     * en_US <=> en_US => LOCALE_FULL_MATCH
+     * "" <=> en_US => LOCALE_ANY_MATCH
+     *
+     * @param referenceLocale the reference locale to test against.
+     * @param testedLocale the locale to test.
+     * @return a constant that measures how well the tested locale matches the reference locale.
+     */
+    public static int getMatchLevel(String referenceLocale, String testedLocale) {
+        if (TextUtils.isEmpty(referenceLocale)) {
+            return TextUtils.isEmpty(testedLocale) ? LOCALE_FULL_MATCH : LOCALE_ANY_MATCH;
+        }
+        if (null == testedLocale) return LOCALE_NO_MATCH;
+        String[] referenceParams = referenceLocale.split("_", 3);
+        String[] testedParams = testedLocale.split("_", 3);
+        // By spec of String#split, [0] cannot be null and length cannot be 0.
+        if (!referenceParams[0].equals(testedParams[0])) return LOCALE_NO_MATCH;
+        switch (referenceParams.length) {
+        case 1:
+            return 1 == testedParams.length ? LOCALE_FULL_MATCH : LOCALE_LANGUAGE_MATCH;
+        case 2:
+            if (1 == testedParams.length) return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER;
+            if (!referenceParams[1].equals(testedParams[1]))
+                return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER;
+            if (3 == testedParams.length) return LOCALE_LANGUAGE_AND_COUNTRY_MATCH;
+            return LOCALE_FULL_MATCH;
+        case 3:
+            if (1 == testedParams.length) return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER;
+            if (!referenceParams[1].equals(testedParams[1]))
+                return LOCALE_LANGUAGE_MATCH_COUNTRY_DIFFER;
+            if (2 == testedParams.length) return LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER;
+            if (!referenceParams[2].equals(testedParams[2]))
+                return LOCALE_LANGUAGE_AND_COUNTRY_MATCH_VARIANT_DIFFER;
+            return LOCALE_FULL_MATCH;
+        }
+        // It should be impossible to come here
+        return LOCALE_NO_MATCH;
+    }
+
+    /**
+     * Return a zero-padded string that represents this match level, with better matches first.
+     *
+     * The strings sort in lexicographic order: a better match always compares as less than a
+     * worse match. The digits are always ASCII, whatever the default locale of the device is.
+     */
+    public static String getMatchLevelSortedString(int matchLevel) {
+        // Two digits suffice as match levels are 0~99 (actually 0~30). The locale is pinned so
+        // the digits are always ASCII, keeping the keys stable and comparable across locales.
+        return String.format(java.util.Locale.US, "%02d", MATCH_LEVEL_MAX - matchLevel);
+    }
+
+    /**
+     * Find out whether a match level should be considered a match.
+     *
+     * This method takes a match level as returned by the #getMatchLevel method, and returns whether
+     * it should be considered a match in the usual sense with standard Locale functions.
+     *
+     * @param level the match level, as returned by getMatchLevel.
+     * @return whether this is a match or not.
+     */
+    public static boolean isMatch(int level) {
+        return LOCALE_MATCH <= level;
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/WordListInfo.java b/java/src/com/android/inputmethod/latin/WordListInfo.java
new file mode 100644
index 0000000000..54f04d78fc
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/WordListInfo.java
@@ -0,0 +1,29 @@
+/**
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.android.inputmethod.latin;
+
+/**
+ * Information container for a word list.
+ */
+public class WordListInfo {
+    public final String mId;
+    public final String mLocale;
+    public WordListInfo(final String id, final String locale) {
+        mId = id;
+        mLocale = locale;
+    }
+}
-- 
GitLab