diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index a08e28c8bb4f2da28ba7611d628396cf3732501d..106f025199726232af6cbdea5a97cb7a44ee7bd4 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -148,7 +148,7 @@ public final class BinaryDictIOUtils { * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. */ - public static void readUnigramsAndBigramsBinary(final Ver3DictDecoder dictDecoder, + /* package */ static void readUnigramsAndBigramsBinary(final Ver3DictDecoder dictDecoder, final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, UnsupportedFormatException { diff --git a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java index d5fcacc09dd53d12009304fbbcd1f20cc7f58eb4..11a3f0b3a989f0549ea5c1b77ac3665a9da1b406 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictDecoder.java @@ -29,6 +29,8 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; +import java.util.ArrayList; +import java.util.TreeMap; /** * An interface of binary dictionary decoder. @@ -71,6 +73,21 @@ public interface DictDecoder { public int getTerminalPosition(final String word) throws IOException, UnsupportedFormatException; + /** + * Reads unigrams and bigrams from the binary file. + * Doesn't store a full memory representation of the dictionary. + * + * @param words the map to store the address as a key and the word as a value. 
+ * @param frequencies the map to store the address as a key and the frequency as a value. + * @param bigrams the map to store the address as a key and the list of addresses as a value. + * @throws IOException if the file can't be read. + * @throws UnsupportedFormatException if the format of the file is not recognized. + */ + public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words, + final TreeMap<Integer, Integer> frequencies, + final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams) + throws IOException, UnsupportedFormatException; + // Flags for DictionaryBufferFactory. public static final int USE_READONLY_BYTEBUFFER = 0x01000000; public static final int USE_BYTEARRAY = 0x02000000; diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java index 77e6393ee9c0dcd202ddb75ac7338118ae14ba31..1fff9b49ed97fa9181af87e34523c5f4610c1dbb 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver3DictDecoder.java @@ -31,6 +31,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.TreeMap; /** * An implementation of DictDecoder for version 3 binary dictionary. 
@@ -317,4 +318,16 @@ public class Ver3DictDecoder implements DictDecoder { } return BinaryDictIOUtils.getTerminalPosition(this, word); } + + @Override + public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words, + final TreeMap<Integer, Integer> frequencies, + final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams) + throws IOException, UnsupportedFormatException { + if (mDictBuffer == null) { + openDictBuffer(); + } + BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams); + } + } diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java index 9d3d8a5da2cd954d158b8aadd05caafb16504a1f..99788f6f2fe7551835d485fc8d0567ea6386ab4a 100644 --- a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java @@ -32,7 +32,8 @@ import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigram import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; -import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; /** * Reads and writes Binary files for a UserHistoryDictionary. 
@@ -119,12 +120,11 @@ public final class UserHistoryDictIOUtils { */ public static void readDictionaryBinary(final Ver3DictDecoder dictDecoder, final OnAddWordListener dict) { - final Map<Integer, String> unigrams = CollectionUtils.newTreeMap(); - final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap(); - final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap(); + final TreeMap<Integer, String> unigrams = CollectionUtils.newTreeMap(); + final TreeMap<Integer, Integer> frequencies = CollectionUtils.newTreeMap(); + final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap(); try { - BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, unigrams, frequencies, - bigrams); + dictDecoder.readUnigramsAndBigramsBinary(unigrams, frequencies, bigrams); } catch (IOException e) { Log.e(TAG, "IO exception while reading file", e); } catch (UnsupportedFormatException e) { @@ -139,10 +139,11 @@ public final class UserHistoryDictIOUtils { * Adds all unigrams and bigrams in maps to OnAddWordListener. 
*/ @UsedForTesting - static void addWordsFromWordMap(final Map<Integer, String> unigrams, - final Map<Integer, Integer> frequencies, - final Map<Integer, ArrayList<PendingAttribute>> bigrams, final OnAddWordListener to) { - for (Map.Entry<Integer, String> entry : unigrams.entrySet()) { + static void addWordsFromWordMap(final TreeMap<Integer, String> unigrams, + final TreeMap<Integer, Integer> frequencies, + final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams, + final OnAddWordListener to) { + for (Entry<Integer, String> entry : unigrams.entrySet()) { final String word1 = entry.getValue(); final int unigramFrequency = frequencies.get(entry.getKey()); to.setUnigram(word1, null, unigramFrequency); diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index bb5b96a485c84fad7bad7b94637e58578484ced7..72ec5a302da28c0ed5803482c19a2e537a2196c4 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -39,10 +39,10 @@ import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Map.Entry; import java.util.Random; import java.util.Set; +import java.util.TreeMap; /** * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils. 
@@ -61,13 +61,13 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { private static final int USE_BYTE_ARRAY = 1; private static final int USE_BYTE_BUFFER = 2; - private static final List<String> sWords = CollectionUtils.newArrayList(); + private static final ArrayList<String> sWords = CollectionUtils.newArrayList(); private static final SparseArray<List<Integer>> sEmptyBigrams = CollectionUtils.newSparseArray(); private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray(); private static final SparseArray<List<Integer>> sChainBigrams = CollectionUtils.newSparseArray(); - private static final Map<String, List<String>> sShortcuts = CollectionUtils.newHashMap(); + private static final HashMap<String, List<String>> sShortcuts = CollectionUtils.newHashMap(); private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2); private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE = @@ -177,7 +177,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { * Adds unigrams to the dictionary. 
*/ private void addUnigrams(final int number, final FusionDictionary dict, - final List<String> words, final Map<String, List<String>> shortcutMap) { + final List<String> words, final HashMap<String, List<String>> shortcutMap) { for (int i = 0; i < number; ++i) { final String word = words.get(i); final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList(); @@ -234,7 +234,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } private void checkDictionary(final FusionDictionary dict, final List<String> words, - final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap) { + final SparseArray<List<Integer>> bigrams, + final HashMap<String, List<String>> shortcutMap) { assertNotNull(dict); // check unigram @@ -255,7 +256,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // check shortcut if (shortcutMap != null) { - for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) { + for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) { assertTrue(words.contains(entry.getKey())); final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, entry.getKey()); @@ -278,8 +279,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // Tests for readDictionaryBinary and writeDictionaryBinary private long timeReadingAndCheckDict(final File file, final List<String> words, - final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap, - final int bufferType) { + final SparseArray<List<Integer>> bigrams, + final HashMap<String, List<String>> shortcutMap, final int bufferType) { long now, diff = -1; FusionDictionary dict = null; @@ -302,7 +303,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // Tests for readDictionaryBinary and writeDictionaryBinary private String runReadAndWrite(final List<String> words, - final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> 
shortcuts, + final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts, final int bufferType, final FormatSpec.FormatOptions formatOptions, final String message) { File file = null; @@ -387,9 +388,9 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { private void checkWordMap(final List<String> expectedWords, final SparseArray<List<Integer>> expectedBigrams, - final Map<Integer, String> resultWords, - final Map<Integer, Integer> resultFrequencies, - final Map<Integer, ArrayList<PendingAttribute>> resultBigrams) { + final TreeMap<Integer, String> resultWords, + final TreeMap<Integer, Integer> resultFrequencies, + final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams) { // check unigrams final Set<String> actualWordsSet = new HashSet<String>(resultWords.values()); final Set<String> expectedWordsSet = new HashSet<String>(expectedWords); @@ -400,7 +401,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } // check bigrams - final Map<String, List<String>> expBigrams = new HashMap<String, List<String>>(); + final HashMap<String, List<String>> expBigrams = new HashMap<String, List<String>>(); for (int i = 0; i < expectedBigrams.size(); ++i) { final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); for (int w2 : expectedBigrams.valueAt(i)) { @@ -411,7 +412,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } } - final Map<String, List<String>> actBigrams = new HashMap<String, List<String>>(); + final HashMap<String, List<String>> actBigrams = new HashMap<String, List<String>>(); for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) { final String word1 = resultWords.get(entry.getKey()); final int unigramFreq = resultFrequencies.get(entry.getKey()); @@ -435,10 +436,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final SparseArray<List<Integer>> bigrams, final int bufferType) { FileInputStream inStream = 
null; - final Map<Integer, String> resultWords = CollectionUtils.newTreeMap(); - final Map<Integer, ArrayList<PendingAttribute>> resultBigrams = + final TreeMap<Integer, String> resultWords = CollectionUtils.newTreeMap(); + final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = CollectionUtils.newTreeMap(); - final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap(); + final TreeMap<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap(); long now = -1, diff = -1; try { @@ -446,8 +447,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { dictDecoder.openDictBuffer(); assertNotNull("Can't get buffer.", dictDecoder.getDictBuffer()); now = System.currentTimeMillis(); - BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, resultWords, resultFreqs, - resultBigrams); + dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams); diff = System.currentTimeMillis() - now; } catch (IOException e) { Log.e(TAG, "IOException", e); @@ -467,7 +467,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { return diff; } - private String runReadUnigramsAndBigramsBinary(final List<String> words, + private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words, final SparseArray<List<Integer>> bigrams, final int bufferType, final FormatSpec.FormatOptions formatOptions, final String message) { File file = null; @@ -496,8 +496,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { + " : " + message + " : " + outputOptions(bufferType, formatOptions); } - private void runReadUnigramsAndBigramsTests(final List<String> results, final int bufferType, - final FormatSpec.FormatOptions formatOptions) { + private void runReadUnigramsAndBigramsTests(final ArrayList<String> results, + final int bufferType, final FormatSpec.FormatOptions formatOptions) { results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram")); 
results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, @@ -507,7 +507,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { - final List<String> results = CollectionUtils.newArrayList(); + final ArrayList<String> results = CollectionUtils.newArrayList(); runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION2); runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); @@ -519,7 +519,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } public void testReadUnigramsAndBigramsBinaryWithByteArray() { - final List<String> results = CollectionUtils.newArrayList(); + final ArrayList<String> results = CollectionUtils.newArrayList(); runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION2); runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE);