From b3c98901c5fc1460b54cdf27d74405f27c88e74b Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Tue, 23 Oct 2012 17:14:12 +0900
Subject: [PATCH] Add auto detection and decoding of dictionary files. (A2)

Bug: 7388852
Change-Id: I25e755fc15f5b383acc046f668e9681efa4f0c2f
---
 .../latin/makedict/BinaryDictInputOutput.java |  11 +-
 .../latin/makedict/FusionDictionary.java      |   6 +
 .../dicttool/BinaryDictOffdeviceUtils.java    |  82 +++++++++++++-
 .../inputmethod/latin/dicttool/Compress.java  |  14 ++-
 .../BinaryDictOffdeviceUtilsTests.java        | 106 ++++++++++++++++++
 tools/dicttool/tests/etc/test-dicttool.sh     |   1 +
 6 files changed, 206 insertions(+), 14 deletions(-)
 create mode 100644 tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java

diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index da52369746..031306e1de 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -1698,6 +1698,14 @@ public final class BinaryDictInputOutput {
         return newDict;
     }
 
+    /**
+     * Convenience overload of isBinaryDictionary taking a file name instead of a File object.
+     * @param filename The name of the file to test.
+     */
+    public static boolean isBinaryDictionary(final String filename) {
+        return isBinaryDictionary(new File(filename));
+    }
+
     /**
      * Basic test to find out whether the file is a binary dictionary or not.
      *
@@ -1706,10 +1714,9 @@ public final class BinaryDictInputOutput {
      * @param filename The name of the file to test.
      * @return true if it's a binary dictionary, false otherwise
      */
-    public static boolean isBinaryDictionary(final String filename) {
+    public static boolean isBinaryDictionary(final File file) {
         FileInputStream inStream = null;
         try {
-            final File file = new File(filename);
             inStream = new FileInputStream(file);
             final ByteBuffer buffer = inStream.getChannel().map(
                     FileChannel.MapMode.READ_ONLY, 0, file.length());
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 7fd13d78bc..44537986b0 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -16,6 +16,7 @@
 
 package com.android.inputmethod.latin.makedict;
 
+import com.android.inputmethod.annotations.UsedForTesting;
 import com.android.inputmethod.latin.Constants;
 
 import java.util.ArrayList;
@@ -141,6 +142,11 @@ public final class FusionDictionary implements Iterable<Word> {
             return NOT_A_TERMINAL != mFrequency;
         }
 
+        @UsedForTesting
+        public int getFrequency() {
+            return mFrequency; // May be NOT_A_TERMINAL, the sentinel checked just above
+        }
+
         public boolean hasSeveralChars() {
             assert(mChars.length > 0);
             return 1 < mChars.length;
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 83c5d9ac6c..9dcd7eb425 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -16,19 +16,42 @@
 
 package com.android.inputmethod.latin.dicttool;
 
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
+
+import java.io.File;
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.util.ArrayList;
 
 /**
-* Class grouping utilities for offline dictionary making.
-*
-* Those should not be used on-device, essentially because they are quite
-* liberal about I/O and performance.
-*/
-public class BinaryDictOffdeviceUtils {
+ * Class grouping utilities for offline dictionary making.
+ *
+ * Those should not be used on-device, essentially because they are quite
+ * liberal about I/O and performance.
+ */
+public final class BinaryDictOffdeviceUtils {
+    // Prefix and suffix are arbitrary, the values do not really matter
+    private final static String PREFIX = "dicttool";
+    private final static String SUFFIX = ".tmp";
+
+    public final static String COMPRESSION = "compression";
+
+    public static class DecoderChainSpec {
+        ArrayList<String> mDecoderSpec = new ArrayList<String>(); // Decoding steps applied
+        File mFile; // The raw binary dictionary; set once the signature is recognized
+        public DecoderChainSpec addStep(final String stepDescription) {
+            mDecoderSpec.add(stepDescription);
+            return this; // Returns this same spec so that calls can be chained
+        }
+    }
+
     public static void copy(final InputStream input, final OutputStream output) throws IOException {
         final byte[] buffer = new byte[1000];
         final BufferedInputStream in = new BufferedInputStream(input);
@@ -38,4 +61,51 @@ public class BinaryDictOffdeviceUtils {
         in.close();
         out.close();
     }
+
+    /**
+     * Finds the raw binary dictionary inside a file that may be compressed several times.
+     *
+     * Only compression is undone at the moment: each layer is uncompressed to a temporary
+     * file until the binary dictionary signature is found.
+     * @return the raw file and the decoding steps applied, or null if none can be found.
+     */
+    public static DecoderChainSpec getRawBinaryDictionaryOrNull(final File src) {
+        return getRawBinaryDictionaryOrNullInternal(new DecoderChainSpec(), src);
+    }
+
+    // Recursive worker for getRawBinaryDictionaryOrNull: peels one layer off src per call.
+    private static DecoderChainSpec getRawBinaryDictionaryOrNullInternal(
+            final DecoderChainSpec spec, final File src) {
+        // TODO: arrange for the intermediary files of a successful decode to be deleted
+        if (BinaryDictInputOutput.isBinaryDictionary(src)) {
+            spec.mFile = src;
+            return spec;
+        }
+        // It's not a raw dictionary - try to see if it's compressed.
+        final File uncompressed = tryGetUncompressedFile(src);
+        if (null == uncompressed) return null;
+        final DecoderChainSpec newSpec = getRawBinaryDictionaryOrNullInternal(spec, uncompressed);
+        if (null != newSpec) return newSpec.addStep(COMPRESSION);
+        // Dead end: nothing will ever reference this temporary file, so remove it.
+        uncompressed.delete();
+        return null;
+    }
+
+    /**
+     * Uncompresses src into a new temporary file; returns null if src is not compressed data.
+     */
+    private static File tryGetUncompressedFile(final File src) {
+        File dst = null;
+        try {
+            dst = File.createTempFile(PREFIX, SUFFIX);
+            final FileOutputStream dstStream = new FileOutputStream(dst);
+            copy(Compress.getUncompressedStream(new BufferedInputStream(new FileInputStream(src))),
+                    new BufferedOutputStream(dstStream)); // #copy() closes the streams
+            return dst;
+        } catch (IOException e) {
+            // Presumably src is simply not compressed: don't leave the temporary file behind
+            if (null != dst) dst.delete();
+            return null;
+        }
+    }
 }
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java
index 49e90ada22..072de5c01a 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Compress.java
@@ -16,6 +16,8 @@
 
 package com.android.inputmethod.latin.dicttool;
 
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
@@ -27,12 +29,12 @@ import java.util.zip.GZIPOutputStream;
 
 public class Compress {
 
-    private static OutputStream getCompressedStream(final OutputStream out)
+    public static OutputStream getCompressedStream(final OutputStream out)
         throws java.io.IOException {
         return new GZIPOutputStream(out);
     }
 
-    private static InputStream getUncompressedStream(final InputStream in) throws IOException {
+    public static InputStream getUncompressedStream(final InputStream in) throws IOException {
         return new GZIPInputStream(in);
     }
 
@@ -55,9 +57,9 @@ public class Compress {
             final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
             final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
             final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
-                    : new FileInputStream(new File(inFilename));
+                    : new BufferedInputStream(new FileInputStream(new File(inFilename)));
             final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
-                    : new FileOutputStream(new File(outFilename));
+                    : new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
             BinaryDictOffdeviceUtils.copy(input, new GZIPOutputStream(output));
         }
     }
@@ -81,9 +83,9 @@ public class Compress {
             final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
             final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
             final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
-                    : new FileInputStream(new File(inFilename));
+                    : new BufferedInputStream(new FileInputStream(new File(inFilename)));
             final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
-                    : new FileOutputStream(new File(outFilename));
+                    : new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
             BinaryDictOffdeviceUtils.copy(new GZIPInputStream(input), output);
         }
     }
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
new file mode 100644
index 0000000000..7a686e556a
--- /dev/null
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.dicttool;
+
+import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
+import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary;
+import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
+import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
+import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
+
+import junit.framework.TestCase;
+
+import java.io.File;
+import java.io.BufferedOutputStream;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+/**
+ * Unit tests for BinaryDictOffdeviceUtils
+ */
+public class BinaryDictOffdeviceUtilsTests extends TestCase {
+    private static final int TEST_FREQ = 37; // Some arbitrary value unlikely to happen by chance
+
+    public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
+        // Create a thrice-compressed dictionary file.
+        final FusionDictionary dict = new FusionDictionary(new Node(),
+                new DictionaryOptions(new HashMap<String, String>(),
+                        false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
+        dict.add("foo", TEST_FREQ, null, false /* isNotAWord */);
+        dict.add("fta", 1, null, false /* isNotAWord */);
+        dict.add("ftb", 1, null, false /* isNotAWord */);
+        dict.add("bar", 1, null, false /* isNotAWord */);
+        dict.add("fool", 1, null, false /* isNotAWord */);
+
+        final File dst = File.createTempFile("testGetRawDict", ".tmp");
+        final OutputStream out = Compress.getCompressedStream(
+                Compress.getCompressedStream(Compress.getCompressedStream(
+                        new BufferedOutputStream(new FileOutputStream(dst)))));
+
+        BinaryDictInputOutput.writeDictionaryBinary(out, dict, new FormatOptions(2, false));
+
+        // Test for an actually compressed dictionary and its contents
+        final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
+                BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst);
+        assertNotNull("Compressed dictionary was not recognized", decodeSpec);
+        for (final String step : decodeSpec.mDecoderSpec) {
+            assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step);
+        }
+        assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size());
+        final FileInputStream inStream = new FileInputStream(decodeSpec.mFile);
+        final ByteBuffer buffer = inStream.getChannel().map(
+                FileChannel.MapMode.READ_ONLY, 0, decodeSpec.mFile.length());
+        inStream.close(); // The mapping stays valid after its channel is closed
+        final FusionDictionary resultDict = BinaryDictInputOutput.readDictionaryBinary(
+                new BinaryDictInputOutput.ByteBufferWrapper(buffer), null /* dict */);
+        assertEquals("Dictionary can't be read back correctly",
+                TEST_FREQ, resultDict.findWordInTree(resultDict.mRoot, "foo").getFrequency());
+    }
+
+    public void testGetRawDictFails() throws IOException {
+        // Create a 1KB file of garbage: write(int) only writes the low-order byte (0x78)
+        final File dst = File.createTempFile("testGetRawDict", ".tmp");
+        final OutputStream out = new BufferedOutputStream(new FileOutputStream(dst));
+        for (int i = 0; i < 1024; ++i) {
+            out.write(0x12345678);
+        }
+        out.close();
+
+        // Test that a random data file actually fails
+        assertNull("Wrongly identified data file",
+                BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst));
+
+        final File gzDst = File.createTempFile("testGetRawDict", ".tmp");
+        final OutputStream gzOut =
+                Compress.getCompressedStream(new BufferedOutputStream(new FileOutputStream(gzDst)));
+        for (int i = 0; i < 1024; ++i) {
+            gzOut.write(0x12345678);
+        }
+        gzOut.close();
+
+        // Test that a compressed random data file actually fails
+        assertNull("Wrongly identified data file",
+                BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(gzDst));
+    }
+}
diff --git a/tools/dicttool/tests/etc/test-dicttool.sh b/tools/dicttool/tests/etc/test-dicttool.sh
index 1283be21a5..0f3ed6d62e 100755
--- a/tools/dicttool/tests/etc/test-dicttool.sh
+++ b/tools/dicttool/tests/etc/test-dicttool.sh
@@ -14,3 +14,4 @@
 # limitations under the License.
 
 java -classpath ${ANDROID_HOST_OUT}/framework/junit.jar:${ANDROID_HOST_OUT}/framework/dicttool_aosp.jar junit.textui.TestRunner com.android.inputmethod.latin.makedict.BinaryDictInputOutputTest
+java -classpath ${ANDROID_HOST_OUT}/framework/junit.jar:${ANDROID_HOST_OUT}/framework/dicttool_aosp.jar junit.textui.TestRunner com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtilsTests
-- 
GitLab