From 2b7110ff1f55f33ad411efef4184ce822040a10b Mon Sep 17 00:00:00 2001
From: Yuichiro Hanada <yhanada@google.com>
Date: Thu, 17 Oct 2013 19:10:56 +0900
Subject: [PATCH] (2/2) Implement insertWord in Ver4DictUpdater.

Change-Id: I2328a9df0a009b564e8acaf4180f9b0c1ed0901a
---
 .../makedict/SparseTableContentReader.java    |  16 +--
 .../makedict/SparseTableContentUpdater.java   | 123 +++++++++++++++++
 .../latin/makedict/Ver4DictDecoder.java       |   7 +-
 .../latin/makedict/Ver4DictUpdater.java       | 128 +++++++++++++++++-
 .../makedict/BinaryDictIOUtilsTests.java      |   3 +-
 5 files changed, 257 insertions(+), 20 deletions(-)
 create mode 100644 java/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java

diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java
index 00f401ea75..06088b6516 100644
--- a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java
+++ b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentReader.java
@@ -40,16 +40,16 @@ public class SparseTableContentReader {
         public void read(final DictBuffer buffer);
     }
 
-    private final int mContentCount;
-    private final int mBlockSize;
+    protected final int mContentCount;
+    protected final int mBlockSize;
     protected final File mBaseDir;
-    private final File mLookupTableFile;
-    private final File[] mAddressTableFiles;
-    private final File[] mContentFiles;
-    private DictBuffer mLookupTableBuffer;
-    private final DictBuffer[] mAddressTableBuffers;
+    protected final File mLookupTableFile;
+    protected final File[] mAddressTableFiles;
+    protected final File[] mContentFiles;
+    protected DictBuffer mLookupTableBuffer;
+    protected final DictBuffer[] mAddressTableBuffers;
     private final DictBuffer[] mContentBuffers;
-    private final DictionaryBufferFactory mFactory;
+    protected final DictionaryBufferFactory mFactory;
 
     /**
      * Sole constructor of SparseTableContentReader.
diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java
new file mode 100644
index 0000000000..4518f21b96
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/makedict/SparseTableContentUpdater.java
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin.makedict;
+
+import com.android.inputmethod.latin.makedict.DictDecoder.DictionaryBufferFactory;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * An auxiliary class for updating data associated with SparseTable.
+ *
+ * The table files are grown through append-mode output streams, while existing entries are
+ * overwritten in place through the buffers inherited from SparseTableContentReader.
+ */
+public class SparseTableContentUpdater extends SparseTableContentReader {
+    protected OutputStream mLookupTableOutStream;
+    protected OutputStream[] mAddressTableOutStreams;
+    protected OutputStream[] mContentOutStreams;
+
+    /**
+     * Creates an updater. All arguments are forwarded unchanged to SparseTableContentReader;
+     * no output stream is opened until openStreamsAndBuffers() is called.
+     */
+    public SparseTableContentUpdater(final String name, final int blockSize,
+            final File baseDir, final String[] contentFilenames, final String[] contentIds,
+            final DictionaryBufferFactory factory) {
+        super(name, blockSize, baseDir, contentFilenames, contentIds, factory);
+        mAddressTableOutStreams = new OutputStream[mContentCount];
+        mContentOutStreams = new OutputStream[mContentCount];
+    }
+
+    /**
+     * Calls openBuffers() and then opens one append-mode output stream for the lookup table and
+     * for each address table and content file.
+     *
+     * If a stream cannot be opened, the streams opened so far are closed before the exception
+     * is rethrown, so that a failed call does not leave files open.
+     *
+     * @throws IOException if a stream cannot be opened.
+     */
+    protected void openStreamsAndBuffers() throws IOException {
+        openBuffers();
+        try {
+            mLookupTableOutStream = new FileOutputStream(mLookupTableFile, true /* append */);
+            for (int i = 0; i < mContentCount; ++i) {
+                mAddressTableOutStreams[i] = new FileOutputStream(mAddressTableFiles[i],
+                        true /* append */);
+                mContentOutStreams[i] = new FileOutputStream(mContentFiles[i],
+                        true /* append */);
+            }
+        } catch (final IOException e) {
+            try {
+                close();
+            } catch (final IOException ignored) {
+                // The failure to open is the error worth reporting; keep it.
+            }
+            throw e;
+        }
+    }
+
+    /**
+     * Set the contentIndex-th element of the contentId-th table.
+     *
+     * The lookup table and the address tables are extended first when contentIndex falls in a
+     * block that has no entry yet.
+     *
+     * @param contentId the id of the content table.
+     * @param contentIndex the index where to set the value.
+     * @param value the value to set.
+     * @throws IOException if writing to or reopening one of the table files fails.
+     */
+    protected void setContentValue(final int contentId, final int contentIndex, final int value)
+            throws IOException {
+        if ((contentIndex / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES
+                >= mLookupTableBuffer.limit()) {
+            // Need to extend the lookup table
+            final int currentSize = mLookupTableBuffer.limit()
+                    / SparseTable.SIZE_OF_INT_IN_BYTES;
+            final int target = contentIndex / mBlockSize + 1;
+            for (int i = currentSize; i < target; ++i) {
+                BinaryDictEncoderUtils.writeUIntToStream(mLookupTableOutStream,
+                        SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES);
+            }
+            // We need to reopen the byte buffer of the lookup table because a MappedByteBuffer in
+            // Java isn't expanded automatically when the underlying file is expanded.
+            reopenLookupTable();
+        }
+
+        mLookupTableBuffer.position((contentIndex / mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES);
+        int posInAddressTable = mLookupTableBuffer.readInt();
+        if (posInAddressTable == SparseTable.NOT_EXIST) {
+            // Need to extend the address table
+            // Step back over the entry just read so that it can be overwritten.
+            mLookupTableBuffer.position(mLookupTableBuffer.position()
+                    - SparseTable.SIZE_OF_INT_IN_BYTES);
+            // NOTE(review): the value stored here is limit() / mBlockSize, but below it is used
+            // as a byte offset. Confirm the unit against SparseTableContentReader.
+            posInAddressTable = mAddressTableBuffers[0].limit() / mBlockSize;
+            BinaryDictEncoderUtils.writeUIntToDictBuffer(mLookupTableBuffer,
+                    posInAddressTable, SparseTable.SIZE_OF_INT_IN_BYTES);
+            for (int i = 0; i < mContentCount; ++i) {
+                for (int j = 0; j < mBlockSize; ++j) {
+                    BinaryDictEncoderUtils.writeUIntToStream(mAddressTableOutStreams[i],
+                            SparseTable.NOT_EXIST, SparseTable.SIZE_OF_INT_IN_BYTES);
+                }
+            }
+            // We need to reopen the byte buffers of the address tables because a MappedByteBuffer
+            // in Java isn't expanded automatically when the underlying file is expanded.
+            reopenAddressTables();
+        }
+        posInAddressTable += (contentIndex % mBlockSize) * SparseTable.SIZE_OF_INT_IN_BYTES;
+
+        mAddressTableBuffers[contentId].position(posInAddressTable);
+        BinaryDictEncoderUtils.writeUIntToDictBuffer(mAddressTableBuffers[contentId],
+                value, SparseTable.SIZE_OF_INT_IN_BYTES);
+    }
+
+    /** Flushes the appended lookup table entries and obtains a new buffer for the file. */
+    private void reopenLookupTable() throws IOException {
+        mLookupTableOutStream.flush();
+        mLookupTableBuffer = mFactory.getDictionaryBuffer(mLookupTableFile);
+    }
+
+    /** Flushes the appended address table entries and obtains a new buffer for each file. */
+    private void reopenAddressTables() throws IOException {
+        for (int i = 0; i < mContentCount; ++i) {
+            mAddressTableOutStreams[i].flush();
+            mAddressTableBuffers[i] = mFactory.getDictionaryBuffer(mAddressTableFiles[i]);
+        }
+    }
+
+    /**
+     * Closes every output stream that has been opened.
+     *
+     * Streams that were never opened are skipped, and a failure to close one stream does not
+     * prevent the remaining ones from being closed.
+     *
+     * @throws IOException the first failure met while closing, once all streams were handled.
+     */
+    protected void close() throws IOException {
+        IOException firstFailure = closeStream(mLookupTableOutStream, null);
+        for (final OutputStream stream : mAddressTableOutStreams) {
+            firstFailure = closeStream(stream, firstFailure);
+        }
+        for (final OutputStream stream : mContentOutStreams) {
+            firstFailure = closeStream(stream, firstFailure);
+        }
+        if (firstFailure != null) {
+            throw firstFailure;
+        }
+    }
+
+    /**
+     * Closes stream if it is not null.
+     *
+     * @return firstFailure when it is set, otherwise the exception raised by this close, if any.
+     */
+    private static IOException closeStream(final OutputStream stream,
+            final IOException firstFailure) {
+        if (stream == null) {
+            return firstFailure;
+        }
+        try {
+            stream.close();
+        } catch (final IOException e) {
+            return firstFailure != null ? firstFailure : e;
+        }
+        return firstFailure;
+    }
+}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index ae344a2cdc..f0fed3fda3 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -46,7 +46,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
     protected static final int FILETYPE_BIGRAM_FREQ = 4;
     protected static final int FILETYPE_SHORTCUT = 5;
 
-    private final File mDictDirectory;
+    protected final File mDictDirectory;
     protected final DictionaryBufferFactory mBufferFactory;
     protected DictBuffer mDictBuffer;
     protected DictBuffer mFrequencyBuffer;
@@ -178,7 +178,8 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
         }
 
         // TODO: Consolidate this method and BigramContentWriter.getContentFilenames.
-        private static String[] getContentFilenames(final String name, final boolean hasTimestamp) {
+        protected static String[] getContentFilenames(final String name,
+                final boolean hasTimestamp) {
             final String[] contentFilenames;
             if (hasTimestamp) {
                 contentFilenames = new String[] { name + FormatSpec.BIGRAM_FILE_EXTENSION,
@@ -190,7 +191,7 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
         }
 
         // TODO: Consolidate this method and BigramContentWriter.getContentIds.
-        private static String[] getContentIds(final boolean hasTimestamp) {
+        protected static String[] getContentIds(final boolean hasTimestamp) {
             final String[] contentIds;
             if (hasTimestamp) {
                 contentIds = new String[] { FormatSpec.BIGRAM_FREQ_CONTENT_ID,
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java
index d1e7238121..65860ee728 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictUpdater.java
@@ -22,6 +22,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import com.android.inputmethod.latin.utils.CollectionUtils;
 
 import android.util.Log;
 
@@ -31,6 +32,7 @@ import java.io.IOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Iterator;
 
 /**
  * An implementation of DictUpdater for version 4 binary dictionary.
@@ -50,6 +52,131 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
         mFrequencyFile = getFile(FILETYPE_FREQUENCY);
     }
 
+    /**
+     * Appends the bigrams of a terminal to the bigram content file and records where they
+     * start through setContentValue.
+     */
+    private static class BigramContentUpdater extends SparseTableContentUpdater {
+        private final boolean mHasTimestamp;
+
+        public BigramContentUpdater(final String name, final File baseDir,
+                final boolean hasTimestamp) {
+            super(name + FormatSpec.BIGRAM_FILE_EXTENSION,
+                    FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
+                    BigramContentReader.getContentFilenames(name, hasTimestamp),
+                    BigramContentReader.getContentIds(hasTimestamp),
+                    new DictionaryBufferFromWritableByteBufferFactory());
+            mHasTimestamp = hasTimestamp;
+        }
+
+        /**
+         * Writes the bigram entries of the terminal terminalId.
+         *
+         * When entries is null or empty, the terminal is marked with SparseTable.NOT_EXIST.
+         * The streams opened for the update are closed on every exit path.
+         *
+         * @param terminalId the id of the terminal owning the bigrams; must not be negative.
+         * @param frequency the frequency passed to makeBigramFlags next to each entry's own.
+         * @param entries the bigrams to write; mAddress of each entry is written as is.
+         * @throws IOException if a file cannot be opened or written.
+         */
+        public void insertBigramEntries(final int terminalId, final int frequency,
+                final ArrayList<PendingAttribute> entries) throws IOException {
+            if (terminalId < 0) {
+                throw new RuntimeException("Invalid terminal id : " + terminalId);
+            }
+            openStreamsAndBuffers();
+            try {
+                if (entries == null || entries.isEmpty()) {
+                    setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
+                            SparseTable.NOT_EXIST);
+                    return;
+                }
+                // The streams append, so the new entries start at the current end of the file.
+                final int positionOfEntries =
+                        (int) mContentFiles[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX].length();
+                setContentValue(FormatSpec.BIGRAM_FREQ_CONTENT_INDEX, terminalId,
+                        positionOfEntries);
+
+                final OutputStream contentStream =
+                        mContentOutStreams[FormatSpec.BIGRAM_FREQ_CONTENT_INDEX];
+                final Iterator<PendingAttribute> bigramIterator = entries.iterator();
+                while (bigramIterator.hasNext()) {
+                    final PendingAttribute entry = bigramIterator.next();
+                    // hasNext() after next() tells whether more entries follow this one.
+                    final int flags = BinaryDictEncoderUtils.makeBigramFlags(
+                            bigramIterator.hasNext(), 0 /* offset */, entry.mFrequency,
+                            frequency, "" /* word */);
+                    BinaryDictEncoderUtils.writeUIntToStream(contentStream, flags,
+                            FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
+                    BinaryDictEncoderUtils.writeUIntToStream(contentStream, entry.mAddress,
+                            FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
+                }
+            } finally {
+                close();
+            }
+        }
+    }
+
+    /**
+     * Appends the shortcuts of a terminal to the shortcut content file and records where they
+     * start through setContentValue.
+     */
+    private static class ShortcutContentUpdater extends SparseTableContentUpdater {
+        public ShortcutContentUpdater(final String name, final File baseDir) {
+            super(name + FormatSpec.SHORTCUT_FILE_EXTENSION,
+                    FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, baseDir,
+                    new String[] { name + FormatSpec.SHORTCUT_FILE_EXTENSION },
+                    new String[] { FormatSpec.SHORTCUT_CONTENT_ID },
+                    new DictionaryBufferFromWritableByteBufferFactory());
+        }
+
+        /**
+         * Writes the shortcuts of the terminal terminalId.
+         *
+         * When shortcuts is null or empty, the terminal is marked with SparseTable.NOT_EXIST.
+         * The streams opened for the update are closed on every exit path.
+         *
+         * @param terminalId the id of the terminal owning the shortcuts; must not be negative.
+         * @param shortcuts the shortcut targets to write.
+         * @throws IOException if a file cannot be opened or written.
+         */
+        public void insertShortcuts(final int terminalId,
+                final ArrayList<WeightedString> shortcuts) throws IOException {
+            if (terminalId < 0) {
+                throw new RuntimeException("Invalid terminal id : " + terminalId);
+            }
+            openStreamsAndBuffers();
+            try {
+                if (shortcuts == null || shortcuts.isEmpty()) {
+                    setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
+                            SparseTable.NOT_EXIST);
+                    return;
+                }
+
+                // The streams append, so the new entries start at the current end of the file.
+                final int positionOfShortcuts =
+                        (int) mContentFiles[FormatSpec.SHORTCUT_CONTENT_INDEX].length();
+                setContentValue(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId,
+                        positionOfShortcuts);
+
+                final OutputStream contentStream =
+                        mContentOutStreams[FormatSpec.SHORTCUT_CONTENT_INDEX];
+                final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
+                while (shortcutIterator.hasNext()) {
+                    final WeightedString target = shortcutIterator.next();
+                    final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
+                            shortcutIterator.hasNext(), target.mFrequency);
+                    BinaryDictEncoderUtils.writeUIntToStream(contentStream, shortcutFlags,
+                            FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
+                    CharEncoding.writeString(contentStream, target.mWord);
+                }
+            } finally {
+                close();
+            }
+        }
+    }
+
     @Override
     public void deleteWord(final String word) throws IOException, UnsupportedFormatException {
         if (mDictBuffer == null) openDictBuffer();
@@ -574,6 +661,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
                 true /* append */);
         BinaryDictEncoderUtils.writeUIntToStream(frequencyStream, frequency,
                 FormatSpec.FREQUENCY_AND_FLAGS_SIZE);
+        frequencyStream.close();
     }
 
     private void insertTerminalPosition(final int posOfTerminal) throws IOException {
@@ -581,14 +669,55 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
                 getFile(FILETYPE_TERMINAL_ADDRESS_TABLE), true /* append */);
         BinaryDictEncoderUtils.writeUIntToStream(terminalPosStream, posOfTerminal,
                 FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
+        terminalPosStream.close();
     }
 
-    private void insertBigrams(final int terminalId, final ArrayList<PendingAttribute> bigrams) {
-        // TODO: Implement.
+    /**
+     * Writes the bigrams of the terminal terminalId through a BigramContentUpdater.
+     *
+     * The targets arrive as PtNode addresses; each one is read back to obtain its terminal id
+     * before being handed to the updater.
+     *
+     * @param terminalId the id of the terminal owning the bigrams.
+     * @param frequency the frequency forwarded to insertBigramEntries.
+     * @param bigramAddresses the bigram targets, with mAddress holding a PtNode address.
+     * @throws RuntimeException if a bigram target is not a terminal.
+     */
+    private void insertBigrams(final int terminalId, final int frequency,
+            final ArrayList<PendingAttribute> bigramAddresses)
+                    throws IOException, UnsupportedFormatException {
+        openDictBuffer();
+        // NOTE(review): hasTimestamp is hard-coded to false here; confirm this is intended.
+        final BigramContentUpdater updater = new BigramContentUpdater(mDictDirectory.getName(),
+                mDictDirectory, false);
+
+        // Convert addresses to terminal ids.
+        final ArrayList<PendingAttribute> bigrams = CollectionUtils.newArrayList();
+        mDictBuffer.position(0);
+        final FileHeader header = readHeader();
+        for (PendingAttribute attr : bigramAddresses) {
+            mDictBuffer.position(attr.mAddress);
+            final Ver4PtNodeInfo info = readVer4PtNodeInfo(attr.mAddress, header.mFormatOptions);
+            if (info.mTerminalId == PtNode.NOT_A_TERMINAL) {
+                throw new RuntimeException("We can't have a bigram target that's not a terminal.");
+            }
+            // NOTE(review): attr.mFrequency is not used; the frequency argument is stored for
+            // every bigram instead. Confirm this is intended.
+            bigrams.add(new PendingAttribute(frequency, info.mTerminalId));
+        }
+        updater.insertBigramEntries(terminalId, frequency, bigrams);
+        close();
     }
 
-    private void insertShortcuts(final int terminalId, final ArrayList<WeightedString> shortcuts) {
-        // TODO: Implement.
+    /**
+     * Writes the shortcuts of the terminal terminalId by delegating to a ShortcutContentUpdater
+     * created for the dictionary directory.
+     */
+    private void insertShortcuts(final int terminalId, final ArrayList<WeightedString> shortcuts)
+            throws IOException {
+        final ShortcutContentUpdater updater = new ShortcutContentUpdater(mDictDirectory.getName(),
+                mDictDirectory);
+        updater.insertShortcuts(terminalId, shortcuts);
     }
 
     private void openBuffersAndStream() throws IOException {
@@ -597,7 +708,10 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
     }
 
     private void close() throws IOException {
-        mDictStream.close();
+        if (mDictStream != null) {
+            mDictStream.close();
+            mDictStream = null;
+        }
         mDictBuffer = null;
         mFrequencyBuffer = null;
         mTerminalAddressTableBuffer = null;
@@ -620,7 +734,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
         mDictBuffer.put((byte) newFlags);
 
         updateFrequency(terminalId, frequency);
-        insertBigrams(terminalId,
+        insertBigrams(terminalId, frequency,
                 DynamicBinaryDictIOUtils.resolveBigramPositions(this, bigramStrings));
         insertShortcuts(terminalId, shortcuts);
     }
@@ -650,7 +764,7 @@ public class Ver4DictUpdater extends Ver4DictDecoder implements DictUpdater {
         insertTerminalPosition(posOfTerminal);
         close();
 
-        insertBigrams(newTerminalId,
+        insertBigrams(newTerminalId, frequency,
                 DynamicBinaryDictIOUtils.resolveBigramPositions(this, bigramStrings));
         insertShortcuts(newTerminalId, shortcuts);
     }
diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
index b831f1fb3b..8bea3c074a 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java
@@ -330,8 +330,8 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
 
     public void testInsertWordWithBigrams() {
         runTestInsertWordWithBigrams(BinaryDictUtils.VERSION3_WITH_DYNAMIC_UPDATE);
-        // TODO: Add a test for version 4.
-        // runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
+        // Run the same scenario against a version 4 dictionary.
+        runTestInsertWordWithBigrams(BinaryDictUtils.VERSION4_WITH_DYNAMIC_UPDATE);
     }
 
     private void runTestRandomWords(final FormatOptions formatOptions) {
-- 
GitLab