Skip to content
Snippets Groups Projects
Commit cc356d04 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Implement ver4 dictionary bigram removing methods."

parents b87ed461 ff4b1d1f
No related branches found
No related tags found
No related merge requests found
Showing with 122 additions and 7 deletions
...@@ -91,6 +91,33 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget ...@@ -91,6 +91,33 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
return mBigramDictContent->copyBigramList(bigramListPos, writingPos); return mBigramDictContent->copyBigramList(bigramListPos, writingPos);
} }
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
// Bigram list does't exist.
return false;
}
const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos);
if (entryPosToUpdate == NOT_A_DICT_POS) {
// Bigram entry doesn't exist.
return false;
}
int readingPos = entryPosToUpdate;
bool hasNext = false;
int probability = NOT_A_PROBABILITY;
int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
&originalTargetTerminalId, &readingPos);
if (targetTerminalId != originalTargetTerminalId) {
// Bigram entry doesn't exist.
return false;
}
int writingPos = entryPosToUpdate;
// Remove bigram entry by overwriting target terminal Id.
return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext,
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos);
}
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
const int bigramListPos) const { const int bigramListPos) const {
bool hasNext = true; bool hasNext = true;
......
...@@ -42,6 +42,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { ...@@ -42,6 +42,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability, bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
bool *const outAddedNewEntry); bool *const outAddedNewEntry);
bool removeEntry(const int terminalId, const int targetTerminalId);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
......
...@@ -31,9 +31,12 @@ void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbabili ...@@ -31,9 +31,12 @@ void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbabili
if (outHasNext) { if (outHasNext) {
*outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0; *outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
} }
const int targetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
if (outTargetTerminalId) { if (outTargetTerminalId) {
*outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( *outTargetTerminalId =
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); (targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
Ver4DictConstants::NOT_A_TERMINAL_ID : targetTerminalId;
} }
} }
...@@ -45,7 +48,10 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability ...@@ -45,7 +48,10 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
return false; return false;
} }
return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalId, const int targetTerminalIdToWrite =
(targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos); Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos);
} }
......
...@@ -42,6 +42,10 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16; ...@@ -42,6 +42,10 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3; const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
// Value stored in the target terminal ID field of a bigram entry to mark the entry as invalid
// (for example, after the bigram has been removed). It is the largest unsigned integer that fits
// in BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE bytes, i.e. 0xFFFFFF for the 3-byte field.
// BigramDictContent converts between this stored value and NOT_A_TERMINAL_ID when reading and
// writing bigram entries.
const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
(1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1; const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F; const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80; const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
......
...@@ -47,6 +47,7 @@ class Ver4DictConstants { ...@@ -47,6 +47,7 @@ class Ver4DictConstants {
static const int BIGRAM_FLAGS_FIELD_SIZE; static const int BIGRAM_FLAGS_FIELD_SIZE;
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
static const int BIGRAM_PROBABILITY_MASK; static const int BIGRAM_PROBABILITY_MASK;
static const int BIGRAM_HAS_NEXT_MASK; static const int BIGRAM_HAS_NEXT_MASK;
......
...@@ -192,8 +192,8 @@ bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( ...@@ -192,8 +192,8 @@ bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) { const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
// TODO: Implement. return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
return false; targetPtNodeParam->getTerminalId());
} }
} }
...@@ -188,8 +188,31 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le ...@@ -188,8 +188,31 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) { const int *const word1, const int length1) {
// TODO: Implement. if (!mBuffers.get()->isUpdatable()) {
return false; AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
}
if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
mDictBuffer.getTailPosition());
return false;
}
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {
return false;
}
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (word1Pos == NOT_A_DICT_POS) {
return false;
}
if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
mBigramCount--;
return true;
} else {
return false;
}
} }
void Ver4PatriciaTriePolicy::flush(const char *const filePath) { void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
......
...@@ -246,4 +246,56 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { ...@@ -246,4 +246,56 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
} }
/**
 * Checks that bigrams can be removed from an updatable ver4 dictionary.
 *
 * The test creates an empty dictionary file, adds three unigrams and four bigrams between
 * them, and then verifies that each bigram stops being valid after it is removed, that a
 * removed bigram can be added again, and that removing a bigram that does not exist leaves
 * it invalid.
 */
public void testRemoveBigramWords() {
    final String dictVersion = Long.toString(System.currentTimeMillis());
    final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
            getDictionaryOptions(TEST_LOCALE, dictVersion));
    final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
    try {
        encoder.writeDictionary(dict, FORMAT_OPTIONS);
    } catch (IOException e) {
        Log.e(TAG, "IOException while writing dictionary", e);
    } catch (UnsupportedFormatException e) {
        Log.e(TAG, "Unsupported format", e);
    }
    final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
    final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
            0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
            Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    assertTrue(binaryDictionary.isValidDictionary());

    final int unigramProbability = 100;
    final int bigramProbability = 10;
    binaryDictionary.addUnigramWord("aaa", unigramProbability);
    binaryDictionary.addUnigramWord("abb", unigramProbability);
    binaryDictionary.addUnigramWord("bcc", unigramProbability);
    binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
    binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
    binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
    binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);

    assertTrue(binaryDictionary.isValidBigram("aaa", "abb"));
    assertTrue(binaryDictionary.isValidBigram("aaa", "bcc"));
    assertTrue(binaryDictionary.isValidBigram("abb", "aaa"));
    assertTrue(binaryDictionary.isValidBigram("abb", "bcc"));

    // A removed bigram must become invalid and must be addable again.
    binaryDictionary.removeBigramWords("aaa", "abb");
    assertFalse(binaryDictionary.isValidBigram("aaa", "abb"));
    binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
    assertTrue(binaryDictionary.isValidBigram("aaa", "abb"));

    binaryDictionary.removeBigramWords("aaa", "bcc");
    assertFalse(binaryDictionary.isValidBigram("aaa", "bcc"));
    binaryDictionary.removeBigramWords("abb", "aaa");
    assertFalse(binaryDictionary.isValidBigram("abb", "aaa"));
    binaryDictionary.removeBigramWords("abb", "bcc");
    assertFalse(binaryDictionary.isValidBigram("abb", "bcc"));

    // Removing the re-added bigram must work as well.
    binaryDictionary.removeBigramWords("aaa", "abb");
    assertFalse(binaryDictionary.isValidBigram("aaa", "abb"));

    // Test remove non-existing bigram operation.
    binaryDictionary.removeBigramWords("aaa", "abb");
    assertFalse(binaryDictionary.isValidBigram("aaa", "abb"));
    binaryDictionary.removeBigramWords("bcc", "aaa");
    assertFalse(binaryDictionary.isValidBigram("bcc", "aaa"));
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment