Skip to content
Snippets Groups Projects
Commit 4cc9c8b5 authored by Keisuke Kuroyanagi, committed by Android (Google) Code Review
Browse files

Merge "Quit reading unigram probability in Ver4PatriciaTrieNodeReader."

parents a2251ef4 cb4f5441
No related branches found
No related tags found
No related merge requests found
......@@ -63,9 +63,14 @@ const WordAttributes LanguageModelDictContent::getWordAttributes(const WordIdArr
int probability = NOT_A_PROBABILITY;
if (mHasHistoricalInfo) {
const int rawProbability = ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), headerPolicy)
+ ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */);
probability = std::min(rawProbability, MAX_PROBABILITY);
probabilityEntry.getHistoricalInfo(), headerPolicy);
if (rawProbability == NOT_A_PROBABILITY) {
// The entry should not be treated as a valid entry.
continue;
}
probability = std::min(rawProbability
+ ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */),
MAX_PROBABILITY);
} else {
probability = probabilityEntry.getProbability();
}
......
......@@ -51,26 +51,17 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
const int parentPos =
DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
int codePoints[MAX_WORD_LENGTH];
const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos);
// Code point table is not used for ver4 dictionaries.
const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictBuf, flags, MAX_WORD_LENGTH, nullptr /* codePointTable */, codePoints, &pos);
int terminalIdFieldPos = NOT_A_DICT_POS;
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
int probability = NOT_A_PROBABILITY;
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
terminalIdFieldPos = pos;
if (usesAdditionalBuffer) {
terminalIdFieldPos += mBuffer->getOriginalBufferSize();
}
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
// TODO: Quit reading probability here.
const ProbabilityEntry probabilityEntry =
mLanguageModelDictContent->getProbabilityEntry(terminalId);
if (probabilityEntry.hasHistoricalInfo()) {
probability = ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
} else {
probability = probabilityEntry.getProbability();
}
}
int childrenPosFieldPos = pos;
if (usesAdditionalBuffer) {
......@@ -91,8 +82,8 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
// The destination position is stored at the same place as the parent position.
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
} else {
return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
return PtNodeParams(headPos, flags, parentPos, codePointCount, codePoints,
terminalIdFieldPos, terminalId, NOT_A_PROBABILITY, childrenPosFieldPos, childrenPos,
newSiblingNodePos);
}
}
......
......@@ -29,15 +29,12 @@ class LanguageModelDictContent;
/*
* This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
* node and reads node attributes including probability form language model.
* node and reads node attributes.
*/
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
public:
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
const LanguageModelDictContent *const languageModelDictContent,
const HeaderPolicy *const headerPolicy)
: mBuffer(buffer), mLanguageModelDictContent(languageModelDictContent),
mHeaderPolicy(headerPolicy) {}
explicit Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer)
: mBuffer(buffer) {}
~Ver4PatriciaTrieNodeReader() {}
......@@ -50,8 +47,6 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
const BufferWithExtendableBuffer *const mBuffer;
const LanguageModelDictContent *const mLanguageModelDictContent;
const HeaderPolicy *const mHeaderPolicy;
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
const int siblingNodePos) const;
......
......@@ -56,13 +56,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
if (!ptNodeParams.isValid()) {
break;
}
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
if (isTerminal && mHeaderPolicy->isDecayingDict()) {
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
// valid terminal DicNode.
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
}
const bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
wordId, ptNodeParams.getCodePointArrayView());
......
......@@ -45,8 +45,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mDictBuffer(mBuffers->getWritableTrieBuffer()),
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
mPtNodeArrayReader(mDictBuffer),
mNodeReader(mDictBuffer), mPtNodeArrayReader(mDictBuffer),
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
&mPtNodeArrayReader, &mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
......
......@@ -73,8 +73,7 @@ bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
int *const outUnigramCount, int *const outBigramCount) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
mBuffers->getLanguageModelDictContent(), headerPolicy);
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer());
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable());
......@@ -137,8 +136,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
}
// Create policy instances for the GCed dictionary.
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getLanguageModelDictContent(), headerPolicy);
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer());
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable());
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment