diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h index bc9d57671ed54aa6f0d1df7f3dfe3aefb07504bb..178b06554dea74cc59704212c24512457cd8477c 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h @@ -24,6 +24,11 @@ namespace latinime { class BinaryDictionaryBigramsIterator { public: + // Empty iterator. + BinaryDictionaryBigramsIterator() + : mBigramsStructurePolicy(nullptr), mPos(NOT_A_DICT_POS), + mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mHasNext(false) {} + BinaryDictionaryBigramsIterator( const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos) : mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos), diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp index 1052241262869488bf31d2555939448a50142d11..012e4dc9cb0b7ab3a5eec2df089954d3a45bae45 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp @@ -53,9 +53,8 @@ int MultiBigramMap::getBigramProbability( void MultiBigramMap::BigramMap::init( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) { - const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), - bigramsListPos); + BinaryDictionaryBigramsIterator bigramsIt = + structurePolicy->getBigramsIteratorOfPtNode(nodePos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { @@ -89,9 +88,8 @@ int MultiBigramMap::readBigramProbabilityFromBinaryDictionary( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, const int nextWordPosition, const int unigramProbability) { int bigramProbability = NOT_A_PROBABILITY; - const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), - bigramsListPos); + BinaryDictionaryBigramsIterator bigramsIt = + structurePolicy->getBigramsIteratorOfPtNode(nodePos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPosition) { diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index b72601109e4edf0ff3c76321634782ff851469b6..a48d64473dcf52809a3bc86e998984c32a069498 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -20,6 +20,7 @@ #include <memory> #include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/property/word_property.h" namespace latinime { @@ -61,12 +62,10 @@ class DictionaryStructureWithBufferPolicy { virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0; - virtual int getBigramsPositionOfPtNode(const int nodePos) const = 0; + virtual BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int nodePos) const = 0; virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0; - virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0; - virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0; // Returns whether the update was success or not. diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h index e350c69969fff2d85ebd9864bb2e61e0db3f2409..76276f52844a6168a9cef5dd0d45ad8138890a63 100644 --- a/native/jni/src/suggest/core/session/prev_words_info.h +++ b/native/jni/src/suggest/core/session/prev_words_info.h @@ -92,11 +92,9 @@ class PrevWordsInfo { BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const { - const int bigramListPos = getBigramListPositionForWordWithTryingLowerCaseSearch( + return getBigramsIteratorForWordWithTryingLowerCaseSearch( dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0], mIsBeginningOfSentence[0]); - return BinaryDictionaryBigramsIterator(dictStructurePolicy->getBigramsStructurePolicy(), - bigramListPos); } // n is 1-indexed. @@ -156,12 +154,12 @@ class PrevWordsInfo { codePoints, codePointCount, true /* forceLowerCaseSearch */); } - static int getBigramListPositionForWordWithTryingLowerCaseSearch( + static BinaryDictionaryBigramsIterator getBigramsIteratorForWordWithTryingLowerCaseSearch( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *const wordCodePoints, const int wordCodePointCount, const bool isBeginningOfSentence) { if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) { - return NOT_A_DICT_POS; + return BinaryDictionaryBigramsIterator(); } int codePoints[MAX_WORD_LENGTH]; int codePointCount = wordCodePointCount; @@ -170,30 +168,30 @@ class PrevWordsInfo { codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints, codePointCount, MAX_WORD_LENGTH); if (codePointCount <= 0) { - return NOT_A_DICT_POS; + return BinaryDictionaryBigramsIterator(); } } - int pos = getBigramListPositionForWord(dictStructurePolicy, codePoints, - codePointCount, false /* forceLowerCaseSearch */); - // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the - // dictionary or has no bigrams - if (NOT_A_DICT_POS == pos) { - // If no bigrams for this exact word, search again in lower case. - pos = getBigramListPositionForWord(dictStructurePolicy, codePoints, - codePointCount, true /* forceLowerCaseSearch */); - } - return pos; + BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorForWord(dictStructurePolicy, + codePoints, codePointCount, false /* forceLowerCaseSearch */); + // getBigramsIteratorForWord returns an empty iterator if this word isn't in the dictionary + // or has no bigrams. + if (bigramsIt.hasNext()) { + return bigramsIt; + } + // If no bigrams for this exact word, search again in lower case. + return getBigramsIteratorForWord(dictStructurePolicy, codePoints, + codePointCount, true /* forceLowerCaseSearch */); } - static int getBigramListPositionForWord( + static BinaryDictionaryBigramsIterator getBigramsIteratorForWord( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *wordCodePoints, const int wordCodePointCount, const bool forceLowerCaseSearch) { - if (!wordCodePoints || wordCodePointCount <= 0) return NOT_A_DICT_POS; + if (!wordCodePoints || wordCodePointCount <= 0) return BinaryDictionaryBigramsIterator(); const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( wordCodePoints, wordCodePointCount, forceLowerCaseSearch); - if (NOT_A_DICT_POS == terminalPtNodePos) return NOT_A_DICT_POS; - return dictStructurePolicy->getBigramsPositionOfPtNode(terminalPtNodePos); + if (NOT_A_DICT_POS == terminalPtNodePos) return BinaryDictionaryBigramsIterator(); + return dictStructurePolicy->getBigramsIteratorOfPtNode(terminalPtNodePos); } void clear() { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 9780ae048b108d919f68cd8e5dae04c37c2cc2db..f478d9b91219a2f2bccf55e90ea1d177c9fb8044 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -154,6 +154,12 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con ptNodeParams.getTerminalId()); } +BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode( + const int ptNodePos) const { + const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos); + return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition); +} + int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 16b1bd2c1602552847f323d55619edd520aaf1cc..6d97c7cc8e763280b8041acfd0076ef6fbb7da5c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -94,16 +94,12 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getShortcutPositionOfPtNode(const int ptNodePos) const; - int getBigramsPositionOfPtNode(const int ptNodePos) const; + BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { return mHeaderPolicy; } - const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { - return &mBigramPolicy; - } - const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { return &mShortcutPolicy; } @@ -167,6 +163,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int mBigramCount; std::vector<int> mTerminalPtNodePositionsForIteratingWords; mutable bool mIsCorrupted; + + int getBigramsPositionOfPtNode(const int ptNodePos) const; }; } // namespace v402 } // namespace backward diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 002593c49150605a1a04adcea622189fa46e54af..91d76040f9655c8ab8c6a22d9876495e99db864e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -304,6 +304,12 @@ int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos(); } +BinaryDictionaryBigramsIterator PatriciaTriePolicy::getBigramsIteratorOfPtNode( + const int ptNodePos) const { + const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos); + return BinaryDictionaryBigramsIterator(&mBigramListPolicy, bigramsPosition); +} + int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; @@ -322,7 +328,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod int bigramPos = NOT_A_DICT_POS; int siblingPos = NOT_A_DICT_POS; PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(), - getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, + &mBigramListPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos); // Skip PtNodes don't start with Unicode code point because they represent non-word information. if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { @@ -352,7 +358,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin std::vector<BigramProperty> bigrams; const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); int bigramWord1CodePoints[MAX_WORD_LENGTH]; - BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos); + BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos); while (bigramsIt.hasNext()) { // Fetch the next bigram information and forward the iterator. bigramsIt.next(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index ec84074083c10076ffc4ad10dab21704c65dceff..7c0b9d3c5f0dff32905b77fbbce92c3212268b3c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -67,16 +67,12 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getShortcutPositionOfPtNode(const int ptNodePos) const; - int getBigramsPositionOfPtNode(const int ptNodePos) const; + BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { return &mHeaderPolicy; } - const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { - return &mBigramListPolicy; - } - const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { return &mShortcutListPolicy; } @@ -158,6 +154,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { std::vector<int> mTerminalPtNodePositionsForIteratingWords; mutable bool mIsCorrupted; + int getBigramsPositionOfPtNode(const int ptNodePos) const; int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, DicNodeVector *const childDicNodes) const; }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 46107d92ae76240303272399e73b28694bffb476..0b5764aba0a970f024e76379bc6c73937395f5a7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -144,6 +144,12 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con ptNodeParams.getTerminalId()); } +BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode( + const int ptNodePos) const { + const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos); + return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition); +} + int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 5d66a2cce753d38b87d8499372f4b0617588f02d..85929b7857dced13f1181c05ce52c9ca1950e9f1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -76,16 +76,12 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getShortcutPositionOfPtNode(const int ptNodePos) const; - int getBigramsPositionOfPtNode(const int ptNodePos) const; + BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { return mHeaderPolicy; } - const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { - return &mBigramPolicy; - } - const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { return &mShortcutPolicy; } @@ -146,6 +142,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int mBigramCount; std::vector<int> mTerminalPtNodePositionsForIteratingWords; mutable bool mIsCorrupted; + + int getBigramsPositionOfPtNode(const int ptNodePos) const; }; } // namespace latinime #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H