diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index ae228fb98ef1a52224a526462708777d6e2406c4..865aab632d228d51e8d7a7d538c2c4cb44f48247 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -20,29 +20,34 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node_profiler.h" #include "suggest/core/dicnode/dic_node_release_listener.h" +#include "suggest/core/dicnode/dic_node_utils.h" #include "suggest/core/dicnode/internal/dic_node_state.h" #include "suggest/core/dicnode/internal/dic_node_properties.h" #include "suggest/core/dictionary/digraph_utils.h" #include "suggest/core/dictionary/error_type_utils.h" +#include "suggest/core/layout/proximity_info_state.h" #include "utils/char_utils.h" #if DEBUG_DICT #define LOGI_SHOW_ADD_COST_PROP \ - do { char charBuf[50]; \ - INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \ - AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \ - __FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \ - getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0) + do { \ + char charBuf[50]; \ + INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \ + AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \ + __FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \ + getInputIndex(0), getNormalizedCompoundDistance(), charBuf); \ + } while (0) #define DUMP_WORD_AND_SCORE(header) \ - do { char charBuf[50]; char prevWordCharBuf[50]; \ - INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \ - INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(), \ - mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \ - NELEMS(prevWordCharBuf)); \ - AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d, 
%5f,", header, \ - getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \ - getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \ - getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \ + do { \ + char charBuf[50]; \ + INTS_TO_CHARS(getOutputWordBuf(), \ + getNodeCodePointCount() \ + + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength(), \ + charBuf, NELEMS(charBuf)); \ + AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %d, %5f,", header, \ + getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \ + getNormalizedCompoundDistance(), getRawLength(), charBuf, \ + getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \ } while (0) #else #define LOGI_SHOW_ADD_COST_PROP @@ -103,8 +108,8 @@ class DicNode { void initByCopy(const DicNode *const dicNode) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; - mDicNodeProperties.init(&dicNode->mDicNodeProperties); - mDicNodeState.init(&dicNode->mDicNodeState); + mDicNodeProperties.initByCopy(&dicNode->mDicNodeProperties); + mDicNodeState.initByCopy(&dicNode->mDicNodeState); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } @@ -112,12 +117,8 @@ class DicNode { void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) { mIsUsed = true; mIsCachedForNextSuggestion = false; - mDicNodeProperties.init( - NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, - NOT_A_PROBABILITY /* probability */, false /* isTerminal */, - true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, - 0 /* terminalDepth */); - mDicNodeState.init(prevWordPtNodePos); + mDicNodeProperties.init(rootPtNodeArrayPos, prevWordPtNodePos); + mDicNodeState.init(); PROF_NODE_RESET(mProfiler); } @@ -125,13 +126,8 @@ class DicNode { void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; - 
mDicNodeProperties.init( - NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, - NOT_A_PROBABILITY /* probability */, false /* isTerminal */, - true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, - 0 /* terminalDepth */); + mDicNodeProperties.init(rootPtNodeArrayPos, dicNode->mDicNodeProperties.getPtNodePos()); mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState, - dicNode->mDicNodeProperties.getPtNodePos(), dicNode->mDicNodeProperties.getDepth()); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } @@ -141,7 +137,7 @@ class DicNode { mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion; const int parentCodePoint = parentDicNode->getNodeTypedCodePoint(); mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint); - mDicNodeState.init(&parentDicNode->mDicNodeState); + mDicNodeState.initByCopy(&parentDicNode->mDicNodeState); PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler); } @@ -156,7 +152,7 @@ class DicNode { dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0], probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, - newLeavingDepth); + newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordTerminalPtNodePos()); mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mergedNodeCodePoints); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); @@ -200,7 +196,7 @@ class DicNode { // Used to expand the node in DicNodeUtils int getNodeTypedCodePoint() const { - return mDicNodeState.mDicNodeStateOutput.getCodePointAt(getNodeCodePointCount()); + return mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(getNodeCodePointCount()); } // Check if the current word and the previous word can be considered as a valid multiple word @@ -211,19 +207,19 @@ class DicNode { } // Treat suggestion as invalid if the current and the previous 
word are single character // words. - const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength() - - mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1; + const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength() + - mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1; const int currentWordLen = getNodeCodePointCount(); return (prevWordLen != 1 || currentWordLen != 1); } bool isFirstCharUppercase() const { - const int c = mDicNodeState.mDicNodeStateOutput.getCodePointAt(0); + const int c = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(0); return CharUtils::isAsciiUpper(c); } bool isFirstWord() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS; + return mDicNodeProperties.getPrevWordTerminalPtNodePos() == NOT_A_DICT_POS; } bool isCompletion(const int inputSize) const { @@ -241,7 +237,7 @@ class DicNode { // Used to get bigram probability in DicNodeUtils int getPrevWordTerminalPtNodePos() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); + return mDicNodeProperties.getPrevWordTerminalPtNodePos(); } // Used in DicNodeUtils @@ -263,8 +259,8 @@ class DicNode { bool shouldBeFilteredBySafetyNetForBigram() const { const uint16_t currentDepth = getNodeCodePointCount(); - const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength() - - mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1; + const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength() + - mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1; return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1)); } @@ -277,7 +273,7 @@ class DicNode { } bool isTotalInputSizeExceedingLimit() const { - const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(); + const int prevWordsLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength(); const int currentWordDepth = getNodeCodePointCount(); // TODO: 3 
can be 2? Needs to be investigated. // TODO: Have a const variable for 3 (or 2) @@ -285,25 +281,24 @@ class DicNode { } void outputResult(int *dest) const { - const uint16_t prevWordLength = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(); + const uint16_t prevWordLength = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength(); const uint16_t currentDepth = getNodeCodePointCount(); - DicNodeUtils::appendTwoWords(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(), - prevWordLength, getOutputWordBuf(), currentDepth, dest); + memmove(dest, getOutputWordBuf(), (prevWordLength + currentDepth) * sizeof(dest[0])); DUMP_WORD_AND_SCORE("OUTPUT"); } // "Total" in this context (and other methods in this class) means the whole suggestion. When // this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only // the one that corresponds to the last word of the suggestion, and all the previous words - // are concatenated together in mPrevWord - which contains a space at the end. + // are concatenated together in mDicNodeStateOutput. 
int getTotalNodeSpaceCount() const { if (isFirstWord()) return 0; - return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(), - mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()); + return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStateOutput.getCodePointBuf(), + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()); } int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const { - const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(); + const int inputIndex = mDicNodeState.mDicNodeStateOutput.getSecondWordFirstInputIndex(); if (inputIndex == NOT_AN_INDEX) { return NOT_AN_INDEX; } else { @@ -312,7 +307,7 @@ class DicNode { } bool hasMultipleWords() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() > 0; + return mDicNodeState.mDicNodeStateOutput.getPrevWordCount() > 0; } int getProximityCorrectionCount() const { @@ -346,7 +341,7 @@ class DicNode { // Used to commit input partially int getPrevWordPtNodePos() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); + return mDicNodeProperties.getPrevWordTerminalPtNodePos(); } AK_FORCE_INLINE const int *getOutputWordBuf() const { @@ -425,7 +420,7 @@ class DicNode { float getLanguageDistanceRatePerWordForScoring() const { const float langDist = getLanguageDistanceForScoring(); const float totalWordCount = - static_cast<float>(mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1); + static_cast<float>(mDicNodeState.mDicNodeStateOutput.getPrevWordCount() + 1); return langDist / totalWordCount; } @@ -469,7 +464,7 @@ class DicNode { // Returns code point count including spaces inline uint16_t getTotalNodeCodePointCount() const { - return getNodeCodePointCount() + mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(); + return getNodeCodePointCount() + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength(); } AK_FORCE_INLINE void dump(const char *tag) const { @@ -516,8 +511,9 
@@ class DicNode { return depthDiff > 0; } for (int i = 0; i < depth; ++i) { - const int codePoint = mDicNodeState.mDicNodeStateOutput.getCodePointAt(i); - const int rightCodePoint = right->mDicNodeState.mDicNodeStateOutput.getCodePointAt(i); + const int codePoint = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i); + const int rightCodePoint = + right->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i); if (codePoint != rightCodePoint) { return rightCodePoint > codePoint; } @@ -574,8 +570,8 @@ class DicNode { } AK_FORCE_INLINE void updateInputIndexG(const DicNode_InputStateG *const inputStateG) { - if (mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() == 1 && isFirstLetter()) { - mDicNodeState.mDicNodeStatePrevWord.setSecondWordFirstInputIndex( + if (mDicNodeState.mDicNodeStateOutput.getPrevWordCount() == 1 && isFirstLetter()) { + mDicNodeState.mDicNodeStateOutput.setSecondWordFirstInputIndex( inputStateG->mInputIndex); } mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId, diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h index ab02e6192e53dff9b9a6260170c0bfaf91a5274e..6ddb7f1afdb1ee3b232ebd734a6dff976bdba88a 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h @@ -29,16 +29,18 @@ namespace latinime { class DicNodeProperties { public: AK_FORCE_INLINE DicNodeProperties() - : mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0), - mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false), - mDepth(0), mLeavingDepth(0) {} + : mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS), + mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT), + mIsTerminal(false), mHasChildrenPtNodes(false), + mIsBlacklistedOrNotAWord(false), mDepth(0), 
mLeavingDepth(0), + mPrevWordTerminalPtNodePos(NOT_A_DICT_POS) {} ~DicNodeProperties() {} // Should be called only once per DicNode is initialized. void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability, const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord, - const uint16_t depth, const uint16_t leavingDepth) { + const uint16_t depth, const uint16_t leavingDepth, const int prevWordNodePos) { mPtNodePos = pos; mChildrenPtNodeArrayPos = childrenPos; mDicNodeCodePoint = nodeCodePoint; @@ -48,10 +50,24 @@ class DicNodeProperties { mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; mLeavingDepth = leavingDepth; + mPrevWordTerminalPtNodePos = prevWordNodePos; } - // Init for copy - void init(const DicNodeProperties *const dicNodeProp) { + // Init for root with prevWordPtNodePos which is used for bigram + void init(const int rootPtNodeArrayPos, const int prevWordNodePos) { + mPtNodePos = NOT_A_DICT_POS; + mChildrenPtNodeArrayPos = rootPtNodeArrayPos; + mDicNodeCodePoint = NOT_A_CODE_POINT; + mProbability = NOT_A_PROBABILITY; + mIsTerminal = false; + mHasChildrenPtNodes = true; + mIsBlacklistedOrNotAWord = false; + mDepth = 0; + mLeavingDepth = 0; + mPrevWordTerminalPtNodePos = prevWordNodePos; + } + + void initByCopy(const DicNodeProperties *const dicNodeProp) { mPtNodePos = dicNodeProp->mPtNodePos; mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; @@ -61,6 +77,7 @@ class DicNodeProperties { mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth; mLeavingDepth = dicNodeProp->mLeavingDepth; + mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos; } // Init as passing child @@ -74,6 +91,7 @@ class DicNodeProperties { mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child mLeavingDepth 
= dicNodeProp->mLeavingDepth; + mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos; } int getPtNodePos() const { @@ -113,6 +131,10 @@ class DicNodeProperties { return mIsBlacklistedOrNotAWord; } + int getPrevWordTerminalPtNodePos() const { + return mPrevWordTerminalPtNodePos; + } + private: // Caution!!! // Use a default copy constructor and an assign operator because shallow copies are ok @@ -126,6 +148,7 @@ class DicNodeProperties { bool mIsBlacklistedOrNotAWord; uint16_t mDepth; uint16_t mLeavingDepth; + int mPrevWordTerminalPtNodePos; }; } // namespace latinime #endif // LATINIME_DIC_NODE_PROPERTIES_H diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state.h index a41667567264f0b283a31867aed32a1c62d59afc..badb1f5f22b8a024b00dca6fb3144e24e1ca437b 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state.h @@ -20,7 +20,6 @@ #include "defines.h" #include "suggest/core/dicnode/internal/dic_node_state_input.h" #include "suggest/core/dicnode/internal/dic_node_state_output.h" -#include "suggest/core/dicnode/internal/dic_node_state_prevword.h" #include "suggest/core/dicnode/internal/dic_node_state_scoring.h" namespace latinime { @@ -29,65 +28,50 @@ class DicNodeState { public: DicNodeStateInput mDicNodeStateInput; DicNodeStateOutput mDicNodeStateOutput; - DicNodeStatePrevWord mDicNodeStatePrevWord; DicNodeStateScoring mDicNodeStateScoring; AK_FORCE_INLINE DicNodeState() - : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(), - mDicNodeStateScoring() { - } + : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {} ~DicNodeState() {} DicNodeState &operator=(const DicNodeState& src) { - init(&src); + initByCopy(&src); return *this; } DicNodeState(const DicNodeState& src) - : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(), - 
mDicNodeStateScoring() { - init(&src); + : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() { + initByCopy(&src); } - // Init with prevWordPos - void init(const int prevWordPos) { + // Init for root + void init() { mDicNodeStateInput.init(); mDicNodeStateOutput.init(); - mDicNodeStatePrevWord.init(prevWordPos); mDicNodeStateScoring.init(); } // Init with previous word. void initAsRootWithPreviousWord(const DicNodeState *prevWordDicNodeState, - const int prevWordPos, const int prevWordCodePointCount) { - mDicNodeStateOutput.init(); // reset for next word + const int prevWordCodePointCount) { + mDicNodeStateOutput.init(&prevWordDicNodeState->mDicNodeStateOutput); mDicNodeStateInput.init( &prevWordDicNodeState->mDicNodeStateInput, true /* resetTerminalDiffCost */); - mDicNodeStateScoring.init(&prevWordDicNodeState->mDicNodeStateScoring); - mDicNodeStatePrevWord.init( - prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordCount() + 1, - prevWordPos, - prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordBuf(), - prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordLength(), - prevWordDicNodeState->mDicNodeStateOutput.getCodePointBuf(), - prevWordCodePointCount, - prevWordDicNodeState->mDicNodeStatePrevWord.getSecondWordFirstInputIndex(), - prevWordDicNodeState->mDicNodeStateInput.getInputIndex(0) /* lastInputIndex */); + mDicNodeStateScoring.initByCopy(&prevWordDicNodeState->mDicNodeStateScoring); } // Init by copy - AK_FORCE_INLINE void init(const DicNodeState *const src) { - mDicNodeStateInput.init(&src->mDicNodeStateInput); - mDicNodeStateOutput.init(&src->mDicNodeStateOutput); - mDicNodeStatePrevWord.init(&src->mDicNodeStatePrevWord); - mDicNodeStateScoring.init(&src->mDicNodeStateScoring); + AK_FORCE_INLINE void initByCopy(const DicNodeState *const src) { + mDicNodeStateInput.initByCopy(&src->mDicNodeStateInput); + mDicNodeStateOutput.initByCopy(&src->mDicNodeStateOutput); + mDicNodeStateScoring.initByCopy(&src->mDicNodeStateScoring); } // 
Init by copy and adding merged node code points. void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { - init(src); + initByCopy(src); mDicNodeStateOutput.addMergedNodeCodePoints( mergedNodeCodePointCount, mergedNodeCodePoints); } diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_input.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_input.h index 03042a8a7022802c1a1246ef35fdb619ac9348bd..50a37ba3ef4a627d9b8c3b59a208377e7ab559c4 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_input.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_input.h @@ -53,7 +53,7 @@ class DicNodeStateInput { mTerminalDiffCost[pointerId] = terminalDiffCost; } - void init(const DicNodeStateInput *const src) { + void initByCopy(const DicNodeStateInput *const src) { init(src, false); } diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h index bdb182c1d1e3e45edd8d346df24cae87d13d0b9f..ea48de1ea8d562ab1c5cb84926556a85d269d3e6 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_output.h @@ -25,24 +25,53 @@ namespace latinime { +// Holds the code points to be output for a suggestion. For a multi-word suggestion this also +// contains the previous words, each followed by a space.
class DicNodeStateOutput { public: - DicNodeStateOutput() : mOutputtedCodePointCount(0) {} + DicNodeStateOutput() + : mOutputtedCodePointCount(0), mCurrentWordStart(0), mPrevWordCount(0), + mPrevWordsLength(0), mPrevWordStart(0), mSecondWordFirstInputIndex(NOT_AN_INDEX) {} ~DicNodeStateOutput() {} + // Init for root void init() { mOutputtedCodePointCount = 0; - mCodePointsBuf[0] = 0; + mCurrentWordStart = 0; + mOutputCodePoints[0] = 0; + mPrevWordCount = 0; + mPrevWordsLength = 0; + mPrevWordStart = 0; + mSecondWordFirstInputIndex = NOT_AN_INDEX; } + // Init for next word: copy the words output so far, then append a separating space. void init(const DicNodeStateOutput *const stateOutput) { - memmove(mCodePointsBuf, stateOutput->mCodePointsBuf, - stateOutput->mOutputtedCodePointCount * sizeof(mCodePointsBuf[0])); + /* Keep the last slot free so the trailing space cannot overflow mOutputCodePoints. */ + const int len = std::min<int>(stateOutput->mOutputtedCodePointCount, MAX_WORD_LENGTH - 1); + memmove(mOutputCodePoints, stateOutput->mOutputCodePoints, + len * sizeof(mOutputCodePoints[0])); + mOutputCodePoints[len] = KEYCODE_SPACE; + mOutputtedCodePointCount = mCurrentWordStart = mPrevWordsLength = len + 1; + mPrevWordCount = std::min(static_cast<int16_t>(stateOutput->mPrevWordCount + 1), + static_cast<int16_t>(MAX_RESULTS)); + mPrevWordStart = stateOutput->mCurrentWordStart; + mSecondWordFirstInputIndex = stateOutput->mSecondWordFirstInputIndex; + } + + void initByCopy(const DicNodeStateOutput *const stateOutput) { + memmove(mOutputCodePoints, stateOutput->mOutputCodePoints, + stateOutput->mOutputtedCodePointCount * sizeof(mOutputCodePoints[0])); mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount; if (mOutputtedCodePointCount < MAX_WORD_LENGTH) { - mCodePointsBuf[mOutputtedCodePointCount] = 0; + mOutputCodePoints[mOutputtedCodePointCount] = 0; } + mCurrentWordStart = stateOutput->mCurrentWordStart; + mPrevWordCount = stateOutput->mPrevWordCount; + mPrevWordsLength = stateOutput->mPrevWordsLength; +
mPrevWordStart = stateOutput->mPrevWordStart; + mSecondWordFirstInputIndex = stateOutput->mSecondWordFirstInputIndex; } void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount, @@ -51,29 +80,72 @@ class DicNodeStateOutput { const int additionalCodePointCount = std::min( static_cast<int>(mergedNodeCodePointCount), MAX_WORD_LENGTH - mOutputtedCodePointCount); - memmove(&mCodePointsBuf[mOutputtedCodePointCount], mergedNodeCodePoints, - additionalCodePointCount * sizeof(mCodePointsBuf[0])); + memmove(&mOutputCodePoints[mOutputtedCodePointCount], mergedNodeCodePoints, + additionalCodePointCount * sizeof(mOutputCodePoints[0])); mOutputtedCodePointCount = static_cast<uint16_t>( - mOutputtedCodePointCount + mergedNodeCodePointCount); + mOutputtedCodePointCount + additionalCodePointCount); if (mOutputtedCodePointCount < MAX_WORD_LENGTH) { - mCodePointsBuf[mOutputtedCodePointCount] = 0; + mOutputCodePoints[mOutputtedCodePointCount] = 0; } } } - int getCodePointAt(const int index) const { - return mCodePointsBuf[index]; + int getCurrentWordCodePointAt(const int index) const { + return mOutputCodePoints[mCurrentWordStart + index]; } const int *getCodePointBuf() const { - return mCodePointsBuf; + return mOutputCodePoints; + } + + void setSecondWordFirstInputIndex(const int inputIndex) { + mSecondWordFirstInputIndex = inputIndex; + } + + int getSecondWordFirstInputIndex() const { + return mSecondWordFirstInputIndex; + } + + // TODO: remove + int16_t getPrevWordsLength() const { + return mPrevWordsLength; + } + + int16_t getPrevWordCount() const { + return mPrevWordCount; + } + + int16_t getPrevWordStart() const { + return mPrevWordStart; + } + + int getOutputCodePointAt(const int id) const { + return mOutputCodePoints[id]; } private: DISALLOW_COPY_AND_ASSIGN(DicNodeStateOutput); + // When the DicNode represents "this is a pen": + // mOutputtedCodePointCount is 13, which is total code point count of "this is a pen" including + // spaces. 
+ // mCurrentWordStart indicates the head of "pen", thus it is 10. + // This contains 3 previous words, "this", "is" and "a"; thus, mPrevWordCount is 3. + // mPrevWordsLength is length of "this is a ", which is 10. + // mPrevWordStart is the start index of "a"; thus, it is 8. + // mSecondWordFirstInputIndex is the first input index of "is". + uint16_t mOutputtedCodePointCount; - int mCodePointsBuf[MAX_WORD_LENGTH]; + int mOutputCodePoints[MAX_WORD_LENGTH]; + int16_t mCurrentWordStart; + // Previous word count in mOutputCodePoints. + int16_t mPrevWordCount; + // Total length of previous words in mOutputCodePoints. This is being used by the algorithm + // that may want to look at the previous word information. + int16_t mPrevWordsLength; + // Start index of the previous word in mOutputCodePoints. This is being used for auto commit. + int16_t mPrevWordStart; + int mSecondWordFirstInputIndex; }; } // namespace latinime #endif // LATINIME_DIC_NODE_STATE_OUTPUT_H diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h deleted file mode 100644 index 409841e2d434b7e52e3e57ad3749c12e7ac8948d..0000000000000000000000000000000000000000 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef LATINIME_DIC_NODE_STATE_PREVWORD_H -#define LATINIME_DIC_NODE_STATE_PREVWORD_H - -#include <algorithm> -#include <cstring> // for memset() and memmove() -#include <stdint.h> - -#include "defines.h" -#include "suggest/core/dicnode/dic_node_utils.h" -#include "suggest/core/layout/proximity_info_state.h" - -namespace latinime { - -class DicNodeStatePrevWord { - public: - AK_FORCE_INLINE DicNodeStatePrevWord() - : mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), - mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {} - - ~DicNodeStatePrevWord() {} - - void init(const int prevWordNodePos) { - mPrevWordLength = 0; - mPrevWordCount = 0; - mPrevWordStart = 0; - mPrevWordPtNodePos = prevWordNodePos; - mSecondWordFirstInputIndex = NOT_AN_INDEX; - mPrevWord[0] = 0; - } - - // Init by copy - AK_FORCE_INLINE void init(const DicNodeStatePrevWord *const prevWord) { - mPrevWordLength = prevWord->mPrevWordLength; - mPrevWordCount = prevWord->mPrevWordCount; - mPrevWordStart = prevWord->mPrevWordStart; - mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos; - mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex; - memmove(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0])); - } - - void init(const int16_t prevWordCount, const int prevWordNodePos, const int *const src0, - const int16_t length0, const int *const src1, const int16_t length1, - const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) { - mPrevWordCount = std::min(prevWordCount, static_cast<int16_t>(MAX_RESULTS)); - mPrevWordPtNodePos = prevWordNodePos; - int twoWordsLen = - DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord); - if (twoWordsLen >= MAX_WORD_LENGTH) { - twoWordsLen = MAX_WORD_LENGTH - 1; - } - mPrevWord[twoWordsLen] = KEYCODE_SPACE; - mPrevWordStart = length0; - mPrevWordLength = static_cast<int16_t>(twoWordsLen + 1); - mSecondWordFirstInputIndex = prevWordSecondWordFirstInputIndex; - 
} - - void setSecondWordFirstInputIndex(const int inputIndex) { - mSecondWordFirstInputIndex = inputIndex; - } - - int getSecondWordFirstInputIndex() const { - return mSecondWordFirstInputIndex; - } - - // TODO: remove - int16_t getPrevWordLength() const { - return mPrevWordLength; - } - - int16_t getPrevWordCount() const { - return mPrevWordCount; - } - - int16_t getPrevWordStart() const { - return mPrevWordStart; - } - - int getPrevWordPtNodePos() const { - return mPrevWordPtNodePos; - } - - int getPrevWordCodePointAt(const int id) const { - return mPrevWord[id]; - } - - const int *getPrevWordBuf() const { - return mPrevWord; - } - - private: - DISALLOW_COPY_AND_ASSIGN(DicNodeStatePrevWord); - - int16_t mPrevWordCount; - int16_t mPrevWordLength; - int16_t mPrevWordStart; - int mPrevWordPtNodePos; - int mSecondWordFirstInputIndex; - int mPrevWord[MAX_WORD_LENGTH]; -}; -} // namespace latinime -#endif // LATINIME_DIC_NODE_STATE_PREVWORD_H diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h index b0db55fc1c8f4857314db6a1184f3f43f38de99f..f164edbee6f5f8e97d1a4ac3e655550400c4dd31 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h @@ -53,7 +53,7 @@ class DicNodeStateScoring { mContainedErrorTypes = ErrorTypeUtils::NOT_AN_ERROR; } - AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) { + AK_FORCE_INLINE void initByCopy(const DicNodeStateScoring *const scoring) { mEditCorrectionCount = scoring->mEditCorrectionCount; mProximityCorrectionCount = scoring->mProximityCorrectionCount; mCompletionCount = scoring->mCompletionCount;