diff --git a/common/src/com/android/inputmethod/latin/common/Constants.java b/common/src/com/android/inputmethod/latin/common/Constants.java index abc377a84c5ec8ee55072a0755644c6aa19ef5d1..a860d3560d9a912237e1c1b84fc8272228bc3470 100644 --- a/common/src/com/android/inputmethod/latin/common/Constants.java +++ b/common/src/com/android/inputmethod/latin/common/Constants.java @@ -179,7 +179,7 @@ public final class Constants { // (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported in Java side. Needs to modify // MAX_PREV_WORD_COUNT_FOR_N_GRAM in native/jni/src/defines.h for suggestions. - public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 2; + public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3; // Key events coming any faster than this are long-presses. public static final int LONG_PRESS_MILLISECONDS = 200; diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 0e67b4d5af49984ae99276b4254150a3617baa5d..10b930e4f5168bee58ca411fbde39f82f021bc3f 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -275,7 +275,7 @@ static inline void showStackTrace() { #define MAX_POINTER_COUNT_G 2 // (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported. 
-#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 2 +#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 3 #define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \ TypeName() = delete diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index a2a0f11b4ce43d25bf4b903b6b79672275bd1597..c93f3101701169bbd9be90c71e87b631d4a3e120 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -31,10 +31,11 @@ const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::DATE_KEY = "date"; const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME"; const char *const HeaderPolicy::NGRAM_COUNT_KEYS[] = - {"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT"}; + {"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT", "QUADGRAM_COUNT"}; const char *const HeaderPolicy::MAX_NGRAM_COUNT_KEYS[] = - {"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT"}; -const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000}; + {"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT", + "MAX_QUADGRAM_ENTRY_COUNT"}; +const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000, 30000}; const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE"; // Historical info is information that is needed to support decaying such as timestamp, level and // count. 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp index 29bc7f7190018f2c6b93d5941068b4b19669c2da..025ee99322a5aa311e41aacf0e1c8fb2be0fcbff 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp @@ -19,12 +19,13 @@ namespace latinime { // Used to provide stable probabilities even if the user's input count is small. -const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNTS[] = {8192, 2, 2}; +const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNTS[] = {8192, 2, 2, 1}; // Encoded backoff weights. -// Note that we give positive value for trigrams that means the weight is more than 1. +// Note that we give positive values for trigrams and quadgrams, which means the weight is more than +// 1. // TODO: Apply backoff for main dictionaries and quit giving a positive backoff weight. -const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHTS[] = {-32, 0, 8}; +const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHTS[] = {-32, -4, 2, 8}; // This value is used to remove too old entries from the dictionary. 
const int DynamicLanguageModelProbabilityUtils::DURATION_TO_DISCARD_ENTRY_IN_SECONDS = diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h index b38047f4952511441a923990442a7c344b4b3a25..644ae2ca7e2057ead36ac0391efcfc6249c43767 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h @@ -66,7 +66,7 @@ class DynamicLanguageModelProbabilityUtils { private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicLanguageModelProbabilityUtils); - static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 2, "Max supported Ngram is Trigram."); + static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 3, "Max supported Ngram is Quadgram."); static const int ASSUMED_MIN_COUNTS[]; static const int ENCODED_BACKOFF_WEIGHTS[]; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h b/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h index 7269913e8016664d820a01baaaf8fa7a4f0297ea..5e443026ea5dd506ae5b45bee49b15175b0dac6c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h @@ -27,7 +27,7 @@ namespace latinime { // Copyable but immutable class EntryCounts final { public: - EntryCounts() : mEntryCounts({{0, 0, 0}}) {} + EntryCounts() : mEntryCounts({{0, 0, 0, 0}}) {} explicit EntryCounts(const std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> &counters) : mEntryCounts(counters) {} diff --git a/native/jni/src/utils/ngram_utils.h b/native/jni/src/utils/ngram_utils.h index 6227812d43ba9c207944df2edcb49404c2c262b4..fa85ba35f8c3bd8e491ab929e8d9583e6fc3788d 100644 --- a/native/jni/src/utils/ngram_utils.h +++ 
b/native/jni/src/utils/ngram_utils.h @@ -25,6 +25,7 @@ enum class NgramType : int { Unigram = 0, Bigram = 1, Trigram = 2, + Quadgram = 3, NotANgramType = -1, };