Skip to content
Snippets Groups Projects
Commit 22f951c8 authored by Keisuke Kuroynagi's avatar Keisuke Kuroynagi Committed by Android (Google) Code Review
Browse files

Merge "Remove checkFirstCharacter from BigramDictionary."

parents 975113d0 2a2aac25
No related branches found
No related tags found
No related merge requests found
......@@ -186,7 +186,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j
scores, spaceIndices, outputTypes);
} else {
count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
inputCodePoints, inputSize, outputCodePoints, scores, outputTypes);
outputCodePoints, scores, outputTypes);
}
// Copy back the output values
......
......@@ -87,21 +87,14 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int
/* Parameters :
* prevWord: the word before, the one for which we need to look up bigrams.
* prevWordLength: its length.
* inputCodePoints: what user typed, in the same format as for UnigramDictionary::getSuggestions.
* inputSize: the size of the codes array.
* bigramCodePoints: an array for output, at the same format as outwords for getSuggestions.
* bigramProbability: an array to output frequencies.
* outBigramCodePoints: an array for output, at the same format as outwords for getSuggestions.
* outBigramProbability: an array to output frequencies.
* outputTypes: an array to output types.
* This method returns the number of bigrams this word has, for backward compatibility.
* Note: this is not the number of bigrams output in the array, which is the number of
* bigrams this word has WHOSE first letter also matches the letter the user typed.
* TODO: this may not be a sensible thing to do. It makes sense when the bigrams are
* used to match the first letter of the second word, but once the user has typed more
* and the bigrams are used to boost unigram result scores, it makes little sense to
* reduce their scope to the ones that match the first letter.
*/
int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, int *inputCodePoints,
int inputSize, int *bigramCodePoints, int *bigramProbability, int *outputTypes) const {
int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength,
int *const outBigramCodePoints, int *const outBigramProbability,
int *const outputTypes) const {
// TODO: remove unused arguments, and refrain from storing stuff in members of this class
// TODO: have "in" arguments before "out" ones, and make out args explicit in the name
......@@ -126,21 +119,16 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in
getCodePointsAndProbabilityAndReturnCodePointCount(
mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH,
bigramBuffer, &unigramProbability);
// inputSize == 0 means we are trying to find bigram predictions.
if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) {
const int bigramProbabilityTemp = bigramsIt.getProbability();
// Due to space constraints, the probability for bigrams is approximate - the lower the
// unigram probability, the worse the precision. The theoritical maximum error in
// resulting probability is 8 - although in the practice it's never bigger than 3 or 4
// in very bad cases. This means that sometimes, we'll see some bigrams interverted
// here, but it can't get too bad.
const int probability = ProbabilityUtils::computeProbabilityForBigram(
unigramProbability, bigramProbabilityTemp);
addWordBigram(bigramBuffer, length, probability, bigramProbability, bigramCodePoints,
outputTypes);
++bigramCount;
}
// Due to space constraints, the probability for bigrams is approximate - the lower the
// unigram probability, the worse the precision. The theoritical maximum error in
// resulting probability is 8 - although in the practice it's never bigger than 3 or 4
// in very bad cases. This means that sometimes, we'll see some bigrams interverted
// here, but it can't get too bad.
const int probability = ProbabilityUtils::computeProbabilityForBigram(
unigramProbability, bigramsIt.getProbability());
addWordBigram(bigramBuffer, length, probability, outBigramProbability, outBigramCodePoints,
outputTypes);
++bigramCount;
}
return min(bigramCount, MAX_RESULTS);
}
......@@ -157,22 +145,6 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
mBinaryDictionaryInfo, pos);
}
bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const {
// Checks whether this word starts with same character or neighboring characters of
// what user typed.
int maxAlt = MAX_ALTERNATIVES;
const int firstBaseLowerCodePoint = CharUtils::toBaseLowerCase(*word);
while (maxAlt > 0) {
if (CharUtils::toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) {
return true;
}
inputCodePoints++;
maxAlt--;
}
return false;
}
bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
int length1) const {
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
......
......@@ -27,8 +27,8 @@ class BigramDictionary {
public:
BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo);
int getPredictions(const int *word, int length, int *inputCodePoints, int inputSize,
int *outWords, int *frequencies, int *outputTypes) const;
int getPredictions(const int *word, int length, int *outBigramCodePoints,
int *outBigramProbability, int *outputTypes) const;
bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
~BigramDictionary();
......@@ -37,13 +37,10 @@ class BigramDictionary {
void addWordBigram(int *word, int length, int probability, int *bigramProbability,
int *bigramCodePoints, int *outputTypes) const;
bool checkFirstCharacter(int *word, int *inputCodePoints) const;
int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const;
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
// TODO: Re-implement proximity correction for bigram correction
static const int MAX_ALTERNATIVES = 1;
};
} // namespace latinime
#endif // LATINIME_BIGRAM_DICTIONARY_H
......@@ -76,11 +76,10 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
}
}
int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, int inputSize,
int *outWords, int *frequencies, int *outputTypes) const {
int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies,
int *outputTypes) const {
if (length <= 0) return 0;
return mBigramDictionary->getPredictions(word, length, inputCodePoints, inputSize, outWords,
frequencies, outputTypes);
return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes);
}
int Dictionary::getProbability(const int *word, int length) const {
......
......@@ -62,8 +62,8 @@ class Dictionary {
const SuggestOptions *const suggestOptions, int *outWords, int *frequencies,
int *spaceIndices, int *outputTypes) const;
int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords,
int *frequencies, int *outputTypes) const;
int getBigrams(const int *word, int length, int *outWords, int *frequencies,
int *outputTypes) const;
int getProbability(const int *word, int length) const;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment