Skip to content
Snippets Groups Projects
Commit b2632553 authored by satok's avatar satok Committed by Android (Google) Code Review
Browse files

Merge "Use priority queue for native string buffer"

parents 8e404536 16379df6
No related branches found
No related tags found
No related merge requests found
......@@ -46,15 +46,19 @@ LOCAL_MODULE := libjni_latinime
LOCAL_MODULE_TAGS := user
# For STL
LOCAL_C_INCLUDES += external/stlport/stlport bionic
LOCAL_SHARED_LIBRARIES += libstlport
ifeq ($(FLAG_DO_PROFILE), true)
$(warning Making profiling version of native library)
LOCAL_CFLAGS += -DFLAG_DO_PROFILE
LOCAL_SHARED_LIBRARIES := libcutils libutils
LOCAL_SHARED_LIBRARIES += libcutils libutils
else # FLAG_DO_PROFILE
ifeq ($(FLAG_DBG), true)
$(warning Making debug version of native library)
LOCAL_CFLAGS += -DFLAG_DBG
LOCAL_SHARED_LIBRARIES := libcutils libutils
LOCAL_SHARED_LIBRARIES += libcutils libutils
endif # FLAG_DBG
endif # FLAG_DO_PROFILE
......
......@@ -101,6 +101,7 @@ static void prof_out(void) {
#define DEBUG_PROXIMITY_INFO true
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ true
#define DEBUG_WORDS_PRIORITY_QUEUE true
#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
......@@ -125,6 +126,7 @@ static void dumpWord(const unsigned short* word, const int length) {
#define DEBUG_PROXIMITY_INFO false
#define DEBUG_CORRECTION false
#define DEBUG_CORRECTION_FREQ false
#define DEBUG_WORDS_PRIORITY_QUEUE false
#define DUMP_WORD(word, length)
......
......@@ -49,10 +49,12 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
LOGI("UnigramDictionary - constructor");
}
mCorrection = new Correction(typedLetterMultiplier, fullWordMultiplier);
mWordsPriorityQueue = new WordsPriorityQueue(maxWords, maxWordLength);
}
// Releases the Correction and WordsPriorityQueue objects that the constructor
// allocated with new.
UnigramDictionary::~UnigramDictionary() {
delete mCorrection;
delete mWordsPriorityQueue;
}
static inline unsigned int getCodesBufferSize(const int* codes, const int codesSize,
......@@ -88,7 +90,7 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes
void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) {
const int currentDepth, int* codesDest) {
if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) {
for (int i = 0; i < codesRemain; ++i) {
......@@ -105,8 +107,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
codesBuffer, codesBufferSize, flags,
codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1,
currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, outWords,
frequencies);
currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS);
// Copy the second char of the digraph in place, then continue processing on
// the remaining part of the word.
......@@ -115,8 +116,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
BYTES_IN_ONE_CHAR);
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
codesBuffer, codesBufferSize, flags, codesSrc + i * MAX_PROXIMITY_CHARS,
codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS,
outWords, frequencies);
codesRemain - i, currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS);
return;
}
}
......@@ -132,8 +132,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
memcpy(codesDest, codesSrc, remainingBytes);
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
(codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies,
flags);
(codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, flags);
}
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
......@@ -144,28 +143,24 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
{ // Incrementally tune the word and try all possibilities
int codesBuffer[getCodesBufferSize(codes, codesSize, MAX_PROXIMITY_CHARS)];
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
codesSize, flags, codes, codesSize, 0, codesBuffer, outWords, frequencies);
codesSize, flags, codes, codesSize, 0, codesBuffer);
} else { // Normal processing
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
outWords, frequencies, flags);
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags);
}
PROF_START(20);
// Get the word count
int suggestedWordsCount = 0;
while (suggestedWordsCount < MAX_WORDS && mFrequencies[suggestedWordsCount] > 0) {
suggestedWordsCount++;
}
const int suggestedWordsCount =
mWordsPriorityQueue->outputSuggestions(frequencies, outWords);
if (DEBUG_DICT) {
LOGI("Returning %d words", suggestedWordsCount);
/// Print the returned words
for (int j = 0; j < suggestedWordsCount; ++j) {
#ifdef FLAG_DBG
short unsigned int* w = mOutputChars + j * MAX_WORD_LENGTH;
short unsigned int* w = outWords + j * MAX_WORD_LENGTH;
char s[MAX_WORD_LENGTH];
for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i];
LOGI("%s %i", s, mFrequencies[j]);
LOGI("%s %i", s, frequencies[j]);
#endif
}
}
......@@ -176,12 +171,12 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize,
unsigned short *outWords, int *frequencies, const int flags) {
const int flags) {
PROF_OPEN;
PROF_START(0);
initSuggestions(
proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies);
proximityInfo, xcoordinates, ycoordinates, codes, codesSize);
if (DEBUG_DICT) assert(codesSize == mInputLength);
const int maxDepth = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
......@@ -241,71 +236,19 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
}
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
const int *yCoordinates, const int *codes, const int codesSize,
unsigned short *outWords, int *frequencies) {
const int *yCoordinates, const int *codes, const int codesSize) {
if (DEBUG_DICT) {
LOGI("initSuggest");
}
mFrequencies = frequencies;
mOutputChars = outWords;
mInputLength = codesSize;
proximityInfo->setInputParams(codes, codesSize, xCoordinates, yCoordinates);
mProximityInfo = proximityInfo;
mWordsPriorityQueue->clear();
}
// TODO: We need to optimize addWord by using STL or something
// TODO: This needs to take a const unsigned short* and not tinker with its contents
bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
word[length] = 0;
if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) {
#ifdef FLAG_DBG
char s[length + 1];
for (int i = 0; i <= length; i++) s[i] = word[i];
LOGI("Found word = %s, freq = %d", s, frequency);
#endif
}
if (length > MAX_WORD_LENGTH) {
if (DEBUG_DICT) {
LOGI("Exceeded max word length.");
}
return false;
}
// Find the right insertion point
int insertAt = 0;
while (insertAt < MAX_WORDS) {
// TODO: How should we sort words with the same frequency?
if (frequency > mFrequencies[insertAt]) {
break;
}
insertAt++;
}
if (insertAt < MAX_WORDS) {
if (DEBUG_DICT) {
#ifdef FLAG_DBG
char s[length + 1];
for (int i = 0; i <= length; i++) s[i] = word[i];
LOGI("Added word = %s, freq = %d, %d", s, frequency, S_INT_MAX);
#endif
}
memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]),
(char*) mFrequencies + insertAt * sizeof(mFrequencies[0]),
(MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));
mFrequencies[insertAt] = frequency;
memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),
(char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short),
(MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);
unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH;
while (length--) {
*dest++ = *word++;
}
*dest = 0; // NULL terminate
if (DEBUG_DICT) {
LOGI("Added word at %d", insertAt);
}
return true;
}
return false;
// Offers a candidate word to the suggestion priority queue; the queue itself
// decides whether the word is kept.
void UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
// Argument order differs from this function's: push() takes the score first.
mWordsPriorityQueue->push(frequency, word, length);
}
static const char QUOTE = '\'';
......
......@@ -22,6 +22,7 @@
#include "correction_state.h"
#include "defines.h"
#include "proximity_info.h"
#include "words_priority_queue.h"
namespace latinime {
......@@ -73,18 +74,16 @@ public:
private:
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
unsigned short *outWords, int *frequencies, const int flags);
const int *ycoordinates, const int *codes, const int codesSize, const int flags);
bool isDigraph(const int* codes, const int i, const int codesSize) const;
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain,
const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies);
const int currentDepth, int* codesDest);
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
const int *ycoordinates, const int *codes, const int codesSize,
unsigned short *outWords, int *frequencies);
const int *ycoordinates, const int *codes, const int codesSize);
void getSuggestionCandidates(const bool useFullEditDistance);
bool addWord(unsigned short *word, int length, int frequency);
void addWord(unsigned short *word, int length, int frequency);
void getSplitTwoWordsSuggestion(const int inputLength, Correction *correction);
void getMissingSpaceWords(const int inputLength, const int missingSpacePos,
Correction *correction, const bool useFullEditDistance);
......@@ -123,8 +122,7 @@ private:
};
static const struct digraph_t { int first; int second; } GERMAN_UMLAUT_DIGRAPHS[];
int *mFrequencies;
unsigned short *mOutputChars;
WordsPriorityQueue *mWordsPriorityQueue;
ProximityInfo *mProximityInfo;
Correction *mCorrection;
int mInputLength;
......
/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_WORDS_PRIORITY_QUEUE_H
#define LATINIME_WORDS_PRIORITY_QUEUE_H
#include <iostream>
#include <queue>
#include "defines.h"
namespace latinime {
// Bounded collection of the best-scoring suggestion candidates.
//
// At most MAX_WORDS candidates are kept. They live in a pool of SuggestedWord
// slots allocated once in the constructor, and a std::priority_queue of
// pointers into that pool keeps the lowest-scoring candidate on top so it can
// be evicted when a better one arrives.
//
// outputSuggestions() drains the queue into caller-provided arrays, best score
// first, so the same instance can be reused after outputSuggestions() or
// clear().
class WordsPriorityQueue {
private:
    // One pooled candidate: its score, its characters and whether the slot is
    // currently referenced by the priority queue.
    class SuggestedWord {
    public:
        int mScore;
        unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
        int mWordLength;
        bool mUsed;

        // Stores a candidate in this slot and marks the slot as taken.
        // The caller must have checked that wordLength is non-negative and
        // fits into mWord (see isStorableLength()).
        void setParams(int score, unsigned short* word, int wordLength) {
            mScore = score;
            mWordLength = wordLength;
            memcpy(mWord, word, sizeof(unsigned short) * wordLength);
            mUsed = true;
        }
    };

    // Orders the queue so that top() is the candidate with the LOWEST score.
    struct wordComparator {
        bool operator ()(SuggestedWord * left, SuggestedWord * right) const {
            return left->mScore > right->mScore;
        }
    };

    // True when a word of this length fits both the pooled slot buffer and
    // one output row (MAX_WORD_LENGTH characters) of outputSuggestions().
    bool isStorableLength(int wordLength) const {
        if (wordLength < 0) {
            return false;
        }
        const unsigned int length = static_cast<unsigned int>(wordLength);
        return length <= MAX_WORD_LENGTH
                && length <= static_cast<unsigned int>(MAX_WORD_LENGTH_INTERNAL);
    }

    // Finds an unused pool slot, fills it with the candidate and returns it.
    // Returns 0 when every slot is taken.
    SuggestedWord* getFreeSuggestedWord(int score, unsigned short* word,
            int wordLength) {
        // The pool holds one slot per retained word, so scan MAX_WORDS slots.
        for (unsigned int i = 0; i < MAX_WORDS; ++i) {
            if (!mSuggestedWords[i].mUsed) {
                mSuggestedWords[i].setParams(score, word, wordLength);
                return &mSuggestedWords[i];
            }
        }
        return 0;
    }

    // Not copyable: the instance owns mSuggestedWords and the queue stores
    // pointers into it, so a member-wise copy would double-delete the pool.
    // Declared but intentionally left undefined.
    WordsPriorityQueue(const WordsPriorityQueue&);
    WordsPriorityQueue& operator=(const WordsPriorityQueue&);

    typedef std::priority_queue<SuggestedWord*, std::vector<SuggestedWord*>,
            wordComparator> Suggestions;
    Suggestions mSuggestions;
    const unsigned int MAX_WORDS;
    const unsigned int MAX_WORD_LENGTH;
    SuggestedWord* mSuggestedWords;

public:
    // maxWords:      maximum number of candidates retained at once.
    // maxWordLength: number of characters per row in the output buffer passed
    //                to outputSuggestions().
    // Non-positive values are treated as 0, in which case nothing is stored.
    WordsPriorityQueue(int maxWords, int maxWordLength) :
            MAX_WORDS(maxWords > 0 ? static_cast<unsigned int>(maxWords) : 0u),
            MAX_WORD_LENGTH(
                    maxWordLength > 0 ? static_cast<unsigned int>(maxWordLength) : 0u),
            mSuggestedWords(new SuggestedWord[MAX_WORDS]) {
        for (unsigned int i = 0; i < MAX_WORDS; ++i) {
            mSuggestedWords[i].mUsed = false;
        }
    }

    ~WordsPriorityQueue() {
        delete[] mSuggestedWords;
    }

    // Offers a candidate. It is stored when there is still room, or when its
    // score is strictly higher than the current lowest score, in which case
    // the lowest-scoring candidate is evicted. A candidate that ties with the
    // current lowest score is dropped.
    // Candidates with a null word, a negative length, or a length that would
    // not fit the slot buffer or an output row are rejected without being
    // copied.
    void push(int score, unsigned short* word, int wordLength) {
        if (MAX_WORDS == 0 || word == 0 || !isStorableLength(wordLength)) {
            if (DEBUG_WORDS_PRIORITY_QUEUE) {
                LOGI("Rejected word. %d, %d", score, wordLength);
            }
            return;
        }

        SuggestedWord* sw = 0;
        if (mSuggestions.size() >= MAX_WORDS) {
            SuggestedWord* const lowest = mSuggestions.top();
            if (lowest->mScore >= score) {
                return;
            }
            // Pop before rewriting the slot: changing the score of an element
            // that is still inside the heap would break the heap ordering.
            mSuggestions.pop();
            lowest->setParams(score, word, wordLength);
            sw = lowest;
        } else {
            sw = getFreeSuggestedWord(score, word, wordLength);
        }

        if (sw == 0) {
            LOGE("SuggestedWord is accidentally null.");
            return;
        }
        if (DEBUG_WORDS_PRIORITY_QUEUE) {
            LOGI("Push word. %d, %d", score, wordLength);
            DUMP_WORD(word, wordLength);
        }
        mSuggestions.push(sw);
    }

    // Drains the queue into the caller's arrays and returns the number of
    // words written.
    // frequencies: receives one score per word, highest score at index 0.
    // outputChars: receives the words as rows of MAX_WORD_LENGTH characters;
    //              a row is 0-terminated only when the word is shorter than
    //              MAX_WORD_LENGTH.
    // Both arrays must have room for the returned number of entries/rows.
    int outputSuggestions(int *frequencies, unsigned short *outputChars) {
        const unsigned int queued = static_cast<unsigned int>(mSuggestions.size());
        const unsigned int size = queued < MAX_WORDS ? queued : MAX_WORDS;
        // The queue pops the lowest score first, so fill from the last row
        // backwards to end up with the best candidate in row 0.
        int index = static_cast<int>(size) - 1;
        while (!mSuggestions.empty() && index >= 0) {
            SuggestedWord* sw = mSuggestions.top();
            if (DEBUG_WORDS_PRIORITY_QUEUE) {
                LOGI("dump word. %d", sw->mScore);
                DUMP_WORD(sw->mWord, sw->mWordLength);
            }
            const unsigned int wordLength = sw->mWordLength;
            unsigned short* const target = outputChars + index * MAX_WORD_LENGTH;
            frequencies[index] = sw->mScore;
            memcpy(target, sw->mWord, wordLength * sizeof(unsigned short));
            if (wordLength < MAX_WORD_LENGTH) {
                target[wordLength] = 0;
            }
            sw->mUsed = false;
            mSuggestions.pop();
            --index;
        }
        return static_cast<int>(size);
    }

    // Discards every queued candidate and returns its slot to the pool.
    void clear() {
        while (!mSuggestions.empty()) {
            SuggestedWord* sw = mSuggestions.top();
            if (DEBUG_WORDS_PRIORITY_QUEUE) {
                LOGI("Clear word. %d", sw->mScore);
                DUMP_WORD(sw->mWord, sw->mWordLength);
            }
            sw->mUsed = false;
            mSuggestions.pop();
        }
    }
};
}
#endif // LATINIME_WORDS_PRIORITY_QUEUE_H
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment