Skip to content
Snippets Groups Projects
Commit 30088259 authored by satok's avatar satok
Browse files

Fix parameters of native functions and refactor Dictionary

- created bigram/unigram dictionary classes

Change-Id: I233a28ed8d611870db3f4cf8f25fc45b5d41529b
parent d4952c8f
No related branches found
No related tags found
No related merge requests found
......@@ -49,11 +49,11 @@ public class BinaryDictionary extends Dictionary {
private int mDicTypeId;
private int mNativeDict;
private int mDictLength;
private int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_ALTERNATIVES];
private char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS];
private char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS];
private int[] mFrequencies = new int[MAX_WORDS];
private int[] mFrequencies_bigrams = new int[MAX_BIGRAMS];
private final int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_ALTERNATIVES];
private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS];
private final char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS];
private final int[] mFrequencies = new int[MAX_WORDS];
private final int[] mFrequencies_bigrams = new int[MAX_BIGRAMS];
// Keep a reference to the native dict direct buffer in Java to avoid
// unexpected deallocation of the direct buffer.
private ByteBuffer mNativeDictDirectBuffer;
......@@ -94,18 +94,19 @@ public class BinaryDictionary extends Dictionary {
}
mDictLength = byteBuffer.capacity();
mNativeDict = openNative(mNativeDictDirectBuffer,
TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER);
TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER,
MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES);
}
mDicTypeId = dicTypeId;
}
private native int openNative(ByteBuffer bb, int typedLetterMultiplier,
int fullWordMultiplier);
int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives);
private native void closeNative(int dict);
private native boolean isValidWordNative(int nativeData, char[] word, int wordLength);
private native int getSuggestionsNative(int dict, int[] inputCodes, int codesSize,
char[] outputChars, int[] frequencies, int maxWordLength, int maxWords,
int maxAlternatives, int skipPos, int[] nextLettersFrequencies, int nextLettersSize);
char[] outputChars, int[] frequencies,
int[] nextLettersFrequencies, int nextLettersSize);
private native int getBigramsNative(int dict, char[] prevWord, int prevWordLength,
int[] inputCodes, int inputCodesLength, char[] outputChars, int[] frequencies,
int maxWordLength, int maxBigrams, int maxAlternatives);
......@@ -131,7 +132,8 @@ public class BinaryDictionary extends Dictionary {
Log.e(TAG, "Read " + got + " bytes, expected " + total);
} else {
mNativeDict = openNative(mNativeDictDirectBuffer,
TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER);
TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER,
MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES);
mDictLength = total;
}
} catch (IOException e) {
......@@ -188,7 +190,7 @@ public class BinaryDictionary extends Dictionary {
final int codesSize = codes.size();
// Won't deal with really long words.
if (codesSize > MAX_WORD_LENGTH - 1) return;
Arrays.fill(mInputCodes, -1);
for (int i = 0; i < codesSize; i++) {
int[] alternatives = codes.getCodesAt(i);
......@@ -199,8 +201,7 @@ public class BinaryDictionary extends Dictionary {
Arrays.fill(mFrequencies, 0);
int count = getSuggestionsNative(mNativeDict, mInputCodes, codesSize, mOutputChars,
mFrequencies, MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES, -1,
nextLettersFrequencies,
mFrequencies, nextLettersFrequencies,
nextLettersFrequencies != null ? nextLettersFrequencies.length : 0);
for (int j = 0; j < count; j++) {
......
......@@ -4,9 +4,11 @@ include $(CLEAR_VARS)
LOCAL_C_INCLUDES += $(LOCAL_PATH)/src
LOCAL_SRC_FILES := \
jni/com_android_inputmethod_latin_BinaryDictionary.cpp \
src/dictionary.cpp \
src/char_utils.cpp
jni/com_android_inputmethod_latin_BinaryDictionary.cpp \
src/bigram_dictionary.cpp \
src/char_utils.cpp \
src/dictionary.cpp \
src/unigram_dictionary.cpp
#FLAG_DBG := true
......
......@@ -42,21 +42,23 @@ static void throwException(JNIEnv *env, const char* ex, const char* fmt, int dat
static jint latinime_BinaryDictionary_open
(JNIEnv *env, jobject object, jobject dictDirectBuffer,
jint typedLetterMultiplier, jint fullWordMultiplier)
jint typedLetterMultiplier, jint fullWordMultiplier, jint maxWordLength, jint maxWords,
jint maxAlternatives)
{
void *dict = env->GetDirectBufferAddress(dictDirectBuffer);
if (dict == NULL) {
fprintf(stderr, "DICT: Dictionary buffer is null\n");
return 0;
}
Dictionary *dictionary = new Dictionary(dict, typedLetterMultiplier, fullWordMultiplier);
Dictionary *dictionary = new Dictionary(dict, typedLetterMultiplier, fullWordMultiplier,
maxWordLength, maxWords, maxAlternatives);
return (jint) dictionary;
}
static int latinime_BinaryDictionary_getSuggestions(
JNIEnv *env, jobject object, jint dict, jintArray inputArray, jint arraySize,
jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxWords,
jint maxAlternatives, jint skipPos, jintArray nextLettersArray, jint nextLettersSize)
jcharArray outputArray, jintArray frequencyArray,
jintArray nextLettersArray, jint nextLettersSize)
{
Dictionary *dictionary = (Dictionary*) dict;
if (dictionary == NULL) return 0;
......@@ -68,8 +70,7 @@ static int latinime_BinaryDictionary_getSuggestions(
: NULL;
int count = dictionary->getSuggestions(inputCodes, arraySize, (unsigned short*) outputChars,
frequencies, maxWordLength, maxWords, maxAlternatives, skipPos, nextLetters,
nextLettersSize);
frequencies, nextLetters, nextLettersSize);
env->ReleaseIntArrayElements(frequencyArray, frequencies, 0);
env->ReleaseIntArrayElements(inputArray, inputCodes, JNI_ABORT);
......@@ -123,17 +124,16 @@ static jboolean latinime_BinaryDictionary_isValidWord
static void latinime_BinaryDictionary_close
(JNIEnv *env, jobject object, jint dict)
{
Dictionary *dictionary = (Dictionary*) dict;
delete (Dictionary*) dict;
}
// ----------------------------------------------------------------------------
static JNINativeMethod gMethods[] = {
{"openNative", "(Ljava/nio/ByteBuffer;II)I",
{"openNative", "(Ljava/nio/ByteBuffer;IIIII)I",
(void*)latinime_BinaryDictionary_open},
{"closeNative", "(I)V", (void*)latinime_BinaryDictionary_close},
{"getSuggestionsNative", "(I[II[C[IIIII[II)I", (void*)latinime_BinaryDictionary_getSuggestions},
{"getSuggestionsNative", "(I[II[C[I[II)I", (void*)latinime_BinaryDictionary_getSuggestions},
{"isValidWordNative", "(I[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
{"getBigramsNative", "(I[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams}
};
......
/*
**
** Copyright 2010, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#include "bigram_dictionary.h"
namespace latinime {
BigramDictionary::BigramDictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier,
int maxWordLength, int maxWords, int maxAlternatives, Dictionary *parentDictionary)
{
}
BigramDictionary::~BigramDictionary()
{
}
// TODO: Move functions related to bigram to here
} // namespace latinime
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_BIGRAM_DICTIONARY_H
#define LATINIME_BIGRAM_DICTIONARY_H
namespace latinime {
class Dictionary;
class BigramDictionary {
public:
BigramDictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength,
int maxWords, int maxAlternatives, Dictionary *parentDictionary);
~BigramDictionary();
private:
};
// ----------------------------------------------------------------------------
}; // namespace latinime
#endif // LATINIME_BIGRAM_DICTIONARY_H
This diff is collapsed.
......@@ -17,6 +17,9 @@
#ifndef LATINIME_DICTIONARY_H
#define LATINIME_DICTIONARY_H
#include "bigram_dictionary.h"
#include "unigram_dictionary.h"
namespace latinime {
// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
......@@ -35,68 +38,32 @@ namespace latinime {
class Dictionary {
public:
Dictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier);
Dictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength,
int maxWords, int maxAlternatives);
int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
int *nextLetters, int nextLettersSize);
int *nextLetters, int nextLettersSize) {
return mUnigramDictionary->getSuggestions(codes, codesSize, outWords, frequencies,
nextLetters, nextLettersSize);
}
// TODO: Call mBigramDictionary instead of mUnigramDictionary
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
int maxAlternatives);
bool isValidWord(unsigned short *word, int length);
int maxAlternatives) {
return mUnigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies,
maxWordLength, maxBigrams, maxAlternatives);
}
bool isValidWord(unsigned short *word, int length) {
return mUnigramDictionary->isValidWord(word, length);
}
void setAsset(void *asset) { mAsset = asset; }
void *getAsset() { return mAsset; }
~Dictionary();
private:
void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
int maxWordLength, int maxWords, int maxAlternatives);
int getSuggestionCandidates(int inputLength, int maxWords, int skipPos, int *nextLetters,
int nextLettersSize);
void getVersionNumber();
bool checkIfDictVersionIsLatest();
int getAddress(int *pos);
int getBigramAddress(int *pos, bool advance);
int getFreq(int *pos);
int getBigramFreq(int *pos);
void searchForTerminalNode(int address, int frequency);
bool getFirstBitOfByte(int *pos) { return (mDict[*pos] & 0x80) > 0; }
bool getSecondBitOfByte(int *pos) { return (mDict[*pos] & 0x40) > 0; }
bool getTerminal(int *pos) { return (mDict[*pos] & FLAG_TERMINAL_MASK) > 0; }
int getCount(int *pos) { return mDict[(*pos)++] & 0xFF; }
unsigned short getChar(int *pos);
int wideStrLen(unsigned short *str);
bool sameAsTyped(unsigned short *word, int length);
bool checkFirstCharacter(unsigned short *word);
bool addWord(unsigned short *word, int length, int frequency);
bool addWordBigram(unsigned short *word, int length, int frequency);
unsigned short toLowerCase(unsigned short c);
void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize);
int isValidWordRec(int pos, unsigned short *word, int offset, int length);
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
unsigned char *mDict;
void *mAsset;
int *mFrequencies;
int *mBigramFreq;
int mMaxWords;
int mMaxBigrams;
int mMaxWordLength;
unsigned short *mOutputChars;
unsigned short *mBigramChars;
int *mInputCodes;
int mInputLength;
int mMaxAlternatives;
unsigned short mWord[128];
int mMaxEditDistance;
int mFullWordMultiplier;
int mTypedLetterMultiplier;
int mVersion;
int mBigram;
BigramDictionary *mBigramDictionary;
UnigramDictionary *mUnigramDictionary;
};
// ----------------------------------------------------------------------------
......
This diff is collapsed.
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_UNIGRAM_DICTIONARY_H
#define LATINIME_UNIGRAM_DICTIONARY_H
namespace latinime {
// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
#define ADDRESS_MASK 0x3FFFFF
// The bit that decides if an address follows in the next 22 bits
#define FLAG_ADDRESS_MASK 0x40
// The bit that decides if this is a terminal node for a word. The node could still have children,
// if the word has other endings.
#define FLAG_TERMINAL_MASK 0x80
#define FLAG_BIGRAM_READ 0x80
#define FLAG_BIGRAM_CHILDEXIST 0x40
#define FLAG_BIGRAM_CONTINUED 0x80
#define FLAG_BIGRAM_FREQ 0x7F
class Dictionary;
class UnigramDictionary {
public:
UnigramDictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength,
int maxWords, int maxAlternatives, Dictionary *parentDictionary);
int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
int *nextLetters, int nextLettersSize);
int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
int maxAlternatives);
bool isValidWord(unsigned short *word, int length);
~UnigramDictionary();
private:
void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
int getSuggestionCandidates(int inputLength, int skipPos, int *nextLetters, int nextLettersSize);
void getVersionNumber();
bool checkIfDictVersionIsLatest();
int getAddress(int *pos);
int getBigramAddress(int *pos, bool advance);
int getFreq(int *pos);
int getBigramFreq(int *pos);
void searchForTerminalNode(int address, int frequency);
bool getFirstBitOfByte(int *pos) { return (mDict[*pos] & 0x80) > 0; }
bool getSecondBitOfByte(int *pos) { return (mDict[*pos] & 0x40) > 0; }
bool getTerminal(int *pos) { return (mDict[*pos] & FLAG_TERMINAL_MASK) > 0; }
int getCount(int *pos) { return mDict[(*pos)++] & 0xFF; }
unsigned short getChar(int *pos);
int wideStrLen(unsigned short *str);
bool sameAsTyped(unsigned short *word, int length);
bool checkFirstCharacter(unsigned short *word);
bool addWord(unsigned short *word, int length, int frequency);
bool addWordBigram(unsigned short *word, int length, int frequency);
unsigned short toLowerCase(unsigned short c);
void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize);
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
int isValidWordRec(int pos, unsigned short *word, int offset, int length);
unsigned char *mDict;
Dictionary *mParentDictionary;
const int MAX_WORDS;
const int MAX_WORD_LENGTH;
const int MAX_ALTERNATIVES;
int *mFrequencies;
int *mBigramFreq;
int mMaxBigrams;
unsigned short *mOutputChars;
unsigned short *mBigramChars;
int *mInputCodes;
int mInputLength;
unsigned short mWord[128];
int mMaxEditDistance;
int mFullWordMultiplier;
int mTypedLetterMultiplier;
int mVersion;
int mBigram;
};
// ----------------------------------------------------------------------------
}; // namespace latinime
#endif // LATINIME_UNIGRAM_DICTIONARY_H
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment