Skip to content
Snippets Groups Projects
Commit 17284659 authored by Xiaojun Bi's avatar Xiaojun Bi
Browse files

Use DistracterFilter when getting Language Model Param of a word

A preparation for implementing Distracter Filter.

This CL should be checked in together with Icfefa0463180

Bug: 13142176

Change-Id: If9e5ca840c4a81cb0339ab70f146b42fbfa7af5d
parent 0de529b4
No related branches found
No related tags found
No related merge requests found
......@@ -40,7 +40,7 @@ public class DistracterFilter {
mKeyboard = keyboard;
}
public boolean isDistractorToWordsInDictionaries(final String prevWord,
public boolean isDistracterToWordsInDictionaries(final String prevWord,
final String targetWord) {
// TODO: to be implemented
return false;
......
......@@ -80,7 +80,8 @@ public final class LanguageModelParam {
public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
final ArrayList<String> tokens, final int timestamp,
final DictionaryFacilitatorForSuggest dictionaryFacilitator,
final SpacingAndPunctuations spacingAndPunctuations) {
final SpacingAndPunctuations spacingAndPunctuations,
final DistracterFilter distracterFilter) {
final ArrayList<LanguageModelParam> languageModelParams =
CollectionUtils.newArrayList();
final int N = tokens.size();
......@@ -109,7 +110,8 @@ public final class LanguageModelParam {
}
final LanguageModelParam languageModelParam =
detectWhetherVaildWordOrNotAndGetLanguageModelParam(
prevWord, tempWord, timestamp, dictionaryFacilitator);
prevWord, tempWord, timestamp, dictionaryFacilitator,
distracterFilter);
if (languageModelParam == null) {
continue;
}
......@@ -121,27 +123,33 @@ public final class LanguageModelParam {
private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam(
final String prevWord, final String targetWord, final int timestamp,
final DictionaryFacilitatorForSuggest dictionaryFacilitator) {
final DictionaryFacilitatorForSuggest dictionaryFacilitator,
final DistracterFilter distracterFilter) {
final Locale locale = dictionaryFacilitator.getLocale();
if (locale == null) {
return null;
}
if (!dictionaryFacilitator.isValidWord(targetWord, true /* ignoreCase */)) {
// OOV word.
return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
false /* isValidWord */, locale);
}
if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) {
return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
true /* isValidWord */, locale);
}
final String lowerCaseTargetWord = targetWord.toLowerCase(locale);
if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) {
// Add the lower-cased word.
return createAndGetLanguageModelParamOfWord(prevWord, lowerCaseTargetWord,
timestamp, true /* isValidWord */, locale);
}
// Treat the word as an OOV word.
// Treat the word as an OOV word. The following statement checks whether this OOV
// is a distracter to words in dictionaries. Being a distracter means the OOV word is
// too close to a common word in dictionaries (e.g., the OOV "mot" is very close to "not").
// Adding such a word to dictonaries would interfere with entering in-dictionary words. For
// example, adding "mot" to dictionaries might interfere with entering "not".
// This kind of OOV should be filtered out.
if (distracterFilter.isDistracterToWordsInDictionaries(prevWord, targetWord)) {
return null;
}
return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
false /* isValidWord */, locale);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment