Skip to content
Snippets Groups Projects
Commit da99cfc2 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Introduce OffdeviceIntermediateDict for dicttolkit."

parents 7a3e6242 cd105409
No related branches found
No related tags found
No related merge requests found
......@@ -22,8 +22,13 @@ LATIN_IME_DICT_TOOLKIT_SRC_FILES := \
help_executor.cpp \
info_executor.cpp \
makedict_executor.cpp) \
$(addprefix offdevice_intermediate_dict/, \
offdevice_intermediate_dict.cpp) \
utils/command_utils.cpp
LATIN_IME_DICT_TOOLKIT_TEST_FILES := \
dict_toolkit_defines_test.cpp \
utils/command_utils_test.cpp
$(addprefix offdevice_intermediate_dict/, \
offdevice_intermediate_dict_test.cpp) \
$(addprefix utils/, \
command_utils_test.cpp)
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h"
namespace latinime {
namespace dicttoolkit {
bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) {
const CodePointArrayView codePoints = wordProperty.getCodePoints();
if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) {
return false;
}
return addWordInner(codePoints, wordProperty, mRootPtNodeArray);
}
bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints,
const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) {
auto ptNodeList = ptNodeArray.getMutablePtNodeList();
auto ptNodeIt = ptNodeList->begin();
for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) {
const auto &ptNode = *ptNodeIt;
const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
if (codePoints[0] < ptNodeCodePoints[0]) {
continue;
}
if (codePoints[0] > ptNodeCodePoints[0]) {
break;
}
size_t i = 1;
for (; i < codePoints.size(); ++i) {
if (i >= ptNodeCodePoints.size()) {
// Add new child.
return addWordInner(codePoints.skip(i), wordProperty,
ptNode->getChildrenPtNodeArray());
}
if (codePoints[i] != ptNodeCodePoints[i]) {
break;
}
}
if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) {
// All code points matched.
if (ptNode->getWordProperty()) {
// Adding the same word multiple times is not supported.
return false;
}
ptNodeList->insert(ptNodeIt,
std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode));
ptNodeList->erase(ptNodeIt);
return true;
}
// The (i+1)-th elements are different.
// Create and Add new parent ptNode for the common part.
auto newPtNode = codePoints.size() == i
? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)
: std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i));
ptNodeList->insert(ptNodeIt, newPtNode);
OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray =
newPtNode->getChildrenPtNodeArray();
// Add new child for the existing ptNode.
childrenPtNodeArray.getMutablePtNodeList()->push_back(
std::make_shared<OffdeviceIntermediateDictPtNode>(
ptNodeCodePoints.skip(i), *ptNode));
ptNodeList->erase(ptNodeIt);
if (codePoints.size() != i) {
// Add a child for the new word.
return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray);
}
return true;
}
ptNodeList->insert(ptNodeIt,
std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty));
return true;
}
const WordProperty *OffdeviceIntermediateDict::getWordProperty(
const CodePointArrayView codePoints) const {
const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray;
for (size_t i = 0; i < codePoints.size();) {
bool foundNext = false;
for (const auto ptNode : ptNodeArray->getPtNodeList()) {
const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
if (codePoints[i] < ptNodeCodePoints[0]) {
continue;
}
if (codePoints[i] > ptNodeCodePoints[0]
|| codePoints.size() < ptNodeCodePoints.size()) {
return nullptr;
}
for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) {
if (codePoints[i + j] != ptNodeCodePoints[j]) {
return nullptr;
}
}
i += ptNodeCodePoints.size();
if (i == codePoints.size()) {
return ptNode->getWordProperty();
}
ptNodeArray = &ptNode->getChildrenPtNodeArray();
foundNext = true;
break;
}
if (!foundNext) {
break;
}
}
return nullptr;
}
} // namespace dicttoolkit
} // namespace latinime
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
#include "dict_toolkit_defines.h"
#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "utils/int_array_view.h"
namespace latinime {
namespace dicttoolkit {
/**
* On memory patricia trie to represent a dictionary.
*/
class OffdeviceIntermediateDict final {
public:
bool addWord(const WordProperty &wordProperty);
// The returned value will be invalid after modifying the dictionary. e.g. calling addWord().
const WordProperty *getWordProperty(const CodePointArrayView codePoints) const;
private:
DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict);
OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray;
bool addWordInner(const CodePointArrayView codePoints, const WordProperty &wordProperty,
OffdeviceIntermediateDictPtNodeArray &ptNodeArray);
};
} // namespace dicttoolkit
} // namespace latinime
#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
#include <memory>
#include "dict_toolkit_defines.h"
#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "utils/int_array_view.h"
namespace latinime {
namespace dicttoolkit {
class OffdeviceIntermediateDictPtNode final {
public:
// Non-terminal
OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints)
: mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
mWortProperty(nullptr) {}
// Terminal
OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
const WordProperty &wordProperty)
: mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
mWortProperty(new WordProperty(wordProperty)) {}
// Replacing PtNodeCodePoints.
OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
const OffdeviceIntermediateDictPtNode &ptNode)
: mPtNodeCodePoints(ptNodeCodePoints.toVector()),
mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
mWortProperty(new WordProperty(*ptNode.mWortProperty)) {}
// Replacing WordProperty.
OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty,
const OffdeviceIntermediateDictPtNode &ptNode)
: mPtNodeCodePoints(ptNode.mPtNodeCodePoints),
mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
mWortProperty(new WordProperty(wordProperty)) {}
const WordProperty *getWordProperty() const {
return mWortProperty.get();
}
const CodePointArrayView getPtNodeCodePoints() const {
return CodePointArrayView(mPtNodeCodePoints);
}
OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() {
return mChildrenPtNodeArray;
}
private:
DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode);
const std::vector<int> mPtNodeCodePoints;
OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray;
const std::unique_ptr<WordProperty> mWortProperty;
};
} // namespace dicttoolkit
} // namespace latinime
#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
#include <list>
#include <memory>
#include "dict_toolkit_defines.h"
namespace latinime {
namespace dicttoolkit {
class OffdeviceIntermediateDictPtNode;
class OffdeviceIntermediateDictPtNodeArray final {
public:
const std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> &getPtNodeList() const {
return mPtNodes;
}
std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> *getMutablePtNodeList() {
return &mPtNodes;
}
private:
DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray);
std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> mPtNodes;
};
} // namespace dicttoolkit
} // namespace latinime
#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
#include <gtest/gtest.h>
#include <vector>
#include "suggest/core/dictionary/property/word_property.h"
#include "utils/int_array_view.h"
namespace latinime {
namespace dicttoolkit {
namespace {
const std::vector<int> getCodePointVector(const char *str) {
std::vector<int> codePoints;
while (*str) {
codePoints.push_back(*str);
++str;
}
return codePoints;
}
const WordProperty getDummpWordProperty(const std::vector<int> &&codePoints) {
return WordProperty(std::move(codePoints), UnigramProperty(), std::vector<NgramProperty>());
}
TEST(OffdeviceIntermediateDictTest, TestAddWordProperties) {
OffdeviceIntermediateDict dict;
EXPECT_EQ(nullptr, dict.getWordProperty(CodePointArrayView()));
const WordProperty wordProperty0 = getDummpWordProperty(getCodePointVector("abcd"));
EXPECT_TRUE(dict.addWord(wordProperty0));
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints()));
const WordProperty wordProperty1 = getDummpWordProperty(getCodePointVector("efgh"));
EXPECT_TRUE(dict.addWord(wordProperty1));
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints()));
const WordProperty wordProperty2 = getDummpWordProperty(getCodePointVector("ab"));
EXPECT_TRUE(dict.addWord(wordProperty2));
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints()));
const WordProperty wordProperty3 = getDummpWordProperty(getCodePointVector("abcdefg"));
EXPECT_TRUE(dict.addWord(wordProperty3));
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints()));
const WordProperty wordProperty4 = getDummpWordProperty(getCodePointVector("efef"));
EXPECT_TRUE(dict.addWord(wordProperty4));
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints()));
const WordProperty wordProperty5 = getDummpWordProperty(getCodePointVector("ef"));
EXPECT_TRUE(dict.addWord(wordProperty5));
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints()));
const WordProperty wordProperty6 = getDummpWordProperty(getCodePointVector("abcd"));
EXPECT_FALSE(dict.addWord(wordProperty6)) << "Adding the same word multiple times should fail.";
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints()));
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints()));
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints()));
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints()));
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints()));
EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints()));
}
} // namespace
} // namespace dicttoolkit
} // namespace latinime
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment