Skip to content
Snippets Groups Projects
Commit edbf6b72 authored by Tom Ouyang's avatar Tom Ouyang Committed by Android (Google) Code Review
Browse files

Merge "Fix lower case conversion bug for some characters" into jb-mr1-dev

parents 270c0dec edd5b736
No related branches found
No related tags found
No related merge requests found
......@@ -88,6 +88,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
{ 0x00C5, 0x00E5 }, // LATIN CAPITAL LETTER A WITH RING ABOVE
{ 0x00C6, 0x00E6 }, // LATIN CAPITAL LETTER AE
{ 0x00D0, 0x00F0 }, // LATIN CAPITAL LETTER ETH
{ 0x00D1, 0x00F1 }, // LATIN CAPITAL LETTER N WITH TILDE
{ 0x00D5, 0x00F5 }, // LATIN CAPITAL LETTER O WITH TILDE
{ 0x00D6, 0x00F6 }, // LATIN CAPITAL LETTER O WITH DIAERESIS
{ 0x00D8, 0x00F8 }, // LATIN CAPITAL LETTER O WITH STROKE
......@@ -219,6 +220,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
{ 0x0416, 0x0436 }, // CYRILLIC CAPITAL LETTER ZHE
{ 0x0417, 0x0437 }, // CYRILLIC CAPITAL LETTER ZE
{ 0x0418, 0x0438 }, // CYRILLIC CAPITAL LETTER I
{ 0x0419, 0x0439 }, // CYRILLIC CAPITAL LETTER SHORT I
{ 0x041A, 0x043A }, // CYRILLIC CAPITAL LETTER KA
{ 0x041B, 0x043B }, // CYRILLIC CAPITAL LETTER EL
{ 0x041C, 0x043C }, // CYRILLIC CAPITAL LETTER EM
......
......@@ -23,7 +23,9 @@
namespace latinime {
inline static bool isAsciiUpper(unsigned short c) {
return isupper(static_cast<int>(c)) != 0;
// Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
// be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
return (c >= 'A' && c <= 'Z');
}
inline static unsigned short toAsciiLower(unsigned short c) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment