Skip to content
Snippets Groups Projects
Commit edd5b736 authored by Tom Ouyang
Browse files

Fix lower case conversion bug for some characters

Bug: 7232296
Change-Id: Iaf3f6be55f1bdc2294533938bb54fedcf25fb0cb
parent a161bdac
No related branches found
No related tags found
No related merge requests found
......@@ -88,6 +88,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
{ 0x00C5, 0x00E5 }, // LATIN CAPITAL LETTER A WITH RING ABOVE
{ 0x00C6, 0x00E6 }, // LATIN CAPITAL LETTER AE
{ 0x00D0, 0x00F0 }, // LATIN CAPITAL LETTER ETH
{ 0x00D1, 0x00F1 }, // LATIN CAPITAL LETTER N WITH TILDE
{ 0x00D5, 0x00F5 }, // LATIN CAPITAL LETTER O WITH TILDE
{ 0x00D6, 0x00F6 }, // LATIN CAPITAL LETTER O WITH DIAERESIS
{ 0x00D8, 0x00F8 }, // LATIN CAPITAL LETTER O WITH STROKE
......@@ -219,6 +220,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
{ 0x0416, 0x0436 }, // CYRILLIC CAPITAL LETTER ZHE
{ 0x0417, 0x0437 }, // CYRILLIC CAPITAL LETTER ZE
{ 0x0418, 0x0438 }, // CYRILLIC CAPITAL LETTER I
{ 0x0419, 0x0439 }, // CYRILLIC CAPITAL LETTER SHORT I
{ 0x041A, 0x043A }, // CYRILLIC CAPITAL LETTER KA
{ 0x041B, 0x043B }, // CYRILLIC CAPITAL LETTER EL
{ 0x041C, 0x043C }, // CYRILLIC CAPITAL LETTER EM
......
......@@ -23,7 +23,9 @@
namespace latinime {
// Returns true only when |c| is one of the ASCII capital letters 'A'..'Z'.
//
// Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
// be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
// An explicit range check is used instead so that every code point outside 'A'..'Z'
// is reported as "not ASCII upper case".
inline static bool isAsciiUpper(unsigned short c) {
return (c >= 'A' && c <= 'Z');
}
inline static unsigned short toAsciiLower(unsigned short c) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment