From edd5b7365f4ed709426cd685d5506b9461c9a2f9 Mon Sep 17 00:00:00 2001
From: Tom Ouyang <ouyang@google.com>
Date: Tue, 25 Sep 2012 17:04:35 -0700
Subject: [PATCH] Fix lower case conversion bug for some characters

Bug: 7232296
Change-Id: Iaf3f6be55f1bdc2294533938bb54fedcf25fb0cb
---
 native/jni/src/char_utils.cpp | 2 ++
 native/jni/src/char_utils.h   | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/char_utils.cpp
index 9d886da31a..d0547a9827 100644
--- a/native/jni/src/char_utils.cpp
+++ b/native/jni/src/char_utils.cpp
@@ -88,6 +88,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
     { 0x00C5, 0x00E5 },  // LATIN CAPITAL LETTER A WITH RING ABOVE
     { 0x00C6, 0x00E6 },  // LATIN CAPITAL LETTER AE
     { 0x00D0, 0x00F0 },  // LATIN CAPITAL LETTER ETH
+    { 0x00D1, 0x00F1 },  // LATIN CAPITAL LETTER N WITH TILDE
     { 0x00D5, 0x00F5 },  // LATIN CAPITAL LETTER O WITH TILDE
     { 0x00D6, 0x00F6 },  // LATIN CAPITAL LETTER O WITH DIAERESIS
     { 0x00D8, 0x00F8 },  // LATIN CAPITAL LETTER O WITH STROKE
@@ -219,6 +220,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
     { 0x0416, 0x0436 },  // CYRILLIC CAPITAL LETTER ZHE
     { 0x0417, 0x0437 },  // CYRILLIC CAPITAL LETTER ZE
     { 0x0418, 0x0438 },  // CYRILLIC CAPITAL LETTER I
+    { 0x0419, 0x0439 },  // CYRILLIC CAPITAL LETTER SHORT I
     { 0x041A, 0x043A },  // CYRILLIC CAPITAL LETTER KA
     { 0x041B, 0x043B },  // CYRILLIC CAPITAL LETTER EL
     { 0x041C, 0x043C },  // CYRILLIC CAPITAL LETTER EM
diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h
index b17f262ec8..20cf2e8b52 100644
--- a/native/jni/src/char_utils.h
+++ b/native/jni/src/char_utils.h
@@ -23,7 +23,9 @@
 namespace latinime {
 
 inline static bool isAsciiUpper(unsigned short c) {
-    return isupper(static_cast<int>(c)) != 0;
+    // isupper(...) is only defined for unsigned char/EOF; it gave false positives for some Cyrillic
+    // characters, which were then lower-cased by toAsciiLower(...) instead of latin_tolower(...).
+    return (c >= 'A' && c <= 'Z');
 }
 
 inline static unsigned short toAsciiLower(unsigned short c) {
-- 
GitLab