diff --git a/java/res/values-el/donottranslate-config-spacing-and-punctuations.xml b/java/res/values-el/donottranslate-config-spacing-and-punctuations.xml new file mode 100644 index 0000000000000000000000000000000000000000..117c075a6c52e079c4665f06643556d304f41808 --- /dev/null +++ b/java/res/values-el/donottranslate-config-spacing-and-punctuations.xml @@ -0,0 +1,23 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- +/* +** +** Copyright 2014, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ +--> +<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> + <!-- Symbols that terminate sentences and require capitalization on the next char --> + <string name="symbols_sentence_terminators">.;!?</string> +</resources> diff --git a/java/res/values/donottranslate-config-spacing-and-punctuations.xml b/java/res/values/donottranslate-config-spacing-and-punctuations.xml index 2faf578d276525d90ed4e4251a0ce68619a1c8fc..06144d021f28173c7889eda1bd2fdc78b266ff95 100644 --- a/java/res/values/donottranslate-config-spacing-and-punctuations.xml +++ b/java/res/values/donottranslate-config-spacing-and-punctuations.xml @@ -33,9 +33,14 @@ <string name="symbols_word_separators">"	 
 "()[]{}*&<>+=|.,;:!?/_\"</string> <!-- Word connectors --> <string name="symbols_word_connectors">\'-</string> - <!-- The sentence separator code point, for capitalization --> + <!-- The sentence separator code point, for capitalization and auto-insertion --> <!-- U+002E: "." FULL STOP ; 2Eh = 46d --> <integer name="sentence_separator">46</integer> + <!-- The abbreviation marker code point --> + <!-- U+002E: "." FULL STOP ; 2Eh = 46d --> + <integer name="abbreviation_marker">46</integer> + <!-- Symbols that terminate sentences and require capitalization on the next char --> + <string name="symbols_sentence_terminators">.?!</string> <!-- Whether this language uses spaces between words --> <bool name="current_language_has_spaces">true</bool> </resources> diff --git a/java/src/com/android/inputmethod/latin/settings/SpacingAndPunctuations.java b/java/src/com/android/inputmethod/latin/settings/SpacingAndPunctuations.java index b8d2a2248fb6eaf27b45cee6cfe7356fd9172aae..8f2e9de61a516f694d605509edb3ebf4e2a4531a 100644 --- a/java/src/com/android/inputmethod/latin/settings/SpacingAndPunctuations.java +++ b/java/src/com/android/inputmethod/latin/settings/SpacingAndPunctuations.java @@ -35,6 +35,8 @@ public final class SpacingAndPunctuations { public final int[] mSortedWordSeparators; public final PunctuationSuggestions mSuggestPuncList; private final int mSentenceSeparator; + private final int mAbbreviationMarker; + private final int[] mSortedSentenceTerminators; public final String mSentenceSeparatorAndSpace; public final boolean mCurrentLanguageHasSpaces; public final boolean mUsesAmericanTypography; @@ -54,7 +56,10 @@ public final class SpacingAndPunctuations { res.getString(R.string.symbols_word_connectors)); mSortedWordSeparators = StringUtils.toSortedCodePointArray( res.getString(R.string.symbols_word_separators)); + mSortedSentenceTerminators = StringUtils.toSortedCodePointArray( + res.getString(R.string.symbols_sentence_terminators)); mSentenceSeparator = res.getInteger(R.integer.sentence_separator); + mAbbreviationMarker = res.getInteger(R.integer.abbreviation_marker); mSentenceSeparatorAndSpace = new String(new int[] { mSentenceSeparator, Constants.CODE_SPACE }, 0, 2); mCurrentLanguageHasSpaces = res.getBoolean(R.bool.current_language_has_spaces); @@ -92,6 +97,14 @@ public final class SpacingAndPunctuations { return Arrays.binarySearch(mSortedSymbolsClusteringTogether, code) >= 0; } + public boolean isSentenceTerminator(final int code) { + return Arrays.binarySearch(mSortedSentenceTerminators, code) >= 0; + } + + public boolean isAbbreviationMarker(final int code) { + return code == mAbbreviationMarker; + } + public boolean isSentenceSeparator(final int code) { return code == mSentenceSeparator; } diff --git a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java index 92bd02c54af3e073a38a1e82987f9b6d0ed5f4f1..02f1c5f00a8eb86958f1fc5f21b193d27de1a067 100644 --- a/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/CapsModeUtils.java @@ -213,13 +213,22 @@ public final class CapsModeUtils { char c = cs.charAt(--j); // We found the next interesting chunk of text ; next we need to determine if it's the - // end of a sentence. If we have a question mark or an exclamation mark, it's the end of - // a sentence. If it's neither, the only remaining case is the period so we get the opposite - // case out of the way. - if (c == Constants.CODE_QUESTION_MARK || c == Constants.CODE_EXCLAMATION_MARK) { + // end of a sentence. If we have a sentence terminator (typically a question mark or an + // exclamation mark), then it's the end of a sentence; however, we treat the abbreviation + // marker specially because usually is the same char as the sentence separator (the + // period in most languages) and in this case we need to apply a heuristic to determine + // in which of these senses it's used. + if (spacingAndPunctuations.isSentenceTerminator(c) + && !spacingAndPunctuations.isAbbreviationMarker(c)) { return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS | TextUtils.CAP_MODE_SENTENCES) & reqModes; } + // If we reach here, we know we have whitespace before the cursor and before that there + // is something that either does not terminate the sentence, or a symbol preceded by the + // start of the text, or it's the sentence separator AND it happens to be the same code + // point as the abbreviation marker. + // If it's a symbol or something that does not terminate the sentence, then we need to + // return caps for MODE_CHARACTERS and MODE_WORDS, but not for MODE_SENTENCES. if (!spacingAndPunctuations.isSentenceSeparator(c) || j <= 0) { return (TextUtils.CAP_MODE_CHARACTERS | TextUtils.CAP_MODE_WORDS) & reqModes; } diff --git a/tests/src/com/android/inputmethod/latin/utils/CapsModeUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/CapsModeUtilsTests.java index 5d366476593aa1532de8c77c58bdc5fbde645dc8..4646a823d109208a9cafbc93d74fdde338e4254c 100644 --- a/tests/src/com/android/inputmethod/latin/utils/CapsModeUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/utils/CapsModeUtilsTests.java @@ -136,5 +136,17 @@ public class CapsModeUtilsTests extends AndroidTestCase { allPathsForCaps("Word; ", c | w, sp, false); allPathsForCaps("Word;", c | w, sp, true); allPathsForCaps("Word;", c, sp, false); + // Test for sentence terminators in Greek + sp = job.runInLocale(res, LocaleUtils.constructLocaleFromString("el")); + allPathsForCaps("Word? ", c | w | s, sp, false); + allPathsForCaps("Word?", c | w | s, sp, true); + allPathsForCaps("Word?", c, sp, false); + allPathsForCaps("Word! ", c | w | s, sp, false); + allPathsForCaps("Word!", c | w | s, sp, true); + allPathsForCaps("Word!", c, sp, false); + // In Greek ";" is the question mark and it terminates the sentence + allPathsForCaps("Word; ", c | w | s, sp, false); + allPathsForCaps("Word;", c | w | s, sp, true); + allPathsForCaps("Word;", c, sp, false); } }