diff --git a/java/src/com/android/inputmethod/research/FixedLogBuffer.java b/java/src/com/android/inputmethod/research/FixedLogBuffer.java
index 641bf7eae25b18f3a0f847a06d8a49c49dd04fe3..4249af544c58e881a5018d2a311a293b76c6eebb 100644
--- a/java/src/com/android/inputmethod/research/FixedLogBuffer.java
+++ b/java/src/com/android/inputmethod/research/FixedLogBuffer.java
@@ -57,28 +57,29 @@ public class FixedLogBuffer extends LogBuffer {
      */
     @Override
     public void shiftIn(final LogUnit newLogUnit) {
-        if (!newLogUnit.hasWord()) {
-            // This LogUnit isn't a word, so it doesn't count toward the word-limit.
+        if (!newLogUnit.hasOneOrMoreWords()) {
+            // This LogUnit doesn't contain any word, so it doesn't count toward the word-limit.
             super.shiftIn(newLogUnit);
             return;
         }
+        final int numWordsIncoming = newLogUnit.getNumWords();
         if (mNumActualWords >= mWordCapacity) {
             // Give subclass a chance to handle the buffer full condition by shifting out logUnits.
             onBufferFull();
             // If still full, evict.
             if (mNumActualWords >= mWordCapacity) {
-                shiftOutWords(1);
+                shiftOutWords(numWordsIncoming);
             }
         }
         super.shiftIn(newLogUnit);
-        mNumActualWords++; // Must be a word, or we wouldn't be here.
+        mNumActualWords += numWordsIncoming;
     }
 
     @Override
     public LogUnit unshiftIn() {
         final LogUnit logUnit = super.unshiftIn();
-        if (logUnit != null && logUnit.hasWord()) {
-            mNumActualWords--;
+        if (logUnit != null && logUnit.hasOneOrMoreWords()) {
+            mNumActualWords -= logUnit.getNumWords();
         }
         return logUnit;
     }
@@ -109,8 +110,8 @@ public class FixedLogBuffer extends LogBuffer {
     @Override
     public LogUnit shiftOut() {
         final LogUnit logUnit = super.shiftOut();
-        if (logUnit != null && logUnit.hasWord()) {
-            mNumActualWords--;
+        if (logUnit != null && logUnit.hasOneOrMoreWords()) {
+            mNumActualWords -= logUnit.getNumWords();
         }
         return logUnit;
     }
@@ -121,15 +122,15 @@ public class FixedLogBuffer extends LogBuffer {
      * If there are less than {@code numWords} word-containing {@link LogUnit}s, shifts out
      * all {@code LogUnit}s in the buffer.
      *
-     * @param numWords the number of word-containing {@link LogUnit}s to shift out
+     * @param numWords the minimum number of words to shift out
      * @return the number of actual {@code LogUnit}s shifted out
      */
     protected int shiftOutWords(final int numWords) {
         int numWordContainingLogUnitsShiftedOut = 0;
         for (LogUnit logUnit = shiftOut(); logUnit != null
                 && numWordContainingLogUnitsShiftedOut < numWords; logUnit = shiftOut()) {
-            if (logUnit.hasWord()) {
-                numWordContainingLogUnitsShiftedOut++;
+            if (logUnit.hasOneOrMoreWords()) {
+                numWordContainingLogUnitsShiftedOut += logUnit.getNumWords();
             }
         }
         return numWordContainingLogUnitsShiftedOut;
@@ -144,27 +145,31 @@ public class FixedLogBuffer extends LogBuffer {
     }
 
     /**
-     * Returns a list of {@link LogUnit}s at the front of the buffer that have associated words.  No
-     * more than {@code n} LogUnits will have words associated with them.  If there are not enough
-     * LogUnits in the buffer to meet the word requirement, returns the all LogUnits.
+     * Returns a list of {@link LogUnit}s at the front of the buffer that have words associated with
+     * them.
+     *
+     * There will be no more than {@code n} words in the returned list.  So if 2 words are
+     * requested, and the first LogUnit has 3 words, it is not returned.  If 2 words are requested,
+     * and the first LogUnit has only 1 word, and the next LogUnit 2 words, only the first LogUnit
+     * is returned.  If the first LogUnit has no words associated with it, and the second LogUnit
+     * has three words, then only the first LogUnit (which has no associated words) is returned.  If
+     * there are not enough LogUnits in the buffer to meet the word requirement, then all LogUnits
+     * will be returned.
      *
      * @param n The maximum number of {@link LogUnit}s with words to return.
      * @return The list of the {@link LogUnit}s containing the first n words
      */
     public ArrayList<LogUnit> peekAtFirstNWords(int n) {
         final LinkedList<LogUnit> logUnits = getLogUnits();
-        final int length = logUnits.size();
         // Allocate space for n*2 logUnits.  There will be at least n, one for each word, and
         // there may be additional for punctuation, between-word commands, etc.  This should be
         // enough that reallocation won't be necessary.
-        final ArrayList<LogUnit> list = new ArrayList<LogUnit>(n * 2);
-        for (int i = 0; i < length && n > 0; i++) {
-            final LogUnit logUnit = logUnits.get(i);
-            list.add(logUnit);
-            if (logUnit.hasWord()) {
-                n--;
-            }
+        final ArrayList<LogUnit> resultList = new ArrayList<LogUnit>(n * 2);
+        for (final LogUnit logUnit : logUnits) {
+            n -= logUnit.getNumWords();
+            if (n < 0) break;
+            resultList.add(logUnit);
         }
-        return list;
+        return resultList;
     }
 }
diff --git a/java/src/com/android/inputmethod/research/LogUnit.java b/java/src/com/android/inputmethod/research/LogUnit.java
index 1c01675bdd58f13f8e898b2e97933d0b6ab00284..4d60bda53507f1e9a7fcf4788fd83dba873a9989 100644
--- a/java/src/com/android/inputmethod/research/LogUnit.java
+++ b/java/src/com/android/inputmethod/research/LogUnit.java
@@ -25,10 +25,10 @@ import com.android.inputmethod.latin.SuggestedWords;
 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
 import com.android.inputmethod.latin.define.ProductionFlag;
 
-import java.io.IOException;
-import java.io.StringWriter;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
+import java.util.regex.Pattern;
 
 /**
  * A group of log statements related to each other.
@@ -49,27 +49,45 @@ public class LogUnit {
     private static final boolean DEBUG = false
             && ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS_DEBUG;
 
+    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
+    private static final String[] EMPTY_STRING_ARRAY = new String[0];
+
     private final ArrayList<LogStatement> mLogStatementList;
     private final ArrayList<Object[]> mValuesList;
     // Assume that mTimeList is sorted in increasing order.  Do not insert null values into
     // mTimeList.
     private final ArrayList<Long> mTimeList;
-    // Word that this LogUnit generates.  Should be null if the LogUnit does not generate a genuine
-    // word (i.e. separators alone do not count as a word).  Should never be empty.
-    private String mWord;
+    // Words that this LogUnit generates.  Should be null if the data in the LogUnit does not
+    // generate a genuine word (i.e. separators alone do not count as a word).  Should never be
+    // empty.  Note that if the user types spaces explicitly, then normally mWords should contain
+    // only a single word; it will only contain multiple space-separated words if the user does not
+    // enter a space, and the system enters one automatically.
+    private String mWords;
+    private String[] mWordArray = EMPTY_STRING_ARRAY;
     private boolean mMayContainDigit;
     private boolean mIsPartOfMegaword;
     private boolean mContainsCorrection;
 
-    // mCorrectionType indicates whether the word was corrected at all, and if so, whether it was
-    // to a different word or just a "typo" correction.  It is considered a "typo" if the final
-    // word was listed in the suggestions available the first time the word was gestured or
-    // tapped.
+    // mCorrectionType indicates whether the word was corrected at all, and if so, the nature of the
+    // correction.
     private int mCorrectionType;
+    // LogUnits start in this state.  If a word is entered without being corrected, it will have
+    // this CorrectionType.
     public static final int CORRECTIONTYPE_NO_CORRECTION = 0;
+    // The LogUnit was corrected manually by the user in an unspecified way.
     public static final int CORRECTIONTYPE_CORRECTION = 1;
+    // The LogUnit was corrected manually by the user to a word not in the list of suggestions of
+    // the first word typed here.  (Note: this is a heuristic value, it may be incorrect, for
+    // example, if the user repositions the cursor).
     public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2;
+    // The LogUnit was corrected manually by the user to a word that was in the list of suggestions
+    // of the first word typed here.  (Again, a heuristic).  It is probably a typo correction.
     public static final int CORRECTIONTYPE_TYPO = 3;
+    // TODO: Rather than just tracking the current state, keep a historical record of the LogUnit's
+    // state and statistics.  This should include how many times it has been corrected, whether
+    // other LogUnit edits were done between edits to this LogUnit, etc.  Also track when a LogUnit
+    // previously contained a word, but was corrected to empty (because it was deleted, and there is
+    // no known replacement).
 
     private SuggestedWords mSuggestedWords;
 
@@ -166,7 +184,7 @@ public class LogUnit {
         final LogStatement logStatement;
         if (canIncludePrivateData) {
             LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA.outputToLocked(jsonWriter,
-                    SystemClock.uptimeMillis(), getWord(), getCorrectionType());
+                    SystemClock.uptimeMillis(), getWordsAsString(), getCorrectionType());
         } else {
             LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA.outputToLocked(jsonWriter,
                     SystemClock.uptimeMillis());
@@ -181,22 +199,22 @@ public class LogUnit {
     }
 
     /**
-     * Mark the current logUnit as containing data to generate {@code word}.
+     * Mark the current logUnit as containing data to generate {@code newWords}.
      *
      * If {@code setWord()} was previously called for this LogUnit, then the method will try to
      * determine what kind of correction it is, and update its internal state of the correctionType
      * accordingly.
      *
-     * @param word The word this LogUnit generates.  Caller should not pass null or the empty
+     * @param newWords The words this LogUnit generates.  Caller should not pass null or the empty
      * string.
      */
-    public void setWord(final String word) {
-        if (hasWord()) {
+    public void setWords(final String newWords) {
+        if (hasOneOrMoreWords()) {
             // The word was already set once, and it is now being changed.  See if the new word
             // is close to the old word.  If so, then the change is probably a typo correction.
             // If not, the user may have decided to enter a different word, so flag it.
             if (mSuggestedWords != null) {
-                if (isInSuggestedWords(word, mSuggestedWords)) {
+                if (isInSuggestedWords(newWords, mSuggestedWords)) {
                     mCorrectionType = CORRECTIONTYPE_TYPO;
                 } else {
                     mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD;
@@ -206,38 +224,71 @@ public class LogUnit {
                 // Mark it as a generic correction.
                 mCorrectionType = CORRECTIONTYPE_CORRECTION;
             }
+        } else {
+            mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
+        }
+        mWords = newWords;
+
+        // Update mWordArray
+        mWordArray = (TextUtils.isEmpty(mWords)) ? EMPTY_STRING_ARRAY
+                : WHITESPACE_PATTERN.split(mWords);
+        if (mWordArray.length > 0 && TextUtils.isEmpty(mWordArray[0])) {
+            // Empty string at beginning of array.  Must have been whitespace at the start of the
+            // word.  Remove the empty string.
+            mWordArray = Arrays.copyOfRange(mWordArray, 1, mWordArray.length);
         }
-        mWord = word;
     }
 
-    public String getWord() {
-        return mWord;
+    public String getWordsAsString() {
+        return mWords;
+    }
+
+    /**
+     * Returns the words generated by the data in this LogUnit.
+     *
+     * A leading empty string, which results when the data in the LogUnit started by generating
+     * whitespace, is removed by {@code setWords()} before the array is stored.
+     *
+     * @return the array of words; an empty array if no words are associated with this LogUnit.
+     */
+    public String[] getWordsAsStringArray() {
+        return mWordArray;
+    }
+
+    public boolean hasOneOrMoreWords() {
+        return mWordArray.length >= 1;
     }
 
-    public boolean hasWord() {
-        return mWord != null && !TextUtils.isEmpty(mWord.trim());
+    public int getNumWords() {
+        return mWordArray.length;
     }
 
+    // TODO: Refactor to eliminate getter/setters
     public void setMayContainDigit() {
         mMayContainDigit = true;
     }
 
+    // TODO: Refactor to eliminate getter/setters
     public boolean mayContainDigit() {
         return mMayContainDigit;
     }
 
+    // TODO: Refactor to eliminate getter/setters
     public void setContainsCorrection() {
         mContainsCorrection = true;
     }
 
+    // TODO: Refactor to eliminate getter/setters
     public boolean containsCorrection() {
         return mContainsCorrection;
     }
 
+    // TODO: Refactor to eliminate getter/setters
     public void setCorrectionType(final int correctionType) {
         mCorrectionType = correctionType;
     }
 
+    // TODO: Refactor to eliminate getter/setters
     public int getCorrectionType() {
         return mCorrectionType;
     }
@@ -267,7 +318,7 @@ public class LogUnit {
                         new ArrayList<Object[]>(laterValues),
                         new ArrayList<Long>(laterTimes),
                         true /* isPartOfMegaword */);
-                newLogUnit.mWord = null;
+                newLogUnit.mWords = null;
                 newLogUnit.mMayContainDigit = mMayContainDigit;
                 newLogUnit.mContainsCorrection = mContainsCorrection;
 
@@ -287,9 +338,9 @@ public class LogUnit {
         mLogStatementList.addAll(logUnit.mLogStatementList);
         mValuesList.addAll(logUnit.mValuesList);
         mTimeList.addAll(logUnit.mTimeList);
-        mWord = null;
-        if (logUnit.mWord != null) {
-            setWord(logUnit.mWord);
+        mWords = null;
+        if (logUnit.mWords != null) {
+            setWords(logUnit.mWords);
         }
         mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit;
         mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection;
diff --git a/java/src/com/android/inputmethod/research/MainLogBuffer.java b/java/src/com/android/inputmethod/research/MainLogBuffer.java
index cd4c1db6e0b3579dbcd761a0eb7f16a2e1c03f08..42ef5d3b6f2887e45829e1d21436bc676fe90560 100644
--- a/java/src/com/android/inputmethod/research/MainLogBuffer.java
+++ b/java/src/com/android/inputmethod/research/MainLogBuffer.java
@@ -126,10 +126,7 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
             final int length = logUnits.size();
             for (int i = 0; i < length; i++) {
                 final LogUnit logUnit = logUnits.get(i);
-                final String word = logUnit.getWord();
-                if (word != null) {
-                    numWordsInLogUnitList++;
-                }
+                numWordsInLogUnitList += logUnit.getNumWords();
             }
             return numWordsInLogUnitList >= minNGramSize;
         }
@@ -153,29 +150,31 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
         // the complete buffer contents in detail.
         int numWordsInLogUnitList = 0;
         final int length = logUnits.size();
-        for (int i = 0; i < length; i++) {
-            final LogUnit logUnit = logUnits.get(i);
-            if (!logUnit.hasWord()) {
+        for (final LogUnit logUnit : logUnits) {
+            if (!logUnit.hasOneOrMoreWords()) {
                 // Digits outside words are a privacy threat.
                 if (logUnit.mayContainDigit()) {
                     return false;
                 }
             } else {
-                numWordsInLogUnitList++;
-                final String word = logUnit.getWord();
-                // Words not in the dictionary are a privacy threat.
-                if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
-                    if (DEBUG) {
-                        Log.d(TAG, "NOT SAFE!: hasLetters: " + ResearchLogger.hasLetters(word)
-                                + ", isValid: " + (dictionary.isValidWord(word)));
+                numWordsInLogUnitList += logUnit.getNumWords();
+                final String[] words = logUnit.getWordsAsStringArray();
+                for (final String word : words) {
+                    // Words not in the dictionary are a privacy threat.
+                    if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
+                        if (DEBUG) {
+                            Log.d(TAG, "\"" + word + "\" NOT SAFE!: hasLetters: "
+                                    + ResearchLogger.hasLetters(word)
+                                    + ", isValid: " + (dictionary.isValidWord(word)));
+                        }
+                        return false;
                     }
-                    return false;
                 }
             }
         }
 
-        // Finally, only return true if the minNGramSize is met.
-        return numWordsInLogUnitList >= minNGramSize;
+        // Finally, only return true if the ngram is the right size.
+        return numWordsInLogUnitList == minNGramSize;
     }
 
     public void shiftAndPublishAll() {
@@ -198,11 +197,14 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
             shiftOutWords(N_GRAM_SIZE);
             mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
         } else {
-            // No good n-gram at front, and buffer is full.  Shift out the first word (or if there
-            // is none, the existing logUnits).
-            logUnits = peekAtFirstNWords(1);
+            // No good n-gram at front, and buffer is full.  Shift out up through the first logUnit
+            // with associated words, inclusive (or if there is none, all the existing logUnits).
+            logUnits.clear();
+            for (LogUnit logUnit = shiftOut(); logUnit != null; logUnit = shiftOut()) {
+                logUnits.add(logUnit);
+                if (logUnit.hasOneOrMoreWords()) break;
+            }
             publish(logUnits, false /* canIncludePrivateData */);
-            shiftOutWords(1);
         }
     }
 
diff --git a/java/src/com/android/inputmethod/research/ResearchLogger.java b/java/src/com/android/inputmethod/research/ResearchLogger.java
index cd18e3de617856b53dba9cc3321a96198d37b076..1f6845c8b8a7213cd18a29b728696032c64ac6f2 100644
--- a/java/src/com/android/inputmethod/research/ResearchLogger.java
+++ b/java/src/com/android/inputmethod/research/ResearchLogger.java
@@ -397,13 +397,14 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
                 protected void publish(final ArrayList<LogUnit> logUnits,
                         boolean canIncludePrivateData) {
                     canIncludePrivateData |= IS_LOGGING_EVERYTHING;
-                    final int length = logUnits.size();
-                    for (int i = 0; i < length; i++) {
-                        final LogUnit logUnit = logUnits.get(i);
-                        final String word = logUnit.getWord();
-                        if (word != null && word.length() > 0 && hasLetters(word)) {
-                            Log.d(TAG, "onPublish: " + word + ", hc: "
-                                    + logUnit.containsCorrection());
+                    for (final LogUnit logUnit : logUnits) {
+                        if (DEBUG) {
+                            final String wordsString = logUnit.getWordsAsString();
+                            Log.d(TAG, "onPublish: '" + wordsString
+                                    + "', hc: " + logUnit.containsCorrection()
+                                    + ", cipd: " + canIncludePrivateData);
+                        }
+                        for (final String word : logUnit.getWordsAsStringArray()) {
                             final Dictionary dictionary = getDictionary();
                             mStatistics.recordWordEntered(
                                     dictionary != null && dictionary.isValidWord(word),
@@ -852,8 +853,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
 
     /* package for test */ void commitCurrentLogUnit() {
         if (DEBUG) {
-            Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasWord() ?
-                    ": " + mCurrentLogUnit.getWord() : ""));
+            Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasOneOrMoreWords() ?
+                    ": " + mCurrentLogUnit.getWordsAsString() : ""));
         }
         if (!mCurrentLogUnit.isEmpty()) {
             if (mMainLogBuffer != null) {
@@ -893,8 +894,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
 
         // Check that expected word matches.
         if (oldLogUnit != null) {
-            final String oldLogUnitWord = oldLogUnit.getWord();
-            if (oldLogUnitWord != null && !oldLogUnitWord.equals(expectedWord)) {
+            final String oldLogUnitWords = oldLogUnit.getWordsAsString();
+            if (oldLogUnitWords != null && !oldLogUnitWords.equals(expectedWord)) {
                 return;
             }
         }
@@ -916,7 +917,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
         enqueueEvent(LOGSTATEMENT_UNCOMMIT_CURRENT_LOGUNIT);
         if (DEBUG) {
             Log.d(TAG, "uncommitCurrentLogUnit (dump=" + dumpCurrentLogUnit + ") back to "
-                    + (mCurrentLogUnit.hasWord() ? ": '" + mCurrentLogUnit.getWord() + "'" : ""));
+                    + (mCurrentLogUnit.hasOneOrMoreWords() ? ": '"
+                        + mCurrentLogUnit.getWordsAsString() + "'" : ""));
         }
     }
 
@@ -950,8 +952,9 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
         }
         for (LogUnit logUnit : logUnits) {
             if (DEBUG) {
-                Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord()
-                        : "<wordless>") + ", correction?: " + logUnit.containsCorrection());
+                Log.d(TAG, "publishLogBuffer: " + (logUnit.hasOneOrMoreWords()
+                        ? logUnit.getWordsAsString() : "<wordless>")
+                        + ", correction?: " + logUnit.containsCorrection());
             }
             researchLog.publish(logUnit, canIncludePrivateData);
         }
@@ -986,7 +989,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
             return;
         }
         if (word.length() > 0 && hasLetters(word)) {
-            mCurrentLogUnit.setWord(word);
+            mCurrentLogUnit.setWords(word);
         }
         final LogUnit newLogUnit = mCurrentLogUnit.splitByTime(maxTime);
         enqueueCommitText(word, isBatchMode);
@@ -1478,7 +1481,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
         }
         if (originallyTypedWord.length() > 0 && hasLetters(originallyTypedWord)) {
             if (logUnit != null) {
-                logUnit.setWord(originallyTypedWord);
+                logUnit.setWords(originallyTypedWord);
             }
         }
         researchLogger.enqueueEvent(logUnit != null ? logUnit : researchLogger.mCurrentLogUnit,
@@ -1616,7 +1619,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
      * Log a call to LatinIME.commitCurrentAutoCorrection().
      *
      * SystemResponse: The IME has committed an auto-correction.  An auto-correction changes the raw
-     * text input to another word that the user more likely desired to type.
+     * text input to another word (or words) that the user more likely desired to type.
      */
     private static final LogStatement LOGSTATEMENT_LATINIME_COMMITCURRENTAUTOCORRECTION =
             new LogStatement("LatinIMECommitCurrentAutoCorrection", true, true, "typedWord",
@@ -1827,7 +1830,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
             final int enteredWordPos, final SuggestedWords suggestedWords) {
         final ResearchLogger researchLogger = getInstance();
         if (!TextUtils.isEmpty(enteredText) && hasLetters(enteredText.toString())) {
-            researchLogger.mCurrentLogUnit.setWord(enteredText.toString());
+            researchLogger.mCurrentLogUnit.setWords(enteredText.toString());
         }
         researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_ONENDBATCHINPUT, enteredText,
                 enteredWordPos);