From 76f614720db6a282c3e890969a600585122cdcc5 Mon Sep 17 00:00:00 2001
From: Marc Blank <mblank@google.com>
Date: Sat, 26 Mar 2011 19:19:35 -0700
Subject: [PATCH] Highlight utility for HTML and text

* Add utilities for adding highlighting markup to HTML and
  highlighting spans to plain text (for Webview and UI,
  respectively)
* Rename Snippet.java to TextUtilities.java and move to utility
  package

Change-Id: Ic443ab5ce9c0199fa82a68e1592bf259494cadd2
---
 .../emailcommon/provider/EmailContent.java    |   6 +-
 .../utility/ConversionUtilities.java          |   5 +-
 .../TextUtilities.java}                       | 282 +++++++++++++++--
 .../android/email/provider/ProviderTests.java |   9 +-
 .../emailcommon/mail/SnippetTests.java        | 183 -----------
 .../utility/TextUtilitiesTests.java           | 298 ++++++++++++++++++
 6 files changed, 562 insertions(+), 221 deletions(-)
 rename emailcommon/src/com/android/emailcommon/{mail/Snippet.java => utility/TextUtilities.java} (63%)
 delete mode 100644 tests/src/com/android/emailcommon/mail/SnippetTests.java
 create mode 100644 tests/src/com/android/emailcommon/utility/TextUtilitiesTests.java

diff --git a/emailcommon/src/com/android/emailcommon/provider/EmailContent.java b/emailcommon/src/com/android/emailcommon/provider/EmailContent.java
index 19f86db30..1bc77150b 100644
--- a/emailcommon/src/com/android/emailcommon/provider/EmailContent.java
+++ b/emailcommon/src/com/android/emailcommon/provider/EmailContent.java
@@ -16,7 +16,7 @@
 
 package com.android.emailcommon.provider;
 
-import com.android.emailcommon.mail.Snippet;
+import com.android.emailcommon.utility.TextUtilities;
 import com.android.emailcommon.utility.Utility;
 
 import android.content.ContentProviderOperation;
@@ -839,9 +839,9 @@ public abstract class EmailContent {
             ContentProviderOperation.Builder b = ContentProviderOperation.newInsert(mBaseUri);
             // Generate the snippet here, before we create the CPO for Message
             if (mText != null) {
-                mSnippet = Snippet.fromPlainText(mText);
+                mSnippet = TextUtilities.makeSnippetFromPlainText(mText);
             } else if (mHtml != null) {
-                mSnippet = Snippet.fromHtmlText(mHtml);
+                mSnippet = TextUtilities.makeSnippetFromHtmlText(mHtml);
             }
             ops.add(b.withValues(toContentValues()).build());
 
diff --git a/emailcommon/src/com/android/emailcommon/utility/ConversionUtilities.java b/emailcommon/src/com/android/emailcommon/utility/ConversionUtilities.java
index dca31d647..41ba12d14 100644
--- a/emailcommon/src/com/android/emailcommon/utility/ConversionUtilities.java
+++ b/emailcommon/src/com/android/emailcommon/utility/ConversionUtilities.java
@@ -20,7 +20,6 @@ import com.android.emailcommon.internet.MimeHeader;
 import com.android.emailcommon.internet.MimeUtility;
 import com.android.emailcommon.mail.MessagingException;
 import com.android.emailcommon.mail.Part;
-import com.android.emailcommon.mail.Snippet;
 import com.android.emailcommon.provider.EmailContent;
 
 import android.text.TextUtils;
@@ -116,13 +115,13 @@ public class ConversionUtilities {
         if (!TextUtils.isEmpty(sbText)) {
             String text = sbText.toString();
             body.mTextContent = text;
-            localMessage.mSnippet = Snippet.fromPlainText(text);
+            localMessage.mSnippet = TextUtilities.makeSnippetFromPlainText(text);
         }
         if (!TextUtils.isEmpty(sbHtml)) {
             String text = sbHtml.toString();
             body.mHtmlContent = text;
             if (localMessage.mSnippet == null) {
-                localMessage.mSnippet = Snippet.fromHtmlText(text);
+                localMessage.mSnippet = TextUtilities.makeSnippetFromHtmlText(text);
             }
         }
         if (sbHtmlReply != null && sbHtmlReply.length() != 0) {
diff --git a/emailcommon/src/com/android/emailcommon/mail/Snippet.java b/emailcommon/src/com/android/emailcommon/utility/TextUtilities.java
similarity index 63%
rename from emailcommon/src/com/android/emailcommon/mail/Snippet.java
rename to emailcommon/src/com/android/emailcommon/utility/TextUtilities.java
index 38f982e5d..1e0abd9d8 100644
--- a/emailcommon/src/com/android/emailcommon/mail/Snippet.java
+++ b/emailcommon/src/com/android/emailcommon/utility/TextUtilities.java
@@ -14,38 +14,39 @@
  * limitations under the License.
  */
 
-package com.android.emailcommon.mail;
+package com.android.emailcommon.utility;
 
+import android.graphics.Color;
+import android.text.Spannable;
+import android.text.SpannableString;
+import android.text.SpannableStringBuilder;
 import android.text.TextUtils;
+import android.text.style.BackgroundColorSpan;
 
+import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.StringTokenizer;
+
+public class TextUtilities {
+    // Highlight color is yellow, as in other apps.
+    // TODO Push for this to be a global (style-related?) constant
+    private static final int HIGHLIGHT_COLOR_INT = Color.YELLOW;
+    /*package*/ static final String HIGHLIGHT_COLOR_STRING =
+        '#' + Integer.toHexString(HIGHLIGHT_COLOR_INT);
 
-/**
- * Class to generate a short 'snippet' from either plain text or html text
- *
- * If the sync protocol can get plain text, that's great, but we'll still strip out extraneous
- * whitespace.  If it's HTML, we'll 1) strip out tags, 2) turn entities into the appropriate
- * characters, and 3) strip out extraneous whitespace, all in one pass
- *
- * Why not use an existing class?  The best answer is performance; yet another answer is
- * correctness (e.g. Html.textFromHtml simply doesn't generate well-stripped text).  But performance
- * is key; we frequently sync text that is 10K or (much) longer, yet we really only care about a
- * small amount of text for the snippet.  So it's critically important that we just stop when we've
- * gotten enough; existing methods that exist will go through the entire incoming string, at great
- * (and useless) expense.
- */
-public class Snippet {
     // This is how many chars we'll allow in a snippet
-    private static final int MAX_PLAIN_TEXT_SCAN_LENGTH = 200;
+    private static final int MAX_SNIPPET_LENGTH = 200;
     // For some reason, isWhitespace() returns false with the following...
     /*package*/ static final char NON_BREAKING_SPACE_CHARACTER = (char)160;
 
     // Tags whose content must be stripped as well
     static final String[] STRIP_TAGS =
         new String[] {"title", "script", "style", "applet", "head"};
-    // The number of characters we peel off for testing against STRIP_TAGS
-    static final int STRIP_TAG_LENGTH = 6;
+    // The number of characters we peel off for testing against STRIP_TAGS; this should be the
+    // maximum size of the strings in STRIP_TAGS
+    static final int MAX_STRIP_TAG_LENGTH = 6;
 
     static final Map<String, Character> ESCAPE_STRINGS;
     static {
@@ -307,12 +308,27 @@ public class Snippet {
         ESCAPE_STRINGS.put("&euro", '\u20AC');
     }
 
-    public static String fromHtmlText(String text) {
-        return fromText(text, true);
+    /**
+     * Code to generate a short 'snippet' from either plain text or html text
+     *
+     * If the sync protocol can get plain text, that's great, but we'll still strip out extraneous
+     * whitespace.  If it's HTML, we'll 1) strip out tags, 2) turn entities into the appropriate
+     * characters, and 3) strip out extraneous whitespace, all in one pass
+     *
+     * Why not use an existing class?  The best answer is performance; yet another answer is
+     * correctness (e.g. Html.textFromHtml simply doesn't generate well-stripped text).  But
+     * performance is key; we frequently sync text that is 10K or (much) longer, yet we really only
+     * care about a small amount of text for the snippet.  So it's critically important that we just
+     * stop when we've gotten enough; existing methods will go through the entire
+     * incoming string, at great (and useless, in this case) expense.
+     */
+
+    public static String makeSnippetFromHtmlText(String text) {
+        return makeSnippetFromText(text, true);
     }
 
-    public static String fromPlainText(String text) {
-        return fromText(text, false);
+    public static String makeSnippetFromPlainText(String text) {
+        return makeSnippetFromText(text, false);
     }
 
     /**
@@ -342,13 +358,13 @@ public class Snippet {
         return htmlText.indexOf("/" + tag, startPos);
     }
 
-    public static String fromText(String text, boolean stripHtml) {
+    public static String makeSnippetFromText(String text, boolean stripHtml) {
         // Handle null and empty string
         if (TextUtils.isEmpty(text)) return "";
 
         final int length = text.length();
         // Use char[] instead of StringBuilder purely for performance; fewer method calls, etc.
-        char[] buffer = new char[MAX_PLAIN_TEXT_SCAN_LENGTH];
+        char[] buffer = new char[MAX_SNIPPET_LENGTH];
         // skipCount is an array of a single int; that int is set inside stripHtmlEntity and is
         // used to determine how many characters can be "skipped" due to the transformation of the
         // entity to a single character.  When Java allows multiple return values, we can make this
@@ -361,7 +377,7 @@ public class Snippet {
         boolean inTag = false;
 
         // Walk through the text until we're done with the input OR we've got a large enough snippet
-        for (int i = 0; i < length && bufferCount < MAX_PLAIN_TEXT_SCAN_LENGTH; i++) {
+        for (int i = 0; i < length && bufferCount < MAX_SNIPPET_LENGTH; i++) {
             char c = text.charAt(i);
             if (stripHtml && !inTag && (c == '<')) {
                 // Find tags to strip; they will begin with <! or !- or </ or <letter
@@ -370,8 +386,8 @@ public class Snippet {
                     if (peek == '!' || peek == '-' || peek == '/' || Character.isLetter(peek)) {
                         inTag = true;
                         // Strip content of title, script, style and applet tags
-                        if (i < (length - (STRIP_TAG_LENGTH + 2))) {
-                            String tag = text.substring(i + 1, i + STRIP_TAG_LENGTH + 1);
+                        if (i < (length - (MAX_STRIP_TAG_LENGTH + 2))) {
+                            String tag = text.substring(i + 1, i + MAX_STRIP_TAG_LENGTH + 1);
                             String tagLowerCase = tag.toLowerCase();
                             boolean stripContent = false;
                             for (String stripTag: STRIP_TAGS) {
@@ -484,4 +500,214 @@ public class Snippet {
         return '&';
     }
 
+    /**
+     * Given a string of HTML text and a query containing any number of search terms, returns
+     * an HTML string in which those search terms are highlighted (intended for use in a WebView)
+     *
+     * @param text the HTML text to process
+     * @param query the search terms
+     * @return HTML text with the search terms highlighted
+     */
+    public static String highlightTermsInHtml(String text, String query) {
+        try {
+            return highlightTerms(text, query, true).toString();
+        } catch (IOException e) {
+            // Can't happen, but we must catch this
+            return text;
+        }
+    }
+
+    /**
+     * Given a string of plain text and a query containing any number of search terms, returns
+     * a CharSequence in which those search terms are highlighted (intended for use in a TextView)
+     *
+     * @param text the text to process
+     * @param query the search terms
+     * @return a CharSequence with the search terms highlighted
+     */
+    public static CharSequence highlightTermsInText(String text, String query) {
+        try {
+            return highlightTerms(text, query, false);
+        } catch (IOException e) {
+            // Can't happen, but we must catch this
+            return text;
+        }
+    }
+
+    static class SearchTerm {
+        final String mTerm;
+        final String mTermLowerCase;
+        final int mLength;
+        int mMatchLength = 0;
+        int mMatchStart = -1;
+
+        SearchTerm(String term, boolean html) {
+            mTerm = term;
+            mTermLowerCase = term.toLowerCase();
+            mLength = term.length();
+        }
+    }
+
+    /**
+     * Generate a version of the incoming text in which all search terms in a query are highlighted.
+     * If the input is HTML, we return a StringBuilder with additional markup as required
+     * If the input is text, we return a SpannableStringBuilder with additional spans as required
+     *
+     * @param text the text to be processed
+     * @param query the query, which can contain multiple terms separated by whitespace
+     * @param html whether or not the text to be processed is HTML
+     * @return highlighted text
+     *
+     * @throws IOException as Appendable requires this
+     */
+    public static CharSequence highlightTerms(String text, String query, boolean html)
+            throws IOException {
+        // Handle null and empty string
+        if (TextUtils.isEmpty(text)) return "";
+        final int length = text.length();
+
+        // Break up the query into search terms
+        ArrayList<SearchTerm> terms = new ArrayList<SearchTerm>();
+        if (query != null) {
+            StringTokenizer st = new StringTokenizer(query);
+            while (st.hasMoreTokens()) {
+                terms.add(new SearchTerm(st.nextToken(), html));
+            }
+        }
+
+        // Our appendable depends on whether we're building HTML text (for webview) or spannable
+        // text (for UI)
+        final Appendable sb = html ? new StringBuilder() : new SpannableStringBuilder();
+        // Indicates whether we're in the middle of an HTML tag
+        boolean inTag = false;
+        // The position of the last input character copied to output
+        int lastOut = -1;
+
+        // Walk through the text until we're done with the input
+        // Just copy any HTML tags directly into the output; search for terms in the remaining text
+        for (int i = 0; i < length; i++) {
+            char chr = text.charAt(i);
+            if (html) {
+                if (!inTag && (chr == '<')) {
+                    // Find tags; they will begin with <! or <- or </ or <letter
+                    if (i < (length - 1)) {
+                        char peek = text.charAt(i + 1);
+                        if (peek == '!' || peek == '-' || peek == '/' || Character.isLetter(peek)) {
+                            inTag = true;
+                            // Skip content of title, script, style, applet and head tags
+                            if (i < (length - (MAX_STRIP_TAG_LENGTH + 2))) {
+                                String tag = text.substring(i + 1, i + MAX_STRIP_TAG_LENGTH + 1);
+                                String tagLowerCase = tag.toLowerCase();
+                                boolean stripContent = false;
+                                for (String stripTag: STRIP_TAGS) {
+                                    if (tagLowerCase.startsWith(stripTag)) {
+                                        stripContent = true;
+                                        tag = tag.substring(0, stripTag.length());
+                                        break;
+                                    }
+                                }
+                                if (stripContent) {
+                                    // Look for the end of this tag
+                                    int endTagPosition = findTagEnd(text, tag, i);
+                                    if (endTagPosition < 0) {
+                                        sb.append(text.substring(i));
+                                        break;
+                                    } else {
+                                        sb.append(text.substring(i, endTagPosition - 1));
+                                        i = endTagPosition - 1;
+                                        chr = text.charAt(i);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                } else if (inTag && (chr == '>')) {
+                    inTag = false;
+                }
+
+                if (inTag) {
+                    sb.append(chr);
+                    continue;
+                }
+            }
+
+            // After all that, we've got some "body" text
+            char chrLowerCase = Character.toLowerCase(chr);
+            // Whether or not the current character should be appended to the output; we inhibit
+            // this while any search terms match
+            boolean appendNow = true;
+            // Look through search terms for matches
+            for (SearchTerm t: terms) {
+                if (chrLowerCase == t.mTermLowerCase.charAt(t.mMatchLength)) {
+                    if (t.mMatchLength++ == 0) {
+                        // New match start
+                        t.mMatchStart = i;
+                    }
+                    if (t.mMatchLength == t.mLength) {
+                        String matchText = text.substring(t.mMatchStart, t.mMatchStart + t.mLength);
+                        // Completed match; add highlight and reset term
+                        if (t.mMatchStart <= lastOut) {
+                            matchText = text.substring(lastOut + 1, i + 1);
+                        }
+                        /*else*/
+                        if (matchText.length() == 0) {} else
+                        if (html) {
+                            sb.append("<span style=\"background-color: " + HIGHLIGHT_COLOR_STRING +
+                                    "\">");
+                            sb.append(matchText);
+                            sb.append("</span>");
+                        } else {
+                            SpannableString highlightSpan = new SpannableString(matchText);
+                            highlightSpan.setSpan(new BackgroundColorSpan(HIGHLIGHT_COLOR_INT), 0,
+                                    highlightSpan.length(), Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
+                            sb.append(highlightSpan);
+                        }
+                        lastOut = t.mMatchStart + t.mLength - 1;
+                        t.mMatchLength = 0;
+                        t.mMatchStart = -1;
+                    }
+                    appendNow = false;
+                } else {
+                    if (t.mMatchStart >= 0) {
+                        // We're no longer matching; check for other matches in progress
+                        int leastOtherStart = -1;
+                        for (SearchTerm ot: terms) {
+                            // Save away the lowest match start for other search terms
+                            if ((ot != t) && (ot.mMatchStart >= 0) && ((leastOtherStart < 0) ||
+                                    (ot.mMatchStart <= leastOtherStart))) {
+                                leastOtherStart = ot.mMatchStart;
+                            }
+                        }
+                        int matchEnd = t.mMatchStart + t.mMatchLength;
+                        if (leastOtherStart < 0 || leastOtherStart > matchEnd) {
+                            // Append the whole thing
+                            if (t.mMatchStart > lastOut) {
+                                sb.append(text.substring(t.mMatchStart, matchEnd));
+                                lastOut = matchEnd;
+                            }
+                        } else if (leastOtherStart == t.mMatchStart) {
+                            // Ok to append the current char
+                        } else if (leastOtherStart < t.mMatchStart) {
+                            // We're already covered by another search term, so don't append
+                            appendNow = false;
+                        } else if (t.mMatchStart > lastOut) {
+                            // Append the piece of our term that's not already covered
+                            sb.append(text.substring(t.mMatchStart, leastOtherStart));
+                            lastOut = leastOtherStart;
+                        }
+                    }
+                    // Reset this term
+                    t.mMatchLength = 0;
+                    t.mMatchStart = -1;
+                }
+            }
+
+            if (appendNow) {
+                sb.append(chr);
+                lastOut = i;
+            }
+        }
+
+        return (CharSequence)sb;
+   }
 }
diff --git a/tests/src/com/android/email/provider/ProviderTests.java b/tests/src/com/android/email/provider/ProviderTests.java
index 048d840e2..4b91d7f80 100644
--- a/tests/src/com/android/email/provider/ProviderTests.java
+++ b/tests/src/com/android/email/provider/ProviderTests.java
@@ -16,7 +16,6 @@
 
 package com.android.email.provider;
 
-import com.android.emailcommon.mail.Snippet;
 import com.android.emailcommon.provider.EmailContent;
 import com.android.emailcommon.provider.EmailContent.Account;
 import com.android.emailcommon.provider.EmailContent.AccountColumns;
@@ -29,6 +28,7 @@ import com.android.emailcommon.provider.EmailContent.Mailbox;
 import com.android.emailcommon.provider.EmailContent.MailboxColumns;
 import com.android.emailcommon.provider.EmailContent.Message;
 import com.android.emailcommon.provider.EmailContent.MessageColumns;
+import com.android.emailcommon.utility.TextUtilities;
 import com.android.emailcommon.utility.Utility;
 
 import android.content.ContentResolver;
@@ -46,7 +46,6 @@ import android.test.ProviderTestCase2;
 
 import java.io.File;
 import java.io.IOException;
-import java.net.URISyntaxException;
 import java.util.ArrayList;
 
 /**
@@ -504,7 +503,8 @@ public class ProviderTests extends ProviderTestCase2<EmailProvider> {
         message.save(mMockContext);
         Message restoredMessage = Message.restoreMessageWithId(mMockContext, message.mId);
         // We should have the plain text as the snippet
-        assertEquals(restoredMessage.mSnippet, Snippet.fromPlainText(message.mText));
+        assertEquals(restoredMessage.mSnippet,
+                TextUtilities.makeSnippetFromPlainText(message.mText));
 
         // Start again
         message = ProviderTestUtils.setupMessage("message", account.mId, box.mId, false,
@@ -514,7 +514,8 @@ public class ProviderTests extends ProviderTestCase2<EmailProvider> {
         message.save(mMockContext);
         restoredMessage = Message.restoreMessageWithId(mMockContext, message.mId);
         // We should have the plain text as the snippet
-        assertEquals(restoredMessage.mSnippet, Snippet.fromHtmlText(message.mHtml));
+        assertEquals(restoredMessage.mSnippet,
+                TextUtilities.makeSnippetFromHtmlText(message.mHtml));
     }
 
     /**
diff --git a/tests/src/com/android/emailcommon/mail/SnippetTests.java b/tests/src/com/android/emailcommon/mail/SnippetTests.java
deleted file mode 100644
index 8cee45bd1..000000000
--- a/tests/src/com/android/emailcommon/mail/SnippetTests.java
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (C) 2010 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * This is a series of unit tests for snippet creation
- *
- * You can run this entire test case with:
- *   runtest -c com.android.email.SnippetTests email
- */
-package com.android.emailcommon.mail;
-
-import com.android.emailcommon.mail.Snippet;
-
-import android.test.AndroidTestCase;
-
-/**
- * Tests of Snippet
- *
- * You can run this entire test case with:
- *   runtest -c com.android.email.SnippetTests email
- */
-public class SnippetTests extends AndroidTestCase {
-
-    public void testPlainSnippet() {
-        // Test the simplest cases
-        assertEquals("", Snippet.fromPlainText(null));
-        assertEquals("", Snippet.fromPlainText(""));
-
-        // Test handling leading, trailing, and duplicated whitespace
-        // Just test common whitespace characters; we calls Character.isWhitespace() internally, so
-        // other whitespace should be fine as well
-        assertEquals("", Snippet.fromPlainText(" \n\r\t\r\t\n"));
-        char c = Snippet.NON_BREAKING_SPACE_CHARACTER;
-        assertEquals("foo", Snippet.fromPlainText(c + "\r\n\tfoo \n\t\r" + c));
-        assertEquals("foo bar", Snippet.fromPlainText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
-
-        // Handle duplicated - and =
-        assertEquals("Foo-Bar=Bletch", Snippet.fromPlainText("Foo-----Bar=======Bletch"));
-
-        // We shouldn't muck with HTML entities
-        assertEquals("&nbsp;&gt;", Snippet.fromPlainText("&nbsp;&gt;"));
-    }
-
-    public void testHtmlSnippet() {
-        // Test the simplest cases
-        assertEquals("", Snippet.fromHtmlText(null));
-        assertEquals("", Snippet.fromHtmlText(""));
-
-        // Test handling leading, trailing, and duplicated whitespace
-        // Just test common whitespace characters; we calls Character.isWhitespace() internally, so
-        // other whitespace should be fine as well
-        assertEquals("", Snippet.fromHtmlText(" \n\r\t\r\t\n"));
-        char c = Snippet.NON_BREAKING_SPACE_CHARACTER;
-        assertEquals("foo", Snippet.fromHtmlText(c + "\r\n\tfoo \n\t\r" + c));
-        assertEquals("foo bar", Snippet.fromHtmlText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
-
-        // Handle duplicated - and =
-        assertEquals("Foo-Bar=Bletch", Snippet.fromPlainText("Foo-----Bar=======Bletch"));
-
-        // We should catch HTML entities in these tests
-        assertEquals(">", Snippet.fromHtmlText("&nbsp;&gt;"));
-        assertEquals("&<> \"", Snippet.fromHtmlText("&amp;&lt;&gt;&nbsp;&quot;"));
-        // Test for decimal and hex entities
-        assertEquals("ABC", Snippet.fromHtmlText("&#65;&#66;&#67;"));
-        assertEquals("ABC", Snippet.fromHtmlText("&#x41;&#x42;&#x43;"));
-
-        // Test for stripping simple tags
-        assertEquals("Hi there", Snippet.fromHtmlText("<html>Hi there</html>"));
-        // TODO: Add tests here if/when we find problematic HTML
-    }
-
-    public void testStripHtmlEntityEdgeCases() {
-        int[] skipCount = new int[1];
-        // Bare & isn't an entity
-        char c = Snippet.stripHtmlEntity("&", 0, skipCount);
-        assertEquals(c, '&');
-        assertEquals(0, skipCount[0]);
-        // Also not legal
-        c = Snippet.stripHtmlEntity("&;", 0, skipCount);
-        assertEquals(c, '&');
-        assertEquals(0, skipCount[0]);
-        // This is an entity, but shouldn't be found
-        c = Snippet.stripHtmlEntity("&nosuch;", 0, skipCount);
-        assertEquals(c, '&');
-        assertEquals(0, skipCount[0]);
-        // This is too long for an entity, even though it starts like a valid one
-        c = Snippet.stripHtmlEntity("&nbspandmore;", 0, skipCount);
-        assertEquals(c, '&');
-        assertEquals(0, skipCount[0]);
-        // Illegal decimal entities
-        c = Snippet.stripHtmlEntity("&#ABC", 0, skipCount);
-        assertEquals(c, '&');
-        assertEquals(0, skipCount[0]);
-        c = Snippet.stripHtmlEntity("&#12B", 0, skipCount);
-        assertEquals(c, '&');
-        assertEquals(0, skipCount[0]);
-        // Illegal hex entities
-        c = Snippet.stripHtmlEntity("&#xABC", 0, skipCount);
-        assertEquals(c, '&');
-        assertEquals(0, skipCount[0]);
-        // Illegal hex entities
-        c = Snippet.stripHtmlEntity("&#x19G", 0, skipCount);
-        assertEquals(c, '&');
-        assertEquals(0, skipCount[0]);
-    }
-
-    public void testStripContent() {
-        assertEquals("Visible", Snippet.fromHtmlText(
-            "<html><style foo=\"bar\">Not</style>Visible</html>"));
-        assertEquals("Visible", Snippet.fromHtmlText(
-            "<html><STYLE foo=\"bar\">Not</STYLE>Visible</html>"));
-        assertEquals("IsVisible", Snippet.fromHtmlText(
-            "<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>"));
-        assertEquals("Visible", Snippet.fromHtmlText(
-            "<html>Visible<style foo=\"bar\">Not"));
-        assertEquals("VisibleAgainVisible", Snippet.fromHtmlText(
-            "<html>Visible<style foo=\"bar\">Not</style>AgainVisible"));
-        assertEquals("VisibleAgainVisible", Snippet.fromHtmlText(
-            "<html>Visible<style foo=\"bar\"/>AgainVisible"));
-        assertEquals("VisibleAgainVisible", Snippet.fromHtmlText(
-            "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible"));
-    }
-
-    /**
-     * We pass in HTML text in which an ampersand (@) is two chars ahead of the correct end position
-     * for the tag named 'tag' and then check whether the calculated end position matches the known
-     * correct position.  HTML text not containing an ampersand should generate a calculated end of
-     * -1
-     * @param text the HTML text to test
-     */
-    private void findTagEnd(String text, String tag) {
-        int calculatedEnd = Snippet.findTagEnd(text , tag, 0);
-        int knownEnd = text.indexOf('@') + 2;
-        if (knownEnd == 1) {
-            // indexOf will return -1, so we'll get 1 as knownEnd
-            assertEquals(-1, calculatedEnd);
-        } else {
-            assertEquals(calculatedEnd, knownEnd);
-        }
-    }
-
-    public void testFindTagEnd() {
-        // Test with <tag ... />
-        findTagEnd("<tag foo=\"bar\"@ /> <blah blah>", "tag");
-        // Test with <tag ...> ... </tag>
-        findTagEnd("<tag foo=\"bar\">some text@</tag>some more text", "tag");
-        // Test with incomplete tag
-        findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag");
-        // Test with space at end of tag
-        findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag ");
-    }
-
-    // For debugging large HTML samples
-
-//    private String readLargeSnippet(String fn) {
-//        File file = mContext.getFileStreamPath(fn);
-//        StringBuffer sb = new StringBuffer();
-//        BufferedReader reader = null;
-//        try {
-//            String text;
-//            reader = new BufferedReader(new FileReader(file));
-//            while ((text = reader.readLine()) != null) {
-//                sb.append(text);
-//                sb.append(" ");
-//            }
-//        } catch (IOException e) {
-//        }
-//        return sb.toString();
-//    }
- }
diff --git a/tests/src/com/android/emailcommon/utility/TextUtilitiesTests.java b/tests/src/com/android/emailcommon/utility/TextUtilitiesTests.java
new file mode 100644
index 000000000..3e1bd9dbf
--- /dev/null
+++ b/tests/src/com/android/emailcommon/utility/TextUtilitiesTests.java
@@ -0,0 +1,298 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This is a series of unit tests for snippet creation and highlighting
+ *
+ * You can run this entire test case with:
+ *   runtest -c com.android.emailcommon.utility.TextUtilitiesTests email
+ */
+package com.android.emailcommon.utility;
+
+import android.test.AndroidTestCase;
+import android.text.SpannableStringBuilder;
+import android.text.style.BackgroundColorSpan;
+
+public class TextUtilitiesTests extends AndroidTestCase {
+
+    public void testPlainSnippet() {
+        // Test the simplest cases
+        assertEquals("", TextUtilities.makeSnippetFromPlainText(null));
+        assertEquals("", TextUtilities.makeSnippetFromPlainText(""));
+
+        // Test handling leading, trailing, and duplicated whitespace
+        // Just test common whitespace characters; we call Character.isWhitespace() internally, so
+        // other whitespace should be fine as well
+        assertEquals("", TextUtilities.makeSnippetFromPlainText(" \n\r\t\r\t\n"));
+        char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER;
+        assertEquals("foo", TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \n\t\r" + c));
+        assertEquals("foo bar",
+                TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
+
+        // Runs of duplicated - and = should collapse to a single character
+        assertEquals("Foo-Bar=Bletch",
+                TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch"));
+
+        // Plain text: HTML entities must pass through untouched
+        assertEquals("&nbsp;&gt;", TextUtilities.makeSnippetFromPlainText("&nbsp;&gt;"));
+    }
+
+    public void testHtmlSnippet() {
+        // Test the simplest cases
+        assertEquals("", TextUtilities.makeSnippetFromHtmlText(null));
+        assertEquals("", TextUtilities.makeSnippetFromHtmlText(""));
+
+        // Test handling leading, trailing, and duplicated whitespace
+        // Just test common whitespace characters; we call Character.isWhitespace() internally, so
+        // other whitespace should be fine as well
+        assertEquals("", TextUtilities.makeSnippetFromHtmlText(" \n\r\t\r\t\n"));
+        char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER;
+        assertEquals("foo", TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \n\t\r" + c));
+        assertEquals("foo bar",
+                TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
+
+        // Handle duplicated - and = in the HTML path as well
+        assertEquals("Foo-Bar=Bletch",
+                TextUtilities.makeSnippetFromHtmlText("Foo-----Bar=======Bletch"));
+
+        // We should catch HTML entities in these tests
+        assertEquals(">", TextUtilities.makeSnippetFromHtmlText("&nbsp;&gt;"));
+        assertEquals("&<> \"", TextUtilities.makeSnippetFromHtmlText("&amp;&lt;&gt;&nbsp;&quot;"));
+        // Test for decimal and hex entities
+        assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("&#65;&#66;&#67;"));
+        assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("&#x41;&#x42;&#x43;"));
+
+        // Test for stripping simple tags
+        assertEquals("Hi there", TextUtilities.makeSnippetFromHtmlText("<html>Hi there</html>"));
+        // TODO: Add tests here if/when we find problematic HTML
+    }
+
+    public void testStripHtmlEntityEdgeCases() {
+        int[] skipCount = new int[1];  // must remain 0 whenever no entity is recognized
+        // Bare & isn't an entity
+        char c = TextUtilities.stripHtmlEntity("&", 0, skipCount);
+        assertEquals('&', c);
+        assertEquals(0, skipCount[0]);
+        // Also not legal
+        c = TextUtilities.stripHtmlEntity("&;", 0, skipCount);
+        assertEquals('&', c);
+        assertEquals(0, skipCount[0]);
+        // This is an entity, but shouldn't be found
+        c = TextUtilities.stripHtmlEntity("&nosuch;", 0, skipCount);
+        assertEquals('&', c);
+        assertEquals(0, skipCount[0]);
+        // This is too long for an entity, even though it starts like a valid one
+        c = TextUtilities.stripHtmlEntity("&nbspandmore;", 0, skipCount);
+        assertEquals('&', c);
+        assertEquals(0, skipCount[0]);
+        // Illegal decimal entities
+        c = TextUtilities.stripHtmlEntity("&#ABC", 0, skipCount);
+        assertEquals('&', c);
+        assertEquals(0, skipCount[0]);
+        c = TextUtilities.stripHtmlEntity("&#12B", 0, skipCount);
+        assertEquals('&', c);
+        assertEquals(0, skipCount[0]);
+        // Illegal hex entities
+        c = TextUtilities.stripHtmlEntity("&#xABC", 0, skipCount);
+        assertEquals('&', c);
+        assertEquals(0, skipCount[0]);
+        // Illegal hex entity containing a non-hex digit
+        c = TextUtilities.stripHtmlEntity("&#x19G", 0, skipCount);
+        assertEquals('&', c);
+        assertEquals(0, skipCount[0]);
+    }
+
+    public void testStripContent() {  // <style> content is stripped; <nostrip> content is kept
+        assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
+            "<html><style foo=\"bar\">Not</style>Visible</html>"));
+        assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
+            "<html><STYLE foo=\"bar\">Not</STYLE>Visible</html>"));
+        assertEquals("IsVisible", TextUtilities.makeSnippetFromHtmlText(
+            "<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>"));
+        assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
+            "<html>Visible<style foo=\"bar\">Not"));
+        assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
+            "<html>Visible<style foo=\"bar\">Not</style>AgainVisible"));
+        assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
+            "<html>Visible<style foo=\"bar\"/>AgainVisible"));
+        assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
+            "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible"));
+    }
+
+    /**
+     * We pass in HTML text in which an at sign (@) is two chars before the correct end position
+     * for the tag named 'tag' and then check whether the calculated end position matches the known
+     * correct position.  Text without an at sign should yield a calculated end of -1
+     * @param text the HTML text to test
+     * @param tag the name of the tag whose end position is calculated
+     */
+    private void findTagEnd(String text, String tag) {
+        int calculatedEnd = TextUtilities.findTagEnd(text, tag, 0);
+        int knownEnd = text.indexOf('@') + 2;
+        if (knownEnd == 1) {
+            // indexOf will return -1, so we'll get 1 as knownEnd
+            assertEquals(-1, calculatedEnd);
+        } else {
+            assertEquals(knownEnd, calculatedEnd);
+        }
+    }
+
+    public void testFindTagEnd() {
+        // Test with <tag ... />
+        findTagEnd("<tag foo=\"bar\"@ /> <blah blah>", "tag");
+        // Test with <tag ...> ... </tag>
+        findTagEnd("<tag foo=\"bar\">some text@</tag>some more text", "tag");
+        // Test with incomplete tag (no '@' marker, so the helper expects -1)
+        findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag");
+        // Test with space at end of tag name (again no '@' marker, so -1 is expected)
+        findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag ");
+    }
+
+    private void assertHighlightUnchanged(String str) {  // null terms: output must equal input
+        assertEquals(str, TextUtilities.highlightTermsInHtml(str, null));
+    }
+
+    public void testHighlightNoTerm() {
+        // With no search terms (the helper passes null), the HTML should be returned unchanged
+        assertHighlightUnchanged("<html><style foo=\"bar\">Not</style>Visible</html>");
+        assertHighlightUnchanged("<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>");
+        assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not");
+        assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not</style>AgainVisible");
+        assertHighlightUnchanged("<html>Visible<style foo=\"bar\"/>AgainVisible");
+        assertHighlightUnchanged(
+                "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible");
+    }
+
+    public void testHighlightSingleTermHtml() {
+        String str = "<html><style foo=\"bar\">Not</style>Visible</html>";
+        // Test that tags aren't highlighted ("style" only occurs as a tag name here)
+        assertEquals(str, TextUtilities.highlightTermsInHtml(
+                "<html><style foo=\"bar\">Not</style>Visible</html>", "style"));
+        // Test that matching text outside tags is wrapped in a background-color <span>
+        assertEquals("<html><style foo=\"bar\">Not</style><span " +
+                "style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING +
+                "\">Visi</span>ble</html>",
+                TextUtilities.highlightTermsInHtml(str, "Visi"));
+        assertEquals("<html>Visible<style foo=\"bar\">Not</style>A<span" +
+                " style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING +
+                "\">gain</span>Visible",
+                TextUtilities.highlightTermsInHtml(
+                        "<html>Visible<style foo=\"bar\">Not</style>AgainVisible", "gain"));
+    }
+
+    public void testHighlightSingleTermText() {
+        // Sprinkle text with a few HTML characters to make sure they're ignored
+        String text = "This< should be visibl>e";
+        // We should find this, because search terms are case insensitive
+        SpannableStringBuilder ssb =
+            (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Visi");
+        BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
+        assertEquals(1, spans.length);
+        BackgroundColorSpan span = spans[0];
+        assertEquals(text.indexOf("visi"), ssb.getSpanStart(span));
+        assertEquals(text.indexOf("bl>e"), ssb.getSpanEnd(span));
+        // Text must keep its own case, not the term's (was noted as failing; TODO confirm)
+        assertEquals(text, ssb.toString());
+
+        // Multiple instances of the term (one of them inside the word "research")
+        text = "The research word should be a search result";
+        ssb = (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Search");
+        spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
+        assertEquals(2, spans.length);
+        span = spans[0];
+        assertEquals(text.indexOf("search word"), ssb.getSpanStart(span));
+        assertEquals(text.indexOf(" word"), ssb.getSpanEnd(span));
+        span = spans[1];
+        assertEquals(text.indexOf("search result"), ssb.getSpanStart(span));
+        assertEquals(text.indexOf(" result"), ssb.getSpanEnd(span));
+        assertEquals(text, ssb.toString());
+    }
+
+    public void testHighlightTwoTermText() {
+        String text = "This should be visible";
+        // Two space-separated terms; expect one span per term, checked in text order
+        SpannableStringBuilder ssb =
+            (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "visi should");
+        BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
+        assertEquals(2, spans.length);
+        BackgroundColorSpan span = spans[0];
+        assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
+        assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span));
+        span = spans[1];
+        assertEquals(text.indexOf("visi"), ssb.getSpanStart(span));
+        assertEquals(text.indexOf("ble"), ssb.getSpanEnd(span));
+        assertEquals(text, ssb.toString());
+    }
+
+    public void testHighlightDuplicateTermText() {
+        String text = "This should be visible";
+        // The same term given twice should still produce only a single span
+        SpannableStringBuilder ssb =
+            (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should should");
+        BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
+        assertEquals(1, spans.length);
+        BackgroundColorSpan span = spans[0];
+        assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
+        assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span));
+    }
+
+    public void testHighlightOverlapTermText() {
+        String text = "This shoulder is visible";
+        // "ould" lies entirely inside the "should" match, so only one span is expected
+        SpannableStringBuilder ssb =
+            (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should ould");
+        BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
+        assertEquals(1, spans.length);
+        BackgroundColorSpan span = spans[0];
+        assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
+        assertEquals(text.indexOf("er is"), ssb.getSpanEnd(span));
+    }
+
+
+    public void testHighlightOverlapTermText2() {
+        String text = "The shoulders are visible";
+        // The second term extends past the first match; expect a second span for the extra text
+        SpannableStringBuilder ssb =
+            (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "shoulder shoulders");
+        BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
+        assertEquals(2, spans.length);
+        BackgroundColorSpan span = spans[0];
+        assertEquals(text.indexOf("shoulder"), ssb.getSpanStart(span));
+        assertEquals(text.indexOf("s are visible"), ssb.getSpanEnd(span));
+        span = spans[1];
+        // Just the 's' should be caught in the 2nd span
+        assertEquals(text.indexOf("s are visible"), ssb.getSpanStart(span));
+        assertEquals(text.indexOf(" are visible"), ssb.getSpanEnd(span));
+        assertEquals(text, ssb.toString());
+    }
+    // For debugging large HTML samples
+
+//    private String readLargeSnippet(String fn) {
+//        File file = mContext.getFileStreamPath(fn);
+//        StringBuffer sb = new StringBuffer();
+//        BufferedReader reader = null;
+//        try {
+//            String text;
+//            reader = new BufferedReader(new FileReader(file));
+//            while ((text = reader.readLine()) != null) {
+//                sb.append(text);
+//                sb.append(" ");
+//            }
+//        } catch (IOException e) {
+//        }
+//        return sb.toString();
+//    }
+ }