Merge "Highlight utility for HTML and text"
This commit is contained in:
commit
566c4f9117
|
@ -16,7 +16,7 @@
|
|||
|
||||
package com.android.emailcommon.provider;
|
||||
|
||||
import com.android.emailcommon.mail.Snippet;
|
||||
import com.android.emailcommon.utility.TextUtilities;
|
||||
import com.android.emailcommon.utility.Utility;
|
||||
|
||||
import android.content.ContentProviderOperation;
|
||||
|
@ -839,9 +839,9 @@ public abstract class EmailContent {
|
|||
ContentProviderOperation.Builder b = ContentProviderOperation.newInsert(mBaseUri);
|
||||
// Generate the snippet here, before we create the CPO for Message
|
||||
if (mText != null) {
|
||||
mSnippet = Snippet.fromPlainText(mText);
|
||||
mSnippet = TextUtilities.makeSnippetFromPlainText(mText);
|
||||
} else if (mHtml != null) {
|
||||
mSnippet = Snippet.fromHtmlText(mHtml);
|
||||
mSnippet = TextUtilities.makeSnippetFromHtmlText(mHtml);
|
||||
}
|
||||
ops.add(b.withValues(toContentValues()).build());
|
||||
|
||||
|
|
|
@ -20,7 +20,6 @@ import com.android.emailcommon.internet.MimeHeader;
|
|||
import com.android.emailcommon.internet.MimeUtility;
|
||||
import com.android.emailcommon.mail.MessagingException;
|
||||
import com.android.emailcommon.mail.Part;
|
||||
import com.android.emailcommon.mail.Snippet;
|
||||
import com.android.emailcommon.provider.EmailContent;
|
||||
|
||||
import android.text.TextUtils;
|
||||
|
@ -116,13 +115,13 @@ public class ConversionUtilities {
|
|||
if (!TextUtils.isEmpty(sbText)) {
|
||||
String text = sbText.toString();
|
||||
body.mTextContent = text;
|
||||
localMessage.mSnippet = Snippet.fromPlainText(text);
|
||||
localMessage.mSnippet = TextUtilities.makeSnippetFromPlainText(text);
|
||||
}
|
||||
if (!TextUtils.isEmpty(sbHtml)) {
|
||||
String text = sbHtml.toString();
|
||||
body.mHtmlContent = text;
|
||||
if (localMessage.mSnippet == null) {
|
||||
localMessage.mSnippet = Snippet.fromHtmlText(text);
|
||||
localMessage.mSnippet = TextUtilities.makeSnippetFromHtmlText(text);
|
||||
}
|
||||
}
|
||||
if (sbHtmlReply != null && sbHtmlReply.length() != 0) {
|
||||
|
|
|
@ -14,38 +14,39 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.emailcommon.mail;
|
||||
package com.android.emailcommon.utility;
|
||||
|
||||
import android.graphics.Color;
|
||||
import android.text.Spannable;
|
||||
import android.text.SpannableString;
|
||||
import android.text.SpannableStringBuilder;
|
||||
import android.text.TextUtils;
|
||||
import android.text.style.BackgroundColorSpan;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
public class TextUtilities {
|
||||
// Highlight color is yellow, as in other apps.
|
||||
// TODO Push for this to be a global (style-related?) constant
|
||||
private static final int HIGHLIGHT_COLOR_INT = Color.YELLOW;
|
||||
/*package*/ static final String HIGHLIGHT_COLOR_STRING =
|
||||
'#' + Integer.toHexString(HIGHLIGHT_COLOR_INT);
|
||||
|
||||
/**
|
||||
* Class to generate a short 'snippet' from either plain text or html text
|
||||
*
|
||||
* If the sync protocol can get plain text, that's great, but we'll still strip out extraneous
|
||||
* whitespace. If it's HTML, we'll 1) strip out tags, 2) turn entities into the appropriate
|
||||
* characters, and 3) strip out extraneous whitespace, all in one pass
|
||||
*
|
||||
* Why not use an existing class? The best answer is performance; yet another answer is
|
||||
* correctness (e.g. Html.textFromHtml simply doesn't generate well-stripped text). But performance
|
||||
* is key; we frequently sync text that is 10K or (much) longer, yet we really only care about a
|
||||
* small amount of text for the snippet. So it's critically important that we just stop when we've
|
||||
* gotten enough; existing methods that exist will go through the entire incoming string, at great
|
||||
* (and useless) expense.
|
||||
*/
|
||||
public class Snippet {
|
||||
// This is how many chars we'll allow in a snippet
|
||||
private static final int MAX_PLAIN_TEXT_SCAN_LENGTH = 200;
|
||||
private static final int MAX_SNIPPET_LENGTH = 200;
|
||||
// For some reason, isWhitespace() returns false with the following...
|
||||
/*package*/ static final char NON_BREAKING_SPACE_CHARACTER = (char)160;
|
||||
|
||||
// Tags whose content must be stripped as well
|
||||
static final String[] STRIP_TAGS =
|
||||
new String[] {"title", "script", "style", "applet", "head"};
|
||||
// The number of characters we peel off for testing against STRIP_TAGS
|
||||
static final int STRIP_TAG_LENGTH = 6;
|
||||
// The number of characters we peel off for testing against STRIP_TAGS; this should be the
|
||||
// maximum size of the strings in STRIP_TAGS
|
||||
static final int MAX_STRIP_TAG_LENGTH = 6;
|
||||
|
||||
static final Map<String, Character> ESCAPE_STRINGS;
|
||||
static {
|
||||
|
@ -307,12 +308,27 @@ public class Snippet {
|
|||
ESCAPE_STRINGS.put("&euro", '\u20AC');
|
||||
}
|
||||
|
||||
public static String fromHtmlText(String text) {
|
||||
return fromText(text, true);
|
||||
/**
|
||||
* Code to generate a short 'snippet' from either plain text or html text
|
||||
*
|
||||
* If the sync protocol can get plain text, that's great, but we'll still strip out extraneous
|
||||
* whitespace. If it's HTML, we'll 1) strip out tags, 2) turn entities into the appropriate
|
||||
* characters, and 3) strip out extraneous whitespace, all in one pass
|
||||
*
|
||||
* Why not use an existing class? The best answer is performance; yet another answer is
|
||||
* correctness (e.g. Html.textFromHtml simply doesn't generate well-stripped text). But
|
||||
* performance is key; we frequently sync text that is 10K or (much) longer, yet we really only
|
||||
* care about a small amount of text for the snippet. So it's critically important that we just
|
||||
* stop when we've gotten enough; existing methods will go through the entire
|
||||
* incoming string, at great (and useless, in this case) expense.
|
||||
*/
|
||||
|
||||
public static String makeSnippetFromHtmlText(String text) {
|
||||
return makeSnippetFromText(text, true);
|
||||
}
|
||||
|
||||
public static String fromPlainText(String text) {
|
||||
return fromText(text, false);
|
||||
public static String makeSnippetFromPlainText(String text) {
|
||||
return makeSnippetFromText(text, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -342,13 +358,13 @@ public class Snippet {
|
|||
return htmlText.indexOf("/" + tag, startPos);
|
||||
}
|
||||
|
||||
public static String fromText(String text, boolean stripHtml) {
|
||||
public static String makeSnippetFromText(String text, boolean stripHtml) {
|
||||
// Handle null and empty string
|
||||
if (TextUtils.isEmpty(text)) return "";
|
||||
|
||||
final int length = text.length();
|
||||
// Use char[] instead of StringBuilder purely for performance; fewer method calls, etc.
|
||||
char[] buffer = new char[MAX_PLAIN_TEXT_SCAN_LENGTH];
|
||||
char[] buffer = new char[MAX_SNIPPET_LENGTH];
|
||||
// skipCount is an array of a single int; that int is set inside stripHtmlEntity and is
|
||||
// used to determine how many characters can be "skipped" due to the transformation of the
|
||||
// entity to a single character. When Java allows multiple return values, we can make this
|
||||
|
@ -361,7 +377,7 @@ public class Snippet {
|
|||
boolean inTag = false;
|
||||
|
||||
// Walk through the text until we're done with the input OR we've got a large enough snippet
|
||||
for (int i = 0; i < length && bufferCount < MAX_PLAIN_TEXT_SCAN_LENGTH; i++) {
|
||||
for (int i = 0; i < length && bufferCount < MAX_SNIPPET_LENGTH; i++) {
|
||||
char c = text.charAt(i);
|
||||
if (stripHtml && !inTag && (c == '<')) {
|
||||
// Find tags to strip; they will begin with <! or <- or </ or <letter
|
||||
|
@ -370,8 +386,8 @@ public class Snippet {
|
|||
if (peek == '!' || peek == '-' || peek == '/' || Character.isLetter(peek)) {
|
||||
inTag = true;
|
||||
// Strip content of title, script, style, applet and head tags (see STRIP_TAGS)
|
||||
if (i < (length - (STRIP_TAG_LENGTH + 2))) {
|
||||
String tag = text.substring(i + 1, i + STRIP_TAG_LENGTH + 1);
|
||||
if (i < (length - (MAX_STRIP_TAG_LENGTH + 2))) {
|
||||
String tag = text.substring(i + 1, i + MAX_STRIP_TAG_LENGTH + 1);
|
||||
String tagLowerCase = tag.toLowerCase();
|
||||
boolean stripContent = false;
|
||||
for (String stripTag: STRIP_TAGS) {
|
||||
|
@ -484,4 +500,214 @@ public class Snippet {
|
|||
return '&';
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a string of HTML text and a query containing any number of search terms, returns
|
||||
* an HTML string in which those search terms are highlighted (intended for use in a WebView)
|
||||
*
|
||||
* @param text the HTML text to process
|
||||
* @param query the search terms
|
||||
* @return HTML text with the search terms highlighted
|
||||
*/
|
||||
public static String highlightTermsInHtml(String text, String query) {
|
||||
try {
|
||||
return highlightTerms(text, query, true).toString();
|
||||
} catch (IOException e) {
|
||||
// Can't happen, but we must catch this
|
||||
return text;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a string of plain text and a query containing any number of search terms, returns
|
||||
* a CharSequence in which those search terms are highlighted (intended for use in a TextView)
|
||||
*
|
||||
* @param text the text to process
|
||||
* @param query the search terms
|
||||
* @return a CharSequence with the search terms highlighted
|
||||
*/
|
||||
public static CharSequence highlightTermsInText(String text, String query) {
|
||||
try {
|
||||
return highlightTerms(text, query, false);
|
||||
} catch (IOException e) {
|
||||
// Can't happen, but we must catch this
|
||||
return text;
|
||||
}
|
||||
}
|
||||
|
||||
static class SearchTerm {
|
||||
final String mTerm;
|
||||
final String mTermLowerCase;
|
||||
final int mLength;
|
||||
int mMatchLength = 0;
|
||||
int mMatchStart = -1;
|
||||
|
||||
SearchTerm(String term, boolean html) {
|
||||
mTerm = term;
|
||||
mTermLowerCase = term.toLowerCase();
|
||||
mLength = term.length();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a version of the incoming text in which all search terms in a query are highlighted.
|
||||
* If the input is HTML, we return a StringBuilder with additional markup as required
|
||||
* If the input is text, we return a SpannableStringBuilder with additional spans as required
|
||||
*
|
||||
* @param text the text to be processed
|
||||
* @param query the query, which can contain multiple terms separated by whitespace
|
||||
* @param html whether or not the text to be processed is HTML
|
||||
* @return highlighted text
|
||||
*
|
||||
* @throws IOException as Appendable requires this
|
||||
*/
|
||||
public static CharSequence highlightTerms(String text, String query, boolean html)
|
||||
throws IOException {
|
||||
// Handle null and empty string
|
||||
if (TextUtils.isEmpty(text)) return "";
|
||||
final int length = text.length();
|
||||
|
||||
// Break up the query into search terms
|
||||
ArrayList<SearchTerm> terms = new ArrayList<SearchTerm>();
|
||||
if (query != null) {
|
||||
StringTokenizer st = new StringTokenizer(query);
|
||||
while (st.hasMoreTokens()) {
|
||||
terms.add(new SearchTerm(st.nextToken(), html));
|
||||
}
|
||||
}
|
||||
|
||||
// Our appendable depends on whether we're building HTML text (for webview) or spannable
|
||||
// text (for UI)
|
||||
final Appendable sb = html ? new StringBuilder() : new SpannableStringBuilder();
|
||||
// Indicates whether we're in the middle of an HTML tag
|
||||
boolean inTag = false;
|
||||
// The position of the last input character copied to output
|
||||
int lastOut = -1;
|
||||
|
||||
// Walk through the text until we're done with the input
|
||||
// Just copy any HTML tags directly into the output; search for terms in the remaining text
|
||||
for (int i = 0; i < length; i++) {
|
||||
char chr = text.charAt(i);
|
||||
if (html) {
|
||||
if (!inTag && (chr == '<')) {
|
||||
// Find tags; they will begin with <! or <- or </ or <letter
|
||||
if (i < (length - 1)) {
|
||||
char peek = text.charAt(i + 1);
|
||||
if (peek == '!' || peek == '-' || peek == '/' || Character.isLetter(peek)) {
|
||||
inTag = true;
|
||||
// Skip content of title, script, style, applet and head tags (see STRIP_TAGS)
|
||||
if (i < (length - (MAX_STRIP_TAG_LENGTH + 2))) {
|
||||
String tag = text.substring(i + 1, i + MAX_STRIP_TAG_LENGTH + 1);
|
||||
String tagLowerCase = tag.toLowerCase();
|
||||
boolean stripContent = false;
|
||||
for (String stripTag: STRIP_TAGS) {
|
||||
if (tagLowerCase.startsWith(stripTag)) {
|
||||
stripContent = true;
|
||||
tag = tag.substring(0, stripTag.length());
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (stripContent) {
|
||||
// Look for the end of this tag
|
||||
int endTagPosition = findTagEnd(text, tag, i);
|
||||
if (endTagPosition < 0) {
|
||||
sb.append(text.substring(i));
|
||||
break;
|
||||
} else {
|
||||
sb.append(text.substring(i, endTagPosition - 1));
|
||||
i = endTagPosition - 1;
|
||||
chr = text.charAt(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (inTag && (chr == '>')) {
|
||||
inTag = false;
|
||||
}
|
||||
|
||||
if (inTag) {
|
||||
sb.append(chr);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// After all that, we've got some "body" text
|
||||
char chrLowerCase = Character.toLowerCase(chr);
|
||||
// Whether or not the current character should be appended to the output; we inhibit
|
||||
// this while any search terms match
|
||||
boolean appendNow = true;
|
||||
// Look through search terms for matches
|
||||
for (SearchTerm t: terms) {
|
||||
if (chrLowerCase == t.mTermLowerCase.charAt(t.mMatchLength)) {
|
||||
if (t.mMatchLength++ == 0) {
|
||||
// New match start
|
||||
t.mMatchStart = i;
|
||||
}
|
||||
if (t.mMatchLength == t.mLength) {
|
||||
String matchText = text.substring(t.mMatchStart, t.mMatchStart + t.mLength);
|
||||
// Completed match; add highlight and reset term
|
||||
if (t.mMatchStart <= lastOut) {
|
||||
matchText = text.substring(lastOut + 1, i + 1);
|
||||
}
|
||||
/*else*/
|
||||
if (matchText.length() == 0) {} else
|
||||
if (html) {
|
||||
sb.append("<span style=\"background-color: " + HIGHLIGHT_COLOR_STRING +
|
||||
"\">");
|
||||
sb.append(matchText);
|
||||
sb.append("</span>");
|
||||
} else {
|
||||
SpannableString highlightSpan = new SpannableString(matchText);
|
||||
highlightSpan.setSpan(new BackgroundColorSpan(HIGHLIGHT_COLOR_INT), 0,
|
||||
highlightSpan.length(), Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
|
||||
sb.append(highlightSpan);
|
||||
}
|
||||
lastOut = t.mMatchStart + t.mLength - 1;
|
||||
t.mMatchLength = 0;
|
||||
t.mMatchStart = -1;
|
||||
}
|
||||
appendNow = false;
|
||||
} else {
|
||||
if (t.mMatchStart >= 0) {
|
||||
// We're no longer matching; check for other matches in progress
|
||||
int leastOtherStart = -1;
|
||||
for (SearchTerm ot: terms) {
|
||||
// Save away the lowest match start for other search terms
|
||||
if ((ot != t) && (ot.mMatchStart >= 0) && ((leastOtherStart < 0) ||
|
||||
(ot.mMatchStart <= leastOtherStart))) {
|
||||
leastOtherStart = ot.mMatchStart;
|
||||
}
|
||||
}
|
||||
int matchEnd = t.mMatchStart + t.mMatchLength;
|
||||
if (leastOtherStart < 0 || leastOtherStart > matchEnd) {
|
||||
// Append the whole thing
|
||||
if (t.mMatchStart > lastOut) {
|
||||
sb.append(text.substring(t.mMatchStart, matchEnd));
|
||||
lastOut = matchEnd;
|
||||
}
|
||||
} else if (leastOtherStart == t.mMatchStart) {
|
||||
// Ok to append the current char
|
||||
} else if (leastOtherStart < t.mMatchStart) {
|
||||
// We're already covered by another search term, so don't append
|
||||
appendNow = false;
|
||||
} else if (t.mMatchStart > lastOut) {
|
||||
// Append the piece of our term that's not already covered
|
||||
sb.append(text.substring(t.mMatchStart, leastOtherStart));
|
||||
lastOut = leastOtherStart;
|
||||
}
|
||||
}
|
||||
// Reset this term
|
||||
t.mMatchLength = 0;
|
||||
t.mMatchStart = -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (appendNow) {
|
||||
sb.append(chr);
|
||||
lastOut = i;
|
||||
}
|
||||
}
|
||||
|
||||
return (CharSequence)sb;
|
||||
}
|
||||
}
|
|
@ -16,7 +16,6 @@
|
|||
|
||||
package com.android.email.provider;
|
||||
|
||||
import com.android.emailcommon.mail.Snippet;
|
||||
import com.android.emailcommon.provider.EmailContent;
|
||||
import com.android.emailcommon.provider.EmailContent.Account;
|
||||
import com.android.emailcommon.provider.EmailContent.AccountColumns;
|
||||
|
@ -29,6 +28,7 @@ import com.android.emailcommon.provider.EmailContent.Mailbox;
|
|||
import com.android.emailcommon.provider.EmailContent.MailboxColumns;
|
||||
import com.android.emailcommon.provider.EmailContent.Message;
|
||||
import com.android.emailcommon.provider.EmailContent.MessageColumns;
|
||||
import com.android.emailcommon.utility.TextUtilities;
|
||||
import com.android.emailcommon.utility.Utility;
|
||||
|
||||
import android.content.ContentResolver;
|
||||
|
@ -46,7 +46,6 @@ import android.test.ProviderTestCase2;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
|
@ -504,7 +503,8 @@ public class ProviderTests extends ProviderTestCase2<EmailProvider> {
|
|||
message.save(mMockContext);
|
||||
Message restoredMessage = Message.restoreMessageWithId(mMockContext, message.mId);
|
||||
// We should have the plain text as the snippet
|
||||
assertEquals(restoredMessage.mSnippet, Snippet.fromPlainText(message.mText));
|
||||
assertEquals(restoredMessage.mSnippet,
|
||||
TextUtilities.makeSnippetFromPlainText(message.mText));
|
||||
|
||||
// Start again
|
||||
message = ProviderTestUtils.setupMessage("message", account.mId, box.mId, false,
|
||||
|
@ -514,7 +514,8 @@ public class ProviderTests extends ProviderTestCase2<EmailProvider> {
|
|||
message.save(mMockContext);
|
||||
restoredMessage = Message.restoreMessageWithId(mMockContext, message.mId);
|
||||
// We should have the snippet generated from the HTML text
|
||||
assertEquals(restoredMessage.mSnippet, Snippet.fromHtmlText(message.mHtml));
|
||||
assertEquals(restoredMessage.mSnippet,
|
||||
TextUtilities.makeSnippetFromHtmlText(message.mHtml));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,183 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2010 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This is a series of unit tests for snippet creation
|
||||
*
|
||||
* You can run this entire test case with:
|
||||
* runtest -c com.android.email.SnippetTests email
|
||||
*/
|
||||
package com.android.emailcommon.mail;
|
||||
|
||||
import com.android.emailcommon.mail.Snippet;
|
||||
|
||||
import android.test.AndroidTestCase;
|
||||
|
||||
/**
|
||||
* Tests of Snippet
|
||||
*
|
||||
* You can run this entire test case with:
|
||||
* runtest -c com.android.email.SnippetTests email
|
||||
*/
|
||||
public class SnippetTests extends AndroidTestCase {
|
||||
|
||||
public void testPlainSnippet() {
|
||||
// Test the simplest cases
|
||||
assertEquals("", Snippet.fromPlainText(null));
|
||||
assertEquals("", Snippet.fromPlainText(""));
|
||||
|
||||
// Test handling leading, trailing, and duplicated whitespace
|
||||
// Just test common whitespace characters; we calls Character.isWhitespace() internally, so
|
||||
// other whitespace should be fine as well
|
||||
assertEquals("", Snippet.fromPlainText(" \n\r\t\r\t\n"));
|
||||
char c = Snippet.NON_BREAKING_SPACE_CHARACTER;
|
||||
assertEquals("foo", Snippet.fromPlainText(c + "\r\n\tfoo \n\t\r" + c));
|
||||
assertEquals("foo bar", Snippet.fromPlainText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
|
||||
|
||||
// Handle duplicated - and =
|
||||
assertEquals("Foo-Bar=Bletch", Snippet.fromPlainText("Foo-----Bar=======Bletch"));
|
||||
|
||||
// We shouldn't muck with HTML entities
|
||||
assertEquals(" >", Snippet.fromPlainText(" >"));
|
||||
}
|
||||
|
||||
public void testHtmlSnippet() {
|
||||
// Test the simplest cases
|
||||
assertEquals("", Snippet.fromHtmlText(null));
|
||||
assertEquals("", Snippet.fromHtmlText(""));
|
||||
|
||||
// Test handling leading, trailing, and duplicated whitespace
|
||||
// Just test common whitespace characters; we calls Character.isWhitespace() internally, so
|
||||
// other whitespace should be fine as well
|
||||
assertEquals("", Snippet.fromHtmlText(" \n\r\t\r\t\n"));
|
||||
char c = Snippet.NON_BREAKING_SPACE_CHARACTER;
|
||||
assertEquals("foo", Snippet.fromHtmlText(c + "\r\n\tfoo \n\t\r" + c));
|
||||
assertEquals("foo bar", Snippet.fromHtmlText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
|
||||
|
||||
// Handle duplicated - and =
|
||||
assertEquals("Foo-Bar=Bletch", Snippet.fromPlainText("Foo-----Bar=======Bletch"));
|
||||
|
||||
// We should catch HTML entities in these tests
|
||||
assertEquals(">", Snippet.fromHtmlText(" >"));
|
||||
assertEquals("&<> \"", Snippet.fromHtmlText("&<> ""));
|
||||
// Test for decimal and hex entities
|
||||
assertEquals("ABC", Snippet.fromHtmlText("ABC"));
|
||||
assertEquals("ABC", Snippet.fromHtmlText("ABC"));
|
||||
|
||||
// Test for stripping simple tags
|
||||
assertEquals("Hi there", Snippet.fromHtmlText("<html>Hi there</html>"));
|
||||
// TODO: Add tests here if/when we find problematic HTML
|
||||
}
|
||||
|
||||
public void testStripHtmlEntityEdgeCases() {
|
||||
int[] skipCount = new int[1];
|
||||
// Bare & isn't an entity
|
||||
char c = Snippet.stripHtmlEntity("&", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
// Also not legal
|
||||
c = Snippet.stripHtmlEntity("&;", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
// This is an entity, but shouldn't be found
|
||||
c = Snippet.stripHtmlEntity("&nosuch;", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
// This is too long for an entity, even though it starts like a valid one
|
||||
c = Snippet.stripHtmlEntity(" andmore;", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
// Illegal decimal entities
|
||||
c = Snippet.stripHtmlEntity("&#ABC", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
c = Snippet.stripHtmlEntity("B", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
// Illegal hex entities
|
||||
c = Snippet.stripHtmlEntity("઼", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
// Illegal hex entities
|
||||
c = Snippet.stripHtmlEntity("G", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
}
|
||||
|
||||
public void testStripContent() {
|
||||
assertEquals("Visible", Snippet.fromHtmlText(
|
||||
"<html><style foo=\"bar\">Not</style>Visible</html>"));
|
||||
assertEquals("Visible", Snippet.fromHtmlText(
|
||||
"<html><STYLE foo=\"bar\">Not</STYLE>Visible</html>"));
|
||||
assertEquals("IsVisible", Snippet.fromHtmlText(
|
||||
"<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>"));
|
||||
assertEquals("Visible", Snippet.fromHtmlText(
|
||||
"<html>Visible<style foo=\"bar\">Not"));
|
||||
assertEquals("VisibleAgainVisible", Snippet.fromHtmlText(
|
||||
"<html>Visible<style foo=\"bar\">Not</style>AgainVisible"));
|
||||
assertEquals("VisibleAgainVisible", Snippet.fromHtmlText(
|
||||
"<html>Visible<style foo=\"bar\"/>AgainVisible"));
|
||||
assertEquals("VisibleAgainVisible", Snippet.fromHtmlText(
|
||||
"<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible"));
|
||||
}
|
||||
|
||||
/**
|
||||
* We pass in HTML text in which an ampersand (@) is two chars ahead of the correct end position
|
||||
* for the tag named 'tag' and then check whether the calculated end position matches the known
|
||||
* correct position. HTML text not containing an ampersand should generate a calculated end of
|
||||
* -1
|
||||
* @param text the HTML text to test
|
||||
*/
|
||||
private void findTagEnd(String text, String tag) {
|
||||
int calculatedEnd = Snippet.findTagEnd(text , tag, 0);
|
||||
int knownEnd = text.indexOf('@') + 2;
|
||||
if (knownEnd == 1) {
|
||||
// indexOf will return -1, so we'll get 1 as knownEnd
|
||||
assertEquals(-1, calculatedEnd);
|
||||
} else {
|
||||
assertEquals(calculatedEnd, knownEnd);
|
||||
}
|
||||
}
|
||||
|
||||
public void testFindTagEnd() {
|
||||
// Test with <tag ... />
|
||||
findTagEnd("<tag foo=\"bar\"@ /> <blah blah>", "tag");
|
||||
// Test with <tag ...> ... </tag>
|
||||
findTagEnd("<tag foo=\"bar\">some text@</tag>some more text", "tag");
|
||||
// Test with incomplete tag
|
||||
findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag");
|
||||
// Test with space at end of tag
|
||||
findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag ");
|
||||
}
|
||||
|
||||
// For debugging large HTML samples
|
||||
|
||||
// private String readLargeSnippet(String fn) {
|
||||
// File file = mContext.getFileStreamPath(fn);
|
||||
// StringBuffer sb = new StringBuffer();
|
||||
// BufferedReader reader = null;
|
||||
// try {
|
||||
// String text;
|
||||
// reader = new BufferedReader(new FileReader(file));
|
||||
// while ((text = reader.readLine()) != null) {
|
||||
// sb.append(text);
|
||||
// sb.append(" ");
|
||||
// }
|
||||
// } catch (IOException e) {
|
||||
// }
|
||||
// return sb.toString();
|
||||
// }
|
||||
}
|
|
@ -0,0 +1,298 @@
|
|||
/*
|
||||
* Copyright (C) 2010 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This is a series of unit tests for snippet creation and highlighting
|
||||
*
|
||||
* You can run this entire test case with:
|
||||
* runtest -c com.android.emailcommon.utility.TextUtilitiesTests email
|
||||
*/
|
||||
package com.android.emailcommon.utility;
|
||||
|
||||
import android.test.AndroidTestCase;
|
||||
import android.text.SpannableStringBuilder;
|
||||
import android.text.style.BackgroundColorSpan;
|
||||
|
||||
public class TextUtilitiesTests extends AndroidTestCase {
|
||||
|
||||
public void testPlainSnippet() {
|
||||
// Test the simplest cases
|
||||
assertEquals("", TextUtilities.makeSnippetFromPlainText(null));
|
||||
assertEquals("", TextUtilities.makeSnippetFromPlainText(""));
|
||||
|
||||
// Test handling leading, trailing, and duplicated whitespace
|
||||
// Just test common whitespace characters; we call Character.isWhitespace() internally, so
|
||||
// other whitespace should be fine as well
|
||||
assertEquals("", TextUtilities.makeSnippetFromPlainText(" \n\r\t\r\t\n"));
|
||||
char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER;
|
||||
assertEquals("foo", TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \n\t\r" + c));
|
||||
assertEquals("foo bar",
|
||||
TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
|
||||
|
||||
// Handle duplicated - and =
|
||||
assertEquals("Foo-Bar=Bletch",
|
||||
TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch"));
|
||||
|
||||
// We shouldn't muck with HTML entities
|
||||
assertEquals(" >", TextUtilities.makeSnippetFromPlainText(" >"));
|
||||
}
|
||||
|
||||
public void testHtmlSnippet() {
|
||||
// Test the simplest cases
|
||||
assertEquals("", TextUtilities.makeSnippetFromHtmlText(null));
|
||||
assertEquals("", TextUtilities.makeSnippetFromHtmlText(""));
|
||||
|
||||
// Test handling leading, trailing, and duplicated whitespace
|
||||
// Just test common whitespace characters; we call Character.isWhitespace() internally, so
|
||||
// other whitespace should be fine as well
|
||||
assertEquals("", TextUtilities.makeSnippetFromHtmlText(" \n\r\t\r\t\n"));
|
||||
char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER;
|
||||
assertEquals("foo", TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \n\t\r" + c));
|
||||
assertEquals("foo bar",
|
||||
TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
|
||||
|
||||
// Handle duplicated - and =
|
||||
assertEquals("Foo-Bar=Bletch",
|
||||
TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch"));
|
||||
|
||||
// We should catch HTML entities in these tests
|
||||
assertEquals(">", TextUtilities.makeSnippetFromHtmlText(" >"));
|
||||
assertEquals("&<> \"", TextUtilities.makeSnippetFromHtmlText("&<> ""));
|
||||
// Test for decimal and hex entities
|
||||
assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("ABC"));
|
||||
assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("ABC"));
|
||||
|
||||
// Test for stripping simple tags
|
||||
assertEquals("Hi there", TextUtilities.makeSnippetFromHtmlText("<html>Hi there</html>"));
|
||||
// TODO: Add tests here if/when we find problematic HTML
|
||||
}
|
||||
|
||||
public void testStripHtmlEntityEdgeCases() {
|
||||
int[] skipCount = new int[1];
|
||||
// Bare & isn't an entity
|
||||
char c = TextUtilities.stripHtmlEntity("&", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
// Also not legal
|
||||
c = TextUtilities.stripHtmlEntity("&;", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
// This is an entity, but shouldn't be found
|
||||
c = TextUtilities.stripHtmlEntity("&nosuch;", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
// This is too long for an entity, even though it starts like a valid one
|
||||
c = TextUtilities.stripHtmlEntity(" andmore;", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
// Illegal decimal entities
|
||||
c = TextUtilities.stripHtmlEntity("&#ABC", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
c = TextUtilities.stripHtmlEntity("B", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
// Illegal hex entities
|
||||
c = TextUtilities.stripHtmlEntity("઼", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
// Illegal hex entities
|
||||
c = TextUtilities.stripHtmlEntity("G", 0, skipCount);
|
||||
assertEquals(c, '&');
|
||||
assertEquals(0, skipCount[0]);
|
||||
}
|
||||
|
||||
public void testStripContent() {
|
||||
assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
|
||||
"<html><style foo=\"bar\">Not</style>Visible</html>"));
|
||||
assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
|
||||
"<html><STYLE foo=\"bar\">Not</STYLE>Visible</html>"));
|
||||
assertEquals("IsVisible", TextUtilities.makeSnippetFromHtmlText(
|
||||
"<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>"));
|
||||
assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
|
||||
"<html>Visible<style foo=\"bar\">Not"));
|
||||
assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
|
||||
"<html>Visible<style foo=\"bar\">Not</style>AgainVisible"));
|
||||
assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
|
||||
"<html>Visible<style foo=\"bar\"/>AgainVisible"));
|
||||
assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
|
||||
"<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible"));
|
||||
}
|
||||
|
||||
/**
|
||||
* We pass in HTML text in which an ampersand (@) is two chars ahead of the correct end position
|
||||
* for the tag named 'tag' and then check whether the calculated end position matches the known
|
||||
* correct position. HTML text not containing an ampersand should generate a calculated end of
|
||||
* -1
|
||||
* @param text the HTML text to test
|
||||
*/
|
||||
private void findTagEnd(String text, String tag) {
|
||||
int calculatedEnd = TextUtilities.findTagEnd(text , tag, 0);
|
||||
int knownEnd = text.indexOf('@') + 2;
|
||||
if (knownEnd == 1) {
|
||||
// indexOf will return -1, so we'll get 1 as knownEnd
|
||||
assertEquals(-1, calculatedEnd);
|
||||
} else {
|
||||
assertEquals(calculatedEnd, knownEnd);
|
||||
}
|
||||
}
|
||||
|
||||
public void testFindTagEnd() {
|
||||
// Test with <tag ... />
|
||||
findTagEnd("<tag foo=\"bar\"@ /> <blah blah>", "tag");
|
||||
// Test with <tag ...> ... </tag>
|
||||
findTagEnd("<tag foo=\"bar\">some text@</tag>some more text", "tag");
|
||||
// Test with incomplete tag
|
||||
findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag");
|
||||
// Test with space at end of tag
|
||||
findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag ");
|
||||
}
|
||||
|
||||
private void assertHighlightUnchanged(String str) {
|
||||
assertEquals(str, TextUtilities.highlightTermsInHtml(str, null));
|
||||
}
|
||||
|
||||
public void testHighlightNoTerm() {
|
||||
// With no search terms, the html should be unchanged
|
||||
assertHighlightUnchanged("<html><style foo=\"bar\">Not</style>Visible</html>");
|
||||
assertHighlightUnchanged("<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>");
|
||||
assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not");
|
||||
assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not</style>AgainVisible");
|
||||
assertHighlightUnchanged("<html>Visible<style foo=\"bar\"/>AgainVisible");
|
||||
assertHighlightUnchanged(
|
||||
"<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible");
|
||||
}
|
||||
|
||||
public void testHighlightSingleTermHtml() {
|
||||
String str = "<html><style foo=\"bar\">Not</style>Visible</html>";
|
||||
// Test that tags aren't highlighted
|
||||
assertEquals(str, TextUtilities.highlightTermsInHtml(
|
||||
"<html><style foo=\"bar\">Not</style>Visible</html>", "style"));
|
||||
// Test that non-tags are
|
||||
assertEquals("<html><style foo=\"bar\">Not</style><span " +
|
||||
"style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING +
|
||||
"\">Visi</span>ble</html>",
|
||||
TextUtilities.highlightTermsInHtml(str, "Visi"));
|
||||
assertEquals("<html>Visible<style foo=\"bar\">Not</style>A<span" +
|
||||
" style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING +
|
||||
"\">gain</span>Visible",
|
||||
TextUtilities.highlightTermsInHtml(
|
||||
"<html>Visible<style foo=\"bar\">Not</style>AgainVisible", "gain"));
|
||||
}
|
||||
|
||||
public void testHighlightSingleTermText() {
|
||||
// Sprinkle text with a few HTML characters to make sure they're ignored
|
||||
String text = "This< should be visibl>e";
|
||||
// We should find this, because search terms are case insensitive
|
||||
SpannableStringBuilder ssb =
|
||||
(SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Visi");
|
||||
BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
|
||||
assertEquals(1, spans.length);
|
||||
BackgroundColorSpan span = spans[0];
|
||||
assertEquals(text.indexOf("visi"), ssb.getSpanStart(span));
|
||||
assertEquals(text.indexOf("bl>e"), ssb.getSpanEnd(span));
|
||||
// Heh; this next test fails.. we use the search term!
|
||||
assertEquals(text, ssb.toString());
|
||||
|
||||
// Multiple instances of the term
|
||||
text = "The research word should be a search result";
|
||||
ssb = (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Search");
|
||||
spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
|
||||
assertEquals(2, spans.length);
|
||||
span = spans[0];
|
||||
assertEquals(text.indexOf("search word"), ssb.getSpanStart(span));
|
||||
assertEquals(text.indexOf(" word"), ssb.getSpanEnd(span));
|
||||
span = spans[1];
|
||||
assertEquals(text.indexOf("search result"), ssb.getSpanStart(span));
|
||||
assertEquals(text.indexOf(" result"), ssb.getSpanEnd(span));
|
||||
assertEquals(text, ssb.toString());
|
||||
}
|
||||
|
||||
public void testHighlightTwoTermText() {
|
||||
String text = "This should be visible";
|
||||
// We should find this, because search terms are case insensitive
|
||||
SpannableStringBuilder ssb =
|
||||
(SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "visi should");
|
||||
BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
|
||||
assertEquals(2, spans.length);
|
||||
BackgroundColorSpan span = spans[0];
|
||||
assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
|
||||
assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span));
|
||||
span = spans[1];
|
||||
assertEquals(text.indexOf("visi"), ssb.getSpanStart(span));
|
||||
assertEquals(text.indexOf("ble"), ssb.getSpanEnd(span));
|
||||
assertEquals(text, ssb.toString());
|
||||
}
|
||||
|
||||
public void testHighlightDuplicateTermText() {
|
||||
String text = "This should be visible";
|
||||
// We should find this, because search terms are case insensitive
|
||||
SpannableStringBuilder ssb =
|
||||
(SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should should");
|
||||
BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
|
||||
assertEquals(1, spans.length);
|
||||
BackgroundColorSpan span = spans[0];
|
||||
assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
|
||||
assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span));
|
||||
}
|
||||
|
||||
public void testHighlightOverlapTermText() {
|
||||
String text = "This shoulder is visible";
|
||||
// We should find this, because search terms are case insensitive
|
||||
SpannableStringBuilder ssb =
|
||||
(SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should ould");
|
||||
BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
|
||||
assertEquals(1, spans.length);
|
||||
BackgroundColorSpan span = spans[0];
|
||||
assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
|
||||
assertEquals(text.indexOf("er is"), ssb.getSpanEnd(span));
|
||||
}
|
||||
|
||||
|
||||
public void testHighlightOverlapTermText2() {
|
||||
String text = "The shoulders are visible";
|
||||
// We should find this, because search terms are case insensitive
|
||||
SpannableStringBuilder ssb =
|
||||
(SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "shoulder shoulders");
|
||||
BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
|
||||
assertEquals(2, spans.length);
|
||||
BackgroundColorSpan span = spans[0];
|
||||
assertEquals(text.indexOf("shoulder"), ssb.getSpanStart(span));
|
||||
assertEquals(text.indexOf("s are visible"), ssb.getSpanEnd(span));
|
||||
span = spans[1];
|
||||
// Just the 's' should be caught in the 2nd span
|
||||
assertEquals(text.indexOf("s are visible"), ssb.getSpanStart(span));
|
||||
assertEquals(text.indexOf(" are visible"), ssb.getSpanEnd(span));
|
||||
assertEquals(text, ssb.toString());
|
||||
}
|
||||
// For debugging large HTML samples
|
||||
|
||||
// private String readLargeSnippet(String fn) {
|
||||
// File file = mContext.getFileStreamPath(fn);
|
||||
// StringBuffer sb = new StringBuffer();
|
||||
// BufferedReader reader = null;
|
||||
// try {
|
||||
// String text;
|
||||
// reader = new BufferedReader(new FileReader(file));
|
||||
// while ((text = reader.readLine()) != null) {
|
||||
// sb.append(text);
|
||||
// sb.append(" ");
|
||||
// }
|
||||
// } catch (IOException e) {
|
||||
// }
|
||||
// return sb.toString();
|
||||
// }
|
||||
}
|
Loading…
Reference in New Issue