Properly handle style (and similar) tags in HTML snippets

Bug: 3285281
Change-Id: Ia45dbe39b47f1bcb647efbf1860ed4b933976df8
This commit is contained in:
Marc Blank 2010-12-15 13:50:11 -08:00
parent ac7194b4a9
commit 5a8be8e7ed
2 changed files with 99 additions and 0 deletions

View File

@ -41,6 +41,10 @@ public class Snippet {
// For some reason, isWhitespace() returns false with the following...
/*package*/ static final char NON_BREAKING_SPACE_CHARACTER = (char)160;
// Tags whose content must be stripped as well (i.e. the text between the opening and closing
// tag is dropped from the snippet, not just the tag markup itself).
// fromText() compares the STRIP_TAG_LENGTH characters that follow '<' against these entries
// with equals(), so every entry must be exactly STRIP_TAG_LENGTH characters long; names
// shorter than that are padded with a trailing space, which findTagEnd() removes again.
// NOTE(review): because the comparison is exact, a padded entry appears to match only when the
// tag name is followed by a space (e.g. "<style foo=...>"), not "<style>" -- confirm intended.
static final String[] STRIP_TAGS = new String[] {"title ", "script", "style ", "applet"};
// Number of characters after '<' that fromText() compares against each STRIP_TAGS entry
static final int STRIP_TAG_LENGTH = 6;
// Note: ESCAPE_STRINGS is taken from the StringUtil class which is part of the
// unbundled_google package
static final Map<String, Character> ESCAPE_STRINGS;
@ -311,6 +315,33 @@ public class Snippet {
return fromText(text, false);
}
/**
 * Find the end of this tag; there are two alternatives: {@code <tag .../>} or
 * {@code <tag ...> ... </tag>}.
 *
 * In both cases the value returned is the index of the '/' that closes the element: the
 * slash of a self-closing {@code />}, or the slash of the closing {@code </tag}. A caller
 * that continues scanning from that index will therefore reach the terminating '>' next.
 *
 * @param htmlText some HTML text
 * @param tag the HTML tag name; a single trailing space (used as padding) is ignored
 * @param startPos the start position in the HTML text where the tag starts
 * @return the position just before the end of the tag (the index of the closing '/') or -1 if
 * not found
 */
/*package*/ static int findTagEnd(String htmlText, String tag, int startPos) {
    // Tag names may arrive padded with a trailing space; reduce to the bare name
    if (tag.endsWith(" ")) {
        tag = tag.substring(0, tag.length() - 1);
    }
    // Walk to the first '>' (the end of the opening tag) to see whether it is self-closing
    int length = htmlText.length();
    char prevChar = 0;
    for (int i = startPos; i < length; i++) {
        char c = htmlText.charAt(i);
        if (c == '>') {
            if (prevChar == '/') {
                return i - 1;
            }
            break;
        }
        prevChar = c;
    }
    // We didn't find /> at the end of the tag so find </tag>. The '<' is part of the search
    // so that text inside the element which merely contains "/tag" (a path in a script string
    // or a CSS url, for example) is not mistaken for the closing tag.
    int closeTagStart = htmlText.indexOf("</" + tag, startPos);
    return (closeTagStart < 0) ? -1 : closeTagStart + 1;
}
public static String fromText(String text, boolean stripHtml) {
// Handle null and empty string
if (TextUtils.isEmpty(text)) return "";
@ -338,6 +369,26 @@ public class Snippet {
char peek = text.charAt(i + 1);
if (peek == '!' || peek == '-' || peek == '/' || Character.isLetter(peek)) {
inTag = true;
// Strip content of title, script, style and applet tags
if (i < (length - (STRIP_TAG_LENGTH + 2))) {
String tag = text.substring(i + 1, i + STRIP_TAG_LENGTH + 1);
boolean stripContent = false;
for (String stripTag: STRIP_TAGS) {
if (stripTag.equals(tag)) {
stripContent = true;
break;
}
}
if (stripContent) {
// Look for the end of this tag
int endTagPosition = findTagEnd(text, tag, i);
if (endTagPosition < 0) {
break;
} else {
i = endTagPosition;
}
}
}
}
}
} else if (stripHtml && inTag && (c == '>')) {

View File

@ -24,6 +24,12 @@ package com.android.email;
import android.test.AndroidTestCase;
/**
* Tests of Snippet
*
* You can run this entire test case with:
* runtest -c com.android.email.SnippetTests email
*/
public class SnippetTests extends AndroidTestCase {
public void testPlainSnippet() {
@ -108,4 +114,46 @@ public class SnippetTests extends AndroidTestCase {
assertEquals(c, '&');
assertEquals(0, skipCount[0]);
}
public void testStripContent() {
    // A style element's content is dropped; the text following it is kept
    String styleThenText = "<html><style foo=\"bar\">Not</style>Visible</html>";
    assertEquals("Visible", Snippet.fromHtmlText(styleThenText));

    // A tag that is not in the strip list keeps its content
    String ordinaryTag = "<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>";
    assertEquals("IsVisible", Snippet.fromHtmlText(ordinaryTag));

    // A style element that is never closed hides everything after it
    String unclosedStyle = "<html>Visible<style foo=\"bar\">Not";
    assertEquals("Visible", Snippet.fromHtmlText(unclosedStyle));

    // Text on both sides of a closed style element is kept
    String textAroundStyle = "<html>Visible<style foo=\"bar\">Not</style>AgainVisible";
    assertEquals("VisibleAgainVisible", Snippet.fromHtmlText(textAroundStyle));

    // A self-closing style element has no content to hide
    String selfClosingStyle = "<html>Visible<style foo=\"bar\"/>AgainVisible";
    assertEquals("VisibleAgainVisible", Snippet.fromHtmlText(selfClosingStyle));
}
/**
 * Checks Snippet.findTagEnd against a position marked in the test text itself. We pass in HTML
 * text in which an at sign (@) is two chars ahead of the correct end position for the tag
 * named 'tag' and then check whether the calculated end position matches the known correct
 * position. HTML text not containing an at sign should generate a calculated end of -1
 * @param text the HTML text to test
 * @param tag the name of the tag whose end is to be found
 */
private void findTagEnd(String text, String tag) {
    int calculatedEnd = Snippet.findTagEnd(text, tag, 0);
    int markerPos = text.indexOf('@');
    if (markerPos < 0) {
        // No marker in the text, so no end of tag should be found
        assertEquals(-1, calculatedEnd);
    } else {
        // Expected value goes first so that a failure message reads correctly
        assertEquals(markerPos + 2, calculatedEnd);
    }
}
public void testFindTagEnd() {
    // Shared input for the two cases in which no end of tag exists
    final String noEndTag = "<tag foo=\"bar\">some more text but no end tag";

    // Self-closing form: <tag ... />
    findTagEnd("<tag foo=\"bar\"@ /> <blah blah>", "tag");
    // Paired form: <tag ...> ... </tag>
    findTagEnd("<tag foo=\"bar\">some text@</tag>some more text", "tag");
    // Incomplete tag; the helper expects -1 since the text has no marker
    findTagEnd(noEndTag, "tag");
    // Same input, with a space at the end of the tag name
    findTagEnd(noEndTag, "tag ");
}
}