Properly handle style (and similar) tags in HTML snippets

Bug: 3285281 Change-Id: Ia45dbe39b47f1bcb647efbf1860ed4b933976df8
2010-12-15 13:50:11 -08:00 · 2010-12-15 13:50:11 -08:00 · 5a8be8e7ed
parent ac7194b4a9
commit 5a8be8e7ed
2 changed files with 99 additions and 0 deletions
--- a/src/com/android/email/Snippet.java
+++ b/src/com/android/email/Snippet.java
@ -41,6 +41,10 @@ public class Snippet {
    // For some reason, isWhitespace() returns false with the following...
    /*package*/ static final char NON_BREAKING_SPACE_CHARACTER = (char)160;

+    // Tags whose content must be stripped as well; entries are the 6 chars that follow '<'
+    static final String[] STRIP_TAGS = {"title ", "title>", "script", "style ", "style>", "applet"};
+    static final int STRIP_TAG_LENGTH = 6;
+
    // Note: ESCAPE_STRINGS is taken from the StringUtil class which is part of the
    // unbundled_google package
    static final Map<String, Character> ESCAPE_STRINGS;
@ -311,6 +315,33 @@ public class Snippet {
        return fromText(text, false);
    }

+    /**
+     * Find the end of this tag; there are two alternatives: <tag .../> or <tag ...> ... </tag>
+     * <p>
+     * For the self-closing form the result is the index of the '/' in "/>"; otherwise it is the
+     * index of the '/' in the first "</tag" found at or after startPos. The closing tag is
+     * matched together with its leading '<' so that text such as "url(/style.css)" inside the
+     * tag's content is not mistaken for the closing tag.
+     *
+     * @param htmlText some HTML text
+     * @param tag the HTML tag name; a single trailing space, if present, is ignored
+     * @param startPos the start position in the HTML text where the tag starts
+     * @return the index of the '/' that ends the tag, or -1 if not found
+     */
+    /*package*/ static int findTagEnd(String htmlText, String tag, int startPos) {
+        if (tag.endsWith(" ")) {
+            tag = tag.substring(0, tag.length() - 1);
+        }
+        // Self-closing form: the first '>' after the tag start is directly preceded by '/'
+        int openTagEnd = htmlText.indexOf('>', startPos);
+        if (openTagEnd > Math.max(startPos, 0) && htmlText.charAt(openTagEnd - 1) == '/') {
+            return openTagEnd - 1;
+        }
+        // We didn't find /> at the end of the tag so find </tag>
+        int closeTagStart = htmlText.indexOf("</" + tag, startPos);
+        return (closeTagStart < 0) ? -1 : closeTagStart + 1;
+    }
+
    public static String fromText(String text, boolean stripHtml) {
        // Handle null and empty string
        if (TextUtils.isEmpty(text)) return "";
@ -338,6 +369,26 @@ public class Snippet {
                    char peek = text.charAt(i + 1);
                    if (peek == '!' || peek == '-' || peek == '/' || Character.isLetter(peek)) {
                        inTag = true;
+                        // Strip content of title, script, style and applet tags
+                        if (i < (length - (STRIP_TAG_LENGTH + 2))) {
+                            String tag = text.substring(i + 1, i + STRIP_TAG_LENGTH + 1);
+                            boolean stripContent = false;
+                            for (String stripTag: STRIP_TAGS) {
+                                if (stripTag.equals(tag)) {
+                                    stripContent = true;
+                                    break;
+                                }
+                            }
+                            if (stripContent) {
+                                // Look for the end of this tag
+                                int endTagPosition = findTagEnd(text, tag, i);
+                                if (endTagPosition < 0) {
+                                    break;
+                                } else {
+                                    i = endTagPosition;
+                                }
+                            }
+                        }
                    }
                }
            } else if (stripHtml && inTag && (c == '>')) {
--- a/tests/src/com/android/email/SnippetTests.java
+++ b/tests/src/com/android/email/SnippetTests.java
@ -24,6 +24,12 @@ package com.android.email;

 import android.test.AndroidTestCase;

+/**
+ * Tests of Snippet
+ *
+ * You can run this entire test case with:
+ *   runtest -c com.android.email.SnippetTests email
+ */
 public class SnippetTests extends AndroidTestCase {

    public void testPlainSnippet() {
@ -108,4 +114,46 @@ public class SnippetTests extends AndroidTestCase {
        assertEquals(c, '&');
        assertEquals(0, skipCount[0]);
    }
+
+    public void testStripContent() {
+        final String[][] cases = {  // each row: {expected snippet, HTML input}
+            {"Visible", "<html><style foo=\"bar\">Not</style>Visible</html>"},
+            {"IsVisible", "<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>"},
+            {"Visible", "<html>Visible<style foo=\"bar\">Not"},
+            {"VisibleAgainVisible", "<html>Visible<style foo=\"bar\">Not</style>AgainVisible"},
+            {"VisibleAgainVisible", "<html>Visible<style foo=\"bar\"/>AgainVisible"},
+        };
+        for (String[] testCase : cases) {
+            assertEquals(testCase[0], Snippet.fromHtmlText(testCase[1]));
+        }
+    }
+
+    /**
+     * We pass in HTML text in which an at sign (@) is two chars ahead of the correct end position
+     * for the tag named 'tag' and then check whether the calculated end position matches the known
+     * correct position.  HTML text not containing an at sign should generate a calculated end of
+     * -1.
+     *
+     * @param text the HTML text to test
+     * @param tag the name of the tag whose end is searched for, starting at position 0
+     */
+    private void findTagEnd(String text, String tag) {
+        int calculatedEnd = Snippet.findTagEnd(text, tag, 0);
+        int markerPos = text.indexOf('@');
+        // No marker means no end is expected (-1); otherwise the end is 2 chars past the marker
+        int expectedEnd = (markerPos < 0) ? -1 : markerPos + 2;
+        // Expected value first, as JUnit's assertEquals(expected, actual) requires
+        assertEquals(expectedEnd, calculatedEnd);
+    }
+
+    public void testFindTagEnd() {
+        final String selfClosing = "<tag foo=\"bar\"@ /> <blah blah>";
+        final String withEndTag = "<tag foo=\"bar\">some text@</tag>some more text";
+        final String noEndTag = "<tag foo=\"bar\">some more text but no end tag";
+        // The '@' in an input marks the expected end; noEndTag has none, so -1 is expected
+        findTagEnd(selfClosing, "tag");
+        findTagEnd(withEndTag, "tag");
+        findTagEnd(noEndTag, "tag");
+        findTagEnd(noEndTag, "tag ");  // same incomplete input, tag name with a trailing space
+    }
 }