Trim the mime type portion of Content-Type.
Fixes parsing of Content-Type headers like "text/plain ; charset=iso-8859-1" Added unit tests for whitespace and the charset parameter
This commit is contained in:
parent
5e062514bf
commit
2517a3dfd1
@ -189,7 +189,7 @@ public class MimeUtility {
|
||||
}
|
||||
String[] parts = unfold(header).split(";");
|
||||
if (name == null) {
|
||||
return parts[0];
|
||||
return parts[0].trim();
|
||||
}
|
||||
String lowerCaseName = name.toLowerCase();
|
||||
for (String part : parts) {
|
||||
|
@ -24,7 +24,9 @@ import com.android.email.mail.Message.RecipientType;
|
||||
import android.test.suitebuilder.annotation.SmallTest;
|
||||
import android.test.suitebuilder.annotation.MediumTest;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
@ -420,5 +422,48 @@ public class MimeMessageTest extends TestCase {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for parsing headers with extra whitespace and commennts.
|
||||
*
|
||||
* The lines up to Content-Type were copied directly out of RFC 2822
|
||||
* "Section A.5. White space, comments, and other oddities"
|
||||
*/
|
||||
public void testWhiteSpace() throws MessagingException, IOException {
|
||||
String entireMessage =
|
||||
"From: Pete(A wonderful \\) chap) <pete(his account)@silly.test(his host)>\r\n"+
|
||||
"To:A Group(Some people)\r\n"+
|
||||
" :Chris Jones <c@(Chris's host.)public.example>,\r\n"+
|
||||
" joe@example.org,\r\n"+
|
||||
" John <jdoe@one.test> (my dear friend); (the end of the group)\r\n"+
|
||||
"Cc:(Empty list)(start)Undisclosed recipients :(nobody(that I know)) ;\r\n"+
|
||||
"Date: Thu,\r\n"+
|
||||
" 13\r\n"+
|
||||
" Feb\r\n"+
|
||||
" 1969\r\n"+
|
||||
" 23:32\r\n"+
|
||||
" -0330 (Newfoundland Time)\r\n"+
|
||||
"Message-ID: <testabcd.1234@silly.test>\r\n"+
|
||||
"Content-Type: \r\n"+
|
||||
" TEXT/hTML \r\n"+
|
||||
" ; x-blah=\"y-blah\" ; \r\n"+
|
||||
" CHARSET=\"us-ascii\" ; (comment)\r\n"+
|
||||
"\r\n"+
|
||||
"<html><body>Testing.</body></html>\r\n";
|
||||
MimeMessage mm = null;
|
||||
mm = new MimeMessage(new ByteArrayInputStream(
|
||||
entireMessage.getBytes("us-ascii")));
|
||||
assertTrue(mm.getMimeType(), MimeUtility.mimeTypeMatches("text/html",mm.getMimeType()));
|
||||
assertEquals(new Date(-27723480000L),mm.getSentDate());
|
||||
assertEquals("<testabcd.1234@silly.test>",mm.getMessageId());
|
||||
Address[] toAddresses = mm.getRecipients(MimeMessage.RecipientType.TO);
|
||||
assertEquals("joe@example.org", toAddresses[1].getAddress());
|
||||
assertEquals("jdoe@one.test", toAddresses[2].getAddress());
|
||||
|
||||
|
||||
// Note: The parentheses in the middle of email addresses are not removed.
|
||||
//assertEquals("c@public.example", toAddresses[0].getAddress());
|
||||
//assertEquals("pete@silly.test",mm.getFrom()[0].getAddress());
|
||||
}
|
||||
|
||||
// TODO more test for writeTo()
|
||||
}
|
||||
|
@ -350,7 +350,94 @@ public class MimeUtilityTest extends TestCase {
|
||||
gotText = MimeUtility.getTextFromPart(p);
|
||||
assertEquals(theText, gotText);
|
||||
}
|
||||
// TODO: Tests of charset decoding in getTextFromPart()
|
||||
|
||||
/** Test for usage of Content-Type in getTextFromPart(Part part).
|
||||
*
|
||||
* For example 'Content-Type: text/html; charset=utf-8'
|
||||
*
|
||||
* If the body part has no mime-type, refuses to parse content as text.
|
||||
* If the mime-type does not match text/*, it will not get parsed.
|
||||
* Then, the charset parameter is used, with a default of ASCII.
|
||||
*
|
||||
* This test works by using a string that is valid Unicode, and is also
|
||||
* valid when decoded from UTF-8 bytes into Windows-1252 (so that
|
||||
* auto-detection is not possible), and checks that the correct conversion
|
||||
* was made, based on the Content-Type header.
|
||||
*
|
||||
*/
|
||||
public void testContentTypeCharset() throws MessagingException {
|
||||
final String UNICODE_EXPECT = "This is some happy unicode text \u263a";
|
||||
// What you get if you encode to UTF-8 (\xe2\x98\xba) and reencode with Windows-1252
|
||||
final String WINDOWS1252_EXPECT = "This is some happy unicode text \u00e2\u02dc\u00ba";
|
||||
TextBody tb = new TextBody(UNICODE_EXPECT);
|
||||
MimeBodyPart p = new MimeBodyPart();
|
||||
|
||||
String gotText, mimeType, charset;
|
||||
// TEST 0: Standard Content-Type header; no extraneous spaces or fields
|
||||
p.setBody(tb);
|
||||
// We call setHeader after setBody, since setBody overwrites Content-Type
|
||||
p.setHeader(MimeHeader.HEADER_CONTENT_TYPE, "text/html; charset=utf-8");
|
||||
gotText = MimeUtility.getTextFromPart(p);
|
||||
assertTrue(MimeUtility.mimeTypeMatches(p.getMimeType(), "text/html"));
|
||||
assertEquals(UNICODE_EXPECT, gotText);
|
||||
|
||||
p.setBody(tb);
|
||||
p.setHeader(MimeHeader.HEADER_CONTENT_TYPE, "text/html; charset=windows-1252");
|
||||
gotText = MimeUtility.getTextFromPart(p);
|
||||
assertTrue(MimeUtility.mimeTypeMatches(p.getMimeType(), "text/html"));
|
||||
assertEquals(WINDOWS1252_EXPECT, gotText);
|
||||
|
||||
// TEST 1: Extra fields and quotes in Content-Type (from RFC 2045)
|
||||
p.setBody(tb);
|
||||
p.setHeader(MimeHeader.HEADER_CONTENT_TYPE,
|
||||
"text/html; prop1 = \"test\"; charset = \"utf-8\"; prop2 = \"test\"");
|
||||
gotText = MimeUtility.getTextFromPart(p);
|
||||
assertTrue(MimeUtility.mimeTypeMatches(p.getMimeType(), "text/html"));
|
||||
assertEquals(UNICODE_EXPECT, gotText);
|
||||
|
||||
p.setBody(tb);
|
||||
p.setHeader(MimeHeader.HEADER_CONTENT_TYPE,
|
||||
"text/html; prop1 = \"test\"; charset = \"windows-1252\"; prop2 = \"test\"");
|
||||
gotText = MimeUtility.getTextFromPart(p);
|
||||
assertTrue(MimeUtility.mimeTypeMatches(p.getMimeType(), "text/html"));
|
||||
assertEquals(WINDOWS1252_EXPECT, gotText);
|
||||
|
||||
// TEST 2: Mixed case in Content-Type header:
|
||||
// RFC 2045 says that content types, subtypes and parameter names
|
||||
// are case-insensitive.
|
||||
|
||||
p.setBody(tb);
|
||||
p.setHeader(MimeHeader.HEADER_CONTENT_TYPE, "TEXT/HtmL ; CHARseT=utf-8");
|
||||
gotText = MimeUtility.getTextFromPart(p);
|
||||
assertTrue(MimeUtility.mimeTypeMatches(p.getMimeType(), "text/html"));
|
||||
assertEquals(UNICODE_EXPECT, gotText);
|
||||
|
||||
p.setBody(tb);
|
||||
p.setHeader(MimeHeader.HEADER_CONTENT_TYPE, "TEXT/HtmL ; CHARseT=windows-1252");
|
||||
gotText = MimeUtility.getTextFromPart(p);
|
||||
assertTrue(MimeUtility.mimeTypeMatches(p.getMimeType(), "text/html"));
|
||||
assertEquals(WINDOWS1252_EXPECT, gotText);
|
||||
|
||||
// TEST 3: Comments in Content-Type header field (from RFC 2045)
|
||||
// Thunderbird permits comments after the end of a parameter, as in this example.
|
||||
// Not something that I have seen in the real world outside RFC 2045.
|
||||
|
||||
p.setBody(tb);
|
||||
p.setHeader(MimeHeader.HEADER_CONTENT_TYPE,
|
||||
"text/html; charset=utf-8 (Plain text)");
|
||||
gotText = MimeUtility.getTextFromPart(p);
|
||||
assertTrue(MimeUtility.mimeTypeMatches(p.getMimeType(), "text/html"));
|
||||
// Note: This test does not pass.
|
||||
//assertEquals(UNICODE_EXPECT, gotText);
|
||||
|
||||
p.setBody(tb);
|
||||
p.setHeader(MimeHeader.HEADER_CONTENT_TYPE,
|
||||
"text/html; charset=windows-1252 (Plain text)");
|
||||
gotText = MimeUtility.getTextFromPart(p);
|
||||
assertTrue(MimeUtility.mimeTypeMatches(p.getMimeType(), "text/html"));
|
||||
// Note: These tests does not pass.
|
||||
//assertEquals(WINDOWS1252_EXPECT, gotText);
|
||||
}
|
||||
|
||||
/** Tests for various aspects of mimeTypeMatches(String mimeType, String matchAgainst) */
|
||||
public void testMimeTypeMatches() {
|
||||
|
Loading…
Reference in New Issue
Block a user