265 lines
9.5 KiB
Java
265 lines
9.5 KiB
Java
/****************************************************************
|
|
* Licensed to the Apache Software Foundation (ASF) under one *
|
|
* or more contributor license agreements. See the NOTICE file *
|
|
* distributed with this work for additional information *
|
|
* regarding copyright ownership. The ASF licenses this file *
|
|
* to you under the Apache License, Version 2.0 (the *
|
|
* "License"); you may not use this file except in compliance *
|
|
* with the License. You may obtain a copy of the License at *
|
|
* *
|
|
* http://www.apache.org/licenses/LICENSE-2.0 *
|
|
* *
|
|
* Unless required by applicable law or agreed to in writing, *
|
|
* software distributed under the License is distributed on an *
|
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
|
|
* KIND, either express or implied. See the License for the *
|
|
* specific language governing permissions and limitations *
|
|
* under the License. *
|
|
****************************************************************/
|
|
|
|
package org.apache.james.mime4j.decoder;
|
|
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
import org.apache.james.mime4j.util.CharsetUtil;
|
|
|
|
import java.io.ByteArrayInputStream;
|
|
import java.io.ByteArrayOutputStream;
|
|
import java.io.IOException;
|
|
import java.io.UnsupportedEncodingException;
|
|
|
|
/**
|
|
* Static methods for decoding strings, byte arrays and encoded words.
|
|
*
|
|
*
|
|
* @version $Id: DecoderUtil.java,v 1.3 2005/02/07 15:33:59 ntherning Exp $
|
|
*/
|
|
public class DecoderUtil {
|
|
private static Log log = LogFactory.getLog(DecoderUtil.class);
|
|
|
|
/**
|
|
* Decodes a string containing quoted-printable encoded data.
|
|
*
|
|
* @param s the string to decode.
|
|
* @return the decoded bytes.
|
|
*/
|
|
public static byte[] decodeBaseQuotedPrintable(String s) {
|
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
|
|
try {
|
|
byte[] bytes = s.getBytes("US-ASCII");
|
|
|
|
QuotedPrintableInputStream is = new QuotedPrintableInputStream(
|
|
new ByteArrayInputStream(bytes));
|
|
|
|
int b = 0;
|
|
while ((b = is.read()) != -1) {
|
|
baos.write(b);
|
|
}
|
|
} catch (IOException e) {
|
|
/*
|
|
* This should never happen!
|
|
*/
|
|
log.error(e);
|
|
}
|
|
|
|
return baos.toByteArray();
|
|
}
|
|
|
|
/**
|
|
* Decodes a string containing base64 encoded data.
|
|
*
|
|
* @param s the string to decode.
|
|
* @return the decoded bytes.
|
|
*/
|
|
public static byte[] decodeBase64(String s) {
|
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
|
|
try {
|
|
byte[] bytes = s.getBytes("US-ASCII");
|
|
|
|
Base64InputStream is = new Base64InputStream(
|
|
new ByteArrayInputStream(bytes));
|
|
|
|
int b = 0;
|
|
while ((b = is.read()) != -1) {
|
|
baos.write(b);
|
|
}
|
|
} catch (IOException e) {
|
|
/*
|
|
* This should never happen!
|
|
*/
|
|
log.error(e);
|
|
}
|
|
|
|
return baos.toByteArray();
|
|
}
|
|
|
|
/**
|
|
* Decodes an encoded word encoded with the 'B' encoding (described in
|
|
* RFC 2047) found in a header field body.
|
|
*
|
|
* @param encodedWord the encoded word to decode.
|
|
* @param charset the Java charset to use.
|
|
* @return the decoded string.
|
|
* @throws UnsupportedEncodingException if the given Java charset isn't
|
|
* supported.
|
|
*/
|
|
public static String decodeB(String encodedWord, String charset)
|
|
throws UnsupportedEncodingException {
|
|
|
|
return new String(decodeBase64(encodedWord), charset);
|
|
}
|
|
|
|
/**
|
|
* Decodes an encoded word encoded with the 'Q' encoding (described in
|
|
* RFC 2047) found in a header field body.
|
|
*
|
|
* @param encodedWord the encoded word to decode.
|
|
* @param charset the Java charset to use.
|
|
* @return the decoded string.
|
|
* @throws UnsupportedEncodingException if the given Java charset isn't
|
|
* supported.
|
|
*/
|
|
public static String decodeQ(String encodedWord, String charset)
|
|
throws UnsupportedEncodingException {
|
|
|
|
/*
|
|
* Replace _ with =20
|
|
*/
|
|
StringBuffer sb = new StringBuffer();
|
|
for (int i = 0; i < encodedWord.length(); i++) {
|
|
char c = encodedWord.charAt(i);
|
|
if (c == '_') {
|
|
sb.append("=20");
|
|
} else {
|
|
sb.append(c);
|
|
}
|
|
}
|
|
|
|
return new String(decodeBaseQuotedPrintable(sb.toString()), charset);
|
|
}
|
|
|
|
/**
|
|
* Decodes a string containing encoded words as defined by RFC 2047.
|
|
* Encoded words in have the form
|
|
* =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
|
|
* quoted-printable and 'B' or 'b' for Base64.
|
|
*
|
|
* ANDROID: COPIED FROM A NEWER VERSION OF MIME4J
|
|
*
|
|
* @param body the string to decode.
|
|
* @return the decoded string.
|
|
*/
|
|
public static String decodeEncodedWords(String body) {
|
|
|
|
// ANDROID: Most strings will not include "=?" so a quick test can prevent unneeded
|
|
// object creation. This could also be handled via lazy creation of the StringBuilder.
|
|
if (body.indexOf("=?") == -1) {
|
|
return body;
|
|
}
|
|
|
|
int previousEnd = 0;
|
|
boolean previousWasEncoded = false;
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
while (true) {
|
|
int begin = body.indexOf("=?", previousEnd);
|
|
int end = begin == -1 ? -1 : body.indexOf("?=", begin + 2);
|
|
if (end == -1) {
|
|
if (previousEnd == 0)
|
|
return body;
|
|
|
|
sb.append(body.substring(previousEnd));
|
|
return sb.toString();
|
|
}
|
|
end += 2;
|
|
|
|
String sep = body.substring(previousEnd, begin);
|
|
|
|
String decoded = decodeEncodedWord(body, begin, end);
|
|
if (decoded == null) {
|
|
sb.append(sep);
|
|
sb.append(body.substring(begin, end));
|
|
} else {
|
|
if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
|
|
sb.append(sep);
|
|
}
|
|
sb.append(decoded);
|
|
}
|
|
|
|
previousEnd = end;
|
|
previousWasEncoded = decoded != null;
|
|
}
|
|
}
|
|
|
|
// return null on error
|
|
private static String decodeEncodedWord(String body, int begin, int end) {
|
|
int qm1 = body.indexOf('?', begin + 2);
|
|
if (qm1 == end - 2)
|
|
return null;
|
|
|
|
int qm2 = body.indexOf('?', qm1 + 1);
|
|
if (qm2 == end - 2)
|
|
return null;
|
|
|
|
String mimeCharset = body.substring(begin + 2, qm1);
|
|
String encoding = body.substring(qm1 + 1, qm2);
|
|
String encodedText = body.substring(qm2 + 1, end - 2);
|
|
|
|
String charset = CharsetUtil.toJavaCharset(mimeCharset);
|
|
if (charset == null) {
|
|
if (log.isWarnEnabled()) {
|
|
log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
|
|
+ body.substring(begin, end) + "' doesn't have a "
|
|
+ "corresponding Java charset");
|
|
}
|
|
return null;
|
|
} else if (!CharsetUtil.isDecodingSupported(charset)) {
|
|
if (log.isWarnEnabled()) {
|
|
log.warn("Current JDK doesn't support decoding of charset '"
|
|
+ charset + "' (MIME charset '" + mimeCharset
|
|
+ "' in encoded word '" + body.substring(begin, end)
|
|
+ "')");
|
|
}
|
|
return null;
|
|
}
|
|
|
|
if (encodedText.length() == 0) {
|
|
if (log.isWarnEnabled()) {
|
|
log.warn("Missing encoded text in encoded word: '"
|
|
+ body.substring(begin, end) + "'");
|
|
}
|
|
return null;
|
|
}
|
|
|
|
try {
|
|
if (encoding.equalsIgnoreCase("Q")) {
|
|
return DecoderUtil.decodeQ(encodedText, charset);
|
|
} else if (encoding.equalsIgnoreCase("B")) {
|
|
return DecoderUtil.decodeB(encodedText, charset);
|
|
} else {
|
|
if (log.isWarnEnabled()) {
|
|
log.warn("Warning: Unknown encoding in encoded word '"
|
|
+ body.substring(begin, end) + "'");
|
|
}
|
|
return null;
|
|
}
|
|
} catch (UnsupportedEncodingException e) {
|
|
// should not happen because of isDecodingSupported check above
|
|
if (log.isWarnEnabled()) {
|
|
log.warn("Unsupported encoding in encoded word '"
|
|
+ body.substring(begin, end) + "'", e);
|
|
}
|
|
return null;
|
|
} catch (RuntimeException e) {
|
|
if (log.isWarnEnabled()) {
|
|
log.warn("Could not decode encoded word '"
|
|
+ body.substring(begin, end) + "'", e);
|
|
}
|
|
return null;
|
|
}
|
|
}
|
|
}
|