From 7c03217316b21d59a00cf1abed093470e8004144 Mon Sep 17 00:00:00 2001 From: Martin Hibdon Date: Wed, 9 Oct 2013 11:03:40 -0700 Subject: [PATCH] Add a retry backoff/limit policy to attachment download b/11081672 Prior to this, any time the AttachmentDownloadService got a CONNECTION_ERROR, it would just instantly retry, without any limit on the number of tries. This is bad if the server is in a funny state, we'll just keep spamming it with multiple connection attempts per second. Also, this kills the client device's battery and responsiveness. Now, it will retry instantly five times, and then retry on a 10 second delay 5 more times. After that it will give up. Even if it gives up, if the user visits an email with an attachment, or taps on an attachment to expand it, we'll start the process over. So we shouldn't have permanent apparently data loss, even if we fail on the first 10 tries. I'm not certain that this is the best backoff/limit policy, maybe we should add a delay after even the first connection error. But I'm hesitant to change this at this point, it's possible that something is relying on this behavior and we don't have a lot of soak time left. Change-Id: I53d75d5d214ccca887a89cf65b799fe640cc9bc5 --- .../service/AttachmentDownloadService.java | 57 ++++++++++++++++--- .../email/service/EmailServiceStub.java | 3 +- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/com/android/email/service/AttachmentDownloadService.java b/src/com/android/email/service/AttachmentDownloadService.java index 869b9f2cf..1639185ba 100644 --- a/src/com/android/email/service/AttachmentDownloadService.java +++ b/src/com/android/email/service/AttachmentDownloadService.java @@ -29,6 +29,7 @@ import android.net.ConnectivityManager; import android.net.Uri; import android.os.IBinder; import android.os.RemoteException; +import android.os.SystemClock; import android.text.format.DateUtils; import com.android.email.AttachmentInfo; @@ -60,6 +61,13 @@ import java.util.concurrent.ConcurrentHashMap; public class AttachmentDownloadService extends Service implements Runnable { public static final String TAG = "AttachmentService"; + // Minimum wait time before retrying a download that failed due to connection error + private static final long CONNECTION_ERROR_RETRY_MILLIS = 10 * DateUtils.SECOND_IN_MILLIS; + // Number of retries before we start delaying between + private static final long CONNECTION_ERROR_DELAY_THRESHOLD = 5; + // Maximum time to retry for connection errors. + private static final long CONNECTION_ERROR_MAX_RETRIES = 10; + // Our idle time, waiting for notifications; this is something of a failsafe private static final int PROCESS_QUEUE_WAIT_TIME = 30 * ((int)DateUtils.MINUTE_IN_MILLIS); // How often our watchdog checks for callback timeouts @@ -175,6 +183,8 @@ public class AttachmentDownloadService extends Service implements Runnable { int lastProgress; long lastCallbackTime; long startTime; + long retryCount; + long retryStartTime; private DownloadRequest(Context context, Attachment attachment) { attachmentId = attachment.mId; @@ -335,6 +345,12 @@ public class AttachmentDownloadService extends Service implements Runnable { continue; } if (!req.inProgress) { + final long currentTime = SystemClock.elapsedRealtime(); + if (req.retryCount > 0 && req.retryStartTime > currentTime) { + LogUtils.d(TAG, "== waiting to retry attachment %d", req.attachmentId); + setWatchdogAlarm(CONNECTION_ERROR_RETRY_MILLIS); + continue; + } mDownloadSet.tryStartDownload(req); } } @@ -428,7 +444,7 @@ public class AttachmentDownloadService extends Service implements Runnable { if (MailActivityEmail.DEBUG) { LogUtils.d(TAG, "== Download of " + req.attachmentId + " timed out"); } - cancelDownload(req); + cancelDownload(req); } } // Check whether we can start new downloads... @@ -475,7 +491,7 @@ public class AttachmentDownloadService extends Service implements Runnable { return mDownloadsInProgress.get(attachmentId); } - private void setWatchdogAlarm() { + private void setWatchdogAlarm(final long delay) { // Lazily initialize the pending intent if (mWatchdogPendingIntent == null) { Intent intent = new Intent(mContext, Watchdog.class); @@ -484,10 +500,14 @@ public class AttachmentDownloadService extends Service implements Runnable { } // Set the alarm AlarmManager am = (AlarmManager)mContext.getSystemService(Context.ALARM_SERVICE); - am.set(AlarmManager.RTC_WAKEUP, System.currentTimeMillis() + WATCHDOG_CHECK_INTERVAL, + am.set(AlarmManager.RTC_WAKEUP, System.currentTimeMillis() + delay, mWatchdogPendingIntent); } + private void setWatchdogAlarm() { + setWatchdogAlarm(WATCHDOG_CHECK_INTERVAL); + } + /** * Do the work of starting an attachment download using the EmailService interface, and * set our watchdog alarm @@ -539,14 +559,31 @@ public class AttachmentDownloadService extends Service implements Runnable { DownloadRequest req = mDownloadSet.findDownloadRequest(attachmentId); if (statusCode == EmailServiceStatus.CONNECTION_ERROR) { // If this needs to be retried, just process the queue again - if (MailActivityEmail.DEBUG) { - LogUtils.d(TAG, "== The download for attachment #" + attachmentId + - " will be retried"); - } if (req != null) { - req.inProgress = false; + req.retryCount++; + if (req.retryCount > CONNECTION_ERROR_MAX_RETRIES) { + LogUtils.d(TAG, "Connection Error #%d, giving up", attachmentId); + remove(req); + } else if (req.retryCount > CONNECTION_ERROR_DELAY_THRESHOLD) { + // TODO: I'm not sure this is a great retry/backoff policy, but we're + // afraid of changing behavior too much in case something relies upon it. + // So now, for the first five errors, we'll retry immediately. For the next + // five tries, we'll add a ten second delay between each. After that, we'll + // give up. + LogUtils.d(TAG, "ConnectionError #%d, retried %d times, adding delay", + attachmentId, req.retryCount); + req.inProgress = false; + req.retryStartTime = SystemClock.elapsedRealtime() + + CONNECTION_ERROR_RETRY_MILLIS; + setWatchdogAlarm(CONNECTION_ERROR_RETRY_MILLIS); + } else { + LogUtils.d(TAG, "ConnectionError #%d, retried %d times, adding delay", + attachmentId, req.retryCount); + req.inProgress = false; + req.retryStartTime = 0; + kick(); + } } - kick(); return; } @@ -610,6 +647,7 @@ public class AttachmentDownloadService extends Service implements Runnable { EmailContent.delete(mContext, Attachment.CONTENT_URI, attachment.mId); } else { // If there really is a message, retry + // TODO: How will this get retried? It's still marked as inProgress? kick(); return; } @@ -665,6 +703,7 @@ public class AttachmentDownloadService extends Service implements Runnable { public void loadAttachmentStatus(long messageId, long attachmentId, int statusCode, int progress) { // Record status and progress + LogUtils.d(TAG, "loadAttachmentStatus %d %d %d", messageId, attachmentId, statusCode); DownloadRequest req = mDownloadSet.getDownloadInProgress(attachmentId); if (req != null) { if (MailActivityEmail.DEBUG) { diff --git a/src/com/android/email/service/EmailServiceStub.java b/src/com/android/email/service/EmailServiceStub.java index 5b4733376..fa4cfd841 100644 --- a/src/com/android/email/service/EmailServiceStub.java +++ b/src/com/android/email/service/EmailServiceStub.java @@ -407,7 +407,8 @@ public abstract class EmailServiceStub extends IEmailService.Stub implements IEm break; } } - } catch (MessagingException e) { + } catch (MessagingException me) { + LogUtils.i(Logging.LOG_TAG, me, "Error in updateFolderList"); // We'll hope this is temporary } finally { if (localFolderCursor != null) {