From da71abeb8f0b0bce9de837d6614bcbc8ad7a39c6 Mon Sep 17 00:00:00 2001 From: Marc Blank Date: Fri, 23 Apr 2010 19:35:00 -0700 Subject: [PATCH] Improve sync thread alerting mechanism * When a sync thread triggers an alarm by failing to return from an HttpPost beyond the socket timeout, we call abort() on the HttpPost to force it to stop * It appears that there are cases in which this is insufficient, and the thread remains hung in a blocked state * The result of this failure is to prevent the syncing mailbox from ever syncing again, and is typically seen by a failure to receive new mail (as reported in the referenced bug) * In this CL, we add code to wait for 10 seconds after calling the abort() method. If the HttpPost is still hung, we interrupt() the thread, and have SyncManager release the Mailbox, so that another thread can be started. Bug: 2615293 Change-Id: I6a48195fc68bb950126006326a5b30448d3bbb63 --- .../android/exchange/AbstractSyncService.java | 11 ++- src/com/android/exchange/EasSyncService.java | 73 ++++++++++++++++--- src/com/android/exchange/SyncManager.java | 9 ++- 3 files changed, 76 insertions(+), 17 deletions(-) diff --git a/src/com/android/exchange/AbstractSyncService.java b/src/com/android/exchange/AbstractSyncService.java index d0c0d6f58..f1fe834e6 100644 --- a/src/com/android/exchange/AbstractSyncService.java +++ b/src/com/android/exchange/AbstractSyncService.java @@ -82,11 +82,14 @@ public abstract class AbstractSyncService implements Runnable { public abstract void stop(); /** - * Sent by SyncManager to indicate that an alarm has fired for this service. Typically, this - * means that a network operation has timed out. The service is NOT stopped, but service - * behavior is not otherwise defined (i.e. it's service dependent) + * Sent by SyncManager to indicate that an alarm has fired for this service, and that its + * pending (network) operation has timed out. The service is NOT automatically stopped, + * although the behavior is service dependent. + * + * @return true if the operation was stopped normally; false if the thread needed to be + * interrupted. */ - public abstract void alarm(); + public abstract boolean alarm(); /** * Sent by SyncManager to request that the service reset itself cleanly; the meaning of this diff --git a/src/com/android/exchange/EasSyncService.java b/src/com/android/exchange/EasSyncService.java index 5e85bb307..f727081c2 100644 --- a/src/com/android/exchange/EasSyncService.java +++ b/src/com/android/exchange/EasSyncService.java @@ -93,6 +93,7 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.lang.Thread.State; import java.net.URI; import java.net.URLEncoder; import java.security.cert.CertificateException; @@ -159,6 +160,9 @@ public class EasSyncService extends AbstractSyncService { static private final int PROTOCOL_PING_STATUS_COMPLETED = 1; + // The amount of time we allow for a thread to release its post lock after receiving an alert + static private final int POST_LOCK_TIMEOUT = 10*SECONDS; + // Fallbacks (in minutes) for ping loop failures static private final int MAX_PING_FAILURES = 1; static private final int PING_FALLBACK_INBOX = 5; @@ -182,7 +186,8 @@ public class EasSyncService extends AbstractSyncService { public ContentResolver mContentResolver; private String[] mBindArguments = new String[2]; private ArrayList mPingChangeList; - private HttpPost mPendingPost = null; + // The HttpPost in progress + private volatile HttpPost mPendingPost = null; // The ping time (in seconds) private int mPingHeartbeat = PING_STARTING_HEARTBEAT; // The longest successful ping heartbeat @@ -221,25 +226,69 @@ public class EasSyncService extends AbstractSyncService { } @Override - public void alarm() { + /** + * Try to wake up a sync thread that is waiting on an HttpClient POST and has waited past its + * socket timeout without having thrown an Exception + * + * @return true if the POST was successfully stopped; false if we've failed and interrupted + * the thread + */ + public boolean alarm() { + HttpPost post; + String threadName = mThread.getName(); + + // Synchronize here so that we are guaranteed to have valid mPendingPost and mPostLock + // executePostWithTimeout (which executes the HttpPost) also uses this lock synchronized(getSynchronizer()) { - if (mPendingPost != null) { - URI uri = mPendingPost.getURI(); - if (uri != null) { - String query = uri.getQuery(); - if (query == null) { - query = "POST"; + // Get a reference to the current post lock + post = mPendingPost; + if (post != null) { + if (Eas.USER_LOG) { + URI uri = post.getURI(); + if (uri != null) { + String query = uri.getQuery(); + if (query == null) { + query = "POST"; + } + userLog(threadName, ": Alert, aborting ", query); + } else { + userLog(threadName, ": Alert, no URI?"); } - userLog("Alert, aborting " + query); - } else { - userLog("Alert, no URI?"); } + // Abort the POST mPostAborted = true; - mPendingPost.abort(); + post.abort(); } else { + // If there's no POST, we're done userLog("Alert, no pending POST"); + return true; } } + + // Wait for the POST to finish + try { + Thread.sleep(POST_LOCK_TIMEOUT); + } catch (InterruptedException e) { + } + + State s = mThread.getState(); + if (Eas.USER_LOG) { + userLog(threadName + ": State = " + s.name()); + } + + synchronized (getSynchronizer()) { + // If the thread is still hanging around and the same post is pending, let's try to + // stop the thread with an interrupt. + if ((s != State.TERMINATED) && (mPendingPost != null) && (mPendingPost == post)) { + mStop = true; + mThread.interrupt(); + userLog("Interrupting..."); + // Let the caller know we had to interrupt the thread + return false; + } + } + // Let the caller know that the alarm was handled normally + return true; } @Override diff --git a/src/com/android/exchange/SyncManager.java b/src/com/android/exchange/SyncManager.java index 0e17d089c..37209129c 100644 --- a/src/com/android/exchange/SyncManager.java +++ b/src/com/android/exchange/SyncManager.java @@ -1431,7 +1431,14 @@ public class SyncManager extends Service implements Runnable { } service.mAccount = Account.restoreAccountWithId(INSTANCE, m.mAccountKey); service.mMailbox = m; - service.alarm(); + // Send the alarm to the sync service + if (!service.alarm()) { + // A false return means that we were forced to interrupt the thread + // In this case, we release the mailbox so that we can start another + // thread to do the work + log("Alarm failed; releasing mailbox"); + syncManager.releaseMailbox(id); + } } }}).start(); }