Improve sync thread alerting mechanism

* When a sync thread triggers an alarm by failing to return from
  an HttpPost beyond the socket timeout, we call abort() on the
  HttpPost to force it to stop
* It appears that there are cases in which this is insufficient,
  and the thread remains hung in a blocked state
* The result of this failure is to prevent the syncing mailbox from
  ever syncing again, and is typically seen by a failure to receive
  new mail (as reported in the referenced bug)
* In this CL, we add code to wait for 10 seconds after calling the
  abort() method.  If the HttpPost is still hung, we interrupt() the
  thread, and have SyncManager release the Mailbox, so that another
  thread can be started.

Bug: 2615293
Change-Id: I6a48195fc68bb950126006326a5b30448d3bbb63
This commit is contained in:
Marc Blank 2010-04-23 19:35:00 -07:00
parent 0f68676828
commit da71abeb8f
3 changed files with 76 additions and 17 deletions

View File

@ -82,11 +82,14 @@ public abstract class AbstractSyncService implements Runnable {
public abstract void stop();
/**
* Sent by SyncManager to indicate that an alarm has fired for this service. Typically, this
* means that a network operation has timed out. The service is NOT stopped, but service
* behavior is not otherwise defined (i.e. it's service dependent)
* Sent by SyncManager to indicate that an alarm has fired for this service, and that its
* pending (network) operation has timed out. The service is NOT automatically stopped,
* although the behavior is service dependent.
*
* @return true if the operation was stopped normally; false if the thread needed to be
* interrupted.
*/
public abstract void alarm();
public abstract boolean alarm();
/**
* Sent by SyncManager to request that the service reset itself cleanly; the meaning of this

View File

@ -93,6 +93,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.Thread.State;
import java.net.URI;
import java.net.URLEncoder;
import java.security.cert.CertificateException;
@ -159,6 +160,9 @@ public class EasSyncService extends AbstractSyncService {
static private final int PROTOCOL_PING_STATUS_COMPLETED = 1;
// The amount of time we allow for a thread to release its post lock after receiving an alert
static private final int POST_LOCK_TIMEOUT = 10*SECONDS;
// Fallbacks (in minutes) for ping loop failures
static private final int MAX_PING_FAILURES = 1;
static private final int PING_FALLBACK_INBOX = 5;
@ -182,7 +186,8 @@ public class EasSyncService extends AbstractSyncService {
public ContentResolver mContentResolver;
private String[] mBindArguments = new String[2];
private ArrayList<String> mPingChangeList;
private HttpPost mPendingPost = null;
// The HttpPost in progress
private volatile HttpPost mPendingPost = null;
// The ping time (in seconds)
private int mPingHeartbeat = PING_STARTING_HEARTBEAT;
// The longest successful ping heartbeat
@ -221,25 +226,69 @@ public class EasSyncService extends AbstractSyncService {
}
@Override
public void alarm() {
/**
* Try to wake up a sync thread that is waiting on an HttpClient POST and has waited past its
* socket timeout without having thrown an Exception
*
* @return true if the POST was successfully stopped; false if we've failed and interrupted
* the thread
*/
public boolean alarm() {
HttpPost post;
String threadName = mThread.getName();
// Synchronize here so that we are guaranteed to have valid mPendingPost and mPostLock
// executePostWithTimeout (which executes the HttpPost) also uses this lock
synchronized(getSynchronizer()) {
if (mPendingPost != null) {
URI uri = mPendingPost.getURI();
if (uri != null) {
String query = uri.getQuery();
if (query == null) {
query = "POST";
// Get a reference to the current post lock
post = mPendingPost;
if (post != null) {
if (Eas.USER_LOG) {
URI uri = post.getURI();
if (uri != null) {
String query = uri.getQuery();
if (query == null) {
query = "POST";
}
userLog(threadName, ": Alert, aborting ", query);
} else {
userLog(threadName, ": Alert, no URI?");
}
userLog("Alert, aborting " + query);
} else {
userLog("Alert, no URI?");
}
// Abort the POST
mPostAborted = true;
mPendingPost.abort();
post.abort();
} else {
// If there's no POST, we're done
userLog("Alert, no pending POST");
return true;
}
}
// Wait for the POST to finish
try {
Thread.sleep(POST_LOCK_TIMEOUT);
} catch (InterruptedException e) {
}
State s = mThread.getState();
if (Eas.USER_LOG) {
userLog(threadName + ": State = " + s.name());
}
synchronized (getSynchronizer()) {
// If the thread is still hanging around and the same post is pending, let's try to
// stop the thread with an interrupt.
if ((s != State.TERMINATED) && (mPendingPost != null) && (mPendingPost == post)) {
mStop = true;
mThread.interrupt();
userLog("Interrupting...");
// Let the caller know we had to interrupt the thread
return false;
}
}
// Let the caller know that the alarm was handled normally
return true;
}
@Override

View File

@ -1431,7 +1431,14 @@ public class SyncManager extends Service implements Runnable {
}
service.mAccount = Account.restoreAccountWithId(INSTANCE, m.mAccountKey);
service.mMailbox = m;
service.alarm();
// Send the alarm to the sync service
if (!service.alarm()) {
// A false return means that we were forced to interrupt the thread
// In this case, we release the mailbox so that we can start another
// thread to do the work
log("Alarm failed; releasing mailbox");
syncManager.releaseMailbox(id);
}
}
}}).start();
}