From 02cf75e0ca7e37c0f92b31d965eb41006d849962 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Tue, 3 May 2011 16:21:41 -0700 Subject: [PATCH 1/4] Fix a race-condtion in SurfaceFlinger that could lead to a crash. Client::mLayers could be accessed from different threads. On one side from Client::attachLayer() which is currently called from a binder thread; on the other side from Client::detachLayer() which is always called from the main thread. This could lead to a corruption of Client::mLayers. We fix this issue by adding an internal lock to Client. Bug: 4483046 Change-Id: I5262bf1124d9a65ec6f8ffd8e367356fc33a7536 --- services/surfaceflinger/SurfaceFlinger.cpp | 20 ++++++++++++-------- services/surfaceflinger/SurfaceFlinger.h | 12 +++++++++--- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp index c08e2c941..9f007b314 100644 --- a/services/surfaceflinger/SurfaceFlinger.cpp +++ b/services/surfaceflinger/SurfaceFlinger.cpp @@ -1030,15 +1030,15 @@ status_t SurfaceFlinger::addLayer_l(const sp& layer) ssize_t SurfaceFlinger::addClientLayer(const sp& client, const sp& lbc) { - Mutex::Autolock _l(mStateLock); - // attach this layer to the client - ssize_t name = client->attachLayer(lbc); + size_t name = client->attachLayer(lbc); + + Mutex::Autolock _l(mStateLock); // add this layer to the current state list addLayer_l(lbc); - return name; + return ssize_t(name); } status_t SurfaceFlinger::removeLayer(const sp& layer) @@ -2253,15 +2253,17 @@ status_t Client::initCheck() const { return NO_ERROR; } -ssize_t Client::attachLayer(const sp& layer) +size_t Client::attachLayer(const sp& layer) { - int32_t name = android_atomic_inc(&mNameGenerator); + Mutex::Autolock _l(mLock); + size_t name = mNameGenerator++; mLayers.add(name, layer); return name; } void Client::detachLayer(const LayerBaseClient* layer) { + Mutex::Autolock _l(mLock); // we do a linear search here, because this doesn't happen often const size_t count = mLayers.size(); for (size_t i=0 ; i Client::getLayerUser(int32_t i) const { +sp Client::getLayerUser(int32_t i) const +{ + Mutex::Autolock _l(mLock); sp lbc; - const wp& layer(mLayers.valueFor(i)); + wp layer(mLayers.valueFor(i)); if (layer != 0) { lbc = layer.promote(); LOGE_IF(lbc==0, "getLayerUser(name=%d) is dead", int(i)); diff --git a/services/surfaceflinger/SurfaceFlinger.h b/services/surfaceflinger/SurfaceFlinger.h index df1ca4892..bb89f4314 100644 --- a/services/surfaceflinger/SurfaceFlinger.h +++ b/services/surfaceflinger/SurfaceFlinger.h @@ -66,7 +66,7 @@ public: status_t initCheck() const; // protected by SurfaceFlinger::mStateLock - ssize_t attachLayer(const sp& layer); + size_t attachLayer(const sp& layer); void detachLayer(const LayerBaseClient* layer); sp getLayerUser(int32_t i) const; @@ -82,9 +82,15 @@ private: virtual status_t destroySurface(SurfaceID surfaceId); virtual status_t setState(int32_t count, const layer_state_t* states); - DefaultKeyedVector< size_t, wp > mLayers; + // constant sp mFlinger; - int32_t mNameGenerator; + + // protected by mLock + DefaultKeyedVector< size_t, wp > mLayers; + size_t mNameGenerator; + + // thread-safe + mutable Mutex mLock; }; class UserClient : public BnSurfaceComposerClient From 34cd9b72f1474ccbfd2338049d81a24bfa814f35 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Tue, 3 May 2011 17:04:02 -0700 Subject: [PATCH 2/4] Fix a race in SurfaceFlinger that could cause layers to be leaked forever. The transaction flags were atomically read-and-cleared to determine if a transaction was needed, in the later case, mStateLock was taken to keep the current state still during the transaction. This left a small window open, where a layer could be removed after the transaction flags were checked but before the transaction was started holding the lock. In that situation eTraversalNeeded would be set but only seen during the next transaction cycle; however, because we're handling this transaction (because of another flag) it will be commited, "loosing" the information about the layer being removed -- so when the next transaction cycle due to eTraversalNeeded starts, it won't notice that layers have been removed and won't populated the ditchedLayers array. Bug: 4483049 Change-Id: Ibb5989312f871339928ee1aa3f9567770d72969b --- services/surfaceflinger/SurfaceFlinger.cpp | 17 ++++++++++++++++- services/surfaceflinger/SurfaceFlinger.h | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp index 9f007b314..5314cb09e 100644 --- a/services/surfaceflinger/SurfaceFlinger.cpp +++ b/services/surfaceflinger/SurfaceFlinger.cpp @@ -366,7 +366,7 @@ bool SurfaceFlinger::threadLoop() if (LIKELY(mTransactionCount == 0)) { // if we're in a global transaction, don't do anything. const uint32_t mask = eTransactionNeeded | eTraversalNeeded; - uint32_t transactionFlags = getTransactionFlags(mask); + uint32_t transactionFlags = peekTransactionFlags(mask); if (LIKELY(transactionFlags)) { handleTransaction(transactionFlags); } @@ -477,7 +477,17 @@ void SurfaceFlinger::handleTransaction(uint32_t transactionFlags) Mutex::Autolock _l(mStateLock); const nsecs_t now = systemTime(); mDebugInTransaction = now; + + // Here we're guaranteed that some transaction flags are set + // so we can call handleTransactionLocked() unconditionally. + // We call getTransactionFlags(), which will also clear the flags, + // with mStateLock held to guarantee that mCurrentState won't change + // until the transaction is commited. + + const uint32_t mask = eTransactionNeeded | eTraversalNeeded; + transactionFlags = getTransactionFlags(mask); handleTransactionLocked(transactionFlags, ditchedLayers); + mLastTransactionTime = systemTime() - now; mDebugInTransaction = 0; // here the transaction has been committed @@ -1085,6 +1095,11 @@ status_t SurfaceFlinger::invalidateLayerVisibility(const sp& layer) return NO_ERROR; } +uint32_t SurfaceFlinger::peekTransactionFlags(uint32_t flags) +{ + return android_atomic_release_load(&mTransactionFlags); +} + uint32_t SurfaceFlinger::getTransactionFlags(uint32_t flags) { return android_atomic_and(~flags, &mTransactionFlags) & flags; diff --git a/services/surfaceflinger/SurfaceFlinger.h b/services/surfaceflinger/SurfaceFlinger.h index bb89f4314..a76a1c118 100644 --- a/services/surfaceflinger/SurfaceFlinger.h +++ b/services/surfaceflinger/SurfaceFlinger.h @@ -325,6 +325,7 @@ private: status_t purgatorizeLayer_l(const sp& layer); uint32_t getTransactionFlags(uint32_t flags); + uint32_t peekTransactionFlags(uint32_t flags); uint32_t setTransactionFlags(uint32_t flags); void commitTransaction(); From 3d6881f3940dd19268006e71dd2ed9896588dfb6 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Thu, 19 May 2011 18:03:31 -0700 Subject: [PATCH 3/4] RefBase subclasses can now decide how they want to be destroyed. This adds a destroy() virtual on RefBase which sublasses can implement. destroy() is called in lieu of the destructor whenthe last strong ref goes away. Bug: 4483050 Change-Id: I8cbf6044a6fd3f01043a45592b5a60fa1e5fade2 --- include/utils/RefBase.h | 9 ++++++++- libs/utils/RefBase.cpp | 12 +++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/utils/RefBase.h b/include/utils/RefBase.h index c24c0dbcb..1b4a31000 100644 --- a/include/utils/RefBase.h +++ b/include/utils/RefBase.h @@ -115,7 +115,14 @@ public: protected: RefBase(); virtual ~RefBase(); - + + // called when the last reference goes away. this is responsible for + // calling the destructor. The default implementation just does + // "delete this;". + // Make sure to never acquire a strong reference from this function. The + // same restrictions than for destructors apply. + virtual void destroy() const; + //! Flags for extendObjectLifetime() enum { OBJECT_LIFETIME_WEAK = 0x0001, diff --git a/libs/utils/RefBase.cpp b/libs/utils/RefBase.cpp index 0bd1af4eb..c68e32a0d 100644 --- a/libs/utils/RefBase.cpp +++ b/libs/utils/RefBase.cpp @@ -298,6 +298,10 @@ void RefBase::incStrong(const void* id) const const_cast(this)->onFirstRef(); } +void RefBase::destroy() const { + delete this; +} + void RefBase::decStrong(const void* id) const { weakref_impl* const refs = mRefs; @@ -310,7 +314,7 @@ void RefBase::decStrong(const void* id) const if (c == 1) { const_cast(this)->onLastStrongRef(id); if ((refs->mFlags&OBJECT_LIFETIME_WEAK) != OBJECT_LIFETIME_WEAK) { - delete this; + destroy(); } } refs->removeWeakRef(id); @@ -370,7 +374,8 @@ void RefBase::weakref_type::decWeak(const void* id) if ((impl->mFlags&OBJECT_LIFETIME_WEAK) != OBJECT_LIFETIME_WEAK) { if (impl->mStrong == INITIAL_STRONG_VALUE) - delete impl->mBase; + if (impl->mBase) + impl->mBase->destroy(); else { // LOGV("Freeing refs %p of old RefBase %p\n", this, impl->mBase); delete impl; @@ -378,7 +383,8 @@ void RefBase::weakref_type::decWeak(const void* id) } else { impl->mBase->onLastWeakRef(id); if ((impl->mFlags&OBJECT_LIFETIME_FOREVER) != OBJECT_LIFETIME_FOREVER) { - delete impl->mBase; + if (impl->mBase) + impl->mBase->destroy(); } } } From b4081bb52aa71b5ffe747d3b5aabd6a2cd392a75 Mon Sep 17 00:00:00 2001 From: Mathias Agopian Date: Thu, 19 May 2011 15:38:14 -0700 Subject: [PATCH 4/4] Fix a race that could cause GL commands to be executed from the wrong thread. Bug: 4483050 Change-Id: I37f0f3156059c208c6168ee6131d0e267d823188 --- services/surfaceflinger/Layer.cpp | 20 +--- services/surfaceflinger/Layer.h | 2 +- services/surfaceflinger/LayerBase.cpp | 5 +- services/surfaceflinger/LayerBase.h | 7 +- services/surfaceflinger/SurfaceFlinger.cpp | 120 +++++++++------------ services/surfaceflinger/SurfaceFlinger.h | 13 ++- 6 files changed, 70 insertions(+), 97 deletions(-) diff --git a/services/surfaceflinger/Layer.cpp b/services/surfaceflinger/Layer.cpp index c9dcef370..4739cc307 100644 --- a/services/surfaceflinger/Layer.cpp +++ b/services/surfaceflinger/Layer.cpp @@ -76,6 +76,10 @@ Layer::~Layer() } } +void Layer::destroy() const { + mFlinger->destroyLayer(this); +} + status_t Layer::setToken(const sp& userClient, SharedClient* sharedClient, int32_t token) { @@ -123,22 +127,6 @@ sp Layer::createSurface() const return mSurface; } -status_t Layer::ditch() -{ - // NOTE: Called from the main UI thread - - // the layer is not on screen anymore. free as much resources as possible - mFreezeLock.clear(); - - EGLDisplay dpy(mFlinger->graphicPlane(0).getEGLDisplay()); - mBufferManager.destroy(dpy); - mSurface.clear(); - - Mutex::Autolock _l(mLock); - mWidth = mHeight = 0; - return NO_ERROR; -} - status_t Layer::setBuffers( uint32_t w, uint32_t h, PixelFormat format, uint32_t flags) { diff --git a/services/surfaceflinger/Layer.h b/services/surfaceflinger/Layer.h index 7bb207a1f..51b52f9eb 100644 --- a/services/surfaceflinger/Layer.h +++ b/services/surfaceflinger/Layer.h @@ -78,7 +78,6 @@ public: virtual bool needsFiltering() const; virtual bool isSecure() const { return mSecure; } virtual sp createSurface() const; - virtual status_t ditch(); virtual void onRemoved(); virtual bool setBypass(bool enable); @@ -95,6 +94,7 @@ public: return mFreezeLock; } protected: + virtual void destroy() const; virtual void dump(String8& result, char* scratch, size_t size) const; private: diff --git a/services/surfaceflinger/LayerBase.cpp b/services/surfaceflinger/LayerBase.cpp index 21c36e184..3986fdef0 100644 --- a/services/surfaceflinger/LayerBase.cpp +++ b/services/surfaceflinger/LayerBase.cpp @@ -583,10 +583,7 @@ LayerBaseClient::Surface::~Surface() */ // destroy client resources - sp layer = getOwner(); - if (layer != 0) { - mFlinger->destroySurface(layer); - } + mFlinger->destroySurface(mOwner); } sp LayerBaseClient::Surface::getOwner() const { diff --git a/services/surfaceflinger/LayerBase.h b/services/surfaceflinger/LayerBase.h index afc5ec8d5..e69cb6a26 100644 --- a/services/surfaceflinger/LayerBase.h +++ b/services/surfaceflinger/LayerBase.h @@ -196,10 +196,6 @@ public: */ virtual bool isSecure() const { return false; } - /** Called from the main thread, when the surface is removed from the - * draw list */ - virtual status_t ditch() { return NO_ERROR; } - /** called with the state lock when the surface is removed from the * current list */ virtual void onRemoved() { }; @@ -264,7 +260,8 @@ protected: volatile int32_t mInvalidate; -protected: +public: + // called from class SurfaceFlinger virtual ~LayerBase(); private: diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp index 5314cb09e..a93d75684 100644 --- a/services/surfaceflinger/SurfaceFlinger.cpp +++ b/services/surfaceflinger/SurfaceFlinger.cpp @@ -358,6 +358,9 @@ bool SurfaceFlinger::threadLoop() { waitForEvent(); + // call Layer's destructor + handleDestroyLayers(); + // check for transactions if (UNLIKELY(mConsoleSignals)) { handleConsoleEvents(); @@ -467,47 +470,26 @@ void SurfaceFlinger::handleConsoleEvents() void SurfaceFlinger::handleTransaction(uint32_t transactionFlags) { - Vector< sp > ditchedLayers; + Mutex::Autolock _l(mStateLock); + const nsecs_t now = systemTime(); + mDebugInTransaction = now; - /* - * Perform and commit the transaction - */ + // Here we're guaranteed that some transaction flags are set + // so we can call handleTransactionLocked() unconditionally. + // We call getTransactionFlags(), which will also clear the flags, + // with mStateLock held to guarantee that mCurrentState won't change + // until the transaction is committed. - { // scope for the lock - Mutex::Autolock _l(mStateLock); - const nsecs_t now = systemTime(); - mDebugInTransaction = now; + const uint32_t mask = eTransactionNeeded | eTraversalNeeded; + transactionFlags = getTransactionFlags(mask); + handleTransactionLocked(transactionFlags); - // Here we're guaranteed that some transaction flags are set - // so we can call handleTransactionLocked() unconditionally. - // We call getTransactionFlags(), which will also clear the flags, - // with mStateLock held to guarantee that mCurrentState won't change - // until the transaction is commited. - - const uint32_t mask = eTransactionNeeded | eTraversalNeeded; - transactionFlags = getTransactionFlags(mask); - handleTransactionLocked(transactionFlags, ditchedLayers); - - mLastTransactionTime = systemTime() - now; - mDebugInTransaction = 0; - // here the transaction has been committed - } - - /* - * Clean-up all layers that went away - * (do this without the lock held) - */ - const size_t count = ditchedLayers.size(); - for (size_t i=0 ; iditch(); - } - } + mLastTransactionTime = systemTime() - now; + mDebugInTransaction = 0; + // here the transaction has been committed } -void SurfaceFlinger::handleTransactionLocked( - uint32_t transactionFlags, Vector< sp >& ditchedLayers) +void SurfaceFlinger::handleTransactionLocked(uint32_t transactionFlags) { const LayerVector& currentLayers(mCurrentState.layersSortedByZ); const size_t count = currentLayers.size(); @@ -579,7 +561,6 @@ void SurfaceFlinger::handleTransactionLocked( const sp& layer(previousLayers[i]); if (currentLayers.indexOf( layer ) < 0) { // this layer is not visible anymore - ditchedLayers.add(layer); mDirtyRegionRemovedLayer.orSelf(layer->visibleRegionScreen); } } @@ -589,6 +570,31 @@ void SurfaceFlinger::handleTransactionLocked( commitTransaction(); } +void SurfaceFlinger::destroyLayer(LayerBase const* layer) +{ + Mutex::Autolock _l(mDestroyedLayerLock); + mDestroyedLayers.add(layer); + signalEvent(); +} + +void SurfaceFlinger::handleDestroyLayers() +{ + Vector destroyedLayers; + + { // scope for the lock + Mutex::Autolock _l(mDestroyedLayerLock); + destroyedLayers = mDestroyedLayers; + mDestroyedLayers.clear(); + } + + // call destructors without a lock held + const size_t count = destroyedLayers.size(); + for (size_t i=0 ; igetName().string()); + delete destroyedLayers[i]; + } +} + sp SurfaceFlinger::getFreezeLock() const { return new FreezeLock(const_cast(this)); @@ -1329,38 +1335,18 @@ status_t SurfaceFlinger::removeSurface(const sp& client, SurfaceID sid) return err; } -status_t SurfaceFlinger::destroySurface(const sp& layer) +status_t SurfaceFlinger::destroySurface(const wp& layer) { // called by ~ISurface() when all references are gone - - class MessageDestroySurface : public MessageBase { - SurfaceFlinger* flinger; - sp layer; - public: - MessageDestroySurface( - SurfaceFlinger* flinger, const sp& layer) - : flinger(flinger), layer(layer) { } - virtual bool handler() { - sp l(layer); - layer.clear(); // clear it outside of the lock; - Mutex::Autolock _l(flinger->mStateLock); - /* - * remove the layer from the current list -- chances are that it's - * not in the list anyway, because it should have been removed - * already upon request of the client (eg: window manager). - * However, a buggy client could have not done that. - * Since we know we don't have any more clients, we don't need - * to use the purgatory. - */ - status_t err = flinger->removeLayer_l(l); - LOGE_IF(err<0 && err != NAME_NOT_FOUND, - "error removing layer=%p (%s)", l.get(), strerror(-err)); - return true; - } - }; - - postMessageAsync( new MessageDestroySurface(this, layer) ); - return NO_ERROR; + status_t err = NO_ERROR; + sp l(layer.promote()); + if (l != NULL) { + Mutex::Autolock _l(mStateLock); + err = removeLayer_l(l); + LOGE_IF(err<0 && err != NAME_NOT_FOUND, + "error removing layer=%p (%s)", l.get(), strerror(-err)); + } + return err; } status_t SurfaceFlinger::setClientState( diff --git a/services/surfaceflinger/SurfaceFlinger.h b/services/surfaceflinger/SurfaceFlinger.h index a76a1c118..9fa98cf2e 100644 --- a/services/surfaceflinger/SurfaceFlinger.h +++ b/services/surfaceflinger/SurfaceFlinger.h @@ -218,6 +218,7 @@ public: status_t removeLayer(const sp& layer); status_t addLayer(const sp& layer); status_t invalidateLayerVisibility(const sp& layer); + void destroyLayer(LayerBase const* layer); sp getLayer(const sp& sur) const; @@ -255,7 +256,7 @@ private: uint32_t w, uint32_t h, uint32_t flags); status_t removeSurface(const sp& client, SurfaceID sid); - status_t destroySurface(const sp& layer); + status_t destroySurface(const wp& layer); status_t setClientState(const sp& client, int32_t count, const layer_state_t* states); @@ -300,9 +301,8 @@ public: // hack to work around gcc 4.0.3 bug private: void handleConsoleEvents(); void handleTransaction(uint32_t transactionFlags); - void handleTransactionLocked( - uint32_t transactionFlags, - Vector< sp >& ditchedLayers); + void handleTransactionLocked(uint32_t transactionFlags); + void handleDestroyLayers(); void computeVisibleRegions( LayerVector& currentLayers, @@ -422,6 +422,11 @@ private: // these are thread safe mutable Barrier mReadyToRunBarrier; + + // protected by mDestroyedLayerLock; + mutable Mutex mDestroyedLayerLock; + Vector mDestroyedLayers; + // atomic variables enum { eConsoleReleased = 1,