improve GLES jumptables

In the common case this saves one instruction per jump
(which will help with the i-cache).

This change also gets rid of the "use slow tls" option,
which was useless, so architectures that don't have
assembly bindings will now perform much better.

Change-Id: I31be6c06ad2136b50ef3a1ac14682d7812ad40d2
This commit is contained in:
Mathias Agopian 2013-06-14 19:08:36 -07:00
parent 23e81a2103
commit e0ea89ceef
6 changed files with 50 additions and 133 deletions

View File

@ -230,9 +230,6 @@ static int gl_no_context() {
static void early_egl_init(void)
{
#if !USE_FAST_TLS_KEY
pthread_key_create(&gGLWrapperKey, NULL);
#endif
#if EGL_TRACE
pthread_key_create(&gGLTraceKey, NULL);
initEglTraceLevel();
@ -341,42 +338,11 @@ void gl_noop() {
// ----------------------------------------------------------------------------
#if USE_FAST_TLS_KEY
// We have a dedicated TLS slot in bionic
// Returns the pointer obtained from __get_tls(), reinterpreted as an array of
// volatile `gl_hooks_t const *` slots that callers index by TLS slot number.
// NOTE(review): __get_tls() is presumably bionic's accessor for the calling
// thread's TLS area; its definition is not visible here -- confirm.
static inline gl_hooks_t const * volatile * get_tls_hooks() {
volatile void *tls_base = __get_tls();
// View the raw TLS base as an array of hook-table pointers.
gl_hooks_t const * volatile * tls_hooks =
reinterpret_cast<gl_hooks_t const * volatile *>(tls_base);
return tls_hooks;
}
// Stores `value` in the TLS_SLOT_OPENGL_API entry of the slot array returned
// by get_tls_hooks(). The pointer is written as-is; a null `value` is stored
// without any check.
void setGlThreadSpecific(gl_hooks_t const *value) {
gl_hooks_t const * volatile * tls_hooks = get_tls_hooks();
tls_hooks[TLS_SLOT_OPENGL_API] = value;
}
// Reads the hooks pointer from the TLS_SLOT_OPENGL_API entry of the slot
// array returned by get_tls_hooks().
// Returns the stored pointer when it is non-null; otherwise returns
// &gHooksNoContext, so this function never returns null.
gl_hooks_t const* getGlThreadSpecific() {
gl_hooks_t const * volatile * tls_hooks = get_tls_hooks();
gl_hooks_t const* hooks = tls_hooks[TLS_SLOT_OPENGL_API];
if (hooks) return hooks;
// Nothing stored in the slot: fall back to the no-context hooks table.
return &gHooksNoContext;
}
#else
// Stores `value` under the pthread key gGLWrapperKey using
// pthread_setspecific(). The return code of pthread_setspecific() is not
// checked.
void setGlThreadSpecific(gl_hooks_t const *value) {
pthread_setspecific(gGLWrapperKey, value);
}
// Fetches the hooks pointer stored under the pthread key gGLWrapperKey.
// Returns that pointer when it is non-null; otherwise returns
// &gHooksNoContext, so this function never returns null.
gl_hooks_t const* getGlThreadSpecific() {
gl_hooks_t const* hooks = static_cast<gl_hooks_t*>(pthread_getspecific(gGLWrapperKey));
if (hooks) return hooks;
// No value stored under the key: fall back to the no-context hooks table.
return &gHooksNoContext;
}
#endif
// ----------------------------------------------------------------------------
// GL / EGL hooks
// ----------------------------------------------------------------------------

View File

@ -849,9 +849,7 @@ __eglMustCastToProperFunctionPointerType eglGetProcAddress(const char *procname)
}
if (found) {
#if USE_FAST_TLS_KEY
addr = gExtensionForwarders[slot];
#endif
sGLExtentionMap.add(name, addr);
sGLExtentionSlot++;
}

View File

@ -34,9 +34,7 @@ namespace android {
#undef GL_EXTENSION_LIST
#undef GET_TLS
#if USE_FAST_TLS_KEY
#if defined(__arm__)
#if defined(__arm__)
#define GET_TLS(reg) "mrc p15, 0, " #reg ", c13, c0, 3 \n"
@ -58,7 +56,7 @@ namespace android {
: \
);
#elif defined(__mips__)
#elif defined(__mips__)
#define API_ENTRY(_api) __attribute__((noinline)) _api
@ -88,27 +86,21 @@ namespace android {
ext.extensions[_api])) \
: \
);
#endif
#else
#error Unsupported architecture
#endif
#if defined(CALL_GL_EXTENSION_API)
#define GL_EXTENSION_NAME(_n) __glExtFwd##_n
#define GL_EXTENSION(_n) \
void API_ENTRY(GL_EXTENSION_NAME(_n))() { \
CALL_GL_EXTENSION_API(_n); \
}
#else
#define GL_EXTENSION_NAME(_n) NULL
#define GL_EXTENSION_NAME(_n) NULL
#define GL_EXTENSION(_n)
#warning "eglGetProcAddress() partially supported"
#define GL_EXTENSION(_n)
#warning "eglGetProcAddress() partially supported"
#endif

View File

@ -40,13 +40,11 @@ using namespace android;
#undef CALL_GL_API
#undef CALL_GL_API_RETURN
#if USE_FAST_TLS_KEY
#if defined(__arm__)
#if defined(__arm__) && !USE_SLOW_BINDING
#define GET_TLS(reg) "mrc p15, 0, " #reg ", c13, c0, 3 \n"
#define API_ENTRY(_api) __attribute__((naked)) _api
#define API_ENTRY(_api) __attribute__((noinline)) _api
#define CALL_GL_API(_api, ...) \
asm volatile( \
@ -54,15 +52,13 @@ using namespace android;
"ldr r12, [r12, %[tls]] \n" \
"cmp r12, #0 \n" \
"ldrne pc, [r12, %[api]] \n" \
"mov r0, #0 \n" \
"bx lr \n" \
: \
: [tls] "J"(TLS_SLOT_OPENGL_API*4), \
[api] "J"(__builtin_offsetof(gl_hooks_t, gl._api)) \
: \
);
#elif defined(__mips__)
#elif defined(__mips__) && !USE_SLOW_BINDING
#define API_ENTRY(_api) __attribute__((noinline)) _api
@ -94,30 +90,21 @@ using namespace android;
: \
);
#else
#error Unsupported architecture
#endif
#define CALL_GL_API_RETURN(_api, ...) \
CALL_GL_API(_api, __VA_ARGS__) \
return 0; // placate gcc's warnings. never reached.
#else
#define API_ENTRY(_api) _api
#define CALL_GL_API(_api, ...) \
gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl; \
_c->_api(__VA_ARGS__);
#define CALL_GL_API_RETURN(_api, ...) \
gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl; \
return _c->_api(__VA_ARGS__)
if (_c) return _c->_api(__VA_ARGS__);
#endif
#define CALL_GL_API_RETURN(_api, ...) \
CALL_GL_API(_api, __VA_ARGS__) \
return 0;
extern "C" {
#include "gl3_api.in"
@ -139,7 +126,8 @@ const GLubyte * glGetString(GLenum name)
{
const GLubyte * ret = egl_get_string_for_current_context(name);
if (ret == NULL) {
ret = __glGetString(name);
gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;
ret = _c->glGetString(name);
}
return ret;
}

View File

@ -31,9 +31,6 @@
using namespace android;
// set this to 1 for crude GL debugging
#define CHECK_FOR_GL_ERRORS 0
// ----------------------------------------------------------------------------
// extensions for the framework
// ----------------------------------------------------------------------------
@ -95,13 +92,11 @@ GL_API void GL_APIENTRY glWeightPointerOESBounds(GLint size, GLenum type,
#undef CALL_GL_API
#undef CALL_GL_API_RETURN
#if USE_FAST_TLS_KEY && !CHECK_FOR_GL_ERRORS
#if defined(__arm__)
#if defined(__arm__) && !USE_SLOW_BINDING
#define GET_TLS(reg) "mrc p15, 0, " #reg ", c13, c0, 3 \n"
#define API_ENTRY(_api) __attribute__((naked)) _api
#define API_ENTRY(_api) __attribute__((noinline)) _api
#define CALL_GL_API(_api, ...) \
asm volatile( \
@ -109,15 +104,13 @@ GL_API void GL_APIENTRY glWeightPointerOESBounds(GLint size, GLenum type,
"ldr r12, [r12, %[tls]] \n" \
"cmp r12, #0 \n" \
"ldrne pc, [r12, %[api]] \n" \
"mov r0, #0 \n" \
"bx lr \n" \
: \
: [tls] "J"(TLS_SLOT_OPENGL_API*4), \
[api] "J"(__builtin_offsetof(gl_hooks_t, gl._api)) \
: \
);
#elif defined(__mips__)
#elif defined(__mips__) && !USE_SLOW_BINDING
#define API_ENTRY(_api) __attribute__((noinline)) _api
@ -149,43 +142,20 @@ GL_API void GL_APIENTRY glWeightPointerOESBounds(GLint size, GLenum type,
: \
);
#else
#error Unsupported architecture
#endif
#define CALL_GL_API_RETURN(_api, ...) \
CALL_GL_API(_api, __VA_ARGS__) \
return 0; // placate gcc's warnings. never reached.
#else
#if CHECK_FOR_GL_ERRORS
#define CHECK_GL_ERRORS(_api) \
do { GLint err = glGetError(); \
ALOGE_IF(err != GL_NO_ERROR, "%s failed (0x%04X)", #_api, err); \
} while(false);
#else
#define CHECK_GL_ERRORS(_api) do { } while(false);
#endif
#define API_ENTRY(_api) _api
#define CALL_GL_API(_api, ...) \
gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl; \
_c->_api(__VA_ARGS__); \
CHECK_GL_ERRORS(_api)
#define CALL_GL_API_RETURN(_api, ...) \
gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl; \
return _c->_api(__VA_ARGS__)
#define CALL_GL_API(_api, ...) \
gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl; \
if (_c) return _c->_api(__VA_ARGS__);
#endif
#define CALL_GL_API_RETURN(_api, ...) \
CALL_GL_API(_api, __VA_ARGS__) \
return 0;
extern "C" {
#include "gl_api.in"
@ -202,11 +172,11 @@ extern "C" {
extern "C" const GLubyte * __glGetString(GLenum name);
const GLubyte * glGetString(GLenum name)
{
const GLubyte * glGetString(GLenum name) {
const GLubyte * ret = egl_get_string_for_current_context(name);
if (ret == NULL) {
ret = __glGetString(name);
gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;
ret = _c->glGetString(name);
}
return ret;
}

View File

@ -32,13 +32,11 @@
#include <GLES3/gl3.h>
#include <GLES3/gl3ext.h>
#if !defined(__arm__) && !defined(__mips__)
#define USE_SLOW_BINDING 1
#else
#define USE_SLOW_BINDING 0
#endif
// set to 1 for debugging
#define USE_SLOW_BINDING 0
#undef NELEM
#define NELEM(x) (sizeof(x)/sizeof(*(x)))
#define NELEM(x) (sizeof(x)/sizeof(*(x)))
// maximum number of GL extensions that can be used simultaneously in
// a given process. this limitation exists because we need to have
@ -47,15 +45,7 @@
#define MAX_NUMBER_OF_GL_EXTENSIONS 256
#if defined(HAVE_ANDROID_OS) && !USE_SLOW_BINDING && __OPTIMIZE__
#define USE_FAST_TLS_KEY 1
#else
#define USE_FAST_TLS_KEY 0
#endif
#if USE_FAST_TLS_KEY
# include <bionic_tls.h> /* special private C library header */
#endif
#include <bionic_tls.h> /* special private C library header */
// ----------------------------------------------------------------------------
namespace android {
@ -84,7 +74,20 @@ struct gl_hooks_t {
#undef EGL_ENTRY
EGLAPI void setGlThreadSpecific(gl_hooks_t const *value);
EGLAPI gl_hooks_t const* getGlThreadSpecific();
// We have a dedicated TLS slot in bionic
// Returns the pointer obtained from __get_tls(), reinterpreted as an array of
// volatile `gl_hooks_t const *` slots that callers index by TLS slot number.
// NOTE(review): __get_tls() is presumably provided by <bionic_tls.h>; its
// definition is not visible here -- confirm.
inline gl_hooks_t const * volatile * get_tls_hooks() {
volatile void *tls_base = __get_tls();
// View the raw TLS base as an array of hook-table pointers.
gl_hooks_t const * volatile * tls_hooks =
reinterpret_cast<gl_hooks_t const * volatile *>(tls_base);
return tls_hooks;
}
// Reads the hooks pointer from the TLS_SLOT_OPENGL_API entry of the slot
// array returned by get_tls_hooks() and returns it unchanged.
// No fallback is applied here: if the slot holds null, null is returned, so
// the result must be checked before it is dereferenced.
inline EGLAPI gl_hooks_t const* getGlThreadSpecific() {
gl_hooks_t const * volatile * tls_hooks = get_tls_hooks();
gl_hooks_t const* hooks = tls_hooks[TLS_SLOT_OPENGL_API];
return hooks;
}
// ----------------------------------------------------------------------------
}; // namespace android