diff options
author | Matt Turner <mattst88@gmail.com> | 2008-03-19 18:17:10 -0400 |
---|---|---|
committer | Alex Deucher <alex@samba.(none)> | 2008-03-19 18:17:10 -0400 |
commit | 17cd42ed31814ba329a6a68edd0d75390a7da40e (patch) | |
tree | 7fbf87fdf2e6369bf0ad99aba0d91da21589dbaa | |
parent | c83827b4d2b6f03c54429e757a756eb99ff8be6b (diff) |
Enable BSR in Log2 functions
This patch edits RADEONLog2 and ATILog2 to use the x86 BSR instruction instead
of looping through bits. It should provide somewhat of a speed increase in
this function on x86 and AMD64 architectures.
Note: the BSR instruction was added with the 80386 CPU and is therefore not
compatible with earlier CPUs, though I highly doubt it's even possible to use a
286 in conjunction with a Radeon.
The inline assembly also works with Intel's compiler (icc).
-rw-r--r-- | src/radeon_exa.c | 9 | ||||
-rw-r--r-- | src/radeon_render.c | 9 |
2 files changed, 16 insertions, 2 deletions
diff --git a/src/radeon_exa.c b/src/radeon_exa.c
index 4da4841a..a6ededa2 100644
--- a/src/radeon_exa.c
+++ b/src/radeon_exa.c
@@ -99,10 +99,17 @@
 static __inline__ int RADEONLog2(int val)
 {
     int bits;
-
+#if (defined __i386__ || defined __x86_64__) && (defined __GNUC__)
+    __asm volatile("bsrl %1, %0"
+        : "=r" (bits)
+        : "c" (val)
+    );
+    return bits;
+#else
     for (bits = 0; val != 0; val >>= 1, ++bits)
         ;
     return bits - 1;
+#endif
 }
 
 static __inline__ CARD32 F_TO_DW(float val)
diff --git a/src/radeon_render.c b/src/radeon_render.c
index a80d1360..950753ca 100644
--- a/src/radeon_render.c
+++ b/src/radeon_render.c
@@ -250,10 +250,17 @@
 static __inline__ int ATILog2(int val)
 {
     int bits;
-
+#if (defined __i386__ || defined __x86_64__) && (defined __GNUC__)
+    __asm volatile("bsrl %1, %0"
+        : "=r" (bits)
+        : "c" (val)
+    );
+    return bits;
+#else
     for (bits = 0; val != 0; val >>= 1, ++bits)
         ;
     return bits - 1;
+#endif
 }
 
 static void