summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Turner <mattst88@gmail.com>2008-03-19 18:17:10 -0400
committerAlex Deucher <alex@samba.(none)>2008-03-19 18:17:10 -0400
commit17cd42ed31814ba329a6a68edd0d75390a7da40e (patch)
tree7fbf87fdf2e6369bf0ad99aba0d91da21589dbaa
parentc83827b4d2b6f03c54429e757a756eb99ff8be6b (diff)
Enable BSR in Log2 functions
This patch edits RADEONLog2 and ATILog2 to use the x86 BSR instruction instead of looping through bits. It should provide somewhat of a speed increase in these functions on x86 and AMD64 architectures. Note: the BSR instruction was added with the 80386 CPU and is therefore not compatible with earlier CPUs, though I highly doubt it's even possible to use a 286 with a Radeon. The inline assembly also works with Intel's compiler (icc).
-rw-r--r--src/radeon_exa.c9
-rw-r--r--src/radeon_render.c9
2 files changed, 16 insertions, 2 deletions
diff --git a/src/radeon_exa.c b/src/radeon_exa.c
index 4da4841a..a6ededa2 100644
--- a/src/radeon_exa.c
+++ b/src/radeon_exa.c
@@ -99,10 +99,17 @@ static __inline__ int
RADEONLog2(int val)
{
int bits;
-
+#if (defined __i386__ || defined __x86_64__) && (defined __GNUC__)
+ __asm volatile("bsrl %1, %0"
+ : "=r" (bits)
+ : "c" (val)
+ );
+ return bits;
+#else
for (bits = 0; val != 0; val >>= 1, ++bits)
;
return bits - 1;
+#endif
}
static __inline__ CARD32 F_TO_DW(float val)
diff --git a/src/radeon_render.c b/src/radeon_render.c
index a80d1360..950753ca 100644
--- a/src/radeon_render.c
+++ b/src/radeon_render.c
@@ -250,10 +250,17 @@ static __inline__ int
ATILog2(int val)
{
int bits;
-
+#if (defined __i386__ || defined __x86_64__) && (defined __GNUC__)
+ __asm volatile("bsrl %1, %0"
+ : "=r" (bits)
+ : "c" (val)
+ );
+ return bits;
+#else
for (bits = 0; val != 0; val >>= 1, ++bits)
;
return bits - 1;
+#endif
}
static void