summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2015-04-01 23:00:57 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2015-04-01 23:13:29 +0100
commit: e47eb0c5e588a6cfc4e6f12824814f11e802ed51 (patch)
tree: 1e0b68d82aa4bccbcf6d90c044986c919e72e4bb /src
parent: 7df58456b8d99a9953a871c656657cbc923dc238 (diff)
sna: Don't unroll BLT points
The compiler is smarter than I am; unrolling hurts here.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src')
-rw-r--r-- src/sna/sna_accel.c 42
-rw-r--r-- src/sna/sna_blt.c 66
2 files changed, 19 insertions, 89 deletions
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 6fa15b29..a11a77d5 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4593,7 +4593,7 @@ static inline bool box32_trim_and_translate(Box32Rec *box, DrawablePtr d, GCPtr
return box32_clip(box, gc);
}
-static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y)
+static inline void box_add_xy(BoxPtr box, int16_t x, int16_t y)
{
if (box->x1 > x)
box->x1 = x;
@@ -4606,6 +4606,11 @@ static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y)
box->y2 = y;
}
+static inline void box_add_pt(BoxPtr box, const DDXPointRec *pt)
+{
+ box_add_xy(box, pt->x, pt->y);
+}
+
static inline bool box32_to_box16(const Box32Rec *b32, BoxRec *b16)
{
b16->x1 = b32->x1;
@@ -8968,36 +8973,11 @@ sna_poly_point_extents(DrawablePtr drawable, GCPtr gc,
last.x += pt->x;
last.y += pt->y;
pt++;
- box_add_pt(&box, last.x, last.y);
+ box_add_xy(&box, last.x, last.y);
}
} else {
- --n; ++pt;
- while (n >= 8) {
- box_add_pt(&box, pt[0].x, pt[0].y);
- box_add_pt(&box, pt[1].x, pt[1].y);
- box_add_pt(&box, pt[2].x, pt[2].y);
- box_add_pt(&box, pt[3].x, pt[3].y);
- box_add_pt(&box, pt[4].x, pt[4].y);
- box_add_pt(&box, pt[5].x, pt[5].y);
- box_add_pt(&box, pt[6].x, pt[6].y);
- box_add_pt(&box, pt[7].x, pt[7].y);
- pt += 8;
- n -= 8;
- }
- if (n & 4) {
- box_add_pt(&box, pt[0].x, pt[0].y);
- box_add_pt(&box, pt[1].x, pt[1].y);
- box_add_pt(&box, pt[2].x, pt[2].y);
- box_add_pt(&box, pt[3].x, pt[3].y);
- pt += 4;
- }
- if (n & 2) {
- box_add_pt(&box, pt[0].x, pt[0].y);
- box_add_pt(&box, pt[1].x, pt[1].y);
- pt += 2;
- }
- if (n & 1)
- box_add_pt(&box, pt[0].x, pt[0].y);
+ while (--n)
+ box_add_pt(&box, ++pt);
}
box.x2++;
box.y2++;
@@ -9709,7 +9689,7 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc,
y += pt->y;
if (blt)
blt &= pt->x == 0 || pt->y == 0;
- box_add_pt(&box, x, y);
+ box_add_xy(&box, x, y);
}
} else {
int x = box.x1;
@@ -9721,7 +9701,7 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc,
x = pt->x;
y = pt->y;
}
- box_add_pt(&box, pt->x, pt->y);
+ box_add_pt(&box, pt);
}
}
box.x2++;
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 59b8141c..b62d7fd7 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -3185,65 +3185,15 @@ fastcall static void sna_blt_fill_op_points(struct sna *sna,
assert(kgem->nbatch < kgem->surface);
if ((dx|dy) == 0) {
- while (n_this_time >= 8) {
- *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
- *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
- *((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0);
- *((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0);
- *((uint64_t *)b + 4) = pt_add(cmd, p+4, 0, 0);
- *((uint64_t *)b + 5) = pt_add(cmd, p+5, 0, 0);
- *((uint64_t *)b + 6) = pt_add(cmd, p+6, 0, 0);
- *((uint64_t *)b + 7) = pt_add(cmd, p+7, 0, 0);
- b += 16;
- n_this_time -= 8;
- p += 8;
- }
- if (n_this_time & 4) {
- *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
- *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
- *((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0);
- *((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0);
- b += 8;
- p += 4;
- }
- if (n_this_time & 2) {
- *((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
- *((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
- b += 4;
- p += 2;
- }
- if (n_this_time & 1)
- *((uint64_t *)b + 0) = pt_add(cmd, p++, 0, 0);
+ do {
+ *(uint64_t *)b = pt_add(cmd, p++, 0, 0);
+ b += 2;
+ } while (--n_this_time);
} else {
- while (n_this_time >= 8) {
- *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
- *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
- *((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy);
- *((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy);
- *((uint64_t *)b + 4) = pt_add(cmd, p+4, dx, dy);
- *((uint64_t *)b + 5) = pt_add(cmd, p+5, dx, dy);
- *((uint64_t *)b + 6) = pt_add(cmd, p+6, dx, dy);
- *((uint64_t *)b + 7) = pt_add(cmd, p+7, dx, dy);
- b += 16;
- n_this_time -= 8;
- p += 8;
- }
- if (n_this_time & 4) {
- *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
- *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
- *((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy);
- *((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy);
- b += 8;
- p += 8;
- }
- if (n_this_time & 2) {
- *((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
- *((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
- b += 4;
- p += 2;
- }
- if (n_this_time & 1)
- *((uint64_t *)b + 0) = pt_add(cmd, p++, dx, dy);
+ do {
+ *(uint64_t *)b = pt_add(cmd, p++, dx, dy);
+ b += 2;
+ } while (--n_this_time);
}
if (!n)