diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2015-04-01 23:00:57 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2015-04-01 23:13:29 +0100 |
commit | e47eb0c5e588a6cfc4e6f12824814f11e802ed51 (patch) | |
tree | 1e0b68d82aa4bccbcf6d90c044986c919e72e4bb /src | |
parent | 7df58456b8d99a9953a871c656657cbc923dc238 (diff) |
sna: Don't unroll BLT points
The compiler is smarter than I am; unrolling hurts here.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src')
-rw-r--r-- | src/sna/sna_accel.c | 42 | ||||
-rw-r--r-- | src/sna/sna_blt.c | 66 |
2 files changed, 19 insertions, 89 deletions
```diff
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 6fa15b29..a11a77d5 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4593,7 +4593,7 @@ static inline bool box32_trim_and_translate(Box32Rec *box, DrawablePtr d, GCPtr
 	return box32_clip(box, gc);
 }
 
-static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y)
+static inline void box_add_xy(BoxPtr box, int16_t x, int16_t y)
 {
 	if (box->x1 > x)
 		box->x1 = x;
@@ -4606,6 +4606,11 @@ static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y)
 		box->y2 = y;
 }
 
+static inline void box_add_pt(BoxPtr box, const DDXPointRec *pt)
+{
+	box_add_xy(box, pt->x, pt->y);
+}
+
 static inline bool box32_to_box16(const Box32Rec *b32, BoxRec *b16)
 {
 	b16->x1 = b32->x1;
@@ -8968,36 +8973,11 @@ sna_poly_point_extents(DrawablePtr drawable, GCPtr gc,
 			last.x += pt->x;
 			last.y += pt->y;
 			pt++;
-			box_add_pt(&box, last.x, last.y);
+			box_add_xy(&box, last.x, last.y);
 		}
 	} else {
-		--n; ++pt;
-		while (n >= 8) {
-			box_add_pt(&box, pt[0].x, pt[0].y);
-			box_add_pt(&box, pt[1].x, pt[1].y);
-			box_add_pt(&box, pt[2].x, pt[2].y);
-			box_add_pt(&box, pt[3].x, pt[3].y);
-			box_add_pt(&box, pt[4].x, pt[4].y);
-			box_add_pt(&box, pt[5].x, pt[5].y);
-			box_add_pt(&box, pt[6].x, pt[6].y);
-			box_add_pt(&box, pt[7].x, pt[7].y);
-			pt += 8;
-			n -= 8;
-		}
-		if (n & 4) {
-			box_add_pt(&box, pt[0].x, pt[0].y);
-			box_add_pt(&box, pt[1].x, pt[1].y);
-			box_add_pt(&box, pt[2].x, pt[2].y);
-			box_add_pt(&box, pt[3].x, pt[3].y);
-			pt += 4;
-		}
-		if (n & 2) {
-			box_add_pt(&box, pt[0].x, pt[0].y);
-			box_add_pt(&box, pt[1].x, pt[1].y);
-			pt += 2;
-		}
-		if (n & 1)
-			box_add_pt(&box, pt[0].x, pt[0].y);
+		while (--n)
+			box_add_pt(&box, ++pt);
 	}
 	box.x2++;
 	box.y2++;
@@ -9709,7 +9689,7 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc,
 			y += pt->y;
 			if (blt)
 				blt &= pt->x == 0 || pt->y == 0;
-			box_add_pt(&box, x, y);
+			box_add_xy(&box, x, y);
 		}
 	} else {
 		int x = box.x1;
@@ -9721,7 +9701,7 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc,
 				x = pt->x;
 				y = pt->y;
 			}
-			box_add_pt(&box, pt->x, pt->y);
+			box_add_pt(&box, pt);
 		}
 	}
 	box.x2++;
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 59b8141c..b62d7fd7 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -3185,65 +3185,15 @@ fastcall static void sna_blt_fill_op_points(struct sna *sna,
 		assert(kgem->nbatch < kgem->surface);
 
 		if ((dx|dy) == 0) {
-			while (n_this_time >= 8) {
-				*((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
-				*((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
-				*((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0);
-				*((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0);
-				*((uint64_t *)b + 4) = pt_add(cmd, p+4, 0, 0);
-				*((uint64_t *)b + 5) = pt_add(cmd, p+5, 0, 0);
-				*((uint64_t *)b + 6) = pt_add(cmd, p+6, 0, 0);
-				*((uint64_t *)b + 7) = pt_add(cmd, p+7, 0, 0);
-				b += 16;
-				n_this_time -= 8;
-				p += 8;
-			}
-			if (n_this_time & 4) {
-				*((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
-				*((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
-				*((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0);
-				*((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0);
-				b += 8;
-				p += 4;
-			}
-			if (n_this_time & 2) {
-				*((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
-				*((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
-				b += 4;
-				p += 2;
-			}
-			if (n_this_time & 1)
-				*((uint64_t *)b + 0) = pt_add(cmd, p++, 0, 0);
+			do {
+				*(uint64_t *)b = pt_add(cmd, p++, 0, 0);
+				b += 2;
+			} while (--n_this_time);
 		} else {
-			while (n_this_time >= 8) {
-				*((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
-				*((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
-				*((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy);
-				*((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy);
-				*((uint64_t *)b + 4) = pt_add(cmd, p+4, dx, dy);
-				*((uint64_t *)b + 5) = pt_add(cmd, p+5, dx, dy);
-				*((uint64_t *)b + 6) = pt_add(cmd, p+6, dx, dy);
-				*((uint64_t *)b + 7) = pt_add(cmd, p+7, dx, dy);
-				b += 16;
-				n_this_time -= 8;
-				p += 8;
-			}
-			if (n_this_time & 4) {
-				*((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
-				*((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
-				*((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy);
-				*((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy);
-				b += 8;
-				p += 8;
-			}
-			if (n_this_time & 2) {
-				*((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
-				*((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
-				b += 4;
-				p += 2;
-			}
-			if (n_this_time & 1)
-				*((uint64_t *)b + 0) = pt_add(cmd, p++, dx, dy);
+			do {
+				*(uint64_t *)b = pt_add(cmd, p++, dx, dy);
+				b += 2;
+			} while (--n_this_time);
 		}
 
 		if (!n)
```