diff options
author | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2006-11-26 20:16:04 +0000 |
---|---|---|
committer | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2006-11-26 20:16:04 +0000 |
commit | 7574a81a9689a839aa58c67363864d06cdc42e65 (patch) | |
tree | 05fd20d3023227a0fd83a41429779e779a3a951b /driver/xf86-video-sunffb/src/ffb_asm.s | |
parent | c6eab3e9811ee497c7bc59c98e0a048e8c4064e0 (diff) |
Importing xf86-video-sunffb 1.1.0
Diffstat (limited to 'driver/xf86-video-sunffb/src/ffb_asm.s')
-rw-r--r-- | driver/xf86-video-sunffb/src/ffb_asm.s | 349 |
1 files changed, 349 insertions, 0 deletions
diff --git a/driver/xf86-video-sunffb/src/ffb_asm.s b/driver/xf86-video-sunffb/src/ffb_asm.s
new file mode 100644
index 000000000..dfc59be2e
--- /dev/null
+++ b/driver/xf86-video-sunffb/src/ffb_asm.s
@@ -0,0 +1,349 @@
+/*
+ * ffb_asm.s: Fast Creator raster op inner loops.
+ *
+ * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * JAKUB JELINEK OR DAVID MILLER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+/* $XFree86$ */
+
+/* Hardware register offsets (byte offsets added to the ffbregs base pointer) */
+#define FFB_BY		0x0060
+#define FFB_DY		0x0068
+#define FFB_BH		0x0070
+#define FFB_PPC		0x0200
+#define FFB_DRAWOP	0x0300
+#define FFB_UCSR	0x0900
+
+#define FFB_DRAWOP_VSCROLL	0x0b
+
+#define FIFO_CACHE	0x00
+
+#define BOX_X1	0x00
+#define BOX_Y1	0x02
+#define BOX_X2	0x04
+#define BOX_Y2	0x06
+
+#define POINT_X	0x00
+#define POINT_Y	0x02
+
+#define RECT_X	0x00
+#define RECT_Y	0x02
+#define RECT_W	0x04
+#define RECT_H	0x06
+
+#define SEG_X1	0x00
+#define SEG_Y1	0x02
+#define SEG_X2	0x04
+#define SEG_Y2	0x06
+
+	.text
+
+	.align		32
+	.globl		FFB_STIPPLE_LOAD
+	/* %o0 = &ffbregs->pattern[0]	(destination, advanced 0x40 per pass)
+	 * %o1 = &stipple->bits[0]	(source, advanced 0x40 per pass)
+	 * Copies 2 x 64 bytes with ldx/stx; uses %g1-%g5 and %o2-%o4 as scratch. */
+FFB_STIPPLE_LOAD:
+	cmp		%g0, 0			/* Z = 1, so the be below is taken after pass 1 */
+1:	ldx		[%o1 + 0x00], %g1	/* load eight 64-bit words from the source ... */
+	ldx		[%o1 + 0x08], %g2
+	ldx		[%o1 + 0x10], %g3
+
+	ldx		[%o1 + 0x18], %g4
+	add		%o0, 0x40, %o0		/* bump dest early; stores use negative offsets */
+	ldx		[%o1 + 0x20], %g5
+	ldx		[%o1 + 0x28], %o2
+
+	ldx		[%o1 + 0x30], %o3
+	ldx		[%o1 + 0x38], %o4
+	stx		%g1, [%o0 - 0x40]	/* ... and store them in the same order */
+	stx		%g2, [%o0 - 0x38]
+
+	stx		%g3, [%o0 - 0x30]
+	stx		%g4, [%o0 - 0x28]
+	add		%o1, 0x40, %o1		/* advance source for the next pass */
+	stx		%g5, [%o0 - 0x20]
+
+	stx		%o2, [%o0 - 0x18]
+	stx		%o3, [%o0 - 0x10]
+	stx		%o4, [%o0 - 0x08]
+	be,pt		%icc, 1b		/* taken once: the body runs exactly twice */
+
+	 cmp		%g0, 1			/* delay slot: Z = 0, so pass 2 falls through */
+	retl
+	 nop
+
+#define FIFO_WAIT(ffbregs, goal, this_label, done_label) \
+this_label: \
+	lduw		[ffbregs + FFB_UCSR], %g1;	/* reread the UCSR register */ \
+	and		%g1, 0xfff, %g1;		/* keep low 12 bits (presumably FIFO space - confirm) */ \
+	subcc		%g1, (4 + goal), %g1;		/* %g1 = field - (4 + goal), sets icc */ \
+	bge,pt		%icc, done_label;		/* non-negative: resume the loop at done_label */ \
+	 nop; \
+	ba,a,pt		%xcc, this_label	/* otherwise poll again (annulled branch) */
+
+	.align		32
+	.globl		FFB_PPT_BOX_LOOP
+	/* This is only used (currently) by vscroll, so we put
+	 * the creator hwbug workaround in here (writing
+	 * the drawop each iteration).
+	 * Per box: 7 is charged against %g1, then DRAWOP, BY, DY and BH are stored.
+	 * %o0 = ffbpriv, %o1 = ffbregs,
+	 * %o2 = pbox, %o3 = pbox_last, %o4 = ppt
+	 * Runs while pbox <= pbox_last (unsigned); %g1 goes back to FIFO_CACHE on exit. */
+FFB_PPT_BOX_LOOP:
+	lduh		[%o0 + FIFO_CACHE], %g1		/* Load	Group - %g1 = ffbpriv FIFO_CACHE halfword */
+	sethi		%hi(FFB_DRAWOP), %g2		/* IEU0 */
+	cmp		%o2, %o3			/* IEU1 */
+	bgu,pn		%icc, 2f			/* CTI - nothing to do if pbox > pbox_last */
+
+	 or		%g2,%lo(FFB_DRAWOP),%g2		/* IEU0	Group - delay slot: %g2 = FFB_DRAWOP offset */
+1:	lduh		[%o4 + POINT_X], %g3		/* Load	Group - %g3 = ppt x */
+	lduh		[%o4 + POINT_Y], %g4		/* Load	Group - %g4 = ppt y */
+	lduh		[%o2 + BOX_Y2], %g5		/* Load	Group */
+
+	lduh		[%o2 + BOX_Y1], %o5		/* Load	Group */
+	sllx		%g4, 32, %g4			/* IEU0 */
+	or		%g3, %g4, %g3			/* IEU0	Group - %g3 = (ppt y << 32) | ppt x */
+	sub		%g5, %o5, %g5			/* IEU1 - %g5 = y2 - y1 */
+
+	sllx		%o5, 32, %g4			/* IEU0	Group - %g4 = y1 << 32 */
+	sllx		%g5, 32, %g5			/* IEU0	Group */
+	lduh		[%o2 + BOX_X2], %o5		/* Load */
+	add		%o4, 0x4, %o4			/* IEU1 - next point */
+
+	lduh		[%o2 + BOX_X1], %g7		/* Load	Group */
+	add		%o2, 0x8, %o2			/* IEU0 - next box */
+	sub		%o5, %g7, %o5			/* IEU0	Group - %o5 = x2 - x1 */
+	or		%g4, %g7, %g4			/* IEU1 - %g4 = (y1 << 32) | x1 */
+
+	or		%g5, %o5, %g5			/* IEU0	Group - %g5 = ((y2-y1) << 32) | (x2-x1) */
+	subcc		%g1, 7, %g1			/* IEU1 - charge 7 against the cached count */
+	bl,pn		%icc, FFB_PPT_BOX_WAIT		/* CTI - went negative: refresh from UCSR */
+9:	 cmp		%o2, %o3			/* IEU1	Group - delay slot; redone at 9: after FIFO_WAIT clobbers icc */
+
+	/* This works around BUG ID 1189858 -DaveM */
+	mov		FFB_DRAWOP_VSCROLL, %o5		/* IEU0 */
+	stw		%o5, [%o1 + %g2]		/* STORE - rewrite DRAWOP every iteration */
+	stx		%g3, [%o1 + FFB_BY]		/* STORE Group - 64-bit store starting at BY */
+	stx		%g4, [%o1 + FFB_DY]		/* STORE Group - 64-bit store starting at DY */
+
+	bleu,pt		%icc, 1b			/* CTI - loop while pbox <= pbox_last */
+	 stx		%g5, [%o1 + FFB_BH]		/* STORE Group - delay slot: 64-bit store starting at BH */
+2:	retl						/* CTI Group */
+	 sth		%g1, [%o0 + FIFO_CACHE]		/* STORE - delay slot: write the count back */
+
+	FIFO_WAIT(%o1, 7, FFB_PPT_BOX_WAIT, 9b)
+
+	.align		32
+	.globl		FFB_BOX_LOOP
+	/* %o0 = ffbpriv, %o1 = ffbregs,
+	 * %o2 = pbox, %o3 = pbox_last
+	 * Per box: 4 is charged against %g1, then BY and BH are stored as 64-bit words. */
+FFB_BOX_LOOP:
+	lduh		[%o0 + FIFO_CACHE], %g1		/* Load	Group - %g1 = ffbpriv FIFO_CACHE halfword */
+	cmp		%o2, %o3			/* IEU0 */
+	bgu,pn		%icc, 2f			/* CTI - nothing to do if pbox > pbox_last */
+	 nop						/* IEU0	Group */
+
+1:	lduw		[%o2 + BOX_X1], %g4		/* Load	Group - one word holding the X1 and Y1 halfwords */
+	lduw		[%o2 + BOX_X2], %g3		/* Load	Group - one word holding the X2 and Y2 halfwords */
+	sllx		%g4, 32, %o5			/* IEU0 */
+	srl		%g4, 16, %g7			/* IEU0	Group - upper halfword of the first word */
+
+	add		%o2, 8, %o2			/* IEU1 - next box */
+	sllx		%g3, 32, %g5			/* IEU0	Group */
+	subcc		%g1, 4, %g1			/* IEU1 - charge 4; icc survives until the bl below */
+	srl		%g3, 16, %o4			/* IEU0	Group - upper halfword of the second word */
+
+	or		%o5, %g7, %o5			/* IEU1 - %o5 = (word1 << 32) | (word1 >> 16) */
+	or		%g5, %o4, %g5			/* IEU0	Group - same packing for word2 */
+	bl,pn		%icc, FFB_BOX_WAIT		/* CTI - count went negative: refresh from UCSR */
+	 sub		%g5, %o5, %g5			/* IEU0	Group - delay slot: packed second minus packed first */
+
+9:	cmp		%o2, %o3			/* IEU1 */
+	stx		%o5, [%o1 + FFB_BY]		/* Store */
+	bleu,pt		%icc, 1b			/* CTI	Group - loop while pbox <= pbox_last */
+	 stx		%g5, [%o1 + FFB_BH]		/* Store - delay slot */
+2:	retl						/* CTI	Group */
+	 sth		%g1, [%o0 + FIFO_CACHE]		/* Store - delay slot: write the count back */
+
+	FIFO_WAIT(%o1, 4, FFB_BOX_WAIT, 9b)
+
+	.align		32
+	.globl		FFB_RECT_LOOP
+	/* %o0 = ffbpriv, %o1 = ffbregs,
+	 * %o2 = prect, %o3 = prect_last,
+	 * %o4 = xOrg, %o5 = yOrg
+	 * Per rect: 4 is charged against %g1, then origin-adjusted BY and BH are stored. */
+FFB_RECT_LOOP:
+	lduh		[%o0 + FIFO_CACHE], %g1		/* Load	Group - %g1 = ffbpriv FIFO_CACHE halfword */
+	sllx		%o5, 32, %o5			/* IEU0 */
+	cmp		%o2, %o3			/* IEU1 */
+	bgu,pn		%icc, 2f			/* CTI - nothing to do if prect > prect_last */
+
+	 or		%o4, %o5, %o4			/* IEU0	Group - delay slot: %o4 = (yOrg << 32) | xOrg */
+1:	lduh		[%o2 + RECT_X], %g4		/* Load	Group */
+	lduh		[%o2 + RECT_Y], %o5		/* Load	Group */
+	lduh		[%o2 + RECT_W], %g3		/* Load	Group */
+
+	lduh		[%o2 + RECT_H], %g2		/* Load	Group */
+	sllx		%o5, 32, %g7			/* IEU0 */
+	add		%o2, 8, %o2			/* IEU1 - next rect */
+	orcc		%g7, %g4, %g7			/* IEU1	Group - %g7 = (y << 32) | x; the icc result is overwritten by subcc below */
+
+	sllx		%g2, 32, %g5			/* IEU0 */
+	or		%g5, %g3, %g5			/* IEU0	Group - %g5 = (h << 32) | w */
+	add		%g7, %o4, %g7			/* IEU1 - add the packed origin */
+	subcc		%g1, 4, %g1			/* IEU1	Group - charge 4 against the cached count */
+
+	bl,pn		%icc, FFB_RECT_WAIT		/* CTI - went negative: refresh from UCSR */
+9:	 cmp		%o2, %o3			/* IEU1	Group - delay slot; redone at 9: after FIFO_WAIT */
+	stx		%g7, [%o1 + FFB_BY]		/* Store */
+	bleu,pt		%icc, 1b			/* CTI - loop while prect <= prect_last */
+
+	 stx		%g5, [%o1 + FFB_BH]		/* Store Group - delay slot */
+2:	retl						/* CTI	Group */
+	 sth		%g1, [%o0 + FIFO_CACHE]		/* Store - delay slot: write the count back */
+
+	FIFO_WAIT(%o1, 4, FFB_RECT_WAIT, 9b)
+
+	.align		32
+	.globl		FFB_PPT_WIDTH_LOOP
+	/* %o0 = ffbpriv, %o1 = ffbregs,
+	 * %o2 = ppt, %o3 = ppt_last,
+	 * %o4 = pwidth (32-bit entries, one per point; zero-width entries are skipped)
+	 * Per span: 5 is charged against %g1, then PPC (zero), BY and BH are stored. */
+FFB_PPT_WIDTH_LOOP:
+	lduh		[%o0 + FIFO_CACHE], %g1		/* Load	Group - %g1 = ffbpriv FIFO_CACHE halfword */
+	cmp		%o2, %o3			/* IEU1 */
+	bgu,pn		%icc, 2f			/* CTI - nothing to do if ppt > ppt_last */
+	 nop
+
+8:	lduw		[%o2 + POINT_X], %g4		/* Load	Group - one word holding the X and Y halfwords */
+	lduw		[%o4], %g2			/* Load	Group - %g2 = width */
+	sllx		%g4, 32, %g7			/* point word moved to the upper 32 bits */
+1:	srl		%g4, 16, %g5			/* IEU0	Group - upper halfword of the point word */
+
+	add		%o2, 4, %o2			/* IEU1 - next point */
+	brz,pn		%g2, 8b				/* CTI+IEU1 Group - NOTE(review): zero width loops back without the ppt_last check; confirm callers never end on one */
+	 add		%o4, 4, %o4			/* IEU0 - delay slot: next width */
+	or		%g5, %g7, %g7			/* IEU0	Group - %g7 = (word << 32) | (word >> 16) */
+
+	subcc		%g1, 5, %g1			/* IEU1 - charge 5 against the cached count */
+	bl,pn		%icc, FFB_PPT_WIDTH_WAIT/* CTI */
+	 add		%g7, %g2, %o5			/* IEU0	Group - delay slot: %o5 = packed start + width */
+9:	stw		%g0, [%o1 + FFB_PPC]		/* Store Group - writes zero to PPC */
+
+	cmp		%o2, %o3			/* IEU1 */
+	stx		%g7, [%o1 + FFB_BY]		/* Store Group */
+	bleu,pt		%icc, 8b			/* CTI - loop while ppt <= ppt_last */
+	 stx		%o5, [%o1 + FFB_BH]		/* Store Group - delay slot */
+
+2:	retl						/* CTI	Group */
+	 sth		%g1, [%o0 + FIFO_CACHE]		/* Store - delay slot: write the count back */
+
+	FIFO_WAIT(%o1, 5, FFB_PPT_WIDTH_WAIT, 9b)
+
+	.align		32
+	.globl		FFB_LINE_LOOP1
+	/* %o0 = ffbpriv, %o1 = ffbregs,
+	 * %o2 = ppt, %o3 = ppt_last,
+	 * %o4 = xOrg, %o5 = yOrg
+	 * Per point: 3 is charged against %g1, then PPC (zero) and BH are stored.
+	 * We return ppt_last + 1 (the advanced ppt; unchanged if ppt > ppt_last on entry).
+	 */
+FFB_LINE_LOOP1:
+	lduh		[%o0 + FIFO_CACHE], %g1		/* %g1 = ffbpriv FIFO_CACHE halfword */
+	sllx		%o5, 32, %o5
+	and		%o4, 0x7ff, %o4			/* only the low 11 bits of xOrg are used */
+	cmp		%o2, %o3
+
+	bgu,pn		%icc, 2f			/* nothing to do if ppt > ppt_last */
+	 or		%o4, %o5, %o4			/* delay slot: %o4 = (yOrg << 32) | masked xOrg */
+1:	lduw		[%o2 + POINT_X], %g4		/* one word holding the X and Y halfwords */
+	sllx		%g4, 32, %g7
+
+	srl		%g4, 16, %g3			/* upper halfword of the point word */
+	add		%o2, 4, %o2			/* next point */
+	or		%g7, %g3, %g7			/* %g7 = (word << 32) | (word >> 16) */
+	subcc		%g1, 3, %g1			/* charge 3 against the cached count */
+
+	bl,pn		%icc, FFB_LINE1_WAIT		/* went negative: refresh from UCSR */
+	 add		%g7, %o4, %g7			/* delay slot: add the packed origin */
+9:	cmp		%o2, %o3
+	stw		%g0, [%o1 + FFB_PPC]		/* writes zero to PPC */
+
+	bleu,pt		%icc, 1b			/* loop while ppt <= ppt_last */
+	 stx		%g7, [%o1 + FFB_BH]		/* delay slot: 64-bit store starting at BH */
+2:	nop
+	sth		%g1, [%o0 + FIFO_CACHE]		/* write the count back */
+
+	retl
+	 mov		%o2, %o0			/* delay slot: return the advanced ppt */
+
+	FIFO_WAIT(%o1, 3, FFB_LINE1_WAIT, 9b)
+
+	.align		32
+	.globl		FFB_LINE_LOOP2
+	/* %o0 = ffbpriv, %o1 = ffbregs,
+	 * %o2 = ppt, %o3 = ppt_last,
+	 * %o4 = &xOrg, %o5 = &yOrg	(32-bit values, read on entry and rewritten on exit)
+	 *
+	 * We return ppt_last + 1 (the advanced ppt; unchanged if ppt > ppt_last on entry).
+	 * The only difference from the previous routine
+	 * is that here we accumulate the origin values.
+	 */
+FFB_LINE_LOOP2:
+	lduw		[%o5], %g5			/* %g5 = *yOrg */
+	lduw		[%o4], %g2			/* %g2 = *xOrg */
+	lduh		[%o0 + FIFO_CACHE], %g1		/* %g1 = ffbpriv FIFO_CACHE halfword */
+	sllx		%g5, 32, %g5
+
+	and		%g2, 0x7ff, %g2			/* only the low 11 bits of *xOrg are used */
+	cmp		%o2, %o3
+	bgu,pn		%icc, 2f			/* nothing to draw if ppt > ppt_last */
+	 or		%g5, %g2, %g5			/* delay slot: %g5 = (*yOrg << 32) | masked *xOrg */
+
+1:	lduw		[%o2 + POINT_X], %g4		/* one word holding the X and Y halfwords */
+	sllx		%g4, 32, %g7
+	srl		%g4, 16, %g3			/* upper halfword of the point word */
+	add		%o2, 4, %o2			/* next point */
+
+	or		%g7, %g3, %g7			/* %g7 = (word << 32) | (word >> 16) */
+	subcc		%g1, 3, %g1			/* charge 3 against the cached count */
+	bl,pn		%icc, FFB_LINE2_WAIT		/* went negative: refresh from UCSR */
+	 add		%g7, %g5, %g5			/* delay slot: accumulate the point into the running origin */
+
+9:	cmp		%o2, %o3
+	stw		%g0, [%o1 + FFB_PPC]		/* writes zero to PPC */
+	bleu,pt		%icc, 1b			/* loop while ppt <= ppt_last */
+	 stx		%g5, [%o1 + FFB_BH]		/* delay slot: 64-bit store starting at BH */
+
+2:	sth		%g1, [%o0 + FIFO_CACHE]		/* write the count back */
+	srlx		%g5, 32, %g4			/* upper 32 bits of the accumulated origin */
+	stw		%g5, [%o4]			/* *xOrg = low 32 bits */
+	stw		%g4, [%o5]			/* *yOrg = upper 32 bits */
+
+	retl
+	 mov		%o2, %o0			/* delay slot: return the advanced ppt */
+
+	FIFO_WAIT(%o1, 3, FFB_LINE2_WAIT, 9b) |