diff options
author | Tan Hu <tan.hu@zte.com.cn> | 2016-05-27 17:05:14 +0800 |
---|---|---|
committer | Michel Dänzer <michel@daenzer.net> | 2016-06-01 18:17:16 +0900 |
commit | 9b9ad669c748f53247e53fa3f3b03a77da5e5cb3 (patch) | |
tree | b4068b0f3177f19b561f0695572f5e0bebcee63a /src | |
parent | aa07b365d7b0610411e118f105e49daff5f5a5cf (diff) |
EXA/6xx/7xx: fast solid pixmap support
Solid pixmaps are currently implemented with scratch pixmaps, which
is slow. This replaces the hack with a proper implementation. The
Composite shader can now either sample a src/mask or use a constant
value.
r6xx is still in use on some machines.
Ported from commit 94d0d14914a025525a0766669b556eaa6681def7.
Signed-off-by: Tan Hu <tan.hu@zte.com.cn>
Reviewed-by: Grigori Goronzy <greg@chown.ath.cx>
Diffstat (limited to 'src')
-rw-r--r-- | src/r600_exa.c | 257 | ||||
-rw-r--r-- | src/r600_shader.c | 418 |
2 files changed, 526 insertions, 149 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c index 8d11ce71..10df4eca 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -1165,6 +1165,134 @@ static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskP } +static void R600SetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, uint32_t fg, int unit) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0; + + uint32_t w = (fg >> 24) & 0xff; + uint32_t z = (fg >> 16) & 0xff; + uint32_t y = (fg >> 8) & 0xff; + uint32_t x = (fg >> 0) & 0xff; + float xf = (float)x / 255; /* R */ + float yf = (float)y / 255; /* G */ + float zf = (float)z / 255; /* B */ + float wf = (float)w / 255; /* A */ + + /* component swizzles */ + switch (format) { + case PICT_a1r5g5b5: + case PICT_a8r8g8b8: + pix_r = zf; /* R */ + pix_g = yf; /* G */ + pix_b = xf; /* B */ + pix_a = wf; /* A */ + break; + case PICT_a8b8g8r8: + pix_r = xf; /* R */ + pix_g = yf; /* G */ + pix_b = zf; /* B */ + pix_a = wf; /* A */ + break; + case PICT_x8b8g8r8: + pix_r = xf; /* R */ + pix_g = yf; /* G */ + pix_b = zf; /* B */ + pix_a = 1.0; /* A */ + break; + case PICT_b8g8r8a8: + pix_r = yf; /* R */ + pix_g = zf; /* G */ + pix_b = wf; /* B */ + pix_a = xf; /* A */ + break; + case PICT_b8g8r8x8: + pix_r = yf; /* R */ + pix_g = zf; /* G */ + pix_b = wf; /* B */ + pix_a = 1.0; /* A */ + break; + case PICT_x1r5g5b5: + case PICT_x8r8g8b8: + case PICT_r5g6b5: + pix_r = zf; /* R */ + pix_g = yf; /* G */ + pix_b = xf; /* B */ + pix_a = 1.0; /* A */ + break; + case PICT_a8: + pix_r = 0.0; /* R */ + pix_g = 0.0; /* G */ + pix_b = 0.0; /* B */ + pix_a = xf; /* A */ + break; + default: + ErrorF("Bad format 0x%x\n", format); + } + + if (unit == 0) { + if (!accel_state->msk_pic) { + if (PICT_FORMAT_RGB(format) == 0) { + pix_r = 0.0; + pix_g = 0.0; + pix_b = 0.0; + } + + if (PICT_FORMAT_A(format) == 0) + pix_a = 1.0; + } else { + if 
(accel_state->component_alpha) { + if (accel_state->src_alpha) { + if (PICT_FORMAT_A(format) == 0) { + pix_r = 1.0; + pix_g = 1.0; + pix_b = 1.0; + pix_a = 1.0; + } else { + pix_r = pix_a; + pix_g = pix_a; + pix_b = pix_a; + } + } else { + if (PICT_FORMAT_A(format) == 0) + pix_a = 1.0; + } + } else { + if (PICT_FORMAT_RGB(format) == 0) { + pix_r = 0; + pix_g = 0; + pix_b = 0; + } + + if (PICT_FORMAT_A(format) == 0) + pix_a = 1.0; + } + } + } else { + if (accel_state->component_alpha) { + if (PICT_FORMAT_A(format) == 0) + pix_a = 1.0; + } else { + if (PICT_FORMAT_A(format) == 0) { + pix_r = 1.0; + pix_g = 1.0; + pix_b = 1.0; + pix_a = 1.0; + } else { + pix_r = pix_a; + pix_g = pix_a; + pix_b = pix_a; + } + } + } + + buf[0] = pix_r; + buf[1] = pix_g; + buf[2] = pix_b; + buf[3] = pix_a; +} + static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) @@ -1177,31 +1305,27 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, cb_config_t cb_conf; shader_config_t vs_conf, ps_conf; struct r600_accel_object src_obj, mask_obj, dst_obj; + uint32_t ps_bool_consts = 0; + float ps_alu_consts[8]; if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8)) return FALSE; - if (!pSrc) { - pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color); - if (!pSrc) - RADEON_FALLBACK(("Failed to create solid scratch pixmap\n")); + if (pSrc) { + src_obj.bo = radeon_get_pixmap_bo(pSrc); + src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc); + src_obj.surface = radeon_get_pixmap_surface(pSrc); + src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); + src_obj.width = pSrc->drawable.width; + src_obj.height = pSrc->drawable.height; + src_obj.bpp = pSrc->drawable.bitsPerPixel; + src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; } dst_obj.bo = radeon_get_pixmap_bo(pDst); - src_obj.bo = 
radeon_get_pixmap_bo(pSrc); dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst); - src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc); dst_obj.surface = radeon_get_pixmap_surface(pDst); - src_obj.surface = radeon_get_pixmap_surface(pSrc); - - src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); - - src_obj.width = pSrc->drawable.width; - src_obj.height = pSrc->drawable.height; - src_obj.bpp = pSrc->drawable.bitsPerPixel; - src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; - dst_obj.width = pDst->drawable.width; dst_obj.height = pDst->drawable.height; dst_obj.bpp = pDst->drawable.bitsPerPixel; @@ -1211,34 +1335,17 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; if (pMaskPicture) { - if (!pMask) { - pMask = RADEONSolidPixmap(pScreen, pMaskPicture->pSourcePict->solidFill.color); - if (!pMask) { - if (!pSrcPicture->pDrawable) - pScreen->DestroyPixmap(pSrc); - RADEON_FALLBACK(("Failed to create solid scratch pixmap\n")); - } + if (pMask) { + mask_obj.bo = radeon_get_pixmap_bo(pMask); + mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask); + mask_obj.surface = radeon_get_pixmap_surface(pMask); + mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8); + mask_obj.width = pMask->drawable.width; + mask_obj.height = pMask->drawable.height; + mask_obj.bpp = pMask->drawable.bitsPerPixel; + mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; } - mask_obj.bo = radeon_get_pixmap_bo(pMask); - mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask); - mask_obj.surface = radeon_get_pixmap_surface(pMask); - - mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8); - - mask_obj.width = pMask->drawable.width; - mask_obj.height = pMask->drawable.height; - mask_obj.bpp = pMask->drawable.bitsPerPixel; - mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | 
RADEON_GEM_DOMAIN_GTT; - - if (!R600SetAccelState(pScrn, - &src_obj, - &mask_obj, - &dst_obj, - accel_state->comp_vs_offset, accel_state->comp_ps_offset, - 3, 0xffffffff)) - return FALSE; - accel_state->msk_pic = pMaskPicture; if (pMaskPicture->componentAlpha) { accel_state->component_alpha = TRUE; @@ -1251,19 +1358,19 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, accel_state->src_alpha = FALSE; } } else { - if (!R600SetAccelState(pScrn, - &src_obj, - NULL, - &dst_obj, - accel_state->comp_vs_offset, accel_state->comp_ps_offset, - 3, 0xffffffff)) - return FALSE; - accel_state->msk_pic = NULL; accel_state->component_alpha = FALSE; accel_state->src_alpha = FALSE; } + if (!R600SetAccelState(pScrn, + pSrc ? &src_obj : NULL, + (pMaskPicture && pMask) ? &mask_obj : NULL, + &dst_obj, + accel_state->comp_vs_offset, accel_state->comp_ps_offset, + 3, 0xffffffff)) + return FALSE; + if (!R600GetDestFormat(pDstPicture, &dst_format)) return FALSE; @@ -1284,10 +1391,13 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); - if (!R600TextureSetup(pSrcPicture, pSrc, 0)) { - R600IBDiscard(pScrn); - return FALSE; - } + if (pSrc) { + if (!R600TextureSetup(pSrcPicture, pSrc, 0)) { + R600IBDiscard(pScrn); + return FALSE; + } + } else + accel_state->is_transform[0] = FALSE; if (pMask) { if (!R600TextureSetup(pMaskPicture, pMask, 1)) { @@ -1297,12 +1407,16 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, } else accel_state->is_transform[1] = FALSE; + if (pSrc) + ps_bool_consts |= (1 << 0); + if (pMask) + ps_bool_consts |= (1 << 1); + r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts); + if (pMask) { r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0)); - r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0)); } else { 
r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0)); - r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0)); } /* Shader */ @@ -1315,7 +1429,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.shader_size = accel_state->ps_size; - ps_conf.num_gprs = 3; + ps_conf.num_gprs = 2; ps_conf.stack_size = 1; ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; @@ -1381,6 +1495,27 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, else r600_set_spi(pScrn, (1 - 1), 1); + if (!pSrc) { + /* solid src color */ + R600SetSolidConsts(pScrn, &ps_alu_consts[0], pSrcPicture->format, + pSrcPicture->pSourcePict->solidFill.color, 0); + } + + if (!pMaskPicture) { + /* use identity constant if there is no mask */ + ps_alu_consts[4] = 1.0; + ps_alu_consts[5] = 1.0; + ps_alu_consts[6] = 1.0; + ps_alu_consts[7] = 1.0; + } else if (!pMask) { + /* solid mask color */ + R600SetSolidConsts(pScrn, &ps_alu_consts[4], pMaskPicture->format, + pMaskPicture->pSourcePict->solidFill.color, 1); + } + + r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps, + sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); + if (accel_state->vsync) RADEONVlineHelperClear(pScrn); @@ -1405,7 +1540,7 @@ static void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst, accel_state->vline_y1, accel_state->vline_y2); - vtx_size = accel_state->msk_pic ? 24 : 16; + vtx_size = accel_state->msk_pix ? 
24 : 16; r600_finish_op(pScrn, vtx_size); } @@ -1418,12 +1553,6 @@ static void R600DoneComposite(PixmapPtr pDst) struct radeon_accel_state *accel_state = info->accel_state; R600FinishComposite(pScrn, pDst, accel_state); - - if (!accel_state->src_pic->pDrawable) - pScreen->DestroyPixmap(accel_state->src_pix); - - if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable) - pScreen->DestroyPixmap(accel_state->msk_pix); } static void R600Composite(PixmapPtr pDst, @@ -1455,7 +1584,7 @@ static void R600Composite(PixmapPtr pDst, if (accel_state->vsync) RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); - if (accel_state->msk_pic) { + if (accel_state->msk_pix) { vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24); diff --git a/src/r600_shader.c b/src/r600_shader.c index 4cb2fc89..26a6ab64 100644 --- a/src/r600_shader.c +++ b/src/r600_shader.c @@ -2318,9 +2318,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(3)); + /* call fetch-mask if boolean1 == true */ + shader[i++] = CF_DWORD0(ADDR(10)); shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), + CF_CONST(1), COND(SQ_CF_COND_BOOL), I_COUNT(0), CALL_COUNT(0), @@ -2330,9 +2331,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(0)); /* 1 */ - shader[i++] = CF_DWORD0(ADDR(7)); + /* call read-constant-mask if boolean1 == false */ + shader[i++] = CF_DWORD0(ADDR(12)); shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), + CF_CONST(1), COND(SQ_CF_COND_NOT_BOOL), I_COUNT(0), CALL_COUNT(0), @@ -2342,33 +2344,36 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(0)); /* 2 */ - shader[i++] = CF_DWORD0(ADDR(0)); + /* call fetch-src if boolean0 == true */ + shader[i++] = CF_DWORD0(ADDR(6)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), + COND(SQ_CF_COND_BOOL), I_COUNT(0), CALL_COUNT(0), - END_OF_PROGRAM(1), + END_OF_PROGRAM(0), 
VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_NOP), + CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), - BARRIER(1)); + BARRIER(0)); - /* 3 - mask sub */ - shader[i++] = CF_DWORD0(ADDR(14)); + /* 3 */ + /* call read-constant-src if boolean0 == false */ + shader[i++] = CF_DWORD0(ADDR(8)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), - I_COUNT(2), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_TEX), + CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), - BARRIER(1)); + BARRIER(0)); /* 4 */ - shader[i++] = CF_ALU_DWORD0(ADDR(10), + /* src IN mask (GPR0 := GPR1 .* GPR0) */ + shader[i++] = CF_ALU_DWORD0(ADDR(14), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); @@ -2382,9 +2387,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) BARRIER(1)); /* 5 */ + /* export pixel data */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), TYPE(SQ_EXPORT_PIXEL), - RW_GPR(2), + RW_GPR(0), RW_REL(ABSOLUTE), INDEX_GPR(0), ELEM_SIZE(1)); @@ -2394,55 +2400,57 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_W(SQ_SEL_W), R6xx_ELEM_LOOP(0), BURST_COUNT(1), - END_OF_PROGRAM(0), + END_OF_PROGRAM(1), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); + /* subroutine fetch src */ /* 6 */ - shader[i++] = CF_DWORD0(ADDR(0)); + /* fetch src into GPR0*/ + shader[i++] = CF_DWORD0(ADDR(26)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), - I_COUNT(0), + I_COUNT(1), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_RETURN), + CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 7 non-mask sub */ - shader[i++] = CF_DWORD0(ADDR(18)); + /* 7 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), - I_COUNT(1), + I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0), 
VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_TEX), + CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); + + /* subroutine read-constant-src*/ /* 8 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), - TYPE(SQ_EXPORT_PIXEL), - RW_GPR(0), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(1)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - R6xx_ELEM_LOOP(0), - BURST_COUNT(1), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(1)); + /* read constants into GPR0 */ + shader[i++] = CF_ALU_DWORD0(ADDR(18), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + USES_WATERFALL(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); /* 9 */ + /* return */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -2455,8 +2463,67 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 10 - alu 0 */ - /* MUL gpr[2].x gpr[1].x gpr[0].x */ + /* subroutine fetch mask */ + /* 10 */ + /* fetch mask into GPR1*/ + shader[i++] = CF_DWORD0(ADDR(28)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_TEX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 11 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* subroutine read-constant-mask*/ + /* 12 */ + /* read constants into GPR1 */ + shader[i++] = CF_ALU_DWORD0(ADDR(22), + KCACHE_BANK0(0), + 
KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + USES_WATERFALL(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 13 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* ALU clauses */ + + /* 14 - alu 0 */ + /* MUL gpr[0].x gpr[1].x gpr[0].x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), @@ -2478,12 +2545,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1)); - /* 11 - alu 1 */ - /* MUL gpr[2].y gpr[1].y gpr[0].y */ + /* 15 - alu 1 */ + /* MUL gpr[0].y gpr[1].y gpr[0].y */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), @@ -2505,12 +2572,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(1)); - /* 12 - alu 2 */ - /* MUL gpr[2].z gpr[1].z gpr[0].z */ + /* 16 - alu 2 */ + /* MUL gpr[0].z gpr[1].z gpr[0].z */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), @@ -2532,12 +2599,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1)); - /* 13 - alu 3 */ - /* MUL gpr[2].w gpr[1].w gpr[0].w */ + /* 17 - alu 3 */ + /* MUL gpr[0].w gpr[1].w gpr[0].w */ shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_W), @@ -2559,12 +2626,222 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + + /* 18 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + /* 19 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* 20 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + 
ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + /* 21 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(1)); - /* 14/15 - src - mask */ + /* 22 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + /* 23 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* 24 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), 
+ SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + /* 25 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + + /* 26/27 - src */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -2592,7 +2869,7 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 16/17 - mask */ + /* 28/29 - mask */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -2621,34 +2898,5 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 18/19 - src - non-mask */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - BC_FRAC_MODE(0), - FETCH_WHOLE_QUAD(0), - RESOURCE_ID(0), - SRC_GPR(0), - SRC_REL(ABSOLUTE), - R7xx_ALT_CONST(0)); - shader[i++] = TEX_DWORD1(DST_GPR(0), - DST_REL(ABSOLUTE), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_Z), - 
DST_SEL_W(SQ_SEL_W), - LOD_BIAS(0), - COORD_TYPE_X(TEX_NORMALIZED), - COORD_TYPE_Y(TEX_NORMALIZED), - COORD_TYPE_Z(TEX_NORMALIZED), - COORD_TYPE_W(TEX_NORMALIZED)); - shader[i++] = TEX_DWORD2(OFFSET_X(0), - OFFSET_Y(0), - OFFSET_Z(0), - SAMPLER_ID(0), - SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; - return i; } |