diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/r600_exa.c | 93 | ||||
-rw-r--r-- | src/r600_shader.c | 537 |
2 files changed, 550 insertions, 80 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c index 2dc33a83..18831f7f 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -924,17 +924,6 @@ do { \ #define xFixedToFloat(f) (((float) (f)) / 65536) -static inline void transformPoint(PictTransform *transform, xPointFixed *point) -{ - PictVector v; - v.vector[0] = point->x; - v.vector[1] = point->y; - v.vector[2] = xFixed1; - PictureTransformPoint(transform, &v); - point->x = v.vector[0]; - point->y = v.vector[1]; -} - struct blendinfo { Bool dst_alpha; Bool src_alpha; @@ -1099,6 +1088,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, tex_resource_t tex_res; tex_sampler_t tex_samp; int pix_r, pix_g, pix_b, pix_a; + float vs_alu_consts[8]; CLEAR (tex_res); CLEAR (tex_samp); @@ -1118,9 +1108,6 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, break; } - accel_state->texW[unit] = w; - accel_state->texH[unit] = h; - /* ErrorF("Tex %d setup %dx%d\n", unit, w, h); */ /* flush texture cache */ @@ -1294,9 +1281,34 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, if (pPict->transform != 0) { accel_state->is_transform[unit] = TRUE; accel_state->transform[unit] = pPict->transform; - } else + + vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]); + vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]); + vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]); + vs_alu_consts[3] = 1.0 / w; + + vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]); + vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]); + vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]); + vs_alu_consts[7] = 1.0 / h; + } else { accel_state->is_transform[unit] = FALSE; + vs_alu_consts[0] = 1.0; + vs_alu_consts[1] = 0.0; + vs_alu_consts[2] = 0.0; + vs_alu_consts[3] = 1.0 / w; + + vs_alu_consts[4] = 0.0; + vs_alu_consts[5] = 1.0; + vs_alu_consts[6] = 0.0; + vs_alu_consts[7] = 1.0 / h; + } + + /* VS alu constants */ + set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2), + sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts); + return TRUE; } @@ -1586,14 +1598,6 @@ static void R600Composite(PixmapPtr pDst, srcBottomRight.x = IntToxFixed(srcX + w); srcBottomRight.y = IntToxFixed(srcY + h); - /* XXX do transform in vertex shader */ - if (accel_state->is_transform[0]) { - transformPoint(accel_state->transform[0], &srcTopLeft); - transformPoint(accel_state->transform[0], &srcTopRight); - transformPoint(accel_state->transform[0], &srcBottomLeft); - transformPoint(accel_state->transform[0], &srcBottomRight); - } - if (accel_state->has_mask) { xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight; @@ -1616,33 +1620,26 @@ static void R600Composite(PixmapPtr pDst, maskBottomRight.x = IntToxFixed(maskX + w); maskBottomRight.y = IntToxFixed(maskY + h); - if (accel_state->is_transform[1]) { - transformPoint(accel_state->transform[1], &maskTopLeft); - transformPoint(accel_state->transform[1], &maskTopRight); - transformPoint(accel_state->transform[1], &maskBottomLeft); - transformPoint(accel_state->transform[1], &maskBottomRight); - } - vb[0] = (float)dstX; vb[1] = (float)dstY; - vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0]; - vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]; - vb[4] = xFixedToFloat(maskTopLeft.x) / accel_state->texW[1]; - vb[5] = xFixedToFloat(maskTopLeft.y) / accel_state->texH[1]; + vb[2] = xFixedToFloat(srcTopLeft.x); + vb[3] = xFixedToFloat(srcTopLeft.y); + vb[4] = xFixedToFloat(maskTopLeft.x); + vb[5] = xFixedToFloat(maskTopLeft.y); vb[6] = (float)dstX; vb[7] = (float)(dstY + h); - vb[8] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0]; - vb[9] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]; - vb[10] = xFixedToFloat(maskBottomLeft.x) / accel_state->texW[1]; - vb[11] = xFixedToFloat(maskBottomLeft.y) / accel_state->texH[1]; + vb[8] = xFixedToFloat(srcBottomLeft.x); + vb[9] = xFixedToFloat(srcBottomLeft.y); + vb[10] = xFixedToFloat(maskBottomLeft.x); + vb[11] = xFixedToFloat(maskBottomLeft.y); vb[12] = (float)(dstX + w); vb[13] = (float)(dstY + h); - vb[14] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0]; - vb[15] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]; - vb[16] = xFixedToFloat(maskBottomRight.x) / accel_state->texW[1]; - vb[17] = xFixedToFloat(maskBottomRight.y) / accel_state->texH[1]; + vb[14] = xFixedToFloat(srcBottomRight.x); + vb[15] = xFixedToFloat(srcBottomRight.y); + vb[16] = xFixedToFloat(maskBottomRight.x); + vb[17] = xFixedToFloat(maskBottomRight.y); } else { if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) { @@ -1657,18 +1654,18 @@ static void R600Composite(PixmapPtr pDst, vb[0] = (float)dstX; vb[1] = (float)dstY; - vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0]; - vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]; + vb[2] = xFixedToFloat(srcTopLeft.x); + vb[3] = xFixedToFloat(srcTopLeft.y); vb[4] = (float)dstX; vb[5] = (float)(dstY + h); - vb[6] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0]; - vb[7] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]; + vb[6] = xFixedToFloat(srcBottomLeft.x); + vb[7] = xFixedToFloat(srcBottomLeft.y); vb[8] = (float)(dstX + w); vb[9] = (float)(dstY + h); - vb[10] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0]; - vb[11] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]; + vb[10] = xFixedToFloat(srcBottomRight.x); + vb[11] = xFixedToFloat(srcBottomRight.y); } accel_state->vb_index += 3; diff --git a/src/r600_shader.c b/src/r600_shader.c index 0a820cf3..fba8dcb4 100644 --- a/src/r600_shader.c +++ b/src/r600_shader.c @@ -1322,7 +1322,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(0)); /* 1 */ - shader[i++] = CF_DWORD0(ADDR(14)); + shader[i++] = CF_DWORD0(ADDR(28)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_NOT_BOOL), @@ -1346,7 +1346,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(1)); /* 3 - mask sub */ - shader[i++] = CF_DWORD0(ADDR(8)); + shader[i++] = CF_DWORD0(ADDR(22)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1357,7 +1357,22 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_VTX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 4 - dst */ + + /* 4 - ALU */ + shader[i++] = CF_ALU_DWORD0(ADDR(9), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(12), + USES_WATERFALL(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 5 - dst */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), TYPE(SQ_EXPORT_POS), RW_GPR(2), @@ -1366,8 +1381,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) ELEM_SIZE(0)); shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), R6xx_ELEM_LOOP(0), BURST_COUNT(1), END_OF_PROGRAM(0), @@ -1375,7 +1390,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 5 - src */ + /* 6 - src */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), TYPE(SQ_EXPORT_PARAM), RW_GPR(1), @@ -1384,8 +1399,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) ELEM_SIZE(0)); shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), R6xx_ELEM_LOOP(0), BURST_COUNT(1), END_OF_PROGRAM(0), @@ -1393,7 +1408,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT), WHOLE_QUAD_MODE(0), BARRIER(0)); - /* 6 - mask */ + /* 7 - mask */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), TYPE(SQ_EXPORT_PARAM), RW_GPR(0), @@ -1402,8 +1417,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) ELEM_SIZE(0)); shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), R6xx_ELEM_LOOP(0), BURST_COUNT(1), END_OF_PROGRAM(0), @@ -1411,7 +1426,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(0)); - /* 7 */ + /* 8 */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -1423,7 +1438,301 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 8/9 - dst */ + + + /* 9 srcX MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(256), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 10 srcY MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(257), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 11 srcX MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(256), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 12 srcY MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(257), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 13 maskX MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(258), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(258), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 14 maskY MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(259), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 15 srcX MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(258), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 16 srcY MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(259), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 17 srcX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(256), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + + /* 18 srcY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(257), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + + /* 19 maskX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(258), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + + /* 20 maskY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(259), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* 21 */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + + /* 22/23 - dst */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -1448,7 +1757,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; - /* 10/11 - src */ + /* 24/25 - src */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -1461,8 +1770,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(0), DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_0), - DST_SEL_W(SQ_SEL_1), + DST_SEL_Z(SQ_SEL_1), + DST_SEL_W(SQ_SEL_0), USE_CONST_FIELDS(0), DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ @@ -1473,7 +1782,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; - /* 12/13 - mask */ + /* 26/27 - mask */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -1486,8 +1795,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(0), DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_0), - DST_SEL_W(SQ_SEL_1), + DST_SEL_Z(SQ_SEL_1), + DST_SEL_W(SQ_SEL_0), USE_CONST_FIELDS(0), DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ @@ -1499,8 +1808,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; - /* 14 - non-mask sub */ - shader[i++] = CF_DWORD0(ADDR(18)); + /* 28 - non-mask sub */ + shader[i++] = CF_DWORD0(ADDR(40)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1511,7 +1820,22 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_VTX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 15 - dst */ + + /* 29 - ALU */ + shader[i++] = CF_ALU_DWORD0(ADDR(33), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(6), + USES_WATERFALL(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 30 - dst */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), TYPE(SQ_EXPORT_POS), RW_GPR(1), @@ -1520,8 +1844,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) ELEM_SIZE(0)); shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), R6xx_ELEM_LOOP(0), BURST_COUNT(0), END_OF_PROGRAM(0), @@ -1529,7 +1853,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 16 - src */ + /* 31 - src */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), TYPE(SQ_EXPORT_PARAM), RW_GPR(0), @@ -1538,8 +1862,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) ELEM_SIZE(0)); shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), R6xx_ELEM_LOOP(0), BURST_COUNT(0), END_OF_PROGRAM(0), @@ -1547,7 +1871,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(0)); - /* 17 */ + /* 32 */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -1559,7 +1883,156 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 18/19 - dst */ + + + /* 33 srcX MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(256), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 34 srcY MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(257), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 35 srcX MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(256), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 36 srcY MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(257), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* 37 srcX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(256), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + + /* 38 srcY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(257), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + + /* 39 */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + + /* 40/41 - dst */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -1584,7 +2057,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; - /* 20/21 - src */ + /* 42/43 - src */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -1597,8 +2070,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(0), DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_0), - DST_SEL_W(SQ_SEL_1), + DST_SEL_Z(SQ_SEL_1), + DST_SEL_W(SQ_SEL_0), USE_CONST_FIELDS(0), DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ |