From 48ffad957f1dbca909515ffa00629f4caa68706b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Cano?= Date: Fri, 11 Feb 2011 17:00:31 -0500 Subject: kms: 6xx/7xx big endian accel support agd5f: minor cleanups Signed-off-by: Alex Deucher --- src/r600_exa.c | 31 ++++++++++++++++ src/r600_reg.h | 21 ++++++----- src/r600_shader.c | 60 +++++++++++++++++++++++++------ src/r600_shader.h | 80 +++++++++++++++++++++--------------------- src/r600_textured_videofuncs.c | 6 ++++ src/r6xx_accel.c | 11 ++++++ 6 files changed, 150 insertions(+), 59 deletions(-) (limited to 'src') diff --git a/src/r600_exa.c b/src/r600_exa.c index 8057d56d..ea584827 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -274,9 +274,15 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) } else if (accel_state->dst_obj.bpp == 16) { cb_conf.format = COLOR_5_6_5; cb_conf.comp_swap = 2; /* RGB */ +#if X_BYTE_ORDER == X_BIG_ENDIAN + cb_conf.endian = ENDIAN_8IN16; +#endif } else { cb_conf.format = COLOR_8_8_8_8; cb_conf.comp_swap = 1; /* ARGB */ +#if X_BYTE_ORDER == X_BIG_ENDIAN + cb_conf.endian = ENDIAN_8IN32; +#endif } cb_conf.source_format = 1; cb_conf.blend_clamp = 1; @@ -941,6 +947,19 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, tex_res.mip_bo = accel_state->src_obj[unit].bo; tex_res.request_size = 1; +#if X_BYTE_ORDER == X_BIG_ENDIAN + switch (accel_state->src_obj[unit].bpp) { + case 16: + tex_res.endian = SQ_ENDIAN_8IN16; + break; + case 32: + tex_res.endian = SQ_ENDIAN_8IN32; + break; + default : + break; + } +#endif + /* component swizzles */ switch (pPict->format) { case PICT_a1r5g5b5: @@ -1409,6 +1428,18 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, cb_conf.rop = 3; if (accel_state->dst_obj.tiling_flags == 0) cb_conf.array_mode = 1; +#if X_BYTE_ORDER == X_BIG_ENDIAN + switch (dst_obj.bpp) { + case 16: + cb_conf.endian = ENDIAN_8IN16; + break; + case 32: + cb_conf.endian = ENDIAN_8IN32; + break; + default: + break; + } +#endif r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); if (pMask) diff --git a/src/r600_reg.h b/src/r600_reg.h index 937926ba..95c924f5 100644 --- a/src/r600_reg.h +++ b/src/r600_reg.h @@ -117,16 +117,19 @@ enum { /* IT_WAIT_REG_MEM operation encoding */ -#define IT_WAIT_ALWAYS (0<<0) -#define IT_WAIT_LT (1<<0) -#define IT_WAIT_LE (2<<0) -#define IT_WAIT_EQ (3<<0) -#define IT_WAIT_NE (4<<0) -#define IT_WAIT_GE (5<<0) -#define IT_WAIT_GT (6<<0) -#define IT_WAIT_REG (0<<4) -#define IT_WAIT_MEM (1<<4) +#define IT_WAIT_ALWAYS (0 << 0) +#define IT_WAIT_LT (1 << 0) +#define IT_WAIT_LE (2 << 0) +#define IT_WAIT_EQ (3 << 0) +#define IT_WAIT_NE (4 << 0) +#define IT_WAIT_GE (5 << 0) +#define IT_WAIT_GT (6 << 0) +#define IT_WAIT_REG (0 << 4) +#define IT_WAIT_MEM (1 << 4) #define IT_WAIT_ADDR(x) ((x) >> 2) +/* IT_INDEX_TYPE */ +#define IT_INDEX_TYPE_SWAP_MODE(x) ((x) << 2) + #endif diff --git a/src/r600_shader.c b/src/r600_shader.c index 7dceffec..ab2f4850 100644 --- a/src/r600_shader.c +++ b/src/r600_shader.c @@ -111,7 +111,11 @@ int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; @@ -341,7 +345,11 @@ int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; @@ -366,7 +374,11 @@ int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(8), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; @@ -596,7 +608,11 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; @@ -621,7 +637,11 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(8), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; @@ -2191,7 +2211,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; @@ -2216,7 +2240,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(8), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; @@ -2241,7 +2269,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(16), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; @@ -2267,7 +2299,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; @@ -2292,7 +2328,11 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(8), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; diff --git a/src/r600_shader.h b/src/r600_shader.h index 3d5acc74..d79ed400 100644 --- a/src/r600_shader.h +++ b/src/r600_shader.h @@ -157,32 +157,32 @@ #define SRC_SEL_Z(x) (x) #define SRC_SEL_W(x) (x) -#define CF_DWORD0(addr) (addr) +#define CF_DWORD0(addr) cpu_to_le32((addr)) // R7xx has another entry (COUNT3), but that is only used for adding a bit to count. // We allow one more bit for count in the argument of the macro on R7xx instead. // R6xx: [0,7] R7xx: [1,16] #define CF_DWORD1(pc, cf_const, cond, count, call_count, eop, vpm, cf_inst, wqm, b) \ - (((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | (((count) & 7) << 10) | (((count) >> 3) << 19) | \ - ((call_count) << 13) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31)) + cpu_to_le32((((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | (((count) & 7) << 10) | (((count) >> 3) << 19) | \ + ((call_count) << 13) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31))) -#define CF_ALU_DWORD0(addr, kb0, kb1, km0) (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30)) +#define CF_ALU_DWORD0(addr, kb0, kb1, km0) cpu_to_le32((((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30))) #define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, uw, cf_inst, wqm, b) \ - (((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \ - ((count) << 18) | ((uw) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31)) + cpu_to_le32((((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \ + ((count) << 18) | ((uw) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31))) #define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \ - (((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | ((index_gpr) << 23) | \ - ((es) << 30)) + cpu_to_le32((((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | ((index_gpr) << 23) | \ + ((es) << 30))) // R7xx apparently doesn't have the ELEM_LOOP entry any more // We still expose it, but ELEM_LOOP is explicitely R6xx now. // TODO: is this just forgotten in the docs, or really not available any more? #define CF_ALLOC_IMP_EXP_DWORD1_BUF(array_size, comp_mask, el, bc, eop, vpm, cf_inst, wqm, b) \ - (((array_size) << 0) | ((comp_mask) << 12) | ((el) << 16) | ((bc) << 17) | \ - ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31)) + cpu_to_le32((((array_size) << 0) | ((comp_mask) << 12) | ((el) << 16) | ((bc) << 17) | \ + ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31))) #define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, el, bc, eop, vpm, cf_inst, wqm, b) \ - (((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | ((el) << 16) | \ - ((bc) << 17) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | \ - ((wqm) << 30) | ((b) << 31)) + cpu_to_le32((((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | ((el) << 16) | \ + ((bc) << 17) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | \ + ((wqm) << 30) | ((b) << 31))) // ALU clause insts #define SRC0_SEL(x) (x) @@ -242,18 +242,18 @@ #define CLAMP(x) (x) #define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \ - (((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \ - ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \ - ((im) << 26) | ((ps) << 29) | ((last) << 31)) + cpu_to_le32((((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \ + ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \ + ((im) << 26) | ((ps) << 29) | ((last) << 31))) // R7xx has alu_inst at a different slot, and no fog merge any more (no fix function fog any more) #define R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ - (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ - ((fm) << 5) | ((omod) << 6) | ((alu_inst) << 8) | ((bs) << 18) | ((dst_gpr) << 21) | \ - ((dr) << 28) | ((de) << 29) | ((clamp) << 31)) + cpu_to_le32((((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ + ((fm) << 5) | ((omod) << 6) | ((alu_inst) << 8) | ((bs) << 18) | ((dst_gpr) << 21) | \ + ((dr) << 28) | ((de) << 29) | ((clamp) << 31))) #define R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ - (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ - ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \ - ((dr) << 28) | ((de) << 29) | ((clamp) << 31)) + cpu_to_le32((((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ + ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \ + ((dr) << 28) | ((de) << 29) | ((clamp) << 31))) // This is a general chipset macro, but due to selection by chipid typically not usable in static arrays // Fog is NOT USED on R7xx, even if specified. #define ALU_DWORD1_OP2(chipfamily, s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ @@ -261,9 +261,9 @@ R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) : \ R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp)) #define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \ - (((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \ - ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \ - ((de) << 29) | ((clamp) << 31)) + cpu_to_le32((((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \ + ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \ + ((de) << 29) | ((clamp) << 31))) // VTX clause insts // vxt insts @@ -301,17 +301,17 @@ #define MEGA_FETCH(x) (x) #define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \ - (((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \ - ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26)) + cpu_to_le32((((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \ + ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26))) #define VTX_DWORD1_SEM(semantic_id, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \ - (((semantic_id) << 0) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ - ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)) + cpu_to_le32((((semantic_id) << 0) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))) #define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \ - (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ - ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)) + cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))) #define VTX_DWORD2(offset, es, cbns, mf) \ - (((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19)) -#define VTX_DWORD_PAD 0x00000000 + cpu_to_le32((((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19))) +#define VTX_DWORD_PAD cpu_to_le32(0x00000000) // TEX clause insts // tex insts @@ -337,15 +337,15 @@ // R7xx has an additional parameter ALT_CONST. We always expose it, but ALT_CONST is R7xx only #define TEX_DWORD0(tex_inst, bfm, fwq, resource_id, src_gpr, sr, ac) \ - (((tex_inst) << 0) | ((bfm) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \ - ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24)) + cpu_to_le32((((tex_inst) << 0) | ((bfm) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \ + ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24))) #define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \ - (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ - ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31)) + cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31))) #define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \ - (((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \ - ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29)) -#define TEX_DWORD_PAD 0x00000000 + cpu_to_le32((((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \ + ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29))) +#define TEX_DWORD_PAD cpu_to_le32(0x00000000) extern int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs); extern int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps); diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index fdf76b2a..4ff0833e 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -439,10 +439,16 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) cb_conf.format = COLOR_5_6_5; cb_conf.comp_swap = 2; /* RGB */ } +#if X_BYTE_ORDER == X_BIG_ENDIAN + cb_conf.endian = ENDIAN_8IN16; +#endif break; case 32: cb_conf.format = COLOR_8_8_8_8; cb_conf.comp_swap = 1; /* ARGB */ +#if X_BYTE_ORDER == X_BIG_ENDIAN + cb_conf.endian = ENDIAN_8IN32; +#endif break; default: return; diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index a9d1cb41..b5acf85a 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -1184,7 +1184,11 @@ r600_draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32 BEGIN_BATCH(8 + count); EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); PACK3(ib, IT_INDEX_TYPE, 1); +#if X_BYTE_ORDER == X_BIG_ENDIAN + E32(ib, IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); +#else E32(ib, draw_conf->index_type); +#endif PACK3(ib, IT_NUM_INSTANCES, 1); E32(ib, draw_conf->num_instances); @@ -1214,7 +1218,11 @@ r600_draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) BEGIN_BATCH(10); EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); PACK3(ib, IT_INDEX_TYPE, 1); +#if X_BYTE_ORDER == X_BIG_ENDIAN + E32(ib, IT_INDEX_TYPE_SWAP_MODE(ENDIAN_8IN32) | draw_conf->index_type); +#else E32(ib, draw_conf->index_type); +#endif PACK3(ib, IT_NUM_INSTANCES, 1); E32(ib, draw_conf->num_instances); PACK3(ib, IT_DRAW_INDEX_AUTO, 2); @@ -1249,6 +1257,9 @@ void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size) vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op; vtx_res.bo = accel_state->vbo.vb_bo; +#if X_BYTE_ORDER == X_BIG_ENDIAN + vtx_res.endian = SQ_ENDIAN_8IN32; +#endif r600_set_vtx_resource(pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT); /* Draw */ -- cgit v1.2.3