diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 8 | ||||
-rw-r--r-- | src/evergreen_accel.c | 1112 | ||||
-rw-r--r-- | src/evergreen_exa.c | 1907 | ||||
-rw-r--r-- | src/evergreen_reg.h | 247 | ||||
-rw-r--r-- | src/evergreen_reg_auto.h | 4039 | ||||
-rw-r--r-- | src/evergreen_shader.c | 2790 | ||||
-rw-r--r-- | src/evergreen_shader.h | 292 | ||||
-rw-r--r-- | src/evergreen_state.h | 338 | ||||
-rw-r--r-- | src/evergreen_textured_videofuncs.c | 556 | ||||
-rw-r--r-- | src/r600_exa.c | 144 | ||||
-rw-r--r-- | src/r600_state.h | 42 | ||||
-rw-r--r-- | src/r600_textured_videofuncs.c | 60 | ||||
-rw-r--r-- | src/r6xx_accel.c | 122 | ||||
-rw-r--r-- | src/radeon.h | 39 | ||||
-rw-r--r-- | src/radeon_accel.c | 5 | ||||
-rw-r--r-- | src/radeon_dri2.c | 89 | ||||
-rw-r--r-- | src/radeon_exa_shared.c | 50 | ||||
-rw-r--r-- | src/radeon_exa_shared.h | 4 | ||||
-rw-r--r-- | src/radeon_kms.c | 35 | ||||
-rw-r--r-- | src/radeon_reg.h | 6 | ||||
-rw-r--r-- | src/radeon_textured_video.c | 8 | ||||
-rw-r--r-- | src/radeon_vbo.c | 38 | ||||
-rw-r--r-- | src/radeon_vbo.h | 40 |
23 files changed, 11692 insertions, 279 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 033047e4..e05722cc 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -53,7 +53,8 @@ RADEON_KMS_SRCS=radeon_dri2.c radeon_kms.c drmmode_display.c radeon_vbo.c endif if USE_EXA -RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c r600_shader.c radeon_exa_shared.c +RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c r600_shader.c radeon_exa_shared.c \ + evergreen_exa.c evergreen_accel.c evergreen_shader.c evergreen_textured_videofuncs.c endif AM_CFLAGS = \ @@ -129,6 +130,11 @@ EXTRA_DIST = \ r600_reg_r7xx.h \ r600_shader.h \ r600_state.h \ + evergreen_reg.h \ + evergreen_reg_auto.h \ + evergreen_reg_r7xx.h \ + evergreen_shader.h \ + evergreen_state.h \ ati.h \ ativersion.h \ bicubic_table.h \ diff --git a/src/evergreen_accel.c b/src/evergreen_accel.c new file mode 100644 index 00000000..38666eef --- /dev/null +++ b/src/evergreen_accel.c @@ -0,0 +1,1112 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Alex Deucher <alexander.deucher@amd.com> + * + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" + +#include <errno.h> + +#include "radeon.h" +#include "evergreen_shader.h" +#include "radeon_reg.h" +#include "evergreen_reg.h" +#include "evergreen_state.h" + +#include "radeon_drm.h" +#include "radeon_vbo.h" +#include "radeon_exa_shared.h" + +void +evergreen_start_3d(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + + BEGIN_BATCH(3); + PACK3(IT_CONTEXT_CONTROL, 2); + E32(0x80000000); + E32(0x80000000); + END_BATCH(); + +} + +/* + * Setup of functional groups + */ + +// asic stack/thread/gpr limits - need to query the drm +static void +evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf) +{ + uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3; + uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2; + uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3; + RADEONInfoPtr info = RADEONPTR(pScrn); + + if (info->ChipFamily == CHIP_FAMILY_CEDAR) + sq_config = 0; + else + sq_config = VC_ENABLE_bit; + + sq_config |= (EXPORT_SRC_C_bit | + (sq_conf->cs_prio << CS_PRIO_shift) | + (sq_conf->ls_prio << LS_PRIO_shift) | + (sq_conf->hs_prio << HS_PRIO_shift) | + (sq_conf->ps_prio << PS_PRIO_shift) | + (sq_conf->vs_prio << VS_PRIO_shift) | + (sq_conf->gs_prio << GS_PRIO_shift) | + (sq_conf->es_prio << ES_PRIO_shift)); + + sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | + (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | + (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); + sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | + 
(sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); + sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) | + (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift)); + + sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | + (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | + (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | + (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); + sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) | + (sq_conf->num_ls_threads << NUM_LS_THREADS_shift)); + + sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | + (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); + + sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | + (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); + + sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) | + (sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift)); + + BEGIN_BATCH(16); + /* disable dyn gprs */ + EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); + PACK0(SQ_CONFIG, 4); + E32(sq_config); + E32(sq_gpr_resource_mgmt_1); + E32(sq_gpr_resource_mgmt_2); + E32(sq_gpr_resource_mgmt_3); + PACK0(SQ_THREAD_RESOURCE_MGMT, 5); + E32(sq_thread_resource_mgmt); + E32(sq_thread_resource_mgmt_2); + E32(sq_stack_resource_mgmt_1); + E32(sq_stack_resource_mgmt_2); + E32(sq_stack_resource_mgmt_3); + END_BATCH(); +} + +void +evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain) +{ + uint32_t cb_color_info, cb_color_attrib, cb_color_dim; + int pitch, slice, h; + RADEONInfoPtr info = RADEONPTR(pScrn); + + cb_color_info = ((cb_conf->endian << ENDIAN_shift) | + (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | + (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | + (cb_conf->number_type << NUMBER_TYPE_shift) | + (cb_conf->comp_swap << COMP_SWAP_shift) | + (cb_conf->source_format << 
SOURCE_FORMAT_shift) | + (cb_conf->resource_type << RESOURCE_TYPE_shift)); + if (cb_conf->blend_clamp) + cb_color_info |= BLEND_CLAMP_bit; + if (cb_conf->fast_clear) + cb_color_info |= FAST_CLEAR_bit; + if (cb_conf->compression) + cb_color_info |= COMPRESSION_bit; + if (cb_conf->blend_bypass) + cb_color_info |= BLEND_BYPASS_bit; + if (cb_conf->simple_float) + cb_color_info |= SIMPLE_FLOAT_bit; + if (cb_conf->round_mode) + cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; + if (cb_conf->tile_compact) + cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit; + if (cb_conf->rat) + cb_color_info |= RAT_bit; + + /* bit 4 needs to be set for linear and depth/stencil surfaces */ + cb_color_attrib = CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit; + + pitch = (cb_conf->w / 8) - 1; + h = RADEON_ALIGN(cb_conf->h, 8); + slice = ((cb_conf->w * h) / 64) - 1; + + switch (cb_conf->resource_type) { + case BUFFER: + /* number of elements in the surface */ + cb_color_dim = pitch * slice; + break; + default: + /* w/h of the surface */ + cb_color_dim = (((cb_conf->w - 1) << WIDTH_MAX_shift) | + ((cb_conf->h - 1) << HEIGHT_MAX_shift)); + break; + } + + BEGIN_BATCH(3 + 2); + EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8)); + RELOC_BATCH(cb_conf->bo, 0, domain); + END_BATCH(); + + /* Set CMASK & FMASK buffer to the offset of color buffer as + * we don't use those this shouldn't cause any issue and we + * then have a valid cmd stream + */ + BEGIN_BATCH(3 + 2); + EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0 >> 8)); + RELOC_BATCH(cb_conf->bo, 0, domain); + END_BATCH(); + BEGIN_BATCH(3 + 2); + EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0 >> 8)); + RELOC_BATCH(cb_conf->bo, 0, domain); + END_BATCH(); + + /* tiling config */ + BEGIN_BATCH(3 + 2); + EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib); + RELOC_BATCH(cb_conf->bo, 0, domain); + END_BATCH(); + BEGIN_BATCH(3 + 2); + EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info); + RELOC_BATCH(cb_conf->bo, 0, 
domain); + END_BATCH(); + + BEGIN_BATCH(24); + EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch); + EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice); + EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0); + EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim); + EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0); + EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0); + PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4); + E32(0); + E32(0); + E32(0); + E32(0); + END_BATCH(); +} + +static void +evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type, + uint32_t size, uint64_t mc_addr, + struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + uint32_t cp_coher_size; + if (size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((size + 255) >> 8); + + BEGIN_BATCH(5 + 2); + PACK3(IT_SURFACE_SYNC, 4); + E32(sync_type); + E32(cp_coher_size); + E32((mc_addr >> 8)); + E32(10); /* poll interval */ + RELOC_BATCH(bo, rdomains, wdomain); + END_BATCH(); +} + +/* inserts a wait for vline in the command stream */ +void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, + xf86CrtcPtr crtc, int start, int stop) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + drmmode_crtc_private_ptr drmmode_crtc; + uint32_t offset; + + if (!crtc) + return; + + drmmode_crtc = crtc->driver_private; + + if (stop < start) + return; + + if (!crtc->enabled) + return; + + if (info->cs) { + if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen)) + return; + } else { +#ifdef USE_EXA + if (info->useEXA) + offset = exaGetPixmapOffset(pPix); + else +#endif + offset = pPix->devPrivate.ptr - info->FB; + + /* if drawing to front buffer */ + if (offset != 0) + return; + } + + start = max(start, 0); + stop = min(stop, crtc->mode.VDisplay); + + if (start > crtc->mode.VDisplay) + return; + + BEGIN_BATCH(11); + /* set the VLINE range */ + EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */ + (start << 
EVERGREEN_VLINE_START_SHIFT) | + (stop << EVERGREEN_VLINE_END_SHIFT)); + + /* tell the CP to poll the VLINE state register */ + PACK3(IT_WAIT_REG_MEM, 6); + E32(IT_WAIT_REG | IT_WAIT_EQ); + E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS)); + E32(0); + E32(0); // Ref value + E32(EVERGREEN_VLINE_STAT); // Mask + E32(10); // Wait interval + /* add crtc reloc */ + PACK3(IT_NOP, 1); + E32(drmmode_crtc->mode_crtc->crtc_id); + END_BATCH(); +} + +void +evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + uint32_t sq_pgm_resources; + + sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | + (fs_conf->stack_size << STACK_SIZE_shift)); + + if (fs_conf->dx10_clamp) + sq_pgm_resources |= DX10_CLAMP_bit; + + BEGIN_BATCH(3 + 2); + EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8); + RELOC_BATCH(fs_conf->bo, domain, 0); + END_BATCH(); + + BEGIN_BATCH(3); + EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources); + END_BATCH(); +} + +void +evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + uint32_t sq_pgm_resources, sq_pgm_resources_2; + + sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | + (vs_conf->stack_size << STACK_SIZE_shift)); + + if (vs_conf->dx10_clamp) + sq_pgm_resources |= DX10_CLAMP_bit; + if (vs_conf->uncached_first_inst) + sq_pgm_resources |= UNCACHED_FIRST_INST_bit; + + sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) | + (vs_conf->double_round << DOUBLE_ROUND_shift)); + + if (vs_conf->allow_sdi) + sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit; + if (vs_conf->allow_sd0) + sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit; + if (vs_conf->allow_ddi) + sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit; + if (vs_conf->allow_ddo) + sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit; + + /* flush SQ cache */ + evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, + vs_conf->shader_size, vs_conf->shader_addr, 
+ vs_conf->bo, domain, 0); + + BEGIN_BATCH(3 + 2); + EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8); + RELOC_BATCH(vs_conf->bo, domain, 0); + END_BATCH(); + + BEGIN_BATCH(4); + PACK0(SQ_PGM_RESOURCES_VS, 2); + E32(sq_pgm_resources); + E32(sq_pgm_resources_2); + END_BATCH(); +} + +void +evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + uint32_t sq_pgm_resources, sq_pgm_resources_2; + + sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | + (ps_conf->stack_size << STACK_SIZE_shift)); + + if (ps_conf->dx10_clamp) + sq_pgm_resources |= DX10_CLAMP_bit; + if (ps_conf->uncached_first_inst) + sq_pgm_resources |= UNCACHED_FIRST_INST_bit; + if (ps_conf->clamp_consts) + sq_pgm_resources |= CLAMP_CONSTS_bit; + + sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) | + (ps_conf->double_round << DOUBLE_ROUND_shift)); + + if (ps_conf->allow_sdi) + sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit; + if (ps_conf->allow_sd0) + sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit; + if (ps_conf->allow_ddi) + sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit; + if (ps_conf->allow_ddo) + sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit; + + /* flush SQ cache */ + evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, + ps_conf->shader_size, ps_conf->shader_addr, + ps_conf->bo, domain, 0); + + BEGIN_BATCH(3 + 2); + EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8); + RELOC_BATCH(ps_conf->bo, domain, 0); + END_BATCH(); + + BEGIN_BATCH(5); + PACK0(SQ_PGM_RESOURCES_PS, 3); + E32(sq_pgm_resources); + E32(sq_pgm_resources_2); + E32(ps_conf->export_mode); + END_BATCH(); +} + +void +evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + /* size reg is units of 16 consts (4 dwords each) */ + uint32_t size = const_conf->size_bytes >> 8; + + if (size == 0) + size = 1; + + /* flush SQ cache */ + 
evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit, + const_conf->size_bytes, const_conf->const_addr, + const_conf->bo, domain, 0); + + switch (const_conf->type) { + case SHADER_TYPE_VS: + BEGIN_BATCH(3); + EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size); + END_BATCH(); + BEGIN_BATCH(3 + 2); + EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8); + RELOC_BATCH(const_conf->bo, domain, 0); + END_BATCH(); + break; + case SHADER_TYPE_PS: + BEGIN_BATCH(3); + EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size); + END_BATCH(); + BEGIN_BATCH(3 + 2); + EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8); + RELOC_BATCH(const_conf->bo, domain, 0); + END_BATCH(); + break; + default: + ErrorF("Unsupported const type %d\n", const_conf->type); + break; + } + +} + +void +evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each + * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs. 
+ */ + BEGIN_BATCH(3); + EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val); + END_BATCH(); +} + +static void +evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4; + + sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | + ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | + (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | + (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | + (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); + if (res->clamp_x) + sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; + + if (res->format_comp_all) + sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; + + if (res->srf_mode_all) + sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; + + sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) | + (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) | + (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) | + (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift)); + + if (res->uncached) + sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit; + + /* XXX ??? 
*/ + sq_vtx_constant_word4 = 0; + + /* flush vertex cache */ + if (info->ChipFamily == CHIP_FAMILY_CEDAR) + evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, + accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr, + res->bo, + domain, 0); + else + evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit, + accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr, + res->bo, + domain, 0); + + BEGIN_BATCH(10 + 2); + PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8); + E32(res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS + E32((res->vtx_num_entries << 2) - 1); // 1: SIZE + E32(sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN + E32(sq_vtx_constant_word3); // 3: swizzles + E32(sq_vtx_constant_word4); // 4: num elements + E32(0); // 5: n/a + E32(0); // 6: n/a + E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift); // 7: TYPE + RELOC_BATCH(res->bo, domain, 0); + END_BATCH(); +} + +void +evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; + uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7; + + sq_tex_resource_word0 = (tex_res->dim << DIM_shift); + + if (tex_res->w) + sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) | + ((tex_res->w - 1) << TEX_WIDTH_shift)); + + if (tex_res->tile_type) + sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit; + + sq_tex_resource_word1 = (tex_res->array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift); + + if (tex_res->h) + sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); + if (tex_res->depth) + sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); + + sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | + (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | + (tex_res->format_comp_z << 
FORMAT_COMP_Z_shift) | + (tex_res->format_comp_w << FORMAT_COMP_W_shift) | + (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | + (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | + (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | + (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | + (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | + (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | + (tex_res->base_level << BASE_LEVEL_shift)); + + if (tex_res->srf_mode_all) + sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; + if (tex_res->force_degamma) + sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; + + sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) | + (tex_res->base_array << BASE_ARRAY_shift) | + (tex_res->last_array << LAST_ARRAY_shift)); + + sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) | + (tex_res->perf_modulation << PERF_MODULATION_shift)); + + if (tex_res->interlaced) + sq_tex_resource_word6 |= INTERLACED_bit; + + sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) | + (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift)); + + /* flush texture cache */ + evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit, + tex_res->size, tex_res->base, + tex_res->bo, domain, 0); + + BEGIN_BATCH(10 + 4); + PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8); + E32(sq_tex_resource_word0); + E32(sq_tex_resource_word1); + E32(((tex_res->base) >> 8)); + E32(((tex_res->mip_base) >> 8)); + E32(sq_tex_resource_word4); + E32(sq_tex_resource_word5); + E32(sq_tex_resource_word6); + E32(sq_tex_resource_word7); + RELOC_BATCH(tex_res->bo, domain, 0); + RELOC_BATCH(tex_res->mip_bo, domain, 0); + END_BATCH(); +} + +void +evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s) +{ + RADEONInfoPtr info = 
RADEONPTR(pScrn); + uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; + + sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | + (s->clamp_y << CLAMP_Y_shift) | + (s->clamp_z << CLAMP_Z_shift) | + (s->xy_mag_filter << XY_MAG_FILTER_shift) | + (s->xy_min_filter << XY_MIN_FILTER_shift) | + (s->z_filter << Z_FILTER_shift) | + (s->mip_filter << MIP_FILTER_shift) | + (s->border_color << BORDER_COLOR_TYPE_shift) | + (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | + (s->chroma_key << CHROMA_KEY_shift)); + + sq_tex_sampler_word1 = ((s->min_lod << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift) | + (s->max_lod << MAX_LOD_shift) | + (s->perf_mip << PERF_MIP_shift) | + (s->perf_z << PERF_Z_shift)); + + + sq_tex_sampler_word2 = ((s->lod_bias << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) | + (s->lod_bias2 << LOD_BIAS_SEC_shift)); + + if (s->mc_coord_truncate) + sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; + if (s->force_degamma) + sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; + if (s->truncate_coord) + sq_tex_sampler_word2 |= TRUNCATE_COORD_bit; + if (s->disable_cube_wrap) + sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit; + if (s->type) + sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; + + BEGIN_BATCH(5); + PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); + E32(sq_tex_sampler_word0); + E32(sq_tex_sampler_word1); + E32(sq_tex_sampler_word2); + END_BATCH(); +} + +//XXX deal with clip offsets in clip setup +void +evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + + BEGIN_BATCH(4); + PACK0(PA_SC_SCREEN_SCISSOR_TL, 2); + E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | + (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); + E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | + (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); + END_BATCH(); +} + +void +evergreen_set_vport_scissor(ScrnInfoPtr pScrn, 
int id, int x1, int y1, int x2, int y2) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + + BEGIN_BATCH(4); + PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2); + E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | + (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | + WINDOW_OFFSET_DISABLE_bit)); + E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | + (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); + END_BATCH(); +} + +void +evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + + BEGIN_BATCH(4); + PACK0(PA_SC_GENERIC_SCISSOR_TL, 2); + E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | + (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | + WINDOW_OFFSET_DISABLE_bit)); + E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | + (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift))); + END_BATCH(); +} + +void +evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + + BEGIN_BATCH(4); + PACK0(PA_SC_WINDOW_SCISSOR_TL, 2); + E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | + (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | + WINDOW_OFFSET_DISABLE_bit)); + E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | + (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); + END_BATCH(); +} + +void +evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + + BEGIN_BATCH(4); + PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2); + E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | + (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); + E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | + (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); + END_BATCH(); +} + +/* + * Setup of default state + */ + +void +evergreen_set_default_state(ScrnInfoPtr pScrn) +{ + tex_resource_t tex_res; + shader_config_t fs_conf; + sq_config_t sq_conf; + int i; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct 
radeon_accel_state *accel_state = info->accel_state; + + if (accel_state->XInited3D) + return; + + memset(&tex_res, 0, sizeof(tex_resource_t)); + memset(&fs_conf, 0, sizeof(shader_config_t)); + + accel_state->XInited3D = TRUE; + + evergreen_start_3d(pScrn); + + /* SQ */ + sq_conf.ps_prio = 0; + sq_conf.vs_prio = 1; + sq_conf.gs_prio = 2; + sq_conf.es_prio = 3; + sq_conf.hs_prio = 0; + sq_conf.ls_prio = 0; + sq_conf.cs_prio = 0; + + switch (info->ChipFamily) { + case CHIP_FAMILY_CEDAR: + default: + sq_conf.num_ps_gprs = 93; + sq_conf.num_vs_gprs = 46; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 31; + sq_conf.num_es_gprs = 31; + sq_conf.num_hs_gprs = 23; + sq_conf.num_ls_gprs = 23; + sq_conf.num_ps_threads = 96; + sq_conf.num_vs_threads = 16; + sq_conf.num_gs_threads = 16; + sq_conf.num_es_threads = 16; + sq_conf.num_hs_threads = 16; + sq_conf.num_ls_threads = 16; + sq_conf.num_ps_stack_entries = 42; + sq_conf.num_vs_stack_entries = 42; + sq_conf.num_gs_stack_entries = 42; + sq_conf.num_es_stack_entries = 42; + sq_conf.num_hs_stack_entries = 42; + sq_conf.num_ls_stack_entries = 42; + break; + case CHIP_FAMILY_REDWOOD: + sq_conf.num_ps_gprs = 93; + sq_conf.num_vs_gprs = 46; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 31; + sq_conf.num_es_gprs = 31; + sq_conf.num_hs_gprs = 23; + sq_conf.num_ls_gprs = 23; + sq_conf.num_ps_threads = 128; + sq_conf.num_vs_threads = 20; + sq_conf.num_gs_threads = 20; + sq_conf.num_es_threads = 20; + sq_conf.num_hs_threads = 20; + sq_conf.num_ls_threads = 20; + sq_conf.num_ps_stack_entries = 42; + sq_conf.num_vs_stack_entries = 42; + sq_conf.num_gs_stack_entries = 42; + sq_conf.num_es_stack_entries = 42; + sq_conf.num_hs_stack_entries = 42; + sq_conf.num_ls_stack_entries = 42; + break; + case CHIP_FAMILY_JUNIPER: + sq_conf.num_ps_gprs = 93; + sq_conf.num_vs_gprs = 46; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 31; + sq_conf.num_es_gprs = 31; + sq_conf.num_hs_gprs = 23; + sq_conf.num_ls_gprs = 23; + 
sq_conf.num_ps_threads = 128; + sq_conf.num_vs_threads = 20; + sq_conf.num_gs_threads = 20; + sq_conf.num_es_threads = 20; + sq_conf.num_hs_threads = 20; + sq_conf.num_ls_threads = 20; + sq_conf.num_ps_stack_entries = 85; + sq_conf.num_vs_stack_entries = 85; + sq_conf.num_gs_stack_entries = 85; + sq_conf.num_es_stack_entries = 85; + sq_conf.num_hs_stack_entries = 85; + sq_conf.num_ls_stack_entries = 85; + break; + case CHIP_FAMILY_CYPRESS: + case CHIP_FAMILY_HEMLOCK: + sq_conf.num_ps_gprs = 93; + sq_conf.num_vs_gprs = 46; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 31; + sq_conf.num_es_gprs = 31; + sq_conf.num_hs_gprs = 23; + sq_conf.num_ls_gprs = 23; + sq_conf.num_ps_threads = 128; + sq_conf.num_vs_threads = 20; + sq_conf.num_gs_threads = 20; + sq_conf.num_es_threads = 20; + sq_conf.num_hs_threads = 20; + sq_conf.num_ls_threads = 20; + sq_conf.num_ps_stack_entries = 85; + sq_conf.num_vs_stack_entries = 85; + sq_conf.num_gs_stack_entries = 85; + sq_conf.num_es_stack_entries = 85; + sq_conf.num_hs_stack_entries = 85; + sq_conf.num_ls_stack_entries = 85; + break; + } + + evergreen_sq_setup(pScrn, &sq_conf); + + BEGIN_BATCH(24); + EREG(SQ_LDS_ALLOC_PS, 0); + EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0); + + PACK0(SQ_ESGS_RING_ITEMSIZE, 6); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + + PACK0(SQ_GS_VERT_ITEMSIZE, 4); + E32(0); + E32(0); + E32(0); + E32(0); + + PACK0(SQ_VTX_BASE_VTX_LOC, 2); + E32(0); + E32(0); + END_BATCH(); + + /* DB */ + BEGIN_BATCH(3 + 2); + EREG(DB_Z_INFO, 0); + RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(3 + 2); + EREG(DB_STENCIL_INFO, 0); + RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(3 + 2); + EREG(DB_HTILE_DATA_BASE, 0); + RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(49); + EREG(DB_DEPTH_CONTROL, 0); + + PACK0(PA_SC_VPORT_ZMIN_0, 2); + EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0 + 
EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0 + + PACK0(DB_RENDER_CONTROL, 5); + E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL + E32(0); // DB_COUNT_CONTROL + E32(0); // DB_DEPTH_VIEW + E32(0x2a); // DB_RENDER_OVERRIDE + E32(0); // DB_RENDER_OVERRIDE2 + + PACK0(DB_STENCIL_CLEAR, 2); + E32(0); // DB_STENCIL_CLEAR + E32(0); // DB_DEPTH_CLEAR + + EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | + (2 << ALPHA_TO_MASK_OFFSET1_shift) | + (2 << ALPHA_TO_MASK_OFFSET2_shift) | + (2 << ALPHA_TO_MASK_OFFSET3_shift))); + + EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) | + DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ + + // SX + EREG(SX_MISC, 0); + + // CB + PACK0(SX_ALPHA_TEST_CONTROL, 5); + E32(0); // SX_ALPHA_TEST_CONTROL + E32(0x00000000); //CB_BLEND_RED + E32(0x00000000); //CB_BLEND_GREEN + E32(0x00000000); //CB_BLEND_BLUE + E32(0x00000000); //CB_BLEND_ALPHA + + EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask); + + // SC + EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | + (0 << WINDOW_Y_OFFSET_shift))); + EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); + EREG(PA_SC_EDGERULE, 0xAAAAAAAA); + EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0); + END_BATCH(); + + /* clip boolean is set to always visible -> doesn't matter */ + for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) + evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192); + + for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) + evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192); + + BEGIN_BATCH(50); + PACK0(PA_SC_MODE_CNTL_0, 2); + E32(0); // PA_SC_MODE_CNTL_0 + E32(0); // PA_SC_MODE_CNTL_1 + + PACK0(PA_SC_LINE_CNTL, 16); + E32(0); // PA_SC_LINE_CNTL + E32(0); // PA_SC_AA_CONFIG + E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) | + PIX_CENTER_bit)); // PA_SU_VTX_CNTL + EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ + EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ + EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ + EFLOAT(1.0); // PA_CL_GB_HORZ_DISC_ADJ + E32(0); // 
PA_SC_AA_SAMPLE_LOCS_0 + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); // PA_SC_AA_SAMPLE_LOCS_7 + E32(0xFFFFFFFF); // PA_SC_AA_MASK + + // CL + PACK0(PA_CL_CLIP_CNTL, 8); + E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL + E32(FACE_bit); // PA_SU_SC_MODE_CNTL + E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL + E32(0); // PA_CL_VS_OUT_CNTL + E32(0); // PA_CL_NANINF_CNTL + E32(0); // PA_SU_LINE_STIPPLE_CNTL + E32(0); // PA_SU_LINE_STIPPLE_SCALE + E32(0); // PA_SU_PRIM_FILTER_CNTL + + // SU + PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + + PACK0(SPI_INPUT_Z, 8); + E32(0); // SPI_INPUT_Z + E32(0); // SPI_FOG_CNTL + E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL + E32(0); // SPI_PS_IN_CONTROL_2 + E32(0); + E32(0); + E32(0); + E32(0); + END_BATCH(); + + // clear FS + fs_conf.bo = accel_state->shaders_bo; + evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM); + + // VGT + BEGIN_BATCH(46); + + PACK0(VGT_MAX_VTX_INDX, 4); + E32(0xffffff); + E32(0); + E32(0); + E32(0); + + PACK0(VGT_INSTANCE_STEP_RATE_0, 2); + E32(0); + E32(0); + + PACK0(VGT_REUSE_OFF, 2); + E32(0); + E32(0); + + PACK0(PA_SU_POINT_SIZE, 17); + E32(0); // PA_SU_POINT_SIZE + E32(0); // PA_SU_POINT_MINMAX + E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL + E32(0); // PA_SC_LINE_STIPPLE + E32(0); // VGT_OUTPUT_PATH_CNTL + E32(0); // VGT_HOS_CNTL + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); + E32(0); // VGT_GS_MODE + + EREG(VGT_PRIMITIVEID_EN, 0); + EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0); + EREG(VGT_SHADER_STAGES_EN, 0); + + PACK0(VGT_STRMOUT_CONFIG, 2); + E32(0); + E32(0); + END_BATCH(); +} + + +/* + * Commands + */ + +void +evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + + BEGIN_BATCH(10); + EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type); + 
PACK3(IT_INDEX_TYPE, 1); + E32(draw_conf->index_type); + PACK3(IT_NUM_INSTANCES, 1); + E32(draw_conf->num_instances); + PACK3(IT_DRAW_INDEX_AUTO, 2); + E32(draw_conf->num_indices); + E32(draw_conf->vgt_draw_initiator); + END_BATCH(); +} + +void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + draw_config_t draw_conf; + vtx_resource_t vtx_res; + + if (accel_state->vbo.vb_start_op == -1) + return; + + CLEAR (draw_conf); + CLEAR (vtx_res); + + if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) { + radeon_ib_discard(pScrn); + radeon_cs_flush_indirect(pScrn); + radeon_vb_discard(pScrn, &accel_state->vbo); + radeon_vb_discard(pScrn, &accel_state->cbuf); + return; + } + + /* Vertex buffer setup */ + accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op; + vtx_res.id = SQ_FETCH_RESOURCE_vs; + vtx_res.vtx_size_dw = vtx_size / 4; + vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4; + vtx_res.vb_addr = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op; + vtx_res.bo = accel_state->vbo.vb_bo; + vtx_res.dst_sel_x = SQ_SEL_X; + vtx_res.dst_sel_y = SQ_SEL_Y; + vtx_res.dst_sel_z = SQ_SEL_Z; + vtx_res.dst_sel_w = SQ_SEL_W; + evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT); + + /* Draw */ + draw_conf.prim_type = DI_PT_RECTLIST; + draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; + draw_conf.num_instances = 1; + draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; + draw_conf.index_type = DI_INDEX_SIZE_16_BIT; + + evergreen_draw_auto(pScrn, &draw_conf); + + /* sync dst surface */ + evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), + accel_state->dst_size, accel_state->dst_obj.offset, + accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain); + + accel_state->vbo.vb_start_op = -1; + accel_state->cbuf.vb_start_op = -1; + accel_state->ib_reset_op = 
0; + +} + diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c new file mode 100644 index 00000000..7b2a65ee --- /dev/null +++ b/src/evergreen_exa.c @@ -0,0 +1,1907 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Author: Alex Deucher <alexander.deucher@amd.com> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" + +#include "exa.h" + +#include "radeon.h" +#include "radeon_macros.h" +#include "radeon_reg.h" +#include "evergreen_shader.h" +#include "evergreen_reg.h" +#include "evergreen_state.h" +#include "radeon_exa_shared.h" +#include "radeon_vbo.h" + +uint32_t EVERGREEN_ROP[16] = { + RADEON_ROP3_ZERO, /* GXclear */ + RADEON_ROP3_DSa, /* Gxand */ + RADEON_ROP3_SDna, /* GXandReverse */ + RADEON_ROP3_S, /* GXcopy */ + RADEON_ROP3_DSna, /* GXandInverted */ + RADEON_ROP3_D, /* GXnoop */ + RADEON_ROP3_DSx, /* GXxor */ + RADEON_ROP3_DSo, /* GXor */ + RADEON_ROP3_DSon, /* GXnor */ + RADEON_ROP3_DSxn, /* GXequiv */ + RADEON_ROP3_Dn, /* GXinvert */ + RADEON_ROP3_SDno, /* GXorReverse */ + RADEON_ROP3_Sn, /* GXcopyInverted */ + RADEON_ROP3_DSno, /* GXorInverted */ + RADEON_ROP3_DSan, /* GXnand */ + RADEON_ROP3_ONE, /* GXset */ +}; + +Bool +EVERGREENSetAccelState(ScrnInfoPtr pScrn, + struct r600_accel_object *src0, + struct r600_accel_object *src1, + struct r600_accel_object *dst, + uint32_t vs_offset, uint32_t ps_offset, + int rop, Pixel planemask) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + int ret; + + if (src0) { + memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object)); + accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8); + } else { + memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object)); + accel_state->src_size[0] = 0; + } + + if (src1) { + memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object)); + accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8); + } else { + memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object)); + accel_state->src_size[1] = 0; + } + + if (dst) { + memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object)); + accel_state->dst_size = dst->pitch * 
dst->height * (dst->bpp/8); + } else { + memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object)); + accel_state->dst_size = 0; + } + + accel_state->rop = rop; + accel_state->planemask = planemask; + + /* bad pitch */ + if (accel_state->src_obj[0].pitch & 7) + RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch)); + + /* bad offset */ + if (accel_state->src_obj[0].offset & 0xff) + RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset)); + + /* bad pitch */ + if (accel_state->src_obj[1].pitch & 7) + RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch)); + + /* bad offset */ + if (accel_state->src_obj[1].offset & 0xff) + RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset)); + + if (accel_state->dst_obj.pitch & 7) + RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch)); + + if (accel_state->dst_obj.offset & 0xff) + RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset)); + + accel_state->vs_size = 512; + accel_state->ps_size = 512; + + accel_state->vs_mc_addr = vs_offset; + accel_state->ps_mc_addr = ps_offset; + + radeon_cs_space_reset_bos(info->cs); + radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->src_obj[0].bo) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo, + accel_state->src_obj[0].domain, 0); + if (accel_state->src_obj[1].bo) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo, + accel_state->src_obj[1].domain, 0); + if (accel_state->dst_obj.bo) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo, + 0, accel_state->dst_obj.domain); + ret = radeon_cs_space_check(info->cs); + if (ret) + RADEON_FALLBACK(("Not enough RAM to hw accel operation\n")); + + return TRUE; +} + +static void +EVERGREENDoneSolid(PixmapPtr pPix); + +static Bool +EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) +{ 
+ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + cb_config_t cb_conf; + shader_config_t vs_conf, ps_conf; + int pmask = 0; + uint32_t a, r, g, b; + float *ps_alu_consts; + const_config_t ps_const_conf; + struct r600_accel_object dst; + + //return FALSE; + + if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel)) + RADEON_FALLBACK(("EVERGREENCheckDatatype failed\n")); + if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel)) + RADEON_FALLBACK(("invalid planemask\n")); + + dst.offset = 0; + dst.bo = radeon_get_pixmap_bo(pPix); + + dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); + dst.width = pPix->drawable.width; + dst.height = pPix->drawable.height; + dst.bpp = pPix->drawable.bitsPerPixel; + dst.domain = RADEON_GEM_DOMAIN_VRAM; + + if (!EVERGREENSetAccelState(pScrn, + NULL, + NULL, + &dst, + accel_state->solid_vs_offset, accel_state->solid_ps_offset, + alu, pm)) + return FALSE; + + CLEAR (cb_conf); + CLEAR (vs_conf); + CLEAR (ps_conf); + CLEAR (ps_const_conf); + + radeon_vbo_check(pScrn, &accel_state->vbo, 16); + radeon_vbo_check(pScrn, &accel_state->cbuf, 256); + radeon_cp_start(pScrn); + + evergreen_set_default_state(pScrn); + + evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + + /* Shader */ + vs_conf.shader_addr = accel_state->vs_mc_addr; + vs_conf.shader_size = accel_state->vs_size; + vs_conf.num_gprs = 2; + vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; + evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); + + ps_conf.shader_addr = accel_state->ps_mc_addr; + ps_conf.shader_size = accel_state->ps_size; + ps_conf.num_gprs = 1; + ps_conf.stack_size = 
0; + ps_conf.clamp_consts = 0; + ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; + evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); + + cb_conf.id = 0; + cb_conf.w = accel_state->dst_obj.pitch; + cb_conf.h = accel_state->dst_obj.height; + cb_conf.base = accel_state->dst_obj.offset; + cb_conf.bo = accel_state->dst_obj.bo; + + if (accel_state->dst_obj.bpp == 8) { + cb_conf.format = COLOR_8; + cb_conf.comp_swap = 3; /* A */ + } else if (accel_state->dst_obj.bpp == 16) { + cb_conf.format = COLOR_5_6_5; + cb_conf.comp_swap = 2; /* RGB */ + } else { + cb_conf.format = COLOR_8_8_8_8; + cb_conf.comp_swap = 1; /* ARGB */ + } + cb_conf.source_format = EXPORT_4C_16BPC; + cb_conf.blend_clamp = 1; + evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain); + + /* Render setup */ + if (accel_state->planemask & 0x000000ff) + pmask |= 4; /* B */ + if (accel_state->planemask & 0x0000ff00) + pmask |= 2; /* G */ + if (accel_state->planemask & 0x00ff0000) + pmask |= 1; /* R */ + if (accel_state->planemask & 0xff000000) + pmask |= 8; /* A */ + + BEGIN_BATCH(23); + EREG(CB_TARGET_MASK, (pmask << TARGET0_ENABLE_shift)); + EREG(CB_COLOR_CONTROL, (EVERGREEN_ROP[accel_state->rop] | + (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift))); + EREG(CB_BLEND0_CONTROL, 0); + + /* Interpolator setup */ + /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */ + EREG(SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift)); + EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); + /* color semantic id 0 -> GPR[0] */ + EREG(SPI_PS_INPUT_CNTL_0 + (0 << 2), ((0 << SEMANTIC_shift) | + (0x03 << DEFAULT_VAL_shift) | + FLAT_SHADE_bit)); + + /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x + * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ + /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */ + PACK0(SPI_PS_IN_CONTROL_0, 3); + E32(((0 << NUM_INTERP_shift) | + LINEAR_GRADIENT_ENA_bit)); // 
SPI_PS_IN_CONTROL_0 + E32(0); // SPI_PS_IN_CONTROL_1 + E32(FLAT_SHADE_ENA_bit); // SPI_INTERP_CONTROL_0 + END_BATCH(); + + + /* PS alu constants */ + ps_const_conf.size_bytes = 256; + ps_const_conf.type = SHADER_TYPE_PS; + ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256); + if (accel_state->dst_obj.bpp == 16) { + r = (fg >> 11) & 0x1f; + g = (fg >> 5) & 0x3f; + b = (fg >> 0) & 0x1f; + ps_alu_consts[0] = (float)r / 31; /* R */ + ps_alu_consts[1] = (float)g / 63; /* G */ + ps_alu_consts[2] = (float)b / 31; /* B */ + ps_alu_consts[3] = 1.0; /* A */ + } else if (accel_state->dst_obj.bpp == 8) { + a = (fg >> 0) & 0xff; + ps_alu_consts[0] = 0.0; /* R */ + ps_alu_consts[1] = 0.0; /* G */ + ps_alu_consts[2] = 0.0; /* B */ + ps_alu_consts[3] = (float)a / 255; /* A */ + } else { + a = (fg >> 24) & 0xff; + r = (fg >> 16) & 0xff; + g = (fg >> 8) & 0xff; + b = (fg >> 0) & 0xff; + ps_alu_consts[0] = (float)r / 255; /* R */ + ps_alu_consts[1] = (float)g / 255; /* G */ + ps_alu_consts[2] = (float)b / 255; /* B */ + ps_alu_consts[3] = (float)a / 255; /* A */ + } + radeon_vbo_commit(pScrn, &accel_state->cbuf); + + ps_const_conf.bo = accel_state->cbuf.vb_bo; + ps_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op; + evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT); + + if (accel_state->vsync) + RADEONVlineHelperClear(pScrn); + + return TRUE; +} + + +static void +EVERGREENSolid(PixmapPtr pPix, int x1, int y1, int x2, int y2) +{ + ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + float *vb; + + if (accel_state->vsync) + RADEONVlineHelperSet(pScrn, x1, y1, x2, y2); + + vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8); + + vb[0] = (float)x1; + vb[1] = (float)y1; + + vb[2] = (float)x1; + vb[3] = (float)y2; + + vb[4] = (float)x2; + vb[5] = (float)y2; + + radeon_vbo_commit(pScrn, &accel_state->vbo); 
+} + +static void +EVERGREENDoneSolid(PixmapPtr pPix) +{ + ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + + if (accel_state->vsync) + evergreen_cp_wait_vline_sync(pScrn, pPix, + accel_state->vline_crtc, + accel_state->vline_y1, + accel_state->vline_y2); + + evergreen_finish_op(pScrn, 8); +} + +static void +EVERGREENDoPrepareCopy(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + int pmask = 0; + cb_config_t cb_conf; + tex_resource_t tex_res; + tex_sampler_t tex_samp; + shader_config_t vs_conf, ps_conf; + + CLEAR (cb_conf); + CLEAR (tex_res); + CLEAR (tex_samp); + CLEAR (vs_conf); + CLEAR (ps_conf); + + radeon_vbo_check(pScrn, &accel_state->vbo, 16); + radeon_cp_start(pScrn); + + evergreen_set_default_state(pScrn); + + evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + + /* Shader */ + vs_conf.shader_addr = accel_state->vs_mc_addr; + vs_conf.shader_size = accel_state->vs_size; + vs_conf.num_gprs = 2; + vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; + evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); + + ps_conf.shader_addr = accel_state->ps_mc_addr; + ps_conf.shader_size = accel_state->ps_size; + ps_conf.num_gprs = 1; + ps_conf.stack_size = 0; + ps_conf.clamp_consts = 0; + ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; + evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); + + /* Texture */ + tex_res.id = 0; + tex_res.w = accel_state->src_obj[0].width; + tex_res.h = accel_state->src_obj[0].height; + tex_res.pitch = accel_state->src_obj[0].pitch; + tex_res.depth = 0; + 
tex_res.dim = SQ_TEX_DIM_2D; + tex_res.base = accel_state->src_obj[0].offset; + tex_res.mip_base = accel_state->src_obj[0].offset; + tex_res.size = accel_state->src_size[0]; + tex_res.bo = accel_state->src_obj[0].bo; + tex_res.mip_bo = accel_state->src_obj[0].bo; + if (accel_state->src_obj[0].bpp == 8) { + tex_res.format = FMT_8; + tex_res.dst_sel_x = SQ_SEL_1; /* R */ + tex_res.dst_sel_y = SQ_SEL_1; /* G */ + tex_res.dst_sel_z = SQ_SEL_1; /* B */ + tex_res.dst_sel_w = SQ_SEL_X; /* A */ + } else if (accel_state->src_obj[0].bpp == 16) { + tex_res.format = FMT_5_6_5; + tex_res.dst_sel_x = SQ_SEL_Z; /* R */ + tex_res.dst_sel_y = SQ_SEL_Y; /* G */ + tex_res.dst_sel_z = SQ_SEL_X; /* B */ + tex_res.dst_sel_w = SQ_SEL_1; /* A */ + } else { + tex_res.format = FMT_8_8_8_8; + tex_res.dst_sel_x = SQ_SEL_Z; /* R */ + tex_res.dst_sel_y = SQ_SEL_Y; /* G */ + tex_res.dst_sel_z = SQ_SEL_X; /* B */ + tex_res.dst_sel_w = SQ_SEL_W; /* A */ + } + + tex_res.base_level = 0; + tex_res.last_level = 0; + tex_res.perf_modulation = 0; + evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); + + tex_samp.id = 0; + tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_z = SQ_TEX_WRAP; + tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; + tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; + tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; + tex_samp.mip_filter = 0; /* no mipmap */ + evergreen_set_tex_sampler (pScrn, &tex_samp); + + cb_conf.id = 0; + cb_conf.w = accel_state->dst_obj.pitch; + cb_conf.h = accel_state->dst_obj.height; + cb_conf.base = accel_state->dst_obj.offset; + cb_conf.bo = accel_state->dst_obj.bo; + if (accel_state->dst_obj.bpp == 8) { + cb_conf.format = COLOR_8; + cb_conf.comp_swap = 3; /* A */ + } else if (accel_state->dst_obj.bpp == 16) { + cb_conf.format = COLOR_5_6_5; + cb_conf.comp_swap = 2; /* RGB */ + } else { + cb_conf.format = COLOR_8_8_8_8; + cb_conf.comp_swap = 1; /* ARGB */ + } + 
cb_conf.source_format = EXPORT_4C_16BPC; + cb_conf.blend_clamp = 1; + evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain); + + /* Render setup */ + if (accel_state->planemask & 0x000000ff) + pmask |= 4; /* B */ + if (accel_state->planemask & 0x0000ff00) + pmask |= 2; /* G */ + if (accel_state->planemask & 0x00ff0000) + pmask |= 1; /* R */ + if (accel_state->planemask & 0xff000000) + pmask |= 8; /* A */ + + BEGIN_BATCH(23); + EREG(CB_TARGET_MASK, (pmask << TARGET0_ENABLE_shift)); + EREG(CB_COLOR_CONTROL, (EVERGREEN_ROP[accel_state->rop] | + (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift))); + EREG(CB_BLEND0_CONTROL, 0); + + /* Interpolator setup */ + /* export tex coord from VS */ + EREG(SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); + EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); + /* color semantic id 0 -> GPR[0] */ + EREG(SPI_PS_INPUT_CNTL_0 + (0 << 2), ((0 << SEMANTIC_shift) | + (0x01 << DEFAULT_VAL_shift))); + + /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x + * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ + /* input tex coord from VS */ + PACK0(SPI_PS_IN_CONTROL_0, 3); + E32(((1 << NUM_INTERP_shift) | + LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0 + E32(0); //SPI_PS_IN_CONTROL_1 + E32(0); // SPI_INTERP_CONTROL_0 + END_BATCH(); + +} + +static void +EVERGREENDoCopy(ScrnInfoPtr pScrn) +{ + evergreen_finish_op(pScrn, 16); +} + +static void +EVERGREENDoCopyVline(PixmapPtr pPix) +{ + ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + + if (accel_state->vsync) + evergreen_cp_wait_vline_sync(pScrn, pPix, + accel_state->vline_crtc, + accel_state->vline_y1, + accel_state->vline_y2); + + evergreen_finish_op(pScrn, 16); +} + +static void +EVERGREENAppendCopyVertex(ScrnInfoPtr pScrn, + int srcX, int srcY, + int dstX, int dstY, + int w, int h) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + 
struct radeon_accel_state *accel_state = info->accel_state; + float *vb; + + vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16); + + vb[0] = (float)dstX; + vb[1] = (float)dstY; + vb[2] = (float)srcX; + vb[3] = (float)srcY; + + vb[4] = (float)dstX; + vb[5] = (float)(dstY + h); + vb[6] = (float)srcX; + vb[7] = (float)(srcY + h); + + vb[8] = (float)(dstX + w); + vb[9] = (float)(dstY + h); + vb[10] = (float)(srcX + w); + vb[11] = (float)(srcY + h); + + radeon_vbo_commit(pScrn, &accel_state->vbo); +} + +static Bool +EVERGREENPrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, + int xdir, int ydir, + int rop, + Pixel planemask) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + struct r600_accel_object src_obj, dst_obj; + + //return FALSE; + + if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel)) + RADEON_FALLBACK(("EVERGREENCheckDatatype src failed\n")); + if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel)) + RADEON_FALLBACK(("EVERGREENCheckDatatype dst failed\n")); + if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel)) + RADEON_FALLBACK(("Invalid planemask\n")); + + dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); + src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); + + accel_state->same_surface = FALSE; + + src_obj.offset = 0; + dst_obj.offset = 0; + src_obj.bo = radeon_get_pixmap_bo(pSrc); + dst_obj.bo = radeon_get_pixmap_bo(pDst); + if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst)) + accel_state->same_surface = TRUE; + + src_obj.width = pSrc->drawable.width; + src_obj.height = pSrc->drawable.height; + src_obj.bpp = pSrc->drawable.bitsPerPixel; + src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; + + dst_obj.width = pDst->drawable.width; + dst_obj.height = pDst->drawable.height; + dst_obj.bpp = pDst->drawable.bitsPerPixel; + dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; + + if 
(!EVERGREENSetAccelState(pScrn, + &src_obj, + NULL, + &dst_obj, + accel_state->copy_vs_offset, accel_state->copy_ps_offset, + rop, planemask)) + return FALSE; + + if (accel_state->same_surface == TRUE) { + unsigned long size = pDst->drawable.height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8; + + if (accel_state->copy_area_bo) { + radeon_bo_unref(accel_state->copy_area_bo); + accel_state->copy_area_bo = NULL; + } + accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0, + RADEON_GEM_DOMAIN_VRAM, + 0); + if (accel_state->copy_area_bo == NULL) + RADEON_FALLBACK(("temp copy surface alloc failed\n")); + + radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo, + RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_VRAM); + if (radeon_cs_space_check(info->cs)) { + radeon_bo_unref(accel_state->copy_area_bo); + accel_state->copy_area_bo = NULL; + return FALSE; + } + accel_state->copy_area = (void*)accel_state->copy_area_bo; + } else + EVERGREENDoPrepareCopy(pScrn); + + if (accel_state->vsync) + RADEONVlineHelperClear(pScrn); + + return TRUE; +} + +static void +EVERGREENCopy(PixmapPtr pDst, + int srcX, int srcY, + int dstX, int dstY, + int w, int h) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + + if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) + return; + + if (accel_state->vsync) + RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); + + if (accel_state->same_surface && accel_state->copy_area) { + uint32_t orig_dst_domain = accel_state->dst_obj.domain; + uint32_t orig_src_domain = accel_state->src_obj[0].domain; + struct radeon_bo *orig_bo = accel_state->dst_obj.bo; + + /* src to tmp */ + accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; + accel_state->dst_obj.bo = accel_state->copy_area_bo; + accel_state->dst_obj.offset = 0; + EVERGREENDoPrepareCopy(pScrn); + 
EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); + EVERGREENDoCopy(pScrn); + + /* tmp to dst */ + accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM; + accel_state->src_obj[0].bo = accel_state->copy_area_bo; + accel_state->src_obj[0].offset = 0; + accel_state->dst_obj.domain = orig_dst_domain; + accel_state->dst_obj.bo = orig_bo; + accel_state->dst_obj.offset = 0; + EVERGREENDoPrepareCopy(pScrn); + EVERGREENAppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h); + EVERGREENDoCopyVline(pDst); + + /* restore state */ + accel_state->src_obj[0].domain = orig_src_domain; + accel_state->src_obj[0].bo = orig_bo; + accel_state->src_obj[0].offset = 0; + } else + EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); + +} + +static void +EVERGREENDoneCopy(PixmapPtr pDst) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + + if (!accel_state->same_surface) + EVERGREENDoCopyVline(pDst); + + if (accel_state->copy_area) + accel_state->copy_area = NULL; + +} + + +#define xFixedToFloat(f) (((float) (f)) / 65536) + +struct blendinfo { + Bool dst_alpha; + Bool src_alpha; + uint32_t blend_cntl; +}; + +static struct blendinfo EVERGREENBlendOp[] = { + /* Clear */ + {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, + /* Src */ + {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, + /* Dst */ + {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, + /* Over */ + {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, + /* OverReverse */ + {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, + /* In */ + {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, + /* InReverse */ + {0, 1, (BLEND_ZERO << 
COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, + /* Out */ + {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, + /* OutReverse */ + {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, + /* Atop */ + {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, + /* AtopReverse */ + {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, + /* Xor */ + {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, + /* Add */ + {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, +}; + +struct formatinfo { + unsigned int fmt; + uint32_t card_fmt; +}; + +static struct formatinfo EVERGREENTexFormats[] = { + {PICT_a8r8g8b8, FMT_8_8_8_8}, + {PICT_x8r8g8b8, FMT_8_8_8_8}, + {PICT_a8b8g8r8, FMT_8_8_8_8}, + {PICT_x8b8g8r8, FMT_8_8_8_8}, +#ifdef PICT_TYPE_BGRA + {PICT_b8g8r8a8, FMT_8_8_8_8}, + {PICT_b8g8r8x8, FMT_8_8_8_8}, +#endif + {PICT_r5g6b5, FMT_5_6_5}, + {PICT_a1r5g5b5, FMT_1_5_5_5}, + {PICT_x1r5g5b5, FMT_1_5_5_5}, + {PICT_a8, FMT_8}, +}; + +static uint32_t EVERGREENGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format) +{ + uint32_t sblend, dblend; + + sblend = EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask; + dblend = EVERGREENBlendOp[op].blend_cntl & COLOR_DESTBLEND_mask; + + /* If there's no dst alpha channel, adjust the blend op so that we'll treat + * it as always 1. 
+ */ + if (PICT_FORMAT_A(dst_format) == 0 && EVERGREENBlendOp[op].dst_alpha) { + if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift)) + sblend = (BLEND_ONE << COLOR_SRCBLEND_shift); + else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift)) + sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift); + } + + /* If the source alpha is being used, then we should only be in a case where + * the source blend factor is 0, and the source blend value is the mask + * channels multiplied by the source picture's alpha. + */ + if (pMask && pMask->componentAlpha && EVERGREENBlendOp[op].src_alpha) { + if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) { + dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift); + } else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) { + dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift); + } + } + + return sblend | dblend; +} + +static Bool EVERGREENGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format) +{ + switch (pDstPicture->format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: +#ifdef PICT_TYPE_BGRA + case PICT_b8g8r8a8: + case PICT_b8g8r8x8: +#endif + *dst_format = COLOR_8_8_8_8; + break; + case PICT_r5g6b5: + *dst_format = COLOR_5_6_5; + break; + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + *dst_format = COLOR_1_5_5_5; + break; + case PICT_a8: + *dst_format = COLOR_8; + break; + default: + RADEON_FALLBACK(("Unsupported dest format 0x%x\n", + (int)pDstPicture->format)); + } + return TRUE; +} + +static Bool EVERGREENCheckCompositeTexture(PicturePtr pPict, + PicturePtr pDstPict, + int op, + int unit) +{ + int w = pPict->pDrawable->width; + int h = pPict->pDrawable->height; + unsigned int repeatType = pPict->repeat ? 
pPict->repeatType : RepeatNone; + unsigned int i; + int max_tex_w, max_tex_h; + + max_tex_w = 16384; + max_tex_h = 16384; + + if ((w > max_tex_w) || (h > max_tex_h)) + RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h)); + + for (i = 0; i < sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]); i++) { + if (EVERGREENTexFormats[i].fmt == pPict->format) + break; + } + if (i == sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0])) + RADEON_FALLBACK(("Unsupported picture format 0x%x\n", + (int)pPict->format)); + + if (pPict->filter != PictFilterNearest && + pPict->filter != PictFilterBilinear) + RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter)); + + /* for REPEAT_NONE, Render semantics are that sampling outside the source + * picture results in alpha=0 pixels. We can implement this with a border color + * *if* our source texture has an alpha channel, otherwise we need to fall + * back. If we're not transformed then we hope that upper layers have clipped + * rendering to the bounds of the source drawable, in which case it doesn't + * matter. I have not, however, verified that the X server always does such + * clipping. 
+ */ + /* FIXME evergreen */ + if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) { + if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0))) + RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n")); + } + + return TRUE; +} + +static void EVERGREENXFormSetup(PicturePtr pPict, PixmapPtr pPix, + int unit, float *vs_alu_consts) +{ + ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + int w = pPict->pDrawable->width; + int h = pPict->pDrawable->height; + int const_offset = unit * 8; + + if (pPict->transform != 0) { + accel_state->is_transform[unit] = TRUE; + accel_state->transform[unit] = pPict->transform; + + vs_alu_consts[0 + const_offset] = xFixedToFloat(pPict->transform->matrix[0][0]); + vs_alu_consts[1 + const_offset] = xFixedToFloat(pPict->transform->matrix[0][1]); + vs_alu_consts[2 + const_offset] = xFixedToFloat(pPict->transform->matrix[0][2]); + vs_alu_consts[3 + const_offset] = 1.0 / w; + + vs_alu_consts[4 + const_offset] = xFixedToFloat(pPict->transform->matrix[1][0]); + vs_alu_consts[5 + const_offset] = xFixedToFloat(pPict->transform->matrix[1][1]); + vs_alu_consts[6 + const_offset] = xFixedToFloat(pPict->transform->matrix[1][2]); + vs_alu_consts[7 + const_offset] = 1.0 / h; + } else { + accel_state->is_transform[unit] = FALSE; + + vs_alu_consts[0 + const_offset] = 1.0; + vs_alu_consts[1 + const_offset] = 0.0; + vs_alu_consts[2 + const_offset] = 0.0; + vs_alu_consts[3 + const_offset] = 1.0 / w; + + vs_alu_consts[4 + const_offset] = 0.0; + vs_alu_consts[5 + const_offset] = 1.0; + vs_alu_consts[6 + const_offset] = 0.0; + vs_alu_consts[7 + const_offset] = 1.0 / h; + } + +} + +static Bool EVERGREENTextureSetup(PicturePtr pPict, PixmapPtr pPix, + int unit) +{ + ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + RADEONInfoPtr info = 
RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + int w = pPict->pDrawable->width; + int h = pPict->pDrawable->height; + unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone; + unsigned int i; + tex_resource_t tex_res; + tex_sampler_t tex_samp; + int pix_r, pix_g, pix_b, pix_a; + + CLEAR (tex_res); + CLEAR (tex_samp); + + for (i = 0; i < sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]); i++) { + if (EVERGREENTexFormats[i].fmt == pPict->format) + break; + } + + /* Texture */ + tex_res.id = unit; + tex_res.w = w; + tex_res.h = h; + tex_res.pitch = accel_state->src_obj[unit].pitch; + tex_res.depth = 0; + tex_res.dim = SQ_TEX_DIM_2D; + tex_res.base = accel_state->src_obj[unit].offset; + tex_res.mip_base = accel_state->src_obj[unit].offset; + tex_res.size = accel_state->src_size[unit]; + tex_res.format = EVERGREENTexFormats[i].card_fmt; + tex_res.bo = accel_state->src_obj[unit].bo; + tex_res.mip_bo = accel_state->src_obj[unit].bo; + + /* component swizzles */ + switch (pPict->format) { + case PICT_a1r5g5b5: + case PICT_a8r8g8b8: + pix_r = SQ_SEL_Z; /* R */ + pix_g = SQ_SEL_Y; /* G */ + pix_b = SQ_SEL_X; /* B */ + pix_a = SQ_SEL_W; /* A */ + break; + case PICT_a8b8g8r8: + pix_r = SQ_SEL_X; /* R */ + pix_g = SQ_SEL_Y; /* G */ + pix_b = SQ_SEL_Z; /* B */ + pix_a = SQ_SEL_W; /* A */ + break; + case PICT_x8b8g8r8: + pix_r = SQ_SEL_X; /* R */ + pix_g = SQ_SEL_Y; /* G */ + pix_b = SQ_SEL_Z; /* B */ + pix_a = SQ_SEL_1; /* A */ + break; +#ifdef PICT_TYPE_BGRA + case PICT_b8g8r8a8: + pix_r = SQ_SEL_Y; /* R */ + pix_g = SQ_SEL_Z; /* G */ + pix_b = SQ_SEL_W; /* B */ + pix_a = SQ_SEL_X; /* A */ + break; + case PICT_b8g8r8x8: + pix_r = SQ_SEL_Y; /* R */ + pix_g = SQ_SEL_Z; /* G */ + pix_b = SQ_SEL_W; /* B */ + pix_a = SQ_SEL_1; /* A */ + break; +#endif + case PICT_x1r5g5b5: + case PICT_x8r8g8b8: + case PICT_r5g6b5: + pix_r = SQ_SEL_Z; /* R */ + pix_g = SQ_SEL_Y; /* G */ + pix_b = SQ_SEL_X; /* B */ + pix_a = SQ_SEL_1; /* 
A */ + break; + case PICT_a8: + pix_r = SQ_SEL_0; /* R */ + pix_g = SQ_SEL_0; /* G */ + pix_b = SQ_SEL_0; /* B */ + pix_a = SQ_SEL_X; /* A */ + break; + default: + RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format)); + } + + if (unit == 0) { + if (!accel_state->msk_pic) { + if (PICT_FORMAT_RGB(pPict->format) == 0) { + pix_r = SQ_SEL_0; + pix_g = SQ_SEL_0; + pix_b = SQ_SEL_0; + } + + if (PICT_FORMAT_A(pPict->format) == 0) + pix_a = SQ_SEL_1; + } else { + if (accel_state->component_alpha) { + if (accel_state->src_alpha) { + if (PICT_FORMAT_A(pPict->format) == 0) { + pix_r = SQ_SEL_1; + pix_g = SQ_SEL_1; + pix_b = SQ_SEL_1; + pix_a = SQ_SEL_1; + } else { + pix_r = pix_a; + pix_g = pix_a; + pix_b = pix_a; + } + } else { + if (PICT_FORMAT_A(pPict->format) == 0) + pix_a = SQ_SEL_1; + } + } else { + if (PICT_FORMAT_RGB(pPict->format) == 0) { + pix_r = SQ_SEL_0; + pix_g = SQ_SEL_0; + pix_b = SQ_SEL_0; + } + + if (PICT_FORMAT_A(pPict->format) == 0) + pix_a = SQ_SEL_1; + } + } + } else { + if (accel_state->component_alpha) { + if (PICT_FORMAT_A(pPict->format) == 0) + pix_a = SQ_SEL_1; + } else { + if (PICT_FORMAT_A(pPict->format) == 0) { + pix_r = SQ_SEL_1; + pix_g = SQ_SEL_1; + pix_b = SQ_SEL_1; + pix_a = SQ_SEL_1; + } else { + pix_r = pix_a; + pix_g = pix_a; + pix_b = pix_a; + } + } + } + + tex_res.dst_sel_x = pix_r; /* R */ + tex_res.dst_sel_y = pix_g; /* G */ + tex_res.dst_sel_z = pix_b; /* B */ + tex_res.dst_sel_w = pix_a; /* A */ + + tex_res.base_level = 0; + tex_res.last_level = 0; + tex_res.perf_modulation = 0; + evergreen_set_tex_resource (pScrn, &tex_res, accel_state->src_obj[unit].domain); + + tex_samp.id = unit; + tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK; + + switch (repeatType) { + case RepeatNormal: + tex_samp.clamp_x = SQ_TEX_WRAP; + tex_samp.clamp_y = SQ_TEX_WRAP; + break; + case RepeatPad: + tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; + break; + case RepeatReflect: + tex_samp.clamp_x = 
SQ_TEX_MIRROR; + tex_samp.clamp_y = SQ_TEX_MIRROR; + break; + case RepeatNone: + tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER; + tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER; + break; + default: + RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType)); + } + + switch (pPict->filter) { + case PictFilterNearest: + tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; + tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; + break; + case PictFilterBilinear: + tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; + tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; + break; + default: + RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); + } + + tex_samp.clamp_z = SQ_TEX_WRAP; + tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; + tex_samp.mip_filter = 0; /* no mipmap */ + evergreen_set_tex_sampler (pScrn, &tex_samp); + + return TRUE; +} + +static Bool EVERGREENCheckComposite(int op, PicturePtr pSrcPicture, + PicturePtr pMaskPicture, + PicturePtr pDstPicture) +{ + uint32_t tmp1; + PixmapPtr pSrcPixmap, pDstPixmap; + int max_tex_w, max_tex_h, max_dst_w, max_dst_h; + + /* Check for unsupported compositing operations. 
*/ + if (op >= (int) (sizeof(EVERGREENBlendOp) / sizeof(EVERGREENBlendOp[0]))) + RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); + + if (!pSrcPicture->pDrawable) + RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n")); + + pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable); + + max_tex_w = 8192; + max_tex_h = 8192; + max_dst_w = 8192; + max_dst_h = 8192; + + if (pSrcPixmap->drawable.width >= max_tex_w || + pSrcPixmap->drawable.height >= max_tex_h) { + RADEON_FALLBACK(("Source w/h too large (%d,%d).\n", + pSrcPixmap->drawable.width, + pSrcPixmap->drawable.height)); + } + + pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable); + + if (pDstPixmap->drawable.width >= max_dst_w || + pDstPixmap->drawable.height >= max_dst_h) { + RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n", + pDstPixmap->drawable.width, + pDstPixmap->drawable.height)); + } + + if (pMaskPicture) { + PixmapPtr pMaskPixmap; + + if (!pMaskPicture->pDrawable) + RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n")); + + pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable); + + if (pMaskPixmap->drawable.width >= max_tex_w || + pMaskPixmap->drawable.height >= max_tex_h) { + RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n", + pMaskPixmap->drawable.width, + pMaskPixmap->drawable.height)); + } + + if (pMaskPicture->componentAlpha) { + /* Check if it's component alpha that relies on a source alpha and + * on the source value. We can only get one of those into the + * single source value that we get to blend with. 
+ */ + if (EVERGREENBlendOp[op].src_alpha && + (EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) != + (BLEND_ZERO << COLOR_SRCBLEND_shift)) { + RADEON_FALLBACK(("Component alpha not supported with source " + "alpha and source value blending.\n")); + } + } + + if (!EVERGREENCheckCompositeTexture(pMaskPicture, pDstPicture, op, 1)) + return FALSE; + } + + if (!EVERGREENCheckCompositeTexture(pSrcPicture, pDstPicture, op, 0)) + return FALSE; + + if (!EVERGREENGetDestFormat(pDstPicture, &tmp1)) + return FALSE; + + return TRUE; + +} + +static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture, + PicturePtr pMaskPicture, PicturePtr pDstPicture, + PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) +{ + ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + uint32_t blendcntl, dst_format; + cb_config_t cb_conf; + shader_config_t vs_conf, ps_conf; + const_config_t vs_const_conf; + struct r600_accel_object src_obj, mask_obj, dst_obj; + float *cbuf; + + //return FALSE; + + if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8) + return FALSE; + + src_obj.offset = 0; + dst_obj.offset = 0; + src_obj.bo = radeon_get_pixmap_bo(pSrc); + dst_obj.bo = radeon_get_pixmap_bo(pDst); + + src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); + dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); + + src_obj.width = pSrc->drawable.width; + src_obj.height = pSrc->drawable.height; + src_obj.bpp = pSrc->drawable.bitsPerPixel; + src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; + + dst_obj.width = pDst->drawable.width; + dst_obj.height = pDst->drawable.height; + dst_obj.bpp = pDst->drawable.bitsPerPixel; + dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; + + if (pMask) { + mask_obj.offset = 0; + mask_obj.bo = radeon_get_pixmap_bo(pMask); + mask_obj.pitch = exaGetPixmapPitch(pMask) / 
(pMask->drawable.bitsPerPixel / 8); + + mask_obj.width = pMask->drawable.width; + mask_obj.height = pMask->drawable.height; + mask_obj.bpp = pMask->drawable.bitsPerPixel; + mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; + + if (!EVERGREENSetAccelState(pScrn, + &src_obj, + &mask_obj, + &dst_obj, + accel_state->comp_vs_offset, accel_state->comp_ps_offset, + 3, 0xffffffff)) + return FALSE; + + accel_state->msk_pic = pMaskPicture; + if (pMaskPicture->componentAlpha) { + accel_state->component_alpha = TRUE; + if (EVERGREENBlendOp[op].src_alpha) + accel_state->src_alpha = TRUE; + else + accel_state->src_alpha = FALSE; + } else { + accel_state->component_alpha = FALSE; + accel_state->src_alpha = FALSE; + } + } else { + if (!EVERGREENSetAccelState(pScrn, + &src_obj, + NULL, + &dst_obj, + accel_state->comp_vs_offset, accel_state->comp_ps_offset, + 3, 0xffffffff)) + return FALSE; + + accel_state->msk_pic = NULL; + accel_state->component_alpha = FALSE; + accel_state->src_alpha = FALSE; + } + + if (!EVERGREENGetDestFormat(pDstPicture, &dst_format)) + return FALSE; + + CLEAR (cb_conf); + CLEAR (vs_conf); + CLEAR (ps_conf); + CLEAR (vs_const_conf); + + if (pMask) + radeon_vbo_check(pScrn, &accel_state->vbo, 24); + else + radeon_vbo_check(pScrn, &accel_state->vbo, 16); + + radeon_vbo_check(pScrn, &accel_state->cbuf, 256); + + radeon_cp_start(pScrn); + + evergreen_set_default_state(pScrn); + + evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + + if (!EVERGREENTextureSetup(pSrcPicture, pSrc, 0)) { + radeon_ib_discard(pScrn); + radeon_cs_flush_indirect(pScrn); + radeon_vb_discard(pScrn, &accel_state->vbo); + radeon_vb_discard(pScrn, &accel_state->cbuf); + return FALSE; + } + + if (pMask) { + if 
(!EVERGREENTextureSetup(pMaskPicture, pMask, 1)) { + radeon_ib_discard(pScrn); + radeon_cs_flush_indirect(pScrn); + radeon_vb_discard(pScrn, &accel_state->vbo); + radeon_vb_discard(pScrn, &accel_state->cbuf); + return FALSE; + } + } else + accel_state->is_transform[1] = FALSE; + + if (pMask) { + evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0)); + evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0)); + } else { + evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0)); + evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0)); + } + + /* Shader */ + vs_conf.shader_addr = accel_state->vs_mc_addr; + vs_conf.shader_size = accel_state->vs_size; + vs_conf.num_gprs = 3; + vs_conf.stack_size = 1; + vs_conf.bo = accel_state->shaders_bo; + evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); + + ps_conf.shader_addr = accel_state->ps_mc_addr; + ps_conf.shader_size = accel_state->ps_size; + ps_conf.num_gprs = 3; + ps_conf.stack_size = 1; + ps_conf.clamp_consts = 0; + ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; + evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); + + cb_conf.id = 0; + cb_conf.w = accel_state->dst_obj.pitch; + cb_conf.h = accel_state->dst_obj.height; + cb_conf.base = accel_state->dst_obj.offset; + cb_conf.format = dst_format; + cb_conf.bo = accel_state->dst_obj.bo; + + switch (pDstPicture->format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + default: + cb_conf.comp_swap = 1; /* ARGB */ + break; + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + cb_conf.comp_swap = 0; /* ABGR */ + break; +#ifdef PICT_TYPE_BGRA + case PICT_b8g8r8a8: + case PICT_b8g8r8x8: + cb_conf.comp_swap = 3; /* BGRA */ + break; +#endif + case PICT_r5g6b5: + cb_conf.comp_swap = 2; /* RGB */ + break; + case PICT_a8: + cb_conf.comp_swap = 3; /* A */ + break; + } + cb_conf.source_format = EXPORT_4C_16BPC; + cb_conf.blend_clamp = 1; + evergreen_set_render_target(pScrn, &cb_conf, 
accel_state->dst_obj.domain); + + blendcntl = EVERGREENGetBlendCntl(op, pMaskPicture, pDstPicture->format); + + BEGIN_BATCH(24); + EREG(CB_TARGET_MASK, (0xf << TARGET0_ENABLE_shift)); + EREG(CB_COLOR_CONTROL, (EVERGREEN_ROP[3] | + (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift))); + EREG(CB_BLEND0_CONTROL, blendcntl | CB_BLEND0_CONTROL__ENABLE_bit); + + /* Interpolator setup */ + if (pMask) { + /* export 2 tex coords from VS */ + EREG(SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift)); + /* src = semantic id 0; mask = semantic id 1 */ + EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | + (1 << SEMANTIC_1_shift))); + } else { + /* export 1 tex coords from VS */ + EREG(SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); + /* src = semantic id 0 */ + EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); + } + + PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2); + /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ + E32(((0 << SEMANTIC_shift) | + (0x01 << DEFAULT_VAL_shift))); + /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ + E32(((1 << SEMANTIC_shift) | + (0x01 << DEFAULT_VAL_shift))); + + PACK0(SPI_PS_IN_CONTROL_0, 3); + if (pMask) { + /* input 2 tex coords from VS */ + E32(((2 << NUM_INTERP_shift) | + LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0 + } else { + /* input 1 tex coords from VS */ + E32(((1 << NUM_INTERP_shift) | + LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0 + } + E32(0); // SPI_PS_IN_CONTROL_1 + E32(0); // SPI_INTERP_CONTROL_0 + END_BATCH(); + + /* VS alu constants */ + vs_const_conf.size_bytes = 256; + vs_const_conf.type = SHADER_TYPE_VS; + cbuf = radeon_vbo_space(pScrn, &accel_state->cbuf, 256); + + EVERGREENXFormSetup(pSrcPicture, pSrc, 0, cbuf); + if (pMask) + EVERGREENXFormSetup(pMaskPicture, pMask, 1, cbuf); + + radeon_vbo_commit(pScrn, &accel_state->cbuf); + + /* VS alu constants */ + vs_const_conf.bo = accel_state->cbuf.vb_bo; + vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + 
accel_state->cbuf.vb_start_op; + evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT); + + if (accel_state->vsync) + RADEONVlineHelperClear(pScrn); + + return TRUE; +} + +static void EVERGREENComposite(PixmapPtr pDst, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + float *vb; + + if (accel_state->vsync) + RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); + + if (accel_state->msk_pic) { + + vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24); + + vb[0] = (float)dstX; + vb[1] = (float)dstY; + vb[2] = (float)srcX; + vb[3] = (float)srcY; + vb[4] = (float)maskX; + vb[5] = (float)maskY; + + vb[6] = (float)dstX; + vb[7] = (float)(dstY + h); + vb[8] = (float)srcX; + vb[9] = (float)(srcY + h); + vb[10] = (float)maskX; + vb[11] = (float)(maskY + h); + + vb[12] = (float)(dstX + w); + vb[13] = (float)(dstY + h); + vb[14] = (float)(srcX + w); + vb[15] = (float)(srcY + h); + vb[16] = (float)(maskX + w); + vb[17] = (float)(maskY + h); + + radeon_vbo_commit(pScrn, &accel_state->vbo); + + } else { + + vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16); + + vb[0] = (float)dstX; + vb[1] = (float)dstY; + vb[2] = (float)srcX; + vb[3] = (float)srcY; + + vb[4] = (float)dstX; + vb[5] = (float)(dstY + h); + vb[6] = (float)srcX; + vb[7] = (float)(srcY + h); + + vb[8] = (float)(dstX + w); + vb[9] = (float)(dstY + h); + vb[10] = (float)(srcX + w); + vb[11] = (float)(srcY + h); + + radeon_vbo_commit(pScrn, &accel_state->vbo); + } + + +} + +static void EVERGREENDoneComposite(PixmapPtr pDst) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + int vtx_size; + + if (accel_state->vsync) + evergreen_cp_wait_vline_sync(pScrn, pDst, + 
accel_state->vline_crtc, + accel_state->vline_y1, + accel_state->vline_y2); + + vtx_size = accel_state->msk_pic ? 24 : 16; + + evergreen_finish_op(pScrn, vtx_size); +} + +/* + * Copies a w x h rectangle from system memory (src, src_pitch bytes per row) + * into pDst at (x, y): the rows are staged in a GTT scratch BO which is then + * blitted to the pixmap. Returns FALSE when the pixmap is below 8 bpp, when + * the destination BO is neither referenced by the current CS nor busy, when + * EVERGREENSetAccelState() fails, or when the scratch BO cannot be opened or + * mapped; returns TRUE once the copy has been issued. + */ +static Bool +EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, + char *src, int src_pitch) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + struct radeon_exa_pixmap_priv *driver_priv; + struct radeon_bo *scratch; + unsigned size; + uint32_t dst_domain; + int bpp = pDst->drawable.bitsPerPixel; + uint32_t scratch_pitch; + uint32_t src_pitch_hw; + uint32_t dst_pitch_hw; + Bool r; + int i; + struct r600_accel_object src_obj, dst_obj; + + if (bpp < 8) + return FALSE; + + /* bpp / 8 is 0 below 8 bpp, so divide by it only after the guard above */ + scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256); + src_pitch_hw = scratch_pitch / (bpp / 8); + dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8); + + driver_priv = exaGetPixmapDriverPrivate(pDst); + + /* If we know the BO won't be busy, don't bother */ + if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs) && + !radeon_bo_is_busy(driver_priv->bo, &dst_domain)) + return FALSE; + + size = scratch_pitch * h; + scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0); + if (scratch == NULL) { + return FALSE; + } + + src_obj.pitch = src_pitch_hw; + src_obj.width = w; + src_obj.height = h; + src_obj.offset = 0; + src_obj.bpp = bpp; + src_obj.domain = RADEON_GEM_DOMAIN_GTT; + src_obj.bo = scratch; + + dst_obj.pitch = dst_pitch_hw; + dst_obj.width = pDst->drawable.width; + dst_obj.height = pDst->drawable.height; + dst_obj.offset = 0; + dst_obj.bpp = bpp; + dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; + dst_obj.bo = radeon_get_pixmap_bo(pDst); + + if (!EVERGREENSetAccelState(pScrn, + &src_obj, + NULL, + &dst_obj, + accel_state->copy_vs_offset, accel_state->copy_ps_offset, + 3, 0xffffffff)) { + r = FALSE; + goto out; + } + + r = radeon_bo_map(scratch, 0); + if (r) { + r = FALSE; + goto out; + } + r = TRUE; + size = w * bpp / 8; + 
for (i = 0; i < h; i++) { + memcpy(scratch->ptr + i * scratch_pitch, src, size); + src += src_pitch; + } + radeon_bo_unmap(scratch); + + if (info->accel_state->vsync) + RADEONVlineHelperSet(pScrn, x, y, x + w, y + h); + + /* blit from gart to vram */ + EVERGREENDoPrepareCopy(pScrn); + EVERGREENAppendCopyVertex(pScrn, 0, 0, x, y, w, h); + EVERGREENDoCopyVline(pDst); + +out: + radeon_bo_unref(scratch); + return r; +} + +/* + * Copies a w x h rectangle at (x, y) of pSrc into system memory (dst, + * dst_pitch bytes per row): the rectangle is blitted into a GTT scratch BO, + * radeon_cs_flush_indirect() is called, and the scratch BO is mapped and + * copied out row by row. Returns FALSE when the pixmap is below 8 bpp, when + * the source BO's domain is not exactly RADEON_GEM_DOMAIN_VRAM, when the CS + * space check or EVERGREENSetAccelState() fails, or when the scratch BO + * cannot be opened or mapped. + */ +static Bool +EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, + int h, char *dst, int dst_pitch) +{ + ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + struct radeon_exa_pixmap_priv *driver_priv; + struct radeon_bo *scratch; + unsigned size; + uint32_t src_domain = 0; + int bpp = pSrc->drawable.bitsPerPixel; + uint32_t scratch_pitch; + uint32_t dst_pitch_hw; + uint32_t src_pitch_hw; + Bool r; + struct r600_accel_object src_obj, dst_obj; + + if (bpp < 8) + return FALSE; + + /* bpp / 8 is 0 below 8 bpp, so divide by it only after the guard above */ + scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256); + dst_pitch_hw = scratch_pitch / (bpp / 8); + src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8); + + driver_priv = exaGetPixmapDriverPrivate(pSrc); + + /* If we know the BO won't end up in VRAM anyway, don't bother */ + if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) { + src_domain = radeon_bo_get_src_domain(driver_priv->bo); + if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == + (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) + src_domain = 0; + } + + if (!src_domain) + radeon_bo_is_busy(driver_priv->bo, &src_domain); + + if (src_domain != RADEON_GEM_DOMAIN_VRAM) + return FALSE; + + size = scratch_pitch * h; + scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0); + if (scratch == NULL) { + return FALSE; + } + radeon_cs_space_reset_bos(info->cs); + radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + accel_state->src_obj[0].domain = 
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; + radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0); + accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT; + radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain); + r = radeon_cs_space_check(info->cs); + if (r) { + r = FALSE; + goto out; + } + + src_obj.pitch = src_pitch_hw; + src_obj.width = pSrc->drawable.width; + src_obj.height = pSrc->drawable.height; + src_obj.offset = 0; + src_obj.bpp = bpp; + src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; + src_obj.bo = radeon_get_pixmap_bo(pSrc); + + dst_obj.pitch = dst_pitch_hw; + dst_obj.width = w; + dst_obj.height = h; + dst_obj.offset = 0; + dst_obj.bo = scratch; + dst_obj.bpp = bpp; + dst_obj.domain = RADEON_GEM_DOMAIN_GTT; + + if (!EVERGREENSetAccelState(pScrn, + &src_obj, + NULL, + &dst_obj, + accel_state->copy_vs_offset, accel_state->copy_ps_offset, + 3, 0xffffffff)) { + r = FALSE; + goto out; + } + + /* blit from vram to gart */ + EVERGREENDoPrepareCopy(pScrn); + EVERGREENAppendCopyVertex(pScrn, x, y, 0, 0, w, h); + EVERGREENDoCopy(pScrn); + + radeon_cs_flush_indirect(pScrn); + + r = radeon_bo_map(scratch, 0); + if (r) { + r = FALSE; + goto out; + } + r = TRUE; + w *= bpp / 8; + size = 0; + while (h--) { + memcpy(dst, scratch->ptr + size, w); + size += scratch_pitch; + dst += dst_pitch; + } + radeon_bo_unmap(scratch); +out: + radeon_bo_unref(scratch); + return r; +} + +static int +EVERGREENMarkSync(ScreenPtr pScreen) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + + return ++accel_state->exaSyncMarker; + +} + +static void +EVERGREENSync(ScreenPtr pScreen, int marker) +{ + return; +} + +static Bool +EVERGREENAllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + + /* 512 bytes per 
shader for now */ + int size = 512 * 9; + + accel_state->shaders = NULL; + + accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0, + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->shaders_bo == NULL) { + ErrorF("Allocating shader failed\n"); + return FALSE; + } + return TRUE; +} + +Bool +EVERGREENLoadShaders(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + RADEONChipFamily ChipSet = info->ChipFamily; + uint32_t *shader; + int ret; + + ret = radeon_bo_map(accel_state->shaders_bo, 1); + if (ret) { + FatalError("failed to map shader %d\n", ret); + return FALSE; + } + shader = accel_state->shaders_bo->ptr; + + /* solid vs --------------------------------------- */ + accel_state->solid_vs_offset = 0; + evergreen_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4); + + /* solid ps --------------------------------------- */ + accel_state->solid_ps_offset = 512; + evergreen_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4); + + /* copy vs --------------------------------------- */ + accel_state->copy_vs_offset = 1024; + evergreen_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4); + + /* copy ps --------------------------------------- */ + accel_state->copy_ps_offset = 1536; + evergreen_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4); + + /* comp vs --------------------------------------- */ + accel_state->comp_vs_offset = 2048; + evergreen_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4); + + /* comp ps --------------------------------------- */ + accel_state->comp_ps_offset = 2560; + evergreen_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4); + + /* xv vs --------------------------------------- */ + accel_state->xv_vs_offset = 3072; + evergreen_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4); + + /* xv ps --------------------------------------- */ + accel_state->xv_ps_offset = 3584; + evergreen_xv_ps(ChipSet, shader 
+ accel_state->xv_ps_offset / 4); + + radeon_bo_unmap(accel_state->shaders_bo); + + return TRUE; +} + +Bool +EVERGREENDrawInit(ScreenPtr pScreen) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + + if (info->accel_state->exa == NULL) { + xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n"); + return FALSE; + } + + /* accel requires kms */ + if (!info->cs) + return FALSE; + + info->accel_state->exa->exa_major = EXA_VERSION_MAJOR; + info->accel_state->exa->exa_minor = EXA_VERSION_MINOR; + + info->accel_state->exa->PrepareSolid = EVERGREENPrepareSolid; + info->accel_state->exa->Solid = EVERGREENSolid; + info->accel_state->exa->DoneSolid = EVERGREENDoneSolid; + + info->accel_state->exa->PrepareCopy = EVERGREENPrepareCopy; + info->accel_state->exa->Copy = EVERGREENCopy; + info->accel_state->exa->DoneCopy = EVERGREENDoneCopy; + + info->accel_state->exa->MarkSync = EVERGREENMarkSync; + info->accel_state->exa->WaitMarker = EVERGREENSync; + + info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap; + info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap; + info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen; + info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS; + info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS; + info->accel_state->exa->UploadToScreen = EVERGREENUploadToScreen; + info->accel_state->exa->DownloadFromScreen = EVERGREENDownloadFromScreen; + + info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; +#ifdef EXA_SUPPORTS_PREPARE_AUX + info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX; +#endif + +#ifdef EXA_HANDLES_PIXMAPS + info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS; +#ifdef EXA_MIXED_PIXMAPS + info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS; +#endif +#endif + info->accel_state->exa->pixmapOffsetAlign = 256; + info->accel_state->exa->pixmapPitchAlign = 256; + + info->accel_state->exa->CheckComposite = EVERGREENCheckComposite; + 
info->accel_state->exa->PrepareComposite = EVERGREENPrepareComposite; + info->accel_state->exa->Composite = EVERGREENComposite; + info->accel_state->exa->DoneComposite = EVERGREENDoneComposite; + +#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3) + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n"); + + info->accel_state->exa->maxPitchBytes = 32768; + info->accel_state->exa->maxX = 8192; +#else + info->accel_state->exa->maxX = 8192; +#endif + info->accel_state->exa->maxY = 8192; + + /* not supported yet */ + if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) { + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n"); + info->accel_state->vsync = TRUE; + } else + info->accel_state->vsync = FALSE; + + if (!exaDriverInit(pScreen, info->accel_state->exa)) { + xfree(info->accel_state->exa); + return FALSE; + } + + info->accel_state->XInited3D = FALSE; + info->accel_state->copy_area = NULL; + info->accel_state->src_obj[0].bo = NULL; + info->accel_state->src_obj[1].bo = NULL; + info->accel_state->dst_obj.bo = NULL; + info->accel_state->copy_area_bo = NULL; + info->accel_state->vbo.vb_start_op = -1; + info->accel_state->cbuf.vb_start_op = -1; + info->accel_state->finish_op = evergreen_finish_op; + info->accel_state->vbo.verts_per_op = 3; + info->accel_state->cbuf.verts_per_op = 1; + RADEONVlineHelperClear(pScrn); + + radeon_vbo_init_lists(pScrn); + + if (!EVERGREENAllocShaders(pScrn, pScreen)) + return FALSE; + + if (!EVERGREENLoadShaders(pScrn)) + return FALSE; + + exaMarkSync(pScreen); + + return TRUE; + +} + diff --git a/src/evergreen_reg.h b/src/evergreen_reg.h new file mode 100644 index 00000000..4608f080 --- /dev/null +++ b/src/evergreen_reg.h @@ -0,0 +1,247 @@ +/* + * Evergreen Register documentation + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _EVERGREEN_REG_H_ +#define _EVERGREEN_REG_H_ + +/* + * Register definitions + */ + +#include "evergreen_reg_auto.h" + +enum { + SHADER_TYPE_PS, + SHADER_TYPE_VS, + SHADER_TYPE_GS, + SHADER_TYPE_HS, + SHADER_TYPE_LS, + SHADER_TYPE_CS, + SHADER_TYPE_FS, +}; + + +/* SET_*_REG offsets + ends */ +enum { + SET_CONFIG_REG_offset = 0x00008000, + SET_CONFIG_REG_end = 0x0000ac00, + SET_CONTEXT_REG_offset = 0x00028000, + SET_CONTEXT_REG_end = 0x00029000, + SET_RESOURCE_offset = 0x00030000, + SET_RESOURCE_end = 0x00038000, + SET_SAMPLER_offset = 0x0003c000, + SET_SAMPLER_end = 0x0003c600, + SET_CTL_CONST_offset = 0x0003cff0, + SET_CTL_CONST_end = 0x0003ff0c, + SET_LOOP_CONST_offset = 0x0003a200, + SET_LOOP_CONST_end = 0x0003a500, + SET_BOOL_CONST_offset = 0x0003a500, + SET_BOOL_CONST_end = 0x0003a518, +}; + +/* Packet3 commands */ +enum { + IT_NOP = 0x10, + IT_INDIRECT_BUFFER_END = 0x17, + IT_SET_PREDICATION = 0x20, + IT_COND_EXEC = 0x22, + IT_PRED_EXEC = 0x23, + IT_DRAW_INDEX_2 = 0x27, + IT_CONTEXT_CONTROL = 0x28, + IT_DRAW_INDEX_OFFSET = 0x29, + IT_INDEX_TYPE = 0x2A, + IT_DRAW_INDEX = 0x2B, + IT_DRAW_INDEX_AUTO = 0x2D, + IT_DRAW_INDEX_IMMD = 0x2E, + IT_NUM_INSTANCES = 0x2F, + IT_INDIRECT_BUFFER = 0x32, + IT_STRMOUT_BUFFER_UPDATE = 0x34, + IT_MEM_SEMAPHORE = 0x39, + IT_MPEG_INDEX = 0x3A, + IT_WAIT_REG_MEM = 0x3C, + IT_MEM_WRITE = 0x3D, + IT_SURFACE_SYNC = 0x43, + IT_ME_INITIALIZE = 0x44, + IT_COND_WRITE = 0x45, + IT_EVENT_WRITE = 0x46, + IT_EVENT_WRITE_EOP = 0x47, + IT_EVENT_WRITE_EOS = 0x48, + IT_SET_CONFIG_REG = 0x68, + IT_SET_CONTEXT_REG = 0x69, + IT_SET_ALU_CONST = 0x6A, + IT_SET_BOOL_CONST = 0x6B, + IT_SET_LOOP_CONST = 0x6C, + IT_SET_RESOURCE = 0x6D, + IT_SET_SAMPLER = 0x6E, + IT_SET_CTL_CONST = 0x6F, +}; + +/* IT_WAIT_REG_MEM operation encoding */ + +#define IT_WAIT_ALWAYS (0 << 0) +#define IT_WAIT_LT (1 << 0) +#define IT_WAIT_LE (2 << 0) +#define IT_WAIT_EQ (3 << 0) +#define IT_WAIT_NE (4 << 0) +#define IT_WAIT_GE (5 << 0) +#define IT_WAIT_GT (6 << 0) 
+#define IT_WAIT_REG (0 << 4) +#define IT_WAIT_MEM (1 << 4) + +#define IT_WAIT_ADDR(x) ((x) >> 2) + +enum { + + SQ_LDS_ALLOC_PS = 0x288ec, + SQ_DYN_GPR_RESOURCE_LIMIT_1 = 0x28838, + SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x8d8c, + + WAIT_UNTIL = 0x8040, + WAIT_CP_DMA_IDLE_bit = 1 << 8, + WAIT_CMDFIFO_bit = 1 << 10, + WAIT_3D_IDLE_bit = 1 << 15, + WAIT_3D_IDLECLEAN_bit = 1 << 17, + WAIT_EXTERN_SIG_bit = 1 << 19, + CMDFIFO_ENTRIES_mask = 0xf << 20, + CMDFIFO_ENTRIES_shift = 20, + + CP_COHER_CNTL = 0x85f0, + DEST_BASE_0_ENA_bit = 1 << 0, + DEST_BASE_1_ENA_bit = 1 << 1, + SO0_DEST_BASE_ENA_bit = 1 << 2, + SO1_DEST_BASE_ENA_bit = 1 << 3, + SO2_DEST_BASE_ENA_bit = 1 << 4, + SO3_DEST_BASE_ENA_bit = 1 << 5, + CB0_DEST_BASE_ENA_bit = 1 << 6, + CB1_DEST_BASE_ENA_bit = 1 << 7, + CB2_DEST_BASE_ENA_bit = 1 << 8, + CB3_DEST_BASE_ENA_bit = 1 << 9, + CB4_DEST_BASE_ENA_bit = 1 << 10, + CB5_DEST_BASE_ENA_bit = 1 << 11, + CB6_DEST_BASE_ENA_bit = 1 << 12, + CB7_DEST_BASE_ENA_bit = 1 << 13, + DB_DEST_BASE_ENA_bit = 1 << 14, + CB8_DEST_BASE_ENA_bit = 1 << 15, + CB9_DEST_BASE_ENA_bit = 1 << 16, + CB10_DEST_BASE_ENA_bit = 1 << 17, + CB11_DEST_BASE_ENA_bit = 1 << 18, + FULL_CACHE_ENA_bit = 1 << 20, + TC_ACTION_ENA_bit = 1 << 23, + VC_ACTION_ENA_bit = 1 << 24, + CB_ACTION_ENA_bit = 1 << 25, + DB_ACTION_ENA_bit = 1 << 26, + SH_ACTION_ENA_bit = 1 << 27, + SX_ACTION_ENA_bit = 1 << 28, + CP_COHER_SIZE = 0x85f4, + CP_COHER_BASE = 0x85f8, + CP_COHER_STATUS = 0x85fc, + MATCHING_GFX_CNTX_mask = 0xff << 0, + MATCHING_GFX_CNTX_shift = 0, + STATUS_bit = 1 << 31, + +// SQ_VTX_CONSTANT_WORD2_0 = 0x00030008, +// SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20, + FMT_INVALID=0, FMT_8, FMT_4_4, FMT_3_3_2, + FMT_16=5, FMT_16_FLOAT, FMT_8_8, + FMT_5_6_5, FMT_6_5_5, FMT_1_5_5_5, FMT_4_4_4_4, + FMT_5_5_5_1, FMT_32, FMT_32_FLOAT, FMT_16_16, + FMT_16_16_FLOAT=16, FMT_8_24, FMT_8_24_FLOAT, FMT_24_8, + FMT_24_8_FLOAT, FMT_10_11_11, FMT_10_11_11_FLOAT, FMT_11_11_10, + FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8, 
FMT_10_10_10_2, + FMT_X24_8_32_FLOAT, FMT_32_32, FMT_32_32_FLOAT, FMT_16_16_16_16, + FMT_16_16_16_16_FLOAT=32, FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT, + FMT_1 = 37, FMT_GB_GR=39, + FMT_BG_RG, FMT_32_AS_8, FMT_32_AS_8_8, FMT_5_9_9_9_SHAREDEXP, + FMT_8_8_8, FMT_16_16_16, FMT_16_16_16_FLOAT, FMT_32_32_32, + FMT_32_32_32_FLOAT=48, + +// High level register file lengths + /* Each per-stage base below is the previous stage's base plus that previous stage's _num count. */ + SQ_FETCH_RESOURCE = SQ_TEX_RESOURCE_WORD0_0, + SQ_FETCH_RESOURCE_ps_num = 176, + SQ_FETCH_RESOURCE_vs_num = 160, + SQ_FETCH_RESOURCE_gs_num = 160, + SQ_FETCH_RESOURCE_hs_num = 160, + SQ_FETCH_RESOURCE_ls_num = 160, + SQ_FETCH_RESOURCE_cs_num = 176, + SQ_FETCH_RESOURCE_fs_num = 32, + SQ_FETCH_RESOURCE_all_num = 1024, + SQ_FETCH_RESOURCE_offset = 32, + SQ_FETCH_RESOURCE_ps = 0, // 0...175 + SQ_FETCH_RESOURCE_vs = SQ_FETCH_RESOURCE_ps + SQ_FETCH_RESOURCE_ps_num, // 176...335 + SQ_FETCH_RESOURCE_gs = SQ_FETCH_RESOURCE_vs + SQ_FETCH_RESOURCE_vs_num, // 336...495 + SQ_FETCH_RESOURCE_hs = SQ_FETCH_RESOURCE_gs + SQ_FETCH_RESOURCE_gs_num, // 496...655 + SQ_FETCH_RESOURCE_ls = SQ_FETCH_RESOURCE_hs + SQ_FETCH_RESOURCE_hs_num, // 656...815 + SQ_FETCH_RESOURCE_cs = SQ_FETCH_RESOURCE_ls + SQ_FETCH_RESOURCE_ls_num, // 816...991 + SQ_FETCH_RESOURCE_fs = SQ_FETCH_RESOURCE_cs + SQ_FETCH_RESOURCE_cs_num, // 992...1023 + + SQ_TEX_SAMPLER_WORD = SQ_TEX_SAMPLER_WORD0_0, + SQ_TEX_SAMPLER_WORD_ps_num = 18, + SQ_TEX_SAMPLER_WORD_vs_num = 18, + SQ_TEX_SAMPLER_WORD_gs_num = 18, + SQ_TEX_SAMPLER_WORD_hs_num = 18, + SQ_TEX_SAMPLER_WORD_ls_num = 18, + SQ_TEX_SAMPLER_WORD_cs_num = 18, + SQ_TEX_SAMPLER_WORD_all_num = 108, + SQ_TEX_SAMPLER_WORD_offset = 12, + SQ_TEX_SAMPLER_WORD_ps = 0, // 0...17 + SQ_TEX_SAMPLER_WORD_vs = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num, // 18...35 + SQ_TEX_SAMPLER_WORD_gs = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num, // 36...53 + SQ_TEX_SAMPLER_WORD_hs = SQ_TEX_SAMPLER_WORD_gs + SQ_TEX_SAMPLER_WORD_gs_num, // 54...71 + SQ_TEX_SAMPLER_WORD_ls = SQ_TEX_SAMPLER_WORD_hs + 
SQ_TEX_SAMPLER_WORD_hs_num, // 72...89 + SQ_TEX_SAMPLER_WORD_cs = SQ_TEX_SAMPLER_WORD_ls + SQ_TEX_SAMPLER_WORD_ls_num, // 90...107 + + SQ_LOOP_CONST = SQ_LOOP_CONST_0, + SQ_LOOP_CONST_ps_num = 32, + SQ_LOOP_CONST_vs_num = 32, + SQ_LOOP_CONST_gs_num = 32, + SQ_LOOP_CONST_hs_num = 32, + SQ_LOOP_CONST_ls_num = 32, + SQ_LOOP_CONST_cs_num = 32, + SQ_LOOP_CONST_all_num = 192, + SQ_LOOP_CONST_offset = 4, + SQ_LOOP_CONST_ps = 0, // 0...31 + SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num, // 32...63 + SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num, // 64...95 + SQ_LOOP_CONST_hs = SQ_LOOP_CONST_gs + SQ_LOOP_CONST_gs_num, // 96...127 + SQ_LOOP_CONST_ls = SQ_LOOP_CONST_hs + SQ_LOOP_CONST_hs_num, // 128...159 + SQ_LOOP_CONST_cs = SQ_LOOP_CONST_ls + SQ_LOOP_CONST_ls_num, // 160...191 + + SQ_BOOL_CONST = SQ_BOOL_CONST_0, /* 32 bits each */ + SQ_BOOL_CONST_ps_num = 1, + SQ_BOOL_CONST_vs_num = 1, + SQ_BOOL_CONST_gs_num = 1, + SQ_BOOL_CONST_hs_num = 1, + SQ_BOOL_CONST_ls_num = 1, + SQ_BOOL_CONST_cs_num = 1, + SQ_BOOL_CONST_all_num = 6, + SQ_BOOL_CONST_offset = 4, + SQ_BOOL_CONST_ps = 0, + SQ_BOOL_CONST_vs = SQ_BOOL_CONST_ps + SQ_BOOL_CONST_ps_num, + SQ_BOOL_CONST_gs = SQ_BOOL_CONST_vs + SQ_BOOL_CONST_vs_num, + SQ_BOOL_CONST_hs = SQ_BOOL_CONST_gs + SQ_BOOL_CONST_gs_num, + SQ_BOOL_CONST_ls = SQ_BOOL_CONST_hs + SQ_BOOL_CONST_hs_num, + SQ_BOOL_CONST_cs = SQ_BOOL_CONST_ls + SQ_BOOL_CONST_ls_num, + +}; + +#endif diff --git a/src/evergreen_reg_auto.h b/src/evergreen_reg_auto.h new file mode 100644 index 00000000..5c615864 --- /dev/null +++ b/src/evergreen_reg_auto.h @@ -0,0 +1,4039 @@ +/* + * Evergreen Register documentation + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _EVERGREEN_REG_AUTO_H +#define _EVERGREEN_REG_AUTO_H + +enum { + + VGT_VTX_VECT_EJECT_REG = 0x000088b0, + PRIM_COUNT_mask = 0x3ff << 0, + PRIM_COUNT_shift = 0, + VGT_LAST_COPY_STATE = 0x000088c0, + SRC_STATE_ID_mask = 0x07 << 0, + SRC_STATE_ID_shift = 0, + DST_STATE_ID_mask = 0x07 << 16, + DST_STATE_ID_shift = 16, + VGT_CACHE_INVALIDATION = 0x000088c4, + CACHE_INVALIDATION_mask = 0x03 << 0, + CACHE_INVALIDATION_shift = 0, + VC_ONLY = 0x00, + TC_ONLY = 0x01, + VC_AND_TC = 0x02, + VS_NO_EXTRA_BUFFER_bit = 1 << 5, + AUTO_INVLD_EN_mask = 0x03 << 6, + AUTO_INVLD_EN_shift = 6, + VGT_GS_VERTEX_REUSE = 0x000088d4, + VERT_REUSE_mask = 0x1f << 0, + VERT_REUSE_shift = 0, + VGT_CNTL_STATUS = 0x000088f0, + VGT_OUT_INDX_BUSY_bit = 1 << 0, + VGT_OUT_BUSY_bit = 1 << 1, + VGT_PT_BUSY_bit = 1 << 2, + VGT_TE_BUSY_bit = 1 << 3, + VGT_VR_BUSY_bit = 1 << 4, + VGT_GRP_BUSY_bit = 1 << 5, + VGT_DMA_REQ_BUSY_bit = 1 << 6, + VGT_DMA_BUSY_bit = 1 << 7, + VGT_GS_BUSY_bit = 1 << 8, + VGT_HS_BUSY_bit = 1 << 9, + VGT_TE11_BUSY_bit = 1 << 10, + VGT_BUSY_bit = 1 << 11, + VGT_PRIMITIVE_TYPE = 0x00008958, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift = 0, + DI_PT_NONE = 0x00, + DI_PT_POINTLIST = 0x01, + DI_PT_LINELIST = 0x02, + DI_PT_LINESTRIP = 0x03, + DI_PT_TRILIST = 0x04, + DI_PT_TRIFAN = 0x05, + DI_PT_TRISTRIP = 0x06, + DI_PT_UNUSED_0 = 0x07, + DI_PT_UNUSED_1 = 0x08, + DI_PT_PATCH = 0x09, + DI_PT_LINELIST_ADJ = 0x0a, + DI_PT_LINESTRIP_ADJ = 0x0b, + DI_PT_TRILIST_ADJ = 0x0c, + DI_PT_TRISTRIP_ADJ = 0x0d, + DI_PT_UNUSED_3 = 0x0e, + DI_PT_UNUSED_4 = 0x0f, + DI_PT_TRI_WITH_WFLAGS = 0x10, + DI_PT_RECTLIST = 0x11, + DI_PT_LINELOOP = 0x12, + DI_PT_QUADLIST = 0x13, + DI_PT_QUADSTRIP = 0x14, + DI_PT_POLYGON = 0x15, + DI_PT_2D_COPY_RECT_LIST_V0 = 0x16, + DI_PT_2D_COPY_RECT_LIST_V1 = 0x17, + DI_PT_2D_COPY_RECT_LIST_V2 = 0x18, + DI_PT_2D_COPY_RECT_LIST_V3 = 0x19, + DI_PT_2D_FILL_RECT_LIST = 0x1a, + DI_PT_2D_LINE_STRIP = 0x1b, + DI_PT_2D_TRI_STRIP = 
0x1c, + VGT_INDEX_TYPE = 0x0000895c, + INDEX_TYPE_mask = 0x03 << 0, + INDEX_TYPE_shift = 0, + DI_INDEX_SIZE_16_BIT = 0x00, + DI_INDEX_SIZE_32_BIT = 0x01, + VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960, + VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964, + VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968, + VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c, + VGT_NUM_INDICES = 0x00008970, + VGT_NUM_INSTANCES = 0x00008974, + PA_CL_CNTL_STATUS = 0x00008a10, + CL_BUSY_bit = 1 << 31, + PA_CL_ENHANCE = 0x00008a14, + CLIP_VTX_REORDER_ENA_bit = 1 << 0, + NUM_CLIP_SEQ_mask = 0x03 << 1, + NUM_CLIP_SEQ_shift = 1, + CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3, + VE_NAN_PROC_DISABLE_bit = 1 << 4, + PA_SU_CNTL_STATUS = 0x00008a50, + SU_BUSY_bit = 1 << 31, + PA_SU_LINE_STIPPLE_VALUE = 0x00008a60, + LINE_STIPPLE_VALUE_mask = 0xffffff << 0, + LINE_STIPPLE_VALUE_shift = 0, + PA_SC_LINE_STIPPLE_STATE = 0x00008b10, + CURRENT_PTR_mask = 0x0f << 0, + CURRENT_PTR_shift = 0, + CURRENT_COUNT_mask = 0xff << 8, + CURRENT_COUNT_shift = 8, + SQ_CONFIG = 0x00008c00, + VC_ENABLE_bit = 1 << 0, + EXPORT_SRC_C_bit = 1 << 1, + CS_PRIO_mask = 0x03 << 18, + CS_PRIO_shift = 18, + LS_PRIO_mask = 0x03 << 20, + LS_PRIO_shift = 20, + HS_PRIO_mask = 0x03 << 22, + HS_PRIO_shift = 22, + PS_PRIO_mask = 0x03 << 24, + PS_PRIO_shift = 24, + VS_PRIO_mask = 0x03 << 26, + VS_PRIO_shift = 26, + GS_PRIO_mask = 0x03 << 28, + GS_PRIO_shift = 28, + ES_PRIO_mask = 0x03 << 30, + ES_PRIO_shift = 30, + SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04, + NUM_PS_GPRS_mask = 0xff << 0, + NUM_PS_GPRS_shift = 0, + NUM_VS_GPRS_mask = 0xff << 16, + NUM_VS_GPRS_shift = 16, + NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28, + NUM_CLAUSE_TEMP_GPRS_shift = 28, + SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08, + NUM_GS_GPRS_mask = 0xff << 0, + NUM_GS_GPRS_shift = 0, + NUM_ES_GPRS_mask = 0xff << 16, + NUM_ES_GPRS_shift = 16, + SQ_GPR_RESOURCE_MGMT_3 = 0x00008c0c, + NUM_HS_GPRS_mask = 0xff << 0, + NUM_HS_GPRS_shift = 0, + NUM_LS_GPRS_mask = 0xff << 16, + NUM_LS_GPRS_shift = 16, + 
SQ_GLOBAL_GPR_RESOURCE_MGMT_1 = 0x00008c10, + PS_GGPR_BASE_mask = 0xff << 0, + PS_GGPR_BASE_shift = 0, + VS_GGPR_BASE_mask = 0xff << 8, + VS_GGPR_BASE_shift = 8, + GS_GGPR_BASE_mask = 0xff << 16, + GS_GGPR_BASE_shift = 16, + ES_GGPR_BASE_mask = 0xff << 24, + ES_GGPR_BASE_shift = 24, + SQ_GLOBAL_GPR_RESOURCE_MGMT_2 = 0x00008c14, + HS_GGPR_BASE_mask = 0xff << 0, + HS_GGPR_BASE_shift = 0, + LS_GGPR_BASE_mask = 0xff << 8, + LS_GGPR_BASE_shift = 8, + CS_GGPR_BASE_mask = 0xff << 16, + CS_GGPR_BASE_shift = 16, + SQ_THREAD_RESOURCE_MGMT = 0x00008c18, + NUM_PS_THREADS_mask = 0xff << 0, + NUM_PS_THREADS_shift = 0, + NUM_VS_THREADS_mask = 0xff << 8, + NUM_VS_THREADS_shift = 8, + NUM_GS_THREADS_mask = 0xff << 16, + NUM_GS_THREADS_shift = 16, + NUM_ES_THREADS_mask = 0xff << 24, + NUM_ES_THREADS_shift = 24, + SQ_THREAD_RESOURCE_MGMT_2 = 0x00008c1c, + NUM_HS_THREADS_mask = 0xff << 0, + NUM_HS_THREADS_shift = 0, + NUM_LS_THREADS_mask = 0xff << 8, + NUM_LS_THREADS_shift = 8, + SQ_STACK_RESOURCE_MGMT_1 = 0x00008c20, + NUM_PS_STACK_ENTRIES_mask = 0xfff << 0, + NUM_PS_STACK_ENTRIES_shift = 0, + NUM_VS_STACK_ENTRIES_mask = 0xfff << 16, + NUM_VS_STACK_ENTRIES_shift = 16, + SQ_STACK_RESOURCE_MGMT_2 = 0x00008c24, + NUM_GS_STACK_ENTRIES_mask = 0xfff << 0, + NUM_GS_STACK_ENTRIES_shift = 0, + NUM_ES_STACK_ENTRIES_mask = 0xfff << 16, + NUM_ES_STACK_ENTRIES_shift = 16, + SQ_STACK_RESOURCE_MGMT_3 = 0x00008c28, + NUM_HS_STACK_ENTRIES_mask = 0xfff << 0, + NUM_HS_STACK_ENTRIES_shift = 0, + NUM_LS_STACK_ENTRIES_mask = 0xfff << 16, + NUM_LS_STACK_ENTRIES_shift = 16, + SQ_ESGS_RING_BASE = 0x00008c40, + SQ_ESGS_RING_SIZE = 0x00008c44, + SQ_GSVS_RING_BASE = 0x00008c48, + SQ_GSVS_RING_SIZE = 0x00008c4c, + SQ_ESTMP_RING_BASE = 0x00008c50, + SQ_ESTMP_RING_SIZE = 0x00008c54, + SQ_GSTMP_RING_BASE = 0x00008c58, + SQ_GSTMP_RING_SIZE = 0x00008c5c, + SQ_VSTMP_RING_BASE = 0x00008c60, + SQ_VSTMP_RING_SIZE = 0x00008c64, + SQ_PSTMP_RING_BASE = 0x00008c68, + SQ_PSTMP_RING_SIZE = 0x00008c6c, + SQ_CONST_MEM_BASE = 
0x00008df8, + SQ_ALU_WORD1_OP3 = 0x00008dfc, + SRC2_SEL_mask = 0x1ff << 0, + SRC2_SEL_shift = 0, + SQ_ALU_SRC_LDS_OQ_A = 0xdb, + SQ_ALU_SRC_LDS_OQ_B = 0xdc, + SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, + SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, + SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, + SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, + SQ_ALU_SRC_TIME_HI = 0xe3, + SQ_ALU_SRC_TIME_LO = 0xe4, + SQ_ALU_SRC_MASK_HI = 0xe5, + SQ_ALU_SRC_MASK_LO = 0xe6, + SQ_ALU_SRC_HW_WAVE_ID = 0xe7, + SQ_ALU_SRC_SIMD_ID = 0xe8, + SQ_ALU_SRC_SE_ID = 0xe9, + SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, + SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, + SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, + SQ_ALU_SRC_HW_ALU_ODD = 0xed, + SQ_ALU_SRC_LOOP_IDX = 0xee, + SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, + SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, + SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, + SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, + SQ_ALU_SRC_1_DBL_L = 0xf4, + SQ_ALU_SRC_1_DBL_M = 0xf5, + SQ_ALU_SRC_0_5_DBL_L = 0xf6, + SQ_ALU_SRC_0_5_DBL_M = 0xf7, + SQ_ALU_SRC_0 = 0xf8, + SQ_ALU_SRC_1 = 0xf9, + SQ_ALU_SRC_1_INT = 0xfa, + SQ_ALU_SRC_M_1_INT = 0xfb, + SQ_ALU_SRC_0_5 = 0xfc, + SQ_ALU_SRC_LITERAL = 0xfd, + SQ_ALU_SRC_PV = 0xfe, + SQ_ALU_SRC_PS = 0xff, + SRC2_REL_bit = 1 << 9, + SRC2_CHAN_mask = 0x03 << 10, + SRC2_CHAN_shift = 10, + SQ_CHAN_X = 0x00, + SQ_CHAN_Y = 0x01, + SQ_CHAN_Z = 0x02, + SQ_CHAN_W = 0x03, + SRC2_NEG_bit = 1 << 12, + SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, + SQ_ALU_WORD1_OP3__ALU_INST_shift = 13, + SQ_OP3_INST_BFE_UINT = 0x04, + SQ_OP3_INST_BFE_INT = 0x05, + SQ_OP3_INST_BFI_INT = 0x06, + SQ_OP3_INST_FMA = 0x07, + SQ_OP3_INST_CNDNE_64 = 0x09, + SQ_OP3_INST_FMA_64 = 0x0a, + SQ_OP3_INST_LERP_UINT = 0x0b, + SQ_OP3_INST_BIT_ALIGN_INT = 0x0c, + SQ_OP3_INST_BYTE_ALIGN_INT = 0x0d, + SQ_OP3_INST_SAD_ACCUM_UINT = 0x0e, + SQ_OP3_INST_SAD_ACCUM_HI_UINT = 0x0f, + SQ_OP3_INST_MULADD_UINT24 = 0x10, + SQ_OP3_INST_LDS_IDX_OP = 0x11, + SQ_OP3_INST_MULADD = 0x14, + SQ_OP3_INST_MULADD_M2 = 0x15, + SQ_OP3_INST_MULADD_M4 = 0x16, + SQ_OP3_INST_MULADD_D2 = 0x17, + SQ_OP3_INST_MULADD_IEEE = 0x18, 
+ SQ_OP3_INST_CNDE = 0x19, + SQ_OP3_INST_CNDGT = 0x1a, + SQ_OP3_INST_CNDGE = 0x1b, + SQ_OP3_INST_CNDE_INT = 0x1c, + SQ_OP3_INST_CNDGT_INT = 0x1d, + SQ_OP3_INST_CNDGE_INT = 0x1e, + SQ_OP3_INST_MUL_LIT = 0x1f, + SQ_ALU_WORD1_LDS_DIRECT_LITERAL_LO = 0x00008dfc, + OFFSET_A_mask = 0x1fff << 0, + OFFSET_A_shift = 0, + STRIDE_A_mask = 0x7f << 13, + STRIDE_A_shift = 13, + THREAD_REL_A_bit = 1 << 22, + SQ_TEX_WORD2 = 0x00008dfc, + OFFSET_X_mask = 0x1f << 0, + OFFSET_X_shift = 0, + OFFSET_Y_mask = 0x1f << 5, + OFFSET_Y_shift = 5, + OFFSET_Z_mask = 0x1f << 10, + OFFSET_Z_shift = 10, + SAMPLER_ID_mask = 0x1f << 15, + SAMPLER_ID_shift = 15, + SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20, + SQ_TEX_WORD2__SRC_SEL_X_shift = 20, + SQ_SEL_X = 0x00, + SQ_SEL_Y = 0x01, + SQ_SEL_Z = 0x02, + SQ_SEL_W = 0x03, + SQ_SEL_0 = 0x04, + SQ_SEL_1 = 0x05, + SRC_SEL_Y_mask = 0x07 << 23, + SRC_SEL_Y_shift = 23, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SRC_SEL_Z_mask = 0x07 << 26, + SRC_SEL_Z_shift = 26, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SRC_SEL_W_mask = 0x07 << 29, + SRC_SEL_W_shift = 29, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc, + BURST_COUNT_mask = 0x0f << 16, + BURST_COUNT_shift = 16, + VALID_PIXEL_MODE_bit = 1 << 20, + END_OF_PROGRAM_bit = 1 << 21, + SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0xff << 22, + SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 22, + SQ_CF_INST_MEM_STREAM0_BUF0 = 0x40, + SQ_CF_INST_MEM_STREAM0_BUF1 = 0x41, + SQ_CF_INST_MEM_STREAM0_BUF2 = 0x42, + SQ_CF_INST_MEM_STREAM0_BUF3 = 0x43, + SQ_CF_INST_MEM_STREAM1_BUF0 = 0x44, + SQ_CF_INST_MEM_STREAM1_BUF1 = 0x45, + SQ_CF_INST_MEM_STREAM1_BUF2 = 0x46, + SQ_CF_INST_MEM_STREAM1_BUF3 = 
0x47, + SQ_CF_INST_MEM_STREAM2_BUF0 = 0x48, + SQ_CF_INST_MEM_STREAM2_BUF1 = 0x49, + SQ_CF_INST_MEM_STREAM2_BUF2 = 0x4a, + SQ_CF_INST_MEM_STREAM2_BUF3 = 0x4b, + SQ_CF_INST_MEM_STREAM3_BUF0 = 0x4c, + SQ_CF_INST_MEM_STREAM3_BUF1 = 0x4d, + SQ_CF_INST_MEM_STREAM3_BUF2 = 0x4e, + SQ_CF_INST_MEM_STREAM3_BUF3 = 0x4f, + SQ_CF_INST_MEM_SCRATCH = 0x50, + SQ_CF_INST_MEM_RING = 0x52, + SQ_CF_INST_EXPORT = 0x53, + SQ_CF_INST_EXPORT_DONE = 0x54, + SQ_CF_INST_MEM_EXPORT = 0x55, + SQ_CF_INST_MEM_RAT = 0x56, + SQ_CF_INST_MEM_RAT_CACHELESS = 0x57, + SQ_CF_INST_MEM_RING1 = 0x58, + SQ_CF_INST_MEM_RING2 = 0x59, + SQ_CF_INST_MEM_RING3 = 0x5a, + SQ_CF_INST_MEM_EXPORT_COMBINED = 0x5b, + SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS = 0x5c, + MARK_bit = 1 << 30, + BARRIER_bit = 1 << 31, + SQ_CF_ALU_WORD1 = 0x00008dfc, + KCACHE_MODE1_mask = 0x03 << 0, + KCACHE_MODE1_shift = 0, + SQ_CF_KCACHE_NOP = 0x00, + SQ_CF_KCACHE_LOCK_1 = 0x01, + SQ_CF_KCACHE_LOCK_2 = 0x02, + SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, + KCACHE_ADDR0_mask = 0xff << 2, + KCACHE_ADDR0_shift = 2, + KCACHE_ADDR1_mask = 0xff << 10, + KCACHE_ADDR1_shift = 10, + SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18, + SQ_CF_ALU_WORD1__COUNT_shift = 18, + SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, + SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26, + SQ_CF_ALU_WORD1__CF_INST_shift = 26, + SQ_CF_INST_ALU = 0x08, + SQ_CF_INST_ALU_PUSH_BEFORE = 0x09, + SQ_CF_INST_ALU_POP_AFTER = 0x0a, + SQ_CF_INST_ALU_POP2_AFTER = 0x0b, + SQ_CF_INST_ALU_EXTENDED = 0x0c, + SQ_CF_INST_ALU_CONTINUE = 0x0d, + SQ_CF_INST_ALU_BREAK = 0x0e, + SQ_CF_INST_ALU_ELSE_AFTER = 0x0f, + WHOLE_QUAD_MODE_bit = 1 << 30, +/* BARRIER_bit = 1 << 31, */ + SQ_TEX_WORD1 = 0x00008dfc, + SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0, + SQ_TEX_WORD1__DST_GPR_shift = 0, + SQ_TEX_WORD1__DST_REL_bit = 1 << 7, + SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9, + SQ_TEX_WORD1__DST_SEL_X_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 
= 0x05, */ + SQ_SEL_MASK = 0x07, + SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12, + SQ_TEX_WORD1__DST_SEL_Y_shift = 12, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15, + SQ_TEX_WORD1__DST_SEL_Z_shift = 15, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18, + SQ_TEX_WORD1__DST_SEL_W_shift = 18, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21, + SQ_TEX_WORD1__LOD_BIAS_shift = 21, + COORD_TYPE_X_bit = 1 << 28, + COORD_TYPE_Y_bit = 1 << 29, + COORD_TYPE_Z_bit = 1 << 30, + COORD_TYPE_W_bit = 1 << 31, + SQ_VTX_WORD0 = 0x00008dfc, + VTX_INST_mask = 0x1f << 0, + VTX_INST_shift = 0, + SQ_VTX_INST_FETCH = 0x00, + SQ_VTX_INST_SEMANTIC = 0x01, + SQ_VTX_INST_GET_BUFFER_RESINFO = 0x0e, + FETCH_TYPE_mask = 0x03 << 5, + FETCH_TYPE_shift = 5, + SQ_VTX_FETCH_VERTEX_DATA = 0x00, + SQ_VTX_FETCH_INSTANCE_DATA = 0x01, + SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02, + FETCH_WHOLE_QUAD_bit = 1 << 7, + BUFFER_ID_mask = 0xff << 8, + BUFFER_ID_shift = 8, + SQ_VTX_WORD0__SRC_GPR_mask = 0x7f << 16, + SQ_VTX_WORD0__SRC_GPR_shift = 16, + SRC_REL_bit = 1 << 23, + SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24, + SQ_VTX_WORD0__SRC_SEL_X_shift = 24, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ + MEGA_FETCH_COUNT_mask = 0x3f << 26, + MEGA_FETCH_COUNT_shift = 26, + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc, + SEL_X_mask = 0x07 << 0, + SEL_X_shift = 0, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* 
SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SEL_Y_mask = 0x07 << 3, + SEL_Y_shift = 3, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SEL_Z_mask = 0x07 << 6, + SEL_Z_shift = 6, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SEL_W_mask = 0x07 << 9, + SEL_W_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_RD_WORD0 = 0x00008dfc, + MEM_INST_mask = 0x1f << 0, + MEM_INST_shift = 0, + SQ_MEM_INST_MEM = 0x02, + SQ_MEM_RD_WORD0__ELEM_SIZE_mask = 0x03 << 5, + SQ_MEM_RD_WORD0__ELEM_SIZE_shift = 5, +/* FETCH_WHOLE_QUAD_bit = 1 << 7, */ + MEM_OP_mask = 0x07 << 8, + MEM_OP_shift = 8, + SQ_MEM_OP_RD_SCRATCH = 0x00, + SQ_MEM_OP_RD_SCATTER = 0x02, + SQ_MEM_OP_GDS = 0x04, + SQ_MEM_OP_TF_WRITE = 0x05, + SQ_MEM_RD_WORD0__UNCACHED_bit = 1 << 11, + INDEXED_bit = 1 << 12, + SQ_MEM_RD_WORD0__SRC_GPR_mask = 0x7f << 16, + SQ_MEM_RD_WORD0__SRC_GPR_shift = 16, +/* SRC_REL_bit = 1 << 23, */ + SQ_MEM_RD_WORD0__SRC_SEL_X_mask = 0x03 << 24, + SQ_MEM_RD_WORD0__SRC_SEL_X_shift = 24, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ + BURST_CNT_mask = 0x0f << 26, + BURST_CNT_shift = 26, + SQ_ALU_WORD1 = 0x00008dfc, + SQ_ALU_WORD1__ENCODING_mask = 0x07 << 15, + SQ_ALU_WORD1__ENCODING_shift = 15, + BANK_SWIZZLE_mask = 0x07 << 18, + BANK_SWIZZLE_shift = 18, + SQ_ALU_VEC_012 = 0x00, + SQ_ALU_VEC_021 = 0x01, + SQ_ALU_VEC_120 = 0x02, + SQ_ALU_VEC_102 = 0x03, + SQ_ALU_VEC_201 = 0x04, + SQ_ALU_VEC_210 = 0x05, + SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21, + SQ_ALU_WORD1__DST_GPR_shift = 21, + SQ_ALU_WORD1__DST_REL_bit = 1 << 28, + DST_CHAN_mask = 0x03 << 29, + DST_CHAN_shift 
= 29, + CHAN_X = 0x00, + CHAN_Y = 0x01, + CHAN_Z = 0x02, + CHAN_W = 0x03, + SQ_ALU_WORD1__CLAMP_bit = 1 << 31, + SQ_CF_ALU_WORD0_EXT = 0x00008dfc, + KCACHE_BANK_INDEX_MODE0_mask = 0x03 << 4, + KCACHE_BANK_INDEX_MODE0_shift = 4, + SQ_CF_INDEX_NONE = 0x00, + SQ_CF_INDEX_0 = 0x01, + SQ_CF_INDEX_1 = 0x02, + SQ_CF_INVALID = 0x03, + KCACHE_BANK_INDEX_MODE1_mask = 0x03 << 6, + KCACHE_BANK_INDEX_MODE1_shift = 6, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + KCACHE_BANK_INDEX_MODE2_mask = 0x03 << 8, + KCACHE_BANK_INDEX_MODE2_shift = 8, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + KCACHE_BANK_INDEX_MODE3_mask = 0x03 << 10, + KCACHE_BANK_INDEX_MODE3_shift = 10, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + KCACHE_BANK2_mask = 0x0f << 22, + KCACHE_BANK2_shift = 22, + KCACHE_BANK3_mask = 0x0f << 26, + KCACHE_BANK3_shift = 26, + KCACHE_MODE2_mask = 0x03 << 30, + KCACHE_MODE2_shift = 30, +/* SQ_CF_KCACHE_NOP = 0x00, */ +/* SQ_CF_KCACHE_LOCK_1 = 0x01, */ +/* SQ_CF_KCACHE_LOCK_2 = 0x02, */ +/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */ + SQ_ALU_WORD0_LDS_IDX_OP = 0x00008dfc, + SRC0_SEL_mask = 0x1ff << 0, + SRC0_SEL_shift = 0, +/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */ +/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */ +/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */ +/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */ +/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */ +/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */ +/* SQ_ALU_SRC_TIME_HI = 0xe3, */ +/* SQ_ALU_SRC_TIME_LO = 0xe4, */ +/* SQ_ALU_SRC_MASK_HI = 0xe5, */ +/* SQ_ALU_SRC_MASK_LO = 0xe6, */ +/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */ +/* SQ_ALU_SRC_SIMD_ID = 0xe8, */ +/* SQ_ALU_SRC_SE_ID = 0xe9, */ +/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */ +/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */ +/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */ +/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */ +/* 
SQ_ALU_SRC_LOOP_IDX = 0xee, */ +/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */ +/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */ +/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */ +/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */ +/* SQ_ALU_SRC_1_DBL_L = 0xf4, */ +/* SQ_ALU_SRC_1_DBL_M = 0xf5, */ +/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */ +/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */ +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 = 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ + SRC0_REL_bit = 1 << 9, + SRC0_CHAN_mask = 0x03 << 10, + SRC0_CHAN_shift = 10, +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + IDX_OFFSET_4_bit = 1 << 12, + SRC1_SEL_mask = 0x1ff << 13, + SRC1_SEL_shift = 13, +/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */ +/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */ +/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */ +/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */ +/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */ +/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */ +/* SQ_ALU_SRC_TIME_HI = 0xe3, */ +/* SQ_ALU_SRC_TIME_LO = 0xe4, */ +/* SQ_ALU_SRC_MASK_HI = 0xe5, */ +/* SQ_ALU_SRC_MASK_LO = 0xe6, */ +/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */ +/* SQ_ALU_SRC_SIMD_ID = 0xe8, */ +/* SQ_ALU_SRC_SE_ID = 0xe9, */ +/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */ +/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */ +/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */ +/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */ +/* SQ_ALU_SRC_LOOP_IDX = 0xee, */ +/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */ +/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */ +/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */ +/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */ +/* SQ_ALU_SRC_1_DBL_L = 0xf4, */ +/* SQ_ALU_SRC_1_DBL_M = 0xf5, */ +/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */ +/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */ +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 = 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* 
SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ + SRC1_REL_bit = 1 << 22, + SRC1_CHAN_mask = 0x03 << 23, + SRC1_CHAN_shift = 23, +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + IDX_OFFSET_5_bit = 1 << 25, + INDEX_MODE_mask = 0x07 << 26, + INDEX_MODE_shift = 26, + SQ_INDEX_AR_X = 0x00, + SQ_INDEX_LOOP = 0x04, + SQ_INDEX_GLOBAL = 0x05, + SQ_INDEX_GLOBAL_AR_X = 0x06, + PRED_SEL_mask = 0x03 << 29, + PRED_SEL_shift = 29, + SQ_PRED_SEL_OFF = 0x00, + SQ_PRED_SEL_ZERO = 0x02, + SQ_PRED_SEL_ONE = 0x03, + LAST_bit = 1 << 31, + SQ_MEM_GDS_WORD2 = 0x00008dfc, + SQ_MEM_GDS_WORD2__DST_SEL_X_mask = 0x07 << 0, + SQ_MEM_GDS_WORD2__DST_SEL_X_shift = 0, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_GDS_WORD2__DST_SEL_Y_mask = 0x07 << 3, + SQ_MEM_GDS_WORD2__DST_SEL_Y_shift = 3, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_GDS_WORD2__DST_SEL_Z_mask = 0x07 << 6, + SQ_MEM_GDS_WORD2__DST_SEL_Z_shift = 6, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_GDS_WORD2__DST_SEL_W_mask = 0x07 << 9, + SQ_MEM_GDS_WORD2__DST_SEL_W_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_CF_ALLOC_EXPORT_WORD0_RAT = 0x00008dfc, + RAT_ID_mask = 0x0f << 0, + RAT_ID_shift = 0, + RAT_INST_mask = 0x3f << 4, + RAT_INST_shift = 4, + SQ_EXPORT_RAT_INST_NOP = 0x00, + SQ_EXPORT_RAT_INST_STORE_TYPED = 0x01, + SQ_EXPORT_RAT_INST_STORE_RAW = 0x02, + SQ_EXPORT_RAT_INST_STORE_RAW_FDENORM = 0x03, + SQ_EXPORT_RAT_INST_CMPXCHG_INT = 0x04, + 
SQ_EXPORT_RAT_INST_CMPXCHG_FLT = 0x05, + SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM = 0x06, + SQ_EXPORT_RAT_INST_ADD = 0x07, + SQ_EXPORT_RAT_INST_SUB = 0x08, + SQ_EXPORT_RAT_INST_RSUB = 0x09, + SQ_EXPORT_RAT_INST_MIN_INT = 0x0a, + SQ_EXPORT_RAT_INST_MIN_UINT = 0x0b, + SQ_EXPORT_RAT_INST_MAX_INT = 0x0c, + SQ_EXPORT_RAT_INST_MAX_UINT = 0x0d, + SQ_EXPORT_RAT_INST_AND = 0x0e, + SQ_EXPORT_RAT_INST_OR = 0x0f, + SQ_EXPORT_RAT_INST_XOR = 0x10, + SQ_EXPORT_RAT_INST_MSKOR = 0x11, + SQ_EXPORT_RAT_INST_INC_UINT = 0x12, + SQ_EXPORT_RAT_INST_DEC_UINT = 0x13, + SQ_EXPORT_RAT_INST_NOP_RTN = 0x20, + SQ_EXPORT_RAT_INST_XCHG_RTN = 0x22, + SQ_EXPORT_RAT_INST_XCHG_FDENORM_RTN = 0x23, + SQ_EXPORT_RAT_INST_CMPXCHG_INT_RTN = 0x24, + SQ_EXPORT_RAT_INST_CMPXCHG_FLT_RTN = 0x25, + SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM_RTN = 0x26, + SQ_EXPORT_RAT_INST_ADD_RTN = 0x27, + SQ_EXPORT_RAT_INST_SUB_RTN = 0x28, + SQ_EXPORT_RAT_INST_RSUB_RTN = 0x29, + SQ_EXPORT_RAT_INST_MIN_INT_RTN = 0x2a, + SQ_EXPORT_RAT_INST_MIN_UINT_RTN = 0x2b, + SQ_EXPORT_RAT_INST_MAX_INT_RTN = 0x2c, + SQ_EXPORT_RAT_INST_MAX_UINT_RTN = 0x2d, + SQ_EXPORT_RAT_INST_AND_RTN = 0x2e, + SQ_EXPORT_RAT_INST_OR_RTN = 0x2f, + SQ_EXPORT_RAT_INST_XOR_RTN = 0x30, + SQ_EXPORT_RAT_INST_MSKOR_RTN = 0x31, + SQ_EXPORT_RAT_INST_INC_UINT_RTN = 0x32, + SQ_EXPORT_RAT_INST_DEC_UINT_RTN = 0x33, + RAT_INDEX_MODE_mask = 0x03 << 11, + RAT_INDEX_MODE_shift = 11, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + SQ_CF_ALLOC_EXPORT_WORD0_RAT__TYPE_mask = 0x03 << 13, + SQ_CF_ALLOC_EXPORT_WORD0_RAT__TYPE_shift = 13, + SQ_EXPORT_PIXEL = 0x00, + SQ_EXPORT_POS = 0x01, + SQ_EXPORT_PARAM = 0x02, + X_UNUSED_FOR_SX_EXPORTS = 0x03, + RW_GPR_mask = 0x7f << 15, + RW_GPR_shift = 15, + RW_REL_bit = 1 << 22, + INDEX_GPR_mask = 0x7f << 23, + INDEX_GPR_shift = 23, + SQ_CF_ALLOC_EXPORT_WORD0_RAT__ELEM_SIZE_mask = 0x03 << 30, + SQ_CF_ALLOC_EXPORT_WORD0_RAT__ELEM_SIZE_shift = 30, + SQ_CF_ALU_WORD0 = 0x00008dfc, + 
SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0, + SQ_CF_ALU_WORD0__ADDR_shift = 0, + KCACHE_BANK0_mask = 0x0f << 22, + KCACHE_BANK0_shift = 22, + KCACHE_BANK1_mask = 0x0f << 26, + KCACHE_BANK1_shift = 26, + KCACHE_MODE0_mask = 0x03 << 30, + KCACHE_MODE0_shift = 30, +/* SQ_CF_KCACHE_NOP = 0x00, */ +/* SQ_CF_KCACHE_LOCK_1 = 0x01, */ +/* SQ_CF_KCACHE_LOCK_2 = 0x02, */ +/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */ + SQ_MEM_GDS_WORD1 = 0x00008dfc, + SQ_MEM_GDS_WORD1__DST_GPR_mask = 0x7f << 0, + SQ_MEM_GDS_WORD1__DST_GPR_shift = 0, + DST_REL_MODE_mask = 0x03 << 7, + DST_REL_MODE_shift = 7, + SQ_REL_NONE = 0x00, + SQ_REL_LOOP = 0x01, + SQ_REL_GLOBAL = 0x02, + GDS_OP_mask = 0x3f << 9, + GDS_OP_shift = 9, + SQ_DS_INST_ADD = 0x00, + SQ_DS_INST_SUB = 0x01, + SQ_DS_INST_RSUB = 0x02, + SQ_DS_INST_INC = 0x03, + SQ_DS_INST_DEC = 0x04, + SQ_DS_INST_MIN_INT = 0x05, + SQ_DS_INST_MAX_INT = 0x06, + SQ_DS_INST_MIN_UINT = 0x07, + SQ_DS_INST_MAX_UINT = 0x08, + SQ_DS_INST_AND = 0x09, + SQ_DS_INST_OR = 0x0a, + SQ_DS_INST_XOR = 0x0b, + SQ_DS_INST_MSKOR = 0x0c, + SQ_DS_INST_WRITE = 0x0d, + SQ_DS_INST_WRITE_REL = 0x0e, + SQ_DS_INST_WRITE2 = 0x0f, + SQ_DS_INST_CMP_STORE = 0x10, + SQ_DS_INST_CMP_STORE_SPF = 0x11, + SQ_DS_INST_BYTE_WRITE = 0x12, + SQ_DS_INST_SHORT_WRITE = 0x13, + SQ_DS_INST_ADD_RET = 0x20, + SQ_DS_INST_SUB_RET = 0x21, + SQ_DS_INST_RSUB_RET = 0x22, + SQ_DS_INST_INC_RET = 0x23, + SQ_DS_INST_DEC_RET = 0x24, + SQ_DS_INST_MIN_INT_RET = 0x25, + SQ_DS_INST_MAX_INT_RET = 0x26, + SQ_DS_INST_MIN_UINT_RET = 0x27, + SQ_DS_INST_MAX_UINT_RET = 0x28, + SQ_DS_INST_AND_RET = 0x29, + SQ_DS_INST_OR_RET = 0x2a, + SQ_DS_INST_XOR_RET = 0x2b, + SQ_DS_INST_MSKOR_RET = 0x2c, + SQ_DS_INST_XCHG_RET = 0x2d, + SQ_DS_INST_XCHG_REL_RET = 0x2e, + SQ_DS_INST_XCHG2_RET = 0x2f, + SQ_DS_INST_CMP_XCHG_RET = 0x30, + SQ_DS_INST_CMP_XCHG_SPF_RET = 0x31, + SQ_DS_INST_READ_RET = 0x32, + SQ_DS_INST_READ_REL_RET = 0x33, + SQ_DS_INST_READ2_RET = 0x34, + SQ_DS_INST_READWRITE_RET = 0x35, + SQ_DS_INST_BYTE_READ_RET = 0x36, + 
SQ_DS_INST_UBYTE_READ_RET = 0x37, + SQ_DS_INST_SHORT_READ_RET = 0x38, + SQ_DS_INST_USHORT_READ_RET = 0x39, + SQ_DS_INST_ATOMIC_ORDERED_ALLOC_RET = 0x3f, + DS_OFFSET_mask = 0x7f << 16, + DS_OFFSET_shift = 16, + UAV_INDEX_MODE_mask = 0x03 << 24, + UAV_INDEX_MODE_shift = 24, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + UAV_ID_mask = 0x0f << 26, + UAV_ID_shift = 26, + ALLOC_CONSUME_bit = 1 << 30, + BCAST_FIRST_REQ_bit = 1 << 31, + SQ_MEM_RD_WORD2 = 0x00008dfc, + ARRAY_BASE_mask = 0x1fff << 0, + ARRAY_BASE_shift = 0, + SQ_MEM_RD_WORD2__ENDIAN_SWAP_mask = 0x03 << 16, + SQ_MEM_RD_WORD2__ENDIAN_SWAP_shift = 16, + SQ_ENDIAN_NONE = 0x00, + SQ_ENDIAN_8IN16 = 0x01, + SQ_ENDIAN_8IN32 = 0x02, + SQ_MEM_RD_WORD2__ARRAY_SIZE_mask = 0xfff << 20, + SQ_MEM_RD_WORD2__ARRAY_SIZE_shift = 20, + SQ_CF_ALU_WORD1_EXT = 0x00008dfc, + KCACHE_MODE3_mask = 0x03 << 0, + KCACHE_MODE3_shift = 0, +/* SQ_CF_KCACHE_NOP = 0x00, */ +/* SQ_CF_KCACHE_LOCK_1 = 0x01, */ +/* SQ_CF_KCACHE_LOCK_2 = 0x02, */ +/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */ + KCACHE_ADDR2_mask = 0xff << 2, + KCACHE_ADDR2_shift = 2, + KCACHE_ADDR3_mask = 0xff << 10, + KCACHE_ADDR3_shift = 10, + SQ_CF_ALU_WORD1_EXT__CF_INST_mask = 0x0f << 26, + SQ_CF_ALU_WORD1_EXT__CF_INST_shift = 26, +/* SQ_CF_INST_ALU = 0x08, */ +/* SQ_CF_INST_ALU_PUSH_BEFORE = 0x09, */ +/* SQ_CF_INST_ALU_POP_AFTER = 0x0a, */ +/* SQ_CF_INST_ALU_POP2_AFTER = 0x0b, */ +/* SQ_CF_INST_ALU_EXTENDED = 0x0c, */ +/* SQ_CF_INST_ALU_CONTINUE = 0x0d, */ +/* SQ_CF_INST_ALU_BREAK = 0x0e, */ +/* SQ_CF_INST_ALU_ELSE_AFTER = 0x0f, */ +/* BARRIER_bit = 1 << 31, */ + SQ_CF_GWS_WORD0 = 0x00008dfc, + VALUE_mask = 0x3ff << 0, + VALUE_shift = 0, + RESOURCE_mask = 0x1f << 16, + RESOURCE_shift = 16, + SIGN_bit = 1 << 25, + VAL_INDEX_MODE_mask = 0x03 << 26, + VAL_INDEX_MODE_shift = 26, + SQ_GWS_INDEX_NONE = 0x00, + SQ_GWS_INDEX_0 = 0x01, + SQ_GWS_INDEX_1 = 0x02, + SQ_GWS_INDEX_MIX = 0x03, + RSRC_INDEX_MODE_mask = 
0x03 << 28, + RSRC_INDEX_MODE_shift = 28, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + GWS_OPCODE_mask = 0x03 << 30, + GWS_OPCODE_shift = 30, + SQ_GWS_SEMA_V = 0x00, + SQ_GWS_SEMA_P = 0x01, + SQ_GWS_BARRIER = 0x02, + SQ_GWS_INIT = 0x03, + SQ_VTX_WORD2 = 0x00008dfc, + SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0, + SQ_VTX_WORD2__OFFSET_shift = 0, + SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16, + SQ_VTX_WORD2__ENDIAN_SWAP_shift = 16, +/* SQ_ENDIAN_NONE = 0x00, */ +/* SQ_ENDIAN_8IN16 = 0x01, */ +/* SQ_ENDIAN_8IN32 = 0x02, */ + CONST_BUF_NO_STRIDE_bit = 1 << 18, + MEGA_FETCH_bit = 1 << 19, + SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20, + BUFFER_INDEX_MODE_mask = 0x03 << 21, + BUFFER_INDEX_MODE_shift = 21, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc, + SQ_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_mask = 0xfff << 0, + SQ_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_shift = 0, + COMP_MASK_mask = 0x0f << 12, + COMP_MASK_shift = 12, + SQ_CF_WORD0 = 0x00008dfc, + SQ_CF_WORD0__ADDR_mask = 0xffffff << 0, + SQ_CF_WORD0__ADDR_shift = 0, + JUMPTABLE_SEL_mask = 0x07 << 24, + JUMPTABLE_SEL_shift = 24, + SQ_CF_JUMPTABLE_SEL_CONST_A = 0x00, + SQ_CF_JUMPTABLE_SEL_CONST_B = 0x01, + SQ_CF_JUMPTABLE_SEL_CONST_C = 0x02, + SQ_CF_JUMPTABLE_SEL_CONST_D = 0x03, + SQ_CF_JUMPTABLE_SEL_INDEX_0 = 0x04, + SQ_CF_JUMPTABLE_SEL_INDEX_1 = 0x05, + SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc, +/* ARRAY_BASE_mask = 0x1fff << 0, */ +/* ARRAY_BASE_shift = 0, */ + SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13, + SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13, +/* SQ_EXPORT_PIXEL = 0x00, */ +/* SQ_EXPORT_POS = 0x01, */ +/* SQ_EXPORT_PARAM = 0x02, */ +/* X_UNUSED_FOR_SX_EXPORTS = 0x03, */ +/* RW_GPR_mask = 0x7f << 15, */ +/* RW_GPR_shift = 15, */ +/* RW_REL_bit = 1 << 22, */ +/* INDEX_GPR_mask = 0x7f << 23, */ +/* INDEX_GPR_shift = 23, */ + 
SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_mask = 0x03 << 30, + SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_shift = 30, + SQ_MEM_GDS_WORD0 = 0x00008dfc, +/* MEM_INST_mask = 0x1f << 0, */ +/* MEM_INST_shift = 0, */ +/* SQ_MEM_INST_MEM = 0x02, */ +/* MEM_OP_mask = 0x07 << 8, */ +/* MEM_OP_shift = 8, */ +/* SQ_MEM_OP_RD_SCRATCH = 0x00, */ +/* SQ_MEM_OP_RD_SCATTER = 0x02, */ +/* SQ_MEM_OP_GDS = 0x04, */ +/* SQ_MEM_OP_TF_WRITE = 0x05, */ + SQ_MEM_GDS_WORD0__SRC_GPR_mask = 0x7f << 11, + SQ_MEM_GDS_WORD0__SRC_GPR_shift = 11, + SRC_REL_MODE_mask = 0x03 << 18, + SRC_REL_MODE_shift = 18, +/* SQ_REL_NONE = 0x00, */ +/* SQ_REL_LOOP = 0x01, */ +/* SQ_REL_GLOBAL = 0x02, */ + SQ_MEM_GDS_WORD0__SRC_SEL_X_mask = 0x07 << 20, + SQ_MEM_GDS_WORD0__SRC_SEL_X_shift = 20, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SRC_SEL_Y_mask = 0x07 << 23, */ +/* SRC_SEL_Y_shift = 23, */ +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SRC_SEL_Z_mask = 0x07 << 26, */ +/* SRC_SEL_Z_shift = 26, */ +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_ALU_WORD1_LDS_DIRECT_LITERAL_HI = 0x00008dfc, + OFFSET_B_mask = 0x1fff << 0, + OFFSET_B_shift = 0, + STRIDE_B_mask = 0x7f << 13, + STRIDE_B_shift = 13, + THREAD_REL_B_bit = 1 << 22, + DIRECT_READ_32_bit = 1 << 31, + SQ_VTX_WORD1 = 0x00008dfc, + SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9, + SQ_VTX_WORD1__DST_SEL_X_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12, + SQ_VTX_WORD1__DST_SEL_Y_shift = 12, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 
0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15, + SQ_VTX_WORD1__DST_SEL_Z_shift = 15, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18, + SQ_VTX_WORD1__DST_SEL_W_shift = 18, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + USE_CONST_FIELDS_bit = 1 << 21, + SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22, + SQ_VTX_WORD1__DATA_FORMAT_shift = 22, + SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28, + SQ_VTX_WORD1__NUM_FORMAT_ALL_shift = 28, + SQ_NUM_FORMAT_NORM = 0x00, + SQ_NUM_FORMAT_INT = 0x01, + SQ_NUM_FORMAT_SCALED = 0x02, + SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30, + SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31, + SQ_ALU_WORD1_OP2 = 0x00008dfc, + SRC0_ABS_bit = 1 << 0, + SRC1_ABS_bit = 1 << 1, + UPDATE_EXECUTE_MASK_bit = 1 << 2, + UPDATE_PRED_bit = 1 << 3, + WRITE_MASK_bit = 1 << 4, + OMOD_mask = 0x03 << 5, + OMOD_shift = 5, + SQ_ALU_OMOD_OFF = 0x00, + SQ_ALU_OMOD_M2 = 0x01, + SQ_ALU_OMOD_M4 = 0x02, + SQ_ALU_OMOD_D2 = 0x03, + SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x7ff << 7, + SQ_ALU_WORD1_OP2__ALU_INST_shift = 7, + SQ_OP2_INST_ADD = 0x00, + SQ_OP2_INST_MUL = 0x01, + SQ_OP2_INST_MUL_IEEE = 0x02, + SQ_OP2_INST_MAX = 0x03, + SQ_OP2_INST_MIN = 0x04, + SQ_OP2_INST_MAX_DX10 = 0x05, + SQ_OP2_INST_MIN_DX10 = 0x06, + SQ_OP2_INST_SETE = 0x08, + SQ_OP2_INST_SETGT = 0x09, + SQ_OP2_INST_SETGE = 0x0a, + SQ_OP2_INST_SETNE = 0x0b, + SQ_OP2_INST_SETE_DX10 = 0x0c, + SQ_OP2_INST_SETGT_DX10 = 0x0d, + SQ_OP2_INST_SETGE_DX10 = 0x0e, + SQ_OP2_INST_SETNE_DX10 = 0x0f, + SQ_OP2_INST_FRACT = 0x10, + SQ_OP2_INST_TRUNC = 0x11, + SQ_OP2_INST_CEIL = 0x12, + SQ_OP2_INST_RNDNE = 0x13, + SQ_OP2_INST_FLOOR = 0x14, + SQ_OP2_INST_ASHR_INT = 0x15, + SQ_OP2_INST_LSHR_INT 
= 0x16, + SQ_OP2_INST_LSHL_INT = 0x17, + SQ_OP2_INST_MOV = 0x19, + SQ_OP2_INST_NOP = 0x1a, + SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, + SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, + SQ_OP2_INST_PRED_SETE = 0x20, + SQ_OP2_INST_PRED_SETGT = 0x21, + SQ_OP2_INST_PRED_SETGE = 0x22, + SQ_OP2_INST_PRED_SETNE = 0x23, + SQ_OP2_INST_PRED_SET_INV = 0x24, + SQ_OP2_INST_PRED_SET_POP = 0x25, + SQ_OP2_INST_PRED_SET_CLR = 0x26, + SQ_OP2_INST_PRED_SET_RESTORE = 0x27, + SQ_OP2_INST_PRED_SETE_PUSH = 0x28, + SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, + SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, + SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, + SQ_OP2_INST_KILLE = 0x2c, + SQ_OP2_INST_KILLGT = 0x2d, + SQ_OP2_INST_KILLGE = 0x2e, + SQ_OP2_INST_KILLNE = 0x2f, + SQ_OP2_INST_AND_INT = 0x30, + SQ_OP2_INST_OR_INT = 0x31, + SQ_OP2_INST_XOR_INT = 0x32, + SQ_OP2_INST_NOT_INT = 0x33, + SQ_OP2_INST_ADD_INT = 0x34, + SQ_OP2_INST_SUB_INT = 0x35, + SQ_OP2_INST_MAX_INT = 0x36, + SQ_OP2_INST_MIN_INT = 0x37, + SQ_OP2_INST_MAX_UINT = 0x38, + SQ_OP2_INST_MIN_UINT = 0x39, + SQ_OP2_INST_SETE_INT = 0x3a, + SQ_OP2_INST_SETGT_INT = 0x3b, + SQ_OP2_INST_SETGE_INT = 0x3c, + SQ_OP2_INST_SETNE_INT = 0x3d, + SQ_OP2_INST_SETGT_UINT = 0x3e, + SQ_OP2_INST_SETGE_UINT = 0x3f, + SQ_OP2_INST_KILLGT_UINT = 0x40, + SQ_OP2_INST_KILLGE_UINT = 0x41, + SQ_OP2_INST_PRED_SETE_INT = 0x42, + SQ_OP2_INST_PRED_SETGT_INT = 0x43, + SQ_OP2_INST_PRED_SETGE_INT = 0x44, + SQ_OP2_INST_PRED_SETNE_INT = 0x45, + SQ_OP2_INST_KILLE_INT = 0x46, + SQ_OP2_INST_KILLGT_INT = 0x47, + SQ_OP2_INST_KILLGE_INT = 0x48, + SQ_OP2_INST_KILLNE_INT = 0x49, + SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, + SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, + SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, + SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, + SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, + SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, + SQ_OP2_INST_FLT_TO_INT = 0x50, + SQ_OP2_INST_BFREV_INT = 0x51, + SQ_OP2_INST_ADDC_UINT = 0x52, + SQ_OP2_INST_SUBB_UINT = 0x53, + SQ_OP2_INST_GROUP_BARRIER = 0x54, + SQ_OP2_INST_GROUP_SEQ_BEGIN = 
0x55, + SQ_OP2_INST_GROUP_SEQ_END = 0x56, + SQ_OP2_INST_SET_MODE = 0x57, + SQ_OP2_INST_SET_CF_IDX0 = 0x58, + SQ_OP2_INST_SET_CF_IDX1 = 0x59, + SQ_OP2_INST_SET_LDS_SIZE = 0x5a, + SQ_OP2_INST_EXP_IEEE = 0x81, + SQ_OP2_INST_LOG_CLAMPED = 0x82, + SQ_OP2_INST_LOG_IEEE = 0x83, + SQ_OP2_INST_RECIP_CLAMPED = 0x84, + SQ_OP2_INST_RECIP_FF = 0x85, + SQ_OP2_INST_RECIP_IEEE = 0x86, + SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x87, + SQ_OP2_INST_RECIPSQRT_FF = 0x88, + SQ_OP2_INST_RECIPSQRT_IEEE = 0x89, + SQ_OP2_INST_SQRT_IEEE = 0x8a, + SQ_OP2_INST_SIN = 0x8d, + SQ_OP2_INST_COS = 0x8e, + SQ_OP2_INST_MULLO_INT = 0x8f, + SQ_OP2_INST_MULHI_INT = 0x90, + SQ_OP2_INST_MULLO_UINT = 0x91, + SQ_OP2_INST_MULHI_UINT = 0x92, + SQ_OP2_INST_RECIP_INT = 0x93, + SQ_OP2_INST_RECIP_UINT = 0x94, + SQ_OP2_INST_RECIP_64 = 0x95, + SQ_OP2_INST_RECIP_CLAMPED_64 = 0x96, + SQ_OP2_INST_RECIPSQRT_64 = 0x97, + SQ_OP2_INST_RECIPSQRT_CLAMPED_64 = 0x98, + SQ_OP2_INST_SQRT_64 = 0x99, + SQ_OP2_INST_FLT_TO_UINT = 0x9a, + SQ_OP2_INST_INT_TO_FLT = 0x9b, + SQ_OP2_INST_UINT_TO_FLT = 0x9c, + SQ_OP2_INST_BFM_INT = 0xa0, + SQ_OP2_INST_FLT32_TO_FLT16 = 0xa2, + SQ_OP2_INST_FLT16_TO_FLT32 = 0xa3, + SQ_OP2_INST_UBYTE0_FLT = 0xa4, + SQ_OP2_INST_UBYTE1_FLT = 0xa5, + SQ_OP2_INST_UBYTE2_FLT = 0xa6, + SQ_OP2_INST_UBYTE3_FLT = 0xa7, + SQ_OP2_INST_BCNT_INT = 0xaa, + SQ_OP2_INST_FFBH_UINT = 0xab, + SQ_OP2_INST_FFBL_INT = 0xac, + SQ_OP2_INST_FFBH_INT = 0xad, + SQ_OP2_INST_FLT_TO_UINT4 = 0xae, + SQ_OP2_INST_DOT_IEEE = 0xaf, + SQ_OP2_INST_FLT_TO_INT_RPI = 0xb0, + SQ_OP2_INST_FLT_TO_INT_FLOOR = 0xb1, + SQ_OP2_INST_MULHI_UINT24 = 0xb2, + SQ_OP2_INST_MBCNT_32HI_INT = 0xb3, + SQ_OP2_INST_OFFSET_TO_FLT = 0xb4, + SQ_OP2_INST_MUL_UINT24 = 0xb5, + SQ_OP2_INST_BCNT_ACCUM_PREV_INT = 0xb6, + SQ_OP2_INST_MBCNT_32LO_ACCUM_PREV_INT = 0xb7, + SQ_OP2_INST_SETE_64 = 0xb8, + SQ_OP2_INST_SETNE_64 = 0xb9, + SQ_OP2_INST_SETGT_64 = 0xba, + SQ_OP2_INST_SETGE_64 = 0xbb, + SQ_OP2_INST_MIN_64 = 0xbc, + SQ_OP2_INST_MAX_64 = 0xbd, + SQ_OP2_INST_DOT4 = 0xbe, + 
SQ_OP2_INST_DOT4_IEEE = 0xbf, + SQ_OP2_INST_CUBE = 0xc0, + SQ_OP2_INST_MAX4 = 0xc1, + SQ_OP2_INST_FREXP_64 = 0xc4, + SQ_OP2_INST_LDEXP_64 = 0xc5, + SQ_OP2_INST_FRACT_64 = 0xc6, + SQ_OP2_INST_PRED_SETGT_64 = 0xc7, + SQ_OP2_INST_PRED_SETE_64 = 0xc8, + SQ_OP2_INST_PRED_SETGE_64 = 0xc9, + SQ_OP2_INST_MUL_64 = 0xca, + SQ_OP2_INST_ADD_64 = 0xcb, + SQ_OP2_INST_MOVA_INT = 0xcc, + SQ_OP2_INST_FLT64_TO_FLT32 = 0xcd, + SQ_OP2_INST_FLT32_TO_FLT64 = 0xce, + SQ_OP2_INST_SAD_ACCUM_PREV_UINT = 0xcf, + SQ_OP2_INST_DOT = 0xd0, + SQ_OP2_INST_MUL_PREV = 0xd1, + SQ_OP2_INST_MUL_IEEE_PREV = 0xd2, + SQ_OP2_INST_ADD_PREV = 0xd3, + SQ_OP2_INST_MULADD_PREV = 0xd4, + SQ_OP2_INST_MULADD_IEEE_PREV = 0xd5, + SQ_OP2_INST_INTERP_XY = 0xd6, + SQ_OP2_INST_INTERP_ZW = 0xd7, + SQ_OP2_INST_INTERP_X = 0xd8, + SQ_OP2_INST_INTERP_Z = 0xd9, + SQ_OP2_INST_STORE_FLAGS = 0xda, + SQ_OP2_INST_LOAD_STORE_FLAGS = 0xdb, + SQ_OP2_INST_INTERP_LOAD_P0 = 0xe0, + SQ_OP2_INST_INTERP_LOAD_P10 = 0xe1, + SQ_OP2_INST_INTERP_LOAD_P20 = 0xe2, + SQ_CF_WORD1 = 0x00008dfc, + POP_COUNT_mask = 0x07 << 0, + POP_COUNT_shift = 0, + CF_CONST_mask = 0x1f << 3, + CF_CONST_shift = 3, + COND_mask = 0x03 << 8, + COND_shift = 8, + SQ_CF_COND_ACTIVE = 0x00, + SQ_CF_COND_FALSE = 0x01, + SQ_CF_COND_BOOL = 0x02, + SQ_CF_COND_NOT_BOOL = 0x03, + SQ_CF_WORD1__COUNT_mask = 0x3f << 10, + SQ_CF_WORD1__COUNT_shift = 10, +/* VALID_PIXEL_MODE_bit = 1 << 20, */ +/* END_OF_PROGRAM_bit = 1 << 21, */ + SQ_CF_WORD1__CF_INST_mask = 0xff << 22, + SQ_CF_WORD1__CF_INST_shift = 22, + SQ_CF_INST_NOP = 0x00, + SQ_CF_INST_TC = 0x01, + SQ_CF_INST_VC = 0x02, + SQ_CF_INST_GDS = 0x03, + SQ_CF_INST_LOOP_START = 0x04, + SQ_CF_INST_LOOP_END = 0x05, + SQ_CF_INST_LOOP_START_DX10 = 0x06, + SQ_CF_INST_LOOP_START_NO_AL = 0x07, + SQ_CF_INST_LOOP_CONTINUE = 0x08, + SQ_CF_INST_LOOP_BREAK = 0x09, + SQ_CF_INST_JUMP = 0x0a, + SQ_CF_INST_PUSH = 0x0b, + SQ_CF_INST_ELSE = 0x0d, + SQ_CF_INST_POP = 0x0e, + SQ_CF_INST_CALL = 0x12, + SQ_CF_INST_CALL_FS = 0x13, + SQ_CF_INST_RETURN = 0x14, + 
SQ_CF_INST_EMIT_VERTEX = 0x15, + SQ_CF_INST_EMIT_CUT_VERTEX = 0x16, + SQ_CF_INST_CUT_VERTEX = 0x17, + SQ_CF_INST_KILL = 0x18, + SQ_CF_INST_WAIT_ACK = 0x1a, + SQ_CF_INST_TC_ACK = 0x1b, + SQ_CF_INST_VC_ACK = 0x1c, + SQ_CF_INST_JUMPTABLE = 0x1d, + SQ_CF_INST_GLOBAL_WAVE_SYNC = 0x1e, + SQ_CF_INST_HALT = 0x1f, +/* WHOLE_QUAD_MODE_bit = 1 << 30, */ +/* BARRIER_bit = 1 << 31, */ + SQ_VTX_WORD1_SEM = 0x00008dfc, + SEMANTIC_ID_mask = 0xff << 0, + SEMANTIC_ID_shift = 0, + SQ_TEX_WORD0 = 0x00008dfc, + TEX_INST_mask = 0x1f << 0, + TEX_INST_shift = 0, + SQ_TEX_INST_LD = 0x03, + SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04, + SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05, + SQ_TEX_INST_GET_LOD = 0x06, + SQ_TEX_INST_GET_GRADIENTS_H = 0x07, + SQ_TEX_INST_GET_GRADIENTS_V = 0x08, + SQ_TEX_INST_SET_TEXTURE_OFFSETS = 0x09, + SQ_TEX_INST_KEEP_GRADIENTS = 0x0a, + SQ_TEX_INST_SET_GRADIENTS_H = 0x0b, + SQ_TEX_INST_SET_GRADIENTS_V = 0x0c, + SQ_TEX_INST_PASS = 0x0d, + SQ_TEX_INST_SAMPLE = 0x10, + SQ_TEX_INST_SAMPLE_L = 0x11, + SQ_TEX_INST_SAMPLE_LB = 0x12, + SQ_TEX_INST_SAMPLE_LZ = 0x13, + SQ_TEX_INST_SAMPLE_G = 0x14, + SQ_TEX_INST_GATHER4 = 0x15, + SQ_TEX_INST_SAMPLE_G_LB = 0x16, + SQ_TEX_INST_GATHER4_O = 0x17, + SQ_TEX_INST_SAMPLE_C = 0x18, + SQ_TEX_INST_SAMPLE_C_L = 0x19, + SQ_TEX_INST_SAMPLE_C_LB = 0x1a, + SQ_TEX_INST_SAMPLE_C_LZ = 0x1b, + SQ_TEX_INST_SAMPLE_C_G = 0x1c, + SQ_TEX_INST_GATHER4_C = 0x1d, + SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e, + SQ_TEX_INST_GATHER4_C_O = 0x1f, + INST_MOD_mask = 0x03 << 5, + INST_MOD_shift = 5, +/* FETCH_WHOLE_QUAD_bit = 1 << 7, */ + RESOURCE_ID_mask = 0xff << 8, + RESOURCE_ID_shift = 8, + SQ_TEX_WORD0__SRC_GPR_mask = 0x7f << 16, + SQ_TEX_WORD0__SRC_GPR_shift = 16, +/* SRC_REL_bit = 1 << 23, */ + SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24, + RESOURCE_INDEX_MODE_mask = 0x03 << 25, + RESOURCE_INDEX_MODE_shift = 25, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + SAMPLER_INDEX_MODE_mask = 0x03 << 27, + 
SAMPLER_INDEX_MODE_shift = 27, +/* SQ_CF_INDEX_NONE = 0x00, */ +/* SQ_CF_INDEX_0 = 0x01, */ +/* SQ_CF_INDEX_1 = 0x02, */ +/* SQ_CF_INVALID = 0x03, */ + SQ_VTX_WORD1_GPR = 0x00008dfc, + SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0, + SQ_VTX_WORD1_GPR__DST_GPR_shift = 0, + SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7, + SQ_ALU_WORD1_LDS_IDX_OP = 0x00008dfc, +/* SRC2_SEL_mask = 0x1ff << 0, */ +/* SRC2_SEL_shift = 0, */ +/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */ +/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */ +/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */ +/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */ +/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */ +/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */ +/* SQ_ALU_SRC_TIME_HI = 0xe3, */ +/* SQ_ALU_SRC_TIME_LO = 0xe4, */ +/* SQ_ALU_SRC_MASK_HI = 0xe5, */ +/* SQ_ALU_SRC_MASK_LO = 0xe6, */ +/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */ +/* SQ_ALU_SRC_SIMD_ID = 0xe8, */ +/* SQ_ALU_SRC_SE_ID = 0xe9, */ +/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */ +/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */ +/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */ +/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */ +/* SQ_ALU_SRC_LOOP_IDX = 0xee, */ +/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */ +/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */ +/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */ +/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */ +/* SQ_ALU_SRC_1_DBL_L = 0xf4, */ +/* SQ_ALU_SRC_1_DBL_M = 0xf5, */ +/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */ +/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */ +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 = 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ +/* SRC2_REL_bit = 1 << 9, */ +/* SRC2_CHAN_mask = 0x03 << 10, */ +/* SRC2_CHAN_shift = 10, */ +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + IDX_OFFSET_1_bit = 1 << 12, + SQ_ALU_WORD1_LDS_IDX_OP__ALU_INST_mask = 0x1f << 13, + SQ_ALU_WORD1_LDS_IDX_OP__ALU_INST_shift = 13, +/* SQ_OP3_INST_BFE_UINT = 0x04, */ +/* 
SQ_OP3_INST_BFE_INT = 0x05, */ +/* SQ_OP3_INST_BFI_INT = 0x06, */ +/* SQ_OP3_INST_FMA = 0x07, */ +/* SQ_OP3_INST_CNDNE_64 = 0x09, */ +/* SQ_OP3_INST_FMA_64 = 0x0a, */ +/* SQ_OP3_INST_LERP_UINT = 0x0b, */ +/* SQ_OP3_INST_BIT_ALIGN_INT = 0x0c, */ +/* SQ_OP3_INST_BYTE_ALIGN_INT = 0x0d, */ +/* SQ_OP3_INST_SAD_ACCUM_UINT = 0x0e, */ +/* SQ_OP3_INST_SAD_ACCUM_HI_UINT = 0x0f, */ +/* SQ_OP3_INST_MULADD_UINT24 = 0x10, */ +/* SQ_OP3_INST_LDS_IDX_OP = 0x11, */ +/* SQ_OP3_INST_MULADD = 0x14, */ +/* SQ_OP3_INST_MULADD_M2 = 0x15, */ +/* SQ_OP3_INST_MULADD_M4 = 0x16, */ +/* SQ_OP3_INST_MULADD_D2 = 0x17, */ +/* SQ_OP3_INST_MULADD_IEEE = 0x18, */ +/* SQ_OP3_INST_CNDE = 0x19, */ +/* SQ_OP3_INST_CNDGT = 0x1a, */ +/* SQ_OP3_INST_CNDGE = 0x1b, */ +/* SQ_OP3_INST_CNDE_INT = 0x1c, */ +/* SQ_OP3_INST_CNDGT_INT = 0x1d, */ +/* SQ_OP3_INST_CNDGE_INT = 0x1e, */ +/* SQ_OP3_INST_MUL_LIT = 0x1f, */ +/* BANK_SWIZZLE_mask = 0x07 << 18, */ +/* BANK_SWIZZLE_shift = 18, */ +/* SQ_ALU_VEC_012 = 0x00, */ +/* SQ_ALU_VEC_021 = 0x01, */ +/* SQ_ALU_VEC_120 = 0x02, */ +/* SQ_ALU_VEC_102 = 0x03, */ +/* SQ_ALU_VEC_201 = 0x04, */ +/* SQ_ALU_VEC_210 = 0x05, */ + LDS_OP_mask = 0x3f << 21, + LDS_OP_shift = 21, +/* SQ_DS_INST_ADD = 0x00, */ +/* SQ_DS_INST_SUB = 0x01, */ +/* SQ_DS_INST_RSUB = 0x02, */ +/* SQ_DS_INST_INC = 0x03, */ +/* SQ_DS_INST_DEC = 0x04, */ +/* SQ_DS_INST_MIN_INT = 0x05, */ +/* SQ_DS_INST_MAX_INT = 0x06, */ +/* SQ_DS_INST_MIN_UINT = 0x07, */ +/* SQ_DS_INST_MAX_UINT = 0x08, */ +/* SQ_DS_INST_AND = 0x09, */ +/* SQ_DS_INST_OR = 0x0a, */ +/* SQ_DS_INST_XOR = 0x0b, */ +/* SQ_DS_INST_MSKOR = 0x0c, */ +/* SQ_DS_INST_WRITE = 0x0d, */ +/* SQ_DS_INST_WRITE_REL = 0x0e, */ +/* SQ_DS_INST_WRITE2 = 0x0f, */ +/* SQ_DS_INST_CMP_STORE = 0x10, */ +/* SQ_DS_INST_CMP_STORE_SPF = 0x11, */ +/* SQ_DS_INST_BYTE_WRITE = 0x12, */ +/* SQ_DS_INST_SHORT_WRITE = 0x13, */ +/* SQ_DS_INST_ADD_RET = 0x20, */ +/* SQ_DS_INST_SUB_RET = 0x21, */ +/* SQ_DS_INST_RSUB_RET = 0x22, */ +/* SQ_DS_INST_INC_RET = 0x23, */ +/* 
SQ_DS_INST_DEC_RET = 0x24, */ +/* SQ_DS_INST_MIN_INT_RET = 0x25, */ +/* SQ_DS_INST_MAX_INT_RET = 0x26, */ +/* SQ_DS_INST_MIN_UINT_RET = 0x27, */ +/* SQ_DS_INST_MAX_UINT_RET = 0x28, */ +/* SQ_DS_INST_AND_RET = 0x29, */ +/* SQ_DS_INST_OR_RET = 0x2a, */ +/* SQ_DS_INST_XOR_RET = 0x2b, */ +/* SQ_DS_INST_MSKOR_RET = 0x2c, */ +/* SQ_DS_INST_XCHG_RET = 0x2d, */ +/* SQ_DS_INST_XCHG_REL_RET = 0x2e, */ +/* SQ_DS_INST_XCHG2_RET = 0x2f, */ +/* SQ_DS_INST_CMP_XCHG_RET = 0x30, */ +/* SQ_DS_INST_CMP_XCHG_SPF_RET = 0x31, */ +/* SQ_DS_INST_READ_RET = 0x32, */ +/* SQ_DS_INST_READ_REL_RET = 0x33, */ +/* SQ_DS_INST_READ2_RET = 0x34, */ +/* SQ_DS_INST_READWRITE_RET = 0x35, */ +/* SQ_DS_INST_BYTE_READ_RET = 0x36, */ +/* SQ_DS_INST_UBYTE_READ_RET = 0x37, */ +/* SQ_DS_INST_SHORT_READ_RET = 0x38, */ +/* SQ_DS_INST_USHORT_READ_RET = 0x39, */ +/* SQ_DS_INST_ATOMIC_ORDERED_ALLOC_RET = 0x3f, */ + IDX_OFFSET_0_bit = 1 << 27, + IDX_OFFSET_2_bit = 1 << 28, +/* DST_CHAN_mask = 0x03 << 29, */ +/* DST_CHAN_shift = 29, */ +/* CHAN_X = 0x00, */ +/* CHAN_Y = 0x01, */ +/* CHAN_Z = 0x02, */ +/* CHAN_W = 0x03, */ + IDX_OFFSET_3_bit = 1 << 31, + SQ_CF_ENCODING_WORD1 = 0x00008dfc, + SQ_CF_ENCODING_WORD1__ENCODING_mask = 0x03 << 28, + SQ_CF_ENCODING_WORD1__ENCODING_shift = 28, + SQ_CF_ENCODING_INST_CF = 0x00, + SQ_CF_ENCODING_INST_ALLOC_EXPORT = 0x01, + SQ_CF_ENCODING_INST_ALU0 = 0x02, + SQ_CF_ENCODING_INST_ALU1 = 0x03, + SQ_ALU_WORD0 = 0x00008dfc, +/* SRC0_SEL_mask = 0x1ff << 0, */ +/* SRC0_SEL_shift = 0, */ +/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */ +/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */ +/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */ +/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */ +/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */ +/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */ +/* SQ_ALU_SRC_TIME_HI = 0xe3, */ +/* SQ_ALU_SRC_TIME_LO = 0xe4, */ +/* SQ_ALU_SRC_MASK_HI = 0xe5, */ +/* SQ_ALU_SRC_MASK_LO = 0xe6, */ +/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */ +/* SQ_ALU_SRC_SIMD_ID = 0xe8, */ +/* SQ_ALU_SRC_SE_ID = 0xe9, */ +/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */ 
+/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */ +/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */ +/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */ +/* SQ_ALU_SRC_LOOP_IDX = 0xee, */ +/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */ +/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */ +/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */ +/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */ +/* SQ_ALU_SRC_1_DBL_L = 0xf4, */ +/* SQ_ALU_SRC_1_DBL_M = 0xf5, */ +/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */ +/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */ +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 = 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ +/* SRC0_REL_bit = 1 << 9, */ +/* SRC0_CHAN_mask = 0x03 << 10, */ +/* SRC0_CHAN_shift = 10, */ +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + SRC0_NEG_bit = 1 << 12, +/* SRC1_SEL_mask = 0x1ff << 13, */ +/* SRC1_SEL_shift = 13, */ +/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */ +/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */ +/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */ +/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */ +/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */ +/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */ +/* SQ_ALU_SRC_TIME_HI = 0xe3, */ +/* SQ_ALU_SRC_TIME_LO = 0xe4, */ +/* SQ_ALU_SRC_MASK_HI = 0xe5, */ +/* SQ_ALU_SRC_MASK_LO = 0xe6, */ +/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */ +/* SQ_ALU_SRC_SIMD_ID = 0xe8, */ +/* SQ_ALU_SRC_SE_ID = 0xe9, */ +/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */ +/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */ +/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */ +/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */ +/* SQ_ALU_SRC_LOOP_IDX = 0xee, */ +/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */ +/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */ +/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */ +/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */ +/* SQ_ALU_SRC_1_DBL_L = 0xf4, */ +/* SQ_ALU_SRC_1_DBL_M = 0xf5, */ +/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */ +/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */ +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 
= 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ +/* SRC1_REL_bit = 1 << 22, */ +/* SRC1_CHAN_mask = 0x03 << 23, */ +/* SRC1_CHAN_shift = 23, */ +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + SRC1_NEG_bit = 1 << 25, +/* INDEX_MODE_mask = 0x07 << 26, */ +/* INDEX_MODE_shift = 26, */ +/* SQ_INDEX_AR_X = 0x00, */ +/* SQ_INDEX_LOOP = 0x04, */ +/* SQ_INDEX_GLOBAL = 0x05, */ +/* SQ_INDEX_GLOBAL_AR_X = 0x06, */ +/* PRED_SEL_mask = 0x03 << 29, */ +/* PRED_SEL_shift = 29, */ +/* SQ_PRED_SEL_OFF = 0x00, */ +/* SQ_PRED_SEL_ZERO = 0x02, */ +/* SQ_PRED_SEL_ONE = 0x03, */ +/* LAST_bit = 1 << 31, */ + SQ_MEM_RD_WORD1 = 0x00008dfc, + SQ_MEM_RD_WORD1__DST_GPR_mask = 0x7f << 0, + SQ_MEM_RD_WORD1__DST_GPR_shift = 0, + SQ_MEM_RD_WORD1__DST_REL_bit = 1 << 7, + SQ_MEM_RD_WORD1__DST_SEL_X_mask = 0x07 << 9, + SQ_MEM_RD_WORD1__DST_SEL_X_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_RD_WORD1__DST_SEL_Y_mask = 0x07 << 12, + SQ_MEM_RD_WORD1__DST_SEL_Y_shift = 12, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_RD_WORD1__DST_SEL_Z_mask = 0x07 << 15, + SQ_MEM_RD_WORD1__DST_SEL_Z_shift = 15, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_MEM_RD_WORD1__DST_SEL_W_mask = 0x07 << 18, + SQ_MEM_RD_WORD1__DST_SEL_W_shift = 18, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + 
SQ_MEM_RD_WORD1__DATA_FORMAT_mask = 0x3f << 22, + SQ_MEM_RD_WORD1__DATA_FORMAT_shift = 22, + SQ_MEM_RD_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28, + SQ_MEM_RD_WORD1__NUM_FORMAT_ALL_shift = 28, +/* SQ_NUM_FORMAT_NORM = 0x00, */ +/* SQ_NUM_FORMAT_INT = 0x01, */ +/* SQ_NUM_FORMAT_SCALED = 0x02, */ + SQ_MEM_RD_WORD1__FORMAT_COMP_ALL_bit = 1 << 30, + SQ_MEM_RD_WORD1__SRF_MODE_ALL_bit = 1 << 31, + SQ_LSTMP_RING_BASE = 0x00008e10, + SQ_LSTMP_RING_SIZE = 0x00008e14, + SQ_HSTMP_RING_BASE = 0x00008e18, + SQ_HSTMP_RING_SIZE = 0x00008e1c, + SX_EXPORT_BUFFER_SIZES = 0x0000900c, + COLOR_BUFFER_SIZE_mask = 0xff << 0, + COLOR_BUFFER_SIZE_shift = 0, + POSITION_BUFFER_SIZE_mask = 0xff << 8, + POSITION_BUFFER_SIZE_shift = 8, + SMX_BUFFER_SIZE_mask = 0xff << 16, + SMX_BUFFER_SIZE_shift = 16, + SX_MEMORY_EXPORT_BASE = 0x00009010, + SX_MEMORY_EXPORT_SIZE = 0x00009014, + SPI_CONFIG_CNTL = 0x00009100, + GPR_WRITE_PRIORITY_mask = 0x3ffff << 0, + GPR_WRITE_PRIORITY_shift = 0, + SPI_CONFIG_CNTL_1 = 0x0000913c, + VTX_DONE_DELAY_mask = 0x0f << 0, + VTX_DONE_DELAY_shift = 0, + X_DELAY_14_CLKS = 0x00, + X_DELAY_16_CLKS = 0x01, + X_DELAY_18_CLKS = 0x02, + X_DELAY_20_CLKS = 0x03, + X_DELAY_22_CLKS = 0x04, + X_DELAY_24_CLKS = 0x05, + X_DELAY_26_CLKS = 0x06, + X_DELAY_28_CLKS = 0x07, + X_DELAY_30_CLKS = 0x08, + X_DELAY_32_CLKS = 0x09, + X_DELAY_34_CLKS = 0x0a, + X_DELAY_4_CLKS = 0x0b, + X_DELAY_6_CLKS = 0x0c, + X_DELAY_8_CLKS = 0x0d, + X_DELAY_10_CLKS = 0x0e, + X_DELAY_12_CLKS = 0x0f, + INTERP_ONE_PRIM_PER_ROW_bit = 1 << 4, + BC_OPTIMIZE_DISABLE_bit = 1 << 5, + PC_LIMIT_ENABLE_bit = 1 << 6, + PC_LIMIT_STRICT_bit = 1 << 7, + PC_LIMIT_SIZE_mask = 0xffff << 16, + PC_LIMIT_SIZE_shift = 16, + TD_CNTL = 0x00009494, + SYNC_PHASE_SH_mask = 0x03 << 0, + SYNC_PHASE_SH_shift = 0, + PAD_STALL_EN_bit = 1 << 8, + GATHER4_FLOAT_MODE_bit = 1 << 16, + TD_STATUS = 0x00009498, + BUSY_bit = 1 << 31, + TA_CNTL_AUX = 0x00009508, + TA_CNTL_AUX__DISABLE_CUBE_WRAP_bit = 1 << 0, + DISABLE_CUBE_ANISO_bit = 1 << 1, + 
GETLOD_SELECT_mask = 0x03 << 2, + GETLOD_SELECT_shift = 2, + X_SAMPLER_AND_RESOURCE_CLAMPED_LOD_IN_RESOURCE= 0x00, + DISABLE_IDLE_STALL_bit = 1 << 4, + TEX_COORD_PRECISION_bit = 1 << 28, + LOD_LOG2_TRUNC_bit = 1 << 29, + DB_ZPASS_COUNT_LOW = 0x00009870, + DB_ZPASS_COUNT_HI = 0x00009874, + COUNT_HI_mask = 0x7fffffff << 0, + COUNT_HI_shift = 0, + TD_PS_BORDER_COLOR_INDEX = 0x0000a400, + INDEX_mask = 0x1f << 0, + INDEX_shift = 0, + TD_PS_BORDER_COLOR_RED = 0x0000a404, + TD_PS_BORDER_COLOR_GREEN = 0x0000a408, + TD_PS_BORDER_COLOR_BLUE = 0x0000a40c, + TD_PS_BORDER_COLOR_ALPHA = 0x0000a410, + TD_VS_BORDER_COLOR_INDEX = 0x0000a414, +/* INDEX_mask = 0x1f << 0, */ +/* INDEX_shift = 0, */ + TD_VS_BORDER_COLOR_RED = 0x0000a418, + TD_VS_BORDER_COLOR_GREEN = 0x0000a41c, + TD_VS_BORDER_COLOR_BLUE = 0x0000a420, + TD_VS_BORDER_COLOR_ALPHA = 0x0000a424, + TD_GS_BORDER_COLOR_INDEX = 0x0000a428, +/* INDEX_mask = 0x1f << 0, */ +/* INDEX_shift = 0, */ + TD_GS_BORDER_COLOR_RED = 0x0000a42c, + TD_GS_BORDER_COLOR_GREEN = 0x0000a430, + TD_GS_BORDER_COLOR_BLUE = 0x0000a434, + TD_GS_BORDER_COLOR_ALPHA = 0x0000a438, + TD_HS_BORDER_COLOR_INDEX = 0x0000a43c, +/* INDEX_mask = 0x1f << 0, */ +/* INDEX_shift = 0, */ + TD_HS_BORDER_COLOR_RED = 0x0000a440, + TD_HS_BORDER_COLOR_GREEN = 0x0000a444, + TD_HS_BORDER_COLOR_BLUE = 0x0000a448, + TD_HS_BORDER_COLOR_ALPHA = 0x0000a44c, + TD_LS_BORDER_COLOR_INDEX = 0x0000a450, +/* INDEX_mask = 0x1f << 0, */ +/* INDEX_shift = 0, */ + TD_LS_BORDER_COLOR_RED = 0x0000a454, + TD_LS_BORDER_COLOR_GREEN = 0x0000a458, + TD_LS_BORDER_COLOR_BLUE = 0x0000a45c, + TD_LS_BORDER_COLOR_ALPHA = 0x0000a460, + TD_CS_BORDER_COLOR_INDEX = 0x0000a464, +/* INDEX_mask = 0x1f << 0, */ +/* INDEX_shift = 0, */ + TD_CS_BORDER_COLOR_RED = 0x0000a468, + TD_CS_BORDER_COLOR_GREEN = 0x0000a46c, + TD_CS_BORDER_COLOR_BLUE = 0x0000a470, + TD_CS_BORDER_COLOR_ALPHA = 0x0000a474, + DB_RENDER_CONTROL = 0x00028000, + DEPTH_CLEAR_ENABLE_bit = 1 << 0, + STENCIL_CLEAR_ENABLE_bit = 1 << 1, + DEPTH_COPY_bit 
= 1 << 2, + STENCIL_COPY_bit = 1 << 3, + RESUMMARIZE_ENABLE_bit = 1 << 4, + STENCIL_COMPRESS_DISABLE_bit = 1 << 5, + DEPTH_COMPRESS_DISABLE_bit = 1 << 6, + COPY_CENTROID_bit = 1 << 7, + COPY_SAMPLE_mask = 0x07 << 8, + COPY_SAMPLE_shift = 8, + COLOR_DISABLE_bit = 1 << 12, + DB_COUNT_CONTROL = 0x00028004, + ZPASS_INCREMENT_DISABLE_bit = 1 << 0, + PERFECT_ZPASS_COUNTS_bit = 1 << 1, + DB_DEPTH_VIEW = 0x00028008, + SLICE_START_mask = 0x7ff << 0, + SLICE_START_shift = 0, + SLICE_MAX_mask = 0x7ff << 13, + SLICE_MAX_shift = 13, + Z_READ_ONLY_bit = 1 << 24, + STENCIL_READ_ONLY_bit = 1 << 25, + DB_RENDER_OVERRIDE = 0x0002800c, + FORCE_HIZ_ENABLE_mask = 0x03 << 0, + FORCE_HIZ_ENABLE_shift = 0, + FORCE_OFF = 0x00, + FORCE_ENABLE = 0x01, + FORCE_DISABLE = 0x02, + FORCE_RESERVED = 0x03, + FORCE_HIS_ENABLE0_mask = 0x03 << 2, + FORCE_HIS_ENABLE0_shift = 2, +/* FORCE_OFF = 0x00, */ +/* FORCE_ENABLE = 0x01, */ +/* FORCE_DISABLE = 0x02, */ +/* FORCE_RESERVED = 0x03, */ + FORCE_HIS_ENABLE1_mask = 0x03 << 4, + FORCE_HIS_ENABLE1_shift = 4, +/* FORCE_OFF = 0x00, */ +/* FORCE_ENABLE = 0x01, */ +/* FORCE_DISABLE = 0x02, */ +/* FORCE_RESERVED = 0x03, */ + FORCE_SHADER_Z_ORDER_bit = 1 << 6, + FAST_Z_DISABLE_bit = 1 << 7, + FAST_STENCIL_DISABLE_bit = 1 << 8, + NOOP_CULL_DISABLE_bit = 1 << 9, + FORCE_COLOR_KILL_bit = 1 << 10, + FORCE_Z_READ_bit = 1 << 11, + FORCE_STENCIL_READ_bit = 1 << 12, + FORCE_FULL_Z_RANGE_mask = 0x03 << 13, + FORCE_FULL_Z_RANGE_shift = 13, +/* FORCE_OFF = 0x00, */ +/* FORCE_ENABLE = 0x01, */ +/* FORCE_DISABLE = 0x02, */ +/* FORCE_RESERVED = 0x03, */ + FORCE_QC_SMASK_CONFLICT_bit = 1 << 15, + DISABLE_VIEWPORT_CLAMP_bit = 1 << 16, + IGNORE_SC_ZRANGE_bit = 1 << 17, + DISABLE_FULLY_COVERED_bit = 1 << 18, + FORCE_Z_LIMIT_SUMM_mask = 0x03 << 19, + FORCE_Z_LIMIT_SUMM_shift = 19, + FORCE_SUMM_OFF = 0x00, + FORCE_SUMM_MINZ = 0x01, + FORCE_SUMM_MAXZ = 0x02, + FORCE_SUMM_BOTH = 0x03, + MAX_TILES_IN_DTT_mask = 0x1f << 21, + MAX_TILES_IN_DTT_shift = 21, + DISABLE_PIXEL_RATE_TILES_bit 
= 1 << 26, + FORCE_Z_DIRTY_bit = 1 << 27, + FORCE_STENCIL_DIRTY_bit = 1 << 28, + FORCE_Z_VALID_bit = 1 << 29, + FORCE_STENCIL_VALID_bit = 1 << 30, + PRESERVE_COMPRESSION_bit = 1 << 31, + DB_RENDER_OVERRIDE2 = 0x00028010, + PARTIAL_SQUAD_LAUNCH_CONTROL_mask = 0x03 << 0, + PARTIAL_SQUAD_LAUNCH_CONTROL_shift = 0, + PSLC_AUTO = 0x00, + PSLC_ON_HANG_ONLY = 0x01, + PSLC_ASAP = 0x02, + PSLC_COUNTDOWN = 0x03, + PARTIAL_SQUAD_LAUNCH_COUNTDOWN_mask = 0x07 << 2, + PARTIAL_SQUAD_LAUNCH_COUNTDOWN_shift = 2, + DISABLE_ZMASK_EXPCLEAR_OPTIMIZATIO_bit = 1 << 5, + DB_HTILE_DATA_BASE = 0x00028014, + DB_STENCIL_CLEAR = 0x00028028, + DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0, + DB_STENCIL_CLEAR__CLEAR_shift = 0, + MIN_mask = 0xff << 16, + MIN_shift = 16, + DB_DEPTH_CLEAR = 0x0002802c, + PA_SC_SCREEN_SCISSOR_TL = 0x00028030, + PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0xffff << 0, + PA_SC_SCREEN_SCISSOR_TL__TL_X_shift = 0, + PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0xffff << 16, + PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift = 16, + PA_SC_SCREEN_SCISSOR_BR = 0x00028034, + PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0xffff << 0, + PA_SC_SCREEN_SCISSOR_BR__BR_X_shift = 0, + PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0xffff << 16, + PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift = 16, + DB_Z_INFO = 0x00028040, + DB_Z_INFO__FORMAT_mask = 0x03 << 0, + DB_Z_INFO__FORMAT_shift = 0, + Z_INVALID = 0x00, + Z_16 = 0x01, + Z_24 = 0x02, + Z_32_FLOAT = 0x03, + DB_Z_INFO__ARRAY_MODE_mask = 0x0f << 4, + DB_Z_INFO__ARRAY_MODE_shift = 4, + ARRAY_LINEAR_GENERAL = 0x00, + ARRAY_LINEAR_ALIGNED = 0x01, + ARRAY_1D_TILED_THIN1 = 0x02, + ARRAY_2D_TILED_THIN1 = 0x04, + DB_Z_INFO__TILE_SPLIT_mask = 0x07 << 8, + DB_Z_INFO__TILE_SPLIT_shift = 8, + ADDR_SURF_TILE_SPLIT_64B = 0x00, + ADDR_SURF_TILE_SPLIT_128B = 0x01, + ADDR_SURF_TILE_SPLIT_256B = 0x02, + ADDR_SURF_TILE_SPLIT_512B = 0x03, + ADDR_SURF_TILE_SPLIT_1KB = 0x04, + ADDR_SURF_TILE_SPLIT_2KB = 0x05, + ADDR_SURF_TILE_SPLIT_4KB = 0x06, + DB_Z_INFO__NUM_BANKS_mask = 0x03 << 12, + 
DB_Z_INFO__NUM_BANKS_shift = 12, + ADDR_SURF_2_BANK = 0x00, + ADDR_SURF_4_BANK = 0x01, + ADDR_SURF_8_BANK = 0x02, + ADDR_SURF_16_BANK = 0x03, + DB_Z_INFO__BANK_WIDTH_mask = 0x03 << 16, + DB_Z_INFO__BANK_WIDTH_shift = 16, + ADDR_SURF_BANK_WIDTH_1 = 0x00, + ADDR_SURF_BANK_WIDTH_2 = 0x01, + ADDR_SURF_BANK_WIDTH_4 = 0x02, + ADDR_SURF_BANK_WIDTH_8 = 0x03, + DB_Z_INFO__BANK_HEIGHT_mask = 0x03 << 20, + DB_Z_INFO__BANK_HEIGHT_shift = 20, + ADDR_SURF_BANK_HEIGHT_1 = 0x00, + ADDR_SURF_BANK_HEIGHT_2 = 0x01, + ADDR_SURF_BANK_HEIGHT_4 = 0x02, + ADDR_SURF_BANK_HEIGHT_8 = 0x03, + DB_Z_INFO__MACRO_TILE_ASPECT_mask = 0x03 << 24, + DB_Z_INFO__MACRO_TILE_ASPECT_shift = 24, + ADDR_SURF_MACRO_ASPECT_1 = 0x00, + ADDR_SURF_MACRO_ASPECT_2 = 0x01, + ADDR_SURF_MACRO_ASPECT_4 = 0x02, + ADDR_SURF_MACRO_ASPECT_8 = 0x03, + ALLOW_EXPCLEAR_bit = 1 << 27, + READ_SIZE_bit = 1 << 28, + TILE_SURFACE_ENABLE_bit = 1 << 29, + DB_Z_INFO__TILE_COMPACT_bit = 1 << 30, + ZRANGE_PRECISION_bit = 1 << 31, + DB_STENCIL_INFO = 0x00028044, + DB_STENCIL_INFO__FORMAT_bit = 1 << 0, + DB_STENCIL_INFO__TILE_SPLIT_mask = 0x07 << 8, + DB_STENCIL_INFO__TILE_SPLIT_shift = 8, +/* ADDR_SURF_TILE_SPLIT_64B = 0x00, */ +/* ADDR_SURF_TILE_SPLIT_128B = 0x01, */ +/* ADDR_SURF_TILE_SPLIT_256B = 0x02, */ +/* ADDR_SURF_TILE_SPLIT_512B = 0x03, */ +/* ADDR_SURF_TILE_SPLIT_1KB = 0x04, */ +/* ADDR_SURF_TILE_SPLIT_2KB = 0x05, */ +/* ADDR_SURF_TILE_SPLIT_4KB = 0x06, */ + DB_Z_READ_BASE = 0x00028048, + DB_STENCIL_READ_BASE = 0x0002804c, + DB_Z_WRITE_BASE = 0x00028050, + DB_STENCIL_WRITE_BASE = 0x00028054, + DB_DEPTH_SIZE = 0x00028058, + PITCH_TILE_MAX_mask = 0x7ff << 0, + PITCH_TILE_MAX_shift = 0, + HEIGHT_TILE_MAX_mask = 0x7ff << 11, + HEIGHT_TILE_MAX_shift = 11, + DB_DEPTH_SLICE = 0x0002805c, + SLICE_TILE_MAX_mask = 0x3fffff << 0, + SLICE_TILE_MAX_shift = 0, + SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140, + SQ_ALU_CONST_BUFFER_SIZE_PS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0, + 
SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift = 0, + SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180, + SQ_ALU_CONST_BUFFER_SIZE_VS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift = 0, + SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0, + SQ_ALU_CONST_BUFFER_SIZE_GS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift = 0, + PA_SC_WINDOW_OFFSET = 0x00028200, + WINDOW_X_OFFSET_mask = 0xffff << 0, + WINDOW_X_OFFSET_shift = 0, + WINDOW_Y_OFFSET_mask = 0xffff << 16, + WINDOW_Y_OFFSET_shift = 16, + PA_SC_WINDOW_SCISSOR_TL = 0x00028204, + PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x7fff << 0, + PA_SC_WINDOW_SCISSOR_TL__TL_X_shift = 0, + PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x7fff << 16, + PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift = 16, + WINDOW_OFFSET_DISABLE_bit = 1 << 31, + PA_SC_WINDOW_SCISSOR_BR = 0x00028208, + PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x7fff << 0, + PA_SC_WINDOW_SCISSOR_BR__BR_X_shift = 0, + PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x7fff << 16, + PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift = 16, + PA_SC_CLIPRECT_RULE = 0x0002820c, + CLIP_RULE_mask = 0xffff << 0, + CLIP_RULE_shift = 0, + PA_SC_CLIPRECT_0_TL = 0x00028210, + PA_SC_CLIPRECT_0_TL_num = 4, + PA_SC_CLIPRECT_0_TL_offset = 8, + PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x7fff << 0, + PA_SC_CLIPRECT_0_TL__TL_X_shift = 0, + PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x7fff << 16, + PA_SC_CLIPRECT_0_TL__TL_Y_shift = 16, + PA_SC_CLIPRECT_0_BR = 0x00028214, + PA_SC_CLIPRECT_0_BR_num = 4, + PA_SC_CLIPRECT_0_BR_offset = 8, + PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x7fff << 0, + PA_SC_CLIPRECT_0_BR__BR_X_shift = 0, + PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x7fff << 16, + PA_SC_CLIPRECT_0_BR__BR_Y_shift = 16, + PA_SC_EDGERULE = 0x00028230, + ER_TRI_mask = 0x0f << 0, + ER_TRI_shift = 0, + ER_POINT_mask = 0x0f << 4, + ER_POINT_shift = 4, + ER_RECT_mask = 0x0f << 8, + ER_RECT_shift = 8, + ER_LINE_LR_mask = 0x3f << 12, + ER_LINE_LR_shift = 12, + 
ER_LINE_RL_mask = 0x3f << 18, + ER_LINE_RL_shift = 18, + ER_LINE_TB_mask = 0x0f << 24, + ER_LINE_TB_shift = 24, + ER_LINE_BT_mask = 0x0f << 28, + ER_LINE_BT_shift = 28, + PA_SU_HARDWARE_SCREEN_OFFSET = 0x00028234, + HW_SCREEN_OFFSET_X_mask = 0x1f << 0, + HW_SCREEN_OFFSET_X_shift = 0, + HW_SCREEN_OFFSET_Y_mask = 0x1f << 8, + HW_SCREEN_OFFSET_Y_shift = 8, + CB_TARGET_MASK = 0x00028238, + TARGET0_ENABLE_mask = 0x0f << 0, + TARGET0_ENABLE_shift = 0, + TARGET1_ENABLE_mask = 0x0f << 4, + TARGET1_ENABLE_shift = 4, + TARGET2_ENABLE_mask = 0x0f << 8, + TARGET2_ENABLE_shift = 8, + TARGET3_ENABLE_mask = 0x0f << 12, + TARGET3_ENABLE_shift = 12, + TARGET4_ENABLE_mask = 0x0f << 16, + TARGET4_ENABLE_shift = 16, + TARGET5_ENABLE_mask = 0x0f << 20, + TARGET5_ENABLE_shift = 20, + TARGET6_ENABLE_mask = 0x0f << 24, + TARGET6_ENABLE_shift = 24, + TARGET7_ENABLE_mask = 0x0f << 28, + TARGET7_ENABLE_shift = 28, + CB_SHADER_MASK = 0x0002823c, + OUTPUT0_ENABLE_mask = 0x0f << 0, + OUTPUT0_ENABLE_shift = 0, + OUTPUT1_ENABLE_mask = 0x0f << 4, + OUTPUT1_ENABLE_shift = 4, + OUTPUT2_ENABLE_mask = 0x0f << 8, + OUTPUT2_ENABLE_shift = 8, + OUTPUT3_ENABLE_mask = 0x0f << 12, + OUTPUT3_ENABLE_shift = 12, + OUTPUT4_ENABLE_mask = 0x0f << 16, + OUTPUT4_ENABLE_shift = 16, + OUTPUT5_ENABLE_mask = 0x0f << 20, + OUTPUT5_ENABLE_shift = 20, + OUTPUT6_ENABLE_mask = 0x0f << 24, + OUTPUT6_ENABLE_shift = 24, + OUTPUT7_ENABLE_mask = 0x0f << 28, + OUTPUT7_ENABLE_shift = 28, + PA_SC_GENERIC_SCISSOR_TL = 0x00028240, + PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x7fff << 0, + PA_SC_GENERIC_SCISSOR_TL__TL_X_shift = 0, + PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x7fff << 16, + PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift = 16, +/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ + PA_SC_GENERIC_SCISSOR_BR = 0x00028244, + PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x7fff << 0, + PA_SC_GENERIC_SCISSOR_BR__BR_X_shift = 0, + PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x7fff << 16, + PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift = 16, + PA_SC_VPORT_SCISSOR_0_TL = 
0x00028250, + PA_SC_VPORT_SCISSOR_0_TL_num = 16, + PA_SC_VPORT_SCISSOR_0_TL_offset = 8, + PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x7fff << 0, + PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift = 0, + PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x7fff << 16, + PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift = 16, +/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ + PA_SC_VPORT_SCISSOR_0_BR = 0x00028254, + PA_SC_VPORT_SCISSOR_0_BR_num = 16, + PA_SC_VPORT_SCISSOR_0_BR_offset = 8, + PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x7fff << 0, + PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift = 0, + PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x7fff << 16, + PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift = 16, + PA_SC_VPORT_ZMIN_0 = 0x000282d0, + PA_SC_VPORT_ZMIN_0_num = 16, + PA_SC_VPORT_ZMIN_0_offset = 8, + PA_SC_VPORT_ZMAX_0 = 0x000282d4, + PA_SC_VPORT_ZMAX_0_num = 16, + PA_SC_VPORT_ZMAX_0_offset = 8, + SX_MISC = 0x00028350, + MULTIPASS_bit = 1 << 0, + SQ_VTX_SEMANTIC_0 = 0x00028380, + SQ_VTX_SEMANTIC_0_num = 32, +/* SEMANTIC_ID_mask = 0xff << 0, */ +/* SEMANTIC_ID_shift = 0, */ + VGT_MAX_VTX_INDX = 0x00028400, + VGT_MIN_VTX_INDX = 0x00028404, + VGT_INDX_OFFSET = 0x00028408, + VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c, + SX_ALPHA_TEST_CONTROL = 0x00028410, + ALPHA_FUNC_mask = 0x07 << 0, + ALPHA_FUNC_shift = 0, + REF_NEVER = 0x00, + REF_LESS = 0x01, + REF_EQUAL = 0x02, + REF_LEQUAL = 0x03, + REF_GREATER = 0x04, + REF_NOTEQUAL = 0x05, + REF_GEQUAL = 0x06, + REF_ALWAYS = 0x07, + ALPHA_TEST_ENABLE_bit = 1 << 3, + ALPHA_TEST_BYPASS_bit = 1 << 8, + CB_BLEND_RED = 0x00028414, + CB_BLEND_GREEN = 0x00028418, + CB_BLEND_BLUE = 0x0002841c, + CB_BLEND_ALPHA = 0x00028420, + DB_STENCILREFMASK = 0x00028430, + STENCILREF_mask = 0xff << 0, + STENCILREF_shift = 0, + STENCILMASK_mask = 0xff << 8, + STENCILMASK_shift = 8, + STENCILWRITEMASK_mask = 0xff << 16, + STENCILWRITEMASK_shift = 16, + DB_STENCILREFMASK_BF = 0x00028434, + STENCILREF_BF_mask = 0xff << 0, + STENCILREF_BF_shift = 0, + STENCILMASK_BF_mask = 0xff << 8, + STENCILMASK_BF_shift = 8, + 
STENCILWRITEMASK_BF_mask = 0xff << 16, + STENCILWRITEMASK_BF_shift = 16, + SX_ALPHA_REF = 0x00028438, + PA_CL_VPORT_XSCALE_0 = 0x0002843c, + PA_CL_VPORT_XSCALE_0_num = 16, + PA_CL_VPORT_XSCALE_0_offset = 24, + PA_CL_VPORT_XOFFSET_0 = 0x00028440, + PA_CL_VPORT_XOFFSET_0_num = 16, + PA_CL_VPORT_XOFFSET_0_offset = 24, + PA_CL_VPORT_YSCALE_0 = 0x00028444, + PA_CL_VPORT_YSCALE_0_num = 16, + PA_CL_VPORT_YSCALE_0_offset = 24, + PA_CL_VPORT_YOFFSET_0 = 0x00028448, + PA_CL_VPORT_YOFFSET_0_num = 16, + PA_CL_VPORT_YOFFSET_0_offset = 24, + PA_CL_VPORT_ZSCALE_0 = 0x0002844c, + PA_CL_VPORT_ZSCALE_0_num = 16, + PA_CL_VPORT_ZSCALE_0_offset = 24, + PA_CL_VPORT_ZOFFSET_0 = 0x00028450, + PA_CL_VPORT_ZOFFSET_0_num = 16, + PA_CL_VPORT_ZOFFSET_0_offset = 24, + PA_CL_UCP_0_X = 0x000285bc, + PA_CL_UCP_0_X_num = 6, + PA_CL_UCP_0_X_offset = 16, + PA_CL_UCP_0_Y = 0x000285c0, + PA_CL_UCP_0_Y_num = 6, + PA_CL_UCP_0_Y_offset = 16, + PA_CL_UCP_0_Z = 0x000285c4, + PA_CL_UCP_0_Z_num = 6, + PA_CL_UCP_0_Z_offset = 16, + PA_CL_UCP_0_W = 0x000285c8, + PA_CL_UCP_0_W_num = 6, + PA_CL_UCP_0_W_offset = 16, + SPI_VS_OUT_ID_0 = 0x0002861c, + SPI_VS_OUT_ID_0_num = 10, + SEMANTIC_0_mask = 0xff << 0, + SEMANTIC_0_shift = 0, + SEMANTIC_1_mask = 0xff << 8, + SEMANTIC_1_shift = 8, + SEMANTIC_2_mask = 0xff << 16, + SEMANTIC_2_shift = 16, + SEMANTIC_3_mask = 0xff << 24, + SEMANTIC_3_shift = 24, + SPI_PS_INPUT_CNTL_0 = 0x00028644, + SPI_PS_INPUT_CNTL_0_num = 32, + SEMANTIC_mask = 0xff << 0, + SEMANTIC_shift = 0, + DEFAULT_VAL_mask = 0x03 << 8, + DEFAULT_VAL_shift = 8, + X_0_0F = 0x00, + FLAT_SHADE_bit = 1 << 10, + CYL_WRAP_mask = 0x0f << 13, + CYL_WRAP_shift = 13, + PT_SPRITE_TEX_bit = 1 << 17, + SPI_VS_OUT_CONFIG = 0x000286c4, + VS_PER_COMPONENT_bit = 1 << 0, + VS_EXPORT_COUNT_mask = 0x1f << 1, + VS_EXPORT_COUNT_shift = 1, + VS_EXPORTS_FOG_bit = 1 << 8, + VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9, + VS_OUT_FOG_VEC_ADDR_shift = 9, + SPI_PS_IN_CONTROL_0 = 0x000286cc, + NUM_INTERP_mask = 0x3f << 0, + NUM_INTERP_shift = 0, 
+ POSITION_ENA_bit = 1 << 8, + POSITION_CENTROID_bit = 1 << 9, + POSITION_ADDR_mask = 0x1f << 10, + POSITION_ADDR_shift = 10, + PARAM_GEN_mask = 0x0f << 15, + PARAM_GEN_shift = 15, + PERSP_GRADIENT_ENA_bit = 1 << 28, + LINEAR_GRADIENT_ENA_bit = 1 << 29, + POSITION_SAMPLE_bit = 1 << 30, + SPI_PS_IN_CONTROL_1 = 0x000286d0, + FRONT_FACE_ENA_bit = 1 << 8, + FRONT_FACE_ALL_BITS_bit = 1 << 11, + FRONT_FACE_ADDR_mask = 0x1f << 12, + FRONT_FACE_ADDR_shift = 12, + FOG_ADDR_mask = 0x7f << 17, + FOG_ADDR_shift = 17, + FIXED_PT_POSITION_ENA_bit = 1 << 24, + FIXED_PT_POSITION_ADDR_mask = 0x1f << 25, + FIXED_PT_POSITION_ADDR_shift = 25, + POSITION_ULC_bit = 1 << 30, + SPI_INTERP_CONTROL_0 = 0x000286d4, + FLAT_SHADE_ENA_bit = 1 << 0, + PNT_SPRITE_ENA_bit = 1 << 1, + PNT_SPRITE_OVRD_X_mask = 0x07 << 2, + PNT_SPRITE_OVRD_X_shift = 2, + SPI_PNT_SPRITE_SEL_0 = 0x00, + SPI_PNT_SPRITE_SEL_1 = 0x01, + SPI_PNT_SPRITE_SEL_S = 0x02, + SPI_PNT_SPRITE_SEL_T = 0x03, + SPI_PNT_SPRITE_SEL_NONE = 0x04, + PNT_SPRITE_OVRD_Y_mask = 0x07 << 5, + PNT_SPRITE_OVRD_Y_shift = 5, +/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ +/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ +/* SPI_PNT_SPRITE_SEL_S = 0x02, */ +/* SPI_PNT_SPRITE_SEL_T = 0x03, */ +/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ + PNT_SPRITE_OVRD_Z_mask = 0x07 << 8, + PNT_SPRITE_OVRD_Z_shift = 8, +/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ +/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ +/* SPI_PNT_SPRITE_SEL_S = 0x02, */ +/* SPI_PNT_SPRITE_SEL_T = 0x03, */ +/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ + PNT_SPRITE_OVRD_W_mask = 0x07 << 11, + PNT_SPRITE_OVRD_W_shift = 11, +/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ +/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ +/* SPI_PNT_SPRITE_SEL_S = 0x02, */ +/* SPI_PNT_SPRITE_SEL_T = 0x03, */ +/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ + PNT_SPRITE_TOP_1_bit = 1 << 14, + SPI_INPUT_Z = 0x000286d8, + PROVIDE_Z_TO_SPI_bit = 1 << 0, + SPI_FOG_CNTL = 0x000286dc, + PASS_FOG_THROUGH_PS_bit = 1 << 0, + SPI_BARYC_CNTL = 0x000286e0, + PERSP_CENTER_ENA_mask = 0x03 << 0, + PERSP_CENTER_ENA_shift = 
0, + X_OFF = 0x00, + PERSP_CENTER_ENA__X_ON_AT_CENTER = 0x01, + PERSP_CENTER_ENA__X_ON_AT_CENTROID = 0x02, + PERSP_CENTROID_ENA_mask = 0x03 << 4, + PERSP_CENTROID_ENA_shift = 4, +/* X_OFF = 0x00, */ + PERSP_CENTROID_ENA__X_ON_AT_CENTROID = 0x01, + PERSP_CENTROID_ENA__X_ON_AT_CENTER = 0x02, + PERSP_SAMPLE_ENA_mask = 0x03 << 8, + PERSP_SAMPLE_ENA_shift = 8, +/* X_OFF = 0x00, */ + PERSP_PULL_MODEL_ENA_mask = 0x03 << 12, + PERSP_PULL_MODEL_ENA_shift = 12, +/* X_OFF = 0x00, */ + LINEAR_CENTER_ENA_mask = 0x03 << 16, + LINEAR_CENTER_ENA_shift = 16, +/* X_OFF = 0x00, */ + LINEAR_CENTER_ENA__X_ON_AT_CENTER = 0x01, + LINEAR_CENTER_ENA__X_ON_AT_CENTROID = 0x02, + LINEAR_CENTROID_ENA_mask = 0x03 << 20, + LINEAR_CENTROID_ENA_shift = 20, +/* X_OFF = 0x00, */ + LINEAR_CENTROID_ENA__X_ON_AT_CENTROID = 0x01, + LINEAR_CENTROID_ENA__X_ON_AT_CENTER = 0x02, + LINEAR_SAMPLE_ENA_mask = 0x03 << 24, + LINEAR_SAMPLE_ENA_shift = 24, +/* X_OFF = 0x00, */ + SPI_PS_IN_CONTROL_2 = 0x000286e4, + LINE_STIPPLE_TEX_ADDR_mask = 0xff << 0, + LINE_STIPPLE_TEX_ADDR_shift = 0, + LINE_STIPPLE_TEX_ENA_bit = 1 << 8, + CB_BLEND0_CONTROL = 0x00028780, + CB_BLEND0_CONTROL_num = 8, + COLOR_SRCBLEND_mask = 0x1f << 0, + COLOR_SRCBLEND_shift = 0, + BLEND_ZERO = 0x00, + BLEND_ONE = 0x01, + BLEND_SRC_COLOR = 0x02, + BLEND_ONE_MINUS_SRC_COLOR = 0x03, + BLEND_SRC_ALPHA = 0x04, + BLEND_ONE_MINUS_SRC_ALPHA = 0x05, + BLEND_DST_ALPHA = 0x06, + BLEND_ONE_MINUS_DST_ALPHA = 0x07, + BLEND_DST_COLOR = 0x08, + BLEND_ONE_MINUS_DST_COLOR = 0x09, + BLEND_SRC_ALPHA_SATURATE = 0x0a, + BLEND_BOTH_SRC_ALPHA = 0x0b, + BLEND_BOTH_INV_SRC_ALPHA = 0x0c, + BLEND_CONSTANT_COLOR = 0x0d, + BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, + BLEND_SRC1_COLOR = 0x0f, + BLEND_INV_SRC1_COLOR = 0x10, + BLEND_SRC1_ALPHA = 0x11, + BLEND_INV_SRC1_ALPHA = 0x12, + BLEND_CONSTANT_ALPHA = 0x13, + BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, + COLOR_COMB_FCN_mask = 0x07 << 5, + COLOR_COMB_FCN_shift = 5, + COMB_DST_PLUS_SRC = 0x00, + COMB_SRC_MINUS_DST = 0x01, + 
COMB_MIN_DST_SRC = 0x02, + COMB_MAX_DST_SRC = 0x03, + COMB_DST_MINUS_SRC = 0x04, + COLOR_DESTBLEND_mask = 0x1f << 8, + COLOR_DESTBLEND_shift = 8, +/* BLEND_ZERO = 0x00, */ +/* BLEND_ONE = 0x01, */ +/* BLEND_SRC_COLOR = 0x02, */ +/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ +/* BLEND_SRC_ALPHA = 0x04, */ +/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ +/* BLEND_DST_ALPHA = 0x06, */ +/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ +/* BLEND_DST_COLOR = 0x08, */ +/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ +/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ +/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ +/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ +/* BLEND_CONSTANT_COLOR = 0x0d, */ +/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ +/* BLEND_SRC1_COLOR = 0x0f, */ +/* BLEND_INV_SRC1_COLOR = 0x10, */ +/* BLEND_SRC1_ALPHA = 0x11, */ +/* BLEND_INV_SRC1_ALPHA = 0x12, */ +/* BLEND_CONSTANT_ALPHA = 0x13, */ +/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ + ALPHA_SRCBLEND_mask = 0x1f << 16, + ALPHA_SRCBLEND_shift = 16, +/* BLEND_ZERO = 0x00, */ +/* BLEND_ONE = 0x01, */ +/* BLEND_SRC_COLOR = 0x02, */ +/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ +/* BLEND_SRC_ALPHA = 0x04, */ +/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ +/* BLEND_DST_ALPHA = 0x06, */ +/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ +/* BLEND_DST_COLOR = 0x08, */ +/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ +/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ +/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ +/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ +/* BLEND_CONSTANT_COLOR = 0x0d, */ +/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ +/* BLEND_SRC1_COLOR = 0x0f, */ +/* BLEND_INV_SRC1_COLOR = 0x10, */ +/* BLEND_SRC1_ALPHA = 0x11, */ +/* BLEND_INV_SRC1_ALPHA = 0x12, */ +/* BLEND_CONSTANT_ALPHA = 0x13, */ +/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ + ALPHA_COMB_FCN_mask = 0x07 << 21, + ALPHA_COMB_FCN_shift = 21, +/* COMB_DST_PLUS_SRC = 0x00, */ +/* COMB_SRC_MINUS_DST = 0x01, */ +/* COMB_MIN_DST_SRC = 0x02, */ +/* COMB_MAX_DST_SRC = 0x03, */ +/* COMB_DST_MINUS_SRC = 0x04, */ + ALPHA_DESTBLEND_mask = 0x1f << 24, + 
ALPHA_DESTBLEND_shift = 24, +/* BLEND_ZERO = 0x00, */ +/* BLEND_ONE = 0x01, */ +/* BLEND_SRC_COLOR = 0x02, */ +/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ +/* BLEND_SRC_ALPHA = 0x04, */ +/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ +/* BLEND_DST_ALPHA = 0x06, */ +/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ +/* BLEND_DST_COLOR = 0x08, */ +/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ +/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ +/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ +/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ +/* BLEND_CONSTANT_COLOR = 0x0d, */ +/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ +/* BLEND_SRC1_COLOR = 0x0f, */ +/* BLEND_INV_SRC1_COLOR = 0x10, */ +/* BLEND_SRC1_ALPHA = 0x11, */ +/* BLEND_INV_SRC1_ALPHA = 0x12, */ +/* BLEND_CONSTANT_ALPHA = 0x13, */ +/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ + SEPARATE_ALPHA_BLEND_bit = 1 << 29, + CB_BLEND0_CONTROL__ENABLE_bit = 1 << 30, + PA_CL_POINT_X_RAD = 0x000287d4, + PA_CL_POINT_Y_RAD = 0x000287d8, + PA_CL_POINT_SIZE = 0x000287dc, + PA_CL_POINT_CULL_RAD = 0x000287e0, + VGT_DMA_BASE_HI = 0x000287e4, + VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0, + VGT_DMA_BASE_HI__BASE_ADDR_shift = 0, + VGT_DMA_BASE = 0x000287e8, + VGT_DRAW_INITIATOR = 0x000287f0, + SOURCE_SELECT_mask = 0x03 << 0, + SOURCE_SELECT_shift = 0, + DI_SRC_SEL_DMA = 0x00, + DI_SRC_SEL_IMMEDIATE = 0x01, + DI_SRC_SEL_AUTO_INDEX = 0x02, + DI_SRC_SEL_RESERVED = 0x03, + MAJOR_MODE_mask = 0x03 << 2, + MAJOR_MODE_shift = 2, + DI_MAJOR_MODE_0 = 0x00, + DI_MAJOR_MODE_1 = 0x01, + NOT_EOP_bit = 1 << 5, + USE_OPAQUE_bit = 1 << 6, + VGT_IMMED_DATA = 0x000287f4, + VGT_EVENT_ADDRESS_REG = 0x000287f8, + ADDRESS_LOW_mask = 0xfffffff << 0, + ADDRESS_LOW_shift = 0, + DB_DEPTH_CONTROL = 0x00028800, + STENCIL_ENABLE_bit = 1 << 0, + Z_ENABLE_bit = 1 << 1, + Z_WRITE_ENABLE_bit = 1 << 2, + ZFUNC_mask = 0x07 << 4, + ZFUNC_shift = 4, + FRAG_NEVER = 0x00, + FRAG_LESS = 0x01, + FRAG_EQUAL = 0x02, + FRAG_LEQUAL = 0x03, + FRAG_GREATER = 0x04, + FRAG_NOTEQUAL = 0x05, + FRAG_GEQUAL = 0x06, + FRAG_ALWAYS = 0x07, + 
BACKFACE_ENABLE_bit = 1 << 7, + STENCILFUNC_mask = 0x07 << 8, + STENCILFUNC_shift = 8, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + STENCILFAIL_mask = 0x07 << 11, + STENCILFAIL_shift = 11, + STENCIL_KEEP = 0x00, + STENCIL_ZERO = 0x01, + STENCIL_REPLACE = 0x02, + STENCIL_INCR_CLAMP = 0x03, + STENCIL_DECR_CLAMP = 0x04, + STENCIL_INVERT = 0x05, + STENCIL_INCR_WRAP = 0x06, + STENCIL_DECR_WRAP = 0x07, + STENCILZPASS_mask = 0x07 << 14, + STENCILZPASS_shift = 14, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILZFAIL_mask = 0x07 << 17, + STENCILZFAIL_shift = 17, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILFUNC_BF_mask = 0x07 << 20, + STENCILFUNC_BF_shift = 20, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + STENCILFAIL_BF_mask = 0x07 << 23, + STENCILFAIL_BF_shift = 23, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILZPASS_BF_mask = 0x07 << 26, + STENCILZPASS_BF_shift = 26, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ 
+/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILZFAIL_BF_mask = 0x07 << 29, + STENCILZFAIL_BF_shift = 29, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + CB_COLOR_CONTROL = 0x00028808, + DEGAMMA_ENABLE_bit = 1 << 3, + CB_COLOR_CONTROL__MODE_mask = 0x07 << 4, + CB_COLOR_CONTROL__MODE_shift = 4, + CB_DISABLE = 0x00, + CB_NORMAL = 0x01, + CB_ELIMINATE_FAST_CLEAR = 0x02, + CB_RESOLVE = 0x03, + CB_DECOMPRESS = 0x04, + CB_FMASK_DECOMPRESS = 0x05, + ROP3_mask = 0xff << 16, + ROP3_shift = 16, + DB_SHADER_CONTROL = 0x0002880c, + Z_EXPORT_ENABLE_bit = 1 << 0, + STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1, + Z_ORDER_mask = 0x03 << 4, + Z_ORDER_shift = 4, + LATE_Z = 0x00, + EARLY_Z_THEN_LATE_Z = 0x01, + RE_Z = 0x02, + EARLY_Z_THEN_RE_Z = 0x03, + KILL_ENABLE_bit = 1 << 6, + COVERAGE_TO_MASK_ENABLE_bit = 1 << 7, + MASK_EXPORT_ENABLE_bit = 1 << 8, + DUAL_EXPORT_ENABLE_bit = 1 << 9, + EXEC_ON_HIER_FAIL_bit = 1 << 10, + EXEC_ON_NOOP_bit = 1 << 11, + ALPHA_TO_MASK_DISABLE_bit = 1 << 12, + DB_SOURCE_FORMAT_mask = 0x03 << 13, + DB_SOURCE_FORMAT_shift = 13, + EXPORT_DB_FULL = 0x00, + EXPORT_DB_FOUR16 = 0x01, + EXPORT_DB_TWO = 0x02, + DEPTH_BEFORE_SHADER_bit = 1 << 15, + CONSERVATIVE_Z_EXPORT_mask = 0x03 << 16, + CONSERVATIVE_Z_EXPORT_shift = 16, + EXPORT_ANY_Z = 0x00, + EXPORT_LESS_THAN_Z = 0x01, + EXPORT_GREATER_THAN_Z = 0x02, + EXPORT_RESERVED = 0x03, + PA_CL_CLIP_CNTL = 0x00028810, + UCP_ENA_0_bit = 1 << 0, + UCP_ENA_1_bit = 1 << 1, + UCP_ENA_2_bit = 1 << 2, + UCP_ENA_3_bit = 1 << 3, + UCP_ENA_4_bit = 1 << 4, + UCP_ENA_5_bit = 1 << 5, + PS_UCP_Y_SCALE_NEG_bit = 1 << 13, + PS_UCP_MODE_mask = 0x03 << 14, + PS_UCP_MODE_shift = 14, + CLIP_DISABLE_bit = 1 << 16, + UCP_CULL_ONLY_ENA_bit = 1 << 17, + BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18, 
+ DX_CLIP_SPACE_DEF_bit = 1 << 19, + DIS_CLIP_ERR_DETECT_bit = 1 << 20, + VTX_KILL_OR_bit = 1 << 21, + DX_RASTERIZATION_KILL_bit = 1 << 22, + DX_LINEAR_ATTR_CLIP_ENA_bit = 1 << 24, + VTE_VPORT_PROVOKE_DISABLE_bit = 1 << 25, + ZCLIP_NEAR_DISABLE_bit = 1 << 26, + ZCLIP_FAR_DISABLE_bit = 1 << 27, + PA_SU_SC_MODE_CNTL = 0x00028814, + CULL_FRONT_bit = 1 << 0, + CULL_BACK_bit = 1 << 1, + FACE_bit = 1 << 2, + POLY_MODE_mask = 0x03 << 3, + POLY_MODE_shift = 3, + X_DISABLE_POLY_MODE = 0x00, + X_DUAL_MODE = 0x01, + POLYMODE_FRONT_PTYPE_mask = 0x07 << 5, + POLYMODE_FRONT_PTYPE_shift = 5, + X_DRAW_POINTS = 0x00, + X_DRAW_LINES = 0x01, + X_DRAW_TRIANGLES = 0x02, + POLYMODE_BACK_PTYPE_mask = 0x07 << 8, + POLYMODE_BACK_PTYPE_shift = 8, +/* X_DRAW_POINTS = 0x00, */ +/* X_DRAW_LINES = 0x01, */ +/* X_DRAW_TRIANGLES = 0x02, */ + POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11, + POLY_OFFSET_BACK_ENABLE_bit = 1 << 12, + POLY_OFFSET_PARA_ENABLE_bit = 1 << 13, + VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16, + PROVOKING_VTX_LAST_bit = 1 << 19, + PERSP_CORR_DIS_bit = 1 << 20, + MULTI_PRIM_IB_ENA_bit = 1 << 21, + PA_CL_VTE_CNTL = 0x00028818, + VPORT_X_SCALE_ENA_bit = 1 << 0, + VPORT_X_OFFSET_ENA_bit = 1 << 1, + VPORT_Y_SCALE_ENA_bit = 1 << 2, + VPORT_Y_OFFSET_ENA_bit = 1 << 3, + VPORT_Z_SCALE_ENA_bit = 1 << 4, + VPORT_Z_OFFSET_ENA_bit = 1 << 5, + VTX_XY_FMT_bit = 1 << 8, + VTX_Z_FMT_bit = 1 << 9, + VTX_W0_FMT_bit = 1 << 10, + PA_CL_VS_OUT_CNTL = 0x0002881c, + CLIP_DIST_ENA_0_bit = 1 << 0, + CLIP_DIST_ENA_1_bit = 1 << 1, + CLIP_DIST_ENA_2_bit = 1 << 2, + CLIP_DIST_ENA_3_bit = 1 << 3, + CLIP_DIST_ENA_4_bit = 1 << 4, + CLIP_DIST_ENA_5_bit = 1 << 5, + CLIP_DIST_ENA_6_bit = 1 << 6, + CLIP_DIST_ENA_7_bit = 1 << 7, + CULL_DIST_ENA_0_bit = 1 << 8, + CULL_DIST_ENA_1_bit = 1 << 9, + CULL_DIST_ENA_2_bit = 1 << 10, + CULL_DIST_ENA_3_bit = 1 << 11, + CULL_DIST_ENA_4_bit = 1 << 12, + CULL_DIST_ENA_5_bit = 1 << 13, + CULL_DIST_ENA_6_bit = 1 << 14, + CULL_DIST_ENA_7_bit = 1 << 15, + USE_VTX_POINT_SIZE_bit = 1 << 16, + 
USE_VTX_EDGE_FLAG_bit = 1 << 17, + USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18, + USE_VTX_VIEWPORT_INDX_bit = 1 << 19, + USE_VTX_KILL_FLAG_bit = 1 << 20, + VS_OUT_MISC_VEC_ENA_bit = 1 << 21, + VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22, + VS_OUT_CCDIST1_VEC_ENA_bit = 1 << 23, + PA_CL_NANINF_CNTL = 0x00028820, + VTE_XY_INF_DISCARD_bit = 1 << 0, + VTE_Z_INF_DISCARD_bit = 1 << 1, + VTE_W_INF_DISCARD_bit = 1 << 2, + VTE_0XNANINF_IS_0_bit = 1 << 3, + VTE_XY_NAN_RETAIN_bit = 1 << 4, + VTE_Z_NAN_RETAIN_bit = 1 << 5, + VTE_W_NAN_RETAIN_bit = 1 << 6, + VTE_W_RECIP_NAN_IS_0_bit = 1 << 7, + VS_XY_NAN_TO_INF_bit = 1 << 8, + VS_XY_INF_RETAIN_bit = 1 << 9, + VS_Z_NAN_TO_INF_bit = 1 << 10, + VS_Z_INF_RETAIN_bit = 1 << 11, + VS_W_NAN_TO_INF_bit = 1 << 12, + VS_W_INF_RETAIN_bit = 1 << 13, + VS_CLIP_DIST_INF_DISCARD_bit = 1 << 14, + VTE_NO_OUTPUT_NEG_0_bit = 1 << 20, + PA_SU_LINE_STIPPLE_CNTL = 0x00028824, + LINE_STIPPLE_RESET_mask = 0x03 << 0, + LINE_STIPPLE_RESET_shift = 0, + EXPAND_FULL_LENGTH_bit = 1 << 2, + FRACTIONAL_ACCUM_bit = 1 << 3, + DIAMOND_ADJUST_bit = 1 << 4, + PA_SU_LINE_STIPPLE_SCALE = 0x00028828, + PA_SU_PRIM_FILTER_CNTL = 0x0002882c, + TRIANGLE_FILTER_DISABLE_bit = 1 << 0, + LINE_FILTER_DISABLE_bit = 1 << 1, + POINT_FILTER_DISABLE_bit = 1 << 2, + RECTANGLE_FILTER_DISABLE_bit = 1 << 3, + TRIANGLE_EXPAND_ENA_bit = 1 << 4, + LINE_EXPAND_ENA_bit = 1 << 5, + POINT_EXPAND_ENA_bit = 1 << 6, + RECTANGLE_EXPAND_ENA_bit = 1 << 7, + PRIM_EXPAND_CONSTANT_mask = 0xff << 8, + PRIM_EXPAND_CONSTANT_shift = 8, + SQ_LSTMP_RING_ITEMSIZE = 0x00028830, + ITEMSIZE_mask = 0x7fff << 0, + ITEMSIZE_shift = 0, + SQ_HSTMP_RING_ITEMSIZE = 0x00028834, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_PGM_START_PS = 0x00028840, + SQ_PGM_RESOURCES_PS = 0x00028844, + NUM_GPRS_mask = 0xff << 0, + NUM_GPRS_shift = 0, + STACK_SIZE_mask = 0xff << 8, + STACK_SIZE_shift = 8, + DX10_CLAMP_bit = 1 << 21, + UNCACHED_FIRST_INST_bit = 1 << 28, + CLAMP_CONSTS_bit = 1 << 31, + SQ_PGM_RESOURCES_2_PS = 
0x00028848, + SINGLE_ROUND_mask = 0x03 << 0, + SINGLE_ROUND_shift = 0, + SQ_ROUND_NEAREST_EVEN = 0x00, + SQ_ROUND_PLUS_INFINITY = 0x01, + SQ_ROUND_MINUS_INFINITY = 0x02, + SQ_ROUND_TO_ZERO = 0x03, + DOUBLE_ROUND_mask = 0x03 << 2, + DOUBLE_ROUND_shift = 2, +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ + ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, + ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, + ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, + ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, + SQ_PGM_EXPORTS_PS = 0x0002884c, + EXPORT_MODE_mask = 0x1f << 0, + EXPORT_MODE_shift = 0, + SQ_PGM_START_VS = 0x0002885c, + SQ_PGM_RESOURCES_VS = 0x00028860, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ +/* DX10_CLAMP_bit = 1 << 21, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_RESOURCES_2_VS = 0x00028864, +/* SINGLE_ROUND_mask = 0x03 << 0, */ +/* SINGLE_ROUND_shift = 0, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* DOUBLE_ROUND_mask = 0x03 << 2, */ +/* DOUBLE_ROUND_shift = 2, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */ +/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */ +/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */ +/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */ + SQ_PGM_START_GS = 0x00028874, + SQ_PGM_RESOURCES_GS = 0x00028878, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ +/* DX10_CLAMP_bit = 1 << 21, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_RESOURCES_2_GS = 0x0002887c, +/* SINGLE_ROUND_mask = 0x03 << 0, */ +/* SINGLE_ROUND_shift = 0, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* 
SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* DOUBLE_ROUND_mask = 0x03 << 2, */ +/* DOUBLE_ROUND_shift = 2, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */ +/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */ +/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */ +/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */ + SQ_PGM_START_ES = 0x0002888c, + SQ_PGM_RESOURCES_ES = 0x00028890, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ +/* DX10_CLAMP_bit = 1 << 21, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_RESOURCES_2_ES = 0x00028894, +/* SINGLE_ROUND_mask = 0x03 << 0, */ +/* SINGLE_ROUND_shift = 0, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* DOUBLE_ROUND_mask = 0x03 << 2, */ +/* DOUBLE_ROUND_shift = 2, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */ +/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */ +/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */ +/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */ + SQ_PGM_START_FS = 0x000288a4, + SQ_PGM_RESOURCES_FS = 0x000288a8, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ +/* DX10_CLAMP_bit = 1 << 21, */ + SQ_PGM_START_HS = 0x000288b8, + SQ_PGM_RESOURCES_HS = 0x000288bc, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ +/* DX10_CLAMP_bit = 1 << 21, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_RESOURCES_2_HS = 0x000288c0, +/* SINGLE_ROUND_mask = 0x03 << 0, */ +/* 
SINGLE_ROUND_shift = 0, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* DOUBLE_ROUND_mask = 0x03 << 2, */ +/* DOUBLE_ROUND_shift = 2, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */ +/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */ +/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */ +/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */ + SQ_PGM_START_LS = 0x000288d0, + SQ_PGM_RESOURCES_LS = 0x000288d4, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ +/* DX10_CLAMP_bit = 1 << 21, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_RESOURCES_2_LS = 0x000288d8, +/* SINGLE_ROUND_mask = 0x03 << 0, */ +/* SINGLE_ROUND_shift = 0, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* DOUBLE_ROUND_mask = 0x03 << 2, */ +/* DOUBLE_ROUND_shift = 2, */ +/* SQ_ROUND_NEAREST_EVEN = 0x00, */ +/* SQ_ROUND_PLUS_INFINITY = 0x01, */ +/* SQ_ROUND_MINUS_INFINITY = 0x02, */ +/* SQ_ROUND_TO_ZERO = 0x03, */ +/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */ +/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */ +/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */ +/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */ + SQ_VTX_SEMANTIC_CLEAR = 0x000288f0, + SQ_ESGS_RING_ITEMSIZE = 0x00028900, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GSVS_RING_ITEMSIZE = 0x00028904, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_ESTMP_RING_ITEMSIZE = 0x00028908, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GSTMP_RING_ITEMSIZE = 0x0002890c, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_VSTMP_RING_ITEMSIZE = 0x00028910, +/* ITEMSIZE_mask = 0x7fff << 0, */ 
+/* ITEMSIZE_shift = 0, */ + SQ_PSTMP_RING_ITEMSIZE = 0x00028914, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GS_VERT_ITEMSIZE = 0x0002891c, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GS_VERT_ITEMSIZE_1 = 0x00028920, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GS_VERT_ITEMSIZE_2 = 0x00028924, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GS_VERT_ITEMSIZE_3 = 0x00028928, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GSVS_RING_OFFSET_1 = 0x0002892c, + SQ_GSVS_RING_OFFSET_1__OFFSET_mask = 0x7fff << 0, + SQ_GSVS_RING_OFFSET_1__OFFSET_shift = 0, + SQ_GSVS_RING_OFFSET_2 = 0x00028930, + SQ_GSVS_RING_OFFSET_2__OFFSET_mask = 0x7fff << 0, + SQ_GSVS_RING_OFFSET_2__OFFSET_shift = 0, + SQ_GSVS_RING_OFFSET_3 = 0x00028934, + SQ_GSVS_RING_OFFSET_3__OFFSET_mask = 0x7fff << 0, + SQ_GSVS_RING_OFFSET_3__OFFSET_shift = 0, + SQ_ALU_CONST_CACHE_PS_0 = 0x00028940, + SQ_ALU_CONST_CACHE_PS_0_num = 16, + SQ_ALU_CONST_CACHE_VS_0 = 0x00028980, + SQ_ALU_CONST_CACHE_VS_0_num = 16, + SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0, + SQ_ALU_CONST_CACHE_GS_0_num = 16, + PA_SU_POINT_SIZE = 0x00028a00, + HEIGHT_mask = 0xffff << 0, + HEIGHT_shift = 0, + PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16, + PA_SU_POINT_SIZE__WIDTH_shift = 16, + PA_SU_POINT_MINMAX = 0x00028a04, + MIN_SIZE_mask = 0xffff << 0, + MIN_SIZE_shift = 0, + PA_SU_POINT_MINMAX__MAX_SIZE_mask = 0xffff << 16, + PA_SU_POINT_MINMAX__MAX_SIZE_shift = 16, + PA_SU_LINE_CNTL = 0x00028a08, + PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0, + PA_SU_LINE_CNTL__WIDTH_shift = 0, + PA_SC_LINE_STIPPLE = 0x00028a0c, + LINE_PATTERN_mask = 0xffff << 0, + LINE_PATTERN_shift = 0, + REPEAT_COUNT_mask = 0xff << 16, + REPEAT_COUNT_shift = 16, + PATTERN_BIT_ORDER_bit = 1 << 28, + AUTO_RESET_CNTL_mask = 0x03 << 29, + AUTO_RESET_CNTL_shift = 29, + VGT_OUTPUT_PATH_CNTL = 0x00028a10, + PATH_SELECT_mask = 0x07 << 0, + PATH_SELECT_shift = 0, + VGT_OUTPATH_VTX_REUSE = 
0x00, + VGT_OUTPATH_TESS_EN = 0x01, + VGT_OUTPATH_PASSTHRU = 0x02, + VGT_OUTPATH_GS_BLOCK = 0x03, + VGT_OUTPATH_HS_BLOCK = 0x04, + VGT_HOS_CNTL = 0x00028a14, + TESS_MODE_mask = 0x03 << 0, + TESS_MODE_shift = 0, + VGT_HOS_MAX_TESS_LEVEL = 0x00028a18, + VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c, + VGT_HOS_REUSE_DEPTH = 0x00028a20, + REUSE_DEPTH_mask = 0xff << 0, + REUSE_DEPTH_shift = 0, + VGT_GROUP_PRIM_TYPE = 0x00028a24, + VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0, + VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift = 0, + VGT_GRP_3D_POINT = 0x00, + VGT_GRP_3D_LINE = 0x01, + VGT_GRP_3D_TRI = 0x02, + VGT_GRP_3D_RECT = 0x03, + VGT_GRP_3D_QUAD = 0x04, + VGT_GRP_2D_COPY_RECT_V0 = 0x05, + VGT_GRP_2D_COPY_RECT_V1 = 0x06, + VGT_GRP_2D_COPY_RECT_V2 = 0x07, + VGT_GRP_2D_COPY_RECT_V3 = 0x08, + VGT_GRP_2D_FILL_RECT = 0x09, + VGT_GRP_2D_LINE = 0x0a, + VGT_GRP_2D_TRI = 0x0b, + VGT_GRP_PRIM_INDEX_LINE = 0x0c, + VGT_GRP_PRIM_INDEX_TRI = 0x0d, + VGT_GRP_PRIM_INDEX_QUAD = 0x0e, + VGT_GRP_3D_LINE_ADJ = 0x0f, + VGT_GRP_3D_TRI_ADJ = 0x10, + VGT_GRP_3D_PATCH = 0x11, + RETAIN_ORDER_bit = 1 << 14, + RETAIN_QUADS_bit = 1 << 15, + PRIM_ORDER_mask = 0x07 << 16, + PRIM_ORDER_shift = 16, + VGT_GRP_LIST = 0x00, + VGT_GRP_STRIP = 0x01, + VGT_GRP_FAN = 0x02, + VGT_GRP_LOOP = 0x03, + VGT_GRP_POLYGON = 0x04, + VGT_GROUP_FIRST_DECR = 0x00028a28, + FIRST_DECR_mask = 0x0f << 0, + FIRST_DECR_shift = 0, + VGT_GROUP_DECR = 0x00028a2c, + DECR_mask = 0x0f << 0, + DECR_shift = 0, + VGT_GROUP_VECT_0_CNTL = 0x00028a30, + COMP_X_EN_bit = 1 << 0, + COMP_Y_EN_bit = 1 << 1, + COMP_Z_EN_bit = 1 << 2, + COMP_W_EN_bit = 1 << 3, + VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8, + VGT_GROUP_VECT_0_CNTL__STRIDE_shift = 8, + SHIFT_mask = 0xff << 16, + SHIFT_shift = 16, + VGT_GROUP_VECT_1_CNTL = 0x00028a34, +/* COMP_X_EN_bit = 1 << 0, */ +/* COMP_Y_EN_bit = 1 << 1, */ +/* COMP_Z_EN_bit = 1 << 2, */ +/* COMP_W_EN_bit = 1 << 3, */ + VGT_GROUP_VECT_1_CNTL__STRIDE_mask = 0xff << 8, + VGT_GROUP_VECT_1_CNTL__STRIDE_shift = 8, +/* 
SHIFT_mask = 0xff << 16, */ +/* SHIFT_shift = 16, */ + VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38, + X_CONV_mask = 0x0f << 0, + X_CONV_shift = 0, + VGT_GRP_INDEX_16 = 0x00, + VGT_GRP_INDEX_32 = 0x01, + VGT_GRP_UINT_16 = 0x02, + VGT_GRP_UINT_32 = 0x03, + VGT_GRP_SINT_16 = 0x04, + VGT_GRP_SINT_32 = 0x05, + VGT_GRP_FLOAT_32 = 0x06, + VGT_GRP_AUTO_PRIM = 0x07, + VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, + X_OFFSET_mask = 0x0f << 4, + X_OFFSET_shift = 4, + Y_CONV_mask = 0x0f << 8, + Y_CONV_shift = 8, +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ + Y_OFFSET_mask = 0x0f << 12, + Y_OFFSET_shift = 12, + Z_CONV_mask = 0x0f << 16, + Z_CONV_shift = 16, +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ + Z_OFFSET_mask = 0x0f << 20, + Z_OFFSET_shift = 20, + W_CONV_mask = 0x0f << 24, + W_CONV_shift = 24, +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ + W_OFFSET_mask = 0x0f << 28, + W_OFFSET_shift = 28, + VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c, +/* X_CONV_mask = 0x0f << 0, */ +/* X_CONV_shift = 0, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* 
VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* X_OFFSET_mask = 0x0f << 4, */ +/* X_OFFSET_shift = 4, */ +/* Y_CONV_mask = 0x0f << 8, */ +/* Y_CONV_shift = 8, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* Y_OFFSET_mask = 0x0f << 12, */ +/* Y_OFFSET_shift = 12, */ +/* Z_CONV_mask = 0x0f << 16, */ +/* Z_CONV_shift = 16, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* Z_OFFSET_mask = 0x0f << 20, */ +/* Z_OFFSET_shift = 20, */ +/* W_CONV_mask = 0x0f << 24, */ +/* W_CONV_shift = 24, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* W_OFFSET_mask = 0x0f << 28, */ +/* W_OFFSET_shift = 28, */ + VGT_GS_MODE = 0x00028a40, + VGT_GS_MODE__MODE_mask = 0x03 << 0, + VGT_GS_MODE__MODE_shift = 0, + GS_OFF = 0x00, + GS_SCENARIO_A = 0x01, + GS_SCENARIO_B = 0x02, + GS_SCENARIO_G = 0x03, + GS_SCENARIO_C = 0x04, + SPRITE_EN = 0x05, + ES_PASSTHRU_bit = 1 << 2, + CUT_MODE_mask = 0x03 << 3, + CUT_MODE_shift = 3, + GS_CUT_1024 = 0x00, + GS_CUT_512 = 0x01, + GS_CUT_256 = 0x02, + GS_CUT_128 = 0x03, + MODE_HI_bit = 1 << 8, + PA_SC_MODE_CNTL_0 = 0x00028a48, + MSAA_ENABLE_bit = 1 << 0, + VPORT_SCISSOR_ENABLE_bit = 1 << 1, + LINE_STIPPLE_ENABLE_bit = 1 << 2, + VGT_ENHANCE = 0x00028a50, + VGT_GS_PER_ES = 0x00028a54, + GS_PER_ES_mask = 0x7ff << 0, + 
GS_PER_ES_shift = 0, + VGT_ES_PER_GS = 0x00028a58, + ES_PER_GS_mask = 0x7ff << 0, + ES_PER_GS_shift = 0, + VGT_GS_PER_VS = 0x00028a5c, + GS_PER_VS_mask = 0x0f << 0, + GS_PER_VS_shift = 0, + VGT_GS_OUT_PRIM_TYPE = 0x00028a6c, + OUTPRIM_TYPE_mask = 0x3f << 0, + OUTPRIM_TYPE_shift = 0, + POINTLIST = 0x00, + LINESTRIP = 0x01, + TRISTRIP = 0x02, + VGT_DMA_SIZE = 0x00028a74, + VGT_DMA_MAX_SIZE = 0x00028a78, + VGT_DMA_INDEX_TYPE = 0x00028a7c, +/* INDEX_TYPE_mask = 0x03 << 0, */ +/* INDEX_TYPE_shift = 0, */ + VGT_INDEX_16 = 0x00, + VGT_INDEX_32 = 0x01, + SWAP_MODE_mask = 0x03 << 2, + SWAP_MODE_shift = 2, + VGT_DMA_SWAP_NONE = 0x00, + VGT_DMA_SWAP_16_BIT = 0x01, + VGT_DMA_SWAP_32_BIT = 0x02, + VGT_DMA_SWAP_WORD = 0x03, + VGT_PRIMITIVEID_EN = 0x00028a84, + PRIMITIVEID_EN_bit = 1 << 0, + VGT_DMA_NUM_INSTANCES = 0x00028a88, + VGT_EVENT_INITIATOR = 0x00028a90, + EVENT_TYPE_mask = 0x3f << 0, + EVENT_TYPE_shift = 0, + SAMPLE_STREAMOUTSTATS1 = 0x01, + SAMPLE_STREAMOUTSTATS2 = 0x02, + SAMPLE_STREAMOUTSTATS3 = 0x03, + CACHE_FLUSH_TS = 0x04, + CONTEXT_DONE = 0x05, + CACHE_FLUSH = 0x06, + CS_PARTIAL_FLUSH = 0x07, + RST_PIX_CNT = 0x0d, + VS_PARTIAL_FLUSH = 0x0f, + PS_PARTIAL_FLUSH = 0x10, + FLUSH_HS_OUTPUT = 0x11, + FLUSH_LS_OUTPUT = 0x12, + CACHE_FLUSH_AND_INV_TS_EVENT = 0x14, + ZPASS_DONE = 0x15, + CACHE_FLUSH_AND_INV_EVENT = 0x16, + PERFCOUNTER_START = 0x17, + PERFCOUNTER_STOP = 0x18, + PIPELINESTAT_START = 0x19, + PIPELINESTAT_STOP = 0x1a, + PERFCOUNTER_SAMPLE = 0x1b, + FLUSH_ES_OUTPUT = 0x1c, + FLUSH_GS_OUTPUT = 0x1d, + SAMPLE_PIPELINESTAT = 0x1e, + SO_VGTSTREAMOUT_FLUSH = 0x1f, + SAMPLE_STREAMOUTSTATS = 0x20, + RESET_VTX_CNT = 0x21, + BLOCK_CONTEXT_DONE = 0x22, + CS_CONTEXT_DONE = 0x23, + VGT_FLUSH = 0x24, + SQ_NON_EVENT = 0x26, + SC_SEND_DB_VPZ = 0x27, + BOTTOM_OF_PIPE_TS = 0x28, + FLUSH_SX_TS = 0x29, + DB_CACHE_FLUSH_AND_INV = 0x2a, + FLUSH_AND_INV_DB_DATA_TS = 0x2b, + FLUSH_AND_INV_DB_META = 0x2c, + FLUSH_AND_INV_CB_DATA_TS = 0x2d, + FLUSH_AND_INV_CB_META = 0x2e, + CS_DONE = 
0x2f, + PS_DONE = 0x30, + FLUSH_AND_INV_CB_PIXEL_DATA = 0x31, + ADDRESS_HI_mask = 0xff << 19, + ADDRESS_HI_shift = 19, + EXTENDED_EVENT_bit = 1 << 27, + VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94, + RESET_EN_bit = 1 << 0, + VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0, + VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4, + VGT_REUSE_OFF = 0x00028ab4, + REUSE_OFF_bit = 1 << 0, + VGT_VTX_CNT_EN = 0x00028ab8, + VTX_CNT_EN_bit = 1 << 0, + DB_HTILE_SURFACE = 0x00028abc, + HTILE_WIDTH_bit = 1 << 0, + HTILE_HEIGHT_bit = 1 << 1, + LINEAR_bit = 1 << 2, + FULL_CACHE_bit = 1 << 3, + HTILE_USES_PRELOAD_WIN_bit = 1 << 4, + PRELOAD_bit = 1 << 5, + PREFETCH_WIDTH_mask = 0x3f << 6, + PREFETCH_WIDTH_shift = 6, + PREFETCH_HEIGHT_mask = 0x3f << 12, + PREFETCH_HEIGHT_shift = 12, + DB_SRESULTS_COMPARE_STATE0 = 0x00028ac0, + COMPAREFUNC0_mask = 0x07 << 0, + COMPAREFUNC0_shift = 0, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + COMPAREVALUE0_mask = 0xff << 4, + COMPAREVALUE0_shift = 4, + COMPAREMASK0_mask = 0xff << 12, + COMPAREMASK0_shift = 12, + ENABLE0_bit = 1 << 24, + DB_SRESULTS_COMPARE_STATE1 = 0x00028ac4, + COMPAREFUNC1_mask = 0x07 << 0, + COMPAREFUNC1_shift = 0, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + COMPAREVALUE1_mask = 0xff << 4, + COMPAREVALUE1_shift = 4, + COMPAREMASK1_mask = 0xff << 12, + COMPAREMASK1_shift = 12, + ENABLE1_bit = 1 << 24, + DB_PRELOAD_CONTROL = 0x00028ac8, + START_X_mask = 0xff << 0, + START_X_shift = 0, + START_Y_mask = 0xff << 8, + START_Y_shift = 8, + MAX_X_mask = 0xff << 16, + MAX_X_shift = 16, + MAX_Y_mask = 0xff << 24, + MAX_Y_shift = 24, + VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0, + VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4, + 
VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8, + VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc, + VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0, + VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4, + VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8, + VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec, + VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0, + VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4, + VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8, + VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc, + VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00, + VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04, + VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08, + VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c, + VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10, + VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14, + VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18, + VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c, + VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28, + VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c, + VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30, + VERTEX_STRIDE_mask = 0x1ff << 0, + VERTEX_STRIDE_shift = 0, + VGT_GS_MAX_VERT_OUT = 0x00028b38, + MAX_VERT_OUT_mask = 0x7ff << 0, + MAX_VERT_OUT_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44, + VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48, + VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c, + VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_3 = 
0x00028b50, + VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift = 0, + VGT_SHADER_STAGES_EN = 0x00028b54, + LS_EN_mask = 0x03 << 0, + LS_EN_shift = 0, + LS_STAGE_OFF = 0x00, + LS_STAGE_ON = 0x01, + CS_STAGE_ON = 0x02, + HS_EN_bit = 1 << 2, + ES_EN_mask = 0x03 << 3, + ES_EN_shift = 3, + ES_STAGE_OFF = 0x00, + ES_STAGE_DS = 0x01, + ES_STAGE_REAL = 0x02, + GS_EN_bit = 1 << 5, + VS_EN_mask = 0x03 << 6, + VS_EN_shift = 6, + VS_STAGE_REAL = 0x00, + VS_STAGE_DS = 0x01, + VS_STAGE_COPY_SHADER = 0x02, + VGT_LS_HS_CONFIG = 0x00028b58, + NUM_PATCHES_mask = 0xff << 0, + NUM_PATCHES_shift = 0, + HS_NUM_INPUT_CP_mask = 0x3f << 8, + HS_NUM_INPUT_CP_shift = 8, + HS_NUM_OUTPUT_CP_mask = 0x3f << 14, + HS_NUM_OUTPUT_CP_shift = 14, + VGT_LS_SIZE = 0x00028b5c, + VGT_LS_SIZE__SIZE_mask = 0xff << 0, + VGT_LS_SIZE__SIZE_shift = 0, + PATCH_CP_SIZE_mask = 0x1fff << 8, + PATCH_CP_SIZE_shift = 8, + VGT_HS_SIZE = 0x00028b60, + VGT_HS_SIZE__SIZE_mask = 0xff << 0, + VGT_HS_SIZE__SIZE_shift = 0, +/* PATCH_CP_SIZE_mask = 0x1fff << 8, */ +/* PATCH_CP_SIZE_shift = 8, */ + VGT_LS_HS_ALLOC = 0x00028b64, + HS_TOTAL_OUTPUT_mask = 0x1fff << 0, + HS_TOTAL_OUTPUT_shift = 0, + LS_HS_TOTAL_OUTPUT_mask = 0x1fff << 13, + LS_HS_TOTAL_OUTPUT_shift = 13, + VGT_HS_PATCH_CONST = 0x00028b68, + VGT_HS_PATCH_CONST__SIZE_mask = 0x1fff << 0, + VGT_HS_PATCH_CONST__SIZE_shift = 0, + VGT_HS_PATCH_CONST__STRIDE_mask = 0x1fff << 13, + VGT_HS_PATCH_CONST__STRIDE_shift = 13, + DB_ALPHA_TO_MASK = 0x00028b70, + ALPHA_TO_MASK_ENABLE_bit = 1 << 0, + ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET0_shift = 8, + ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 10, + ALPHA_TO_MASK_OFFSET1_shift = 10, + ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 12, + ALPHA_TO_MASK_OFFSET2_shift = 12, + ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 14, + ALPHA_TO_MASK_OFFSET3_shift = 14, + OFFSET_ROUND_bit = 1 << 16, + PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028b78, + POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff 
<< 0, + POLY_OFFSET_NEG_NUM_DB_BITS_shift = 0, + POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8, + PA_SU_POLY_OFFSET_CLAMP = 0x00028b7c, + PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028b80, + PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028b84, + PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028b88, + PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028b8c, + VGT_GS_INSTANCE_CNT = 0x00028b90, + VGT_GS_INSTANCE_CNT__ENABLE_bit = 1 << 0, + CNT_mask = 0x7f << 2, + CNT_shift = 2, + VGT_STRMOUT_CONFIG = 0x00028b94, + STREAMOUT_0_EN_bit = 1 << 0, + STREAMOUT_1_EN_bit = 1 << 1, + STREAMOUT_2_EN_bit = 1 << 2, + STREAMOUT_3_EN_bit = 1 << 3, + RAST_STREAM_mask = 0x07 << 4, + RAST_STREAM_shift = 4, + VGT_STRMOUT_BUFFER_CONFIG = 0x00028b98, + STREAM_0_BUFFER_EN_mask = 0x0f << 0, + STREAM_0_BUFFER_EN_shift = 0, + STREAM_1_BUFFER_EN_mask = 0x0f << 4, + STREAM_1_BUFFER_EN_shift = 4, + STREAM_2_BUFFER_EN_mask = 0x0f << 8, + STREAM_2_BUFFER_EN_shift = 8, + STREAM_3_BUFFER_EN_mask = 0x0f << 12, + STREAM_3_BUFFER_EN_shift = 12, + CB_IMMED0_BASE = 0x00028b9c, + CB_IMMED0_BASE_num = 12, + PA_SC_LINE_CNTL = 0x00028c00, + EXPAND_LINE_WIDTH_bit = 1 << 9, + LAST_PIXEL_bit = 1 << 10, + PERPENDICULAR_ENDCAP_ENA_bit = 1 << 11, + DX10_DIAMOND_TEST_ENA_bit = 1 << 12, + PA_SC_AA_CONFIG = 0x00028c04, + MSAA_NUM_SAMPLES_mask = 0x03 << 0, + MSAA_NUM_SAMPLES_shift = 0, + AA_MASK_CENTROID_DTMN_bit = 1 << 4, + MAX_SAMPLE_DIST_mask = 0x0f << 13, + MAX_SAMPLE_DIST_shift = 13, + PA_SU_VTX_CNTL = 0x00028c08, + PIX_CENTER_bit = 1 << 0, + PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1, + PA_SU_VTX_CNTL__ROUND_MODE_shift = 1, + X_TRUNCATE = 0x00, + X_ROUND = 0x01, + X_ROUND_TO_EVEN = 0x02, + X_ROUND_TO_ODD = 0x03, + QUANT_MODE_mask = 0x07 << 3, + QUANT_MODE_shift = 3, + X_1_16TH = 0x00, + X_1_8TH = 0x01, + X_1_4TH = 0x02, + X_1_2 = 0x03, + X_1 = 0x04, + X_1_256TH = 0x05, + X_1_1024TH = 0x06, + X_1_4096TH = 0x07, + PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c, + PA_CL_GB_VERT_DISC_ADJ = 0x00028c10, + PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14, + PA_CL_GB_HORZ_DISC_ADJ = 
0x00028c18, + PA_SC_AA_SAMPLE_LOCS_0 = 0x00028c1c, + S0_X_mask = 0x0f << 0, + S0_X_shift = 0, + S0_Y_mask = 0x0f << 4, + S0_Y_shift = 4, + S1_X_mask = 0x0f << 8, + S1_X_shift = 8, + S1_Y_mask = 0x0f << 12, + S1_Y_shift = 12, + S2_X_mask = 0x0f << 16, + S2_X_shift = 16, + S2_Y_mask = 0x0f << 20, + S2_Y_shift = 20, + S3_X_mask = 0x0f << 24, + S3_X_shift = 24, + S3_Y_mask = 0x0f << 28, + S3_Y_shift = 28, + PA_SC_AA_SAMPLE_LOCS_1 = 0x00028c20, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_2 = 0x00028c24, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_3 = 0x00028c28, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_4 = 0x00028c2c, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ 
+/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_5 = 0x00028c30, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_6 = 0x00028c34, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_7 = 0x00028c38, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_MASK = 0x00028c3c, + VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58, + VTX_REUSE_DEPTH_mask = 0xff << 0, + VTX_REUSE_DEPTH_shift = 0, + VGT_OUT_DEALLOC_CNTL = 0x00028c5c, + DEALLOC_DIST_mask = 0x7f << 0, + DEALLOC_DIST_shift = 0, + CB_COLOR0_BASE = 0x00028c60, + 
CB_COLOR0_BASE_num = 12, + CB_COLOR0_BASE_offset = 51, + CB_COLOR0_PITCH = 0x00028c64, + CB_COLOR0_PITCH_num = 12, + CB_COLOR0_PITCH_offset = 51, + CB_COLOR0_PITCH__TILE_MAX_mask = 0x7ff << 0, + CB_COLOR0_PITCH__TILE_MAX_shift = 0, + CB_COLOR0_SLICE = 0x00028c68, + CB_COLOR0_SLICE_num = 12, + CB_COLOR0_SLICE_offset = 51, + CB_COLOR0_SLICE__TILE_MAX_mask = 0x3fffff << 0, + CB_COLOR0_SLICE__TILE_MAX_shift = 0, + CB_COLOR0_VIEW = 0x00028c6c, + CB_COLOR0_VIEW_num = 12, + CB_COLOR0_VIEW_offset = 51, +/* SLICE_START_mask = 0x7ff << 0, */ +/* SLICE_START_shift = 0, */ +/* SLICE_MAX_mask = 0x7ff << 13, */ +/* SLICE_MAX_shift = 13, */ + CB_COLOR0_INFO = 0x00028c70, + CB_COLOR0_INFO_num = 12, + CB_COLOR0_INFO_offset = 51, + ENDIAN_mask = 0x03 << 0, + ENDIAN_shift = 0, + ENDIAN_NONE = 0x00, + ENDIAN_8IN16 = 0x01, + ENDIAN_8IN32 = 0x02, + ENDIAN_8IN64 = 0x03, + CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2, + CB_COLOR0_INFO__FORMAT_shift = 2, + COLOR_INVALID = 0x00, + COLOR_8 = 0x01, + COLOR_16 = 0x05, + COLOR_16_FLOAT = 0x06, + COLOR_8_8 = 0x07, + COLOR_5_6_5 = 0x08, + COLOR_1_5_5_5 = 0x0a, + COLOR_4_4_4_4 = 0x0b, + COLOR_5_5_5_1 = 0x0c, + COLOR_32 = 0x0d, + COLOR_32_FLOAT = 0x0e, + COLOR_16_16 = 0x0f, + COLOR_16_16_FLOAT = 0x10, + COLOR_8_24 = 0x11, + COLOR_24_8 = 0x13, + COLOR_10_11_11 = 0x15, + COLOR_10_11_11_FLOAT = 0x16, + COLOR_2_10_10_10 = 0x19, + COLOR_8_8_8_8 = 0x1a, + COLOR_10_10_10_2 = 0x1b, + COLOR_X24_8_32_FLOAT = 0x1c, + COLOR_32_32 = 0x1d, + COLOR_32_32_FLOAT = 0x1e, + COLOR_16_16_16_16 = 0x1f, + COLOR_16_16_16_16_FLOAT = 0x20, + COLOR_32_32_32_32 = 0x22, + COLOR_32_32_32_32_FLOAT = 0x23, + CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8, + CB_COLOR0_INFO__ARRAY_MODE_shift = 8, +/* ARRAY_LINEAR_GENERAL = 0x00, */ +/* ARRAY_LINEAR_ALIGNED = 0x01, */ +/* ARRAY_1D_TILED_THIN1 = 0x02, */ +/* ARRAY_2D_TILED_THIN1 = 0x04, */ + NUMBER_TYPE_mask = 0x07 << 12, + NUMBER_TYPE_shift = 12, + NUMBER_UNORM = 0x00, + NUMBER_SNORM = 0x01, + NUMBER_UINT = 0x04, + NUMBER_SINT = 0x05, + 
NUMBER_SRGB = 0x06, + NUMBER_FLOAT = 0x07, + COMP_SWAP_mask = 0x03 << 15, + COMP_SWAP_shift = 15, + SWAP_STD = 0x00, + SWAP_ALT = 0x01, + SWAP_STD_REV = 0x02, + SWAP_ALT_REV = 0x03, + FAST_CLEAR_bit = 1 << 17, + COMPRESSION_bit = 1 << 18, + BLEND_CLAMP_bit = 1 << 19, + BLEND_BYPASS_bit = 1 << 20, + SIMPLE_FLOAT_bit = 1 << 21, + CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 22, + CB_COLOR0_INFO__TILE_COMPACT_bit = 1 << 23, + SOURCE_FORMAT_mask = 0x03 << 24, + SOURCE_FORMAT_shift = 24, + EXPORT_4C_32BPC = 0x00, + EXPORT_4C_16BPC = 0x01, + RAT_bit = 1 << 26, + RESOURCE_TYPE_mask = 0x07 << 27, + RESOURCE_TYPE_shift = 27, + BUFFER = 0x00, + TEXTURE1D = 0x01, + TEXTURE1DARRAY = 0x02, + TEXTURE2D = 0x03, + TEXTURE2DARRAY = 0x04, + TEXTURE3D = 0x05, + CB_COLOR0_ATTRIB = 0x00028c74, + CB_COLOR0_ATTRIB_num = 12, + CB_COLOR0_ATTRIB_offset = 51, + IGNORE_SHADER_ENGINE_TILING_bit = 1 << 3, + CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit = 1 << 4, + CB_COLOR0_ATTRIB__TILE_SPLIT_mask = 0x0f << 5, + CB_COLOR0_ATTRIB__TILE_SPLIT_shift = 5, +/* ADDR_SURF_TILE_SPLIT_64B = 0x00, */ +/* ADDR_SURF_TILE_SPLIT_128B = 0x01, */ +/* ADDR_SURF_TILE_SPLIT_256B = 0x02, */ +/* ADDR_SURF_TILE_SPLIT_512B = 0x03, */ +/* ADDR_SURF_TILE_SPLIT_1KB = 0x04, */ +/* ADDR_SURF_TILE_SPLIT_2KB = 0x05, */ +/* ADDR_SURF_TILE_SPLIT_4KB = 0x06, */ + CB_COLOR0_ATTRIB__NUM_BANKS_mask = 0x03 << 10, + CB_COLOR0_ATTRIB__NUM_BANKS_shift = 10, +/* ADDR_SURF_2_BANK = 0x00, */ +/* ADDR_SURF_4_BANK = 0x01, */ +/* ADDR_SURF_8_BANK = 0x02, */ +/* ADDR_SURF_16_BANK = 0x03, */ + CB_COLOR0_ATTRIB__BANK_WIDTH_mask = 0x03 << 13, + CB_COLOR0_ATTRIB__BANK_WIDTH_shift = 13, +/* ADDR_SURF_BANK_WIDTH_1 = 0x00, */ +/* ADDR_SURF_BANK_WIDTH_2 = 0x01, */ +/* ADDR_SURF_BANK_WIDTH_4 = 0x02, */ +/* ADDR_SURF_BANK_WIDTH_8 = 0x03, */ + CB_COLOR0_ATTRIB__BANK_HEIGHT_mask = 0x03 << 16, + CB_COLOR0_ATTRIB__BANK_HEIGHT_shift = 16, +/* ADDR_SURF_BANK_HEIGHT_1 = 0x00, */ +/* ADDR_SURF_BANK_HEIGHT_2 = 0x01, */ +/* ADDR_SURF_BANK_HEIGHT_4 = 0x02, */ +/* 
ADDR_SURF_BANK_HEIGHT_8 = 0x03, */ + CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_mask = 0x03 << 19, + CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift = 19, +/* ADDR_SURF_MACRO_ASPECT_1 = 0x00, */ +/* ADDR_SURF_MACRO_ASPECT_2 = 0x01, */ +/* ADDR_SURF_MACRO_ASPECT_4 = 0x02, */ +/* ADDR_SURF_MACRO_ASPECT_8 = 0x03, */ + FMASK_BANK_HEIGHT_mask = 0x03 << 22, + FMASK_BANK_HEIGHT_shift = 22, +/* ADDR_SURF_BANK_HEIGHT_1 = 0x00, */ +/* ADDR_SURF_BANK_HEIGHT_2 = 0x01, */ +/* ADDR_SURF_BANK_HEIGHT_4 = 0x02, */ +/* ADDR_SURF_BANK_HEIGHT_8 = 0x03, */ + CB_COLOR0_DIM = 0x00028c78, + CB_COLOR0_DIM_num = 12, + CB_COLOR0_DIM_offset = 51, + WIDTH_MAX_mask = 0xffff << 0, + WIDTH_MAX_shift = 0, + HEIGHT_MAX_mask = 0xffff << 16, + HEIGHT_MAX_shift = 16, + CB_COLOR0_CMASK = 0x00028c7c, + CB_COLOR0_CMASK_num = 8, + CB_COLOR0_CMASK_offset = 60, + CB_COLOR0_CMASK_SLICE = 0x00028c80, + CB_COLOR0_CMASK_SLICE_num = 8, + CB_COLOR0_CMASK_SLICE_offset = 60, + CB_COLOR0_CMASK_SLICE__TILE_MAX_mask = 0x3fff << 0, + CB_COLOR0_CMASK_SLICE__TILE_MAX_shift = 0, + CB_COLOR0_FMASK = 0x00028c84, + CB_COLOR0_FMASK_num = 8, + CB_COLOR0_FMASK_offset = 60, + CB_COLOR0_FMASK_SLICE = 0x00028c88, + CB_COLOR0_FMASK_SLICE_num = 8, + CB_COLOR0_FMASK_SLICE_offset = 60, + CB_COLOR0_FMASK_SLICE__TILE_MAX_mask = 0x3fffff << 0, + CB_COLOR0_FMASK_SLICE__TILE_MAX_shift = 0, + CB_COLOR0_CLEAR_WORD0 = 0x00028c8c, + CB_COLOR0_CLEAR_WORD0_num = 8, + CB_COLOR0_CLEAR_WORD0_offset = 60, + CB_COLOR0_CLEAR_WORD1 = 0x00028c90, + CB_COLOR0_CLEAR_WORD1_num = 8, + CB_COLOR0_CLEAR_WORD1_offset = 60, + CB_COLOR0_CLEAR_WORD2 = 0x00028c94, + CB_COLOR0_CLEAR_WORD2_num = 8, + CB_COLOR0_CLEAR_WORD2_offset = 60, + CB_COLOR0_CLEAR_WORD3 = 0x00028c98, + CB_COLOR0_CLEAR_WORD3_num = 8, + CB_COLOR0_CLEAR_WORD3_offset = 60, + SQ_ALU_CONST_CACHE_HS_0 = 0x00028f00, + SQ_ALU_CONST_CACHE_HS_0_num = 16, + SQ_ALU_CONST_CACHE_LS_0 = 0x00028f40, + SQ_ALU_CONST_CACHE_LS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_HS_0 = 0x00028f80, + SQ_ALU_CONST_BUFFER_SIZE_HS_0_num = 16, + 
SQ_ALU_CONST_BUFFER_SIZE_HS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_HS_0__DATA_shift = 0, + SQ_ALU_CONST_BUFFER_SIZE_LS_0 = 0x00028fc0, + SQ_ALU_CONST_BUFFER_SIZE_LS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_LS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_LS_0__DATA_shift = 0, + SQ_VTX_CONSTANT_WORD0_0 = 0x00030000, + SQ_TEX_RESOURCE_WORD0_0 = 0x00030000, + DIM_mask = 0x07 << 0, + DIM_shift = 0, + SQ_TEX_DIM_1D = 0x00, + SQ_TEX_DIM_2D = 0x01, + SQ_TEX_DIM_3D = 0x02, + SQ_TEX_DIM_CUBEMAP = 0x03, + SQ_TEX_DIM_1D_ARRAY = 0x04, + SQ_TEX_DIM_2D_ARRAY = 0x05, + SQ_TEX_DIM_2D_MSAA = 0x06, + SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07, +/* IGNORE_SHADER_ENGINE_TILING_bit = 1 << 3, */ + SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit= 1 << 5, + PITCH_mask = 0xfff << 6, + PITCH_shift = 6, + TEX_WIDTH_mask = 0x3fff << 18, + TEX_WIDTH_shift = 18, + SQ_VTX_CONSTANT_WORD1_0 = 0x00030004, + SQ_TEX_RESOURCE_WORD1_0 = 0x00030004, + TEX_HEIGHT_mask = 0x3fff << 0, + TEX_HEIGHT_shift = 0, + TEX_DEPTH_mask = 0x1fff << 14, + TEX_DEPTH_shift = 14, + SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask = 0x0f << 28, + SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift = 28, + SQ_VTX_CONSTANT_WORD2_0 = 0x00030008, + BASE_ADDRESS_HI_mask = 0xff << 0, + BASE_ADDRESS_HI_shift = 0, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask = 0x7ff << 8, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift = 8, + SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit = 1 << 19, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift = 20, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask = 0x03 << 26, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift = 26, +/* SQ_NUM_FORMAT_NORM = 0x00, */ +/* SQ_NUM_FORMAT_INT = 0x01, */ +/* SQ_NUM_FORMAT_SCALED = 0x02, */ + SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit = 1 << 28, + SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit = 1 << 29, + SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask = 0x03 << 30, + SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift = 30, +/* SQ_ENDIAN_NONE = 0x00, */ +/* 
SQ_ENDIAN_8IN16 = 0x01, */ +/* SQ_ENDIAN_8IN32 = 0x02, */ + SQ_TEX_RESOURCE_WORD2_0 = 0x00030008, + SQ_VTX_CONSTANT_WORD3_0 = 0x0003000c, + SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit = 1 << 2, + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_mask = 0x07 << 3, + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift = 3, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_mask = 0x07 << 6, + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift = 6, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_mask = 0x07 << 9, + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_mask = 0x07 << 12, + SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift = 12, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD3_0 = 0x0003000c, + SQ_TEX_RESOURCE_WORD4_0 = 0x00030010, + FORMAT_COMP_X_mask = 0x03 << 0, + FORMAT_COMP_X_shift = 0, + SQ_FORMAT_COMP_UNSIGNED = 0x00, + SQ_FORMAT_COMP_SIGNED = 0x01, + SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, + FORMAT_COMP_Y_mask = 0x03 << 2, + FORMAT_COMP_Y_shift = 2, +/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ +/* SQ_FORMAT_COMP_SIGNED = 0x01, */ +/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ + FORMAT_COMP_Z_mask = 0x03 << 4, + FORMAT_COMP_Z_shift = 4, +/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ +/* SQ_FORMAT_COMP_SIGNED = 0x01, */ +/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ + FORMAT_COMP_W_mask = 0x03 << 6, + FORMAT_COMP_W_shift = 6, +/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ +/* SQ_FORMAT_COMP_SIGNED = 0x01, */ +/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ + 
SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask = 0x03 << 8, + SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift = 8, +/* SQ_NUM_FORMAT_NORM = 0x00, */ +/* SQ_NUM_FORMAT_INT = 0x01, */ +/* SQ_NUM_FORMAT_SCALED = 0x02, */ + SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit = 1 << 10, + SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit = 1 << 11, + SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask = 0x03 << 12, + SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift = 12, +/* SQ_ENDIAN_NONE = 0x00, */ +/* SQ_ENDIAN_8IN16 = 0x01, */ +/* SQ_ENDIAN_8IN32 = 0x02, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask = 0x07 << 16, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift = 16, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask = 0x07 << 19, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift = 19, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask = 0x07 << 22, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift = 22, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask = 0x07 << 25, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift = 25, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + BASE_LEVEL_mask = 0x0f << 28, + BASE_LEVEL_shift = 28, + SQ_VTX_CONSTANT_WORD4_0 = 0x00030010, + SQ_TEX_RESOURCE_WORD5_0 = 0x00030014, + LAST_LEVEL_mask = 0x0f << 0, + LAST_LEVEL_shift = 0, + BASE_ARRAY_mask = 0x1fff << 4, + BASE_ARRAY_shift = 4, + LAST_ARRAY_mask = 0x1fff << 17, + LAST_ARRAY_shift = 17, + SQ_TEX_RESOURCE_WORD6_0 = 0x00030018, + PERF_MODULATION_mask = 0x07 << 3, + PERF_MODULATION_shift = 3, + INTERLACED_bit = 1 << 6, + 
SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_mask = 0xfff << 8, + SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift = 8, + SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_mask = 0x07 << 29, + SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift = 29, + SQ_ADDR_SURF_TILE_SPLIT_64B = 0x00, + SQ_ADDR_SURF_TILE_SPLIT_128B = 0x01, + SQ_ADDR_SURF_TILE_SPLIT_256B = 0x02, + SQ_ADDR_SURF_TILE_SPLIT_512B = 0x03, + SQ_ADDR_SURF_TILE_SPLIT_1KB = 0x04, + SQ_ADDR_SURF_TILE_SPLIT_2KB = 0x05, + SQ_ADDR_SURF_TILE_SPLIT_4KB = 0x06, + SQ_VTX_CONSTANT_WORD7_0 = 0x0003001c, + SQ_VTX_CONSTANT_WORD7_0__TYPE_mask = 0x03 << 30, + SQ_VTX_CONSTANT_WORD7_0__TYPE_shift = 30, + SQ_TEX_VTX_INVALID_TEXTURE = 0x00, + SQ_TEX_VTX_INVALID_BUFFER = 0x01, + SQ_TEX_VTX_VALID_TEXTURE = 0x02, + SQ_TEX_VTX_VALID_BUFFER = 0x03, + SQ_TEX_RESOURCE_WORD7_0 = 0x0003001c, + SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask = 0x3f << 0, + SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift = 0, + SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_mask = 0x03 << 6, + SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift = 6, + SQ_ADDR_SURF_MACRO_ASPECT_1 = 0x00, + SQ_ADDR_SURF_MACRO_ASPECT_2 = 0x01, + SQ_ADDR_SURF_MACRO_ASPECT_4 = 0x02, + SQ_ADDR_SURF_MACRO_ASPECT_8 = 0x03, + SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_mask = 0x03 << 8, + SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift = 8, + SQ_ADDR_SURF_BANK_WH_1 = 0x00, + SQ_ADDR_SURF_BANK_WH_2 = 0x01, + SQ_ADDR_SURF_BANK_WH_4 = 0x02, + SQ_ADDR_SURF_BANK_WH_8 = 0x03, + SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_mask = 0x03 << 10, + SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift = 10, +/* SQ_ADDR_SURF_BANK_WH_1 = 0x00, */ +/* SQ_ADDR_SURF_BANK_WH_2 = 0x01, */ +/* SQ_ADDR_SURF_BANK_WH_4 = 0x02, */ +/* SQ_ADDR_SURF_BANK_WH_8 = 0x03, */ + DEPTH_SAMPLE_ORDER_bit = 1 << 15, + SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_mask = 0x03 << 16, + SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift = 16, + SQ_ADDR_SURF_2_BANK = 0x00, + SQ_ADDR_SURF_4_BANK = 0x01, + SQ_ADDR_SURF_8_BANK = 0x02, + SQ_ADDR_SURF_16_BANK = 0x03, + SQ_TEX_RESOURCE_WORD7_0__TYPE_mask = 0x03 << 30, + 
SQ_TEX_RESOURCE_WORD7_0__TYPE_shift = 30, +/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */ +/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */ +/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */ +/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */ + SQ_LOOP_CONST_DX10_0 = 0x0003a200, + SQ_LOOP_CONST_0 = 0x0003a200, + SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0, + SQ_LOOP_CONST_0__COUNT_shift = 0, + INIT_mask = 0xfff << 12, + INIT_shift = 12, + INC_mask = 0xff << 24, + INC_shift = 24, + SQ_JUMPTABLE_CONST_0 = 0x0003a200, + CONST_A_mask = 0xff << 0, + CONST_A_shift = 0, + CONST_B_mask = 0xff << 8, + CONST_B_shift = 8, + CONST_C_mask = 0xff << 16, + CONST_C_shift = 16, + CONST_D_mask = 0xff << 24, + CONST_D_shift = 24, + SQ_BOOL_CONST_0 = 0x0003a500, + SQ_BOOL_CONST_0_num = 6, + SQ_TEX_SAMPLER_WORD0_0 = 0x0003c000, + SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x07 << 0, + SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0, + SQ_TEX_WRAP = 0x00, + SQ_TEX_MIRROR = 0x01, + SQ_TEX_CLAMP_LAST_TEXEL = 0x02, + SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, + SQ_TEX_CLAMP_HALF_BORDER = 0x04, + SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, + SQ_TEX_CLAMP_BORDER = 0x06, + SQ_TEX_MIRROR_ONCE_BORDER = 0x07, + CLAMP_Y_mask = 0x07 << 3, + CLAMP_Y_shift = 3, +/* SQ_TEX_WRAP = 0x00, */ +/* SQ_TEX_MIRROR = 0x01, */ +/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ +/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ +/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ +/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ +/* SQ_TEX_CLAMP_BORDER = 0x06, */ +/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ + CLAMP_Z_mask = 0x07 << 6, + CLAMP_Z_shift = 6, +/* SQ_TEX_WRAP = 0x00, */ +/* SQ_TEX_MIRROR = 0x01, */ +/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ +/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ +/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ +/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ +/* SQ_TEX_CLAMP_BORDER = 0x06, */ +/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ + XY_MAG_FILTER_mask = 0x03 << 9, + XY_MAG_FILTER_shift = 9, + SQ_TEX_XY_FILTER_POINT = 0x00, + SQ_TEX_XY_FILTER_BILINEAR = 0x01, + XY_MIN_FILTER_mask = 0x03 << 
11, + XY_MIN_FILTER_shift = 11, +/* SQ_TEX_XY_FILTER_POINT = 0x00, */ +/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */ + Z_FILTER_mask = 0x03 << 13, + Z_FILTER_shift = 13, + SQ_TEX_Z_FILTER_NONE = 0x00, + SQ_TEX_Z_FILTER_POINT = 0x01, + SQ_TEX_Z_FILTER_LINEAR = 0x02, + MIP_FILTER_mask = 0x03 << 15, + MIP_FILTER_shift = 15, +/* SQ_TEX_Z_FILTER_NONE = 0x00, */ +/* SQ_TEX_Z_FILTER_POINT = 0x01, */ +/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */ + BORDER_COLOR_TYPE_mask = 0x03 << 20, + BORDER_COLOR_TYPE_shift = 20, + SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00, + SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01, + SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02, + SQ_TEX_BORDER_COLOR_REGISTER = 0x03, + DEPTH_COMPARE_FUNCTION_mask = 0x07 << 22, + DEPTH_COMPARE_FUNCTION_shift = 22, + SQ_TEX_DEPTH_COMPARE_NEVER = 0x00, + SQ_TEX_DEPTH_COMPARE_LESS = 0x01, + SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02, + SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03, + SQ_TEX_DEPTH_COMPARE_GREATER = 0x04, + SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05, + SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06, + SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07, + CHROMA_KEY_mask = 0x03 << 25, + CHROMA_KEY_shift = 25, + SQ_TEX_CHROMA_KEY_DISABLED = 0x00, + SQ_TEX_CHROMA_KEY_KILL = 0x01, + SQ_TEX_CHROMA_KEY_BLEND = 0x02, + SQ_TEX_SAMPLER_WORD1_0 = 0x0003c004, + SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_mask = 0xfff << 0, + SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift = 0, + MAX_LOD_mask = 0xfff << 12, + MAX_LOD_shift = 12, + PERF_MIP_mask = 0x0f << 24, + PERF_MIP_shift = 24, + PERF_Z_mask = 0x0f << 28, + PERF_Z_shift = 28, + SQ_TEX_SAMPLER_WORD2_0 = 0x0003c008, + SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_mask = 0x3fff << 0, + SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift = 0, + LOD_BIAS_SEC_mask = 0x3f << 14, + LOD_BIAS_SEC_shift = 14, + MC_COORD_TRUNCATE_bit = 1 << 20, + SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 21, + TRUNCATE_COORD_bit = 1 << 28, + SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit = 1 << 29, + SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31, + SQ_VTX_BASE_VTX_LOC = 0x0003cff0, + SQ_VTX_START_INST_LOC 
= 0x0003cff4, + SQ_TEX_SAMPLER_CLEAR = 0x0003ff00, + SQ_TEX_RESOURCE_CLEAR = 0x0003ff04, + SQ_LOOP_BOOL_CLEAR = 0x0003ff08, + +} ; + +#endif /* _EVERGREEN_REG_AUTO_H */ + diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c new file mode 100644 index 00000000..64e96d89 --- /dev/null +++ b/src/evergreen_shader.c @@ -0,0 +1,2790 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Author: Alex Deucher <alexander.deucher@amd.com> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" + +#include "evergreen_shader.h" +#include "evergreen_reg.h" + +/* solid vs --------------------------------------- */ +int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_DWORD0(ADDR(4), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_VC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 1 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 2 - always export a param whether it's used or not */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(0)); + /* 3 - padding */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + /* 4/5 */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + 
DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + + return i; +} + +/* solid ps --------------------------------------- */ +int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_ALU_DWORD0(ADDR(2), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 1 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + + /* 2 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + /* 3 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + 
SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* 4 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + /* 5 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + + return i; +} + +/* copy vs --------------------------------------- */ +int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_DWORD0(ADDR(4), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_VC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 1 */ + 
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 2 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(0)); + /* 3 */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + /* 4/5 */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(16)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + /* 6/7 */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + 
DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(8), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + + return i; +} + +/* copy ps --------------------------------------- */ +int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* CF INST 0 */ + shader[i++] = CF_ALU_DWORD0(ADDR(3), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* CF INST 1 */ + shader[i++] = CF_DWORD0(ADDR(8), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_TC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* CF INST 2 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + + /* 3 interpolate tex coords */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = 
ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 4 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* 5 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 6 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 7 */ + 
shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + + /* 8/9 TEX INST 0 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), /* R */ + DST_SEL_Y(SQ_SEL_Y), /* G */ + DST_SEL_Z(SQ_SEL_Z), /* B */ + DST_SEL_W(SQ_SEL_W), /* A */ + LOD_BIAS(0), + COORD_TYPE_X(TEX_UNNORMALIZED), + COORD_TYPE_Y(TEX_UNNORMALIZED), + COORD_TYPE_Z(TEX_UNNORMALIZED), + COORD_TYPE_W(TEX_UNNORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + return i; +} + +int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_DWORD0(ADDR(6), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_VC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 1 - ALU */ + shader[i++] = CF_ALU_DWORD0(ADDR(4), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(2), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 2 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(3)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + 
MARK(0), + BARRIER(1)); + /* 3 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(3)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(0)); + + + /* 4 texX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 5 texY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 6/7 */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(16)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + 
USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + /* 8/9 */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(8), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + + return i; +} + +int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_ALU_DWORD0(ADDR(5), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 1 */ + shader[i++] = CF_DWORD0(ADDR(21), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 2 */ + shader[i++] = CF_DWORD0(ADDR(30), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + 
CF_CONST(0), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 3 */ + shader[i++] = CF_ALU_DWORD0(ADDR(9), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(12), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 4 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(2), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(3)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 5 interpolate tex coords */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 6 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + 
BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* 7 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 8 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 9,10,11,12 */ + /* r2.x = MAD(c0.w, r1.x, c0.x) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* r2.y = MAD(c0.w, r1.x, c0.y) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + 
SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* r2.z = MAD(c0.w, r1.x, c0.z) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* r2.w = MAD(0, 0, 1) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(SQ_ALU_SRC_0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 13,14,15,16 */ + /* r2.x = MAD(c1.x, r1.y, pv.x) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + 
ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* r2.y = MAD(c1.y, r1.y, pv.y) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* r2.z = MAD(c1.z, r1.y, pv.z) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* r2.w = MAD(0, 0, 1) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(SQ_ALU_SRC_0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + /* 17,18,19,20 */ + /* r2.x = MAD(c2.x, r1.z, pv.x) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + 
SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + /* r2.y = MAD(c2.y, r1.z, pv.y) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* r2.z = MAD(c2.z, r1.z, pv.z) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + /* r2.w = MAD(0, 0, 1) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(SQ_ALU_SRC_0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + 
BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + + /* 21 */ + shader[i++] = CF_DWORD0(ADDR(24), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(3), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_TC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 22 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 23 */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + /* 24/25 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_MASK), + DST_SEL_Z(SQ_SEL_MASK), + DST_SEL_W(SQ_SEL_1), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + /* 26/27 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(1), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_MASK), + DST_SEL_Y(SQ_SEL_MASK), + DST_SEL_Z(SQ_SEL_X), + DST_SEL_W(SQ_SEL_MASK), + LOD_BIAS(0), + 
COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(1), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + /* 28/29 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(2), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_MASK), + DST_SEL_Y(SQ_SEL_X), + DST_SEL_Z(SQ_SEL_MASK), + DST_SEL_W(SQ_SEL_MASK), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(2), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + /* 30 */ + shader[i++] = CF_DWORD0(ADDR(32), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_TC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 31 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 32/33 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = 
TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_MASK), + DST_SEL_Z(SQ_SEL_MASK), + DST_SEL_W(SQ_SEL_1), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + /* 34/35 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(1), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_MASK), + DST_SEL_Y(SQ_SEL_X), + DST_SEL_Z(SQ_SEL_Y), + DST_SEL_W(SQ_SEL_MASK), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(1), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + return i; +} + +/* comp vs --------------------------------------- */ +int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_DWORD0(ADDR(3), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 1 */ + shader[i++] = CF_DWORD0(ADDR(9), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + 
WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 2 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_NOP), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 3 - mask sub */ + shader[i++] = CF_DWORD0(ADDR(32), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(3), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_VC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 4 - ALU */ + shader[i++] = CF_ALU_DWORD0(ADDR(14), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(12), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 5 - dst */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(2), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 6 - src */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT), + MARK(0), + BARRIER(0)); + /* 7 - mask */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = 
CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 8 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 9 - non-mask sub */ + shader[i++] = CF_DWORD0(ADDR(38), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_VC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 10 - ALU */ + shader[i++] = CF_ALU_DWORD0(ADDR(26), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(6), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 11 - dst */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), + BURST_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 12 - src */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1), + BURST_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + 
CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(0)); + /* 13 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* mask alu - 14 srcX MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 15 srcY MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 16 srcX MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + 
DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 17 srcY MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 18 maskX MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 19 maskY MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 20 srcX MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + 
PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 21 srcY MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 22 srcX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 23 srcY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + 
CLAMP(0)); + + /* 24 maskX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 25 maskY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* no mask alu - 26 srcX MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 27 srcY MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + 
INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 28 srcX MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 29 srcY MAD */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* 30 srcX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + 
DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 31 srcY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* mask vfetch - 32/33 - dst */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(24)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + /* 34/35 - src */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_1), + DST_SEL_W(SQ_SEL_0), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = 
VTX_DWORD2(OFFSET(8), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + /* 36/37 - mask */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_1), + DST_SEL_W(SQ_SEL_0), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(16), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + + /* no mask vfetch - 38/39 - dst */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(16)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + /* 40/41 - src */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = 
VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_1), + DST_SEL_W(SQ_SEL_0), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(8), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0), + ALT_CONST(0), + BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = VTX_DWORD_PAD; + + return i; +} + +/* comp ps --------------------------------------- */ +int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_DWORD0(ADDR(3), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 1 */ + shader[i++] = CF_DWORD0(ADDR(8), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 2 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_NOP), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 3 - mask sub */ + shader[i++] = CF_ALU_DWORD0(ADDR(12), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(8), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 4 */ + shader[i++] = CF_DWORD0(ADDR(28), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + 
shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_TC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 5 */ + shader[i++] = CF_ALU_DWORD0(ADDR(20), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 6 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(2), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + /* 7 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 8 - non-mask sub */ + shader[i++] = CF_ALU_DWORD0(ADDR(24), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 9 */ + shader[i++] = CF_DWORD0(ADDR(32), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_TC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 10 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), 
+ RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + + /* 11 */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 12 interpolate src tex coords - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 13 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* 14 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), 
+ SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 15 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 16 interpolate mask tex coords */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 17 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + 
ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* 18 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 19 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 20 - alu 0 */ + /* MUL gpr[2].x gpr[0].x gpr[1].x */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + /* 21 - alu 1 */ + /* MUL gpr[2].y gpr[0].y gpr[1].y */ + shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* 22 - alu 2 */ + /* MUL gpr[2].z gpr[0].z gpr[1].z */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + /* 23 - alu 3 */ + /* MUL gpr[2].w gpr[0].w gpr[1].w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + + /* 24 - interpolate tex coords - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + 
SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + /* 25 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + /* 26 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + /* 27 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_INTERP_XY), 
+ BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 28/29 - src - mask */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(1), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + /* 30/31 - mask */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(1), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(1), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + /* 32/33 - src - non-mask */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + 
DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + return i; +} diff --git a/src/evergreen_shader.h b/src/evergreen_shader.h new file mode 100644 index 00000000..41066191 --- /dev/null +++ b/src/evergreen_shader.h @@ -0,0 +1,292 @@ +/* + * Evergreen shaders + * + * Copyright (C) 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Shader macros + */ + +#ifndef __SHADER_H__ +#define __SHADER_H__ + +#include "radeon.h" + +/* Oder of instructions: All CF, All ALU, All Tex/Vtx fetches */ + + +// CF insts +// addr +#define ADDR(x) (x) +// jumptable +#define JUMPTABLE_SEL(x) (x) +// pc +#define POP_COUNT(x) (x) +// const +#define CF_CONST(x) (x) +// cond +#define COND(x) (x) // SQ_COND_* +// count +#define I_COUNT(x) ((x) ? ((x) - 1) : 0) +// vpm +#define VALID_PIXEL_MODE(x) (x) +// eop +#define END_OF_PROGRAM(x) (x) +// cf inst +#define CF_INST(x) (x) // SQ_CF_INST_* +// wqm +#define WHOLE_QUAD_MODE(x) (x) +// barrier +#define BARRIER(x) (x) +//kb0 +#define KCACHE_BANK0(x) (x) +//kb1 +#define KCACHE_BANK1(x) (x) +// km0/1 +#define KCACHE_MODE0(x) (x) +#define KCACHE_MODE1(x) (x) // SQ_CF_KCACHE_* +// +#define KCACHE_ADDR0(x) (x) +#define KCACHE_ADDR1(x) (x) + +#define ALT_CONST(x) (x) + +#define ARRAY_BASE(x) (x) +// export pixel +#define CF_PIXEL_MRT0 0 +#define CF_PIXEL_MRT1 1 +#define CF_PIXEL_MRT2 2 +#define CF_PIXEL_MRT3 3 +#define CF_PIXEL_MRT4 4 +#define CF_PIXEL_MRT5 5 +#define CF_PIXEL_MRT6 6 +#define CF_PIXEL_MRT7 7 +// computed Z +#define CF_COMPUTED_Z 61 +// export pos +#define CF_POS0 60 +#define CF_POS1 61 +#define CF_POS2 62 +#define CF_POS3 63 +// export param +// 0...31 +#define TYPE(x) (x) // SQ_EXPORT_* +#define RW_GPR(x) (x) +#define RW_REL(x) (x) +#define ABSOLUTE 0 +#define RELATIVE 1 +#define INDEX_GPR(x) (x) +#define ELEM_SIZE(x) (x ? (x - 1) : 0) +#define BURST_COUNT(x) (x ? 
(x - 1) : 0) +#define MARK(x) (x) + +// swiz +#define SRC_SEL_X(x) (x) // SQ_SEL_* each +#define SRC_SEL_Y(x) (x) +#define SRC_SEL_Z(x) (x) +#define SRC_SEL_W(x) (x) + +#define CF_DWORD0(addr, jmptbl) ((addr) | ((jmptbl) << 24)) +#define CF_DWORD1(pc, cf_const, cond, count, vpm, eop, cf_inst, wqm, b) \ + (((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | ((count) << 10) | \ + ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | ((wqm) << 30) | ((b) << 31)) + +#define CF_ALU_DWORD0(addr, kb0, kb1, km0) (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30)) +#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, alt_const, cf_inst, wqm, b) \ + (((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \ + ((count) << 18) | ((alt_const) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31)) + +#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \ + (((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | \ + ((index_gpr) << 23) | ((es) << 30)) +#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, bc, vpm, eop, cf_inst, m, b) \ + (((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | \ + ((bc) << 16) | ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | \ + ((m) << 30) | ((b) << 31)) + +// ALU clause insts +#define SRC0_SEL(x) (x) +#define SRC1_SEL(x) (x) +#define SRC2_SEL(x) (x) +// src[0-2]_sel +// 0-127 GPR +// 128-159 kcache constants bank 0 +// 160-191 kcache constants bank 1 +// 192-255 inline const values +// 256-287 kcache constants bank 2 +// 288-319 kcache constants bank 3 +// 219-255 special SQ_ALU_SRC_* (0, 1, etc.) 
+// 488-520 src param space +#define ALU_SRC_GPR_BASE 0 +#define ALU_SRC_KCACHE0_BASE 128 +#define ALU_SRC_KCACHE1_BASE 160 +#define ALU_SRC_INLINE_K_BASE 192 +#define ALU_SRC_KCACHE2_BASE 256 +#define ALU_SRC_KCACHE3_BASE 288 +#define ALU_SRC_PARAM_BASE 448 + +#define SRC0_REL(x) (x) +#define SRC1_REL(x) (x) +#define SRC2_REL(x) (x) +// elem +#define SRC0_ELEM(x) (x) +#define SRC1_ELEM(x) (x) +#define SRC2_ELEM(x) (x) +#define ELEM_X 0 +#define ELEM_Y 1 +#define ELEM_Z 2 +#define ELEM_W 3 +// neg +#define SRC0_NEG(x) (x) +#define SRC1_NEG(x) (x) +#define SRC2_NEG(x) (x) +// im +#define INDEX_MODE(x) (x) // SQ_INDEX_* +// ps +#define PRED_SEL(x) (x) // SQ_PRED_SEL_* +// last +#define LAST(x) (x) +// abs +#define SRC0_ABS(x) (x) +#define SRC1_ABS(x) (x) +// uem +#define UPDATE_EXECUTE_MASK(x) (x) +// up +#define UPDATE_PRED(x) (x) +// wm +#define WRITE_MASK(x) (x) +// omod +#define OMOD(x) (x) // SQ_ALU_OMOD_* +// alu inst +#define ALU_INST(x) (x) // SQ_ALU_INST_* +//bs +#define BANK_SWIZZLE(x) (x) // SQ_ALU_VEC_* +#define DST_GPR(x) (x) +#define DST_REL(x) (x) +#define DST_ELEM(x) (x) +#define CLAMP(x) (x) + +#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \ + (((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \ + ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \ + ((im) << 26) | ((ps) << 29) | ((last) << 31)) + +#define ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ + (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ + ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \ + ((dr) << 28) | ((de) << 29) | ((clamp) << 31)) + +#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \ + (((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \ + ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \ + ((de) << 29) | ((clamp) << 31)) + +// VTX clause insts 
+// vxt insts +#define VTX_INST(x) (x) // SQ_VTX_INST_* + +// fetch type +#define FETCH_TYPE(x) (x) // SQ_VTX_FETCH_* + +#define FETCH_WHOLE_QUAD(x) (x) +#define BUFFER_ID(x) (x) +#define SRC_GPR(x) (x) +#define SRC_REL(x) (x) +#define MEGA_FETCH_COUNT(x) ((x) ? ((x) - 1) : 0) + +#define DST_SEL_X(x) (x) +#define DST_SEL_Y(x) (x) +#define DST_SEL_Z(x) (x) +#define DST_SEL_W(x) (x) +#define USE_CONST_FIELDS(x) (x) +#define DATA_FORMAT(x) (x) +// num format +#define NUM_FORMAT_ALL(x) (x) // SQ_NUM_FORMAT_* +// format comp +#define FORMAT_COMP_ALL(x) (x) // SQ_FORMAT_COMP_* +// sma +#define SRF_MODE_ALL(x) (x) +#define SRF_MODE_ZERO_CLAMP_MINUS_ONE 0 +#define SRF_MODE_NO_ZERO 1 +#define OFFSET(x) (x) +// endian swap +#define ENDIAN_SWAP(x) (x) // SQ_ENDIAN_* +#define CONST_BUF_NO_STRIDE(x) (x) +// mf +#define MEGA_FETCH(x) (x) +#define BUFFER_INDEX_MODE(x) (x) + +#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \ + (((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \ + ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26)) +#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \ + (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)) +#define VTX_DWORD2(offset, es, cbns, mf, alt_const, bim) \ + (((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19) | ((alt_const) << 20) | ((bim) << 21)) +#define VTX_DWORD_PAD 0x00000000 + +// TEX clause insts +// tex insts +#define TEX_INST(x) (x) // SQ_TEX_INST_* +#define INST_MOD(x) (x) +#define FETCH_WHOLE_QUAD(x) (x) +#define RESOURCE_ID(x) (x) +#define RESOURCE_INDEX_MODE(x) (x) +#define SAMPLER_INDEX_MODE(x) (x) + +#define LOD_BIAS(x) (x) +//ct +#define COORD_TYPE_X(x) (x) +#define COORD_TYPE_Y(x) (x) +#define COORD_TYPE_Z(x) (x) +#define COORD_TYPE_W(x) (x) +#define TEX_UNNORMALIZED 0 +#define 
TEX_NORMALIZED 1 +#define OFFSET_X(x) (((int)(x) * 2) & 0x1f) /* 4:1-bits 2's-complement fixed-point: [-8.0..7.5] */ +#define OFFSET_Y(x) (((int)(x) * 2) & 0x1f) +#define OFFSET_Z(x) (((int)(x) * 2) & 0x1f) +#define SAMPLER_ID(x) (x) + +#define TEX_DWORD0(tex_inst, im, fwq, resource_id, src_gpr, sr, ac, rim, sim) \ + (((tex_inst) << 0) | ((im) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \ + ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24) | ((rim) << 25) | ((sim) << 27)) +#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \ + (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31)) +#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \ + (((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \ + ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29)) +#define TEX_DWORD_PAD 0x00000000 + +extern int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs); +extern int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps); + +extern int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs); +extern int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps); + +extern int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader); +extern int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader); + +extern int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs); +extern int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps); + +#endif diff --git a/src/evergreen_state.h b/src/evergreen_state.h new file mode 100644 index 00000000..5869256e --- /dev/null +++ b/src/evergreen_state.h @@ -0,0 +1,338 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: Alex Deucher <alexander.deucher@amd.com> + * + */ + +#ifndef __EVERGREEN_STATE_H__ +#define __EVERGREEN_STATE_H__ + +typedef int bool_t; + +#define CLEAR(x) memset (&x, 0, sizeof(x)) + +/* Sequencer / thread handling */ +typedef struct { + int ps_prio; + int vs_prio; + int gs_prio; + int es_prio; + int hs_prio; + int ls_prio; + int cs_prio; + int num_ps_gprs; + int num_vs_gprs; + int num_gs_gprs; + int num_es_gprs; + int num_hs_gprs; + int num_ls_gprs; + int num_cs_gprs; + int num_temp_gprs; + int num_ps_threads; + int num_vs_threads; + int num_gs_threads; + int num_es_threads; + int num_hs_threads; + int num_ls_threads; + int num_ps_stack_entries; + int num_vs_stack_entries; + int num_gs_stack_entries; + int num_es_stack_entries; + int num_hs_stack_entries; + int num_ls_stack_entries; +} sq_config_t; + +/* Color buffer / render target */ +typedef struct { + int id; + int w; + int h; + uint64_t base; + int format; + int endian; + int array_mode; // tiling + int number_type; + int read_size; + int comp_swap; + int tile_mode; + int blend_clamp; + int clear_color; + int blend_bypass; + int simple_float; + int round_mode; + int tile_compact; + int source_format; + int resource_type; + int fast_clear; + int compression; + int rat; + struct radeon_bo *bo; +} cb_config_t; + +/* Shader */ +typedef struct { + uint64_t shader_addr; + uint32_t shader_size; + int num_gprs; + int stack_size; + int dx10_clamp; + int clamp_consts; + int export_mode; + int uncached_first_inst; + int single_round; + int double_round; + int allow_sdi; + int allow_sd0; + int allow_ddi; + int allow_ddo; + struct radeon_bo *bo; +} shader_config_t; + +/* Shader consts */ +typedef struct { + int type; + int size_bytes; + uint64_t const_addr; + struct radeon_bo *bo; +} const_config_t; + +/* Vertex buffer / vtx resource */ +typedef struct { + int id; + uint64_t vb_addr; + uint32_t vtx_num_entries; + uint32_t vtx_size_dw; + int clamp_x; + int format; + int num_format_all; + int 
format_comp_all; + int srf_mode_all; + int endian; + int mem_req_size; + int dst_sel_x; + int dst_sel_y; + int dst_sel_z; + int dst_sel_w; + int uncached; + struct radeon_bo *bo; +} vtx_resource_t; + +/* Texture resource */ +typedef struct { + int id; + int w; + int h; + int pitch; + int depth; + int dim; + int array_mode; + int tile_type; + int format; + uint64_t base; + uint64_t mip_base; + uint32_t size; + int format_comp_x; + int format_comp_y; + int format_comp_z; + int format_comp_w; + int num_format_all; + int srf_mode_all; + int force_degamma; + int endian; + int dst_sel_x; + int dst_sel_y; + int dst_sel_z; + int dst_sel_w; + int base_level; + int last_level; + int base_array; + int last_array; + int perf_modulation; + int interlaced; + int min_lod; + struct radeon_bo *bo; + struct radeon_bo *mip_bo; +} tex_resource_t; + +/* Texture sampler */ +typedef struct { + int id; + /* Clamping */ + int clamp_x, clamp_y, clamp_z; + int border_color; + /* Filtering */ + int xy_mag_filter, xy_min_filter; + int z_filter; + int mip_filter; + bool_t high_precision_filter; /* ? */ + int perf_mip; /* ? 0-7 */ + int perf_z; /* ? 3 */ + /* LoD selection */ + int min_lod, max_lod; /* 0-0x3ff */ + int lod_bias; /* 0-0xfff (signed?) */ + int lod_bias2; /* ? 0-0xfff (signed?) */ + bool_t lod_uses_minor_axis; /* ? */ + /* Other stuff */ + bool_t point_sampling_clamp; /* ? */ + bool_t tex_array_override; /* ? */ + bool_t mc_coord_truncate; /* ? */ + bool_t force_degamma; /* ? */ + bool_t fetch_4; /* ? */ + bool_t sample_is_pcf; /* ? */ + bool_t type; /* ? */ + int depth_compare; /* only depth textures? 
*/ + int chroma_key; + int truncate_coord; + bool_t disable_cube_wrap; +} tex_sampler_t; + +/* Draw command */ +typedef struct { + uint32_t prim_type; + uint32_t vgt_draw_initiator; + uint32_t index_type; + uint32_t num_instances; + uint32_t num_indices; +} draw_config_t; + +#define BEGIN_BATCH(n) \ +do { \ + radeon_ddx_cs_start(pScrn, (n), __FILE__, __func__, __LINE__); \ +} while(0) +#define END_BATCH() \ +do { \ + radeon_cs_end(info->cs, __FILE__, __func__, __LINE__); \ +} while(0) +#define RELOC_BATCH(bo, rd, wd) \ +do { \ + int _ret; \ + _ret = radeon_cs_write_reloc(info->cs, (bo), (rd), (wd), 0); \ + if (_ret) ErrorF("reloc emit failure %d (%s %d)\n", _ret, __func__, __LINE__); \ +} while(0) +#define E32(dword) \ +do { \ + radeon_cs_write_dword(info->cs, (dword)); \ +} while (0) + +#define EFLOAT(val) \ +do { \ + union { float f; uint32_t d; } a; \ + a.f = (val); \ + E32(a.d); \ +} while (0) + +#define PACK3(cmd, num) \ +do { \ + E32(RADEON_CP_PACKET3 | ((cmd) << 8) | ((((num) - 1) & 0x3fff) << 16)); \ +} while (0) + +/* write num registers, start at reg */ +/* If register falls in a special area, special commands are issued */ +#define PACK0(reg, num) \ +do { \ + if ((reg) >= SET_CONFIG_REG_offset && (reg) < SET_CONFIG_REG_end) { \ + PACK3(IT_SET_CONFIG_REG, (num) + 1); \ + E32(((reg) - SET_CONFIG_REG_offset) >> 2); \ + } else if ((reg) >= SET_CONTEXT_REG_offset && (reg) < SET_CONTEXT_REG_end) { \ + PACK3(IT_SET_CONTEXT_REG, (num) + 1); \ + E32(((reg) - SET_CONTEXT_REG_offset) >> 2); \ + } else if ((reg) >= SET_RESOURCE_offset && (reg) < SET_RESOURCE_end) { \ + PACK3(IT_SET_RESOURCE, num + 1); \ + E32(((reg) - SET_RESOURCE_offset) >> 2); \ + } else if ((reg) >= SET_SAMPLER_offset && (reg) < SET_SAMPLER_end) { \ + PACK3(IT_SET_SAMPLER, (num) + 1); \ + E32((reg - SET_SAMPLER_offset) >> 2); \ + } else if ((reg) >= SET_CTL_CONST_offset && (reg) < SET_CTL_CONST_end) { \ + PACK3(IT_SET_CTL_CONST, (num) + 1); \ + E32(((reg) - SET_CTL_CONST_offset) >> 2); \ + } else 
if ((reg) >= SET_LOOP_CONST_offset && (reg) < SET_LOOP_CONST_end) { \ + PACK3(IT_SET_LOOP_CONST, (num) + 1); \ + E32(((reg) - SET_LOOP_CONST_offset) >> 2); \ + } else if ((reg) >= SET_BOOL_CONST_offset && (reg) < SET_BOOL_CONST_end) { \ + PACK3(IT_SET_BOOL_CONST, (num) + 1); \ + E32(((reg) - SET_BOOL_CONST_offset) >> 2); \ + } else { \ + E32(CP_PACKET0 ((reg), (num) - 1)); \ + } \ +} while (0) + +/* write a single register */ +#define EREG(reg, val) \ +do { \ + PACK0((reg), 1); \ + E32((val)); \ +} while (0) + +void +evergreen_start_3d(ScrnInfoPtr pScrn); +void +evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain); +void +evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop); +void +evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain); +void +evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain); +void +evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain); +void +evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain); +void +evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val); +void +evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain); +void +evergreen_set_tex_sampler(ScrnInfoPtr pScrn, tex_sampler_t *s); +void +evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2); +void +evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2); +void +evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2); +void +evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2); +void +evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2); +void +evergreen_set_default_state(ScrnInfoPtr pScrn); +void +evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf); + +void evergreen_finish_op(ScrnInfoPtr 
pScrn, int vtx_size); + +Bool +EVERGREENSetAccelState(ScrnInfoPtr pScrn, + struct r600_accel_object *src0, + struct r600_accel_object *src1, + struct r600_accel_object *dst, + uint32_t vs_offset, uint32_t ps_offset, + int rop, Pixel planemask); + +extern Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index); +extern void RADEONFinishAccess_CS(PixmapPtr pPix, int index); +extern void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align); +extern void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv); +extern struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix); +extern Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix); + +#endif diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c new file mode 100644 index 00000000..a6746806 --- /dev/null +++ b/src/evergreen_textured_videofuncs.c @@ -0,0 +1,556 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Author: Alex Deucher <alexander.deucher@amd.com> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" + +#include "exa.h" + +#include "radeon.h" +#include "radeon_reg.h" +#include "evergreen_shader.h" +#include "evergreen_reg.h" +#include "evergreen_state.h" + +#include "radeon_video.h" + +#include <X11/extensions/Xv.h> +#include "fourcc.h" + +#include "damage.h" + +#include "radeon_exa_shared.h" +#include "radeon_vbo.h" + +/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces + note the difference to the parameters used in overlay are due + to 10bit vs. float calcs */ +static REF_TRANSFORM trans[2] = +{ + {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */ + {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */ +}; + +void +EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + PixmapPtr pPixmap = pPriv->pPixmap; + BoxPtr pBox = REGION_RECTS(&pPriv->clip); + int nBox = REGION_NUM_RECTS(&pPriv->clip); + int dstxoff, dstyoff; + struct r600_accel_object src_obj, dst_obj; + cb_config_t cb_conf; + tex_resource_t tex_res; + tex_sampler_t tex_samp; + shader_config_t vs_conf, ps_conf; + /* + * y' = y - .0625 + * u' = u - .5 + * v' = v - .5; + * + * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' + * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' + * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' + * + * DP3 might look like the straightforward solution + * but we'd need to move the texture yuv values in + * the same reg for this to work. Therefore use MADs. + * Brightness just adds to the off constant. 
+ * Contrast is multiplication of luminance. + * Saturation and hue change the u and v coeffs. + * Default values (before adjustments - depend on colorspace): + * yco = 1.1643 + * uco = 0, -0.39173, 2.017 + * vco = 1.5958, -0.8129, 0 + * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], + * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], + * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], + * + * temp = MAD(yco, yuv.yyyy, off) + * temp = MAD(uco, yuv.uuuu, temp) + * result = MAD(vco, yuv.vvvv, temp) + */ + /* TODO: calc consts in the shader */ + const float Loff = -0.0627; + const float Coff = -0.502; + float uvcosf, uvsinf; + float yco; + float uco[3], vco[3], off[3]; + float bright, cont, gamma; + int ref = pPriv->transform_index; + Bool needgamma = FALSE; + float *ps_alu_consts; + const_config_t ps_const_conf; + float *vs_alu_consts; + const_config_t vs_const_conf; + + cont = RTFContrast(pPriv->contrast); + bright = RTFBrightness(pPriv->brightness); + gamma = (float)pPriv->gamma / 1000.0; + uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); + uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); + /* overlay video also does pre-gamma contrast/sat adjust, should we? */ + + yco = trans[ref].RefLuma * cont; + uco[0] = -trans[ref].RefRCr * uvsinf; + uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; + uco[2] = trans[ref].RefBCb * uvcosf; + vco[0] = trans[ref].RefRCr * uvcosf; + vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; + vco[2] = trans[ref].RefBCb * uvsinf; + off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; + off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; + off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; + + // XXX + gamma = 1.0; + + if (gamma != 1.0) { + needgamma = TRUE; + /* note: gamma correction is out = in ^ gamma; + gpu can only do LG2/EX2 therefore we transform into + in ^ gamma = 2 ^ (log2(in) * gamma). + Lots of scalar ops, unfortunately (better solution?) 
- + without gamma that's 3 inst, with gamma it's 10... + could use different gamma factors per channel, + if that's of any use. */ + } + + CLEAR (cb_conf); + CLEAR (tex_res); + CLEAR (tex_samp); + CLEAR (vs_conf); + CLEAR (ps_conf); + CLEAR (vs_const_conf); + CLEAR (ps_const_conf); + +#if defined(XF86DRM_MODE) + if (info->cs) { + dst_obj.offset = 0; + src_obj.offset = 0; + dst_obj.bo = radeon_get_pixmap_bo(pPixmap); + } else +#endif + { + dst_obj.offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; + src_obj.offset = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; + dst_obj.bo = src_obj.bo = NULL; + } + dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); + + src_obj.pitch = pPriv->src_pitch; + src_obj.width = pPriv->w; + src_obj.height = pPriv->h; + src_obj.bpp = 16; + src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; + src_obj.bo = pPriv->src_bo[pPriv->currentBuffer]; + + dst_obj.width = pPixmap->drawable.width; + dst_obj.height = pPixmap->drawable.height; + dst_obj.bpp = pPixmap->drawable.bitsPerPixel; + dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; + + if (!EVERGREENSetAccelState(pScrn, + &src_obj, + NULL, + &dst_obj, + accel_state->xv_vs_offset, accel_state->xv_ps_offset, + 3, 0xffffffff)) + return; + +#ifdef COMPOSITE + dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; + dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; +#else + dstxoff = 0; + dstyoff = 0; +#endif + + radeon_vbo_check(pScrn, &accel_state->vbo, 16); + radeon_vbo_check(pScrn, &accel_state->cbuf, 512); + radeon_cp_start(pScrn); + + evergreen_set_default_state(pScrn); + + evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + + /* PS bool constant */ + switch(pPriv->id) { + 
case FOURCC_YV12: + case FOURCC_I420: + evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0)); + break; + case FOURCC_UYVY: + case FOURCC_YUY2: + default: + evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0)); + break; + } + + /* Shader */ + vs_conf.shader_addr = accel_state->vs_mc_addr; + vs_conf.shader_size = accel_state->vs_size; + vs_conf.num_gprs = 2; + vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; + evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); + + ps_conf.shader_addr = accel_state->ps_mc_addr; + ps_conf.shader_size = accel_state->ps_size; + ps_conf.num_gprs = 3; + ps_conf.stack_size = 1; + ps_conf.clamp_consts = 0; + ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; + evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); + + /* Texture */ + switch(pPriv->id) { + case FOURCC_YV12: + case FOURCC_I420: + accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h; + + /* Y texture */ + tex_res.id = 0; + tex_res.w = accel_state->src_obj[0].width; + tex_res.h = accel_state->src_obj[0].height; + tex_res.pitch = accel_state->src_obj[0].pitch; + tex_res.depth = 0; + tex_res.dim = SQ_TEX_DIM_2D; + tex_res.base = accel_state->src_obj[0].offset; + tex_res.mip_base = accel_state->src_obj[0].offset; + tex_res.size = accel_state->src_size[0]; + tex_res.bo = accel_state->src_obj[0].bo; + tex_res.mip_bo = accel_state->src_obj[0].bo; + + tex_res.format = FMT_8; + tex_res.dst_sel_x = SQ_SEL_X; /* Y */ + tex_res.dst_sel_y = SQ_SEL_1; + tex_res.dst_sel_z = SQ_SEL_1; + tex_res.dst_sel_w = SQ_SEL_1; + + tex_res.base_level = 0; + tex_res.last_level = 0; + tex_res.perf_modulation = 0; + tex_res.interlaced = 0; + evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); + + /* Y sampler */ + tex_samp.id = 0; + tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_z = SQ_TEX_WRAP; + + /* xxx: switch to bicubic */ + 
tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; + tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; + + tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; + tex_samp.mip_filter = 0; /* no mipmap */ + evergreen_set_tex_sampler(pScrn, &tex_samp); + + /* U or V texture */ + tex_res.id = 1; + tex_res.format = FMT_8; + tex_res.w = accel_state->src_obj[0].width >> 1; + tex_res.h = accel_state->src_obj[0].height >> 1; + tex_res.pitch = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, 256); + tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ + tex_res.dst_sel_y = SQ_SEL_1; + tex_res.dst_sel_z = SQ_SEL_1; + tex_res.dst_sel_w = SQ_SEL_1; + tex_res.interlaced = 0; + + tex_res.base = accel_state->src_obj[0].offset + pPriv->planev_offset; + tex_res.mip_base = accel_state->src_obj[0].offset + pPriv->planev_offset; + tex_res.size = accel_state->src_size[0] / 4; + evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); + + /* U or V sampler */ + tex_samp.id = 1; + evergreen_set_tex_sampler(pScrn, &tex_samp); + + /* U or V texture */ + tex_res.id = 2; + tex_res.format = FMT_8; + tex_res.w = accel_state->src_obj[0].width >> 1; + tex_res.h = accel_state->src_obj[0].height >> 1; + tex_res.pitch = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, 256); + tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ + tex_res.dst_sel_y = SQ_SEL_1; + tex_res.dst_sel_z = SQ_SEL_1; + tex_res.dst_sel_w = SQ_SEL_1; + tex_res.interlaced = 0; + + tex_res.base = accel_state->src_obj[0].offset + pPriv->planeu_offset; + tex_res.mip_base = accel_state->src_obj[0].offset + pPriv->planeu_offset; + tex_res.size = accel_state->src_size[0] / 4; + evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); + + /* UV sampler */ + tex_samp.id = 2; + evergreen_set_tex_sampler(pScrn, &tex_samp); + break; + case FOURCC_UYVY: + case FOURCC_YUY2: + default: + accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h; + + /* Y texture */ + tex_res.id = 0; + tex_res.w = 
accel_state->src_obj[0].width; + tex_res.h = accel_state->src_obj[0].height; + tex_res.pitch = accel_state->src_obj[0].pitch >> 1; + tex_res.depth = 0; + tex_res.dim = SQ_TEX_DIM_2D; + tex_res.base = accel_state->src_obj[0].offset; + tex_res.mip_base = accel_state->src_obj[0].offset; + tex_res.size = accel_state->src_size[0]; + tex_res.bo = accel_state->src_obj[0].bo; + tex_res.mip_bo = accel_state->src_obj[0].bo; + + tex_res.format = FMT_8_8; + if (pPriv->id == FOURCC_UYVY) + tex_res.dst_sel_x = SQ_SEL_Y; /* Y */ + else + tex_res.dst_sel_x = SQ_SEL_X; /* Y */ + tex_res.dst_sel_y = SQ_SEL_1; + tex_res.dst_sel_z = SQ_SEL_1; + tex_res.dst_sel_w = SQ_SEL_1; + + tex_res.base_level = 0; + tex_res.last_level = 0; + tex_res.perf_modulation = 0; + tex_res.interlaced = 0; + evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); + + /* Y sampler */ + tex_samp.id = 0; + tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_z = SQ_TEX_WRAP; + + tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; + tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; + + tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; + tex_samp.mip_filter = 0; /* no mipmap */ + evergreen_set_tex_sampler(pScrn, &tex_samp); + + /* UV texture */ + tex_res.id = 1; + tex_res.format = FMT_8_8_8_8; + tex_res.w = accel_state->src_obj[0].width >> 1; + tex_res.h = accel_state->src_obj[0].height; + tex_res.pitch = accel_state->src_obj[0].pitch >> 2; + if (pPriv->id == FOURCC_UYVY) { + tex_res.dst_sel_x = SQ_SEL_X; /* V */ + tex_res.dst_sel_y = SQ_SEL_Z; /* U */ + } else { + tex_res.dst_sel_x = SQ_SEL_Y; /* V */ + tex_res.dst_sel_y = SQ_SEL_W; /* U */ + } + tex_res.dst_sel_z = SQ_SEL_1; + tex_res.dst_sel_w = SQ_SEL_1; + tex_res.interlaced = 0; + + tex_res.base = accel_state->src_obj[0].offset; + tex_res.mip_base = accel_state->src_obj[0].offset; + tex_res.size = accel_state->src_size[0]; + evergreen_set_tex_resource(pScrn, &tex_res, 
accel_state->src_obj[0].domain); + + /* UV sampler */ + tex_samp.id = 1; + evergreen_set_tex_sampler(pScrn, &tex_samp); + break; + } + + cb_conf.id = 0; + cb_conf.w = accel_state->dst_obj.pitch; + cb_conf.h = accel_state->dst_obj.height; + cb_conf.base = accel_state->dst_obj.offset; + cb_conf.bo = accel_state->dst_obj.bo; + + switch (accel_state->dst_obj.bpp) { + case 16: + if (pPixmap->drawable.depth == 15) { + cb_conf.format = COLOR_1_5_5_5; + cb_conf.comp_swap = 1; /* ARGB */ + } else { + cb_conf.format = COLOR_5_6_5; + cb_conf.comp_swap = 2; /* RGB */ + } + break; + case 32: + cb_conf.format = COLOR_8_8_8_8; + cb_conf.comp_swap = 1; /* ARGB */ + break; + default: + return; + } + + cb_conf.source_format = EXPORT_4C_16BPC; + cb_conf.blend_clamp = 1; + evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain); + + /* Render setup */ + BEGIN_BATCH(23); + EREG(CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift)); + EREG(CB_COLOR_CONTROL, ((0xcc << ROP3_shift) | + (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift))); + EREG(CB_BLEND0_CONTROL, 0); + + /* Interpolator setup */ + /* export tex coords from VS */ + EREG(SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); + EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); + EREG(SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | + (0x03 << DEFAULT_VAL_shift))); + + /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x + * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ + PACK0(SPI_PS_IN_CONTROL_0, 3); + E32(((1 << NUM_INTERP_shift) | + LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0 + E32(0); // SPI_PS_IN_CONTROL_1 + E32(0); // SPI_INTERP_CONTROL_0 + END_BATCH(); + + /* PS alu constants */ + ps_const_conf.size_bytes = 256; + ps_const_conf.type = SHADER_TYPE_PS; + ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256); + + ps_alu_consts[0] = off[0]; + ps_alu_consts[1] = off[1]; + ps_alu_consts[2] = off[2]; + ps_alu_consts[3] = yco; + + ps_alu_consts[4] = uco[0]; + 
ps_alu_consts[5] = uco[1]; + ps_alu_consts[6] = uco[2]; + ps_alu_consts[7] = gamma; + + ps_alu_consts[8] = vco[0]; + ps_alu_consts[9] = vco[1]; + ps_alu_consts[10] = vco[2]; + ps_alu_consts[11] = 0.0; + + radeon_vbo_commit(pScrn, &accel_state->cbuf); + + /* PS alu constants */ + ps_const_conf.bo = accel_state->cbuf.vb_bo; + ps_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op; + evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT); + + /* VS alu constants */ + vs_const_conf.size_bytes = 256; + vs_const_conf.type = SHADER_TYPE_VS; + vs_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256); + + vs_alu_consts[0] = 1.0 / pPriv->w; + vs_alu_consts[1] = 1.0 / pPriv->h; + vs_alu_consts[2] = 0.0; + vs_alu_consts[3] = 0.0; + + radeon_vbo_commit(pScrn, &accel_state->cbuf); + + /* VS alu constants */ + vs_const_conf.bo = accel_state->cbuf.vb_bo; + vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op + 256; + evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT); + + if (pPriv->vsync) { + xf86CrtcPtr crtc; + if (pPriv->desired_crtc) + crtc = pPriv->desired_crtc; + else + crtc = radeon_pick_best_crtc(pScrn, + pPriv->drw_x, + pPriv->drw_x + pPriv->dst_w, + pPriv->drw_y, + pPriv->drw_y + pPriv->dst_h); + if (crtc) + evergreen_cp_wait_vline_sync(pScrn, pPixmap, + crtc, + pPriv->drw_y - crtc->y, + (pPriv->drw_y - crtc->y) + pPriv->dst_h); + } + + while (nBox--) { + int srcX, srcY, srcw, srch; + int dstX, dstY, dstw, dsth; + float *vb; + + + dstX = pBox->x1 + dstxoff; + dstY = pBox->y1 + dstyoff; + dstw = pBox->x2 - pBox->x1; + dsth = pBox->y2 - pBox->y1; + + srcX = pPriv->src_x; + srcX += ((pBox->x1 - pPriv->drw_x) * + pPriv->src_w) / pPriv->dst_w; + srcY = pPriv->src_y; + srcY += ((pBox->y1 - pPriv->drw_y) * + pPriv->src_h) / pPriv->dst_h; + + srcw = (pPriv->src_w * dstw) / pPriv->dst_w; + srch = (pPriv->src_h * dsth) / pPriv->dst_h; + + vb = radeon_vbo_space(pScrn, 
&accel_state->vbo, 16); + + vb[0] = (float)dstX; + vb[1] = (float)dstY; + vb[2] = (float)srcX; + vb[3] = (float)srcY; + + vb[4] = (float)dstX; + vb[5] = (float)(dstY + dsth); + vb[6] = (float)srcX; + vb[7] = (float)(srcY + srch); + + vb[8] = (float)(dstX + dstw); + vb[9] = (float)(dstY + dsth); + vb[10] = (float)(srcX + srcw); + vb[11] = (float)(srcY + srch); + + radeon_vbo_commit(pScrn, &accel_state->vbo); + + pBox++; + } + + evergreen_finish_op(pScrn, 16); + + DamageDamageRegion(pPriv->pDraw, &pPriv->clip); +} diff --git a/src/r600_exa.c b/src/r600_exa.c index 85440343..a04d66ac 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -43,7 +43,7 @@ /* #define SHOW_VERTEXES */ -uint32_t RADEON_ROP[16] = { +uint32_t R600_ROP[16] = { RADEON_ROP3_ZERO, /* GXclear */ RADEON_ROP3_DSa, /* Gxand */ RADEON_ROP3_SDna, /* GXandReverse */ @@ -208,14 +208,14 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) CLEAR (vs_conf); CLEAR (ps_conf); - radeon_vbo_check(pScrn, 16); + radeon_vbo_check(pScrn, &accel_state->vbo, 16); radeon_cp_start(pScrn); - set_default_state(pScrn, accel_state->ib); + r600_set_default_state(pScrn, accel_state->ib); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); - set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); - set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); /* Shader */ vs_conf.shader_addr = accel_state->vs_mc_addr; @@ -223,7 +223,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) vs_conf.num_gprs = 2; 
vs_conf.stack_size = 0; vs_conf.bo = accel_state->shaders_bo; - vs_setup (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); + r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.shader_size = accel_state->ps_size; @@ -233,7 +233,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; ps_conf.bo = accel_state->shaders_bo; - ps_setup (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); + r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); cb_conf.id = 0; cb_conf.w = accel_state->dst_obj.pitch; @@ -253,7 +253,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) } cb_conf.source_format = 1; cb_conf.blend_clamp = 1; - set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); + r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); /* Render setup */ if (accel_state->planemask & 0x000000ff) @@ -266,7 +266,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) pmask |= 8; /* A */ BEGIN_BATCH(20); EREG(accel_state->ib, CB_TARGET_MASK, (pmask << TARGET0_ENABLE_shift)); - EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[accel_state->rop]); + EREG(accel_state->ib, CB_COLOR_CONTROL, R600_ROP[accel_state->rop]); /* Interpolator setup */ /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */ @@ -312,8 +312,8 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) ps_alu_consts[2] = (float)b / 255; /* B */ ps_alu_consts[3] = (float)a / 255; /* A */ } - set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps, - sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); + r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps, + sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); if (accel_state->vsync) RADEONVlineHelperClear(pScrn); @@ -333,7 +333,7 @@ 
R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) if (accel_state->vsync) RADEONVlineHelperSet(pScrn, x1, y1, x2, y2); - vb = radeon_vbo_space(pScrn, 8); + vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8); vb[0] = (float)x1; vb[1] = (float)y1; @@ -344,7 +344,7 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) vb[4] = (float)x2; vb[5] = (float)y2; - radeon_vbo_commit(pScrn); + radeon_vbo_commit(pScrn, &accel_state->vbo); } static void @@ -355,10 +355,10 @@ R600DoneSolid(PixmapPtr pPix) struct radeon_accel_state *accel_state = info->accel_state; if (accel_state->vsync) - cp_wait_vline_sync(pScrn, accel_state->ib, pPix, - accel_state->vline_crtc, - accel_state->vline_y1, - accel_state->vline_y2); + r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix, + accel_state->vline_crtc, + accel_state->vline_y1, + accel_state->vline_y2); r600_finish_op(pScrn, 8); } @@ -380,14 +380,14 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn) CLEAR (vs_conf); CLEAR (ps_conf); - radeon_vbo_check(pScrn, 16); + radeon_vbo_check(pScrn, &accel_state->vbo, 16); radeon_cp_start(pScrn); - set_default_state(pScrn, accel_state->ib); + r600_set_default_state(pScrn, accel_state->ib); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); - set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); - set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); /* Shader */ vs_conf.shader_addr = accel_state->vs_mc_addr; @@ -395,7 +395,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn) vs_conf.num_gprs = 2; 
vs_conf.stack_size = 0; vs_conf.bo = accel_state->shaders_bo; - vs_setup (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); + r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.shader_size = accel_state->ps_size; @@ -405,7 +405,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn) ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; ps_conf.bo = accel_state->shaders_bo; - ps_setup (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); + r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); /* Texture */ tex_res.id = 0; @@ -443,7 +443,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn) tex_res.base_level = 0; tex_res.last_level = 0; tex_res.perf_modulation = 0; - set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); + r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); tex_samp.id = 0; tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; @@ -453,7 +453,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn) tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; tex_samp.mip_filter = 0; /* no mipmap */ - set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); cb_conf.id = 0; cb_conf.w = accel_state->dst_obj.pitch; @@ -472,7 +472,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn) } cb_conf.source_format = 1; cb_conf.blend_clamp = 1; - set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); + r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); /* Render setup */ if (accel_state->planemask & 0x000000ff) @@ -485,7 +485,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn) pmask |= 8; /* A */ BEGIN_BATCH(20); EREG(accel_state->ib, CB_TARGET_MASK, (pmask << TARGET0_ENABLE_shift)); - EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[accel_state->rop]); + EREG(accel_state->ib, CB_COLOR_CONTROL, 
R600_ROP[accel_state->rop]); /* Interpolator setup */ /* export tex coord from VS */ @@ -521,10 +521,10 @@ R600DoCopyVline(PixmapPtr pPix) struct radeon_accel_state *accel_state = info->accel_state; if (accel_state->vsync) - cp_wait_vline_sync(pScrn, accel_state->ib, pPix, - accel_state->vline_crtc, - accel_state->vline_y1, - accel_state->vline_y2); + r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix, + accel_state->vline_crtc, + accel_state->vline_y1, + accel_state->vline_y2); r600_finish_op(pScrn, 16); } @@ -535,9 +535,11 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn, int dstX, int dstY, int w, int h) { + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; float *vb; - vb = radeon_vbo_space(pScrn, 16); + vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16); vb[0] = (float)dstX; vb[1] = (float)dstY; @@ -554,7 +556,7 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn, vb[10] = (float)(srcX + w); vb[11] = (float)(srcY + h); - radeon_vbo_commit(pScrn); + radeon_vbo_commit(pScrn, &accel_state->vbo); } static Bool @@ -603,7 +605,7 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, src_obj.height = pSrc->drawable.height; src_obj.bpp = pSrc->drawable.bitsPerPixel; src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; - + dst_obj.width = pDst->drawable.width; dst_obj.height = pDst->drawable.height; dst_obj.bpp = pDst->drawable.bitsPerPixel; @@ -1060,7 +1062,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, tex_res.base_level = 0; tex_res.last_level = 0; tex_res.perf_modulation = 0; - set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain); + r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain); tex_samp.id = unit; tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK; @@ -1102,7 +1104,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, tex_samp.clamp_z = SQ_TEX_WRAP; tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 
tex_samp.mip_filter = 0; /* no mipmap */ - set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); if (pPict->transform != 0) { accel_state->is_transform[unit] = TRUE; @@ -1132,8 +1134,8 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, } /* VS alu constants */ - set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2), - sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts); + r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2), + sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts); return TRUE; } @@ -1264,7 +1266,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, if (info->cs) { mask_obj.offset = 0; mask_obj.bo = radeon_get_pixmap_bo(pMask); - } else + } else #endif { mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset; @@ -1318,39 +1320,39 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, CLEAR (ps_conf); if (pMask) - radeon_vbo_check(pScrn, 24); + radeon_vbo_check(pScrn, &accel_state->vbo, 24); else - radeon_vbo_check(pScrn, 16); + radeon_vbo_check(pScrn, &accel_state->vbo, 16); radeon_cp_start(pScrn); - set_default_state(pScrn, accel_state->ib); + r600_set_default_state(pScrn, accel_state->ib); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); - set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); - set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); if 
(!R600TextureSetup(pSrcPicture, pSrc, 0)) { R600IBDiscard(pScrn, accel_state->ib); - radeon_vb_discard(pScrn); + radeon_vb_discard(pScrn, &accel_state->vbo); return FALSE; } if (pMask) { if (!R600TextureSetup(pMaskPicture, pMask, 1)) { R600IBDiscard(pScrn, accel_state->ib); - radeon_vb_discard(pScrn); + radeon_vb_discard(pScrn, &accel_state->vbo); return FALSE; } } else accel_state->is_transform[1] = FALSE; if (pMask) { - set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0)); - set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0)); + r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0)); + r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0)); } else { - set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0)); - set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0)); + r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0)); + r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0)); } /* Shader */ @@ -1359,7 +1361,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, vs_conf.num_gprs = 3; vs_conf.stack_size = 1; vs_conf.bo = accel_state->shaders_bo; - vs_setup (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); + r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.shader_size = accel_state->ps_size; @@ -1369,7 +1371,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; ps_conf.bo = accel_state->shaders_bo; - ps_setup (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); + r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); cb_conf.id = 0; cb_conf.w = accel_state->dst_obj.pitch; @@ -1405,7 +1407,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, } cb_conf.source_format = 1; cb_conf.blend_clamp = 1; - set_render_target(pScrn, 
accel_state->ib, &cb_conf, accel_state->dst_obj.domain); + r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); BEGIN_BATCH(24); EREG(accel_state->ib, CB_TARGET_MASK, (0xf << TARGET0_ENABLE_shift)); @@ -1414,10 +1416,10 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, if (info->ChipFamily == CHIP_FAMILY_R600) { /* no per-MRT blend on R600 */ - EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift)); + EREG(accel_state->ib, CB_COLOR_CONTROL, R600_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift)); EREG(accel_state->ib, CB_BLEND_CONTROL, blendcntl); } else { - EREG(accel_state->ib, CB_COLOR_CONTROL, (RADEON_ROP[3] | + EREG(accel_state->ib, CB_COLOR_CONTROL, (R600_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift) | PER_MRT_BLEND_bit)); EREG(accel_state->ib, CB_BLEND0_CONTROL, blendcntl); @@ -1484,7 +1486,7 @@ static void R600Composite(PixmapPtr pDst, if (accel_state->msk_pic) { - vb = radeon_vbo_space(pScrn, 24); + vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24); vb[0] = (float)dstX; vb[1] = (float)dstY; @@ -1507,11 +1509,11 @@ static void R600Composite(PixmapPtr pDst, vb[16] = (float)(maskX + w); vb[17] = (float)(maskY + h); - radeon_vbo_commit(pScrn); + radeon_vbo_commit(pScrn, &accel_state->vbo); } else { - vb = radeon_vbo_space(pScrn, 16); + vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16); vb[0] = (float)dstX; vb[1] = (float)dstY; @@ -1528,7 +1530,7 @@ static void R600Composite(PixmapPtr pDst, vb[10] = (float)(srcX + w); vb[11] = (float)(srcY + h); - radeon_vbo_commit(pScrn); + radeon_vbo_commit(pScrn, &accel_state->vbo); } @@ -1542,10 +1544,10 @@ static void R600DoneComposite(PixmapPtr pDst) int vtx_size; if (accel_state->vsync) - cp_wait_vline_sync(pScrn, accel_state->ib, pDst, - accel_state->vline_crtc, - accel_state->vline_y1, - accel_state->vline_y2); + r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst, + accel_state->vline_crtc, + accel_state->vline_y1, + 
accel_state->vline_y2); vtx_size = accel_state->msk_pic ? 24 : 16; @@ -1642,7 +1644,7 @@ R600CopyToVRAM(ScrnInfoPtr pScrn, } R600IBDiscard(pScrn, scratch); - radeon_vb_discard(pScrn); + radeon_vb_discard(pScrn, &accel_state->vbo); return TRUE; } @@ -1756,7 +1758,7 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, } R600IBDiscard(pScrn, scratch); - radeon_vb_discard(pScrn); + radeon_vb_discard(pScrn, &accel_state->vbo); return TRUE; @@ -1941,7 +1943,7 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w, src_obj.bpp = bpp; src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; src_obj.bo = radeon_get_pixmap_bo(pSrc); - + dst_obj.pitch = dst_pitch_hw; dst_obj.width = w; dst_obj.height = h; @@ -2263,9 +2265,9 @@ R600DrawInit(ScreenPtr pScreen) info->accel_state->src_obj[1].bo = NULL; info->accel_state->dst_obj.bo = NULL; info->accel_state->copy_area_bo = NULL; - info->accel_state->vb_start_op = -1; + info->accel_state->vbo.vb_start_op = -1; info->accel_state->finish_op = r600_finish_op; - info->accel_state->verts_per_op = 3; + info->accel_state->vbo.verts_per_op = 3; RADEONVlineHelperClear(pScrn); #ifdef XF86DRM_MODE diff --git a/src/r600_state.h b/src/r600_state.h index 151f402a..1e8dea3e 100644 --- a/src/r600_state.h +++ b/src/r600_state.h @@ -274,48 +274,46 @@ do { \ void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib); void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib); -uint64_t -upload (ScrnInfoPtr pScrn, void *shader, int size, int offset); void -wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib); +r600_wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib); void -wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib); +r600_wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib); void -start_3d(ScrnInfoPtr pScrn, drmBufPtr ib); +r600_start_3d(ScrnInfoPtr pScrn, drmBufPtr ib); void -set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain); +r600_set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, 
cb_config_t *cb_conf, uint32_t domain); void -cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop); +r600_cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop); void -fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain); +r600_fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain); void -vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain); +r600_vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain); void -ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain); +r600_ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain); void -set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf); +r600_set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf); void -set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val); +r600_set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val); void -set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain); +r600_set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain); void -set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s); +r600_set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s); void -set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); +r600_set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); void -set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2); +r600_set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2); void -set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); 
+r600_set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); void -set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); +r600_set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); void -set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2); +r600_set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2); void -set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib); +r600_set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib); void -draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices); +r600_draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices); void -draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf); +r600_draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf); void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size); diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index e18a9c82..88073ac5 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -206,25 +206,25 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) dstyoff = 0; #endif - radeon_vbo_check(pScrn, 16); + radeon_vbo_check(pScrn, &accel_state->vbo, 16); radeon_cp_start(pScrn); - set_default_state(pScrn, accel_state->ib); + r600_set_default_state(pScrn, accel_state->ib); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); - set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); - set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); + r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, 
accel_state->dst_obj.height); + r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); /* PS bool constant */ switch(pPriv->id) { case FOURCC_YV12: case FOURCC_I420: - set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0)); + r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0)); break; case FOURCC_UYVY: case FOURCC_YUY2: default: - set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0)); + r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0)); break; } @@ -234,7 +234,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) vs_conf.num_gprs = 2; vs_conf.stack_size = 0; vs_conf.bo = accel_state->shaders_bo; - vs_setup (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); + r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.shader_size = accel_state->ps_size; @@ -244,11 +244,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; ps_conf.bo = accel_state->shaders_bo; - ps_setup (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); + r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); /* PS alu constants */ - set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps, - sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); + r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps, + sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); /* Texture */ switch(pPriv->id) { @@ -280,7 +280,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_res.last_level = 0; tex_res.perf_modulation = 0; tex_res.interlaced = 0; - set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); + r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); /* Y sampler */ 
tex_samp.id = 0; @@ -294,7 +294,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; tex_samp.mip_filter = 0; /* no mipmap */ - set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); /* U or V texture */ tex_res.id = 1; @@ -311,11 +311,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_res.base = accel_state->src_obj[0].offset + pPriv->planev_offset; tex_res.mip_base = accel_state->src_obj[0].offset + pPriv->planev_offset; tex_res.size = accel_state->src_size[0] / 4; - set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); + r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); /* U or V sampler */ tex_samp.id = 1; - set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); /* U or V texture */ tex_res.id = 2; @@ -332,11 +332,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_res.base = accel_state->src_obj[0].offset + pPriv->planeu_offset; tex_res.mip_base = accel_state->src_obj[0].offset + pPriv->planeu_offset; tex_res.size = accel_state->src_size[0] / 4; - set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); + r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); /* UV sampler */ tex_samp.id = 2; - set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); break; case FOURCC_UYVY: case FOURCC_YUY2: @@ -370,7 +370,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_res.last_level = 0; tex_res.perf_modulation = 0; tex_res.interlaced = 0; - set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); + r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); /* Y sampler */ 
tex_samp.id = 0; @@ -384,7 +384,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; tex_samp.mip_filter = 0; /* no mipmap */ - set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); /* UV texture */ tex_res.id = 1; @@ -406,11 +406,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_res.base = accel_state->src_obj[0].offset; tex_res.mip_base = accel_state->src_obj[0].offset; tex_res.size = accel_state->src_size[0]; - set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); + r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); /* UV sampler */ tex_samp.id = 1; - set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); break; } @@ -440,7 +440,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) cb_conf.source_format = 1; cb_conf.blend_clamp = 1; - set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); + r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); /* Render setup */ BEGIN_BATCH(20); @@ -469,8 +469,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) vs_alu_consts[3] = 0.0; /* VS alu constants */ - set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs, - sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts); + r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs, + sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts); if (pPriv->vsync) { xf86CrtcPtr crtc; @@ -483,10 +483,10 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) pPriv->drw_y, pPriv->drw_y + pPriv->dst_h); if (crtc) - cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap, - crtc, - pPriv->drw_y - crtc->y, - (pPriv->drw_y - crtc->y) + pPriv->dst_h); + r600_cp_wait_vline_sync(pScrn, 
accel_state->ib, pPixmap, + crtc, + pPriv->drw_y - crtc->y, + (pPriv->drw_y - crtc->y) + pPriv->dst_h); } while (nBox--) { @@ -510,7 +510,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) srcw = (pPriv->src_w * dstw) / pPriv->dst_w; srch = (pPriv->src_h * dsth) / pPriv->dst_h; - vb = radeon_vbo_space(pScrn, 16); + vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16); vb[0] = (float)dstX; vb[1] = (float)dstY; @@ -527,7 +527,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) vb[10] = (float)(srcX + srcw); vb[11] = (float)(srcY + srch); - radeon_vbo_commit(pScrn); + radeon_vbo_commit(pScrn, &accel_state->vbo); pBox++; } diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index ff7dfda3..e4365141 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -98,7 +98,7 @@ void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) } void -wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) +r600_wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -113,7 +113,7 @@ wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) } void -wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) +r600_wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -123,7 +123,7 @@ wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) } void -start_3d(ScrnInfoPtr pScrn, drmBufPtr ib) +r600_start_3d(ScrnInfoPtr pScrn, drmBufPtr ib) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -147,7 +147,7 @@ start_3d(ScrnInfoPtr pScrn, drmBufPtr ib) // asic stack/thread/gpr limits - need to query the drm static void -sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) +r600_sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) { uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; @@ -198,7 +198,7 @@ sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) } void -set_render_target(ScrnInfoPtr 
pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain) +r600_set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain) { uint32_t cb_color_info; int pitch, slice, h; @@ -276,8 +276,9 @@ set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_ } static void -cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr, - struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) +r600_cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, + uint32_t size, uint64_t mc_addr, + struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) { RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t cp_coher_size; @@ -297,7 +298,8 @@ cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_ } /* inserts a wait for vline in the command stream */ -void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, +void +r600_cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -380,7 +382,7 @@ void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, } void -fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain) +r600_fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain) { RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_pgm_resources; @@ -403,7 +405,7 @@ fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t dom } void -vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain) +r600_vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain) { RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_pgm_resources; @@ -419,9 +421,9 @@ vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t dom sq_pgm_resources |= UNCACHED_FIRST_INST_bit; /* flush SQ cache */ - 
cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit, - vs_conf->shader_size, vs_conf->shader_addr, - vs_conf->bo, domain, 0); + r600_cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit, + vs_conf->shader_size, vs_conf->shader_addr, + vs_conf->bo, domain, 0); BEGIN_BATCH(3 + 2); EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8); @@ -435,7 +437,7 @@ vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t dom } void -ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain) +r600_ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain) { RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_pgm_resources; @@ -453,9 +455,9 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t dom sq_pgm_resources |= CLAMP_CONSTS_bit; /* flush SQ cache */ - cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit, - ps_conf->shader_size, ps_conf->shader_addr, - ps_conf->bo, domain, 0); + r600_cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit, + ps_conf->shader_size, ps_conf->shader_addr, + ps_conf->bo, domain, 0); BEGIN_BATCH(3 + 2); EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8); @@ -470,7 +472,7 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t dom } void -set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf) +r600_set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf) { RADEONInfoPtr info = RADEONPTR(pScrn); int i; @@ -484,7 +486,7 @@ set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *co } void -set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) +r600_set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) { RADEONInfoPtr info = RADEONPTR(pScrn); /* bool register order is: ps, vs, gs; one register each @@ -496,7 +498,7 @@ set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) } static void 
-set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t domain) +r600_set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t domain) { RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; @@ -522,15 +524,15 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t (info->ChipFamily == CHIP_FAMILY_RS780) || (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) - cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit, - accel_state->vb_offset, accel_state->vb_mc_addr, - res->bo, - domain, 0); + r600_cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit, + accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr, + res->bo, + domain, 0); else - cp_set_surface_sync(pScrn, ib, VC_ACTION_ENA_bit, - accel_state->vb_offset, accel_state->vb_mc_addr, - res->bo, - domain, 0); + r600_cp_set_surface_sync(pScrn, ib, VC_ACTION_ENA_bit, + accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr, + res->bo, + domain, 0); BEGIN_BATCH(9 + 2); PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); @@ -546,7 +548,7 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t } void -set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain) +r600_set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain) { RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; @@ -599,9 +601,9 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint3 sq_tex_resource_word6 |= INTERLACED_bit; /* flush texture cache */ - cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit, - tex_res->size, tex_res->base, - tex_res->bo, domain, 0); + r600_cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit, + tex_res->size, tex_res->base, + tex_res->bo, domain, 0); BEGIN_BATCH(9 + 4); PACK0(ib, SQ_TEX_RESOURCE 
+ tex_res->id * SQ_TEX_RESOURCE_offset, 7); @@ -618,7 +620,7 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint3 } void -set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) +r600_set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) { RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; @@ -670,7 +672,7 @@ set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) //XXX deal with clip offsets in clip setup void -set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) +r600_set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -684,7 +686,7 @@ set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int } void -set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) +r600_set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -699,7 +701,7 @@ set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x } void -set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) +r600_set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -714,7 +716,7 @@ set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int } void -set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) +r600_set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -729,7 +731,7 @@ set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int } void -set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) +r600_set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, 
int id, int x1, int y1, int x2, int y2) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -747,7 +749,7 @@ set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, i */ void -set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) +r600_set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) { tex_resource_t tex_res; shader_config_t fs_conf; @@ -764,7 +766,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) accel_state->XInited3D = TRUE; - start_3d(pScrn, accel_state->ib); + r600_start_3d(pScrn, accel_state->ib); // SQ sq_conf.ps_prio = 0; @@ -888,7 +890,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) break; } - sq_setup(pScrn, ib, &sq_conf); + r600_sq_setup(pScrn, ib, &sq_conf); /* set fake reloc for unused depth */ BEGIN_BATCH(3 + 2); @@ -992,10 +994,10 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) /* clip boolean is set to always visible -> doesn't matter */ for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) - set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192); + r600_set_clip_rect(pScrn, ib, i, 0, 0, 8192, 8192); for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) - set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192); + r600_set_vport_scissor(pScrn, ib, i, 0, 0, 8192, 8192); BEGIN_BATCH(42); PACK0(ib, PA_SC_MPASS_PS_CNTL, 2); @@ -1051,7 +1053,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) // clear FS fs_conf.bo = accel_state->shaders_bo; - fs_setup(pScrn, ib, &fs_conf, RADEON_GEM_DOMAIN_VRAM); + r600_fs_setup(pScrn, ib, &fs_conf, RADEON_GEM_DOMAIN_VRAM); // VGT BEGIN_BATCH(43); @@ -1102,7 +1104,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) */ void -draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices) +r600_draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices) { RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t i, count; @@ -1140,7 +1142,7 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i } void -draw_auto(ScrnInfoPtr pScrn, 
drmBufPtr ib, draw_config_t *draw_conf) +r600_draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -1163,27 +1165,27 @@ void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size) draw_config_t draw_conf; vtx_resource_t vtx_res; - if (accel_state->vb_start_op == -1) + if (accel_state->vbo.vb_start_op == -1) return; CLEAR (draw_conf); CLEAR (vtx_res); - if (accel_state->vb_offset == accel_state->vb_start_op) { + if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) { R600IBDiscard(pScrn, accel_state->ib); - radeon_vb_discard(pScrn); + radeon_vb_discard(pScrn, &accel_state->vbo); return; } /* Vertex buffer setup */ - accel_state->vb_size = accel_state->vb_offset - accel_state->vb_start_op; + accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op; vtx_res.id = SQ_VTX_RESOURCE_vs; vtx_res.vtx_size_dw = vtx_size / 4; - vtx_res.vtx_num_entries = accel_state->vb_size / 4; + vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4; vtx_res.mem_req_size = 1; - vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; - vtx_res.bo = accel_state->vb_bo; - set_vtx_resource (pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT); + vtx_res.vb_addr = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op; + vtx_res.bo = accel_state->vbo.vb_bo; + r600_set_vtx_resource(pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT); /* Draw */ draw_conf.prim_type = DI_PT_RECTLIST; @@ -1192,17 +1194,17 @@ void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size) draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; draw_conf.index_type = DI_INDEX_SIZE_16_BIT; - draw_auto(pScrn, accel_state->ib, &draw_conf); + r600_draw_auto(pScrn, accel_state->ib, &draw_conf); /* XXX drm should handle this in fence submit */ - wait_3d_idle_clean(pScrn, accel_state->ib); + r600_wait_3d_idle_clean(pScrn, accel_state->ib); /* sync dst surface */ - cp_set_surface_sync(pScrn, 
accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_obj.offset, - accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain); + r600_cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), + accel_state->dst_size, accel_state->dst_obj.offset, + accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain); - accel_state->vb_start_op = -1; + accel_state->vbo.vb_start_op = -1; accel_state->ib_reset_op = 0; #if KMS_MULTI_OP diff --git a/src/radeon.h b/src/radeon.h index 7a3f5b66..6d5a282c 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -387,6 +387,8 @@ typedef enum { #define IS_DCE4_VARIANT ((info->ChipFamily >= CHIP_FAMILY_CEDAR)) +#define IS_EVERGREEN_3D (info->ChipFamily >= CHIP_FAMILY_CEDAR) + #define IS_R600_3D (info->ChipFamily >= CHIP_FAMILY_R600) #define IS_R500_3D ((info->ChipFamily == CHIP_FAMILY_RV515) || \ @@ -674,6 +676,18 @@ struct r600_accel_object { struct radeon_bo *bo; }; +struct radeon_vbo_object { + int vb_offset; + uint64_t vb_mc_addr; + int vb_total; + void *vb_ptr; + uint32_t vb_size; + uint32_t vb_op_vert_size; + int32_t vb_start_op; + struct radeon_bo *vb_bo; + unsigned verts_per_op; +}; + struct radeon_accel_state { /* common accel data */ int fifo_slots; /* Free slots in the FIFO (64 max) */ @@ -721,20 +735,15 @@ struct radeon_accel_state { uint32_t *draw_header; unsigned vtx_count; unsigned num_vtx; - unsigned verts_per_op; Bool vsync; drmBufPtr ib; - int vb_offset; - uint64_t vb_mc_addr; - int vb_total; - void *vb_ptr; - uint32_t vb_size; - uint32_t vb_op_vert_size; - int32_t vb_start_op; + + struct radeon_vbo_object vbo; + struct radeon_vbo_object cbuf; + /* where to discard IB from if we cancel operation */ uint32_t ib_reset_op; - struct radeon_bo *vb_bo; #ifdef XF86DRM_MODE struct radeon_dma_bo bo_free; struct radeon_dma_bo bo_wait; @@ -753,6 +762,16 @@ struct radeon_accel_state { uint32_t comp_ps_offset; uint32_t xv_vs_offset; uint32_t xv_ps_offset; + // 
shader consts + uint32_t solid_vs_const_offset; + uint32_t solid_ps_const_offset; + uint32_t copy_vs_const_offset; + uint32_t copy_ps_const_offset; + uint32_t comp_vs_const_offset; + uint32_t comp_ps_const_offset; + uint32_t comp_mask_ps_const_offset; + uint32_t xv_vs_const_offset; + uint32_t xv_ps_const_offset; //size/addr stuff struct r600_accel_object src_obj[2]; @@ -1274,6 +1293,8 @@ extern void RADEONDoPrepareCopyMMIO(ScrnInfoPtr pScrn, Pixel planemask); extern Bool R600DrawInit(ScreenPtr pScreen); extern Bool R600LoadShaders(ScrnInfoPtr pScrn); +extern Bool EVERGREENDrawInit(ScreenPtr pScreen); +extern Bool EVERGREENLoadShaders(ScrnInfoPtr pScrn); #endif #if defined(XF86DRI) && defined(USE_EXA) diff --git a/src/radeon_accel.c b/src/radeon_accel.c index 281bc6d4..8fc515d1 100644 --- a/src/radeon_accel.c +++ b/src/radeon_accel.c @@ -1072,7 +1072,10 @@ Bool RADEONAccelInit(ScreenPtr pScreen) if (info->useEXA) { # ifdef XF86DRI if (info->directRenderingEnabled) { - if (info->ChipFamily >= CHIP_FAMILY_R600) { + if (info->ChipFamily >= CHIP_FAMILY_CEDAR) { + if (!EVERGREENDrawInit(pScreen)) + return FALSE; + } else if (info->ChipFamily >= CHIP_FAMILY_R600) { if (!R600DrawInit(pScreen)) return FALSE; } else { diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c index ed7fdd68..3a315a44 100644 --- a/src/radeon_dri2.c +++ b/src/radeon_dri2.c @@ -72,7 +72,7 @@ radeon_dri2_create_buffers(DrawablePtr drawable, struct dri2_buffer_priv *privates; PixmapPtr pixmap, depth_pixmap; struct radeon_exa_pixmap_priv *driver_priv; - int i, r; + int i, r, need_enlarge = 0; int flags = 0; buffers = calloc(count, sizeof *buffers); @@ -101,7 +101,6 @@ radeon_dri2_create_buffers(DrawablePtr drawable, /* tile the back buffer */ switch(attachments[i]) { case DRI2BufferDepth: - case DRI2BufferDepthStencil: if (info->ChipFamily >= CHIP_FAMILY_R600) /* macro is the preferred setting, but the 2D detiling for software * fallbacks in mesa still has issues on some configurations @@ -110,6 +109,17 
@@ radeon_dri2_create_buffers(DrawablePtr drawable, else flags = RADEON_CREATE_PIXMAP_TILING_MACRO | RADEON_CREATE_PIXMAP_TILING_MICRO; break; + case DRI2BufferDepthStencil: + if (info->ChipFamily >= CHIP_FAMILY_R600) { + /* macro is the preferred setting, but the 2D detiling for software + * fallbacks in mesa still has issues on some configurations + */ + flags = RADEON_CREATE_PIXMAP_TILING_MICRO; + if (info->ChipFamily >= CHIP_FAMILY_CEDAR) + need_enlarge = 1; + } else + flags = RADEON_CREATE_PIXMAP_TILING_MACRO | RADEON_CREATE_PIXMAP_TILING_MICRO; + break; case DRI2BufferBackLeft: case DRI2BufferBackRight: case DRI2BufferFakeFrontLeft: @@ -125,11 +135,31 @@ radeon_dri2_create_buffers(DrawablePtr drawable, default: flags = 0; } - pixmap = (*pScreen->CreatePixmap)(pScreen, - drawable->width, - drawable->height, - drawable->depth, - flags); + + if (need_enlarge) { + /* evergreen uses separate allocations for depth and stencil + * so we make an extra large depth buffer to cover stencil + * as well. 
+ */ + int pitch = drawable->width * (drawable->depth / 8); + int aligned_height = (drawable->height + 7) & ~7; + int size = pitch * aligned_height; + size = (size + 255) & ~255; + size += drawable->width * aligned_height; + aligned_height = ((size / pitch) + 7) & ~7; + + pixmap = (*pScreen->CreatePixmap)(pScreen, + drawable->width, + aligned_height, + drawable->depth, + flags); + + } else + pixmap = (*pScreen->CreatePixmap)(pScreen, + drawable->width, + drawable->height, + drawable->depth, + flags); } if (attachments[i] == DRI2BufferDepth) { @@ -166,7 +196,7 @@ radeon_dri2_create_buffer(DrawablePtr drawable, struct dri2_buffer_priv *privates; PixmapPtr pixmap, depth_pixmap; struct radeon_exa_pixmap_priv *driver_priv; - int r; + int r, need_enlarge = 0; int flags; buffers = calloc(1, sizeof *buffers); @@ -195,7 +225,6 @@ radeon_dri2_create_buffer(DrawablePtr drawable, /* tile the back buffer */ switch(attachment) { case DRI2BufferDepth: - case DRI2BufferDepthStencil: /* macro is the preferred setting, but the 2D detiling for software * fallbacks in mesa still has issues on some configurations */ @@ -204,6 +233,17 @@ radeon_dri2_create_buffer(DrawablePtr drawable, else flags = RADEON_CREATE_PIXMAP_TILING_MACRO | RADEON_CREATE_PIXMAP_TILING_MICRO; break; + case DRI2BufferDepthStencil: + /* macro is the preferred setting, but the 2D detiling for software + * fallbacks in mesa still has issues on some configurations + */ + if (info->ChipFamily >= CHIP_FAMILY_R600) { + flags = RADEON_CREATE_PIXMAP_TILING_MICRO; + if (info->ChipFamily >= CHIP_FAMILY_CEDAR) + need_enlarge = 1; + } else + flags = RADEON_CREATE_PIXMAP_TILING_MACRO | RADEON_CREATE_PIXMAP_TILING_MICRO; + break; case DRI2BufferBackLeft: case DRI2BufferBackRight: case DRI2BufferFakeFrontLeft: @@ -219,11 +259,32 @@ radeon_dri2_create_buffer(DrawablePtr drawable, default: flags = 0; } - pixmap = (*pScreen->CreatePixmap)(pScreen, - drawable->width, - drawable->height, - (format != 0)?format:drawable->depth, - 
flags); + + if (need_enlarge) { + /* evergreen uses separate allocations for depth and stencil + * so we make an extra large depth buffer to cover stencil + * as well. + */ + int depth = (format != 0) ? format : drawable->depth; + int pitch = drawable->width * (depth / 8); + int aligned_height = (drawable->height + 7) & ~7; + int size = pitch * aligned_height; + size = (size + 255) & ~255; + size += drawable->width * aligned_height; + aligned_height = ((size / pitch) + 7) & ~7; + + pixmap = (*pScreen->CreatePixmap)(pScreen, + drawable->width, + aligned_height, + (format != 0)?format:drawable->depth, + flags); + + } else + pixmap = (*pScreen->CreatePixmap)(pScreen, + drawable->width, + drawable->height, + (format != 0)?format:drawable->depth, + flags); } if (attachment == DRI2BufferDepth) { diff --git a/src/radeon_exa_shared.c b/src/radeon_exa_shared.c index d1926f4e..2ef07511 100644 --- a/src/radeon_exa_shared.c +++ b/src/radeon_exa_shared.c @@ -131,21 +131,19 @@ static Bool radeon_vb_get(ScrnInfoPtr pScrn) RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; - accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + + accel_state->vbo.vb_mc_addr = info->gartLocation + info->dri->bufStart + (accel_state->ib->idx*accel_state->ib->total)+ (accel_state->ib->total / 2); - accel_state->vb_total = (accel_state->ib->total / 2); - accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address + + accel_state->vbo.vb_total = (accel_state->ib->total / 2); + accel_state->vbo.vb_ptr = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); - accel_state->vb_offset = 0; + accel_state->vbo.vb_offset = 0; return TRUE; } -void radeon_vb_discard(ScrnInfoPtr pScrn) +void radeon_vb_discard(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo) { - RADEONInfoPtr info = RADEONPTR(pScrn); - - info->accel_state->vb_start_op = -1; + vbo->vb_start_op = -1; } int radeon_cp_start(ScrnInfoPtr pScrn) @@ -159,7 +157,6 @@ 
int radeon_cp_start(ScrnInfoPtr pScrn) radeon_cs_flush_indirect(pScrn); } accel_state->ib_reset_op = info->cs->cdw; - accel_state->vb_start_op = accel_state->vb_offset; } else #endif { @@ -167,33 +164,36 @@ int radeon_cp_start(ScrnInfoPtr pScrn) if (!radeon_vb_get(pScrn)) { return -1; } - accel_state->vb_start_op = accel_state->vb_offset; } + accel_state->vbo.vb_start_op = accel_state->vbo.vb_offset; + accel_state->cbuf.vb_start_op = accel_state->cbuf.vb_offset; return 0; } -void radeon_vb_no_space(ScrnInfoPtr pScrn, int vert_size) +void radeon_vb_no_space(ScrnInfoPtr pScrn, + struct radeon_vbo_object *vbo, + int vert_size) { RADEONInfoPtr info = RADEONPTR(pScrn); - struct radeon_accel_state *accel_state = info->accel_state; + struct radeon_accel_state *accel_state = info->accel_state; #if defined(XF86DRM_MODE) if (info->cs) { - if (accel_state->vb_bo) { - if (accel_state->vb_start_op != accel_state->vb_offset) { + if (vbo->vb_bo) { + if (vbo->vb_start_op != vbo->vb_offset) { accel_state->finish_op(pScrn, vert_size); accel_state->ib_reset_op = info->cs->cdw; } /* release the current VBO */ - radeon_vbo_put(pScrn); + radeon_vbo_put(pScrn, vbo); } /* get a new one */ - radeon_vbo_get(pScrn); + radeon_vbo_get(pScrn, vbo); return; } #endif - if (accel_state->vb_start_op != -1) { + if (vbo->vb_start_op != -1) { accel_state->finish_op(pScrn, vert_size); radeon_cp_start(pScrn); } @@ -213,8 +213,10 @@ void radeon_ib_discard(ScrnInfoPtr pScrn) goto out; } - info->accel_state->vb_offset = 0; - info->accel_state->vb_start_op = -1; + info->accel_state->vbo.vb_offset = 0; + info->accel_state->vbo.vb_start_op = -1; + info->accel_state->cbuf.vb_offset = 0; + info->accel_state->cbuf.vb_start_op = -1; if (CS_FULL(info->cs)) { radeon_cs_flush_indirect(pScrn); @@ -222,11 +224,19 @@ void radeon_ib_discard(ScrnInfoPtr pScrn) } radeon_cs_erase(info->cs); ret = radeon_cs_space_check_with_bo(info->cs, - info->accel_state->vb_bo, + info->accel_state->vbo.vb_bo, RADEON_GEM_DOMAIN_GTT, 0); 
if (ret) ErrorF("space check failed in flush\n"); + if (info->accel_state->cbuf.vb_bo) { + ret = radeon_cs_space_check_with_bo(info->cs, + info->accel_state->cbuf.vb_bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + ErrorF("space check failed in flush\n"); + } + out: if (info->dri2.enabled) { info->accel_state->XInited3D = FALSE; diff --git a/src/radeon_exa_shared.h b/src/radeon_exa_shared.h index 71068b12..489e3b0e 100644 --- a/src/radeon_exa_shared.h +++ b/src/radeon_exa_shared.h @@ -72,9 +72,9 @@ static inline void radeon_add_pixmap(struct radeon_cs *cs, PixmapPtr pPix, int r extern void radeon_ib_discard(ScrnInfoPtr pScrn); #endif /* XF86DRM_MODE */ -extern void radeon_vb_discard(ScrnInfoPtr pScrn); +extern void radeon_vb_discard(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo); extern int radeon_cp_start(ScrnInfoPtr pScrn); -extern void radeon_vb_no_space(ScrnInfoPtr pScrn, int vert_size); +extern void radeon_vb_no_space(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo, int vert_size); extern void radeon_vbo_done_composite(PixmapPtr pDst); #endif diff --git a/src/radeon_kms.c b/src/radeon_kms.c index 0d6055dd..b94544e8 100644 --- a/src/radeon_kms.c +++ b/src/radeon_kms.c @@ -83,9 +83,15 @@ void radeon_cs_flush_indirect(ScrnInfoPtr pScrn) return; /* release the current VBO so we don't block on mapping it later */ - if (info->accel_state->vb_offset && info->accel_state->vb_bo) { - radeon_vbo_put(pScrn); - info->accel_state->vb_start_op = -1; + if (info->accel_state->vbo.vb_offset && info->accel_state->vbo.vb_bo) { + radeon_vbo_put(pScrn, &info->accel_state->vbo); + info->accel_state->vbo.vb_start_op = -1; + } + + /* release the current VBO so we don't block on mapping it later */ + if (info->accel_state->cbuf.vb_offset && info->accel_state->cbuf.vb_bo) { + radeon_vbo_put(pScrn, &info->accel_state->cbuf); + info->accel_state->cbuf.vb_start_op = -1; } radeon_cs_emit(info->cs); @@ -95,11 +101,19 @@ void radeon_cs_flush_indirect(ScrnInfoPtr pScrn) 
radeon_vbo_flush_bos(pScrn); ret = radeon_cs_space_check_with_bo(info->cs, - accel_state->vb_bo, + accel_state->vbo.vb_bo, RADEON_GEM_DOMAIN_GTT, 0); if (ret) ErrorF("space check failed in flush\n"); + if (accel_state->cbuf.vb_bo) { + ret = radeon_cs_space_check_with_bo(info->cs, + accel_state->cbuf.vb_bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + ErrorF("space check failed in flush\n"); + } + if (info->reemit_current2d && info->state_2d.op) info->reemit_current2d(pScrn, info->state_2d.op); @@ -211,8 +225,18 @@ static Bool RADEONIsAccelWorking(ScrnInfoPtr pScrn) int r; uint32_t tmp; +#ifndef RADEON_INFO_ACCEL_WORKING +#define RADEON_INFO_ACCEL_WORKING 0x03 +#endif +#ifndef RADEON_INFO_ACCEL_WORKING2 +#define RADEON_INFO_ACCEL_WORKING2 0x05 +#endif + memset(&ginfo, 0, sizeof(ginfo)); - ginfo.request = 0x3; + if (info->dri->pKernelDRMVersion->version_minor >= 5) + ginfo.request = RADEON_INFO_ACCEL_WORKING2; + else + ginfo.request = RADEON_INFO_ACCEL_WORKING; ginfo.value = (uintptr_t)&tmp; r = drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &ginfo, sizeof(ginfo)); if (r) { @@ -239,7 +263,6 @@ static Bool RADEONPreInitAccel_KMS(ScrnInfoPtr pScrn) } if (xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE) || - (info->ChipFamily >= CHIP_FAMILY_CEDAR) || (!RADEONIsAccelWorking(pScrn))) { xf86DrvMsg(pScrn->scrnIndex, X_INFO, "GPU accel disabled or not working, using shadowfb for KMS\n"); diff --git a/src/radeon_reg.h b/src/radeon_reg.h index 377c26bf..e61c29d2 100644 --- a/src/radeon_reg.h +++ b/src/radeon_reg.h @@ -4242,6 +4242,12 @@ #define EVERGREEN_DATA_FORMAT 0x6b00 # define EVERGREEN_INTERLEAVE_EN (1 << 0) #define EVERGREEN_DESKTOP_HEIGHT 0x6b04 +#define EVERGREEN_VLINE_START_END 0x6b08 +# define EVERGREEN_VLINE_START_SHIFT 0 +# define EVERGREEN_VLINE_END_SHIFT 16 +# define EVERGREEN_VLINE_INV (1 << 31) +#define EVERGREEN_VLINE_STATUS 0x6bb8 +# define EVERGREEN_VLINE_STAT (1 << 12) #define EVERGREEN_VIEWPORT_START 0x6d70 #define EVERGREEN_VIEWPORT_SIZE 
0x6d74 diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c index c19066b6..f6828111 100644 --- a/src/radeon_textured_video.c +++ b/src/radeon_textured_video.c @@ -36,7 +36,6 @@ #include "radeon.h" #include "radeon_reg.h" -#include "r600_reg.h" #include "radeon_macros.h" #include "radeon_probe.h" #include "radeon_video.h" @@ -47,6 +46,9 @@ extern void R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv); +extern void +EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv); + extern Bool R600CopyToVRAM(ScrnInfoPtr pScrn, char *src, int src_pitch, @@ -473,7 +475,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, #endif #ifdef XF86DRI if (info->directRenderingEnabled) { - if (IS_R600_3D) + if (IS_EVERGREEN_3D) + EVERGREENDisplayTexturedVideo(pScrn, pPriv); + else if (IS_R600_3D) R600DisplayTexturedVideo(pScrn, pPriv); else if (IS_R500_3D) R500DisplayTexturedVideoCP(pScrn, pPriv); diff --git a/src/radeon_vbo.c b/src/radeon_vbo.c index 0735540d..c0a668f6 100644 --- a/src/radeon_vbo.c +++ b/src/radeon_vbo.c @@ -41,31 +41,27 @@ static struct radeon_bo *radeon_vbo_get_bo(ScrnInfoPtr pScrn); -void radeon_vbo_put(ScrnInfoPtr pScrn) +void radeon_vbo_put(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo) { - RADEONInfoPtr info = RADEONPTR(pScrn); - struct radeon_accel_state *accel_state = info->accel_state; - - if (accel_state->vb_bo) { - radeon_bo_unmap(accel_state->vb_bo); - radeon_bo_unref(accel_state->vb_bo); - accel_state->vb_bo = NULL; - accel_state->vb_total = 0; + + if (vbo->vb_bo) { + radeon_bo_unmap(vbo->vb_bo); + radeon_bo_unref(vbo->vb_bo); + vbo->vb_bo = NULL; + vbo->vb_total = 0; } - accel_state->vb_offset = 0; + vbo->vb_offset = 0; } -void radeon_vbo_get(ScrnInfoPtr pScrn) +void radeon_vbo_get(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo) { - RADEONInfoPtr info = RADEONPTR(pScrn); - struct radeon_accel_state *accel_state = info->accel_state; - accel_state->vb_bo = radeon_vbo_get_bo(pScrn); + vbo->vb_bo = 
radeon_vbo_get_bo(pScrn); - accel_state->vb_total = VBO_SIZE; - accel_state->vb_offset = 0; - accel_state->vb_start_op = accel_state->vb_offset; + vbo->vb_total = VBO_SIZE; + vbo->vb_offset = 0; + vbo->vb_start_op = vbo->vb_offset; } /* these functions could migrate to libdrm and @@ -80,7 +76,7 @@ static int radeon_bo_is_idle(struct radeon_bo *bo) void radeon_vbo_init_lists(ScrnInfoPtr pScrn) { RADEONInfoPtr info = RADEONPTR(pScrn); - struct radeon_accel_state *accel_state = info->accel_state; + struct radeon_accel_state *accel_state = info->accel_state; accel_state->use_vbos = TRUE; make_empty_list(&accel_state->bo_free); @@ -91,7 +87,7 @@ void radeon_vbo_init_lists(ScrnInfoPtr pScrn) void radeon_vbo_free_lists(ScrnInfoPtr pScrn) { RADEONInfoPtr info = RADEONPTR(pScrn); - struct radeon_accel_state *accel_state = info->accel_state; + struct radeon_accel_state *accel_state = info->accel_state; struct radeon_dma_bo *dma_bo, *temp; foreach_s(dma_bo, temp, &accel_state->bo_free) { @@ -116,7 +112,7 @@ void radeon_vbo_free_lists(ScrnInfoPtr pScrn) void radeon_vbo_flush_bos(ScrnInfoPtr pScrn) { RADEONInfoPtr info = RADEONPTR(pScrn); - struct radeon_accel_state *accel_state = info->accel_state; + struct radeon_accel_state *accel_state = info->accel_state; struct radeon_dma_bo *dma_bo, *temp; const int expire_at = ++accel_state->bo_free.expire_counter + DMA_BO_FREE_TIME; const int time = accel_state->bo_free.expire_counter; @@ -164,7 +160,7 @@ void radeon_vbo_flush_bos(ScrnInfoPtr pScrn) static struct radeon_bo *radeon_vbo_get_bo(ScrnInfoPtr pScrn) { RADEONInfoPtr info = RADEONPTR(pScrn); - struct radeon_accel_state *accel_state = info->accel_state; + struct radeon_accel_state *accel_state = info->accel_state; struct radeon_dma_bo *dma_bo = NULL; struct radeon_bo *bo; diff --git a/src/radeon_vbo.h b/src/radeon_vbo.h index b505f66b..21533c2e 100644 --- a/src/radeon_vbo.h +++ b/src/radeon_vbo.h @@ -2,39 +2,40 @@ #ifndef RADEON_VBO_H #define RADEON_VBO_H -extern void 
radeon_vb_no_space(ScrnInfoPtr pScrn, int vert_size); +extern void radeon_vb_no_space(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo, int vert_size); extern void radeon_vbo_init_lists(ScrnInfoPtr pScrn); extern void radeon_vbo_free_lists(ScrnInfoPtr pScrn); extern void radeon_vbo_flush_bos(ScrnInfoPtr pScrn); -extern void radeon_vbo_get(ScrnInfoPtr pScrn); -extern void radeon_vbo_put(ScrnInfoPtr pScrn); +extern void radeon_vbo_get(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo); +extern void radeon_vbo_put(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo); -static inline void radeon_vbo_check(ScrnInfoPtr pScrn, int vert_size) +static inline void radeon_vbo_check(ScrnInfoPtr pScrn, + struct radeon_vbo_object *vbo, + int vert_size) { - RADEONInfoPtr info = RADEONPTR(pScrn); - struct radeon_accel_state *accel_state = info->accel_state; - if ((accel_state->vb_offset + (accel_state->verts_per_op * vert_size)) > accel_state->vb_total) { - radeon_vb_no_space(pScrn, vert_size); + if ((vbo->vb_offset + (vbo->verts_per_op * vert_size)) > vbo->vb_total) { + radeon_vb_no_space(pScrn, vbo, vert_size); } } static inline void * -radeon_vbo_space(ScrnInfoPtr pScrn, int vert_size) +radeon_vbo_space(ScrnInfoPtr pScrn, + struct radeon_vbo_object *vbo, + int vert_size) { RADEONInfoPtr info = RADEONPTR(pScrn); - struct radeon_accel_state *accel_state = info->accel_state; void *vb; - + /* we've ran out of space in the vertex buffer - need to get a new one */ - radeon_vbo_check(pScrn, vert_size); + radeon_vbo_check(pScrn, vbo, vert_size); - accel_state->vb_op_vert_size = vert_size; + vbo->vb_op_vert_size = vert_size; #if defined(XF86DRM_MODE) if (info->cs) { int ret; - struct radeon_bo *bo = accel_state->vb_bo; + struct radeon_bo *bo = vbo->vb_bo; if (!bo->ptr) { ret = radeon_bo_map(bo, 1); @@ -43,19 +44,18 @@ radeon_vbo_space(ScrnInfoPtr pScrn, int vert_size) return NULL; } } - vb = (pointer)((char *)bo->ptr + accel_state->vb_offset); + vb = (pointer)((char *)bo->ptr + 
vbo->vb_offset); } else #endif - vb = (pointer)((char *)accel_state->vb_ptr + accel_state->vb_offset); + vb = (pointer)((char *)vbo->vb_ptr + vbo->vb_offset); return vb; } -static inline void radeon_vbo_commit(ScrnInfoPtr pScrn) +static inline void radeon_vbo_commit(ScrnInfoPtr pScrn, + struct radeon_vbo_object *vbo) { - RADEONInfoPtr info = RADEONPTR(pScrn); - struct radeon_accel_state *accel_state = info->accel_state; - accel_state->vb_offset += accel_state->verts_per_op * accel_state->vb_op_vert_size; + vbo->vb_offset += vbo->verts_per_op * vbo->vb_op_vert_size; } #endif |