summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am8
-rw-r--r--src/evergreen_accel.c1112
-rw-r--r--src/evergreen_exa.c1907
-rw-r--r--src/evergreen_reg.h247
-rw-r--r--src/evergreen_reg_auto.h4039
-rw-r--r--src/evergreen_shader.c2790
-rw-r--r--src/evergreen_shader.h292
-rw-r--r--src/evergreen_state.h338
-rw-r--r--src/evergreen_textured_videofuncs.c556
-rw-r--r--src/r600_exa.c144
-rw-r--r--src/r600_state.h42
-rw-r--r--src/r600_textured_videofuncs.c60
-rw-r--r--src/r6xx_accel.c122
-rw-r--r--src/radeon.h39
-rw-r--r--src/radeon_accel.c5
-rw-r--r--src/radeon_dri2.c89
-rw-r--r--src/radeon_exa_shared.c50
-rw-r--r--src/radeon_exa_shared.h4
-rw-r--r--src/radeon_kms.c35
-rw-r--r--src/radeon_reg.h6
-rw-r--r--src/radeon_textured_video.c8
-rw-r--r--src/radeon_vbo.c38
-rw-r--r--src/radeon_vbo.h40
23 files changed, 11692 insertions, 279 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 033047e4..e05722cc 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -53,7 +53,8 @@ RADEON_KMS_SRCS=radeon_dri2.c radeon_kms.c drmmode_display.c radeon_vbo.c
endif
if USE_EXA
-RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c r600_shader.c radeon_exa_shared.c
+RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c r600_shader.c radeon_exa_shared.c \
+ evergreen_exa.c evergreen_accel.c evergreen_shader.c evergreen_textured_videofuncs.c
endif
AM_CFLAGS = \
@@ -129,6 +130,11 @@ EXTRA_DIST = \
r600_reg_r7xx.h \
r600_shader.h \
r600_state.h \
+ evergreen_reg.h \
+ evergreen_reg_auto.h \
+ evergreen_reg_r7xx.h \
+ evergreen_shader.h \
+ evergreen_state.h \
ati.h \
ativersion.h \
bicubic_table.h \
diff --git a/src/evergreen_accel.c b/src/evergreen_accel.c
new file mode 100644
index 00000000..38666eef
--- /dev/null
+++ b/src/evergreen_accel.c
@@ -0,0 +1,1112 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Alex Deucher <alexander.deucher@amd.com>
+ *
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include <errno.h>
+
+#include "radeon.h"
+#include "evergreen_shader.h"
+#include "radeon_reg.h"
+#include "evergreen_reg.h"
+#include "evergreen_state.h"
+
+#include "radeon_drm.h"
+#include "radeon_vbo.h"
+#include "radeon_exa_shared.h"
+
+/*
+ * Emit a 3-dword batch holding one IT_CONTEXT_CONTROL type-3 packet whose
+ * two payload dwords are both 0x80000000.
+ *
+ * NOTE(review): the meaning of the two payload dwords is not visible in
+ * this file -- confirm against the CONTEXT_CONTROL packet documentation.
+ */
+void
+evergreen_start_3d(ScrnInfoPtr pScrn)
+{
+ /* 'info' looks unused here; presumably the batch macros reference it */
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+
+ BEGIN_BATCH(3);
+ PACK3(IT_CONTEXT_CONTROL, 2);
+ E32(0x80000000);
+ E32(0x80000000);
+ END_BATCH();
+
+}
+
+/*
+ * Setup of functional groups
+ */
+
+// asic stack/thread/gpr limits - need to query the drm
+/*
+ * Pack the priorities and the GPR, thread and stack-entry counts held in
+ * sq_conf into register values and emit them in a single 16-dword batch:
+ * SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, four dwords starting at SQ_CONFIG and five
+ * dwords starting at SQ_THREAD_RESOURCE_MGMT.
+ */
+static void
+evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t config;
+    uint32_t gpr_mgmt_1, gpr_mgmt_2, gpr_mgmt_3;
+    uint32_t thread_mgmt_1, thread_mgmt_2;
+    uint32_t stack_mgmt_1, stack_mgmt_2, stack_mgmt_3;
+
+    /* VC_ENABLE is left clear on Cedar only */
+    config = (info->ChipFamily == CHIP_FAMILY_CEDAR) ? 0 : VC_ENABLE_bit;
+    config |= EXPORT_SRC_C_bit;
+    config |= (sq_conf->cs_prio << CS_PRIO_shift);
+    config |= (sq_conf->ls_prio << LS_PRIO_shift);
+    config |= (sq_conf->hs_prio << HS_PRIO_shift);
+    config |= (sq_conf->ps_prio << PS_PRIO_shift);
+    config |= (sq_conf->vs_prio << VS_PRIO_shift);
+    config |= (sq_conf->gs_prio << GS_PRIO_shift);
+    config |= (sq_conf->es_prio << ES_PRIO_shift);
+
+    /* GPR budget per shader stage */
+    gpr_mgmt_1  = (sq_conf->num_ps_gprs << NUM_PS_GPRS_shift);
+    gpr_mgmt_1 |= (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift);
+    gpr_mgmt_1 |= (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift);
+
+    gpr_mgmt_2  = (sq_conf->num_gs_gprs << NUM_GS_GPRS_shift);
+    gpr_mgmt_2 |= (sq_conf->num_es_gprs << NUM_ES_GPRS_shift);
+
+    gpr_mgmt_3  = (sq_conf->num_hs_gprs << NUM_HS_GPRS_shift);
+    gpr_mgmt_3 |= (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift);
+
+    /* thread budget per shader stage */
+    thread_mgmt_1  = (sq_conf->num_ps_threads << NUM_PS_THREADS_shift);
+    thread_mgmt_1 |= (sq_conf->num_vs_threads << NUM_VS_THREADS_shift);
+    thread_mgmt_1 |= (sq_conf->num_gs_threads << NUM_GS_THREADS_shift);
+    thread_mgmt_1 |= (sq_conf->num_es_threads << NUM_ES_THREADS_shift);
+
+    thread_mgmt_2  = (sq_conf->num_hs_threads << NUM_HS_THREADS_shift);
+    thread_mgmt_2 |= (sq_conf->num_ls_threads << NUM_LS_THREADS_shift);
+
+    /* stack-entry budget per shader stage */
+    stack_mgmt_1  = (sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift);
+    stack_mgmt_1 |= (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift);
+
+    stack_mgmt_2  = (sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift);
+    stack_mgmt_2 |= (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift);
+
+    stack_mgmt_3  = (sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift);
+    stack_mgmt_3 |= (sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift);
+
+    BEGIN_BATCH(16);
+    /* dynamic GPR allocation is switched off */
+    EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
+    PACK0(SQ_CONFIG, 4);
+    E32(config);
+    E32(gpr_mgmt_1);
+    E32(gpr_mgmt_2);
+    E32(gpr_mgmt_3);
+    PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
+    E32(thread_mgmt_1);
+    E32(thread_mgmt_2);
+    E32(stack_mgmt_1);
+    E32(stack_mgmt_2);
+    E32(stack_mgmt_3);
+    END_BATCH();
+}
+
+/*
+ * Emit the CB_COLOR0_* register block (offset by 0x3c * cb_conf->id) for the
+ * render target described by cb_conf.  Each register that is followed by a
+ * relocation goes out in its own batch; the remaining registers are sent
+ * together in one 24-dword batch.
+ *
+ * pScrn   - screen whose command stream receives the packets
+ * cb_conf - render target description (bo, base, format, size, flags, id)
+ * domain  - passed as the write domain of every relocation emitted here
+ */
+void
+evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t color_info, color_attrib, color_dim;
+    int pitch_val, slice_val, aligned_h;
+
+    /* multi-bit fields of CB_COLOR0_INFO */
+    color_info  = (cb_conf->endian << ENDIAN_shift);
+    color_info |= (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift);
+    color_info |= (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift);
+    color_info |= (cb_conf->number_type << NUMBER_TYPE_shift);
+    color_info |= (cb_conf->comp_swap << COMP_SWAP_shift);
+    color_info |= (cb_conf->source_format << SOURCE_FORMAT_shift);
+    color_info |= (cb_conf->resource_type << RESOURCE_TYPE_shift);
+
+    /* single-bit options of CB_COLOR0_INFO */
+    if (cb_conf->blend_clamp)
+        color_info |= BLEND_CLAMP_bit;
+    if (cb_conf->fast_clear)
+        color_info |= FAST_CLEAR_bit;
+    if (cb_conf->compression)
+        color_info |= COMPRESSION_bit;
+    if (cb_conf->blend_bypass)
+        color_info |= BLEND_BYPASS_bit;
+    if (cb_conf->simple_float)
+        color_info |= SIMPLE_FLOAT_bit;
+    if (cb_conf->round_mode)
+        color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
+    if (cb_conf->tile_compact)
+        color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
+    if (cb_conf->rat)
+        color_info |= RAT_bit;
+
+    /* bit 4 needs to be set for linear and depth/stencil surfaces */
+    color_attrib = CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
+
+    /* pitch is programmed in units of 8 pixels, slice in units of 64, both minus one */
+    aligned_h = RADEON_ALIGN(cb_conf->h, 8);
+    pitch_val = (cb_conf->w / 8) - 1;
+    slice_val = ((cb_conf->w * aligned_h) / 64) - 1;
+
+    if (cb_conf->resource_type == BUFFER) {
+        /* number of elements in the surface */
+        color_dim = pitch_val * slice_val;
+    } else {
+        /* w/h of the surface */
+        color_dim = (((cb_conf->w - 1) << WIDTH_MAX_shift) |
+                     ((cb_conf->h - 1) << HEIGHT_MAX_shift));
+    }
+
+    /* colour buffer base address, in 256-byte units */
+    BEGIN_BATCH(3 + 2);
+    EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
+    RELOC_BATCH(cb_conf->bo, 0, domain);
+    END_BATCH();
+
+    /* CMASK and FMASK are not used; they are pointed at offset 0 of the
+     * colour buffer so that the command stream stays valid.
+     */
+    BEGIN_BATCH(3 + 2);
+    EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), 0);
+    RELOC_BATCH(cb_conf->bo, 0, domain);
+    END_BATCH();
+
+    BEGIN_BATCH(3 + 2);
+    EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), 0);
+    RELOC_BATCH(cb_conf->bo, 0, domain);
+    END_BATCH();
+
+    /* tiling config */
+    BEGIN_BATCH(3 + 2);
+    EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), color_attrib);
+    RELOC_BATCH(cb_conf->bo, 0, domain);
+    END_BATCH();
+
+    BEGIN_BATCH(3 + 2);
+    EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), color_info);
+    RELOC_BATCH(cb_conf->bo, 0, domain);
+    END_BATCH();
+
+    /* everything that needs no relocation */
+    BEGIN_BATCH(24);
+    EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch_val);
+    EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice_val);
+    EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
+    EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), color_dim);
+    EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
+    EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
+    PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    END_BATCH();
+}
+
+/*
+ * Emit an IT_SURFACE_SYNC packet followed by a relocation for bo.
+ *
+ * sync_type - written as the first payload dword
+ * size      - byte size of the range; 0xffffffff is passed through
+ *             unchanged, any other value is rounded up to 256-byte units
+ * mc_addr   - address of the range, emitted in 256-byte units
+ * bo        - buffer object the relocation refers to
+ * rdomains  - read domains for the relocation
+ * wdomain   - write domain for the relocation
+ */
+static void
+evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
+                              uint32_t size, uint64_t mc_addr,
+                              struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t coher_size = (size == 0xffffffff) ? 0xffffffff
+                                               : ((size + 255) >> 8);
+
+    BEGIN_BATCH(5 + 2);
+    PACK3(IT_SURFACE_SYNC, 4);
+    E32(sync_type);
+    E32(coher_size);
+    E32((mc_addr >> 8));
+    E32(10); /* poll interval */
+    RELOC_BATCH(bo, rdomains, wdomain);
+    END_BATCH();
+}
+
+/*
+ * Insert a wait-for-vline into the command stream.
+ *
+ * Programs EVERGREEN_VLINE_START_END with the clamped [start, stop] range,
+ * emits an IT_WAIT_REG_MEM packet polling EVERGREEN_VLINE_STATUS, and
+ * appends an IT_NOP packet carrying the crtc id.
+ *
+ * Nothing is emitted when crtc is NULL or disabled, when stop < start, when
+ * pPix is not the target checked below, or when start lies beyond the
+ * visible height of the mode.
+ *
+ * pScrn - screen whose command stream receives the packets
+ * pPix  - pixmap being drawn to
+ * crtc  - crtc whose scanout position is waited on
+ * start - first line of the range (clamped to >= 0)
+ * stop  - last line of the range (clamped to <= crtc->mode.VDisplay)
+ */
+void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
+ xf86CrtcPtr crtc, int start, int stop)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ drmmode_crtc_private_ptr drmmode_crtc;
+ uint32_t offset;
+
+ if (!crtc)
+ return;
+
+ drmmode_crtc = crtc->driver_private;
+
+ if (stop < start)
+ return;
+
+ if (!crtc->enabled)
+ return;
+
+ if (info->cs) {
+ /* with a command stream, only the screen pixmap qualifies */
+ if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
+ return;
+ } else {
+#ifdef USE_EXA
+ if (info->useEXA)
+ offset = exaGetPixmapOffset(pPix);
+ else
+#endif
+ offset = pPix->devPrivate.ptr - info->FB;
+
+ /* only continue when drawing to the front buffer (offset 0) */
+ if (offset != 0)
+ return;
+ }
+
+ /* clamp the requested range to the visible lines of the mode */
+ start = max(start, 0);
+ stop = min(stop, crtc->mode.VDisplay);
+
+ if (start > crtc->mode.VDisplay)
+ return;
+
+ BEGIN_BATCH(11);
+ /* set the VLINE range */
+ EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
+ (start << EVERGREEN_VLINE_START_SHIFT) |
+ (stop << EVERGREEN_VLINE_END_SHIFT));
+
+ /* tell the CP to poll the VLINE state register */
+ PACK3(IT_WAIT_REG_MEM, 6);
+ E32(IT_WAIT_REG | IT_WAIT_EQ);
+ E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
+ E32(0);
+ E32(0); // Ref value
+ E32(EVERGREEN_VLINE_STAT); // Mask
+ E32(10); // Wait interval
+ /* add crtc reloc */
+ PACK3(IT_NOP, 1);
+ E32(drmmode_crtc->mode_crtc->crtc_id);
+ END_BATCH();
+}
+
+/*
+ * Emit the fetch-shader state: SQ_PGM_START_FS (shader address in 256-byte
+ * units, with a relocation for fs_conf->bo) followed by
+ * SQ_PGM_RESOURCES_FS.
+ *
+ * domain is used as the read domain of the relocation.
+ */
+void
+evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t resources;
+
+    resources  = (fs_conf->num_gprs << NUM_GPRS_shift);
+    resources |= (fs_conf->stack_size << STACK_SIZE_shift);
+    if (fs_conf->dx10_clamp)
+        resources |= DX10_CLAMP_bit;
+
+    /* program start address */
+    BEGIN_BATCH(3 + 2);
+    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
+    RELOC_BATCH(fs_conf->bo, domain, 0);
+    END_BATCH();
+
+    /* GPR count, stack size and clamp mode */
+    BEGIN_BATCH(3);
+    EREG(SQ_PGM_RESOURCES_FS, resources);
+    END_BATCH();
+}
+
+/*
+ * Emit the vertex-shader state: a surface sync over the shader code,
+ * SQ_PGM_START_VS (address in 256-byte units, with a relocation for
+ * vs_conf->bo) and the two dwords starting at SQ_PGM_RESOURCES_VS.
+ *
+ * domain is used as the read domain of the sync and of the relocation.
+ */
+void
+evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t resources, resources_2;
+
+    /* first resources dword: GPRs, stack, clamp, instruction caching */
+    resources  = (vs_conf->num_gprs << NUM_GPRS_shift);
+    resources |= (vs_conf->stack_size << STACK_SIZE_shift);
+    if (vs_conf->dx10_clamp)
+        resources |= DX10_CLAMP_bit;
+    if (vs_conf->uncached_first_inst)
+        resources |= UNCACHED_FIRST_INST_bit;
+
+    /* second resources dword: rounding modes and denormal handling */
+    resources_2  = (vs_conf->single_round << SINGLE_ROUND_shift);
+    resources_2 |= (vs_conf->double_round << DOUBLE_ROUND_shift);
+    if (vs_conf->allow_sdi)
+        resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
+    if (vs_conf->allow_sd0)
+        resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
+    if (vs_conf->allow_ddi)
+        resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
+    if (vs_conf->allow_ddo)
+        resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
+
+    /* flush SQ cache */
+    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
+                                  vs_conf->shader_size, vs_conf->shader_addr,
+                                  vs_conf->bo, domain, 0);
+
+    BEGIN_BATCH(3 + 2);
+    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
+    RELOC_BATCH(vs_conf->bo, domain, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(4);
+    PACK0(SQ_PGM_RESOURCES_VS, 2);
+    E32(resources);
+    E32(resources_2);
+    END_BATCH();
+}
+
+/*
+ * Emit the pixel-shader state: a surface sync over the shader code,
+ * SQ_PGM_START_PS (address in 256-byte units, with a relocation for
+ * ps_conf->bo) and the three dwords starting at SQ_PGM_RESOURCES_PS, the
+ * last of which is ps_conf->export_mode.
+ *
+ * domain is used as the read domain of the sync and of the relocation.
+ */
+void
+evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t resources, resources_2;
+
+    /* first resources dword: GPRs, stack, clamps, instruction caching */
+    resources  = (ps_conf->num_gprs << NUM_GPRS_shift);
+    resources |= (ps_conf->stack_size << STACK_SIZE_shift);
+    if (ps_conf->dx10_clamp)
+        resources |= DX10_CLAMP_bit;
+    if (ps_conf->uncached_first_inst)
+        resources |= UNCACHED_FIRST_INST_bit;
+    if (ps_conf->clamp_consts)
+        resources |= CLAMP_CONSTS_bit;
+
+    /* second resources dword: rounding modes and denormal handling */
+    resources_2  = (ps_conf->single_round << SINGLE_ROUND_shift);
+    resources_2 |= (ps_conf->double_round << DOUBLE_ROUND_shift);
+    if (ps_conf->allow_sdi)
+        resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
+    if (ps_conf->allow_sd0)
+        resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
+    if (ps_conf->allow_ddi)
+        resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
+    if (ps_conf->allow_ddo)
+        resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
+
+    /* flush SQ cache */
+    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
+                                  ps_conf->shader_size, ps_conf->shader_addr,
+                                  ps_conf->bo, domain, 0);
+
+    BEGIN_BATCH(3 + 2);
+    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
+    RELOC_BATCH(ps_conf->bo, domain, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(5);
+    PACK0(SQ_PGM_RESOURCES_PS, 3);
+    E32(resources);
+    E32(resources_2);
+    E32(ps_conf->export_mode);
+    END_BATCH();
+}
+
+/*
+ * Bind an ALU constant buffer to the vertex or pixel shader.
+ *
+ * A surface sync over the buffer is always emitted first.  For
+ * SHADER_TYPE_VS and SHADER_TYPE_PS the matching buffer-size register and
+ * constant-cache address register (address in 256-byte units, with a
+ * relocation for const_conf->bo) are then written; any other type is only
+ * reported through ErrorF.
+ *
+ * domain is used as the read domain of the sync and of the relocation.
+ */
+void
+evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t size_reg, cache_reg;
+    /* size reg is units of 16 consts (4 dwords each) */
+    uint32_t blocks = const_conf->size_bytes >> 8;
+
+    /* never program a size of zero */
+    if (blocks == 0)
+        blocks = 1;
+
+    /* flush SQ cache */
+    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
+                                  const_conf->size_bytes, const_conf->const_addr,
+                                  const_conf->bo, domain, 0);
+
+    /* pick the register pair for the shader stage */
+    switch (const_conf->type) {
+    case SHADER_TYPE_VS:
+        size_reg = SQ_ALU_CONST_BUFFER_SIZE_VS_0;
+        cache_reg = SQ_ALU_CONST_CACHE_VS_0;
+        break;
+    case SHADER_TYPE_PS:
+        size_reg = SQ_ALU_CONST_BUFFER_SIZE_PS_0;
+        cache_reg = SQ_ALU_CONST_CACHE_PS_0;
+        break;
+    default:
+        ErrorF("Unsupported const type %d\n", const_conf->type);
+        return;
+    }
+
+    BEGIN_BATCH(3);
+    EREG(size_reg, blocks);
+    END_BATCH();
+
+    BEGIN_BATCH(3 + 2);
+    EREG(cache_reg, const_conf->const_addr >> 8);
+    RELOC_BATCH(const_conf->bo, domain, 0);
+    END_BATCH();
+}
+
+/*
+ * Write val to the boolean-constant register selected by offset, i.e.
+ * SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, in one 3-dword batch.
+ *
+ * offset - index of the bool register (see the ordering note below)
+ * val    - 32 boolean constants, one per bit
+ */
+void
+evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
+ * 1 bit per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
+ */
+ BEGIN_BATCH(3);
+ EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
+ END_BATCH();
+}
+
+/*
+ * Emit a vertex-buffer fetch resource.
+ *
+ * A surface sync over the vertex buffer is emitted first (TC action on
+ * Cedar, VC action on every other family), then the eight resource dwords
+ * at SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, followed by a
+ * relocation for res->bo.
+ *
+ * domain is used as the read domain of the sync and of the relocation.
+ */
+static void
+evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    uint32_t word2, word3, word4;
+    uint32_t cache_action;
+
+    /* word 2: high address bits, stride in bytes, format and endian swap */
+    word2  = (((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask);
+    word2 |= ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift);
+    word2 |= (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift);
+    word2 |= (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift);
+    word2 |= (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift);
+    if (res->clamp_x)
+        word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
+    if (res->format_comp_all)
+        word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
+    if (res->srf_mode_all)
+        word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
+
+    /* word 3: destination swizzle and cache policy */
+    word3  = (res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift);
+    word3 |= (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift);
+    word3 |= (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift);
+    word3 |= (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift);
+    if (res->uncached)
+        word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
+
+    /* XXX ??? */
+    word4 = 0;
+
+    /* flush vertex cache: Cedar uses the TC action, everything else VC */
+    cache_action = (info->ChipFamily == CHIP_FAMILY_CEDAR) ? TC_ACTION_ENA_bit
+                                                           : VC_ACTION_ENA_bit;
+    evergreen_cp_set_surface_sync(pScrn, cache_action,
+                                  accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
+                                  res->bo,
+                                  domain, 0);
+
+    BEGIN_BATCH(10 + 2);
+    PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
+    E32(res->vb_addr & 0xffffffff);         // 0: BASE_ADDRESS
+    E32((res->vtx_num_entries << 2) - 1);   // 1: SIZE
+    E32(word2);                             // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
+    E32(word3);                             // 3: swizzles
+    E32(word4);                             // 4: num elements
+    E32(0);                                 // 5: n/a
+    E32(0);                                 // 6: n/a
+    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift); // 7: TYPE
+    RELOC_BATCH(res->bo, domain, 0);
+    END_BATCH();
+}
+
+/*
+ * Emit a texture fetch resource.
+ *
+ * A TC surface sync over the texture is emitted first, then the eight
+ * resource dwords at SQ_FETCH_RESOURCE + tex_res->id *
+ * SQ_FETCH_RESOURCE_offset, followed by relocations for tex_res->bo and
+ * tex_res->mip_bo.
+ *
+ * domain is used as the read domain of the sync and of both relocations.
+ */
+void
+evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t word0, word1, word4, word5, word6, word7;
+
+    /* word 0: dimension, pitch (8-pixel units, minus one), width, tiling order */
+    word0 = (tex_res->dim << DIM_shift);
+    if (tex_res->w)
+        word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
+                  ((tex_res->w - 1) << TEX_WIDTH_shift));
+    if (tex_res->tile_type)
+        word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
+
+    /* word 1: array mode, height and depth (both minus one when non-zero) */
+    word1 = (tex_res->array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
+    if (tex_res->h)
+        word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
+    if (tex_res->depth)
+        word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
+
+    /* word 4: component formats, number format, endian, swizzle, base level */
+    word4  = (tex_res->format_comp_x << FORMAT_COMP_X_shift);
+    word4 |= (tex_res->format_comp_y << FORMAT_COMP_Y_shift);
+    word4 |= (tex_res->format_comp_z << FORMAT_COMP_Z_shift);
+    word4 |= (tex_res->format_comp_w << FORMAT_COMP_W_shift);
+    word4 |= (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift);
+    word4 |= (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift);
+    word4 |= (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift);
+    word4 |= (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift);
+    word4 |= (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift);
+    word4 |= (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift);
+    word4 |= (tex_res->base_level << BASE_LEVEL_shift);
+    if (tex_res->srf_mode_all)
+        word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
+    if (tex_res->force_degamma)
+        word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
+
+    /* word 5: last mip level and array slice range */
+    word5  = (tex_res->last_level << LAST_LEVEL_shift);
+    word5 |= (tex_res->base_array << BASE_ARRAY_shift);
+    word5 |= (tex_res->last_array << LAST_ARRAY_shift);
+
+    /* word 6: minimum LOD, perf modulation, interlace flag */
+    word6  = (tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift);
+    word6 |= (tex_res->perf_modulation << PERF_MODULATION_shift);
+    if (tex_res->interlaced)
+        word6 |= INTERLACED_bit;
+
+    /* word 7: data format and resource type */
+    word7  = (tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift);
+    word7 |= (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift);
+
+    /* flush texture cache */
+    evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
+                                  tex_res->size, tex_res->base,
+                                  tex_res->bo, domain, 0);
+
+    BEGIN_BATCH(10 + 4);
+    PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
+    E32(word0);
+    E32(word1);
+    E32(((tex_res->base) >> 8));        /* word 2: base, 256-byte units */
+    E32(((tex_res->mip_base) >> 8));    /* word 3: mip base, 256-byte units */
+    E32(word4);
+    E32(word5);
+    E32(word6);
+    E32(word7);
+    RELOC_BATCH(tex_res->bo, domain, 0);
+    RELOC_BATCH(tex_res->mip_bo, domain, 0);
+    END_BATCH();
+}
+
+/*
+ * Emit the three sampler dwords for sampler s->id at
+ * SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset.
+ */
+void
+evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t word0, word1, word2;
+
+    /* word 0: clamp modes, filters, border colour, depth compare, chroma key */
+    word0  = (s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift);
+    word0 |= (s->clamp_y << CLAMP_Y_shift);
+    word0 |= (s->clamp_z << CLAMP_Z_shift);
+    word0 |= (s->xy_mag_filter << XY_MAG_FILTER_shift);
+    word0 |= (s->xy_min_filter << XY_MIN_FILTER_shift);
+    word0 |= (s->z_filter << Z_FILTER_shift);
+    word0 |= (s->mip_filter << MIP_FILTER_shift);
+    word0 |= (s->border_color << BORDER_COLOR_TYPE_shift);
+    word0 |= (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift);
+    word0 |= (s->chroma_key << CHROMA_KEY_shift);
+
+    /* word 1: LOD range and perf settings */
+    word1  = (s->min_lod << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift);
+    word1 |= (s->max_lod << MAX_LOD_shift);
+    word1 |= (s->perf_mip << PERF_MIP_shift);
+    word1 |= (s->perf_z << PERF_Z_shift);
+
+    /* word 2: LOD biases plus single-bit options */
+    word2  = (s->lod_bias << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift);
+    word2 |= (s->lod_bias2 << LOD_BIAS_SEC_shift);
+    if (s->mc_coord_truncate)
+        word2 |= MC_COORD_TRUNCATE_bit;
+    if (s->force_degamma)
+        word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
+    if (s->truncate_coord)
+        word2 |= TRUNCATE_COORD_bit;
+    if (s->disable_cube_wrap)
+        word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
+    if (s->type)
+        word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
+
+    BEGIN_BATCH(5);
+    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
+    E32(word0);
+    E32(word1);
+    E32(word2);
+    END_BATCH();
+}
+
+//XXX deal with clip offsets in clip setup
+/*
+ * Program the screen scissor: (x1, y1) goes into PA_SC_SCREEN_SCISSOR_TL
+ * and (x2, y2) into the dword that follows it.
+ */
+void
+evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t top_left, bottom_right;
+
+    top_left = ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
+                (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift));
+    bottom_right = ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
+                    (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift));
+
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
+    E32(top_left);
+    E32(bottom_right);
+    END_BATCH();
+}
+
+/*
+ * Program viewport scissor number id: (x1, y1) plus
+ * WINDOW_OFFSET_DISABLE goes into the TL dword and (x2, y2) into the BR
+ * dword that follows it.
+ */
+void
+evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t top_left, bottom_right;
+
+    top_left = ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
+                (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
+                WINDOW_OFFSET_DISABLE_bit);
+    bottom_right = ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
+                    (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift));
+
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
+    E32(top_left);
+    E32(bottom_right);
+    END_BATCH();
+}
+
+/*
+ * Program the generic scissor: (x1, y1) plus WINDOW_OFFSET_DISABLE goes
+ * into PA_SC_GENERIC_SCISSOR_TL and (x2, y2) into the BR dword that follows
+ * it.
+ *
+ * The bottom-right Y coordinate is placed with the BR register's own field
+ * macro, matching the screen, viewport and window scissor helpers.
+ */
+void
+evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
+    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
+         (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
+         WINDOW_OFFSET_DISABLE_bit));
+    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
+         (y2 << PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift)));
+    END_BATCH();
+}
+
+/*
+ * Program the window scissor: (x1, y1) plus WINDOW_OFFSET_DISABLE goes
+ * into PA_SC_WINDOW_SCISSOR_TL and (x2, y2) into the BR dword that follows
+ * it.
+ */
+void
+evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t top_left, bottom_right;
+
+    top_left = ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
+                (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
+                WINDOW_OFFSET_DISABLE_bit);
+    bottom_right = ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
+                    (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift));
+
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
+    E32(top_left);
+    E32(bottom_right);
+    END_BATCH();
+}
+
+/*
+ * Program clip rectangle number id: (x1, y1) goes into the TL dword at
+ * PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset and (x2, y2) into
+ * the BR dword that follows it.
+ */
+void
+evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t top_left, bottom_right;
+
+    top_left = ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
+                (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift));
+    bottom_right = ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
+                    (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift));
+
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
+    E32(top_left);
+    E32(bottom_right);
+    END_BATCH();
+}
+
+/*
+ * Setup of default state
+ */
+
+/*
+ * Emit the default 3D state once per 3D (re)initialisation.
+ *
+ * Returns immediately when accel_state->XInited3D is already set;
+ * otherwise sets it and emits, in order: the context-control packet, the
+ * SQ resource partitioning for the chip family, zeroed SQ ring/item-size
+ * registers, DB, SX, CB, SC, CL, SU and SPI defaults, all clip rectangles
+ * and viewport scissors (0,0)-(8192,8192), a cleared fetch shader and the
+ * VGT defaults.
+ *
+ * Registers that need a relocation (DB_Z_INFO, DB_STENCIL_INFO,
+ * DB_HTILE_DATA_BASE and the fetch shader start) are pointed at
+ * accel_state->shaders_bo.
+ */
+void
+evergreen_set_default_state(ScrnInfoPtr pScrn)
+{
+    shader_config_t fs_conf;
+    sq_config_t sq_conf;
+    int i;
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    if (accel_state->XInited3D)
+        return;
+
+    memset(&fs_conf, 0, sizeof(shader_config_t));
+    /* start from a known state so no field is ever read uninitialised */
+    memset(&sq_conf, 0, sizeof(sq_config_t));
+
+    accel_state->XInited3D = TRUE;
+
+    evergreen_start_3d(pScrn);
+
+    /* SQ */
+    sq_conf.ps_prio = 0;
+    sq_conf.vs_prio = 1;
+    sq_conf.gs_prio = 2;
+    sq_conf.es_prio = 3;
+    sq_conf.hs_prio = 0;
+    sq_conf.ls_prio = 0;
+    sq_conf.cs_prio = 0;
+
+    /* the GPR split is the same for every family handled below */
+    sq_conf.num_ps_gprs = 93;
+    sq_conf.num_vs_gprs = 46;
+    sq_conf.num_temp_gprs = 4;
+    sq_conf.num_gs_gprs = 31;
+    sq_conf.num_es_gprs = 31;
+    sq_conf.num_hs_gprs = 23;
+    sq_conf.num_ls_gprs = 23;
+
+    /* thread and stack-entry budgets differ per family */
+    switch (info->ChipFamily) {
+    case CHIP_FAMILY_CEDAR:
+    default:
+        sq_conf.num_ps_threads = 96;
+        sq_conf.num_vs_threads = 16;
+        sq_conf.num_gs_threads = 16;
+        sq_conf.num_es_threads = 16;
+        sq_conf.num_hs_threads = 16;
+        sq_conf.num_ls_threads = 16;
+        sq_conf.num_ps_stack_entries = 42;
+        sq_conf.num_vs_stack_entries = 42;
+        sq_conf.num_gs_stack_entries = 42;
+        sq_conf.num_es_stack_entries = 42;
+        sq_conf.num_hs_stack_entries = 42;
+        sq_conf.num_ls_stack_entries = 42;
+        break;
+    case CHIP_FAMILY_REDWOOD:
+        sq_conf.num_ps_threads = 128;
+        sq_conf.num_vs_threads = 20;
+        sq_conf.num_gs_threads = 20;
+        sq_conf.num_es_threads = 20;
+        sq_conf.num_hs_threads = 20;
+        sq_conf.num_ls_threads = 20;
+        sq_conf.num_ps_stack_entries = 42;
+        sq_conf.num_vs_stack_entries = 42;
+        sq_conf.num_gs_stack_entries = 42;
+        sq_conf.num_es_stack_entries = 42;
+        sq_conf.num_hs_stack_entries = 42;
+        sq_conf.num_ls_stack_entries = 42;
+        break;
+    case CHIP_FAMILY_JUNIPER:
+    case CHIP_FAMILY_CYPRESS:
+    case CHIP_FAMILY_HEMLOCK:
+        sq_conf.num_ps_threads = 128;
+        sq_conf.num_vs_threads = 20;
+        sq_conf.num_gs_threads = 20;
+        sq_conf.num_es_threads = 20;
+        sq_conf.num_hs_threads = 20;
+        sq_conf.num_ls_threads = 20;
+        sq_conf.num_ps_stack_entries = 85;
+        sq_conf.num_vs_stack_entries = 85;
+        sq_conf.num_gs_stack_entries = 85;
+        sq_conf.num_es_stack_entries = 85;
+        sq_conf.num_hs_stack_entries = 85;
+        sq_conf.num_ls_stack_entries = 85;
+        break;
+    }
+
+    evergreen_sq_setup(pScrn, &sq_conf);
+
+    BEGIN_BATCH(24);
+    EREG(SQ_LDS_ALLOC_PS, 0);
+    EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
+
+    PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+
+    PACK0(SQ_GS_VERT_ITEMSIZE, 4);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+
+    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
+    E32(0);
+    E32(0);
+    END_BATCH();
+
+    /* DB */
+    BEGIN_BATCH(3 + 2);
+    EREG(DB_Z_INFO, 0);
+    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(3 + 2);
+    EREG(DB_STENCIL_INFO, 0);
+    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(3 + 2);
+    EREG(DB_HTILE_DATA_BASE, 0);
+    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(49);
+    EREG(DB_DEPTH_CONTROL, 0);
+
+    PACK0(PA_SC_VPORT_ZMIN_0, 2);
+    EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
+    EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
+
+    PACK0(DB_RENDER_CONTROL, 5);
+    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
+    E32(0); // DB_COUNT_CONTROL
+    E32(0); // DB_DEPTH_VIEW
+    E32(0x2a); // DB_RENDER_OVERRIDE
+    E32(0); // DB_RENDER_OVERRIDE2
+
+    PACK0(DB_STENCIL_CLEAR, 2);
+    E32(0); // DB_STENCIL_CLEAR
+    E32(0); // DB_DEPTH_CLEAR
+
+    EREG(DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) |
+                            (2 << ALPHA_TO_MASK_OFFSET1_shift) |
+                            (2 << ALPHA_TO_MASK_OFFSET2_shift) |
+                            (2 << ALPHA_TO_MASK_OFFSET3_shift)));
+
+    EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
+                             DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+    // SX
+    EREG(SX_MISC, 0);
+
+    // CB
+    PACK0(SX_ALPHA_TEST_CONTROL, 5);
+    E32(0); // SX_ALPHA_TEST_CONTROL
+    E32(0x00000000); //CB_BLEND_RED
+    E32(0x00000000); //CB_BLEND_GREEN
+    E32(0x00000000); //CB_BLEND_BLUE
+    E32(0x00000000); //CB_BLEND_ALPHA
+
+    EREG(CB_SHADER_MASK, OUTPUT0_ENABLE_mask);
+
+    // SC
+    EREG(PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) |
+                               (0 << WINDOW_Y_OFFSET_shift)));
+    EREG(PA_SC_CLIPRECT_RULE, CLIP_RULE_mask);
+    EREG(PA_SC_EDGERULE, 0xAAAAAAAA);
+    EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
+    END_BATCH();
+
+    /* clip boolean is set to always visible -> doesn't matter */
+    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
+        evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
+
+    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
+        evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
+
+    BEGIN_BATCH(50);
+    PACK0(PA_SC_MODE_CNTL_0, 2);
+    E32(0); // PA_SC_MODE_CNTL_0
+    E32(0); // PA_SC_MODE_CNTL_1
+
+    PACK0(PA_SC_LINE_CNTL, 16);
+    E32(0); // PA_SC_LINE_CNTL
+    E32(0); // PA_SC_AA_CONFIG
+    E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
+         PIX_CENTER_bit)); // PA_SU_VTX_CNTL
+    EFLOAT(1.0); // PA_CL_GB_VERT_CLIP_ADJ
+    EFLOAT(1.0); // PA_CL_GB_VERT_DISC_ADJ
+    EFLOAT(1.0); // PA_CL_GB_HORZ_CLIP_ADJ
+    EFLOAT(1.0); // PA_CL_GB_HORZ_DISC_ADJ
+    E32(0); // PA_SC_AA_SAMPLE_LOCS_0
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0); // PA_SC_AA_SAMPLE_LOCS_7
+    E32(0xFFFFFFFF); // PA_SC_AA_MASK
+
+    // CL
+    PACK0(PA_CL_CLIP_CNTL, 8);
+    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
+    E32(FACE_bit); // PA_SU_SC_MODE_CNTL
+    E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
+    E32(0); // PA_CL_VS_OUT_CNTL
+    E32(0); // PA_CL_NANINF_CNTL
+    E32(0); // PA_SU_LINE_STIPPLE_CNTL
+    E32(0); // PA_SU_LINE_STIPPLE_SCALE
+    E32(0); // PA_SU_PRIM_FILTER_CNTL
+
+    // SU
+    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+
+    PACK0(SPI_INPUT_Z, 8);
+    E32(0); // SPI_INPUT_Z
+    E32(0); // SPI_FOG_CNTL
+    E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
+    E32(0); // SPI_PS_IN_CONTROL_2
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    END_BATCH();
+
+    // clear FS
+    fs_conf.bo = accel_state->shaders_bo;
+    evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    // VGT
+    BEGIN_BATCH(46);
+
+    PACK0(VGT_MAX_VTX_INDX, 4);
+    E32(0xffffff);
+    E32(0);
+    E32(0);
+    E32(0);
+
+    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
+    E32(0);
+    E32(0);
+
+    PACK0(VGT_REUSE_OFF, 2);
+    E32(0);
+    E32(0);
+
+    PACK0(PA_SU_POINT_SIZE, 17);
+    E32(0); // PA_SU_POINT_SIZE
+    E32(0); // PA_SU_POINT_MINMAX
+    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
+    E32(0); // PA_SC_LINE_STIPPLE
+    E32(0); // VGT_OUTPUT_PATH_CNTL
+    E32(0); // VGT_HOS_CNTL
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0); // VGT_GS_MODE
+
+    EREG(VGT_PRIMITIVEID_EN, 0);
+    EREG(VGT_MULTI_PRIM_IB_RESET_EN, 0);
+    EREG(VGT_SHADER_STAGES_EN, 0);
+
+    PACK0(VGT_STRMOUT_CONFIG, 2);
+    E32(0);
+    E32(0);
+    END_BATCH();
+}
+
+
+/*
+ * Commands
+ */
+
+/*
+ * Emit one auto-indexed draw in a single 10-dword batch: the primitive
+ * type register, then IT_INDEX_TYPE, IT_NUM_INSTANCES and
+ * IT_DRAW_INDEX_AUTO packets filled from draw_conf.
+ *
+ * pScrn     - screen whose command stream receives the packets
+ * draw_conf - primitive type, index type, instance count, index count and
+ *             draw initiator value
+ */
+void
+evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+
+ BEGIN_BATCH(10);
+ EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
+ PACK3(IT_INDEX_TYPE, 1);
+ E32(draw_conf->index_type);
+ PACK3(IT_NUM_INSTANCES, 1);
+ E32(draw_conf->num_instances);
+ /* payload: index count followed by the draw initiator */
+ PACK3(IT_DRAW_INDEX_AUTO, 2);
+ E32(draw_conf->num_indices);
+ E32(draw_conf->vgt_draw_initiator);
+ END_BATCH();
+}
+
+/*
+ * Finish the current accelerated operation.
+ *
+ * Does nothing when no operation is open (vbo.vb_start_op == -1).  When the
+ * operation produced no vertices, the indirect buffer and both vertex
+ * buffers are discarded and the command stream is flushed.  Otherwise the
+ * vertices written since vb_start_op are bound as a vertex resource, drawn
+ * as a rect list, the destination surface is synced and the per-operation
+ * markers are reset.
+ *
+ * vtx_size - size of one vertex in bytes
+ */
+void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    draw_config_t draw;
+    vtx_resource_t vtx;
+
+    /* no operation in flight */
+    if (accel_state->vbo.vb_start_op == -1)
+        return;
+
+    CLEAR (draw);
+    CLEAR (vtx);
+
+    /* nothing was written to the vertex buffer: throw the operation away */
+    if (accel_state->vbo.vb_start_op == accel_state->vbo.vb_offset) {
+        radeon_ib_discard(pScrn);
+        radeon_cs_flush_indirect(pScrn);
+        radeon_vb_discard(pScrn, &accel_state->vbo);
+        radeon_vb_discard(pScrn, &accel_state->cbuf);
+        return;
+    }
+
+    /* Vertex buffer setup */
+    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
+
+    vtx.id = SQ_FETCH_RESOURCE_vs;
+    vtx.bo = accel_state->vbo.vb_bo;
+    vtx.vb_addr = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
+    vtx.vtx_size_dw = vtx_size / 4;
+    vtx.vtx_num_entries = accel_state->vbo.vb_size / 4;
+    vtx.dst_sel_x = SQ_SEL_X;
+    vtx.dst_sel_y = SQ_SEL_Y;
+    vtx.dst_sel_z = SQ_SEL_Z;
+    vtx.dst_sel_w = SQ_SEL_W;
+    evergreen_set_vtx_resource(pScrn, &vtx, RADEON_GEM_DOMAIN_GTT);
+
+    /* Draw: one index per vertex written */
+    draw.prim_type = DI_PT_RECTLIST;
+    draw.index_type = DI_INDEX_SIZE_16_BIT;
+    draw.num_instances = 1;
+    draw.num_indices = vtx.vtx_num_entries / vtx.vtx_size_dw;
+    draw.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+
+    evergreen_draw_auto(pScrn, &draw);
+
+    /* sync dst surface */
+    evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
+                                  accel_state->dst_size, accel_state->dst_obj.offset,
+                                  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
+
+    /* mark the operation as closed */
+    accel_state->vbo.vb_start_op = -1;
+    accel_state->cbuf.vb_start_op = -1;
+    accel_state->ib_reset_op = 0;
+
+}
+
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
new file mode 100644
index 00000000..7b2a65ee
--- /dev/null
+++ b/src/evergreen_exa.c
@@ -0,0 +1,1907 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Alex Deucher <alexander.deucher@amd.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include "exa.h"
+
+#include "radeon.h"
+#include "radeon_macros.h"
+#include "radeon_reg.h"
+#include "evergreen_shader.h"
+#include "evergreen_reg.h"
+#include "evergreen_state.h"
+#include "radeon_exa_shared.h"
+#include "radeon_vbo.h"
+
+uint32_t EVERGREEN_ROP[16] = { /* ROP3 value for each of the 16 raster ops; indexed with accel_state->rop when programming CB_COLOR_CONTROL */
+ RADEON_ROP3_ZERO, /* GXclear */
+ RADEON_ROP3_DSa, /* GXand */
+ RADEON_ROP3_SDna, /* GXandReverse */
+ RADEON_ROP3_S, /* GXcopy */
+ RADEON_ROP3_DSna, /* GXandInverted */
+ RADEON_ROP3_D, /* GXnoop */
+ RADEON_ROP3_DSx, /* GXxor */
+ RADEON_ROP3_DSo, /* GXor */
+ RADEON_ROP3_DSon, /* GXnor */
+ RADEON_ROP3_DSxn, /* GXequiv */
+ RADEON_ROP3_Dn, /* GXinvert */
+ RADEON_ROP3_SDno, /* GXorReverse */
+ RADEON_ROP3_Sn, /* GXcopyInverted */
+ RADEON_ROP3_DSno, /* GXorInverted */
+ RADEON_ROP3_DSan, /* GXnand */
+ RADEON_ROP3_ONE, /* GXset */
+};
+
+/*
+ * Record the surfaces, shader offsets, raster op and planemask of the next
+ * operation in info->accel_state, check surface alignment, and register the
+ * buffer objects involved with the command stream.
+ *
+ * src0, src1 and dst may each be NULL; the matching slot is then zeroed and
+ * its size set to 0.  The state is stored before it is validated, so the
+ * accel state has already been updated when a check bails out through
+ * RADEON_FALLBACK().  Returns TRUE when every check passes.
+ */
+Bool
+EVERGREENSetAccelState(ScrnInfoPtr pScrn,
+                       struct r600_accel_object *src0,
+                       struct r600_accel_object *src1,
+                       struct r600_accel_object *dst,
+                       uint32_t vs_offset, uint32_t ps_offset,
+                       int rop, Pixel planemask)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    accel_state->rop = rop;
+    accel_state->planemask = planemask;
+
+    /* first source */
+    if (!src0) {
+        memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
+        accel_state->src_size[0] = 0;
+    } else {
+        memcpy(&accel_state->src_obj[0], src0, sizeof(*src0));
+        accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
+    }
+
+    /* second source */
+    if (!src1) {
+        memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
+        accel_state->src_size[1] = 0;
+    } else {
+        memcpy(&accel_state->src_obj[1], src1, sizeof(*src1));
+        accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
+    }
+
+    /* destination */
+    if (!dst) {
+        memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
+        accel_state->dst_size = 0;
+    } else {
+        memcpy(&accel_state->dst_obj, dst, sizeof(*dst));
+        accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
+    }
+
+    /* pitches must be a multiple of 8, offsets a multiple of 256 */
+    if (accel_state->src_obj[0].pitch & 7) {
+        RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
+    }
+    if (accel_state->src_obj[0].offset & 0xff) {
+        RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
+    }
+    if (accel_state->src_obj[1].pitch & 7) {
+        RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
+    }
+    if (accel_state->src_obj[1].offset & 0xff) {
+        RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
+    }
+    if (accel_state->dst_obj.pitch & 7) {
+        RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
+    }
+    if (accel_state->dst_obj.offset & 0xff) {
+        RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
+    }
+
+    /* shader locations; both shader sizes are fixed at 512 */
+    accel_state->vs_mc_addr = vs_offset;
+    accel_state->ps_mc_addr = ps_offset;
+    accel_state->vs_size = 512;
+    accel_state->ps_size = 512;
+
+    /* rebuild the list of buffers the command stream will reference */
+    radeon_cs_space_reset_bos(info->cs);
+    radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
+                                      RADEON_GEM_DOMAIN_VRAM, 0);
+    if (accel_state->src_obj[0].bo) {
+        radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
+                                          accel_state->src_obj[0].domain, 0);
+    }
+    if (accel_state->src_obj[1].bo) {
+        radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
+                                          accel_state->src_obj[1].domain, 0);
+    }
+    if (accel_state->dst_obj.bo) {
+        radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
+                                          0, accel_state->dst_obj.domain);
+    }
+
+    if (radeon_cs_space_check(info->cs)) {
+        RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
+    }
+
+    return TRUE;
+}
+
+static void
+EVERGREENDoneSolid(PixmapPtr pPix);
+
+static Bool
+EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
+{ /* Set up a solid fill of pPix with colour fg, raster op alu and planemask pm.
+ * Validates depth and planemask, records the destination in the accel state,
+ * then emits default state, scissors, the solid shaders, the render target,
+ * the colour-buffer/interpolator registers and the fill colour as a pixel
+ * shader constant.  Returns FALSE (no state emitted) when
+ * EVERGREENSetAccelState() rejects the destination.
+ */
+ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+ cb_config_t cb_conf;
+ shader_config_t vs_conf, ps_conf;
+ int pmask = 0;
+ uint32_t a, r, g, b;
+ float *ps_alu_consts;
+ const_config_t ps_const_conf;
+ struct r600_accel_object dst;
+
+ //return FALSE;
+
+ if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
+ RADEON_FALLBACK(("EVERGREENCheckDatatype failed\n"));
+ if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
+ RADEON_FALLBACK(("invalid planemask\n"));
+
+ dst.offset = 0;
+ dst.bo = radeon_get_pixmap_bo(pPix);
+
+ dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); /* byte pitch divided by bytes per pixel */
+ dst.width = pPix->drawable.width;
+ dst.height = pPix->drawable.height;
+ dst.bpp = pPix->drawable.bitsPerPixel;
+ dst.domain = RADEON_GEM_DOMAIN_VRAM;
+
+ if (!EVERGREENSetAccelState(pScrn,
+ NULL,
+ NULL,
+ &dst,
+ accel_state->solid_vs_offset, accel_state->solid_ps_offset,
+ alu, pm))
+ return FALSE;
+
+ CLEAR (cb_conf);
+ CLEAR (vs_conf);
+ CLEAR (ps_conf);
+ CLEAR (ps_const_conf);
+
+ radeon_vbo_check(pScrn, &accel_state->vbo, 16);
+ radeon_vbo_check(pScrn, &accel_state->cbuf, 256); /* 256 matches the constant block requested further down */
+ radeon_cp_start(pScrn);
+
+ evergreen_set_default_state(pScrn);
+
+ evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+
+ /* Shader */
+ vs_conf.shader_addr = accel_state->vs_mc_addr;
+ vs_conf.shader_size = accel_state->vs_size;
+ vs_conf.num_gprs = 2;
+ vs_conf.stack_size = 0;
+ vs_conf.bo = accel_state->shaders_bo;
+ evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+ ps_conf.shader_addr = accel_state->ps_mc_addr;
+ ps_conf.shader_size = accel_state->ps_size;
+ ps_conf.num_gprs = 1;
+ ps_conf.stack_size = 0;
+ ps_conf.clamp_consts = 0;
+ ps_conf.export_mode = 2;
+ ps_conf.bo = accel_state->shaders_bo;
+ evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+
+ cb_conf.id = 0;
+ cb_conf.w = accel_state->dst_obj.pitch; /* render target width is the pitch, not the drawable width */
+ cb_conf.h = accel_state->dst_obj.height;
+ cb_conf.base = accel_state->dst_obj.offset;
+ cb_conf.bo = accel_state->dst_obj.bo;
+
+ if (accel_state->dst_obj.bpp == 8) {
+ cb_conf.format = COLOR_8;
+ cb_conf.comp_swap = 3; /* A */
+ } else if (accel_state->dst_obj.bpp == 16) {
+ cb_conf.format = COLOR_5_6_5;
+ cb_conf.comp_swap = 2; /* RGB */
+ } else {
+ cb_conf.format = COLOR_8_8_8_8;
+ cb_conf.comp_swap = 1; /* ARGB */
+ }
+ cb_conf.source_format = EXPORT_4C_16BPC;
+ cb_conf.blend_clamp = 1;
+ evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
+
+ /* Render setup: turn each non-zero planemask byte into a channel write-enable bit */
+ if (accel_state->planemask & 0x000000ff)
+ pmask |= 4; /* B */
+ if (accel_state->planemask & 0x0000ff00)
+ pmask |= 2; /* G */
+ if (accel_state->planemask & 0x00ff0000)
+ pmask |= 1; /* R */
+ if (accel_state->planemask & 0xff000000)
+ pmask |= 8; /* A */
+
+ BEGIN_BATCH(23);
+ EREG(CB_TARGET_MASK, (pmask << TARGET0_ENABLE_shift));
+ EREG(CB_COLOR_CONTROL, (EVERGREEN_ROP[accel_state->rop] |
+ (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
+ EREG(CB_BLEND0_CONTROL, 0);
+
+ /* Interpolator setup */
+ /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */
+ EREG(SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift));
+ EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
+ /* color semantic id 0 -> GPR[0] */
+ EREG(SPI_PS_INPUT_CNTL_0 + (0 << 2), ((0 << SEMANTIC_shift) |
+ (0x03 << DEFAULT_VAL_shift) |
+ FLAT_SHADE_bit));
+
+ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
+ /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */
+ PACK0(SPI_PS_IN_CONTROL_0, 3);
+ E32(((0 << NUM_INTERP_shift) |
+ LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
+ E32(0); // SPI_PS_IN_CONTROL_1
+ E32(FLAT_SHADE_ENA_bit); // SPI_INTERP_CONTROL_0
+ END_BATCH();
+
+
+ /* PS alu constants: the fill colour as four floats (R, G, B, A) in [0, 1] */
+ ps_const_conf.size_bytes = 256;
+ ps_const_conf.type = SHADER_TYPE_PS;
+ ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
+ if (accel_state->dst_obj.bpp == 16) { /* 5:6:5 layout, alpha forced to 1 */
+ r = (fg >> 11) & 0x1f;
+ g = (fg >> 5) & 0x3f;
+ b = (fg >> 0) & 0x1f;
+ ps_alu_consts[0] = (float)r / 31; /* R */
+ ps_alu_consts[1] = (float)g / 63; /* G */
+ ps_alu_consts[2] = (float)b / 31; /* B */
+ ps_alu_consts[3] = 1.0; /* A */
+ } else if (accel_state->dst_obj.bpp == 8) { /* the single byte is carried in the alpha slot */
+ a = (fg >> 0) & 0xff;
+ ps_alu_consts[0] = 0.0; /* R */
+ ps_alu_consts[1] = 0.0; /* G */
+ ps_alu_consts[2] = 0.0; /* B */
+ ps_alu_consts[3] = (float)a / 255; /* A */
+ } else { /* 8:8:8:8 layout, alpha in the top byte */
+ a = (fg >> 24) & 0xff;
+ r = (fg >> 16) & 0xff;
+ g = (fg >> 8) & 0xff;
+ b = (fg >> 0) & 0xff;
+ ps_alu_consts[0] = (float)r / 255; /* R */
+ ps_alu_consts[1] = (float)g / 255; /* G */
+ ps_alu_consts[2] = (float)b / 255; /* B */
+ ps_alu_consts[3] = (float)a / 255; /* A */
+ }
+ radeon_vbo_commit(pScrn, &accel_state->cbuf);
+
+ ps_const_conf.bo = accel_state->cbuf.vb_bo;
+ ps_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op;
+ evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
+
+ if (accel_state->vsync)
+ RADEONVlineHelperClear(pScrn);
+
+ return TRUE;
+}
+
+
+/*
+ * Queue one rectangle of the current solid fill.  Three corners are written
+ * to the vertex buffer as float pairs, in the order (x1,y1), (x1,y2), (x2,y2).
+ * When vsync is enabled the rectangle is first passed to RADEONVlineHelperSet().
+ */
+static void
+EVERGREENSolid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    float *vtx;
+
+    if (accel_state->vsync) {
+        RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
+    }
+
+    vtx = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
+
+    /* corner (x1, y1) */
+    *vtx++ = (float)x1;
+    *vtx++ = (float)y1;
+
+    /* corner (x1, y2) */
+    *vtx++ = (float)x1;
+    *vtx++ = (float)y2;
+
+    /* corner (x2, y2) */
+    *vtx++ = (float)x2;
+    *vtx   = (float)y2;
+
+    radeon_vbo_commit(pScrn, &accel_state->vbo);
+}
+
+/*
+ * End a solid-fill sequence.  With vsync enabled, a vline wait is emitted for
+ * the recorded crtc and scanline range; the queued vertices are then
+ * submitted through evergreen_finish_op() with a vertex size of 8.
+ */
+static void
+EVERGREENDoneSolid(PixmapPtr pPix)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    if (accel_state->vsync) {
+        evergreen_cp_wait_vline_sync(pScrn, pPix,
+                                     accel_state->vline_crtc,
+                                     accel_state->vline_y1,
+                                     accel_state->vline_y2);
+    }
+
+    evergreen_finish_op(pScrn, 8);
+}
+
+static void
+EVERGREENDoPrepareCopy(ScrnInfoPtr pScrn)
+{ /* Emit the complete state for a copy, taken entirely from info->accel_state:
+ * default state, scissors sized to the destination, the vertex/pixel shaders,
+ * src_obj[0] as texture 0 with point sampling, dst_obj as render target 0,
+ * and the colour-buffer/interpolator registers (ROP and planemask included).
+ */
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+ int pmask = 0;
+ cb_config_t cb_conf;
+ tex_resource_t tex_res;
+ tex_sampler_t tex_samp;
+ shader_config_t vs_conf, ps_conf;
+
+ CLEAR (cb_conf);
+ CLEAR (tex_res);
+ CLEAR (tex_samp);
+ CLEAR (vs_conf);
+ CLEAR (ps_conf);
+
+ radeon_vbo_check(pScrn, &accel_state->vbo, 16);
+ radeon_cp_start(pScrn);
+
+ evergreen_set_default_state(pScrn);
+
+ evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+
+ /* Shader */
+ vs_conf.shader_addr = accel_state->vs_mc_addr;
+ vs_conf.shader_size = accel_state->vs_size;
+ vs_conf.num_gprs = 2;
+ vs_conf.stack_size = 0;
+ vs_conf.bo = accel_state->shaders_bo;
+ evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+ ps_conf.shader_addr = accel_state->ps_mc_addr;
+ ps_conf.shader_size = accel_state->ps_size;
+ ps_conf.num_gprs = 1;
+ ps_conf.stack_size = 0;
+ ps_conf.clamp_consts = 0;
+ ps_conf.export_mode = 2;
+ ps_conf.bo = accel_state->shaders_bo;
+ evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+
+ /* Texture: the copy source, bound as resource 0 with a single level */
+ tex_res.id = 0;
+ tex_res.w = accel_state->src_obj[0].width;
+ tex_res.h = accel_state->src_obj[0].height;
+ tex_res.pitch = accel_state->src_obj[0].pitch;
+ tex_res.depth = 0;
+ tex_res.dim = SQ_TEX_DIM_2D;
+ tex_res.base = accel_state->src_obj[0].offset;
+ tex_res.mip_base = accel_state->src_obj[0].offset;
+ tex_res.size = accel_state->src_size[0];
+ tex_res.bo = accel_state->src_obj[0].bo;
+ tex_res.mip_bo = accel_state->src_obj[0].bo;
+ if (accel_state->src_obj[0].bpp == 8) { /* single channel is routed to alpha, RGB read as 1 */
+ tex_res.format = FMT_8;
+ tex_res.dst_sel_x = SQ_SEL_1; /* R */
+ tex_res.dst_sel_y = SQ_SEL_1; /* G */
+ tex_res.dst_sel_z = SQ_SEL_1; /* B */
+ tex_res.dst_sel_w = SQ_SEL_X; /* A */
+ } else if (accel_state->src_obj[0].bpp == 16) { /* 5:6:5 has no alpha, so A reads as 1 */
+ tex_res.format = FMT_5_6_5;
+ tex_res.dst_sel_x = SQ_SEL_Z; /* R */
+ tex_res.dst_sel_y = SQ_SEL_Y; /* G */
+ tex_res.dst_sel_z = SQ_SEL_X; /* B */
+ tex_res.dst_sel_w = SQ_SEL_1; /* A */
+ } else {
+ tex_res.format = FMT_8_8_8_8;
+ tex_res.dst_sel_x = SQ_SEL_Z; /* R */
+ tex_res.dst_sel_y = SQ_SEL_Y; /* G */
+ tex_res.dst_sel_z = SQ_SEL_X; /* B */
+ tex_res.dst_sel_w = SQ_SEL_W; /* A */
+ }
+
+ tex_res.base_level = 0;
+ tex_res.last_level = 0;
+ tex_res.perf_modulation = 0;
+ evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
+
+ tex_samp.id = 0;
+ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_z = SQ_TEX_WRAP;
+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; /* point filtering on both mag and min */
+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT;
+ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
+ tex_samp.mip_filter = 0; /* no mipmap */
+ evergreen_set_tex_sampler (pScrn, &tex_samp);
+
+ cb_conf.id = 0;
+ cb_conf.w = accel_state->dst_obj.pitch; /* render target width is the pitch, not the drawable width */
+ cb_conf.h = accel_state->dst_obj.height;
+ cb_conf.base = accel_state->dst_obj.offset;
+ cb_conf.bo = accel_state->dst_obj.bo;
+ if (accel_state->dst_obj.bpp == 8) {
+ cb_conf.format = COLOR_8;
+ cb_conf.comp_swap = 3; /* A */
+ } else if (accel_state->dst_obj.bpp == 16) {
+ cb_conf.format = COLOR_5_6_5;
+ cb_conf.comp_swap = 2; /* RGB */
+ } else {
+ cb_conf.format = COLOR_8_8_8_8;
+ cb_conf.comp_swap = 1; /* ARGB */
+ }
+ cb_conf.source_format = EXPORT_4C_16BPC;
+ cb_conf.blend_clamp = 1;
+ evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
+
+ /* Render setup: turn each non-zero planemask byte into a channel write-enable bit */
+ if (accel_state->planemask & 0x000000ff)
+ pmask |= 4; /* B */
+ if (accel_state->planemask & 0x0000ff00)
+ pmask |= 2; /* G */
+ if (accel_state->planemask & 0x00ff0000)
+ pmask |= 1; /* R */
+ if (accel_state->planemask & 0xff000000)
+ pmask |= 8; /* A */
+
+ BEGIN_BATCH(23);
+ EREG(CB_TARGET_MASK, (pmask << TARGET0_ENABLE_shift));
+ EREG(CB_COLOR_CONTROL, (EVERGREEN_ROP[accel_state->rop] |
+ (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
+ EREG(CB_BLEND0_CONTROL, 0);
+
+ /* Interpolator setup */
+ /* export tex coord from VS */
+ EREG(SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
+ EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
+ /* color semantic id 0 -> GPR[0] */
+ EREG(SPI_PS_INPUT_CNTL_0 + (0 << 2), ((0 << SEMANTIC_shift) |
+ (0x01 << DEFAULT_VAL_shift)));
+
+ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0; neither bit is set
+ * here, so flat shading stays off for copies */
+ /* input tex coord from VS */
+ PACK0(SPI_PS_IN_CONTROL_0, 3);
+ E32(((1 << NUM_INTERP_shift) |
+ LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
+ E32(0); //SPI_PS_IN_CONTROL_1
+ E32(0); // SPI_INTERP_CONTROL_0
+ END_BATCH();
+
+}
+
+static void
+EVERGREENDoCopy(ScrnInfoPtr pScrn)
+{ /* Submit the queued copy vertices; 16 is the vertex size handed to evergreen_finish_op() */
+ evergreen_finish_op(pScrn, 16);
+}
+
+/*
+ * Submit the queued copy vertices like EVERGREENDoCopy(), but when vsync is
+ * enabled first emit a vline wait for the recorded crtc and scanline range.
+ */
+static void
+EVERGREENDoCopyVline(PixmapPtr pPix)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    if (accel_state->vsync) {
+        evergreen_cp_wait_vline_sync(pScrn, pPix,
+                                     accel_state->vline_crtc,
+                                     accel_state->vline_y1,
+                                     accel_state->vline_y2);
+    }
+
+    evergreen_finish_op(pScrn, 16);
+}
+
+/*
+ * Queue one copy rectangle.  Three vertices are written, each as four floats
+ * (dst x, dst y, src x, src y), for the corners (x, y), (x, y + h) and
+ * (x + w, y + h) of the destination and source rectangles.
+ */
+static void
+EVERGREENAppendCopyVertex(ScrnInfoPtr pScrn,
+                          int srcX, int srcY,
+                          int dstX, int dstY,
+                          int w, int h)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    const int dst_x2 = dstX + w;
+    const int dst_y2 = dstY + h;
+    const int src_x2 = srcX + w;
+    const int src_y2 = srcY + h;
+    float *vtx;
+
+    vtx = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
+
+    /* corner (x, y) */
+    *vtx++ = (float)dstX;
+    *vtx++ = (float)dstY;
+    *vtx++ = (float)srcX;
+    *vtx++ = (float)srcY;
+
+    /* corner (x, y + h) */
+    *vtx++ = (float)dstX;
+    *vtx++ = (float)dst_y2;
+    *vtx++ = (float)srcX;
+    *vtx++ = (float)src_y2;
+
+    /* corner (x + w, y + h) */
+    *vtx++ = (float)dst_x2;
+    *vtx++ = (float)dst_y2;
+    *vtx++ = (float)src_x2;
+    *vtx   = (float)src_y2;
+
+    radeon_vbo_commit(pScrn, &accel_state->vbo);
+}
+
+/*
+ * Set up a copy from pSrc to pDst with the given raster op and planemask.
+ *
+ * Both depths and the planemask are validated, the two surfaces are recorded
+ * through EVERGREENSetAccelState(), and then:
+ *  - different buffer objects: the copy state is emitted right away;
+ *  - same buffer object: a temporary VRAM buffer the size of the destination
+ *    is allocated and registered with the command stream, and state emission
+ *    is left to EVERGREENCopy().
+ * xdir and ydir are not used.  Returns FALSE when the operation cannot be
+ * accelerated.
+ */
+static Bool
+EVERGREENPrepareCopy(PixmapPtr pSrc, PixmapPtr pDst,
+                     int xdir, int ydir,
+                     int rop,
+                     Pixel planemask)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    struct r600_accel_object src_obj, dst_obj;
+
+    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel)) {
+        RADEON_FALLBACK(("EVERGREENCheckDatatype src failed\n"));
+    }
+    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel)) {
+        RADEON_FALLBACK(("EVERGREENCheckDatatype dst failed\n"));
+    }
+    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel)) {
+        RADEON_FALLBACK(("Invalid planemask\n"));
+    }
+
+    /* destination surface */
+    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
+    dst_obj.offset = 0;
+    dst_obj.bo = radeon_get_pixmap_bo(pDst);
+    dst_obj.width = pDst->drawable.width;
+    dst_obj.height = pDst->drawable.height;
+    dst_obj.bpp = pDst->drawable.bitsPerPixel;
+    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
+
+    /* source surface */
+    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
+    src_obj.offset = 0;
+    src_obj.bo = radeon_get_pixmap_bo(pSrc);
+    src_obj.width = pSrc->drawable.width;
+    src_obj.height = pSrc->drawable.height;
+    src_obj.bpp = pSrc->drawable.bitsPerPixel;
+    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+
+    /* source and destination share a surface when they share a buffer object */
+    accel_state->same_surface = (src_obj.bo == dst_obj.bo) ? TRUE : FALSE;
+
+    if (!EVERGREENSetAccelState(pScrn,
+                                &src_obj,
+                                NULL,
+                                &dst_obj,
+                                accel_state->copy_vs_offset, accel_state->copy_ps_offset,
+                                rop, planemask)) {
+        return FALSE;
+    }
+
+    if (accel_state->same_surface != TRUE) {
+        EVERGREENDoPrepareCopy(pScrn);
+    } else {
+        unsigned long size = pDst->drawable.height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
+
+        /* drop the temporary buffer left over from an earlier same-surface copy */
+        if (accel_state->copy_area_bo) {
+            radeon_bo_unref(accel_state->copy_area_bo);
+            accel_state->copy_area_bo = NULL;
+        }
+
+        accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
+                                                   RADEON_GEM_DOMAIN_VRAM,
+                                                   0);
+        if (accel_state->copy_area_bo == NULL) {
+            RADEON_FALLBACK(("temp copy surface alloc failed\n"));
+        }
+
+        radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
+                                          RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_VRAM);
+        if (radeon_cs_space_check(info->cs)) {
+            radeon_bo_unref(accel_state->copy_area_bo);
+            accel_state->copy_area_bo = NULL;
+            return FALSE;
+        }
+
+        accel_state->copy_area = (void*)accel_state->copy_area_bo;
+    }
+
+    if (accel_state->vsync) {
+        RADEONVlineHelperClear(pScrn);
+    }
+
+    return TRUE;
+}
+
+static void
+EVERGREENCopy(PixmapPtr pDst,
+ int srcX, int srcY,
+ int dstX, int dstY,
+ int w, int h)
+{ /* Copy one w x h rectangle from (srcX, srcY) to (dstX, dstY).
+ * Normally this only queues vertices.  When source and destination share a
+ * surface and a temporary buffer was set up, the rectangle is copied in two
+ * complete passes (source -> temporary, temporary -> destination), each
+ * with its own state setup and draw.
+ */
+ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+
+ if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) /* copying a region onto itself changes nothing */
+ return;
+
+ if (accel_state->vsync)
+ RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
+
+ if (accel_state->same_surface && accel_state->copy_area) {
+ uint32_t orig_dst_domain = accel_state->dst_obj.domain;
+ uint32_t orig_src_domain = accel_state->src_obj[0].domain;
+ struct radeon_bo *orig_bo = accel_state->dst_obj.bo; /* the shared surface; also used to restore the source below */
+
+ /* src to tmp: the rectangle lands in the temporary buffer at the destination coordinates */
+ accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
+ accel_state->dst_obj.bo = accel_state->copy_area_bo;
+ accel_state->dst_obj.offset = 0;
+ EVERGREENDoPrepareCopy(pScrn);
+ EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
+ EVERGREENDoCopy(pScrn);
+
+ /* tmp to dst: same coordinates on both sides, and this pass goes through the vline-wait variant */
+ accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
+ accel_state->src_obj[0].bo = accel_state->copy_area_bo;
+ accel_state->src_obj[0].offset = 0;
+ accel_state->dst_obj.domain = orig_dst_domain;
+ accel_state->dst_obj.bo = orig_bo;
+ accel_state->dst_obj.offset = 0;
+ EVERGREENDoPrepareCopy(pScrn);
+ EVERGREENAppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
+ EVERGREENDoCopyVline(pDst);
+
+ /* restore state so the next rectangle starts from the original source again */
+ accel_state->src_obj[0].domain = orig_src_domain;
+ accel_state->src_obj[0].bo = orig_bo;
+ accel_state->src_obj[0].offset = 0;
+ } else
+ EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
+
+}
+
+/*
+ * End a copy sequence.  For distinct surfaces the queued vertices are
+ * submitted here (with the optional vline wait); same-surface copies were
+ * already submitted rectangle by rectangle in EVERGREENCopy().  The
+ * copy_area marker is cleared in both cases.
+ */
+static void
+EVERGREENDoneCopy(PixmapPtr pDst)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    if (!accel_state->same_surface) {
+        EVERGREENDoCopyVline(pDst);
+    }
+
+    accel_state->copy_area = NULL;
+}
+
+
+#define xFixedToFloat(f) (((float) (f)) / 65536) /* scale by 1/2^16; presumably for Render's 16.16 fixed-point values - confirm */
+
+struct blendinfo { /* hardware blend factors for one composite operator */
+ Bool dst_alpha; /* set when the source blend factor reads destination alpha */
+ Bool src_alpha; /* set when the destination blend factor reads source alpha */
+ uint32_t blend_cntl; /* COLOR_SRCBLEND and COLOR_DESTBLEND fields, already shifted into place */
+};
+
+static struct blendinfo EVERGREENBlendOp[] = { /* indexed by the op argument of EVERGREENGetBlendCntl(); presumably in Render PictOp order - confirm */
+ /* Clear */
+ {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
+ /* Src */
+ {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
+ /* Dst */
+ {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
+ /* Over */
+ {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+ /* OverReverse */
+ {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
+ /* In */
+ {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
+ /* InReverse */
+ {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+ /* Out */
+ {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
+ /* OutReverse */
+ {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+ /* Atop */
+ {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+ /* AtopReverse */
+ {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+ /* Xor */
+ {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+ /* Add */
+ {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
+};
+
+struct formatinfo { /* pairs a picture format with the texture format used to sample it */
+ unsigned int fmt; /* PICT_* picture format, compared against pPict->format */
+ uint32_t card_fmt; /* FMT_* value stored in the texture resource */
+};
+
+static struct formatinfo EVERGREENTexFormats[] = { /* picture formats accepted as textures; searched linearly by format */
+ {PICT_a8r8g8b8, FMT_8_8_8_8},
+ {PICT_x8r8g8b8, FMT_8_8_8_8},
+ {PICT_a8b8g8r8, FMT_8_8_8_8},
+ {PICT_x8b8g8r8, FMT_8_8_8_8},
+#ifdef PICT_TYPE_BGRA
+ {PICT_b8g8r8a8, FMT_8_8_8_8},
+ {PICT_b8g8r8x8, FMT_8_8_8_8},
+#endif
+ {PICT_r5g6b5, FMT_5_6_5},
+ {PICT_a1r5g5b5, FMT_1_5_5_5},
+ {PICT_x1r5g5b5, FMT_1_5_5_5},
+ {PICT_a8, FMT_8}, /* channel order is not encoded here; callers set the swizzle separately */
+};
+
+/*
+ * Build the blend-control value for composite operator op.
+ *
+ * Starts from the table entry for op and adjusts it in two cases:
+ *  - the destination format has no alpha channel but the operator reads
+ *    destination alpha: that alpha is treated as a constant 1;
+ *  - a component-alpha mask is in use and the operator reads source alpha:
+ *    the destination factor switches from source alpha to source colour.
+ * Returns the source and destination blend fields OR-ed together.  op is
+ * used as a table index without a range check.
+ */
+static uint32_t EVERGREENGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
+{
+    const struct blendinfo *entry = &EVERGREENBlendOp[op];
+    uint32_t src_factor = entry->blend_cntl & COLOR_SRCBLEND_mask;
+    uint32_t dst_factor = entry->blend_cntl & COLOR_DESTBLEND_mask;
+
+    /* No dst alpha channel: DST_ALPHA becomes ONE, 1 - DST_ALPHA becomes ZERO. */
+    if (entry->dst_alpha && PICT_FORMAT_A(dst_format) == 0) {
+        if (src_factor == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift)) {
+            src_factor = (BLEND_ONE << COLOR_SRCBLEND_shift);
+        } else if (src_factor == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift)) {
+            src_factor = (BLEND_ZERO << COLOR_SRCBLEND_shift);
+        }
+    }
+
+    /* Component alpha: the per-channel source value stands in for source
+     * alpha in the destination factor.
+     */
+    if (entry->src_alpha && pMask && pMask->componentAlpha) {
+        if (dst_factor == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
+            dst_factor = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
+        } else if (dst_factor == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
+            dst_factor = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
+        }
+    }
+
+    return src_factor | dst_factor;
+}
+
+/*
+ * Translate the destination picture's format into a colour-buffer format.
+ * On success the COLOR_* value is stored in *dst_format and TRUE is returned;
+ * an unsupported format bails out through RADEON_FALLBACK() and leaves
+ * *dst_format untouched.
+ */
+static Bool EVERGREENGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
+{
+    uint32_t hw_format = 0;
+
+    switch (pDstPicture->format) {
+    /* 32 bpp, any channel order */
+    case PICT_a8r8g8b8:
+    case PICT_x8r8g8b8:
+    case PICT_a8b8g8r8:
+    case PICT_x8b8g8r8:
+#ifdef PICT_TYPE_BGRA
+    case PICT_b8g8r8a8:
+    case PICT_b8g8r8x8:
+#endif
+        hw_format = COLOR_8_8_8_8;
+        break;
+    /* 16 bpp */
+    case PICT_r5g6b5:
+        hw_format = COLOR_5_6_5;
+        break;
+    case PICT_a1r5g5b5:
+    case PICT_x1r5g5b5:
+        hw_format = COLOR_1_5_5_5;
+        break;
+    /* 8 bpp alpha only */
+    case PICT_a8:
+        hw_format = COLOR_8;
+        break;
+    default:
+        RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
+                         (int)pDstPicture->format));
+    }
+
+    *dst_format = hw_format;
+    return TRUE;
+}
+
+/*
+ * Decide whether pPict can be sampled as a texture for composite operator op
+ * onto pDstPict.  Returns TRUE when it can; otherwise bails out through
+ * RADEON_FALLBACK().
+ *
+ * Rejected: pictures without a backing drawable (Render solid fills and
+ * gradients), pictures larger than 16384 in either dimension, formats missing
+ * from EVERGREENTexFormats, filters other than nearest/bilinear, and
+ * transformed non-repeating sources without an alpha channel (except for
+ * Src/Clear onto a destination that has no alpha either).
+ * unit is accepted for symmetry with the other texture helpers but not used.
+ */
+static Bool EVERGREENCheckCompositeTexture(PicturePtr pPict,
+                                           PicturePtr pDstPict,
+                                           int op,
+                                           int unit)
+{
+    const unsigned int num_formats =
+        sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]);
+    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
+    unsigned int i;
+    int w, h;
+    int max_tex_w, max_tex_h;
+
+    /* Source-only pictures have no drawable; reading its size would
+     * dereference NULL, so refuse them before touching pDrawable.
+     */
+    if (!pPict->pDrawable)
+        RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
+
+    w = pPict->pDrawable->width;
+    h = pPict->pDrawable->height;
+
+    max_tex_w = 16384;
+    max_tex_h = 16384;
+
+    if ((w > max_tex_w) || (h > max_tex_h))
+        RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
+
+    /* the format must be one of the known texture formats */
+    for (i = 0; i < num_formats; i++) {
+        if (EVERGREENTexFormats[i].fmt == pPict->format)
+            break;
+    }
+    if (i == num_formats)
+        RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
+                         (int)pPict->format));
+
+    if (pPict->filter != PictFilterNearest &&
+        pPict->filter != PictFilterBilinear)
+        RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
+
+    /* for REPEAT_NONE, Render semantics are that sampling outside the source
+     * picture results in alpha=0 pixels. We can implement this with a border color
+     * *if* our source texture has an alpha channel, otherwise we need to fall
+     * back. If we're not transformed then we hope that upper layers have clipped
+     * rendering to the bounds of the source drawable, in which case it doesn't
+     * matter. I have not, however, verified that the X server always does such
+     * clipping.
+     */
+    /* FIXME evergreen */
+    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
+        if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
+            RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
+    }
+
+    return TRUE;
+}
+
+/*
+ * Fill the eight vertex-shader constants of texture unit `unit` (starting at
+ * vs_alu_consts[unit * 8]) and record in the accel state whether the picture
+ * is transformed.
+ *
+ * Layout: two rows of four floats.  Slots 0-2 and 4-6 hold the first two rows
+ * of the picture transform (identity rows when there is none); slots 3 and 7
+ * hold 1/width and 1/height of the picture's drawable.
+ */
+static void EVERGREENXFormSetup(PicturePtr pPict, PixmapPtr pPix,
+                                int unit, float *vs_alu_consts)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+    float *consts = &vs_alu_consts[unit * 8];
+
+    if (pPict->transform != 0) {
+        accel_state->is_transform[unit] = TRUE;
+        accel_state->transform[unit] = pPict->transform;
+
+        /* rows 0 and 1 of the fixed-point transform matrix */
+        consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
+        consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
+        consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
+
+        consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
+        consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
+        consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
+    } else {
+        accel_state->is_transform[unit] = FALSE;
+
+        /* identity rows */
+        consts[0] = 1.0;
+        consts[1] = 0.0;
+        consts[2] = 0.0;
+
+        consts[4] = 0.0;
+        consts[5] = 1.0;
+        consts[6] = 0.0;
+    }
+
+    /* reciprocal texture size, identical in both cases */
+    consts[3] = 1.0 / w;
+    consts[7] = 1.0 / h;
+}
+
+static Bool EVERGREENTextureSetup(PicturePtr pPict, PixmapPtr pPix,
+ int unit)
+{
+ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+ int w = pPict->pDrawable->width;
+ int h = pPict->pDrawable->height;
+ unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
+ unsigned int i;
+ tex_resource_t tex_res;
+ tex_sampler_t tex_samp;
+ int pix_r, pix_g, pix_b, pix_a;
+
+ CLEAR (tex_res);
+ CLEAR (tex_samp);
+
+ for (i = 0; i < sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]); i++) {
+ if (EVERGREENTexFormats[i].fmt == pPict->format)
+ break;
+ }
+
+ /* Texture */
+ tex_res.id = unit;
+ tex_res.w = w;
+ tex_res.h = h;
+ tex_res.pitch = accel_state->src_obj[unit].pitch;
+ tex_res.depth = 0;
+ tex_res.dim = SQ_TEX_DIM_2D;
+ tex_res.base = accel_state->src_obj[unit].offset;
+ tex_res.mip_base = accel_state->src_obj[unit].offset;
+ tex_res.size = accel_state->src_size[unit];
+ tex_res.format = EVERGREENTexFormats[i].card_fmt;
+ tex_res.bo = accel_state->src_obj[unit].bo;
+ tex_res.mip_bo = accel_state->src_obj[unit].bo;
+
+ /* component swizzles */
+ switch (pPict->format) {
+ case PICT_a1r5g5b5:
+ case PICT_a8r8g8b8:
+ pix_r = SQ_SEL_Z; /* R */
+ pix_g = SQ_SEL_Y; /* G */
+ pix_b = SQ_SEL_X; /* B */
+ pix_a = SQ_SEL_W; /* A */
+ break;
+ case PICT_a8b8g8r8:
+ pix_r = SQ_SEL_X; /* R */
+ pix_g = SQ_SEL_Y; /* G */
+ pix_b = SQ_SEL_Z; /* B */
+ pix_a = SQ_SEL_W; /* A */
+ break;
+ case PICT_x8b8g8r8:
+ pix_r = SQ_SEL_X; /* R */
+ pix_g = SQ_SEL_Y; /* G */
+ pix_b = SQ_SEL_Z; /* B */
+ pix_a = SQ_SEL_1; /* A */
+ break;
+#ifdef PICT_TYPE_BGRA
+ case PICT_b8g8r8a8:
+ pix_r = SQ_SEL_Y; /* R */
+ pix_g = SQ_SEL_Z; /* G */
+ pix_b = SQ_SEL_W; /* B */
+ pix_a = SQ_SEL_X; /* A */
+ break;
+ case PICT_b8g8r8x8:
+ pix_r = SQ_SEL_Y; /* R */
+ pix_g = SQ_SEL_Z; /* G */
+ pix_b = SQ_SEL_W; /* B */
+ pix_a = SQ_SEL_1; /* A */
+ break;
+#endif
+ case PICT_x1r5g5b5:
+ case PICT_x8r8g8b8:
+ case PICT_r5g6b5:
+ pix_r = SQ_SEL_Z; /* R */
+ pix_g = SQ_SEL_Y; /* G */
+ pix_b = SQ_SEL_X; /* B */
+ pix_a = SQ_SEL_1; /* A */
+ break;
+ case PICT_a8:
+ pix_r = SQ_SEL_0; /* R */
+ pix_g = SQ_SEL_0; /* G */
+ pix_b = SQ_SEL_0; /* B */
+ pix_a = SQ_SEL_X; /* A */
+ break;
+ default:
+ RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
+ }
+
+ if (unit == 0) {
+ if (!accel_state->msk_pic) {
+ if (PICT_FORMAT_RGB(pPict->format) == 0) {
+ pix_r = SQ_SEL_0;
+ pix_g = SQ_SEL_0;
+ pix_b = SQ_SEL_0;
+ }
+
+ if (PICT_FORMAT_A(pPict->format) == 0)
+ pix_a = SQ_SEL_1;
+ } else {
+ if (accel_state->component_alpha) {
+ if (accel_state->src_alpha) {
+ if (PICT_FORMAT_A(pPict->format) == 0) {
+ pix_r = SQ_SEL_1;
+ pix_g = SQ_SEL_1;
+ pix_b = SQ_SEL_1;
+ pix_a = SQ_SEL_1;
+ } else {
+ pix_r = pix_a;
+ pix_g = pix_a;
+ pix_b = pix_a;
+ }
+ } else {
+ if (PICT_FORMAT_A(pPict->format) == 0)
+ pix_a = SQ_SEL_1;
+ }
+ } else {
+ if (PICT_FORMAT_RGB(pPict->format) == 0) {
+ pix_r = SQ_SEL_0;
+ pix_g = SQ_SEL_0;
+ pix_b = SQ_SEL_0;
+ }
+
+ if (PICT_FORMAT_A(pPict->format) == 0)
+ pix_a = SQ_SEL_1;
+ }
+ }
+ } else {
+ if (accel_state->component_alpha) {
+ if (PICT_FORMAT_A(pPict->format) == 0)
+ pix_a = SQ_SEL_1;
+ } else {
+ if (PICT_FORMAT_A(pPict->format) == 0) {
+ pix_r = SQ_SEL_1;
+ pix_g = SQ_SEL_1;
+ pix_b = SQ_SEL_1;
+ pix_a = SQ_SEL_1;
+ } else {
+ pix_r = pix_a;
+ pix_g = pix_a;
+ pix_b = pix_a;
+ }
+ }
+ }
+
+ tex_res.dst_sel_x = pix_r; /* R */
+ tex_res.dst_sel_y = pix_g; /* G */
+ tex_res.dst_sel_z = pix_b; /* B */
+ tex_res.dst_sel_w = pix_a; /* A */
+
+ tex_res.base_level = 0;
+ tex_res.last_level = 0;
+ tex_res.perf_modulation = 0;
+ evergreen_set_tex_resource (pScrn, &tex_res, accel_state->src_obj[unit].domain);
+
+ tex_samp.id = unit;
+ tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+
+ switch (repeatType) {
+ case RepeatNormal:
+ tex_samp.clamp_x = SQ_TEX_WRAP;
+ tex_samp.clamp_y = SQ_TEX_WRAP;
+ break;
+ case RepeatPad:
+ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
+ break;
+ case RepeatReflect:
+ tex_samp.clamp_x = SQ_TEX_MIRROR;
+ tex_samp.clamp_y = SQ_TEX_MIRROR;
+ break;
+ case RepeatNone:
+ tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER;
+ tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER;
+ break;
+ default:
+ RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
+ }
+
+ switch (pPict->filter) {
+ case PictFilterNearest:
+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT;
+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT;
+ break;
+ case PictFilterBilinear:
+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;
+ break;
+ default:
+ RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
+ }
+
+ tex_samp.clamp_z = SQ_TEX_WRAP;
+ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
+ tex_samp.mip_filter = 0; /* no mipmap */
+ evergreen_set_tex_sampler (pScrn, &tex_samp);
+
+ return TRUE;
+}
+
+/*
+ * EXA CheckComposite hook.
+ *
+ * Looks only at the pictures and decides whether the composite operation
+ * may be attempted in hardware.  Returns TRUE when every check passes;
+ * otherwise takes the RADEON_FALLBACK() path or returns FALSE.
+ *
+ * Checks, in order:
+ *   - op must index into EVERGREENBlendOp;
+ *   - source (and mask, if present) must be backed by a drawable;
+ *   - source, destination and mask pixmaps must be below 8192 in width
+ *     and height;
+ *   - a component-alpha mask must not need source alpha together with a
+ *     non-zero source blend factor;
+ *   - mask and source must pass EVERGREENCheckCompositeTexture(), and the
+ *     destination must pass EVERGREENGetDestFormat().
+ */
+static Bool EVERGREENCheckComposite(int op, PicturePtr pSrcPicture,
+                                    PicturePtr pMaskPicture,
+                                    PicturePtr pDstPicture)
+{
+    /* Size limits for sampled pixmaps and for the render target. */
+    const int tex_w_limit = 8192;
+    const int tex_h_limit = 8192;
+    const int dst_w_limit = 8192;
+    const int dst_h_limit = 8192;
+    PixmapPtr src_pix, dst_pix;
+    uint32_t dst_fmt; /* out-parameter only; the value is not used here */
+
+    /* Check for unsupported compositing operations. */
+    if (op >= (int) (sizeof(EVERGREENBlendOp) / sizeof(EVERGREENBlendOp[0])))
+        RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
+
+    if (!pSrcPicture->pDrawable)
+        RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
+
+    src_pix = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
+    if (src_pix->drawable.width >= tex_w_limit ||
+        src_pix->drawable.height >= tex_h_limit) {
+        RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
+                         src_pix->drawable.width,
+                         src_pix->drawable.height));
+    }
+
+    dst_pix = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
+    if (dst_pix->drawable.width >= dst_w_limit ||
+        dst_pix->drawable.height >= dst_h_limit) {
+        RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
+                         dst_pix->drawable.width,
+                         dst_pix->drawable.height));
+    }
+
+    if (pMaskPicture) {
+        PixmapPtr mask_pix;
+
+        if (!pMaskPicture->pDrawable)
+            RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
+
+        mask_pix = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
+        if (mask_pix->drawable.width >= tex_w_limit ||
+            mask_pix->drawable.height >= tex_h_limit) {
+            RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
+                             mask_pix->drawable.width,
+                             mask_pix->drawable.height));
+        }
+
+        if (pMaskPicture->componentAlpha) {
+            /* Component alpha that relies on both a source alpha and the
+             * source value cannot be done: only one of those fits into the
+             * single source value that is available for blending.
+             */
+            if (EVERGREENBlendOp[op].src_alpha) {
+                if ((EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
+                    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
+                    RADEON_FALLBACK(("Component alpha not supported with source "
+                                     "alpha and source value blending.\n"));
+                }
+            }
+        }
+
+        if (!EVERGREENCheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
+            return FALSE;
+    }
+
+    if (!EVERGREENCheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
+        return FALSE;
+
+    if (!EVERGREENGetDestFormat(pDstPicture, &dst_fmt))
+        return FALSE;
+
+    return TRUE;
+}
+
+/*
+ * EXA PrepareComposite hook.
+ *
+ * Records source/mask/destination objects in the accel state and emits the
+ * state needed for the following EVERGREENComposite() calls: scissors,
+ * texture setup for source (unit 0) and mask (unit 1), vertex/pixel shader
+ * setup, render target, blend control, interpolator setup and the vertex
+ * shader constant buffer filled by EVERGREENXFormSetup().
+ *
+ * pMaskPicture/pMask may be NULL when the operation has no mask.
+ *
+ * Returns TRUE when the state was emitted.  Returns FALSE when any pixmap
+ * has fewer than 8 bits per pixel, when EVERGREENSetAccelState() or
+ * EVERGREENGetDestFormat() fails, or when texture setup fails (in which
+ * case the indirect buffer and both vertex buffers are discarded first).
+ */
+static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
+                                      PicturePtr pMaskPicture, PicturePtr pDstPicture,
+                                      PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    uint32_t blendcntl, dst_format;
+    cb_config_t cb_conf;
+    shader_config_t vs_conf, ps_conf;
+    const_config_t vs_const_conf;
+    struct r600_accel_object src_obj, mask_obj, dst_obj;
+    float *cbuf;
+
+    /* Pitches below are divided by (bitsPerPixel / 8); that divisor is zero
+     * for sub-byte depths, so reject them for every pixmap involved,
+     * including the mask.
+     */
+    if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8)
+        return FALSE;
+    if (pMask && pMask->drawable.bitsPerPixel < 8)
+        return FALSE;
+
+    src_obj.offset = 0;
+    dst_obj.offset = 0;
+    src_obj.bo = radeon_get_pixmap_bo(pSrc);
+    dst_obj.bo = radeon_get_pixmap_bo(pDst);
+
+    /* Pitches are stored in pixels, not bytes. */
+    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
+    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
+
+    src_obj.width = pSrc->drawable.width;
+    src_obj.height = pSrc->drawable.height;
+    src_obj.bpp = pSrc->drawable.bitsPerPixel;
+    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+
+    dst_obj.width = pDst->drawable.width;
+    dst_obj.height = pDst->drawable.height;
+    dst_obj.bpp = pDst->drawable.bitsPerPixel;
+    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
+
+    if (pMask) {
+        mask_obj.offset = 0;
+        mask_obj.bo = radeon_get_pixmap_bo(pMask);
+        mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
+
+        mask_obj.width = pMask->drawable.width;
+        mask_obj.height = pMask->drawable.height;
+        mask_obj.bpp = pMask->drawable.bitsPerPixel;
+        mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+
+        if (!EVERGREENSetAccelState(pScrn,
+                                    &src_obj,
+                                    &mask_obj,
+                                    &dst_obj,
+                                    accel_state->comp_vs_offset, accel_state->comp_ps_offset,
+                                    3, 0xffffffff))
+            return FALSE;
+
+        /* Remember the mask and how its alpha is to be used; these flags are
+         * read back while choosing texture swizzles.
+         */
+        accel_state->msk_pic = pMaskPicture;
+        if (pMaskPicture->componentAlpha) {
+            accel_state->component_alpha = TRUE;
+            if (EVERGREENBlendOp[op].src_alpha)
+                accel_state->src_alpha = TRUE;
+            else
+                accel_state->src_alpha = FALSE;
+        } else {
+            accel_state->component_alpha = FALSE;
+            accel_state->src_alpha = FALSE;
+        }
+    } else {
+        if (!EVERGREENSetAccelState(pScrn,
+                                    &src_obj,
+                                    NULL,
+                                    &dst_obj,
+                                    accel_state->comp_vs_offset, accel_state->comp_ps_offset,
+                                    3, 0xffffffff))
+            return FALSE;
+
+        accel_state->msk_pic = NULL;
+        accel_state->component_alpha = FALSE;
+        accel_state->src_alpha = FALSE;
+    }
+
+    if (!EVERGREENGetDestFormat(pDstPicture, &dst_format))
+        return FALSE;
+
+    CLEAR (cb_conf);
+    CLEAR (vs_conf);
+    CLEAR (ps_conf);
+    CLEAR (vs_const_conf);
+
+    /* Vertex size is 24 with a mask (dst + src + mask coords), 16 without. */
+    if (pMask)
+        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
+    else
+        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
+
+    radeon_vbo_check(pScrn, &accel_state->cbuf, 256);
+
+    radeon_cp_start(pScrn);
+
+    evergreen_set_default_state(pScrn);
+
+    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+
+    if (!EVERGREENTextureSetup(pSrcPicture, pSrc, 0)) {
+        radeon_ib_discard(pScrn);
+        radeon_cs_flush_indirect(pScrn);
+        radeon_vb_discard(pScrn, &accel_state->vbo);
+        radeon_vb_discard(pScrn, &accel_state->cbuf);
+        return FALSE;
+    }
+
+    if (pMask) {
+        if (!EVERGREENTextureSetup(pMaskPicture, pMask, 1)) {
+            radeon_ib_discard(pScrn);
+            radeon_cs_flush_indirect(pScrn);
+            radeon_vb_discard(pScrn, &accel_state->vbo);
+            radeon_vb_discard(pScrn, &accel_state->cbuf);
+            return FALSE;
+        }
+    } else
+        accel_state->is_transform[1] = FALSE;
+
+    /* Bool constant bit 0 is set for both shader stages when a mask is used. */
+    if (pMask) {
+        evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
+        evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
+    } else {
+        evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
+        evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
+    }
+
+    /* Shader */
+    vs_conf.shader_addr = accel_state->vs_mc_addr;
+    vs_conf.shader_size = accel_state->vs_size;
+    vs_conf.num_gprs = 3;
+    vs_conf.stack_size = 1;
+    vs_conf.bo = accel_state->shaders_bo;
+    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    ps_conf.shader_addr = accel_state->ps_mc_addr;
+    ps_conf.shader_size = accel_state->ps_size;
+    ps_conf.num_gprs = 3;
+    ps_conf.stack_size = 1;
+    ps_conf.clamp_consts = 0;
+    ps_conf.export_mode = 2;
+    ps_conf.bo = accel_state->shaders_bo;
+    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    /* Render target: note that the width is the destination pitch. */
+    cb_conf.id = 0;
+    cb_conf.w = accel_state->dst_obj.pitch;
+    cb_conf.h = accel_state->dst_obj.height;
+    cb_conf.base = accel_state->dst_obj.offset;
+    cb_conf.format = dst_format;
+    cb_conf.bo = accel_state->dst_obj.bo;
+
+    switch (pDstPicture->format) {
+    case PICT_a8r8g8b8:
+    case PICT_x8r8g8b8:
+    case PICT_a1r5g5b5:
+    case PICT_x1r5g5b5:
+    default:
+        cb_conf.comp_swap = 1; /* ARGB */
+        break;
+    case PICT_a8b8g8r8:
+    case PICT_x8b8g8r8:
+        cb_conf.comp_swap = 0; /* ABGR */
+        break;
+#ifdef PICT_TYPE_BGRA
+    case PICT_b8g8r8a8:
+    case PICT_b8g8r8x8:
+        cb_conf.comp_swap = 3; /* BGRA */
+        break;
+#endif
+    case PICT_r5g6b5:
+        cb_conf.comp_swap = 2; /* RGB */
+        break;
+    case PICT_a8:
+        cb_conf.comp_swap = 3; /* A */
+        break;
+    }
+    cb_conf.source_format = EXPORT_4C_16BPC;
+    cb_conf.blend_clamp = 1;
+    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
+
+    blendcntl = EVERGREENGetBlendCntl(op, pMaskPicture, pDstPicture->format);
+
+    BEGIN_BATCH(24);
+    EREG(CB_TARGET_MASK, (0xf << TARGET0_ENABLE_shift));
+    EREG(CB_COLOR_CONTROL, (EVERGREEN_ROP[3] |
+                            (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
+    EREG(CB_BLEND0_CONTROL, blendcntl | CB_BLEND0_CONTROL__ENABLE_bit);
+
+    /* Interpolator setup */
+    if (pMask) {
+        /* export 2 tex coords from VS */
+        EREG(SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift));
+        /* src = semantic id 0; mask = semantic id 1 */
+        EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
+                               (1 << SEMANTIC_1_shift)));
+    } else {
+        /* export 1 tex coords from VS */
+        EREG(SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
+        /* src = semantic id 0 */
+        EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
+    }
+
+    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
+    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
+    E32(((0 << SEMANTIC_shift) |
+         (0x01 << DEFAULT_VAL_shift)));
+    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
+    E32(((1 << SEMANTIC_shift) |
+         (0x01 << DEFAULT_VAL_shift)));
+
+    PACK0(SPI_PS_IN_CONTROL_0, 3);
+    if (pMask) {
+        /* input 2 tex coords from VS */
+        E32(((2 << NUM_INTERP_shift) |
+             LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
+    } else {
+        /* input 1 tex coords from VS */
+        E32(((1 << NUM_INTERP_shift) |
+             LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
+    }
+    E32(0); // SPI_PS_IN_CONTROL_1
+    E32(0); // SPI_INTERP_CONTROL_0
+    END_BATCH();
+
+    /* VS alu constants: filled per texture unit by EVERGREENXFormSetup() */
+    vs_const_conf.size_bytes = 256;
+    vs_const_conf.type = SHADER_TYPE_VS;
+    cbuf = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
+
+    EVERGREENXFormSetup(pSrcPicture, pSrc, 0, cbuf);
+    if (pMask)
+        EVERGREENXFormSetup(pMaskPicture, pMask, 1, cbuf);
+
+    radeon_vbo_commit(pScrn, &accel_state->cbuf);
+
+    /* Point the hardware at the constants just committed. */
+    vs_const_conf.bo = accel_state->cbuf.vb_bo;
+    vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op;
+    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
+
+    if (accel_state->vsync)
+        RADEONVlineHelperClear(pScrn);
+
+    return TRUE;
+}
+
+/*
+ * EXA Composite hook: queues one rectangle of the operation prepared by
+ * EVERGREENPrepareComposite().
+ *
+ * Three vertices are written to the vertex buffer (top-left, bottom-left,
+ * bottom-right).  Each vertex holds the destination position and the source
+ * coordinate, followed by the mask coordinate when a mask picture is
+ * active.  When vsync is enabled the rectangle is first passed to
+ * RADEONVlineHelperSet().
+ */
+static void EVERGREENComposite(PixmapPtr pDst,
+                               int srcX, int srcY,
+                               int maskX, int maskY,
+                               int dstX, int dstY,
+                               int w, int h)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    Bool with_mask;
+    float *v;
+
+    if (accel_state->vsync)
+        RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
+
+    with_mask = accel_state->msk_pic ? TRUE : FALSE;
+
+    /* 24 per vertex with mask coordinates, 16 without. */
+    v = radeon_vbo_space(pScrn, &accel_state->vbo, with_mask ? 24 : 16);
+
+    /* top-left */
+    *v++ = (float)dstX;
+    *v++ = (float)dstY;
+    *v++ = (float)srcX;
+    *v++ = (float)srcY;
+    if (with_mask) {
+        *v++ = (float)maskX;
+        *v++ = (float)maskY;
+    }
+
+    /* bottom-left */
+    *v++ = (float)dstX;
+    *v++ = (float)(dstY + h);
+    *v++ = (float)srcX;
+    *v++ = (float)(srcY + h);
+    if (with_mask) {
+        *v++ = (float)maskX;
+        *v++ = (float)(maskY + h);
+    }
+
+    /* bottom-right */
+    *v++ = (float)(dstX + w);
+    *v++ = (float)(dstY + h);
+    *v++ = (float)(srcX + w);
+    *v++ = (float)(srcY + h);
+    if (with_mask) {
+        *v++ = (float)(maskX + w);
+        *v++ = (float)(maskY + h);
+    }
+
+    radeon_vbo_commit(pScrn, &accel_state->vbo);
+}
+
+/*
+ * EXA DoneComposite hook: finishes the composite operation.
+ *
+ * When vsync is enabled, a vline wait is emitted first using the CRTC and
+ * line range stored in the accel state.  The operation is then finished
+ * with the vertex size that matches the prepared state (24 with a mask
+ * picture, 16 without).
+ */
+static void EVERGREENDoneComposite(PixmapPtr pDst)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    int vtx_size;
+
+    if (accel_state->vsync) {
+        evergreen_cp_wait_vline_sync(pScrn, pDst,
+                                     accel_state->vline_crtc,
+                                     accel_state->vline_y1,
+                                     accel_state->vline_y2);
+    }
+
+    if (accel_state->msk_pic)
+        vtx_size = 24;
+    else
+        vtx_size = 16;
+
+    evergreen_finish_op(pScrn, vtx_size);
+}
+
+/*
+ * EXA UploadToScreen hook.
+ *
+ * Copies a w x h rectangle from system memory (src, with src_pitch bytes
+ * per row) into pDst at (x, y).  The rows are first copied into a GTT
+ * scratch buffer whose pitch is aligned to 256 bytes, then blitted to the
+ * destination with the copy shaders.
+ *
+ * Returns FALSE, without uploading, when:
+ *   - the pixmap has fewer than 8 bits per pixel;
+ *   - the destination BO is neither referenced by the current CS nor busy;
+ *   - the scratch BO cannot be allocated or mapped;
+ *   - EVERGREENSetAccelState() fails.
+ * Returns TRUE once the blit has been queued.
+ */
+static Bool
+EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
+                        char *src, int src_pitch)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    struct radeon_exa_pixmap_priv *driver_priv;
+    struct radeon_bo *scratch;
+    unsigned size;
+    uint32_t dst_domain;
+    int bpp = pDst->drawable.bitsPerPixel;
+    uint32_t scratch_pitch;
+    uint32_t src_pitch_hw;
+    uint32_t dst_pitch_hw;
+    Bool r;
+    int i;
+    struct r600_accel_object src_obj, dst_obj;
+
+    if (bpp < 8)
+        return FALSE;
+
+    /* Pitch math divides by (bpp / 8), which is zero for sub-byte depths,
+     * so it must only happen after the depth check above.
+     */
+    scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
+    src_pitch_hw = scratch_pitch / (bpp / 8);
+    dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
+
+    driver_priv = exaGetPixmapDriverPrivate(pDst);
+
+    /* If we know the BO won't be busy, don't bother */
+    if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs) &&
+        !radeon_bo_is_busy(driver_priv->bo, &dst_domain))
+        return FALSE;
+
+    size = scratch_pitch * h;
+    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
+    if (scratch == NULL) {
+        return FALSE;
+    }
+
+    src_obj.pitch = src_pitch_hw;
+    src_obj.width = w;
+    src_obj.height = h;
+    src_obj.offset = 0;
+    src_obj.bpp = bpp;
+    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
+    src_obj.bo = scratch;
+
+    dst_obj.pitch = dst_pitch_hw;
+    dst_obj.width = pDst->drawable.width;
+    dst_obj.height = pDst->drawable.height;
+    dst_obj.offset = 0;
+    dst_obj.bpp = bpp;
+    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
+    dst_obj.bo = radeon_get_pixmap_bo(pDst);
+
+    if (!EVERGREENSetAccelState(pScrn,
+                                &src_obj,
+                                NULL,
+                                &dst_obj,
+                                accel_state->copy_vs_offset, accel_state->copy_ps_offset,
+                                3, 0xffffffff)) {
+        r = FALSE;
+        goto out;
+    }
+
+    /* radeon_bo_map() returns non-zero on failure. */
+    r = radeon_bo_map(scratch, 0);
+    if (r) {
+        r = FALSE;
+        goto out;
+    }
+    r = TRUE;
+
+    /* Copy row by row: the scratch pitch may be wider than the row. */
+    size = w * bpp / 8;
+    for (i = 0; i < h; i++) {
+        memcpy(scratch->ptr + i * scratch_pitch, src, size);
+        src += src_pitch;
+    }
+    radeon_bo_unmap(scratch);
+
+    if (info->accel_state->vsync)
+        RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
+
+    /* blit from gart to vram */
+    EVERGREENDoPrepareCopy(pScrn);
+    EVERGREENAppendCopyVertex(pScrn, 0, 0, x, y, w, h);
+    EVERGREENDoCopyVline(pDst);
+
+out:
+    radeon_bo_unref(scratch);
+    return r;
+}
+
+/*
+ * EXA DownloadFromScreen hook.
+ *
+ * Copies a w x h rectangle at (x, y) of pSrc into system memory (dst, with
+ * dst_pitch bytes per row).  The rectangle is blitted into a GTT scratch
+ * buffer whose pitch is aligned to 256 bytes, the command stream is
+ * flushed, and the scratch buffer is then mapped and copied out row by row.
+ *
+ * Returns FALSE, without copying, when:
+ *   - the pixmap has fewer than 8 bits per pixel;
+ *   - the source BO's domain is not exactly VRAM;
+ *   - the scratch BO cannot be allocated or mapped;
+ *   - the CS space check or EVERGREENSetAccelState() fails.
+ * Returns TRUE after the data has been copied to dst.
+ */
+static Bool
+EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
+                            int h, char *dst, int dst_pitch)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    struct radeon_exa_pixmap_priv *driver_priv;
+    struct radeon_bo *scratch;
+    unsigned size;
+    uint32_t src_domain = 0;
+    int bpp = pSrc->drawable.bitsPerPixel;
+    uint32_t scratch_pitch;
+    uint32_t dst_pitch_hw;
+    uint32_t src_pitch_hw;
+    Bool r;
+    struct r600_accel_object src_obj, dst_obj;
+
+    if (bpp < 8)
+        return FALSE;
+
+    /* Pitch math divides by (bpp / 8), which is zero for sub-byte depths,
+     * so it must only happen after the depth check above.
+     */
+    scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
+    dst_pitch_hw = scratch_pitch / (bpp / 8);
+    src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
+
+    driver_priv = exaGetPixmapDriverPrivate(pSrc);
+
+    /* If we know the BO won't end up in VRAM anyway, don't bother */
+    if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
+        src_domain = radeon_bo_get_src_domain(driver_priv->bo);
+        if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
+            (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
+            src_domain = 0;
+    }
+
+    /* No usable answer from the CS: ask for the BO's current domain. */
+    if (!src_domain)
+        radeon_bo_is_busy(driver_priv->bo, &src_domain);
+
+    if (src_domain != RADEON_GEM_DOMAIN_VRAM)
+        return FALSE;
+
+    size = scratch_pitch * h;
+    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
+    if (scratch == NULL) {
+        return FALSE;
+    }
+    radeon_cs_space_reset_bos(info->cs);
+    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
+                                      RADEON_GEM_DOMAIN_VRAM, 0);
+    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
+    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
+    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
+    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
+    /* radeon_cs_space_check() returns non-zero on failure. */
+    r = radeon_cs_space_check(info->cs);
+    if (r) {
+        r = FALSE;
+        goto out;
+    }
+
+    src_obj.pitch = src_pitch_hw;
+    src_obj.width = pSrc->drawable.width;
+    src_obj.height = pSrc->drawable.height;
+    src_obj.offset = 0;
+    src_obj.bpp = bpp;
+    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+    src_obj.bo = radeon_get_pixmap_bo(pSrc);
+
+    dst_obj.pitch = dst_pitch_hw;
+    dst_obj.width = w;
+    dst_obj.height = h;
+    dst_obj.offset = 0;
+    dst_obj.bo = scratch;
+    dst_obj.bpp = bpp;
+    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
+
+    if (!EVERGREENSetAccelState(pScrn,
+                                &src_obj,
+                                NULL,
+                                &dst_obj,
+                                accel_state->copy_vs_offset, accel_state->copy_ps_offset,
+                                3, 0xffffffff)) {
+        r = FALSE;
+        goto out;
+    }
+
+    /* blit from vram to gart */
+    EVERGREENDoPrepareCopy(pScrn);
+    EVERGREENAppendCopyVertex(pScrn, x, y, 0, 0, w, h);
+    EVERGREENDoCopy(pScrn);
+
+    /* Submit the blit before the scratch buffer is read back. */
+    radeon_cs_flush_indirect(pScrn);
+
+    /* radeon_bo_map() returns non-zero on failure. */
+    r = radeon_bo_map(scratch, 0);
+    if (r) {
+        r = FALSE;
+        goto out;
+    }
+    r = TRUE;
+
+    /* From here on w is the row length in bytes and size the byte offset
+     * of the current row inside the scratch buffer.
+     */
+    w *= bpp / 8;
+    size = 0;
+    while (h--) {
+        memcpy(dst, scratch->ptr + size, w);
+        size += scratch_pitch;
+        dst += dst_pitch;
+    }
+    radeon_bo_unmap(scratch);
+out:
+    radeon_bo_unref(scratch);
+    return r;
+}
+
+/*
+ * EXA MarkSync hook: increments the per-screen sync marker counter and
+ * returns the new value.
+ */
+static int
+EVERGREENMarkSync(ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    accel_state->exaSyncMarker += 1;
+    return accel_state->exaSyncMarker;
+}
+
+/*
+ * EXA WaitMarker hook: intentionally does nothing; both arguments are
+ * ignored.
+ */
+static void
+EVERGREENSync(ScreenPtr pScreen, int marker)
+{
+}
+
+/*
+ * Allocates the VRAM buffer object that holds the shader programs and
+ * clears the accel_state->shaders pointer.
+ *
+ * Returns TRUE on success; logs an error and returns FALSE when the buffer
+ * object cannot be opened.
+ */
+static Bool
+EVERGREENAllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    /* 512 bytes per shader for now */
+    const int bytes_per_shader = 512;
+    const int size = bytes_per_shader * 9;
+
+    accel_state->shaders = NULL;
+    accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
+                                             RADEON_GEM_DOMAIN_VRAM, 0);
+    if (accel_state->shaders_bo != NULL)
+        return TRUE;
+
+    ErrorF("Allocating shader failed\n");
+    return FALSE;
+}
+
+/*
+ * Writes the eight shader programs (solid, copy, composite and Xv, each as
+ * a vertex/pixel pair) into the shader buffer object.
+ *
+ * Every program gets its own 512-byte slot; the byte offset of each slot is
+ * stored in the matching accel_state->*_offset field.  The buffer object is
+ * mapped for writing for the duration of the call.
+ *
+ * Returns TRUE on success.  A mapping failure is reported via FatalError().
+ */
+Bool
+EVERGREENLoadShaders(ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    RADEONChipFamily ChipSet = info->ChipFamily;
+    const int slot_bytes = 512;
+    uint32_t *base;
+    int ret;
+
+    ret = radeon_bo_map(accel_state->shaders_bo, 1);
+    if (ret != 0) {
+        FatalError("failed to map shader %d\n", ret);
+        return FALSE;
+    }
+    base = accel_state->shaders_bo->ptr;
+
+    /* Offsets are in bytes; dividing by 4 converts them to dword indices. */
+
+    /* slot 0: solid vs */
+    accel_state->solid_vs_offset = 0 * slot_bytes;
+    evergreen_solid_vs(ChipSet, base + accel_state->solid_vs_offset / 4);
+
+    /* slot 1: solid ps */
+    accel_state->solid_ps_offset = 1 * slot_bytes;
+    evergreen_solid_ps(ChipSet, base + accel_state->solid_ps_offset / 4);
+
+    /* slot 2: copy vs */
+    accel_state->copy_vs_offset = 2 * slot_bytes;
+    evergreen_copy_vs(ChipSet, base + accel_state->copy_vs_offset / 4);
+
+    /* slot 3: copy ps */
+    accel_state->copy_ps_offset = 3 * slot_bytes;
+    evergreen_copy_ps(ChipSet, base + accel_state->copy_ps_offset / 4);
+
+    /* slot 4: comp vs */
+    accel_state->comp_vs_offset = 4 * slot_bytes;
+    evergreen_comp_vs(ChipSet, base + accel_state->comp_vs_offset / 4);
+
+    /* slot 5: comp ps */
+    accel_state->comp_ps_offset = 5 * slot_bytes;
+    evergreen_comp_ps(ChipSet, base + accel_state->comp_ps_offset / 4);
+
+    /* slot 6: xv vs */
+    accel_state->xv_vs_offset = 6 * slot_bytes;
+    evergreen_xv_vs(ChipSet, base + accel_state->xv_vs_offset / 4);
+
+    /* slot 7: xv ps */
+    accel_state->xv_ps_offset = 7 * slot_bytes;
+    evergreen_xv_ps(ChipSet, base + accel_state->xv_ps_offset / 4);
+
+    radeon_bo_unmap(accel_state->shaders_bo);
+
+    return TRUE;
+}
+
+/*
+ * Sets up EXA acceleration for the screen.
+ *
+ * Fills in the EXA driver record (solid, copy, composite, upload/download
+ * and pixmap management hooks, alignment and size limits), registers it
+ * with exaDriverInit(), resets the acceleration state, and finally
+ * allocates and loads the shaders.
+ *
+ * Returns FALSE when the EXA record has not been allocated, when there is
+ * no command stream (acceleration requires KMS), when exaDriverInit()
+ * fails, or when the shaders cannot be allocated or loaded.  If
+ * exaDriverInit() fails, the EXA record is freed and the pointer is cleared
+ * so that no stale pointer is left behind.
+ */
+Bool
+EVERGREENDrawInit(ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    if (info->accel_state->exa == NULL) {
+        xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
+        return FALSE;
+    }
+
+    /* accel requires kms */
+    if (!info->cs)
+        return FALSE;
+
+    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
+    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
+
+    info->accel_state->exa->PrepareSolid = EVERGREENPrepareSolid;
+    info->accel_state->exa->Solid = EVERGREENSolid;
+    info->accel_state->exa->DoneSolid = EVERGREENDoneSolid;
+
+    info->accel_state->exa->PrepareCopy = EVERGREENPrepareCopy;
+    info->accel_state->exa->Copy = EVERGREENCopy;
+    info->accel_state->exa->DoneCopy = EVERGREENDoneCopy;
+
+    info->accel_state->exa->MarkSync = EVERGREENMarkSync;
+    info->accel_state->exa->WaitMarker = EVERGREENSync;
+
+    info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
+    info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
+    info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
+    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
+    info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
+    info->accel_state->exa->UploadToScreen = EVERGREENUploadToScreen;
+    info->accel_state->exa->DownloadFromScreen = EVERGREENDownloadFromScreen;
+
+    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
+#ifdef EXA_SUPPORTS_PREPARE_AUX
+    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
+#endif
+
+#ifdef EXA_HANDLES_PIXMAPS
+    info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
+#ifdef EXA_MIXED_PIXMAPS
+    info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
+#endif
+#endif
+    info->accel_state->exa->pixmapOffsetAlign = 256;
+    info->accel_state->exa->pixmapPitchAlign = 256;
+
+    info->accel_state->exa->CheckComposite = EVERGREENCheckComposite;
+    info->accel_state->exa->PrepareComposite = EVERGREENPrepareComposite;
+    info->accel_state->exa->Composite = EVERGREENComposite;
+    info->accel_state->exa->DoneComposite = EVERGREENDoneComposite;
+
+#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
+
+    info->accel_state->exa->maxPitchBytes = 32768;
+    info->accel_state->exa->maxX = 8192;
+#else
+    info->accel_state->exa->maxX = 8192;
+#endif
+    info->accel_state->exa->maxY = 8192;
+
+    /* not supported yet */
+    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
+        xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
+        info->accel_state->vsync = TRUE;
+    } else
+        info->accel_state->vsync = FALSE;
+
+    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
+        xfree(info->accel_state->exa);
+        /* Clear the pointer: a stale value would look like a live EXA
+         * record to any later code that tests it before using or freeing it.
+         */
+        info->accel_state->exa = NULL;
+        return FALSE;
+    }
+
+    /* Start from a clean acceleration state. */
+    info->accel_state->XInited3D = FALSE;
+    info->accel_state->copy_area = NULL;
+    info->accel_state->src_obj[0].bo = NULL;
+    info->accel_state->src_obj[1].bo = NULL;
+    info->accel_state->dst_obj.bo = NULL;
+    info->accel_state->copy_area_bo = NULL;
+    info->accel_state->vbo.vb_start_op = -1;
+    info->accel_state->cbuf.vb_start_op = -1;
+    info->accel_state->finish_op = evergreen_finish_op;
+    info->accel_state->vbo.verts_per_op = 3;
+    info->accel_state->cbuf.verts_per_op = 1;
+    RADEONVlineHelperClear(pScrn);
+
+    radeon_vbo_init_lists(pScrn);
+
+    if (!EVERGREENAllocShaders(pScrn, pScreen))
+        return FALSE;
+
+    if (!EVERGREENLoadShaders(pScrn))
+        return FALSE;
+
+    exaMarkSync(pScreen);
+
+    return TRUE;
+
+}
+
diff --git a/src/evergreen_reg.h b/src/evergreen_reg.h
new file mode 100644
index 00000000..4608f080
--- /dev/null
+++ b/src/evergreen_reg.h
@@ -0,0 +1,247 @@
+/*
+ * Evergreen Register documentation
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EVERGREEN_REG_H_
+#define _EVERGREEN_REG_H_
+
+/*
+ * Register definitions
+ */
+
+#include "evergreen_reg_auto.h"
+
+/* Shader stage identifiers, numbered consecutively from 0. */
+enum {
+    SHADER_TYPE_PS = 0,
+    SHADER_TYPE_VS = 1,
+    SHADER_TYPE_GS = 2,
+    SHADER_TYPE_HS = 3,
+    SHADER_TYPE_LS = 4,
+    SHADER_TYPE_CS = 5,
+    SHADER_TYPE_FS = 6,
+};
+
+
+/* SET_*_REG offsets + ends */
+enum {
+ /* Each register class is described by a pair of constants: the start of
+  * its address range (*_offset) and the end of that range (*_end).
+  * NOTE(review): presumably the SET_* packets address registers relative
+  * to the matching *_offset value - confirm against the packet emission
+  * macros.
+  */
+ SET_CONFIG_REG_offset = 0x00008000,
+ SET_CONFIG_REG_end = 0x0000ac00,
+ SET_CONTEXT_REG_offset = 0x00028000,
+ SET_CONTEXT_REG_end = 0x00029000,
+ SET_RESOURCE_offset = 0x00030000,
+ SET_RESOURCE_end = 0x00038000,
+ SET_SAMPLER_offset = 0x0003c000,
+ SET_SAMPLER_end = 0x0003c600,
+ SET_CTL_CONST_offset = 0x0003cff0,
+ SET_CTL_CONST_end = 0x0003ff0c,
+ SET_LOOP_CONST_offset = 0x0003a200,
+ SET_LOOP_CONST_end = 0x0003a500,
+ /* The bool constant range starts where the loop constant range ends. */
+ SET_BOOL_CONST_offset = 0x0003a500,
+ SET_BOOL_CONST_end = 0x0003a518,
+};
+
+/* Packet3 commands */
+enum {
+ /* Opcode values for the packet3 commands, listed in ascending order. */
+ IT_NOP = 0x10,
+ IT_INDIRECT_BUFFER_END = 0x17,
+ IT_SET_PREDICATION = 0x20,
+ IT_COND_EXEC = 0x22,
+ IT_PRED_EXEC = 0x23,
+ IT_DRAW_INDEX_2 = 0x27,
+ IT_CONTEXT_CONTROL = 0x28,
+ IT_DRAW_INDEX_OFFSET = 0x29,
+ IT_INDEX_TYPE = 0x2A,
+ IT_DRAW_INDEX = 0x2B,
+ IT_DRAW_INDEX_AUTO = 0x2D,
+ IT_DRAW_INDEX_IMMD = 0x2E,
+ IT_NUM_INSTANCES = 0x2F,
+ IT_INDIRECT_BUFFER = 0x32,
+ IT_STRMOUT_BUFFER_UPDATE = 0x34,
+ IT_MEM_SEMAPHORE = 0x39,
+ IT_MPEG_INDEX = 0x3A,
+ IT_WAIT_REG_MEM = 0x3C,
+ IT_MEM_WRITE = 0x3D,
+ IT_SURFACE_SYNC = 0x43,
+ IT_ME_INITIALIZE = 0x44,
+ IT_COND_WRITE = 0x45,
+ IT_EVENT_WRITE = 0x46,
+ IT_EVENT_WRITE_EOP = 0x47,
+ IT_EVENT_WRITE_EOS = 0x48,
+ /* 0x68..0x6F: the SET_* family that writes registers and constants. */
+ IT_SET_CONFIG_REG = 0x68,
+ IT_SET_CONTEXT_REG = 0x69,
+ IT_SET_ALU_CONST = 0x6A,
+ IT_SET_BOOL_CONST = 0x6B,
+ IT_SET_LOOP_CONST = 0x6C,
+ IT_SET_RESOURCE = 0x6D,
+ IT_SET_SAMPLER = 0x6E,
+ IT_SET_CTL_CONST = 0x6F,
+};
+
+/* IT_WAIT_REG_MEM operation encoding */
+
+/* Comparison selector, values 0..6 in the low bits of the operation field. */
+#define IT_WAIT_ALWAYS (0 << 0)
+#define IT_WAIT_LT (1 << 0)
+#define IT_WAIT_LE (2 << 0)
+#define IT_WAIT_EQ (3 << 0)
+#define IT_WAIT_NE (4 << 0)
+#define IT_WAIT_GE (5 << 0)
+#define IT_WAIT_GT (6 << 0)
+/* Bit 4 selects what is polled: 0 for a register, 1 for memory (per the names). */
+#define IT_WAIT_REG (0 << 4)
+#define IT_WAIT_MEM (1 << 4)
+
+/* Shifts x right by two bits.
+ * NOTE(review): presumably converts a byte address to a dword address - confirm.
+ */
+#define IT_WAIT_ADDR(x) ((x) >> 2)
+
+/*
+ * Hand-written register definitions: a few register offsets with their bit
+ * fields, the FMT_* data format codes, and the per-shader-stage layout of
+ * the fetch resource, sampler, loop constant and bool constant files.
+ *
+ * For each of those files the *_num values give the number of slots per
+ * stage and the per-stage base values (*_ps, *_vs, ...) are running sums of
+ * the preceding stages' slot counts, so the stages are laid out back to
+ * back in the order ps, vs, gs, hs, ls, cs (and fs for fetch resources).
+ */
+enum {
+
+    SQ_LDS_ALLOC_PS = 0x288ec,
+    SQ_DYN_GPR_RESOURCE_LIMIT_1 = 0x28838,
+    SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x8d8c,
+
+    WAIT_UNTIL = 0x8040,
+    WAIT_CP_DMA_IDLE_bit = 1 << 8,
+    WAIT_CMDFIFO_bit = 1 << 10,
+    WAIT_3D_IDLE_bit = 1 << 15,
+    WAIT_3D_IDLECLEAN_bit = 1 << 17,
+    WAIT_EXTERN_SIG_bit = 1 << 19,
+    CMDFIFO_ENTRIES_mask = 0xf << 20,
+    CMDFIFO_ENTRIES_shift = 20,
+
+    CP_COHER_CNTL = 0x85f0,
+    DEST_BASE_0_ENA_bit = 1 << 0,
+    DEST_BASE_1_ENA_bit = 1 << 1,
+    SO0_DEST_BASE_ENA_bit = 1 << 2,
+    SO1_DEST_BASE_ENA_bit = 1 << 3,
+    SO2_DEST_BASE_ENA_bit = 1 << 4,
+    SO3_DEST_BASE_ENA_bit = 1 << 5,
+    CB0_DEST_BASE_ENA_bit = 1 << 6,
+    CB1_DEST_BASE_ENA_bit = 1 << 7,
+    CB2_DEST_BASE_ENA_bit = 1 << 8,
+    CB3_DEST_BASE_ENA_bit = 1 << 9,
+    CB4_DEST_BASE_ENA_bit = 1 << 10,
+    CB5_DEST_BASE_ENA_bit = 1 << 11,
+    CB6_DEST_BASE_ENA_bit = 1 << 12,
+    CB7_DEST_BASE_ENA_bit = 1 << 13,
+    DB_DEST_BASE_ENA_bit = 1 << 14,
+    CB8_DEST_BASE_ENA_bit = 1 << 15,
+    CB9_DEST_BASE_ENA_bit = 1 << 16,
+    CB10_DEST_BASE_ENA_bit = 1 << 17,
+    CB11_DEST_BASE_ENA_bit = 1 << 18,
+    FULL_CACHE_ENA_bit = 1 << 20,
+    TC_ACTION_ENA_bit = 1 << 23,
+    VC_ACTION_ENA_bit = 1 << 24,
+    CB_ACTION_ENA_bit = 1 << 25,
+    DB_ACTION_ENA_bit = 1 << 26,
+    SH_ACTION_ENA_bit = 1 << 27,
+    SX_ACTION_ENA_bit = 1 << 28,
+    CP_COHER_SIZE = 0x85f4,
+    CP_COHER_BASE = 0x85f8,
+    CP_COHER_STATUS = 0x85fc,
+    MATCHING_GFX_CNTX_mask = 0xff << 0,
+    MATCHING_GFX_CNTX_shift = 0,
+    STATUS_bit = 1 << 31,
+
+// SQ_VTX_CONSTANT_WORD2_0 = 0x00030008,
+// SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20,
+    /* Data format codes; gaps in the numbering are set by explicit values. */
+    FMT_INVALID=0, FMT_8, FMT_4_4, FMT_3_3_2,
+    FMT_16=5, FMT_16_FLOAT, FMT_8_8,
+    FMT_5_6_5, FMT_6_5_5, FMT_1_5_5_5, FMT_4_4_4_4,
+    FMT_5_5_5_1, FMT_32, FMT_32_FLOAT, FMT_16_16,
+    FMT_16_16_FLOAT=16, FMT_8_24, FMT_8_24_FLOAT, FMT_24_8,
+    FMT_24_8_FLOAT, FMT_10_11_11, FMT_10_11_11_FLOAT, FMT_11_11_10,
+    FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8, FMT_10_10_10_2,
+    FMT_X24_8_32_FLOAT, FMT_32_32, FMT_32_32_FLOAT, FMT_16_16_16_16,
+    FMT_16_16_16_16_FLOAT=32, FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT,
+    FMT_1 = 37, FMT_GB_GR=39,
+    FMT_BG_RG, FMT_32_AS_8, FMT_32_AS_8_8, FMT_5_9_9_9_SHAREDEXP,
+    FMT_8_8_8, FMT_16_16_16, FMT_16_16_16_FLOAT, FMT_32_32_32,
+    FMT_32_32_32_FLOAT=48,
+
+// High level register file lengths
+    SQ_FETCH_RESOURCE = SQ_TEX_RESOURCE_WORD0_0,
+    SQ_FETCH_RESOURCE_ps_num = 176,
+    SQ_FETCH_RESOURCE_vs_num = 160,
+    SQ_FETCH_RESOURCE_gs_num = 160,
+    SQ_FETCH_RESOURCE_hs_num = 160,
+    SQ_FETCH_RESOURCE_ls_num = 160,
+    SQ_FETCH_RESOURCE_cs_num = 176,
+    SQ_FETCH_RESOURCE_fs_num = 32,
+    SQ_FETCH_RESOURCE_all_num = 1024,
+    SQ_FETCH_RESOURCE_offset = 32,
+    SQ_FETCH_RESOURCE_ps = 0, // 0...175
+    SQ_FETCH_RESOURCE_vs = SQ_FETCH_RESOURCE_ps + SQ_FETCH_RESOURCE_ps_num, // 176...335
+    /* gs follows vs, so its base is the vs base plus the vs slot count. */
+    SQ_FETCH_RESOURCE_gs = SQ_FETCH_RESOURCE_vs + SQ_FETCH_RESOURCE_vs_num, // 336...495
+    SQ_FETCH_RESOURCE_hs = SQ_FETCH_RESOURCE_gs + SQ_FETCH_RESOURCE_gs_num, // 496...655
+    SQ_FETCH_RESOURCE_ls = SQ_FETCH_RESOURCE_hs + SQ_FETCH_RESOURCE_hs_num, // 656...815
+    SQ_FETCH_RESOURCE_cs = SQ_FETCH_RESOURCE_ls + SQ_FETCH_RESOURCE_ls_num, // 816...991
+    SQ_FETCH_RESOURCE_fs = SQ_FETCH_RESOURCE_cs + SQ_FETCH_RESOURCE_cs_num, // 992...1023
+
+    SQ_TEX_SAMPLER_WORD = SQ_TEX_SAMPLER_WORD0_0,
+    SQ_TEX_SAMPLER_WORD_ps_num = 18,
+    SQ_TEX_SAMPLER_WORD_vs_num = 18,
+    SQ_TEX_SAMPLER_WORD_gs_num = 18,
+    SQ_TEX_SAMPLER_WORD_hs_num = 18,
+    SQ_TEX_SAMPLER_WORD_ls_num = 18,
+    SQ_TEX_SAMPLER_WORD_cs_num = 18,
+    SQ_TEX_SAMPLER_WORD_all_num = 108,
+    SQ_TEX_SAMPLER_WORD_offset = 12,
+    SQ_TEX_SAMPLER_WORD_ps = 0, // 0...17
+    SQ_TEX_SAMPLER_WORD_vs = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num, // 18...35
+    SQ_TEX_SAMPLER_WORD_gs = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num, // 36...53
+    SQ_TEX_SAMPLER_WORD_hs = SQ_TEX_SAMPLER_WORD_gs + SQ_TEX_SAMPLER_WORD_gs_num, // 54...71
+    SQ_TEX_SAMPLER_WORD_ls = SQ_TEX_SAMPLER_WORD_hs + SQ_TEX_SAMPLER_WORD_hs_num, // 72...89
+    SQ_TEX_SAMPLER_WORD_cs = SQ_TEX_SAMPLER_WORD_ls + SQ_TEX_SAMPLER_WORD_ls_num, // 90...107
+
+    SQ_LOOP_CONST = SQ_LOOP_CONST_0,
+    SQ_LOOP_CONST_ps_num = 32,
+    SQ_LOOP_CONST_vs_num = 32,
+    SQ_LOOP_CONST_gs_num = 32,
+    SQ_LOOP_CONST_hs_num = 32,
+    SQ_LOOP_CONST_ls_num = 32,
+    SQ_LOOP_CONST_cs_num = 32,
+    SQ_LOOP_CONST_all_num = 192,
+    SQ_LOOP_CONST_offset = 4,
+    SQ_LOOP_CONST_ps = 0, // 0...31
+    SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num, // 32...63
+    SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num, // 64...95
+    SQ_LOOP_CONST_hs = SQ_LOOP_CONST_gs + SQ_LOOP_CONST_gs_num, // 96...127
+    SQ_LOOP_CONST_ls = SQ_LOOP_CONST_hs + SQ_LOOP_CONST_hs_num, // 128...159
+    SQ_LOOP_CONST_cs = SQ_LOOP_CONST_ls + SQ_LOOP_CONST_ls_num, // 160...191
+
+    SQ_BOOL_CONST = SQ_BOOL_CONST_0, /* 32 bits each */
+    SQ_BOOL_CONST_ps_num = 1,
+    SQ_BOOL_CONST_vs_num = 1,
+    SQ_BOOL_CONST_gs_num = 1,
+    SQ_BOOL_CONST_hs_num = 1,
+    SQ_BOOL_CONST_ls_num = 1,
+    SQ_BOOL_CONST_cs_num = 1,
+    SQ_BOOL_CONST_all_num = 6,
+    SQ_BOOL_CONST_offset = 4,
+    SQ_BOOL_CONST_ps = 0,
+    SQ_BOOL_CONST_vs = SQ_BOOL_CONST_ps + SQ_BOOL_CONST_ps_num,
+    SQ_BOOL_CONST_gs = SQ_BOOL_CONST_vs + SQ_BOOL_CONST_vs_num,
+    SQ_BOOL_CONST_hs = SQ_BOOL_CONST_gs + SQ_BOOL_CONST_gs_num,
+    SQ_BOOL_CONST_ls = SQ_BOOL_CONST_hs + SQ_BOOL_CONST_hs_num,
+    SQ_BOOL_CONST_cs = SQ_BOOL_CONST_ls + SQ_BOOL_CONST_ls_num,
+
+};
+
+#endif
diff --git a/src/evergreen_reg_auto.h b/src/evergreen_reg_auto.h
new file mode 100644
index 00000000..5c615864
--- /dev/null
+++ b/src/evergreen_reg_auto.h
@@ -0,0 +1,4039 @@
+/*
+ * Evergreen Register documentation
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EVERGREEN_REG_AUTO_H
+#define _EVERGREEN_REG_AUTO_H
+
+enum {
+
+ VGT_VTX_VECT_EJECT_REG = 0x000088b0,
+ PRIM_COUNT_mask = 0x3ff << 0,
+ PRIM_COUNT_shift = 0,
+ VGT_LAST_COPY_STATE = 0x000088c0,
+ SRC_STATE_ID_mask = 0x07 << 0,
+ SRC_STATE_ID_shift = 0,
+ DST_STATE_ID_mask = 0x07 << 16,
+ DST_STATE_ID_shift = 16,
+ VGT_CACHE_INVALIDATION = 0x000088c4,
+ CACHE_INVALIDATION_mask = 0x03 << 0,
+ CACHE_INVALIDATION_shift = 0,
+ VC_ONLY = 0x00,
+ TC_ONLY = 0x01,
+ VC_AND_TC = 0x02,
+ VS_NO_EXTRA_BUFFER_bit = 1 << 5,
+ AUTO_INVLD_EN_mask = 0x03 << 6,
+ AUTO_INVLD_EN_shift = 6,
+ VGT_GS_VERTEX_REUSE = 0x000088d4,
+ VERT_REUSE_mask = 0x1f << 0,
+ VERT_REUSE_shift = 0,
+ VGT_CNTL_STATUS = 0x000088f0,
+ VGT_OUT_INDX_BUSY_bit = 1 << 0,
+ VGT_OUT_BUSY_bit = 1 << 1,
+ VGT_PT_BUSY_bit = 1 << 2,
+ VGT_TE_BUSY_bit = 1 << 3,
+ VGT_VR_BUSY_bit = 1 << 4,
+ VGT_GRP_BUSY_bit = 1 << 5,
+ VGT_DMA_REQ_BUSY_bit = 1 << 6,
+ VGT_DMA_BUSY_bit = 1 << 7,
+ VGT_GS_BUSY_bit = 1 << 8,
+ VGT_HS_BUSY_bit = 1 << 9,
+ VGT_TE11_BUSY_bit = 1 << 10,
+ VGT_BUSY_bit = 1 << 11,
+ VGT_PRIMITIVE_TYPE = 0x00008958,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift = 0,
+ DI_PT_NONE = 0x00,
+ DI_PT_POINTLIST = 0x01,
+ DI_PT_LINELIST = 0x02,
+ DI_PT_LINESTRIP = 0x03,
+ DI_PT_TRILIST = 0x04,
+ DI_PT_TRIFAN = 0x05,
+ DI_PT_TRISTRIP = 0x06,
+ DI_PT_UNUSED_0 = 0x07,
+ DI_PT_UNUSED_1 = 0x08,
+ DI_PT_PATCH = 0x09,
+ DI_PT_LINELIST_ADJ = 0x0a,
+ DI_PT_LINESTRIP_ADJ = 0x0b,
+ DI_PT_TRILIST_ADJ = 0x0c,
+ DI_PT_TRISTRIP_ADJ = 0x0d,
+ DI_PT_UNUSED_3 = 0x0e,
+ DI_PT_UNUSED_4 = 0x0f,
+ DI_PT_TRI_WITH_WFLAGS = 0x10,
+ DI_PT_RECTLIST = 0x11,
+ DI_PT_LINELOOP = 0x12,
+ DI_PT_QUADLIST = 0x13,
+ DI_PT_QUADSTRIP = 0x14,
+ DI_PT_POLYGON = 0x15,
+ DI_PT_2D_COPY_RECT_LIST_V0 = 0x16,
+ DI_PT_2D_COPY_RECT_LIST_V1 = 0x17,
+ DI_PT_2D_COPY_RECT_LIST_V2 = 0x18,
+ DI_PT_2D_COPY_RECT_LIST_V3 = 0x19,
+ DI_PT_2D_FILL_RECT_LIST = 0x1a,
+ DI_PT_2D_LINE_STRIP = 0x1b,
+ DI_PT_2D_TRI_STRIP = 0x1c,
+ VGT_INDEX_TYPE = 0x0000895c,
+ INDEX_TYPE_mask = 0x03 << 0,
+ INDEX_TYPE_shift = 0,
+ DI_INDEX_SIZE_16_BIT = 0x00,
+ DI_INDEX_SIZE_32_BIT = 0x01,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c,
+ VGT_NUM_INDICES = 0x00008970,
+ VGT_NUM_INSTANCES = 0x00008974,
+ PA_CL_CNTL_STATUS = 0x00008a10,
+ CL_BUSY_bit = 1 << 31,
+ PA_CL_ENHANCE = 0x00008a14,
+ CLIP_VTX_REORDER_ENA_bit = 1 << 0,
+ NUM_CLIP_SEQ_mask = 0x03 << 1,
+ NUM_CLIP_SEQ_shift = 1,
+ CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3,
+ VE_NAN_PROC_DISABLE_bit = 1 << 4,
+ PA_SU_CNTL_STATUS = 0x00008a50,
+ SU_BUSY_bit = 1 << 31,
+ PA_SU_LINE_STIPPLE_VALUE = 0x00008a60,
+ LINE_STIPPLE_VALUE_mask = 0xffffff << 0,
+ LINE_STIPPLE_VALUE_shift = 0,
+ PA_SC_LINE_STIPPLE_STATE = 0x00008b10,
+ CURRENT_PTR_mask = 0x0f << 0,
+ CURRENT_PTR_shift = 0,
+ CURRENT_COUNT_mask = 0xff << 8,
+ CURRENT_COUNT_shift = 8,
+ SQ_CONFIG = 0x00008c00,
+ VC_ENABLE_bit = 1 << 0,
+ EXPORT_SRC_C_bit = 1 << 1,
+ CS_PRIO_mask = 0x03 << 18,
+ CS_PRIO_shift = 18,
+ LS_PRIO_mask = 0x03 << 20,
+ LS_PRIO_shift = 20,
+ HS_PRIO_mask = 0x03 << 22,
+ HS_PRIO_shift = 22,
+ PS_PRIO_mask = 0x03 << 24,
+ PS_PRIO_shift = 24,
+ VS_PRIO_mask = 0x03 << 26,
+ VS_PRIO_shift = 26,
+ GS_PRIO_mask = 0x03 << 28,
+ GS_PRIO_shift = 28,
+ ES_PRIO_mask = 0x03 << 30,
+ ES_PRIO_shift = 30,
+ SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04,
+ NUM_PS_GPRS_mask = 0xff << 0,
+ NUM_PS_GPRS_shift = 0,
+ NUM_VS_GPRS_mask = 0xff << 16,
+ NUM_VS_GPRS_shift = 16,
+ NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28,
+ NUM_CLAUSE_TEMP_GPRS_shift = 28,
+ SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08,
+ NUM_GS_GPRS_mask = 0xff << 0,
+ NUM_GS_GPRS_shift = 0,
+ NUM_ES_GPRS_mask = 0xff << 16,
+ NUM_ES_GPRS_shift = 16,
+ SQ_GPR_RESOURCE_MGMT_3 = 0x00008c0c,
+ NUM_HS_GPRS_mask = 0xff << 0,
+ NUM_HS_GPRS_shift = 0,
+ NUM_LS_GPRS_mask = 0xff << 16,
+ NUM_LS_GPRS_shift = 16,
+ SQ_GLOBAL_GPR_RESOURCE_MGMT_1 = 0x00008c10,
+ PS_GGPR_BASE_mask = 0xff << 0,
+ PS_GGPR_BASE_shift = 0,
+ VS_GGPR_BASE_mask = 0xff << 8,
+ VS_GGPR_BASE_shift = 8,
+ GS_GGPR_BASE_mask = 0xff << 16,
+ GS_GGPR_BASE_shift = 16,
+ ES_GGPR_BASE_mask = 0xff << 24,
+ ES_GGPR_BASE_shift = 24,
+ SQ_GLOBAL_GPR_RESOURCE_MGMT_2 = 0x00008c14,
+ HS_GGPR_BASE_mask = 0xff << 0,
+ HS_GGPR_BASE_shift = 0,
+ LS_GGPR_BASE_mask = 0xff << 8,
+ LS_GGPR_BASE_shift = 8,
+ CS_GGPR_BASE_mask = 0xff << 16,
+ CS_GGPR_BASE_shift = 16,
+ SQ_THREAD_RESOURCE_MGMT = 0x00008c18,
+ NUM_PS_THREADS_mask = 0xff << 0,
+ NUM_PS_THREADS_shift = 0,
+ NUM_VS_THREADS_mask = 0xff << 8,
+ NUM_VS_THREADS_shift = 8,
+ NUM_GS_THREADS_mask = 0xff << 16,
+ NUM_GS_THREADS_shift = 16,
+ NUM_ES_THREADS_mask = 0xff << 24,
+ NUM_ES_THREADS_shift = 24,
+ SQ_THREAD_RESOURCE_MGMT_2 = 0x00008c1c,
+ NUM_HS_THREADS_mask = 0xff << 0,
+ NUM_HS_THREADS_shift = 0,
+ NUM_LS_THREADS_mask = 0xff << 8,
+ NUM_LS_THREADS_shift = 8,
+ SQ_STACK_RESOURCE_MGMT_1 = 0x00008c20,
+ NUM_PS_STACK_ENTRIES_mask = 0xfff << 0,
+ NUM_PS_STACK_ENTRIES_shift = 0,
+ NUM_VS_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_VS_STACK_ENTRIES_shift = 16,
+ SQ_STACK_RESOURCE_MGMT_2 = 0x00008c24,
+ NUM_GS_STACK_ENTRIES_mask = 0xfff << 0,
+ NUM_GS_STACK_ENTRIES_shift = 0,
+ NUM_ES_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_ES_STACK_ENTRIES_shift = 16,
+ SQ_STACK_RESOURCE_MGMT_3 = 0x00008c28,
+ NUM_HS_STACK_ENTRIES_mask = 0xfff << 0,
+ NUM_HS_STACK_ENTRIES_shift = 0,
+ NUM_LS_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_LS_STACK_ENTRIES_shift = 16,
+ SQ_ESGS_RING_BASE = 0x00008c40,
+ SQ_ESGS_RING_SIZE = 0x00008c44,
+ SQ_GSVS_RING_BASE = 0x00008c48,
+ SQ_GSVS_RING_SIZE = 0x00008c4c,
+ SQ_ESTMP_RING_BASE = 0x00008c50,
+ SQ_ESTMP_RING_SIZE = 0x00008c54,
+ SQ_GSTMP_RING_BASE = 0x00008c58,
+ SQ_GSTMP_RING_SIZE = 0x00008c5c,
+ SQ_VSTMP_RING_BASE = 0x00008c60,
+ SQ_VSTMP_RING_SIZE = 0x00008c64,
+ SQ_PSTMP_RING_BASE = 0x00008c68,
+ SQ_PSTMP_RING_SIZE = 0x00008c6c,
+ SQ_CONST_MEM_BASE = 0x00008df8,
+ SQ_ALU_WORD1_OP3 = 0x00008dfc,
+ SRC2_SEL_mask = 0x1ff << 0,
+ SRC2_SEL_shift = 0,
+ SQ_ALU_SRC_LDS_OQ_A = 0xdb,
+ SQ_ALU_SRC_LDS_OQ_B = 0xdc,
+ SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd,
+ SQ_ALU_SRC_LDS_OQ_B_POP = 0xde,
+ SQ_ALU_SRC_LDS_DIRECT_A = 0xdf,
+ SQ_ALU_SRC_LDS_DIRECT_B = 0xe0,
+ SQ_ALU_SRC_TIME_HI = 0xe3,
+ SQ_ALU_SRC_TIME_LO = 0xe4,
+ SQ_ALU_SRC_MASK_HI = 0xe5,
+ SQ_ALU_SRC_MASK_LO = 0xe6,
+ SQ_ALU_SRC_HW_WAVE_ID = 0xe7,
+ SQ_ALU_SRC_SIMD_ID = 0xe8,
+ SQ_ALU_SRC_SE_ID = 0xe9,
+ SQ_ALU_SRC_HW_THREADGRP_ID = 0xea,
+ SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb,
+ SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec,
+ SQ_ALU_SRC_HW_ALU_ODD = 0xed,
+ SQ_ALU_SRC_LOOP_IDX = 0xee,
+ SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0,
+ SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1,
+ SQ_ALU_SRC_PRIM_MASK_HI = 0xf2,
+ SQ_ALU_SRC_PRIM_MASK_LO = 0xf3,
+ SQ_ALU_SRC_1_DBL_L = 0xf4,
+ SQ_ALU_SRC_1_DBL_M = 0xf5,
+ SQ_ALU_SRC_0_5_DBL_L = 0xf6,
+ SQ_ALU_SRC_0_5_DBL_M = 0xf7,
+ SQ_ALU_SRC_0 = 0xf8,
+ SQ_ALU_SRC_1 = 0xf9,
+ SQ_ALU_SRC_1_INT = 0xfa,
+ SQ_ALU_SRC_M_1_INT = 0xfb,
+ SQ_ALU_SRC_0_5 = 0xfc,
+ SQ_ALU_SRC_LITERAL = 0xfd,
+ SQ_ALU_SRC_PV = 0xfe,
+ SQ_ALU_SRC_PS = 0xff,
+ SRC2_REL_bit = 1 << 9,
+ SRC2_CHAN_mask = 0x03 << 10,
+ SRC2_CHAN_shift = 10,
+ SQ_CHAN_X = 0x00,
+ SQ_CHAN_Y = 0x01,
+ SQ_CHAN_Z = 0x02,
+ SQ_CHAN_W = 0x03,
+ SRC2_NEG_bit = 1 << 12,
+ SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13,
+ SQ_ALU_WORD1_OP3__ALU_INST_shift = 13,
+ SQ_OP3_INST_BFE_UINT = 0x04,
+ SQ_OP3_INST_BFE_INT = 0x05,
+ SQ_OP3_INST_BFI_INT = 0x06,
+ SQ_OP3_INST_FMA = 0x07,
+ SQ_OP3_INST_CNDNE_64 = 0x09,
+ SQ_OP3_INST_FMA_64 = 0x0a,
+ SQ_OP3_INST_LERP_UINT = 0x0b,
+ SQ_OP3_INST_BIT_ALIGN_INT = 0x0c,
+ SQ_OP3_INST_BYTE_ALIGN_INT = 0x0d,
+ SQ_OP3_INST_SAD_ACCUM_UINT = 0x0e,
+ SQ_OP3_INST_SAD_ACCUM_HI_UINT = 0x0f,
+ SQ_OP3_INST_MULADD_UINT24 = 0x10,
+ SQ_OP3_INST_LDS_IDX_OP = 0x11,
+ SQ_OP3_INST_MULADD = 0x14,
+ SQ_OP3_INST_MULADD_M2 = 0x15,
+ SQ_OP3_INST_MULADD_M4 = 0x16,
+ SQ_OP3_INST_MULADD_D2 = 0x17,
+ SQ_OP3_INST_MULADD_IEEE = 0x18,
+ SQ_OP3_INST_CNDE = 0x19,
+ SQ_OP3_INST_CNDGT = 0x1a,
+ SQ_OP3_INST_CNDGE = 0x1b,
+ SQ_OP3_INST_CNDE_INT = 0x1c,
+ SQ_OP3_INST_CNDGT_INT = 0x1d,
+ SQ_OP3_INST_CNDGE_INT = 0x1e,
+ SQ_OP3_INST_MUL_LIT = 0x1f,
+ SQ_ALU_WORD1_LDS_DIRECT_LITERAL_LO = 0x00008dfc,
+ OFFSET_A_mask = 0x1fff << 0,
+ OFFSET_A_shift = 0,
+ STRIDE_A_mask = 0x7f << 13,
+ STRIDE_A_shift = 13,
+ THREAD_REL_A_bit = 1 << 22,
+ SQ_TEX_WORD2 = 0x00008dfc,
+ OFFSET_X_mask = 0x1f << 0,
+ OFFSET_X_shift = 0,
+ OFFSET_Y_mask = 0x1f << 5,
+ OFFSET_Y_shift = 5,
+ OFFSET_Z_mask = 0x1f << 10,
+ OFFSET_Z_shift = 10,
+ SAMPLER_ID_mask = 0x1f << 15,
+ SAMPLER_ID_shift = 15,
+ SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20,
+ SQ_TEX_WORD2__SRC_SEL_X_shift = 20,
+ SQ_SEL_X = 0x00,
+ SQ_SEL_Y = 0x01,
+ SQ_SEL_Z = 0x02,
+ SQ_SEL_W = 0x03,
+ SQ_SEL_0 = 0x04,
+ SQ_SEL_1 = 0x05,
+ SRC_SEL_Y_mask = 0x07 << 23,
+ SRC_SEL_Y_shift = 23,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SRC_SEL_Z_mask = 0x07 << 26,
+ SRC_SEL_Z_shift = 26,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SRC_SEL_W_mask = 0x07 << 29,
+ SRC_SEL_W_shift = 29,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc,
+ BURST_COUNT_mask = 0x0f << 16,
+ BURST_COUNT_shift = 16,
+ VALID_PIXEL_MODE_bit = 1 << 20,
+ END_OF_PROGRAM_bit = 1 << 21,
+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0xff << 22,
+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 22,
+ SQ_CF_INST_MEM_STREAM0_BUF0 = 0x40,
+ SQ_CF_INST_MEM_STREAM0_BUF1 = 0x41,
+ SQ_CF_INST_MEM_STREAM0_BUF2 = 0x42,
+ SQ_CF_INST_MEM_STREAM0_BUF3 = 0x43,
+ SQ_CF_INST_MEM_STREAM1_BUF0 = 0x44,
+ SQ_CF_INST_MEM_STREAM1_BUF1 = 0x45,
+ SQ_CF_INST_MEM_STREAM1_BUF2 = 0x46,
+ SQ_CF_INST_MEM_STREAM1_BUF3 = 0x47,
+ SQ_CF_INST_MEM_STREAM2_BUF0 = 0x48,
+ SQ_CF_INST_MEM_STREAM2_BUF1 = 0x49,
+ SQ_CF_INST_MEM_STREAM2_BUF2 = 0x4a,
+ SQ_CF_INST_MEM_STREAM2_BUF3 = 0x4b,
+ SQ_CF_INST_MEM_STREAM3_BUF0 = 0x4c,
+ SQ_CF_INST_MEM_STREAM3_BUF1 = 0x4d,
+ SQ_CF_INST_MEM_STREAM3_BUF2 = 0x4e,
+ SQ_CF_INST_MEM_STREAM3_BUF3 = 0x4f,
+ SQ_CF_INST_MEM_SCRATCH = 0x50,
+ SQ_CF_INST_MEM_RING = 0x52,
+ SQ_CF_INST_EXPORT = 0x53,
+ SQ_CF_INST_EXPORT_DONE = 0x54,
+ SQ_CF_INST_MEM_EXPORT = 0x55,
+ SQ_CF_INST_MEM_RAT = 0x56,
+ SQ_CF_INST_MEM_RAT_CACHELESS = 0x57,
+ SQ_CF_INST_MEM_RING1 = 0x58,
+ SQ_CF_INST_MEM_RING2 = 0x59,
+ SQ_CF_INST_MEM_RING3 = 0x5a,
+ SQ_CF_INST_MEM_EXPORT_COMBINED = 0x5b,
+ SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS = 0x5c,
+ MARK_bit = 1 << 30,
+ BARRIER_bit = 1 << 31,
+ SQ_CF_ALU_WORD1 = 0x00008dfc,
+ KCACHE_MODE1_mask = 0x03 << 0,
+ KCACHE_MODE1_shift = 0,
+ SQ_CF_KCACHE_NOP = 0x00,
+ SQ_CF_KCACHE_LOCK_1 = 0x01,
+ SQ_CF_KCACHE_LOCK_2 = 0x02,
+ SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03,
+ KCACHE_ADDR0_mask = 0xff << 2,
+ KCACHE_ADDR0_shift = 2,
+ KCACHE_ADDR1_mask = 0xff << 10,
+ KCACHE_ADDR1_shift = 10,
+ SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18,
+ SQ_CF_ALU_WORD1__COUNT_shift = 18,
+ SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25,
+ SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26,
+ SQ_CF_ALU_WORD1__CF_INST_shift = 26,
+ SQ_CF_INST_ALU = 0x08,
+ SQ_CF_INST_ALU_PUSH_BEFORE = 0x09,
+ SQ_CF_INST_ALU_POP_AFTER = 0x0a,
+ SQ_CF_INST_ALU_POP2_AFTER = 0x0b,
+ SQ_CF_INST_ALU_EXTENDED = 0x0c,
+ SQ_CF_INST_ALU_CONTINUE = 0x0d,
+ SQ_CF_INST_ALU_BREAK = 0x0e,
+ SQ_CF_INST_ALU_ELSE_AFTER = 0x0f,
+ WHOLE_QUAD_MODE_bit = 1 << 30,
+/* BARRIER_bit = 1 << 31, */
+ SQ_TEX_WORD1 = 0x00008dfc,
+ SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0,
+ SQ_TEX_WORD1__DST_GPR_shift = 0,
+ SQ_TEX_WORD1__DST_REL_bit = 1 << 7,
+ SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_TEX_WORD1__DST_SEL_X_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_SEL_MASK = 0x07,
+ SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+ SQ_TEX_WORD1__DST_SEL_Y_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+ SQ_TEX_WORD1__DST_SEL_Z_shift = 15,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18,
+ SQ_TEX_WORD1__DST_SEL_W_shift = 18,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21,
+ SQ_TEX_WORD1__LOD_BIAS_shift = 21,
+ COORD_TYPE_X_bit = 1 << 28,
+ COORD_TYPE_Y_bit = 1 << 29,
+ COORD_TYPE_Z_bit = 1 << 30,
+ COORD_TYPE_W_bit = 1 << 31,
+ SQ_VTX_WORD0 = 0x00008dfc,
+ VTX_INST_mask = 0x1f << 0,
+ VTX_INST_shift = 0,
+ SQ_VTX_INST_FETCH = 0x00,
+ SQ_VTX_INST_SEMANTIC = 0x01,
+ SQ_VTX_INST_GET_BUFFER_RESINFO = 0x0e,
+ FETCH_TYPE_mask = 0x03 << 5,
+ FETCH_TYPE_shift = 5,
+ SQ_VTX_FETCH_VERTEX_DATA = 0x00,
+ SQ_VTX_FETCH_INSTANCE_DATA = 0x01,
+ SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02,
+ FETCH_WHOLE_QUAD_bit = 1 << 7,
+ BUFFER_ID_mask = 0xff << 8,
+ BUFFER_ID_shift = 8,
+ SQ_VTX_WORD0__SRC_GPR_mask = 0x7f << 16,
+ SQ_VTX_WORD0__SRC_GPR_shift = 16,
+ SRC_REL_bit = 1 << 23,
+ SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24,
+ SQ_VTX_WORD0__SRC_SEL_X_shift = 24,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+ MEGA_FETCH_COUNT_mask = 0x3f << 26,
+ MEGA_FETCH_COUNT_shift = 26,
+ SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc,
+ SEL_X_mask = 0x07 << 0,
+ SEL_X_shift = 0,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_Y_mask = 0x07 << 3,
+ SEL_Y_shift = 3,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_Z_mask = 0x07 << 6,
+ SEL_Z_shift = 6,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_W_mask = 0x07 << 9,
+ SEL_W_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_RD_WORD0 = 0x00008dfc,
+ MEM_INST_mask = 0x1f << 0,
+ MEM_INST_shift = 0,
+ SQ_MEM_INST_MEM = 0x02,
+ SQ_MEM_RD_WORD0__ELEM_SIZE_mask = 0x03 << 5,
+ SQ_MEM_RD_WORD0__ELEM_SIZE_shift = 5,
+/* FETCH_WHOLE_QUAD_bit = 1 << 7, */
+ MEM_OP_mask = 0x07 << 8,
+ MEM_OP_shift = 8,
+ SQ_MEM_OP_RD_SCRATCH = 0x00,
+ SQ_MEM_OP_RD_SCATTER = 0x02,
+ SQ_MEM_OP_GDS = 0x04,
+ SQ_MEM_OP_TF_WRITE = 0x05,
+ SQ_MEM_RD_WORD0__UNCACHED_bit = 1 << 11,
+ INDEXED_bit = 1 << 12,
+ SQ_MEM_RD_WORD0__SRC_GPR_mask = 0x7f << 16,
+ SQ_MEM_RD_WORD0__SRC_GPR_shift = 16,
+/* SRC_REL_bit = 1 << 23, */
+ SQ_MEM_RD_WORD0__SRC_SEL_X_mask = 0x03 << 24,
+ SQ_MEM_RD_WORD0__SRC_SEL_X_shift = 24,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+ BURST_CNT_mask = 0x0f << 26,
+ BURST_CNT_shift = 26,
+ SQ_ALU_WORD1 = 0x00008dfc,
+ SQ_ALU_WORD1__ENCODING_mask = 0x07 << 15,
+ SQ_ALU_WORD1__ENCODING_shift = 15,
+ BANK_SWIZZLE_mask = 0x07 << 18,
+ BANK_SWIZZLE_shift = 18,
+ SQ_ALU_VEC_012 = 0x00,
+ SQ_ALU_VEC_021 = 0x01,
+ SQ_ALU_VEC_120 = 0x02,
+ SQ_ALU_VEC_102 = 0x03,
+ SQ_ALU_VEC_201 = 0x04,
+ SQ_ALU_VEC_210 = 0x05,
+ SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21,
+ SQ_ALU_WORD1__DST_GPR_shift = 21,
+ SQ_ALU_WORD1__DST_REL_bit = 1 << 28,
+ DST_CHAN_mask = 0x03 << 29,
+ DST_CHAN_shift = 29,
+ CHAN_X = 0x00,
+ CHAN_Y = 0x01,
+ CHAN_Z = 0x02,
+ CHAN_W = 0x03,
+ SQ_ALU_WORD1__CLAMP_bit = 1 << 31,
+ SQ_CF_ALU_WORD0_EXT = 0x00008dfc,
+ KCACHE_BANK_INDEX_MODE0_mask = 0x03 << 4,
+ KCACHE_BANK_INDEX_MODE0_shift = 4,
+ SQ_CF_INDEX_NONE = 0x00,
+ SQ_CF_INDEX_0 = 0x01,
+ SQ_CF_INDEX_1 = 0x02,
+ SQ_CF_INVALID = 0x03,
+ KCACHE_BANK_INDEX_MODE1_mask = 0x03 << 6,
+ KCACHE_BANK_INDEX_MODE1_shift = 6,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ KCACHE_BANK_INDEX_MODE2_mask = 0x03 << 8,
+ KCACHE_BANK_INDEX_MODE2_shift = 8,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ KCACHE_BANK_INDEX_MODE3_mask = 0x03 << 10,
+ KCACHE_BANK_INDEX_MODE3_shift = 10,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ KCACHE_BANK2_mask = 0x0f << 22,
+ KCACHE_BANK2_shift = 22,
+ KCACHE_BANK3_mask = 0x0f << 26,
+ KCACHE_BANK3_shift = 26,
+ KCACHE_MODE2_mask = 0x03 << 30,
+ KCACHE_MODE2_shift = 30,
+/* SQ_CF_KCACHE_NOP = 0x00, */
+/* SQ_CF_KCACHE_LOCK_1 = 0x01, */
+/* SQ_CF_KCACHE_LOCK_2 = 0x02, */
+/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */
+ SQ_ALU_WORD0_LDS_IDX_OP = 0x00008dfc,
+ SRC0_SEL_mask = 0x1ff << 0,
+ SRC0_SEL_shift = 0,
+/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */
+/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */
+/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */
+/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */
+/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */
+/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */
+/* SQ_ALU_SRC_TIME_HI = 0xe3, */
+/* SQ_ALU_SRC_TIME_LO = 0xe4, */
+/* SQ_ALU_SRC_MASK_HI = 0xe5, */
+/* SQ_ALU_SRC_MASK_LO = 0xe6, */
+/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */
+/* SQ_ALU_SRC_SIMD_ID = 0xe8, */
+/* SQ_ALU_SRC_SE_ID = 0xe9, */
+/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */
+/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */
+/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */
+/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */
+/* SQ_ALU_SRC_LOOP_IDX = 0xee, */
+/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */
+/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */
+/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */
+/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */
+/* SQ_ALU_SRC_1_DBL_L = 0xf4, */
+/* SQ_ALU_SRC_1_DBL_M = 0xf5, */
+/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */
+/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+ SRC0_REL_bit = 1 << 9,
+ SRC0_CHAN_mask = 0x03 << 10,
+ SRC0_CHAN_shift = 10,
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ IDX_OFFSET_4_bit = 1 << 12,
+ SRC1_SEL_mask = 0x1ff << 13,
+ SRC1_SEL_shift = 13,
+/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */
+/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */
+/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */
+/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */
+/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */
+/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */
+/* SQ_ALU_SRC_TIME_HI = 0xe3, */
+/* SQ_ALU_SRC_TIME_LO = 0xe4, */
+/* SQ_ALU_SRC_MASK_HI = 0xe5, */
+/* SQ_ALU_SRC_MASK_LO = 0xe6, */
+/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */
+/* SQ_ALU_SRC_SIMD_ID = 0xe8, */
+/* SQ_ALU_SRC_SE_ID = 0xe9, */
+/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */
+/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */
+/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */
+/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */
+/* SQ_ALU_SRC_LOOP_IDX = 0xee, */
+/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */
+/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */
+/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */
+/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */
+/* SQ_ALU_SRC_1_DBL_L = 0xf4, */
+/* SQ_ALU_SRC_1_DBL_M = 0xf5, */
+/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */
+/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+ SRC1_REL_bit = 1 << 22,
+ SRC1_CHAN_mask = 0x03 << 23,
+ SRC1_CHAN_shift = 23,
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ IDX_OFFSET_5_bit = 1 << 25,
+ INDEX_MODE_mask = 0x07 << 26,
+ INDEX_MODE_shift = 26,
+ SQ_INDEX_AR_X = 0x00,
+ SQ_INDEX_LOOP = 0x04,
+ SQ_INDEX_GLOBAL = 0x05,
+ SQ_INDEX_GLOBAL_AR_X = 0x06,
+ PRED_SEL_mask = 0x03 << 29,
+ PRED_SEL_shift = 29,
+ SQ_PRED_SEL_OFF = 0x00,
+ SQ_PRED_SEL_ZERO = 0x02,
+ SQ_PRED_SEL_ONE = 0x03,
+ LAST_bit = 1 << 31,
+ SQ_MEM_GDS_WORD2 = 0x00008dfc,
+ SQ_MEM_GDS_WORD2__DST_SEL_X_mask = 0x07 << 0,
+ SQ_MEM_GDS_WORD2__DST_SEL_X_shift = 0,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_GDS_WORD2__DST_SEL_Y_mask = 0x07 << 3,
+ SQ_MEM_GDS_WORD2__DST_SEL_Y_shift = 3,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_GDS_WORD2__DST_SEL_Z_mask = 0x07 << 6,
+ SQ_MEM_GDS_WORD2__DST_SEL_Z_shift = 6,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_GDS_WORD2__DST_SEL_W_mask = 0x07 << 9,
+ SQ_MEM_GDS_WORD2__DST_SEL_W_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_CF_ALLOC_EXPORT_WORD0_RAT = 0x00008dfc,
+ RAT_ID_mask = 0x0f << 0,
+ RAT_ID_shift = 0,
+ RAT_INST_mask = 0x3f << 4,
+ RAT_INST_shift = 4,
+ SQ_EXPORT_RAT_INST_NOP = 0x00,
+ SQ_EXPORT_RAT_INST_STORE_TYPED = 0x01,
+ SQ_EXPORT_RAT_INST_STORE_RAW = 0x02,
+ SQ_EXPORT_RAT_INST_STORE_RAW_FDENORM = 0x03,
+ SQ_EXPORT_RAT_INST_CMPXCHG_INT = 0x04,
+ SQ_EXPORT_RAT_INST_CMPXCHG_FLT = 0x05,
+ SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM = 0x06,
+ SQ_EXPORT_RAT_INST_ADD = 0x07,
+ SQ_EXPORT_RAT_INST_SUB = 0x08,
+ SQ_EXPORT_RAT_INST_RSUB = 0x09,
+ SQ_EXPORT_RAT_INST_MIN_INT = 0x0a,
+ SQ_EXPORT_RAT_INST_MIN_UINT = 0x0b,
+ SQ_EXPORT_RAT_INST_MAX_INT = 0x0c,
+ SQ_EXPORT_RAT_INST_MAX_UINT = 0x0d,
+ SQ_EXPORT_RAT_INST_AND = 0x0e,
+ SQ_EXPORT_RAT_INST_OR = 0x0f,
+ SQ_EXPORT_RAT_INST_XOR = 0x10,
+ SQ_EXPORT_RAT_INST_MSKOR = 0x11,
+ SQ_EXPORT_RAT_INST_INC_UINT = 0x12,
+ SQ_EXPORT_RAT_INST_DEC_UINT = 0x13,
+ SQ_EXPORT_RAT_INST_NOP_RTN = 0x20,
+ SQ_EXPORT_RAT_INST_XCHG_RTN = 0x22,
+ SQ_EXPORT_RAT_INST_XCHG_FDENORM_RTN = 0x23,
+ SQ_EXPORT_RAT_INST_CMPXCHG_INT_RTN = 0x24,
+ SQ_EXPORT_RAT_INST_CMPXCHG_FLT_RTN = 0x25,
+ SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM_RTN = 0x26,
+ SQ_EXPORT_RAT_INST_ADD_RTN = 0x27,
+ SQ_EXPORT_RAT_INST_SUB_RTN = 0x28,
+ SQ_EXPORT_RAT_INST_RSUB_RTN = 0x29,
+ SQ_EXPORT_RAT_INST_MIN_INT_RTN = 0x2a,
+ SQ_EXPORT_RAT_INST_MIN_UINT_RTN = 0x2b,
+ SQ_EXPORT_RAT_INST_MAX_INT_RTN = 0x2c,
+ SQ_EXPORT_RAT_INST_MAX_UINT_RTN = 0x2d,
+ SQ_EXPORT_RAT_INST_AND_RTN = 0x2e,
+ SQ_EXPORT_RAT_INST_OR_RTN = 0x2f,
+ SQ_EXPORT_RAT_INST_XOR_RTN = 0x30,
+ SQ_EXPORT_RAT_INST_MSKOR_RTN = 0x31,
+ SQ_EXPORT_RAT_INST_INC_UINT_RTN = 0x32,
+ SQ_EXPORT_RAT_INST_DEC_UINT_RTN = 0x33,
+ RAT_INDEX_MODE_mask = 0x03 << 11,
+ RAT_INDEX_MODE_shift = 11,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ SQ_CF_ALLOC_EXPORT_WORD0_RAT__TYPE_mask = 0x03 << 13,
+ SQ_CF_ALLOC_EXPORT_WORD0_RAT__TYPE_shift = 13,
+ SQ_EXPORT_PIXEL = 0x00,
+ SQ_EXPORT_POS = 0x01,
+ SQ_EXPORT_PARAM = 0x02,
+ X_UNUSED_FOR_SX_EXPORTS = 0x03,
+ RW_GPR_mask = 0x7f << 15,
+ RW_GPR_shift = 15,
+ RW_REL_bit = 1 << 22,
+ INDEX_GPR_mask = 0x7f << 23,
+ INDEX_GPR_shift = 23,
+ SQ_CF_ALLOC_EXPORT_WORD0_RAT__ELEM_SIZE_mask = 0x03 << 30,
+ SQ_CF_ALLOC_EXPORT_WORD0_RAT__ELEM_SIZE_shift = 30,
+ SQ_CF_ALU_WORD0 = 0x00008dfc,
+ SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0,
+ SQ_CF_ALU_WORD0__ADDR_shift = 0,
+ KCACHE_BANK0_mask = 0x0f << 22,
+ KCACHE_BANK0_shift = 22,
+ KCACHE_BANK1_mask = 0x0f << 26,
+ KCACHE_BANK1_shift = 26,
+ KCACHE_MODE0_mask = 0x03 << 30,
+ KCACHE_MODE0_shift = 30,
+/* SQ_CF_KCACHE_NOP = 0x00, */
+/* SQ_CF_KCACHE_LOCK_1 = 0x01, */
+/* SQ_CF_KCACHE_LOCK_2 = 0x02, */
+/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */
+ SQ_MEM_GDS_WORD1 = 0x00008dfc,
+ SQ_MEM_GDS_WORD1__DST_GPR_mask = 0x7f << 0,
+ SQ_MEM_GDS_WORD1__DST_GPR_shift = 0,
+ DST_REL_MODE_mask = 0x03 << 7,
+ DST_REL_MODE_shift = 7,
+ SQ_REL_NONE = 0x00,
+ SQ_REL_LOOP = 0x01,
+ SQ_REL_GLOBAL = 0x02,
+ GDS_OP_mask = 0x3f << 9,
+ GDS_OP_shift = 9,
+ SQ_DS_INST_ADD = 0x00,
+ SQ_DS_INST_SUB = 0x01,
+ SQ_DS_INST_RSUB = 0x02,
+ SQ_DS_INST_INC = 0x03,
+ SQ_DS_INST_DEC = 0x04,
+ SQ_DS_INST_MIN_INT = 0x05,
+ SQ_DS_INST_MAX_INT = 0x06,
+ SQ_DS_INST_MIN_UINT = 0x07,
+ SQ_DS_INST_MAX_UINT = 0x08,
+ SQ_DS_INST_AND = 0x09,
+ SQ_DS_INST_OR = 0x0a,
+ SQ_DS_INST_XOR = 0x0b,
+ SQ_DS_INST_MSKOR = 0x0c,
+ SQ_DS_INST_WRITE = 0x0d,
+ SQ_DS_INST_WRITE_REL = 0x0e,
+ SQ_DS_INST_WRITE2 = 0x0f,
+ SQ_DS_INST_CMP_STORE = 0x10,
+ SQ_DS_INST_CMP_STORE_SPF = 0x11,
+ SQ_DS_INST_BYTE_WRITE = 0x12,
+ SQ_DS_INST_SHORT_WRITE = 0x13,
+ SQ_DS_INST_ADD_RET = 0x20,
+ SQ_DS_INST_SUB_RET = 0x21,
+ SQ_DS_INST_RSUB_RET = 0x22,
+ SQ_DS_INST_INC_RET = 0x23,
+ SQ_DS_INST_DEC_RET = 0x24,
+ SQ_DS_INST_MIN_INT_RET = 0x25,
+ SQ_DS_INST_MAX_INT_RET = 0x26,
+ SQ_DS_INST_MIN_UINT_RET = 0x27,
+ SQ_DS_INST_MAX_UINT_RET = 0x28,
+ SQ_DS_INST_AND_RET = 0x29,
+ SQ_DS_INST_OR_RET = 0x2a,
+ SQ_DS_INST_XOR_RET = 0x2b,
+ SQ_DS_INST_MSKOR_RET = 0x2c,
+ SQ_DS_INST_XCHG_RET = 0x2d,
+ SQ_DS_INST_XCHG_REL_RET = 0x2e,
+ SQ_DS_INST_XCHG2_RET = 0x2f,
+ SQ_DS_INST_CMP_XCHG_RET = 0x30,
+ SQ_DS_INST_CMP_XCHG_SPF_RET = 0x31,
+ SQ_DS_INST_READ_RET = 0x32,
+ SQ_DS_INST_READ_REL_RET = 0x33,
+ SQ_DS_INST_READ2_RET = 0x34,
+ SQ_DS_INST_READWRITE_RET = 0x35,
+ SQ_DS_INST_BYTE_READ_RET = 0x36,
+ SQ_DS_INST_UBYTE_READ_RET = 0x37,
+ SQ_DS_INST_SHORT_READ_RET = 0x38,
+ SQ_DS_INST_USHORT_READ_RET = 0x39,
+ SQ_DS_INST_ATOMIC_ORDERED_ALLOC_RET = 0x3f,
+ DS_OFFSET_mask = 0x7f << 16,
+ DS_OFFSET_shift = 16,
+ UAV_INDEX_MODE_mask = 0x03 << 24,
+ UAV_INDEX_MODE_shift = 24,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ UAV_ID_mask = 0x0f << 26,
+ UAV_ID_shift = 26,
+ ALLOC_CONSUME_bit = 1 << 30,
+ BCAST_FIRST_REQ_bit = 1 << 31,
+ SQ_MEM_RD_WORD2 = 0x00008dfc,
+ ARRAY_BASE_mask = 0x1fff << 0,
+ ARRAY_BASE_shift = 0,
+ SQ_MEM_RD_WORD2__ENDIAN_SWAP_mask = 0x03 << 16,
+ SQ_MEM_RD_WORD2__ENDIAN_SWAP_shift = 16,
+ SQ_ENDIAN_NONE = 0x00,
+ SQ_ENDIAN_8IN16 = 0x01,
+ SQ_ENDIAN_8IN32 = 0x02,
+ SQ_MEM_RD_WORD2__ARRAY_SIZE_mask = 0xfff << 20,
+ SQ_MEM_RD_WORD2__ARRAY_SIZE_shift = 20,
+ SQ_CF_ALU_WORD1_EXT = 0x00008dfc,
+ KCACHE_MODE3_mask = 0x03 << 0,
+ KCACHE_MODE3_shift = 0,
+/* SQ_CF_KCACHE_NOP = 0x00, */
+/* SQ_CF_KCACHE_LOCK_1 = 0x01, */
+/* SQ_CF_KCACHE_LOCK_2 = 0x02, */
+/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */
+ KCACHE_ADDR2_mask = 0xff << 2,
+ KCACHE_ADDR2_shift = 2,
+ KCACHE_ADDR3_mask = 0xff << 10,
+ KCACHE_ADDR3_shift = 10,
+ SQ_CF_ALU_WORD1_EXT__CF_INST_mask = 0x0f << 26,
+ SQ_CF_ALU_WORD1_EXT__CF_INST_shift = 26,
+/* SQ_CF_INST_ALU = 0x08, */
+/* SQ_CF_INST_ALU_PUSH_BEFORE = 0x09, */
+/* SQ_CF_INST_ALU_POP_AFTER = 0x0a, */
+/* SQ_CF_INST_ALU_POP2_AFTER = 0x0b, */
+/* SQ_CF_INST_ALU_EXTENDED = 0x0c, */
+/* SQ_CF_INST_ALU_CONTINUE = 0x0d, */
+/* SQ_CF_INST_ALU_BREAK = 0x0e, */
+/* SQ_CF_INST_ALU_ELSE_AFTER = 0x0f, */
+/* BARRIER_bit = 1 << 31, */
+ SQ_CF_GWS_WORD0 = 0x00008dfc,
+ VALUE_mask = 0x3ff << 0,
+ VALUE_shift = 0,
+ RESOURCE_mask = 0x1f << 16,
+ RESOURCE_shift = 16,
+ SIGN_bit = 1 << 25,
+ VAL_INDEX_MODE_mask = 0x03 << 26,
+ VAL_INDEX_MODE_shift = 26,
+ SQ_GWS_INDEX_NONE = 0x00,
+ SQ_GWS_INDEX_0 = 0x01,
+ SQ_GWS_INDEX_1 = 0x02,
+ SQ_GWS_INDEX_MIX = 0x03,
+ RSRC_INDEX_MODE_mask = 0x03 << 28,
+ RSRC_INDEX_MODE_shift = 28,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ GWS_OPCODE_mask = 0x03 << 30,
+ GWS_OPCODE_shift = 30,
+ SQ_GWS_SEMA_V = 0x00,
+ SQ_GWS_SEMA_P = 0x01,
+ SQ_GWS_BARRIER = 0x02,
+ SQ_GWS_INIT = 0x03,
+ SQ_VTX_WORD2 = 0x00008dfc,
+ SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0,
+ SQ_VTX_WORD2__OFFSET_shift = 0,
+ SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16,
+ SQ_VTX_WORD2__ENDIAN_SWAP_shift = 16,
+/* SQ_ENDIAN_NONE = 0x00, */
+/* SQ_ENDIAN_8IN16 = 0x01, */
+/* SQ_ENDIAN_8IN32 = 0x02, */
+ CONST_BUF_NO_STRIDE_bit = 1 << 18,
+ MEGA_FETCH_bit = 1 << 19,
+ SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20,
+ BUFFER_INDEX_MODE_mask = 0x03 << 21,
+ BUFFER_INDEX_MODE_shift = 21,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc,
+ SQ_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_mask = 0xfff << 0,
+ SQ_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_shift = 0,
+ COMP_MASK_mask = 0x0f << 12,
+ COMP_MASK_shift = 12,
+ SQ_CF_WORD0 = 0x00008dfc,
+ SQ_CF_WORD0__ADDR_mask = 0xffffff << 0,
+ SQ_CF_WORD0__ADDR_shift = 0,
+ JUMPTABLE_SEL_mask = 0x07 << 24,
+ JUMPTABLE_SEL_shift = 24,
+ SQ_CF_JUMPTABLE_SEL_CONST_A = 0x00,
+ SQ_CF_JUMPTABLE_SEL_CONST_B = 0x01,
+ SQ_CF_JUMPTABLE_SEL_CONST_C = 0x02,
+ SQ_CF_JUMPTABLE_SEL_CONST_D = 0x03,
+ SQ_CF_JUMPTABLE_SEL_INDEX_0 = 0x04,
+ SQ_CF_JUMPTABLE_SEL_INDEX_1 = 0x05,
+ SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc,
+/* ARRAY_BASE_mask = 0x1fff << 0, */
+/* ARRAY_BASE_shift = 0, */
+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13,
+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13,
+/* SQ_EXPORT_PIXEL = 0x00, */
+/* SQ_EXPORT_POS = 0x01, */
+/* SQ_EXPORT_PARAM = 0x02, */
+/* X_UNUSED_FOR_SX_EXPORTS = 0x03, */
+/* RW_GPR_mask = 0x7f << 15, */
+/* RW_GPR_shift = 15, */
+/* RW_REL_bit = 1 << 22, */
+/* INDEX_GPR_mask = 0x7f << 23, */
+/* INDEX_GPR_shift = 23, */
+ SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_mask = 0x03 << 30,
+ SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_shift = 30,
+ SQ_MEM_GDS_WORD0 = 0x00008dfc,
+/* MEM_INST_mask = 0x1f << 0, */
+/* MEM_INST_shift = 0, */
+/* SQ_MEM_INST_MEM = 0x02, */
+/* MEM_OP_mask = 0x07 << 8, */
+/* MEM_OP_shift = 8, */
+/* SQ_MEM_OP_RD_SCRATCH = 0x00, */
+/* SQ_MEM_OP_RD_SCATTER = 0x02, */
+/* SQ_MEM_OP_GDS = 0x04, */
+/* SQ_MEM_OP_TF_WRITE = 0x05, */
+ SQ_MEM_GDS_WORD0__SRC_GPR_mask = 0x7f << 11,
+ SQ_MEM_GDS_WORD0__SRC_GPR_shift = 11,
+ SRC_REL_MODE_mask = 0x03 << 18,
+ SRC_REL_MODE_shift = 18,
+/* SQ_REL_NONE = 0x00, */
+/* SQ_REL_LOOP = 0x01, */
+/* SQ_REL_GLOBAL = 0x02, */
+ SQ_MEM_GDS_WORD0__SRC_SEL_X_mask = 0x07 << 20,
+ SQ_MEM_GDS_WORD0__SRC_SEL_X_shift = 20,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SRC_SEL_Y_mask = 0x07 << 23, */
+/* SRC_SEL_Y_shift = 23, */
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SRC_SEL_Z_mask = 0x07 << 26, */
+/* SRC_SEL_Z_shift = 26, */
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_ALU_WORD1_LDS_DIRECT_LITERAL_HI = 0x00008dfc,
+ OFFSET_B_mask = 0x1fff << 0,
+ OFFSET_B_shift = 0,
+ STRIDE_B_mask = 0x7f << 13,
+ STRIDE_B_shift = 13,
+ THREAD_REL_B_bit = 1 << 22,
+ DIRECT_READ_32_bit = 1 << 31,
+ SQ_VTX_WORD1 = 0x00008dfc,
+ SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_VTX_WORD1__DST_SEL_X_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+ SQ_VTX_WORD1__DST_SEL_Y_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+ SQ_VTX_WORD1__DST_SEL_Z_shift = 15,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18,
+ SQ_VTX_WORD1__DST_SEL_W_shift = 18,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ USE_CONST_FIELDS_bit = 1 << 21,
+ SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22,
+ SQ_VTX_WORD1__DATA_FORMAT_shift = 22,
+ SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28,
+ SQ_VTX_WORD1__NUM_FORMAT_ALL_shift = 28,
+ SQ_NUM_FORMAT_NORM = 0x00,
+ SQ_NUM_FORMAT_INT = 0x01,
+ SQ_NUM_FORMAT_SCALED = 0x02,
+ SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30,
+ SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31,
+ SQ_ALU_WORD1_OP2 = 0x00008dfc,
+ SRC0_ABS_bit = 1 << 0,
+ SRC1_ABS_bit = 1 << 1,
+ UPDATE_EXECUTE_MASK_bit = 1 << 2,
+ UPDATE_PRED_bit = 1 << 3,
+ WRITE_MASK_bit = 1 << 4,
+ OMOD_mask = 0x03 << 5,
+ OMOD_shift = 5,
+ SQ_ALU_OMOD_OFF = 0x00,
+ SQ_ALU_OMOD_M2 = 0x01,
+ SQ_ALU_OMOD_M4 = 0x02,
+ SQ_ALU_OMOD_D2 = 0x03,
+ SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x7ff << 7,
+ SQ_ALU_WORD1_OP2__ALU_INST_shift = 7,
+ SQ_OP2_INST_ADD = 0x00,
+ SQ_OP2_INST_MUL = 0x01,
+ SQ_OP2_INST_MUL_IEEE = 0x02,
+ SQ_OP2_INST_MAX = 0x03,
+ SQ_OP2_INST_MIN = 0x04,
+ SQ_OP2_INST_MAX_DX10 = 0x05,
+ SQ_OP2_INST_MIN_DX10 = 0x06,
+ SQ_OP2_INST_SETE = 0x08,
+ SQ_OP2_INST_SETGT = 0x09,
+ SQ_OP2_INST_SETGE = 0x0a,
+ SQ_OP2_INST_SETNE = 0x0b,
+ SQ_OP2_INST_SETE_DX10 = 0x0c,
+ SQ_OP2_INST_SETGT_DX10 = 0x0d,
+ SQ_OP2_INST_SETGE_DX10 = 0x0e,
+ SQ_OP2_INST_SETNE_DX10 = 0x0f,
+ SQ_OP2_INST_FRACT = 0x10,
+ SQ_OP2_INST_TRUNC = 0x11,
+ SQ_OP2_INST_CEIL = 0x12,
+ SQ_OP2_INST_RNDNE = 0x13,
+ SQ_OP2_INST_FLOOR = 0x14,
+ SQ_OP2_INST_ASHR_INT = 0x15,
+ SQ_OP2_INST_LSHR_INT = 0x16,
+ SQ_OP2_INST_LSHL_INT = 0x17,
+ SQ_OP2_INST_MOV = 0x19,
+ SQ_OP2_INST_NOP = 0x1a,
+ SQ_OP2_INST_PRED_SETGT_UINT = 0x1e,
+ SQ_OP2_INST_PRED_SETGE_UINT = 0x1f,
+ SQ_OP2_INST_PRED_SETE = 0x20,
+ SQ_OP2_INST_PRED_SETGT = 0x21,
+ SQ_OP2_INST_PRED_SETGE = 0x22,
+ SQ_OP2_INST_PRED_SETNE = 0x23,
+ SQ_OP2_INST_PRED_SET_INV = 0x24,
+ SQ_OP2_INST_PRED_SET_POP = 0x25,
+ SQ_OP2_INST_PRED_SET_CLR = 0x26,
+ SQ_OP2_INST_PRED_SET_RESTORE = 0x27,
+ SQ_OP2_INST_PRED_SETE_PUSH = 0x28,
+ SQ_OP2_INST_PRED_SETGT_PUSH = 0x29,
+ SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a,
+ SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b,
+ SQ_OP2_INST_KILLE = 0x2c,
+ SQ_OP2_INST_KILLGT = 0x2d,
+ SQ_OP2_INST_KILLGE = 0x2e,
+ SQ_OP2_INST_KILLNE = 0x2f,
+ SQ_OP2_INST_AND_INT = 0x30,
+ SQ_OP2_INST_OR_INT = 0x31,
+ SQ_OP2_INST_XOR_INT = 0x32,
+ SQ_OP2_INST_NOT_INT = 0x33,
+ SQ_OP2_INST_ADD_INT = 0x34,
+ SQ_OP2_INST_SUB_INT = 0x35,
+ SQ_OP2_INST_MAX_INT = 0x36,
+ SQ_OP2_INST_MIN_INT = 0x37,
+ SQ_OP2_INST_MAX_UINT = 0x38,
+ SQ_OP2_INST_MIN_UINT = 0x39,
+ SQ_OP2_INST_SETE_INT = 0x3a,
+ SQ_OP2_INST_SETGT_INT = 0x3b,
+ SQ_OP2_INST_SETGE_INT = 0x3c,
+ SQ_OP2_INST_SETNE_INT = 0x3d,
+ SQ_OP2_INST_SETGT_UINT = 0x3e,
+ SQ_OP2_INST_SETGE_UINT = 0x3f,
+ SQ_OP2_INST_KILLGT_UINT = 0x40,
+ SQ_OP2_INST_KILLGE_UINT = 0x41,
+ SQ_OP2_INST_PRED_SETE_INT = 0x42,
+ SQ_OP2_INST_PRED_SETGT_INT = 0x43,
+ SQ_OP2_INST_PRED_SETGE_INT = 0x44,
+ SQ_OP2_INST_PRED_SETNE_INT = 0x45,
+ SQ_OP2_INST_KILLE_INT = 0x46,
+ SQ_OP2_INST_KILLGT_INT = 0x47,
+ SQ_OP2_INST_KILLGE_INT = 0x48,
+ SQ_OP2_INST_KILLNE_INT = 0x49,
+ SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a,
+ SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b,
+ SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c,
+ SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d,
+ SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e,
+ SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f,
+ SQ_OP2_INST_FLT_TO_INT = 0x50,
+ SQ_OP2_INST_BFREV_INT = 0x51,
+ SQ_OP2_INST_ADDC_UINT = 0x52,
+ SQ_OP2_INST_SUBB_UINT = 0x53,
+ SQ_OP2_INST_GROUP_BARRIER = 0x54,
+ SQ_OP2_INST_GROUP_SEQ_BEGIN = 0x55,
+ SQ_OP2_INST_GROUP_SEQ_END = 0x56,
+ SQ_OP2_INST_SET_MODE = 0x57,
+ SQ_OP2_INST_SET_CF_IDX0 = 0x58,
+ SQ_OP2_INST_SET_CF_IDX1 = 0x59,
+ SQ_OP2_INST_SET_LDS_SIZE = 0x5a,
+ SQ_OP2_INST_EXP_IEEE = 0x81,
+ SQ_OP2_INST_LOG_CLAMPED = 0x82,
+ SQ_OP2_INST_LOG_IEEE = 0x83,
+ SQ_OP2_INST_RECIP_CLAMPED = 0x84,
+ SQ_OP2_INST_RECIP_FF = 0x85,
+ SQ_OP2_INST_RECIP_IEEE = 0x86,
+ SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x87,
+ SQ_OP2_INST_RECIPSQRT_FF = 0x88,
+ SQ_OP2_INST_RECIPSQRT_IEEE = 0x89,
+ SQ_OP2_INST_SQRT_IEEE = 0x8a,
+ SQ_OP2_INST_SIN = 0x8d,
+ SQ_OP2_INST_COS = 0x8e,
+ SQ_OP2_INST_MULLO_INT = 0x8f,
+ SQ_OP2_INST_MULHI_INT = 0x90,
+ SQ_OP2_INST_MULLO_UINT = 0x91,
+ SQ_OP2_INST_MULHI_UINT = 0x92,
+ SQ_OP2_INST_RECIP_INT = 0x93,
+ SQ_OP2_INST_RECIP_UINT = 0x94,
+ SQ_OP2_INST_RECIP_64 = 0x95,
+ SQ_OP2_INST_RECIP_CLAMPED_64 = 0x96,
+ SQ_OP2_INST_RECIPSQRT_64 = 0x97,
+ SQ_OP2_INST_RECIPSQRT_CLAMPED_64 = 0x98,
+ SQ_OP2_INST_SQRT_64 = 0x99,
+ SQ_OP2_INST_FLT_TO_UINT = 0x9a,
+ SQ_OP2_INST_INT_TO_FLT = 0x9b,
+ SQ_OP2_INST_UINT_TO_FLT = 0x9c,
+ SQ_OP2_INST_BFM_INT = 0xa0,
+ SQ_OP2_INST_FLT32_TO_FLT16 = 0xa2,
+ SQ_OP2_INST_FLT16_TO_FLT32 = 0xa3,
+ SQ_OP2_INST_UBYTE0_FLT = 0xa4,
+ SQ_OP2_INST_UBYTE1_FLT = 0xa5,
+ SQ_OP2_INST_UBYTE2_FLT = 0xa6,
+ SQ_OP2_INST_UBYTE3_FLT = 0xa7,
+ SQ_OP2_INST_BCNT_INT = 0xaa,
+ SQ_OP2_INST_FFBH_UINT = 0xab,
+ SQ_OP2_INST_FFBL_INT = 0xac,
+ SQ_OP2_INST_FFBH_INT = 0xad,
+ SQ_OP2_INST_FLT_TO_UINT4 = 0xae,
+ SQ_OP2_INST_DOT_IEEE = 0xaf,
+ SQ_OP2_INST_FLT_TO_INT_RPI = 0xb0,
+ SQ_OP2_INST_FLT_TO_INT_FLOOR = 0xb1,
+ SQ_OP2_INST_MULHI_UINT24 = 0xb2,
+ SQ_OP2_INST_MBCNT_32HI_INT = 0xb3,
+ SQ_OP2_INST_OFFSET_TO_FLT = 0xb4,
+ SQ_OP2_INST_MUL_UINT24 = 0xb5,
+ SQ_OP2_INST_BCNT_ACCUM_PREV_INT = 0xb6,
+ SQ_OP2_INST_MBCNT_32LO_ACCUM_PREV_INT = 0xb7,
+ SQ_OP2_INST_SETE_64 = 0xb8,
+ SQ_OP2_INST_SETNE_64 = 0xb9,
+ SQ_OP2_INST_SETGT_64 = 0xba,
+ SQ_OP2_INST_SETGE_64 = 0xbb,
+ SQ_OP2_INST_MIN_64 = 0xbc,
+ SQ_OP2_INST_MAX_64 = 0xbd,
+ SQ_OP2_INST_DOT4 = 0xbe,
+ SQ_OP2_INST_DOT4_IEEE = 0xbf,
+ SQ_OP2_INST_CUBE = 0xc0,
+ SQ_OP2_INST_MAX4 = 0xc1,
+ SQ_OP2_INST_FREXP_64 = 0xc4,
+ SQ_OP2_INST_LDEXP_64 = 0xc5,
+ SQ_OP2_INST_FRACT_64 = 0xc6,
+ SQ_OP2_INST_PRED_SETGT_64 = 0xc7,
+ SQ_OP2_INST_PRED_SETE_64 = 0xc8,
+ SQ_OP2_INST_PRED_SETGE_64 = 0xc9,
+ SQ_OP2_INST_MUL_64 = 0xca,
+ SQ_OP2_INST_ADD_64 = 0xcb,
+ SQ_OP2_INST_MOVA_INT = 0xcc,
+ SQ_OP2_INST_FLT64_TO_FLT32 = 0xcd,
+ SQ_OP2_INST_FLT32_TO_FLT64 = 0xce,
+ SQ_OP2_INST_SAD_ACCUM_PREV_UINT = 0xcf,
+ SQ_OP2_INST_DOT = 0xd0,
+ SQ_OP2_INST_MUL_PREV = 0xd1,
+ SQ_OP2_INST_MUL_IEEE_PREV = 0xd2,
+ SQ_OP2_INST_ADD_PREV = 0xd3,
+ SQ_OP2_INST_MULADD_PREV = 0xd4,
+ SQ_OP2_INST_MULADD_IEEE_PREV = 0xd5,
+ SQ_OP2_INST_INTERP_XY = 0xd6,
+ SQ_OP2_INST_INTERP_ZW = 0xd7,
+ SQ_OP2_INST_INTERP_X = 0xd8,
+ SQ_OP2_INST_INTERP_Z = 0xd9,
+ SQ_OP2_INST_STORE_FLAGS = 0xda,
+ SQ_OP2_INST_LOAD_STORE_FLAGS = 0xdb,
+ SQ_OP2_INST_INTERP_LOAD_P0 = 0xe0,
+ SQ_OP2_INST_INTERP_LOAD_P10 = 0xe1,
+ SQ_OP2_INST_INTERP_LOAD_P20 = 0xe2,
+ SQ_CF_WORD1 = 0x00008dfc,
+ POP_COUNT_mask = 0x07 << 0,
+ POP_COUNT_shift = 0,
+ CF_CONST_mask = 0x1f << 3,
+ CF_CONST_shift = 3,
+ COND_mask = 0x03 << 8,
+ COND_shift = 8,
+ SQ_CF_COND_ACTIVE = 0x00,
+ SQ_CF_COND_FALSE = 0x01,
+ SQ_CF_COND_BOOL = 0x02,
+ SQ_CF_COND_NOT_BOOL = 0x03,
+ SQ_CF_WORD1__COUNT_mask = 0x3f << 10,
+ SQ_CF_WORD1__COUNT_shift = 10,
+/* VALID_PIXEL_MODE_bit = 1 << 20, */
+/* END_OF_PROGRAM_bit = 1 << 21, */
+ SQ_CF_WORD1__CF_INST_mask = 0xff << 22,
+ SQ_CF_WORD1__CF_INST_shift = 22,
+ SQ_CF_INST_NOP = 0x00,
+ SQ_CF_INST_TC = 0x01,
+ SQ_CF_INST_VC = 0x02,
+ SQ_CF_INST_GDS = 0x03,
+ SQ_CF_INST_LOOP_START = 0x04,
+ SQ_CF_INST_LOOP_END = 0x05,
+ SQ_CF_INST_LOOP_START_DX10 = 0x06,
+ SQ_CF_INST_LOOP_START_NO_AL = 0x07,
+ SQ_CF_INST_LOOP_CONTINUE = 0x08,
+ SQ_CF_INST_LOOP_BREAK = 0x09,
+ SQ_CF_INST_JUMP = 0x0a,
+ SQ_CF_INST_PUSH = 0x0b,
+ SQ_CF_INST_ELSE = 0x0d,
+ SQ_CF_INST_POP = 0x0e,
+ SQ_CF_INST_CALL = 0x12,
+ SQ_CF_INST_CALL_FS = 0x13,
+ SQ_CF_INST_RETURN = 0x14,
+ SQ_CF_INST_EMIT_VERTEX = 0x15,
+ SQ_CF_INST_EMIT_CUT_VERTEX = 0x16,
+ SQ_CF_INST_CUT_VERTEX = 0x17,
+ SQ_CF_INST_KILL = 0x18,
+ SQ_CF_INST_WAIT_ACK = 0x1a,
+ SQ_CF_INST_TC_ACK = 0x1b,
+ SQ_CF_INST_VC_ACK = 0x1c,
+ SQ_CF_INST_JUMPTABLE = 0x1d,
+ SQ_CF_INST_GLOBAL_WAVE_SYNC = 0x1e,
+ SQ_CF_INST_HALT = 0x1f,
+/* WHOLE_QUAD_MODE_bit = 1 << 30, */
+/* BARRIER_bit = 1 << 31, */
+ SQ_VTX_WORD1_SEM = 0x00008dfc,
+ SEMANTIC_ID_mask = 0xff << 0,
+ SEMANTIC_ID_shift = 0,
+ SQ_TEX_WORD0 = 0x00008dfc,
+ TEX_INST_mask = 0x1f << 0,
+ TEX_INST_shift = 0,
+ SQ_TEX_INST_LD = 0x03,
+ SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04,
+ SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05,
+ SQ_TEX_INST_GET_LOD = 0x06,
+ SQ_TEX_INST_GET_GRADIENTS_H = 0x07,
+ SQ_TEX_INST_GET_GRADIENTS_V = 0x08,
+ SQ_TEX_INST_SET_TEXTURE_OFFSETS = 0x09,
+ SQ_TEX_INST_KEEP_GRADIENTS = 0x0a,
+ SQ_TEX_INST_SET_GRADIENTS_H = 0x0b,
+ SQ_TEX_INST_SET_GRADIENTS_V = 0x0c,
+ SQ_TEX_INST_PASS = 0x0d,
+ SQ_TEX_INST_SAMPLE = 0x10,
+ SQ_TEX_INST_SAMPLE_L = 0x11,
+ SQ_TEX_INST_SAMPLE_LB = 0x12,
+ SQ_TEX_INST_SAMPLE_LZ = 0x13,
+ SQ_TEX_INST_SAMPLE_G = 0x14,
+ SQ_TEX_INST_GATHER4 = 0x15,
+ SQ_TEX_INST_SAMPLE_G_LB = 0x16,
+ SQ_TEX_INST_GATHER4_O = 0x17,
+ SQ_TEX_INST_SAMPLE_C = 0x18,
+ SQ_TEX_INST_SAMPLE_C_L = 0x19,
+ SQ_TEX_INST_SAMPLE_C_LB = 0x1a,
+ SQ_TEX_INST_SAMPLE_C_LZ = 0x1b,
+ SQ_TEX_INST_SAMPLE_C_G = 0x1c,
+ SQ_TEX_INST_GATHER4_C = 0x1d,
+ SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e,
+ SQ_TEX_INST_GATHER4_C_O = 0x1f,
+ INST_MOD_mask = 0x03 << 5,
+ INST_MOD_shift = 5,
+/* FETCH_WHOLE_QUAD_bit = 1 << 7, */
+ RESOURCE_ID_mask = 0xff << 8,
+ RESOURCE_ID_shift = 8,
+ SQ_TEX_WORD0__SRC_GPR_mask = 0x7f << 16,
+ SQ_TEX_WORD0__SRC_GPR_shift = 16,
+/* SRC_REL_bit = 1 << 23, */
+ SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24,
+ RESOURCE_INDEX_MODE_mask = 0x03 << 25,
+ RESOURCE_INDEX_MODE_shift = 25,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ SAMPLER_INDEX_MODE_mask = 0x03 << 27,
+ SAMPLER_INDEX_MODE_shift = 27,
+/* SQ_CF_INDEX_NONE = 0x00, */
+/* SQ_CF_INDEX_0 = 0x01, */
+/* SQ_CF_INDEX_1 = 0x02, */
+/* SQ_CF_INVALID = 0x03, */
+ SQ_VTX_WORD1_GPR = 0x00008dfc,
+ SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0,
+ SQ_VTX_WORD1_GPR__DST_GPR_shift = 0,
+ SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7,
+ SQ_ALU_WORD1_LDS_IDX_OP = 0x00008dfc,
+/* SRC2_SEL_mask = 0x1ff << 0, */
+/* SRC2_SEL_shift = 0, */
+/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */
+/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */
+/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */
+/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */
+/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */
+/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */
+/* SQ_ALU_SRC_TIME_HI = 0xe3, */
+/* SQ_ALU_SRC_TIME_LO = 0xe4, */
+/* SQ_ALU_SRC_MASK_HI = 0xe5, */
+/* SQ_ALU_SRC_MASK_LO = 0xe6, */
+/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */
+/* SQ_ALU_SRC_SIMD_ID = 0xe8, */
+/* SQ_ALU_SRC_SE_ID = 0xe9, */
+/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */
+/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */
+/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */
+/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */
+/* SQ_ALU_SRC_LOOP_IDX = 0xee, */
+/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */
+/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */
+/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */
+/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */
+/* SQ_ALU_SRC_1_DBL_L = 0xf4, */
+/* SQ_ALU_SRC_1_DBL_M = 0xf5, */
+/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */
+/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+/* SRC2_REL_bit = 1 << 9, */
+/* SRC2_CHAN_mask = 0x03 << 10, */
+/* SRC2_CHAN_shift = 10, */
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ IDX_OFFSET_1_bit = 1 << 12,
+ SQ_ALU_WORD1_LDS_IDX_OP__ALU_INST_mask = 0x1f << 13,
+ SQ_ALU_WORD1_LDS_IDX_OP__ALU_INST_shift = 13,
+/* SQ_OP3_INST_BFE_UINT = 0x04, */
+/* SQ_OP3_INST_BFE_INT = 0x05, */
+/* SQ_OP3_INST_BFI_INT = 0x06, */
+/* SQ_OP3_INST_FMA = 0x07, */
+/* SQ_OP3_INST_CNDNE_64 = 0x09, */
+/* SQ_OP3_INST_FMA_64 = 0x0a, */
+/* SQ_OP3_INST_LERP_UINT = 0x0b, */
+/* SQ_OP3_INST_BIT_ALIGN_INT = 0x0c, */
+/* SQ_OP3_INST_BYTE_ALIGN_INT = 0x0d, */
+/* SQ_OP3_INST_SAD_ACCUM_UINT = 0x0e, */
+/* SQ_OP3_INST_SAD_ACCUM_HI_UINT = 0x0f, */
+/* SQ_OP3_INST_MULADD_UINT24 = 0x10, */
+/* SQ_OP3_INST_LDS_IDX_OP = 0x11, */
+/* SQ_OP3_INST_MULADD = 0x14, */
+/* SQ_OP3_INST_MULADD_M2 = 0x15, */
+/* SQ_OP3_INST_MULADD_M4 = 0x16, */
+/* SQ_OP3_INST_MULADD_D2 = 0x17, */
+/* SQ_OP3_INST_MULADD_IEEE = 0x18, */
+/* SQ_OP3_INST_CNDE = 0x19, */
+/* SQ_OP3_INST_CNDGT = 0x1a, */
+/* SQ_OP3_INST_CNDGE = 0x1b, */
+/* SQ_OP3_INST_CNDE_INT = 0x1c, */
+/* SQ_OP3_INST_CNDGT_INT = 0x1d, */
+/* SQ_OP3_INST_CNDGE_INT = 0x1e, */
+/* SQ_OP3_INST_MUL_LIT = 0x1f, */
+/* BANK_SWIZZLE_mask = 0x07 << 18, */
+/* BANK_SWIZZLE_shift = 18, */
+/* SQ_ALU_VEC_012 = 0x00, */
+/* SQ_ALU_VEC_021 = 0x01, */
+/* SQ_ALU_VEC_120 = 0x02, */
+/* SQ_ALU_VEC_102 = 0x03, */
+/* SQ_ALU_VEC_201 = 0x04, */
+/* SQ_ALU_VEC_210 = 0x05, */
+ LDS_OP_mask = 0x3f << 21,
+ LDS_OP_shift = 21,
+/* SQ_DS_INST_ADD = 0x00, */
+/* SQ_DS_INST_SUB = 0x01, */
+/* SQ_DS_INST_RSUB = 0x02, */
+/* SQ_DS_INST_INC = 0x03, */
+/* SQ_DS_INST_DEC = 0x04, */
+/* SQ_DS_INST_MIN_INT = 0x05, */
+/* SQ_DS_INST_MAX_INT = 0x06, */
+/* SQ_DS_INST_MIN_UINT = 0x07, */
+/* SQ_DS_INST_MAX_UINT = 0x08, */
+/* SQ_DS_INST_AND = 0x09, */
+/* SQ_DS_INST_OR = 0x0a, */
+/* SQ_DS_INST_XOR = 0x0b, */
+/* SQ_DS_INST_MSKOR = 0x0c, */
+/* SQ_DS_INST_WRITE = 0x0d, */
+/* SQ_DS_INST_WRITE_REL = 0x0e, */
+/* SQ_DS_INST_WRITE2 = 0x0f, */
+/* SQ_DS_INST_CMP_STORE = 0x10, */
+/* SQ_DS_INST_CMP_STORE_SPF = 0x11, */
+/* SQ_DS_INST_BYTE_WRITE = 0x12, */
+/* SQ_DS_INST_SHORT_WRITE = 0x13, */
+/* SQ_DS_INST_ADD_RET = 0x20, */
+/* SQ_DS_INST_SUB_RET = 0x21, */
+/* SQ_DS_INST_RSUB_RET = 0x22, */
+/* SQ_DS_INST_INC_RET = 0x23, */
+/* SQ_DS_INST_DEC_RET = 0x24, */
+/* SQ_DS_INST_MIN_INT_RET = 0x25, */
+/* SQ_DS_INST_MAX_INT_RET = 0x26, */
+/* SQ_DS_INST_MIN_UINT_RET = 0x27, */
+/* SQ_DS_INST_MAX_UINT_RET = 0x28, */
+/* SQ_DS_INST_AND_RET = 0x29, */
+/* SQ_DS_INST_OR_RET = 0x2a, */
+/* SQ_DS_INST_XOR_RET = 0x2b, */
+/* SQ_DS_INST_MSKOR_RET = 0x2c, */
+/* SQ_DS_INST_XCHG_RET = 0x2d, */
+/* SQ_DS_INST_XCHG_REL_RET = 0x2e, */
+/* SQ_DS_INST_XCHG2_RET = 0x2f, */
+/* SQ_DS_INST_CMP_XCHG_RET = 0x30, */
+/* SQ_DS_INST_CMP_XCHG_SPF_RET = 0x31, */
+/* SQ_DS_INST_READ_RET = 0x32, */
+/* SQ_DS_INST_READ_REL_RET = 0x33, */
+/* SQ_DS_INST_READ2_RET = 0x34, */
+/* SQ_DS_INST_READWRITE_RET = 0x35, */
+/* SQ_DS_INST_BYTE_READ_RET = 0x36, */
+/* SQ_DS_INST_UBYTE_READ_RET = 0x37, */
+/* SQ_DS_INST_SHORT_READ_RET = 0x38, */
+/* SQ_DS_INST_USHORT_READ_RET = 0x39, */
+/* SQ_DS_INST_ATOMIC_ORDERED_ALLOC_RET = 0x3f, */
+ IDX_OFFSET_0_bit = 1 << 27,
+ IDX_OFFSET_2_bit = 1 << 28,
+/* DST_CHAN_mask = 0x03 << 29, */
+/* DST_CHAN_shift = 29, */
+/* CHAN_X = 0x00, */
+/* CHAN_Y = 0x01, */
+/* CHAN_Z = 0x02, */
+/* CHAN_W = 0x03, */
+ IDX_OFFSET_3_bit = 1 << 31,
+ SQ_CF_ENCODING_WORD1 = 0x00008dfc,
+ SQ_CF_ENCODING_WORD1__ENCODING_mask = 0x03 << 28,
+ SQ_CF_ENCODING_WORD1__ENCODING_shift = 28,
+ SQ_CF_ENCODING_INST_CF = 0x00,
+ SQ_CF_ENCODING_INST_ALLOC_EXPORT = 0x01,
+ SQ_CF_ENCODING_INST_ALU0 = 0x02,
+ SQ_CF_ENCODING_INST_ALU1 = 0x03,
+ SQ_ALU_WORD0 = 0x00008dfc,
+/* SRC0_SEL_mask = 0x1ff << 0, */
+/* SRC0_SEL_shift = 0, */
+/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */
+/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */
+/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */
+/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */
+/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */
+/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */
+/* SQ_ALU_SRC_TIME_HI = 0xe3, */
+/* SQ_ALU_SRC_TIME_LO = 0xe4, */
+/* SQ_ALU_SRC_MASK_HI = 0xe5, */
+/* SQ_ALU_SRC_MASK_LO = 0xe6, */
+/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */
+/* SQ_ALU_SRC_SIMD_ID = 0xe8, */
+/* SQ_ALU_SRC_SE_ID = 0xe9, */
+/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */
+/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */
+/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */
+/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */
+/* SQ_ALU_SRC_LOOP_IDX = 0xee, */
+/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */
+/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */
+/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */
+/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */
+/* SQ_ALU_SRC_1_DBL_L = 0xf4, */
+/* SQ_ALU_SRC_1_DBL_M = 0xf5, */
+/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */
+/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+/* SRC0_REL_bit = 1 << 9, */
+/* SRC0_CHAN_mask = 0x03 << 10, */
+/* SRC0_CHAN_shift = 10, */
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ SRC0_NEG_bit = 1 << 12,
+/* SRC1_SEL_mask = 0x1ff << 13, */
+/* SRC1_SEL_shift = 13, */
+/* SQ_ALU_SRC_LDS_OQ_A = 0xdb, */
+/* SQ_ALU_SRC_LDS_OQ_B = 0xdc, */
+/* SQ_ALU_SRC_LDS_OQ_A_POP = 0xdd, */
+/* SQ_ALU_SRC_LDS_OQ_B_POP = 0xde, */
+/* SQ_ALU_SRC_LDS_DIRECT_A = 0xdf, */
+/* SQ_ALU_SRC_LDS_DIRECT_B = 0xe0, */
+/* SQ_ALU_SRC_TIME_HI = 0xe3, */
+/* SQ_ALU_SRC_TIME_LO = 0xe4, */
+/* SQ_ALU_SRC_MASK_HI = 0xe5, */
+/* SQ_ALU_SRC_MASK_LO = 0xe6, */
+/* SQ_ALU_SRC_HW_WAVE_ID = 0xe7, */
+/* SQ_ALU_SRC_SIMD_ID = 0xe8, */
+/* SQ_ALU_SRC_SE_ID = 0xe9, */
+/* SQ_ALU_SRC_HW_THREADGRP_ID = 0xea, */
+/* SQ_ALU_SRC_WAVE_ID_IN_GRP = 0xeb, */
+/* SQ_ALU_SRC_NUM_THREADGRP_WAVES = 0xec, */
+/* SQ_ALU_SRC_HW_ALU_ODD = 0xed, */
+/* SQ_ALU_SRC_LOOP_IDX = 0xee, */
+/* SQ_ALU_SRC_PARAM_BASE_ADDR = 0xf0, */
+/* SQ_ALU_SRC_NEW_PRIM_MASK = 0xf1, */
+/* SQ_ALU_SRC_PRIM_MASK_HI = 0xf2, */
+/* SQ_ALU_SRC_PRIM_MASK_LO = 0xf3, */
+/* SQ_ALU_SRC_1_DBL_L = 0xf4, */
+/* SQ_ALU_SRC_1_DBL_M = 0xf5, */
+/* SQ_ALU_SRC_0_5_DBL_L = 0xf6, */
+/* SQ_ALU_SRC_0_5_DBL_M = 0xf7, */
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+/* SRC1_REL_bit = 1 << 22, */
+/* SRC1_CHAN_mask = 0x03 << 23, */
+/* SRC1_CHAN_shift = 23, */
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ SRC1_NEG_bit = 1 << 25,
+/* INDEX_MODE_mask = 0x07 << 26, */
+/* INDEX_MODE_shift = 26, */
+/* SQ_INDEX_AR_X = 0x00, */
+/* SQ_INDEX_LOOP = 0x04, */
+/* SQ_INDEX_GLOBAL = 0x05, */
+/* SQ_INDEX_GLOBAL_AR_X = 0x06, */
+/* PRED_SEL_mask = 0x03 << 29, */
+/* PRED_SEL_shift = 29, */
+/* SQ_PRED_SEL_OFF = 0x00, */
+/* SQ_PRED_SEL_ZERO = 0x02, */
+/* SQ_PRED_SEL_ONE = 0x03, */
+/* LAST_bit = 1 << 31, */
+ SQ_MEM_RD_WORD1 = 0x00008dfc,
+ SQ_MEM_RD_WORD1__DST_GPR_mask = 0x7f << 0,
+ SQ_MEM_RD_WORD1__DST_GPR_shift = 0,
+ SQ_MEM_RD_WORD1__DST_REL_bit = 1 << 7,
+ SQ_MEM_RD_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_MEM_RD_WORD1__DST_SEL_X_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_RD_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+ SQ_MEM_RD_WORD1__DST_SEL_Y_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_RD_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+ SQ_MEM_RD_WORD1__DST_SEL_Z_shift = 15,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_RD_WORD1__DST_SEL_W_mask = 0x07 << 18,
+ SQ_MEM_RD_WORD1__DST_SEL_W_shift = 18,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_MEM_RD_WORD1__DATA_FORMAT_mask = 0x3f << 22,
+ SQ_MEM_RD_WORD1__DATA_FORMAT_shift = 22,
+ SQ_MEM_RD_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28,
+ SQ_MEM_RD_WORD1__NUM_FORMAT_ALL_shift = 28,
+/* SQ_NUM_FORMAT_NORM = 0x00, */
+/* SQ_NUM_FORMAT_INT = 0x01, */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */
+ SQ_MEM_RD_WORD1__FORMAT_COMP_ALL_bit = 1 << 30,
+ SQ_MEM_RD_WORD1__SRF_MODE_ALL_bit = 1 << 31,
+ SQ_LSTMP_RING_BASE = 0x00008e10,
+ SQ_LSTMP_RING_SIZE = 0x00008e14,
+ SQ_HSTMP_RING_BASE = 0x00008e18,
+ SQ_HSTMP_RING_SIZE = 0x00008e1c,
+ SX_EXPORT_BUFFER_SIZES = 0x0000900c,
+ COLOR_BUFFER_SIZE_mask = 0xff << 0,
+ COLOR_BUFFER_SIZE_shift = 0,
+ POSITION_BUFFER_SIZE_mask = 0xff << 8,
+ POSITION_BUFFER_SIZE_shift = 8,
+ SMX_BUFFER_SIZE_mask = 0xff << 16,
+ SMX_BUFFER_SIZE_shift = 16,
+ SX_MEMORY_EXPORT_BASE = 0x00009010,
+ SX_MEMORY_EXPORT_SIZE = 0x00009014,
+ SPI_CONFIG_CNTL = 0x00009100,
+ GPR_WRITE_PRIORITY_mask = 0x3ffff << 0,
+ GPR_WRITE_PRIORITY_shift = 0,
+ SPI_CONFIG_CNTL_1 = 0x0000913c,
+ VTX_DONE_DELAY_mask = 0x0f << 0,
+ VTX_DONE_DELAY_shift = 0,
+ X_DELAY_14_CLKS = 0x00,
+ X_DELAY_16_CLKS = 0x01,
+ X_DELAY_18_CLKS = 0x02,
+ X_DELAY_20_CLKS = 0x03,
+ X_DELAY_22_CLKS = 0x04,
+ X_DELAY_24_CLKS = 0x05,
+ X_DELAY_26_CLKS = 0x06,
+ X_DELAY_28_CLKS = 0x07,
+ X_DELAY_30_CLKS = 0x08,
+ X_DELAY_32_CLKS = 0x09,
+ X_DELAY_34_CLKS = 0x0a,
+ X_DELAY_4_CLKS = 0x0b,
+ X_DELAY_6_CLKS = 0x0c,
+ X_DELAY_8_CLKS = 0x0d,
+ X_DELAY_10_CLKS = 0x0e,
+ X_DELAY_12_CLKS = 0x0f,
+ INTERP_ONE_PRIM_PER_ROW_bit = 1 << 4,
+ BC_OPTIMIZE_DISABLE_bit = 1 << 5,
+ PC_LIMIT_ENABLE_bit = 1 << 6,
+ PC_LIMIT_STRICT_bit = 1 << 7,
+ PC_LIMIT_SIZE_mask = 0xffff << 16,
+ PC_LIMIT_SIZE_shift = 16,
+ TD_CNTL = 0x00009494,
+ SYNC_PHASE_SH_mask = 0x03 << 0,
+ SYNC_PHASE_SH_shift = 0,
+ PAD_STALL_EN_bit = 1 << 8,
+ GATHER4_FLOAT_MODE_bit = 1 << 16,
+ TD_STATUS = 0x00009498,
+ BUSY_bit = 1 << 31,
+ TA_CNTL_AUX = 0x00009508,
+ TA_CNTL_AUX__DISABLE_CUBE_WRAP_bit = 1 << 0,
+ DISABLE_CUBE_ANISO_bit = 1 << 1,
+ GETLOD_SELECT_mask = 0x03 << 2,
+ GETLOD_SELECT_shift = 2,
+ X_SAMPLER_AND_RESOURCE_CLAMPED_LOD_IN_RESOURCE= 0x00,
+ DISABLE_IDLE_STALL_bit = 1 << 4,
+ TEX_COORD_PRECISION_bit = 1 << 28,
+ LOD_LOG2_TRUNC_bit = 1 << 29,
+ DB_ZPASS_COUNT_LOW = 0x00009870,
+ DB_ZPASS_COUNT_HI = 0x00009874,
+ COUNT_HI_mask = 0x7fffffff << 0,
+ COUNT_HI_shift = 0,
+ TD_PS_BORDER_COLOR_INDEX = 0x0000a400,
+ INDEX_mask = 0x1f << 0,
+ INDEX_shift = 0,
+ TD_PS_BORDER_COLOR_RED = 0x0000a404,
+ TD_PS_BORDER_COLOR_GREEN = 0x0000a408,
+ TD_PS_BORDER_COLOR_BLUE = 0x0000a40c,
+ TD_PS_BORDER_COLOR_ALPHA = 0x0000a410,
+ TD_VS_BORDER_COLOR_INDEX = 0x0000a414,
+/* INDEX_mask = 0x1f << 0, */
+/* INDEX_shift = 0, */
+ TD_VS_BORDER_COLOR_RED = 0x0000a418,
+ TD_VS_BORDER_COLOR_GREEN = 0x0000a41c,
+ TD_VS_BORDER_COLOR_BLUE = 0x0000a420,
+ TD_VS_BORDER_COLOR_ALPHA = 0x0000a424,
+ TD_GS_BORDER_COLOR_INDEX = 0x0000a428,
+/* INDEX_mask = 0x1f << 0, */
+/* INDEX_shift = 0, */
+ TD_GS_BORDER_COLOR_RED = 0x0000a42c,
+ TD_GS_BORDER_COLOR_GREEN = 0x0000a430,
+ TD_GS_BORDER_COLOR_BLUE = 0x0000a434,
+ TD_GS_BORDER_COLOR_ALPHA = 0x0000a438,
+ TD_HS_BORDER_COLOR_INDEX = 0x0000a43c,
+/* INDEX_mask = 0x1f << 0, */
+/* INDEX_shift = 0, */
+ TD_HS_BORDER_COLOR_RED = 0x0000a440,
+ TD_HS_BORDER_COLOR_GREEN = 0x0000a444,
+ TD_HS_BORDER_COLOR_BLUE = 0x0000a448,
+ TD_HS_BORDER_COLOR_ALPHA = 0x0000a44c,
+ TD_LS_BORDER_COLOR_INDEX = 0x0000a450,
+/* INDEX_mask = 0x1f << 0, */
+/* INDEX_shift = 0, */
+ TD_LS_BORDER_COLOR_RED = 0x0000a454,
+ TD_LS_BORDER_COLOR_GREEN = 0x0000a458,
+ TD_LS_BORDER_COLOR_BLUE = 0x0000a45c,
+ TD_LS_BORDER_COLOR_ALPHA = 0x0000a460,
+ TD_CS_BORDER_COLOR_INDEX = 0x0000a464,
+/* INDEX_mask = 0x1f << 0, */
+/* INDEX_shift = 0, */
+ TD_CS_BORDER_COLOR_RED = 0x0000a468,
+ TD_CS_BORDER_COLOR_GREEN = 0x0000a46c,
+ TD_CS_BORDER_COLOR_BLUE = 0x0000a470,
+ TD_CS_BORDER_COLOR_ALPHA = 0x0000a474,
+ DB_RENDER_CONTROL = 0x00028000,
+ DEPTH_CLEAR_ENABLE_bit = 1 << 0,
+ STENCIL_CLEAR_ENABLE_bit = 1 << 1,
+ DEPTH_COPY_bit = 1 << 2,
+ STENCIL_COPY_bit = 1 << 3,
+ RESUMMARIZE_ENABLE_bit = 1 << 4,
+ STENCIL_COMPRESS_DISABLE_bit = 1 << 5,
+ DEPTH_COMPRESS_DISABLE_bit = 1 << 6,
+ COPY_CENTROID_bit = 1 << 7,
+ COPY_SAMPLE_mask = 0x07 << 8,
+ COPY_SAMPLE_shift = 8,
+ COLOR_DISABLE_bit = 1 << 12,
+ DB_COUNT_CONTROL = 0x00028004,
+ ZPASS_INCREMENT_DISABLE_bit = 1 << 0,
+ PERFECT_ZPASS_COUNTS_bit = 1 << 1,
+ DB_DEPTH_VIEW = 0x00028008,
+ SLICE_START_mask = 0x7ff << 0,
+ SLICE_START_shift = 0,
+ SLICE_MAX_mask = 0x7ff << 13,
+ SLICE_MAX_shift = 13,
+ Z_READ_ONLY_bit = 1 << 24,
+ STENCIL_READ_ONLY_bit = 1 << 25,
+ DB_RENDER_OVERRIDE = 0x0002800c,
+ FORCE_HIZ_ENABLE_mask = 0x03 << 0,
+ FORCE_HIZ_ENABLE_shift = 0,
+ FORCE_OFF = 0x00,
+ FORCE_ENABLE = 0x01,
+ FORCE_DISABLE = 0x02,
+ FORCE_RESERVED = 0x03,
+ FORCE_HIS_ENABLE0_mask = 0x03 << 2,
+ FORCE_HIS_ENABLE0_shift = 2,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_HIS_ENABLE1_mask = 0x03 << 4,
+ FORCE_HIS_ENABLE1_shift = 4,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_SHADER_Z_ORDER_bit = 1 << 6,
+ FAST_Z_DISABLE_bit = 1 << 7,
+ FAST_STENCIL_DISABLE_bit = 1 << 8,
+ NOOP_CULL_DISABLE_bit = 1 << 9,
+ FORCE_COLOR_KILL_bit = 1 << 10,
+ FORCE_Z_READ_bit = 1 << 11,
+ FORCE_STENCIL_READ_bit = 1 << 12,
+ FORCE_FULL_Z_RANGE_mask = 0x03 << 13,
+ FORCE_FULL_Z_RANGE_shift = 13,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_QC_SMASK_CONFLICT_bit = 1 << 15,
+ DISABLE_VIEWPORT_CLAMP_bit = 1 << 16,
+ IGNORE_SC_ZRANGE_bit = 1 << 17,
+ DISABLE_FULLY_COVERED_bit = 1 << 18,
+ FORCE_Z_LIMIT_SUMM_mask = 0x03 << 19,
+ FORCE_Z_LIMIT_SUMM_shift = 19,
+ FORCE_SUMM_OFF = 0x00,
+ FORCE_SUMM_MINZ = 0x01,
+ FORCE_SUMM_MAXZ = 0x02,
+ FORCE_SUMM_BOTH = 0x03,
+ MAX_TILES_IN_DTT_mask = 0x1f << 21,
+ MAX_TILES_IN_DTT_shift = 21,
+ DISABLE_PIXEL_RATE_TILES_bit = 1 << 26,
+ FORCE_Z_DIRTY_bit = 1 << 27,
+ FORCE_STENCIL_DIRTY_bit = 1 << 28,
+ FORCE_Z_VALID_bit = 1 << 29,
+ FORCE_STENCIL_VALID_bit = 1 << 30,
+ PRESERVE_COMPRESSION_bit = 1 << 31,
+ DB_RENDER_OVERRIDE2 = 0x00028010,
+ PARTIAL_SQUAD_LAUNCH_CONTROL_mask = 0x03 << 0,
+ PARTIAL_SQUAD_LAUNCH_CONTROL_shift = 0,
+ PSLC_AUTO = 0x00,
+ PSLC_ON_HANG_ONLY = 0x01,
+ PSLC_ASAP = 0x02,
+ PSLC_COUNTDOWN = 0x03,
+ PARTIAL_SQUAD_LAUNCH_COUNTDOWN_mask = 0x07 << 2,
+ PARTIAL_SQUAD_LAUNCH_COUNTDOWN_shift = 2,
+ DISABLE_ZMASK_EXPCLEAR_OPTIMIZATIO_bit = 1 << 5,
+ DB_HTILE_DATA_BASE = 0x00028014,
+ DB_STENCIL_CLEAR = 0x00028028,
+ DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0,
+ DB_STENCIL_CLEAR__CLEAR_shift = 0,
+ MIN_mask = 0xff << 16,
+ MIN_shift = 16,
+ DB_DEPTH_CLEAR = 0x0002802c,
+ PA_SC_SCREEN_SCISSOR_TL = 0x00028030,
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0xffff << 0,
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0xffff << 16,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift = 16,
+ PA_SC_SCREEN_SCISSOR_BR = 0x00028034,
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0xffff << 0,
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0xffff << 16,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift = 16,
+ DB_Z_INFO = 0x00028040,
+ DB_Z_INFO__FORMAT_mask = 0x03 << 0,
+ DB_Z_INFO__FORMAT_shift = 0,
+ Z_INVALID = 0x00,
+ Z_16 = 0x01,
+ Z_24 = 0x02,
+ Z_32_FLOAT = 0x03,
+ DB_Z_INFO__ARRAY_MODE_mask = 0x0f << 4,
+ DB_Z_INFO__ARRAY_MODE_shift = 4,
+ ARRAY_LINEAR_GENERAL = 0x00,
+ ARRAY_LINEAR_ALIGNED = 0x01,
+ ARRAY_1D_TILED_THIN1 = 0x02,
+ ARRAY_2D_TILED_THIN1 = 0x04,
+ DB_Z_INFO__TILE_SPLIT_mask = 0x07 << 8,
+ DB_Z_INFO__TILE_SPLIT_shift = 8,
+ ADDR_SURF_TILE_SPLIT_64B = 0x00,
+ ADDR_SURF_TILE_SPLIT_128B = 0x01,
+ ADDR_SURF_TILE_SPLIT_256B = 0x02,
+ ADDR_SURF_TILE_SPLIT_512B = 0x03,
+ ADDR_SURF_TILE_SPLIT_1KB = 0x04,
+ ADDR_SURF_TILE_SPLIT_2KB = 0x05,
+ ADDR_SURF_TILE_SPLIT_4KB = 0x06,
+ DB_Z_INFO__NUM_BANKS_mask = 0x03 << 12,
+ DB_Z_INFO__NUM_BANKS_shift = 12,
+ ADDR_SURF_2_BANK = 0x00,
+ ADDR_SURF_4_BANK = 0x01,
+ ADDR_SURF_8_BANK = 0x02,
+ ADDR_SURF_16_BANK = 0x03,
+ DB_Z_INFO__BANK_WIDTH_mask = 0x03 << 16,
+ DB_Z_INFO__BANK_WIDTH_shift = 16,
+ ADDR_SURF_BANK_WIDTH_1 = 0x00,
+ ADDR_SURF_BANK_WIDTH_2 = 0x01,
+ ADDR_SURF_BANK_WIDTH_4 = 0x02,
+ ADDR_SURF_BANK_WIDTH_8 = 0x03,
+ DB_Z_INFO__BANK_HEIGHT_mask = 0x03 << 20,
+ DB_Z_INFO__BANK_HEIGHT_shift = 20,
+ ADDR_SURF_BANK_HEIGHT_1 = 0x00,
+ ADDR_SURF_BANK_HEIGHT_2 = 0x01,
+ ADDR_SURF_BANK_HEIGHT_4 = 0x02,
+ ADDR_SURF_BANK_HEIGHT_8 = 0x03,
+ DB_Z_INFO__MACRO_TILE_ASPECT_mask = 0x03 << 24,
+ DB_Z_INFO__MACRO_TILE_ASPECT_shift = 24,
+ ADDR_SURF_MACRO_ASPECT_1 = 0x00,
+ ADDR_SURF_MACRO_ASPECT_2 = 0x01,
+ ADDR_SURF_MACRO_ASPECT_4 = 0x02,
+ ADDR_SURF_MACRO_ASPECT_8 = 0x03,
+ ALLOW_EXPCLEAR_bit = 1 << 27,
+ READ_SIZE_bit = 1 << 28,
+ TILE_SURFACE_ENABLE_bit = 1 << 29,
+ DB_Z_INFO__TILE_COMPACT_bit = 1 << 30,
+ ZRANGE_PRECISION_bit = 1 << 31,
+ DB_STENCIL_INFO = 0x00028044,
+ DB_STENCIL_INFO__FORMAT_bit = 1 << 0,
+ DB_STENCIL_INFO__TILE_SPLIT_mask = 0x07 << 8,
+ DB_STENCIL_INFO__TILE_SPLIT_shift = 8,
+/* ADDR_SURF_TILE_SPLIT_64B = 0x00, */
+/* ADDR_SURF_TILE_SPLIT_128B = 0x01, */
+/* ADDR_SURF_TILE_SPLIT_256B = 0x02, */
+/* ADDR_SURF_TILE_SPLIT_512B = 0x03, */
+/* ADDR_SURF_TILE_SPLIT_1KB = 0x04, */
+/* ADDR_SURF_TILE_SPLIT_2KB = 0x05, */
+/* ADDR_SURF_TILE_SPLIT_4KB = 0x06, */
+ DB_Z_READ_BASE = 0x00028048,
+ DB_STENCIL_READ_BASE = 0x0002804c,
+ DB_Z_WRITE_BASE = 0x00028050,
+ DB_STENCIL_WRITE_BASE = 0x00028054,
+ DB_DEPTH_SIZE = 0x00028058,
+ PITCH_TILE_MAX_mask = 0x7ff << 0,
+ PITCH_TILE_MAX_shift = 0,
+ HEIGHT_TILE_MAX_mask = 0x7ff << 11,
+ HEIGHT_TILE_MAX_shift = 11,
+ DB_DEPTH_SLICE = 0x0002805c,
+ SLICE_TILE_MAX_mask = 0x3fffff << 0,
+ SLICE_TILE_MAX_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift = 0,
+ PA_SC_WINDOW_OFFSET = 0x00028200,
+ WINDOW_X_OFFSET_mask = 0xffff << 0,
+ WINDOW_X_OFFSET_shift = 0,
+ WINDOW_Y_OFFSET_mask = 0xffff << 16,
+ WINDOW_Y_OFFSET_shift = 16,
+ PA_SC_WINDOW_SCISSOR_TL = 0x00028204,
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x7fff << 0,
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x7fff << 16,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift = 16,
+ WINDOW_OFFSET_DISABLE_bit = 1 << 31,
+ PA_SC_WINDOW_SCISSOR_BR = 0x00028208,
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x7fff << 0,
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x7fff << 16,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift = 16,
+ PA_SC_CLIPRECT_RULE = 0x0002820c,
+ CLIP_RULE_mask = 0xffff << 0,
+ CLIP_RULE_shift = 0,
+ PA_SC_CLIPRECT_0_TL = 0x00028210,
+ PA_SC_CLIPRECT_0_TL_num = 4,
+ PA_SC_CLIPRECT_0_TL_offset = 8,
+ PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x7fff << 0,
+ PA_SC_CLIPRECT_0_TL__TL_X_shift = 0,
+ PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x7fff << 16,
+ PA_SC_CLIPRECT_0_TL__TL_Y_shift = 16,
+ PA_SC_CLIPRECT_0_BR = 0x00028214,
+ PA_SC_CLIPRECT_0_BR_num = 4,
+ PA_SC_CLIPRECT_0_BR_offset = 8,
+ PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x7fff << 0,
+ PA_SC_CLIPRECT_0_BR__BR_X_shift = 0,
+ PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x7fff << 16,
+ PA_SC_CLIPRECT_0_BR__BR_Y_shift = 16,
+ PA_SC_EDGERULE = 0x00028230,
+ ER_TRI_mask = 0x0f << 0,
+ ER_TRI_shift = 0,
+ ER_POINT_mask = 0x0f << 4,
+ ER_POINT_shift = 4,
+ ER_RECT_mask = 0x0f << 8,
+ ER_RECT_shift = 8,
+ ER_LINE_LR_mask = 0x3f << 12,
+ ER_LINE_LR_shift = 12,
+ ER_LINE_RL_mask = 0x3f << 18,
+ ER_LINE_RL_shift = 18,
+ ER_LINE_TB_mask = 0x0f << 24,
+ ER_LINE_TB_shift = 24,
+ ER_LINE_BT_mask = 0x0f << 28,
+ ER_LINE_BT_shift = 28,
+ PA_SU_HARDWARE_SCREEN_OFFSET = 0x00028234,
+ HW_SCREEN_OFFSET_X_mask = 0x1f << 0,
+ HW_SCREEN_OFFSET_X_shift = 0,
+ HW_SCREEN_OFFSET_Y_mask = 0x1f << 8,
+ HW_SCREEN_OFFSET_Y_shift = 8,
+ CB_TARGET_MASK = 0x00028238,
+ TARGET0_ENABLE_mask = 0x0f << 0,
+ TARGET0_ENABLE_shift = 0,
+ TARGET1_ENABLE_mask = 0x0f << 4,
+ TARGET1_ENABLE_shift = 4,
+ TARGET2_ENABLE_mask = 0x0f << 8,
+ TARGET2_ENABLE_shift = 8,
+ TARGET3_ENABLE_mask = 0x0f << 12,
+ TARGET3_ENABLE_shift = 12,
+ TARGET4_ENABLE_mask = 0x0f << 16,
+ TARGET4_ENABLE_shift = 16,
+ TARGET5_ENABLE_mask = 0x0f << 20,
+ TARGET5_ENABLE_shift = 20,
+ TARGET6_ENABLE_mask = 0x0f << 24,
+ TARGET6_ENABLE_shift = 24,
+ TARGET7_ENABLE_mask = 0x0f << 28,
+ TARGET7_ENABLE_shift = 28,
+ CB_SHADER_MASK = 0x0002823c,
+ OUTPUT0_ENABLE_mask = 0x0f << 0,
+ OUTPUT0_ENABLE_shift = 0,
+ OUTPUT1_ENABLE_mask = 0x0f << 4,
+ OUTPUT1_ENABLE_shift = 4,
+ OUTPUT2_ENABLE_mask = 0x0f << 8,
+ OUTPUT2_ENABLE_shift = 8,
+ OUTPUT3_ENABLE_mask = 0x0f << 12,
+ OUTPUT3_ENABLE_shift = 12,
+ OUTPUT4_ENABLE_mask = 0x0f << 16,
+ OUTPUT4_ENABLE_shift = 16,
+ OUTPUT5_ENABLE_mask = 0x0f << 20,
+ OUTPUT5_ENABLE_shift = 20,
+ OUTPUT6_ENABLE_mask = 0x0f << 24,
+ OUTPUT6_ENABLE_shift = 24,
+ OUTPUT7_ENABLE_mask = 0x0f << 28,
+ OUTPUT7_ENABLE_shift = 28,
+ PA_SC_GENERIC_SCISSOR_TL = 0x00028240,
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x7fff << 0,
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x7fff << 16,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift = 16,
+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */
+ PA_SC_GENERIC_SCISSOR_BR = 0x00028244,
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x7fff << 0,
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x7fff << 16,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift = 16,
+ PA_SC_VPORT_SCISSOR_0_TL = 0x00028250,
+ PA_SC_VPORT_SCISSOR_0_TL_num = 16,
+ PA_SC_VPORT_SCISSOR_0_TL_offset = 8,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x7fff << 0,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift = 0,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x7fff << 16,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift = 16,
+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */
+ PA_SC_VPORT_SCISSOR_0_BR = 0x00028254,
+ PA_SC_VPORT_SCISSOR_0_BR_num = 16,
+ PA_SC_VPORT_SCISSOR_0_BR_offset = 8,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x7fff << 0,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift = 0,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x7fff << 16,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift = 16,
+ PA_SC_VPORT_ZMIN_0 = 0x000282d0,
+ PA_SC_VPORT_ZMIN_0_num = 16,
+ PA_SC_VPORT_ZMIN_0_offset = 8,
+ PA_SC_VPORT_ZMAX_0 = 0x000282d4,
+ PA_SC_VPORT_ZMAX_0_num = 16,
+ PA_SC_VPORT_ZMAX_0_offset = 8,
+ SX_MISC = 0x00028350,
+ MULTIPASS_bit = 1 << 0,
+ SQ_VTX_SEMANTIC_0 = 0x00028380,
+ SQ_VTX_SEMANTIC_0_num = 32,
+/* SEMANTIC_ID_mask = 0xff << 0, */
+/* SEMANTIC_ID_shift = 0, */
+ VGT_MAX_VTX_INDX = 0x00028400,
+ VGT_MIN_VTX_INDX = 0x00028404,
+ VGT_INDX_OFFSET = 0x00028408,
+ VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c,
+ SX_ALPHA_TEST_CONTROL = 0x00028410,
+ ALPHA_FUNC_mask = 0x07 << 0,
+ ALPHA_FUNC_shift = 0,
+ REF_NEVER = 0x00,
+ REF_LESS = 0x01,
+ REF_EQUAL = 0x02,
+ REF_LEQUAL = 0x03,
+ REF_GREATER = 0x04,
+ REF_NOTEQUAL = 0x05,
+ REF_GEQUAL = 0x06,
+ REF_ALWAYS = 0x07,
+ ALPHA_TEST_ENABLE_bit = 1 << 3,
+ ALPHA_TEST_BYPASS_bit = 1 << 8,
+ CB_BLEND_RED = 0x00028414,
+ CB_BLEND_GREEN = 0x00028418,
+ CB_BLEND_BLUE = 0x0002841c,
+ CB_BLEND_ALPHA = 0x00028420,
+ DB_STENCILREFMASK = 0x00028430,
+ STENCILREF_mask = 0xff << 0,
+ STENCILREF_shift = 0,
+ STENCILMASK_mask = 0xff << 8,
+ STENCILMASK_shift = 8,
+ STENCILWRITEMASK_mask = 0xff << 16,
+ STENCILWRITEMASK_shift = 16,
+ DB_STENCILREFMASK_BF = 0x00028434,
+ STENCILREF_BF_mask = 0xff << 0,
+ STENCILREF_BF_shift = 0,
+ STENCILMASK_BF_mask = 0xff << 8,
+ STENCILMASK_BF_shift = 8,
+ STENCILWRITEMASK_BF_mask = 0xff << 16,
+ STENCILWRITEMASK_BF_shift = 16,
+ SX_ALPHA_REF = 0x00028438,
+ PA_CL_VPORT_XSCALE_0 = 0x0002843c,
+ PA_CL_VPORT_XSCALE_0_num = 16,
+ PA_CL_VPORT_XSCALE_0_offset = 24,
+ PA_CL_VPORT_XOFFSET_0 = 0x00028440,
+ PA_CL_VPORT_XOFFSET_0_num = 16,
+ PA_CL_VPORT_XOFFSET_0_offset = 24,
+ PA_CL_VPORT_YSCALE_0 = 0x00028444,
+ PA_CL_VPORT_YSCALE_0_num = 16,
+ PA_CL_VPORT_YSCALE_0_offset = 24,
+ PA_CL_VPORT_YOFFSET_0 = 0x00028448,
+ PA_CL_VPORT_YOFFSET_0_num = 16,
+ PA_CL_VPORT_YOFFSET_0_offset = 24,
+ PA_CL_VPORT_ZSCALE_0 = 0x0002844c,
+ PA_CL_VPORT_ZSCALE_0_num = 16,
+ PA_CL_VPORT_ZSCALE_0_offset = 24,
+ PA_CL_VPORT_ZOFFSET_0 = 0x00028450,
+ PA_CL_VPORT_ZOFFSET_0_num = 16,
+ PA_CL_VPORT_ZOFFSET_0_offset = 24,
+ PA_CL_UCP_0_X = 0x000285bc,
+ PA_CL_UCP_0_X_num = 6,
+ PA_CL_UCP_0_X_offset = 16,
+ PA_CL_UCP_0_Y = 0x000285c0,
+ PA_CL_UCP_0_Y_num = 6,
+ PA_CL_UCP_0_Y_offset = 16,
+ PA_CL_UCP_0_Z = 0x000285c4,
+ PA_CL_UCP_0_Z_num = 6,
+ PA_CL_UCP_0_Z_offset = 16,
+ PA_CL_UCP_0_W = 0x000285c8,
+ PA_CL_UCP_0_W_num = 6,
+ PA_CL_UCP_0_W_offset = 16,
+ SPI_VS_OUT_ID_0 = 0x0002861c,
+ SPI_VS_OUT_ID_0_num = 10,
+ SEMANTIC_0_mask = 0xff << 0,
+ SEMANTIC_0_shift = 0,
+ SEMANTIC_1_mask = 0xff << 8,
+ SEMANTIC_1_shift = 8,
+ SEMANTIC_2_mask = 0xff << 16,
+ SEMANTIC_2_shift = 16,
+ SEMANTIC_3_mask = 0xff << 24,
+ SEMANTIC_3_shift = 24,
+ SPI_PS_INPUT_CNTL_0 = 0x00028644,
+ SPI_PS_INPUT_CNTL_0_num = 32,
+ SEMANTIC_mask = 0xff << 0,
+ SEMANTIC_shift = 0,
+ DEFAULT_VAL_mask = 0x03 << 8,
+ DEFAULT_VAL_shift = 8,
+ X_0_0F = 0x00,
+ FLAT_SHADE_bit = 1 << 10,
+ CYL_WRAP_mask = 0x0f << 13,
+ CYL_WRAP_shift = 13,
+ PT_SPRITE_TEX_bit = 1 << 17,
+ SPI_VS_OUT_CONFIG = 0x000286c4,
+ VS_PER_COMPONENT_bit = 1 << 0,
+ VS_EXPORT_COUNT_mask = 0x1f << 1,
+ VS_EXPORT_COUNT_shift = 1,
+ VS_EXPORTS_FOG_bit = 1 << 8,
+ VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9,
+ VS_OUT_FOG_VEC_ADDR_shift = 9,
+ SPI_PS_IN_CONTROL_0 = 0x000286cc,
+ NUM_INTERP_mask = 0x3f << 0,
+ NUM_INTERP_shift = 0,
+ POSITION_ENA_bit = 1 << 8,
+ POSITION_CENTROID_bit = 1 << 9,
+ POSITION_ADDR_mask = 0x1f << 10,
+ POSITION_ADDR_shift = 10,
+ PARAM_GEN_mask = 0x0f << 15,
+ PARAM_GEN_shift = 15,
+ PERSP_GRADIENT_ENA_bit = 1 << 28,
+ LINEAR_GRADIENT_ENA_bit = 1 << 29,
+ POSITION_SAMPLE_bit = 1 << 30,
+ SPI_PS_IN_CONTROL_1 = 0x000286d0,
+ FRONT_FACE_ENA_bit = 1 << 8,
+ FRONT_FACE_ALL_BITS_bit = 1 << 11,
+ FRONT_FACE_ADDR_mask = 0x1f << 12,
+ FRONT_FACE_ADDR_shift = 12,
+ FOG_ADDR_mask = 0x7f << 17,
+ FOG_ADDR_shift = 17,
+ FIXED_PT_POSITION_ENA_bit = 1 << 24,
+ FIXED_PT_POSITION_ADDR_mask = 0x1f << 25,
+ FIXED_PT_POSITION_ADDR_shift = 25,
+ POSITION_ULC_bit = 1 << 30,
+ SPI_INTERP_CONTROL_0 = 0x000286d4,
+ FLAT_SHADE_ENA_bit = 1 << 0,
+ PNT_SPRITE_ENA_bit = 1 << 1,
+ PNT_SPRITE_OVRD_X_mask = 0x07 << 2,
+ PNT_SPRITE_OVRD_X_shift = 2,
+ SPI_PNT_SPRITE_SEL_0 = 0x00,
+ SPI_PNT_SPRITE_SEL_1 = 0x01,
+ SPI_PNT_SPRITE_SEL_S = 0x02,
+ SPI_PNT_SPRITE_SEL_T = 0x03,
+ SPI_PNT_SPRITE_SEL_NONE = 0x04,
+ PNT_SPRITE_OVRD_Y_mask = 0x07 << 5,
+ PNT_SPRITE_OVRD_Y_shift = 5,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_OVRD_Z_mask = 0x07 << 8,
+ PNT_SPRITE_OVRD_Z_shift = 8,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_OVRD_W_mask = 0x07 << 11,
+ PNT_SPRITE_OVRD_W_shift = 11,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_TOP_1_bit = 1 << 14,
+ SPI_INPUT_Z = 0x000286d8,
+ PROVIDE_Z_TO_SPI_bit = 1 << 0,
+ SPI_FOG_CNTL = 0x000286dc,
+ PASS_FOG_THROUGH_PS_bit = 1 << 0,
+ SPI_BARYC_CNTL = 0x000286e0,
+ PERSP_CENTER_ENA_mask = 0x03 << 0,
+ PERSP_CENTER_ENA_shift = 0,
+ X_OFF = 0x00,
+ PERSP_CENTER_ENA__X_ON_AT_CENTER = 0x01,
+ PERSP_CENTER_ENA__X_ON_AT_CENTROID = 0x02,
+ PERSP_CENTROID_ENA_mask = 0x03 << 4,
+ PERSP_CENTROID_ENA_shift = 4,
+/* X_OFF = 0x00, */
+ PERSP_CENTROID_ENA__X_ON_AT_CENTROID = 0x01,
+ PERSP_CENTROID_ENA__X_ON_AT_CENTER = 0x02,
+ PERSP_SAMPLE_ENA_mask = 0x03 << 8,
+ PERSP_SAMPLE_ENA_shift = 8,
+/* X_OFF = 0x00, */
+ PERSP_PULL_MODEL_ENA_mask = 0x03 << 12,
+ PERSP_PULL_MODEL_ENA_shift = 12,
+/* X_OFF = 0x00, */
+ LINEAR_CENTER_ENA_mask = 0x03 << 16,
+ LINEAR_CENTER_ENA_shift = 16,
+/* X_OFF = 0x00, */
+ LINEAR_CENTER_ENA__X_ON_AT_CENTER = 0x01,
+ LINEAR_CENTER_ENA__X_ON_AT_CENTROID = 0x02,
+ LINEAR_CENTROID_ENA_mask = 0x03 << 20,
+ LINEAR_CENTROID_ENA_shift = 20,
+/* X_OFF = 0x00, */
+ LINEAR_CENTROID_ENA__X_ON_AT_CENTROID = 0x01,
+ LINEAR_CENTROID_ENA__X_ON_AT_CENTER = 0x02,
+ LINEAR_SAMPLE_ENA_mask = 0x03 << 24,
+ LINEAR_SAMPLE_ENA_shift = 24,
+/* X_OFF = 0x00, */
+ SPI_PS_IN_CONTROL_2 = 0x000286e4,
+ LINE_STIPPLE_TEX_ADDR_mask = 0xff << 0,
+ LINE_STIPPLE_TEX_ADDR_shift = 0,
+ LINE_STIPPLE_TEX_ENA_bit = 1 << 8,
+ CB_BLEND0_CONTROL = 0x00028780,
+ CB_BLEND0_CONTROL_num = 8,
+ COLOR_SRCBLEND_mask = 0x1f << 0,
+ COLOR_SRCBLEND_shift = 0,
+ BLEND_ZERO = 0x00,
+ BLEND_ONE = 0x01,
+ BLEND_SRC_COLOR = 0x02,
+ BLEND_ONE_MINUS_SRC_COLOR = 0x03,
+ BLEND_SRC_ALPHA = 0x04,
+ BLEND_ONE_MINUS_SRC_ALPHA = 0x05,
+ BLEND_DST_ALPHA = 0x06,
+ BLEND_ONE_MINUS_DST_ALPHA = 0x07,
+ BLEND_DST_COLOR = 0x08,
+ BLEND_ONE_MINUS_DST_COLOR = 0x09,
+ BLEND_SRC_ALPHA_SATURATE = 0x0a,
+ BLEND_BOTH_SRC_ALPHA = 0x0b,
+ BLEND_BOTH_INV_SRC_ALPHA = 0x0c,
+ BLEND_CONSTANT_COLOR = 0x0d,
+ BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e,
+ BLEND_SRC1_COLOR = 0x0f,
+ BLEND_INV_SRC1_COLOR = 0x10,
+ BLEND_SRC1_ALPHA = 0x11,
+ BLEND_INV_SRC1_ALPHA = 0x12,
+ BLEND_CONSTANT_ALPHA = 0x13,
+ BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14,
+ COLOR_COMB_FCN_mask = 0x07 << 5,
+ COLOR_COMB_FCN_shift = 5,
+ COMB_DST_PLUS_SRC = 0x00,
+ COMB_SRC_MINUS_DST = 0x01,
+ COMB_MIN_DST_SRC = 0x02,
+ COMB_MAX_DST_SRC = 0x03,
+ COMB_DST_MINUS_SRC = 0x04,
+ COLOR_DESTBLEND_mask = 0x1f << 8,
+ COLOR_DESTBLEND_shift = 8,
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+ ALPHA_SRCBLEND_mask = 0x1f << 16,
+ ALPHA_SRCBLEND_shift = 16,
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+ ALPHA_COMB_FCN_mask = 0x07 << 21,
+ ALPHA_COMB_FCN_shift = 21,
+/* COMB_DST_PLUS_SRC = 0x00, */
+/* COMB_SRC_MINUS_DST = 0x01, */
+/* COMB_MIN_DST_SRC = 0x02, */
+/* COMB_MAX_DST_SRC = 0x03, */
+/* COMB_DST_MINUS_SRC = 0x04, */
+ ALPHA_DESTBLEND_mask = 0x1f << 24,
+ ALPHA_DESTBLEND_shift = 24,
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+ SEPARATE_ALPHA_BLEND_bit = 1 << 29,
+ CB_BLEND0_CONTROL__ENABLE_bit = 1 << 30,
+ PA_CL_POINT_X_RAD = 0x000287d4,
+ PA_CL_POINT_Y_RAD = 0x000287d8,
+ PA_CL_POINT_SIZE = 0x000287dc,
+ PA_CL_POINT_CULL_RAD = 0x000287e0,
+ VGT_DMA_BASE_HI = 0x000287e4,
+ VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0,
+ VGT_DMA_BASE_HI__BASE_ADDR_shift = 0,
+ VGT_DMA_BASE = 0x000287e8,
+ VGT_DRAW_INITIATOR = 0x000287f0,
+ SOURCE_SELECT_mask = 0x03 << 0,
+ SOURCE_SELECT_shift = 0,
+ DI_SRC_SEL_DMA = 0x00,
+ DI_SRC_SEL_IMMEDIATE = 0x01,
+ DI_SRC_SEL_AUTO_INDEX = 0x02,
+ DI_SRC_SEL_RESERVED = 0x03,
+ MAJOR_MODE_mask = 0x03 << 2,
+ MAJOR_MODE_shift = 2,
+ DI_MAJOR_MODE_0 = 0x00,
+ DI_MAJOR_MODE_1 = 0x01,
+ NOT_EOP_bit = 1 << 5,
+ USE_OPAQUE_bit = 1 << 6,
+ VGT_IMMED_DATA = 0x000287f4,
+ VGT_EVENT_ADDRESS_REG = 0x000287f8,
+ ADDRESS_LOW_mask = 0xfffffff << 0,
+ ADDRESS_LOW_shift = 0,
+ DB_DEPTH_CONTROL = 0x00028800,
+ STENCIL_ENABLE_bit = 1 << 0,
+ Z_ENABLE_bit = 1 << 1,
+ Z_WRITE_ENABLE_bit = 1 << 2,
+ ZFUNC_mask = 0x07 << 4,
+ ZFUNC_shift = 4,
+ FRAG_NEVER = 0x00,
+ FRAG_LESS = 0x01,
+ FRAG_EQUAL = 0x02,
+ FRAG_LEQUAL = 0x03,
+ FRAG_GREATER = 0x04,
+ FRAG_NOTEQUAL = 0x05,
+ FRAG_GEQUAL = 0x06,
+ FRAG_ALWAYS = 0x07,
+ BACKFACE_ENABLE_bit = 1 << 7,
+ STENCILFUNC_mask = 0x07 << 8,
+ STENCILFUNC_shift = 8,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ STENCILFAIL_mask = 0x07 << 11,
+ STENCILFAIL_shift = 11,
+ STENCIL_KEEP = 0x00,
+ STENCIL_ZERO = 0x01,
+ STENCIL_REPLACE = 0x02,
+ STENCIL_INCR_CLAMP = 0x03,
+ STENCIL_DECR_CLAMP = 0x04,
+ STENCIL_INVERT = 0x05,
+ STENCIL_INCR_WRAP = 0x06,
+ STENCIL_DECR_WRAP = 0x07,
+ STENCILZPASS_mask = 0x07 << 14,
+ STENCILZPASS_shift = 14,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZFAIL_mask = 0x07 << 17,
+ STENCILZFAIL_shift = 17,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILFUNC_BF_mask = 0x07 << 20,
+ STENCILFUNC_BF_shift = 20,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ STENCILFAIL_BF_mask = 0x07 << 23,
+ STENCILFAIL_BF_shift = 23,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZPASS_BF_mask = 0x07 << 26,
+ STENCILZPASS_BF_shift = 26,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZFAIL_BF_mask = 0x07 << 29,
+ STENCILZFAIL_BF_shift = 29,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ CB_COLOR_CONTROL = 0x00028808,
+ DEGAMMA_ENABLE_bit = 1 << 3,
+ CB_COLOR_CONTROL__MODE_mask = 0x07 << 4,
+ CB_COLOR_CONTROL__MODE_shift = 4,
+ CB_DISABLE = 0x00,
+ CB_NORMAL = 0x01,
+ CB_ELIMINATE_FAST_CLEAR = 0x02,
+ CB_RESOLVE = 0x03,
+ CB_DECOMPRESS = 0x04,
+ CB_FMASK_DECOMPRESS = 0x05,
+ ROP3_mask = 0xff << 16,
+ ROP3_shift = 16,
+ DB_SHADER_CONTROL = 0x0002880c,
+ Z_EXPORT_ENABLE_bit = 1 << 0,
+ STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1,
+ Z_ORDER_mask = 0x03 << 4,
+ Z_ORDER_shift = 4,
+ LATE_Z = 0x00,
+ EARLY_Z_THEN_LATE_Z = 0x01,
+ RE_Z = 0x02,
+ EARLY_Z_THEN_RE_Z = 0x03,
+ KILL_ENABLE_bit = 1 << 6,
+ COVERAGE_TO_MASK_ENABLE_bit = 1 << 7,
+ MASK_EXPORT_ENABLE_bit = 1 << 8,
+ DUAL_EXPORT_ENABLE_bit = 1 << 9,
+ EXEC_ON_HIER_FAIL_bit = 1 << 10,
+ EXEC_ON_NOOP_bit = 1 << 11,
+ ALPHA_TO_MASK_DISABLE_bit = 1 << 12,
+ DB_SOURCE_FORMAT_mask = 0x03 << 13,
+ DB_SOURCE_FORMAT_shift = 13,
+ EXPORT_DB_FULL = 0x00,
+ EXPORT_DB_FOUR16 = 0x01,
+ EXPORT_DB_TWO = 0x02,
+ DEPTH_BEFORE_SHADER_bit = 1 << 15,
+ CONSERVATIVE_Z_EXPORT_mask = 0x03 << 16,
+ CONSERVATIVE_Z_EXPORT_shift = 16,
+ EXPORT_ANY_Z = 0x00,
+ EXPORT_LESS_THAN_Z = 0x01,
+ EXPORT_GREATER_THAN_Z = 0x02,
+ EXPORT_RESERVED = 0x03,
+ PA_CL_CLIP_CNTL = 0x00028810,
+ UCP_ENA_0_bit = 1 << 0,
+ UCP_ENA_1_bit = 1 << 1,
+ UCP_ENA_2_bit = 1 << 2,
+ UCP_ENA_3_bit = 1 << 3,
+ UCP_ENA_4_bit = 1 << 4,
+ UCP_ENA_5_bit = 1 << 5,
+ PS_UCP_Y_SCALE_NEG_bit = 1 << 13,
+ PS_UCP_MODE_mask = 0x03 << 14,
+ PS_UCP_MODE_shift = 14,
+ CLIP_DISABLE_bit = 1 << 16,
+ UCP_CULL_ONLY_ENA_bit = 1 << 17,
+ BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18,
+ DX_CLIP_SPACE_DEF_bit = 1 << 19,
+ DIS_CLIP_ERR_DETECT_bit = 1 << 20,
+ VTX_KILL_OR_bit = 1 << 21,
+ DX_RASTERIZATION_KILL_bit = 1 << 22,
+ DX_LINEAR_ATTR_CLIP_ENA_bit = 1 << 24,
+ VTE_VPORT_PROVOKE_DISABLE_bit = 1 << 25,
+ ZCLIP_NEAR_DISABLE_bit = 1 << 26,
+ ZCLIP_FAR_DISABLE_bit = 1 << 27,
+ PA_SU_SC_MODE_CNTL = 0x00028814,
+ CULL_FRONT_bit = 1 << 0,
+ CULL_BACK_bit = 1 << 1,
+ FACE_bit = 1 << 2,
+ POLY_MODE_mask = 0x03 << 3,
+ POLY_MODE_shift = 3,
+ X_DISABLE_POLY_MODE = 0x00,
+ X_DUAL_MODE = 0x01,
+ POLYMODE_FRONT_PTYPE_mask = 0x07 << 5,
+ POLYMODE_FRONT_PTYPE_shift = 5,
+ X_DRAW_POINTS = 0x00,
+ X_DRAW_LINES = 0x01,
+ X_DRAW_TRIANGLES = 0x02,
+ POLYMODE_BACK_PTYPE_mask = 0x07 << 8,
+ POLYMODE_BACK_PTYPE_shift = 8,
+/* X_DRAW_POINTS = 0x00, */
+/* X_DRAW_LINES = 0x01, */
+/* X_DRAW_TRIANGLES = 0x02, */
+ POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11,
+ POLY_OFFSET_BACK_ENABLE_bit = 1 << 12,
+ POLY_OFFSET_PARA_ENABLE_bit = 1 << 13,
+ VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16,
+ PROVOKING_VTX_LAST_bit = 1 << 19,
+ PERSP_CORR_DIS_bit = 1 << 20,
+ MULTI_PRIM_IB_ENA_bit = 1 << 21,
+ PA_CL_VTE_CNTL = 0x00028818,
+ VPORT_X_SCALE_ENA_bit = 1 << 0,
+ VPORT_X_OFFSET_ENA_bit = 1 << 1,
+ VPORT_Y_SCALE_ENA_bit = 1 << 2,
+ VPORT_Y_OFFSET_ENA_bit = 1 << 3,
+ VPORT_Z_SCALE_ENA_bit = 1 << 4,
+ VPORT_Z_OFFSET_ENA_bit = 1 << 5,
+ VTX_XY_FMT_bit = 1 << 8,
+ VTX_Z_FMT_bit = 1 << 9,
+ VTX_W0_FMT_bit = 1 << 10,
+ PA_CL_VS_OUT_CNTL = 0x0002881c,
+ CLIP_DIST_ENA_0_bit = 1 << 0,
+ CLIP_DIST_ENA_1_bit = 1 << 1,
+ CLIP_DIST_ENA_2_bit = 1 << 2,
+ CLIP_DIST_ENA_3_bit = 1 << 3,
+ CLIP_DIST_ENA_4_bit = 1 << 4,
+ CLIP_DIST_ENA_5_bit = 1 << 5,
+ CLIP_DIST_ENA_6_bit = 1 << 6,
+ CLIP_DIST_ENA_7_bit = 1 << 7,
+ CULL_DIST_ENA_0_bit = 1 << 8,
+ CULL_DIST_ENA_1_bit = 1 << 9,
+ CULL_DIST_ENA_2_bit = 1 << 10,
+ CULL_DIST_ENA_3_bit = 1 << 11,
+ CULL_DIST_ENA_4_bit = 1 << 12,
+ CULL_DIST_ENA_5_bit = 1 << 13,
+ CULL_DIST_ENA_6_bit = 1 << 14,
+ CULL_DIST_ENA_7_bit = 1 << 15,
+ USE_VTX_POINT_SIZE_bit = 1 << 16,
+ USE_VTX_EDGE_FLAG_bit = 1 << 17,
+ USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18,
+ USE_VTX_VIEWPORT_INDX_bit = 1 << 19,
+ USE_VTX_KILL_FLAG_bit = 1 << 20,
+ VS_OUT_MISC_VEC_ENA_bit = 1 << 21,
+ VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22,
+ VS_OUT_CCDIST1_VEC_ENA_bit = 1 << 23,
+ PA_CL_NANINF_CNTL = 0x00028820,
+ VTE_XY_INF_DISCARD_bit = 1 << 0,
+ VTE_Z_INF_DISCARD_bit = 1 << 1,
+ VTE_W_INF_DISCARD_bit = 1 << 2,
+ VTE_0XNANINF_IS_0_bit = 1 << 3,
+ VTE_XY_NAN_RETAIN_bit = 1 << 4,
+ VTE_Z_NAN_RETAIN_bit = 1 << 5,
+ VTE_W_NAN_RETAIN_bit = 1 << 6,
+ VTE_W_RECIP_NAN_IS_0_bit = 1 << 7,
+ VS_XY_NAN_TO_INF_bit = 1 << 8,
+ VS_XY_INF_RETAIN_bit = 1 << 9,
+ VS_Z_NAN_TO_INF_bit = 1 << 10,
+ VS_Z_INF_RETAIN_bit = 1 << 11,
+ VS_W_NAN_TO_INF_bit = 1 << 12,
+ VS_W_INF_RETAIN_bit = 1 << 13,
+ VS_CLIP_DIST_INF_DISCARD_bit = 1 << 14,
+ VTE_NO_OUTPUT_NEG_0_bit = 1 << 20,
+ PA_SU_LINE_STIPPLE_CNTL = 0x00028824,
+ LINE_STIPPLE_RESET_mask = 0x03 << 0,
+ LINE_STIPPLE_RESET_shift = 0,
+ EXPAND_FULL_LENGTH_bit = 1 << 2,
+ FRACTIONAL_ACCUM_bit = 1 << 3,
+ DIAMOND_ADJUST_bit = 1 << 4,
+ PA_SU_LINE_STIPPLE_SCALE = 0x00028828,
+ PA_SU_PRIM_FILTER_CNTL = 0x0002882c,
+ TRIANGLE_FILTER_DISABLE_bit = 1 << 0,
+ LINE_FILTER_DISABLE_bit = 1 << 1,
+ POINT_FILTER_DISABLE_bit = 1 << 2,
+ RECTANGLE_FILTER_DISABLE_bit = 1 << 3,
+ TRIANGLE_EXPAND_ENA_bit = 1 << 4,
+ LINE_EXPAND_ENA_bit = 1 << 5,
+ POINT_EXPAND_ENA_bit = 1 << 6,
+ RECTANGLE_EXPAND_ENA_bit = 1 << 7,
+ PRIM_EXPAND_CONSTANT_mask = 0xff << 8,
+ PRIM_EXPAND_CONSTANT_shift = 8,
+ SQ_LSTMP_RING_ITEMSIZE = 0x00028830,
+ ITEMSIZE_mask = 0x7fff << 0,
+ ITEMSIZE_shift = 0,
+ SQ_HSTMP_RING_ITEMSIZE = 0x00028834,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_PGM_START_PS = 0x00028840,
+ SQ_PGM_RESOURCES_PS = 0x00028844,
+ NUM_GPRS_mask = 0xff << 0,
+ NUM_GPRS_shift = 0,
+ STACK_SIZE_mask = 0xff << 8,
+ STACK_SIZE_shift = 8,
+ DX10_CLAMP_bit = 1 << 21,
+ UNCACHED_FIRST_INST_bit = 1 << 28,
+ CLAMP_CONSTS_bit = 1 << 31,
+ SQ_PGM_RESOURCES_2_PS = 0x00028848,
+ SINGLE_ROUND_mask = 0x03 << 0,
+ SINGLE_ROUND_shift = 0,
+ SQ_ROUND_NEAREST_EVEN = 0x00,
+ SQ_ROUND_PLUS_INFINITY = 0x01,
+ SQ_ROUND_MINUS_INFINITY = 0x02,
+ SQ_ROUND_TO_ZERO = 0x03,
+ DOUBLE_ROUND_mask = 0x03 << 2,
+ DOUBLE_ROUND_shift = 2,
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+ ALLOW_SINGLE_DENORM_IN_bit = 1 << 4,
+ ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5,
+ ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6,
+ ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7,
+ SQ_PGM_EXPORTS_PS = 0x0002884c,
+ EXPORT_MODE_mask = 0x1f << 0,
+ EXPORT_MODE_shift = 0,
+ SQ_PGM_START_VS = 0x0002885c,
+ SQ_PGM_RESOURCES_VS = 0x00028860,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+/* DX10_CLAMP_bit = 1 << 21, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_RESOURCES_2_VS = 0x00028864,
+/* SINGLE_ROUND_mask = 0x03 << 0, */
+/* SINGLE_ROUND_shift = 0, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* DOUBLE_ROUND_mask = 0x03 << 2, */
+/* DOUBLE_ROUND_shift = 2, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */
+/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */
+/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */
+/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */
+ SQ_PGM_START_GS = 0x00028874,
+ SQ_PGM_RESOURCES_GS = 0x00028878,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+/* DX10_CLAMP_bit = 1 << 21, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_RESOURCES_2_GS = 0x0002887c,
+/* SINGLE_ROUND_mask = 0x03 << 0, */
+/* SINGLE_ROUND_shift = 0, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* DOUBLE_ROUND_mask = 0x03 << 2, */
+/* DOUBLE_ROUND_shift = 2, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */
+/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */
+/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */
+/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */
+ SQ_PGM_START_ES = 0x0002888c,
+ SQ_PGM_RESOURCES_ES = 0x00028890,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+/* DX10_CLAMP_bit = 1 << 21, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_RESOURCES_2_ES = 0x00028894,
+/* SINGLE_ROUND_mask = 0x03 << 0, */
+/* SINGLE_ROUND_shift = 0, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* DOUBLE_ROUND_mask = 0x03 << 2, */
+/* DOUBLE_ROUND_shift = 2, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */
+/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */
+/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */
+/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */
+ SQ_PGM_START_FS = 0x000288a4,
+ SQ_PGM_RESOURCES_FS = 0x000288a8,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+/* DX10_CLAMP_bit = 1 << 21, */
+ SQ_PGM_START_HS = 0x000288b8,
+ SQ_PGM_RESOURCES_HS = 0x000288bc,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+/* DX10_CLAMP_bit = 1 << 21, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_RESOURCES_2_HS = 0x000288c0,
+/* SINGLE_ROUND_mask = 0x03 << 0, */
+/* SINGLE_ROUND_shift = 0, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* DOUBLE_ROUND_mask = 0x03 << 2, */
+/* DOUBLE_ROUND_shift = 2, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */
+/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */
+/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */
+/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */
+ SQ_PGM_START_LS = 0x000288d0,
+ SQ_PGM_RESOURCES_LS = 0x000288d4,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+/* DX10_CLAMP_bit = 1 << 21, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_RESOURCES_2_LS = 0x000288d8,
+/* SINGLE_ROUND_mask = 0x03 << 0, */
+/* SINGLE_ROUND_shift = 0, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* DOUBLE_ROUND_mask = 0x03 << 2, */
+/* DOUBLE_ROUND_shift = 2, */
+/* SQ_ROUND_NEAREST_EVEN = 0x00, */
+/* SQ_ROUND_PLUS_INFINITY = 0x01, */
+/* SQ_ROUND_MINUS_INFINITY = 0x02, */
+/* SQ_ROUND_TO_ZERO = 0x03, */
+/* ALLOW_SINGLE_DENORM_IN_bit = 1 << 4, */
+/* ALLOW_SINGLE_DENORM_OUT_bit = 1 << 5, */
+/* ALLOW_DOUBLE_DENORM_IN_bit = 1 << 6, */
+/* ALLOW_DOUBLE_DENORM_OUT_bit = 1 << 7, */
+ SQ_VTX_SEMANTIC_CLEAR = 0x000288f0,
+ SQ_ESGS_RING_ITEMSIZE = 0x00028900,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GSVS_RING_ITEMSIZE = 0x00028904,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_ESTMP_RING_ITEMSIZE = 0x00028908,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GSTMP_RING_ITEMSIZE = 0x0002890c,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_VSTMP_RING_ITEMSIZE = 0x00028910,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_PSTMP_RING_ITEMSIZE = 0x00028914,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GS_VERT_ITEMSIZE = 0x0002891c,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GS_VERT_ITEMSIZE_1 = 0x00028920,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GS_VERT_ITEMSIZE_2 = 0x00028924,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GS_VERT_ITEMSIZE_3 = 0x00028928,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GSVS_RING_OFFSET_1 = 0x0002892c,
+ SQ_GSVS_RING_OFFSET_1__OFFSET_mask = 0x7fff << 0,
+ SQ_GSVS_RING_OFFSET_1__OFFSET_shift = 0,
+ SQ_GSVS_RING_OFFSET_2 = 0x00028930,
+ SQ_GSVS_RING_OFFSET_2__OFFSET_mask = 0x7fff << 0,
+ SQ_GSVS_RING_OFFSET_2__OFFSET_shift = 0,
+ SQ_GSVS_RING_OFFSET_3 = 0x00028934,
+ SQ_GSVS_RING_OFFSET_3__OFFSET_mask = 0x7fff << 0,
+ SQ_GSVS_RING_OFFSET_3__OFFSET_shift = 0,
+ SQ_ALU_CONST_CACHE_PS_0 = 0x00028940,
+ SQ_ALU_CONST_CACHE_PS_0_num = 16,
+ SQ_ALU_CONST_CACHE_VS_0 = 0x00028980,
+ SQ_ALU_CONST_CACHE_VS_0_num = 16,
+ SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0,
+ SQ_ALU_CONST_CACHE_GS_0_num = 16,
+ PA_SU_POINT_SIZE = 0x00028a00,
+ HEIGHT_mask = 0xffff << 0,
+ HEIGHT_shift = 0,
+ PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16,
+ PA_SU_POINT_SIZE__WIDTH_shift = 16,
+ PA_SU_POINT_MINMAX = 0x00028a04,
+ MIN_SIZE_mask = 0xffff << 0,
+ MIN_SIZE_shift = 0,
+ PA_SU_POINT_MINMAX__MAX_SIZE_mask = 0xffff << 16,
+ PA_SU_POINT_MINMAX__MAX_SIZE_shift = 16,
+ PA_SU_LINE_CNTL = 0x00028a08,
+ PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0,
+ PA_SU_LINE_CNTL__WIDTH_shift = 0,
+ PA_SC_LINE_STIPPLE = 0x00028a0c,
+ LINE_PATTERN_mask = 0xffff << 0,
+ LINE_PATTERN_shift = 0,
+ REPEAT_COUNT_mask = 0xff << 16,
+ REPEAT_COUNT_shift = 16,
+ PATTERN_BIT_ORDER_bit = 1 << 28,
+ AUTO_RESET_CNTL_mask = 0x03 << 29,
+ AUTO_RESET_CNTL_shift = 29,
+ VGT_OUTPUT_PATH_CNTL = 0x00028a10,
+ PATH_SELECT_mask = 0x07 << 0,
+ PATH_SELECT_shift = 0,
+ VGT_OUTPATH_VTX_REUSE = 0x00,
+ VGT_OUTPATH_TESS_EN = 0x01,
+ VGT_OUTPATH_PASSTHRU = 0x02,
+ VGT_OUTPATH_GS_BLOCK = 0x03,
+ VGT_OUTPATH_HS_BLOCK = 0x04,
+ VGT_HOS_CNTL = 0x00028a14,
+ TESS_MODE_mask = 0x03 << 0,
+ TESS_MODE_shift = 0,
+ VGT_HOS_MAX_TESS_LEVEL = 0x00028a18,
+ VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c,
+ VGT_HOS_REUSE_DEPTH = 0x00028a20,
+ REUSE_DEPTH_mask = 0xff << 0,
+ REUSE_DEPTH_shift = 0,
+ VGT_GROUP_PRIM_TYPE = 0x00028a24,
+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0,
+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift = 0,
+ VGT_GRP_3D_POINT = 0x00,
+ VGT_GRP_3D_LINE = 0x01,
+ VGT_GRP_3D_TRI = 0x02,
+ VGT_GRP_3D_RECT = 0x03,
+ VGT_GRP_3D_QUAD = 0x04,
+ VGT_GRP_2D_COPY_RECT_V0 = 0x05,
+ VGT_GRP_2D_COPY_RECT_V1 = 0x06,
+ VGT_GRP_2D_COPY_RECT_V2 = 0x07,
+ VGT_GRP_2D_COPY_RECT_V3 = 0x08,
+ VGT_GRP_2D_FILL_RECT = 0x09,
+ VGT_GRP_2D_LINE = 0x0a,
+ VGT_GRP_2D_TRI = 0x0b,
+ VGT_GRP_PRIM_INDEX_LINE = 0x0c,
+ VGT_GRP_PRIM_INDEX_TRI = 0x0d,
+ VGT_GRP_PRIM_INDEX_QUAD = 0x0e,
+ VGT_GRP_3D_LINE_ADJ = 0x0f,
+ VGT_GRP_3D_TRI_ADJ = 0x10,
+ VGT_GRP_3D_PATCH = 0x11,
+ RETAIN_ORDER_bit = 1 << 14,
+ RETAIN_QUADS_bit = 1 << 15,
+ PRIM_ORDER_mask = 0x07 << 16,
+ PRIM_ORDER_shift = 16,
+ VGT_GRP_LIST = 0x00,
+ VGT_GRP_STRIP = 0x01,
+ VGT_GRP_FAN = 0x02,
+ VGT_GRP_LOOP = 0x03,
+ VGT_GRP_POLYGON = 0x04,
+ VGT_GROUP_FIRST_DECR = 0x00028a28,
+ FIRST_DECR_mask = 0x0f << 0,
+ FIRST_DECR_shift = 0,
+ VGT_GROUP_DECR = 0x00028a2c,
+ DECR_mask = 0x0f << 0,
+ DECR_shift = 0,
+ VGT_GROUP_VECT_0_CNTL = 0x00028a30,
+ COMP_X_EN_bit = 1 << 0,
+ COMP_Y_EN_bit = 1 << 1,
+ COMP_Z_EN_bit = 1 << 2,
+ COMP_W_EN_bit = 1 << 3,
+ VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8,
+ VGT_GROUP_VECT_0_CNTL__STRIDE_shift = 8,
+ SHIFT_mask = 0xff << 16,
+ SHIFT_shift = 16,
+ VGT_GROUP_VECT_1_CNTL = 0x00028a34,
+/* COMP_X_EN_bit = 1 << 0, */
+/* COMP_Y_EN_bit = 1 << 1, */
+/* COMP_Z_EN_bit = 1 << 2, */
+/* COMP_W_EN_bit = 1 << 3, */
+ VGT_GROUP_VECT_1_CNTL__STRIDE_mask = 0xff << 8,
+ VGT_GROUP_VECT_1_CNTL__STRIDE_shift = 8,
+/* SHIFT_mask = 0xff << 16, */
+/* SHIFT_shift = 16, */
+ VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38,
+ X_CONV_mask = 0x0f << 0,
+ X_CONV_shift = 0,
+ VGT_GRP_INDEX_16 = 0x00,
+ VGT_GRP_INDEX_32 = 0x01,
+ VGT_GRP_UINT_16 = 0x02,
+ VGT_GRP_UINT_32 = 0x03,
+ VGT_GRP_SINT_16 = 0x04,
+ VGT_GRP_SINT_32 = 0x05,
+ VGT_GRP_FLOAT_32 = 0x06,
+ VGT_GRP_AUTO_PRIM = 0x07,
+ VGT_GRP_FIX_1_23_TO_FLOAT = 0x08,
+ X_OFFSET_mask = 0x0f << 4,
+ X_OFFSET_shift = 4,
+ Y_CONV_mask = 0x0f << 8,
+ Y_CONV_shift = 8,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ Y_OFFSET_mask = 0x0f << 12,
+ Y_OFFSET_shift = 12,
+ Z_CONV_mask = 0x0f << 16,
+ Z_CONV_shift = 16,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ Z_OFFSET_mask = 0x0f << 20,
+ Z_OFFSET_shift = 20,
+ W_CONV_mask = 0x0f << 24,
+ W_CONV_shift = 24,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ W_OFFSET_mask = 0x0f << 28,
+ W_OFFSET_shift = 28,
+ VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c,
+/* X_CONV_mask = 0x0f << 0, */
+/* X_CONV_shift = 0, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* X_OFFSET_mask = 0x0f << 4, */
+/* X_OFFSET_shift = 4, */
+/* Y_CONV_mask = 0x0f << 8, */
+/* Y_CONV_shift = 8, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* Y_OFFSET_mask = 0x0f << 12, */
+/* Y_OFFSET_shift = 12, */
+/* Z_CONV_mask = 0x0f << 16, */
+/* Z_CONV_shift = 16, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* Z_OFFSET_mask = 0x0f << 20, */
+/* Z_OFFSET_shift = 20, */
+/* W_CONV_mask = 0x0f << 24, */
+/* W_CONV_shift = 24, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* W_OFFSET_mask = 0x0f << 28, */
+/* W_OFFSET_shift = 28, */
+ VGT_GS_MODE = 0x00028a40,
+ VGT_GS_MODE__MODE_mask = 0x03 << 0,
+ VGT_GS_MODE__MODE_shift = 0,
+ GS_OFF = 0x00,
+ GS_SCENARIO_A = 0x01,
+ GS_SCENARIO_B = 0x02,
+ GS_SCENARIO_G = 0x03,
+ GS_SCENARIO_C = 0x04,
+ SPRITE_EN = 0x05,
+ ES_PASSTHRU_bit = 1 << 2,
+ CUT_MODE_mask = 0x03 << 3,
+ CUT_MODE_shift = 3,
+ GS_CUT_1024 = 0x00,
+ GS_CUT_512 = 0x01,
+ GS_CUT_256 = 0x02,
+ GS_CUT_128 = 0x03,
+ MODE_HI_bit = 1 << 8,
+ PA_SC_MODE_CNTL_0 = 0x00028a48,
+ MSAA_ENABLE_bit = 1 << 0,
+ VPORT_SCISSOR_ENABLE_bit = 1 << 1,
+ LINE_STIPPLE_ENABLE_bit = 1 << 2,
+ VGT_ENHANCE = 0x00028a50,
+ VGT_GS_PER_ES = 0x00028a54,
+ GS_PER_ES_mask = 0x7ff << 0,
+ GS_PER_ES_shift = 0,
+ VGT_ES_PER_GS = 0x00028a58,
+ ES_PER_GS_mask = 0x7ff << 0,
+ ES_PER_GS_shift = 0,
+ VGT_GS_PER_VS = 0x00028a5c,
+ GS_PER_VS_mask = 0x0f << 0,
+ GS_PER_VS_shift = 0,
+ VGT_GS_OUT_PRIM_TYPE = 0x00028a6c,
+ OUTPRIM_TYPE_mask = 0x3f << 0,
+ OUTPRIM_TYPE_shift = 0,
+ POINTLIST = 0x00,
+ LINESTRIP = 0x01,
+ TRISTRIP = 0x02,
+ VGT_DMA_SIZE = 0x00028a74,
+ VGT_DMA_MAX_SIZE = 0x00028a78,
+ VGT_DMA_INDEX_TYPE = 0x00028a7c,
+/* INDEX_TYPE_mask = 0x03 << 0, */
+/* INDEX_TYPE_shift = 0, */
+ VGT_INDEX_16 = 0x00,
+ VGT_INDEX_32 = 0x01,
+ SWAP_MODE_mask = 0x03 << 2,
+ SWAP_MODE_shift = 2,
+ VGT_DMA_SWAP_NONE = 0x00,
+ VGT_DMA_SWAP_16_BIT = 0x01,
+ VGT_DMA_SWAP_32_BIT = 0x02,
+ VGT_DMA_SWAP_WORD = 0x03,
+ VGT_PRIMITIVEID_EN = 0x00028a84,
+ PRIMITIVEID_EN_bit = 1 << 0,
+ VGT_DMA_NUM_INSTANCES = 0x00028a88,
+ VGT_EVENT_INITIATOR = 0x00028a90,
+ EVENT_TYPE_mask = 0x3f << 0,
+ EVENT_TYPE_shift = 0,
+ SAMPLE_STREAMOUTSTATS1 = 0x01,
+ SAMPLE_STREAMOUTSTATS2 = 0x02,
+ SAMPLE_STREAMOUTSTATS3 = 0x03,
+ CACHE_FLUSH_TS = 0x04,
+ CONTEXT_DONE = 0x05,
+ CACHE_FLUSH = 0x06,
+ CS_PARTIAL_FLUSH = 0x07,
+ RST_PIX_CNT = 0x0d,
+ VS_PARTIAL_FLUSH = 0x0f,
+ PS_PARTIAL_FLUSH = 0x10,
+ FLUSH_HS_OUTPUT = 0x11,
+ FLUSH_LS_OUTPUT = 0x12,
+ CACHE_FLUSH_AND_INV_TS_EVENT = 0x14,
+ ZPASS_DONE = 0x15,
+ CACHE_FLUSH_AND_INV_EVENT = 0x16,
+ PERFCOUNTER_START = 0x17,
+ PERFCOUNTER_STOP = 0x18,
+ PIPELINESTAT_START = 0x19,
+ PIPELINESTAT_STOP = 0x1a,
+ PERFCOUNTER_SAMPLE = 0x1b,
+ FLUSH_ES_OUTPUT = 0x1c,
+ FLUSH_GS_OUTPUT = 0x1d,
+ SAMPLE_PIPELINESTAT = 0x1e,
+ SO_VGTSTREAMOUT_FLUSH = 0x1f,
+ SAMPLE_STREAMOUTSTATS = 0x20,
+ RESET_VTX_CNT = 0x21,
+ BLOCK_CONTEXT_DONE = 0x22,
+ CS_CONTEXT_DONE = 0x23,
+ VGT_FLUSH = 0x24,
+ SQ_NON_EVENT = 0x26,
+ SC_SEND_DB_VPZ = 0x27,
+ BOTTOM_OF_PIPE_TS = 0x28,
+ FLUSH_SX_TS = 0x29,
+ DB_CACHE_FLUSH_AND_INV = 0x2a,
+ FLUSH_AND_INV_DB_DATA_TS = 0x2b,
+ FLUSH_AND_INV_DB_META = 0x2c,
+ FLUSH_AND_INV_CB_DATA_TS = 0x2d,
+ FLUSH_AND_INV_CB_META = 0x2e,
+ CS_DONE = 0x2f,
+ PS_DONE = 0x30,
+ FLUSH_AND_INV_CB_PIXEL_DATA = 0x31,
+ ADDRESS_HI_mask = 0xff << 19,
+ ADDRESS_HI_shift = 19,
+ EXTENDED_EVENT_bit = 1 << 27,
+ VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94,
+ RESET_EN_bit = 1 << 0,
+ VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0,
+ VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4,
+ VGT_REUSE_OFF = 0x00028ab4,
+ REUSE_OFF_bit = 1 << 0,
+ VGT_VTX_CNT_EN = 0x00028ab8,
+ VTX_CNT_EN_bit = 1 << 0,
+ DB_HTILE_SURFACE = 0x00028abc,
+ HTILE_WIDTH_bit = 1 << 0,
+ HTILE_HEIGHT_bit = 1 << 1,
+ LINEAR_bit = 1 << 2,
+ FULL_CACHE_bit = 1 << 3,
+ HTILE_USES_PRELOAD_WIN_bit = 1 << 4,
+ PRELOAD_bit = 1 << 5,
+ PREFETCH_WIDTH_mask = 0x3f << 6,
+ PREFETCH_WIDTH_shift = 6,
+ PREFETCH_HEIGHT_mask = 0x3f << 12,
+ PREFETCH_HEIGHT_shift = 12,
+ DB_SRESULTS_COMPARE_STATE0 = 0x00028ac0,
+ COMPAREFUNC0_mask = 0x07 << 0,
+ COMPAREFUNC0_shift = 0,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ COMPAREVALUE0_mask = 0xff << 4,
+ COMPAREVALUE0_shift = 4,
+ COMPAREMASK0_mask = 0xff << 12,
+ COMPAREMASK0_shift = 12,
+ ENABLE0_bit = 1 << 24,
+ DB_SRESULTS_COMPARE_STATE1 = 0x00028ac4,
+ COMPAREFUNC1_mask = 0x07 << 0,
+ COMPAREFUNC1_shift = 0,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ COMPAREVALUE1_mask = 0xff << 4,
+ COMPAREVALUE1_shift = 4,
+ COMPAREMASK1_mask = 0xff << 12,
+ COMPAREMASK1_shift = 12,
+ ENABLE1_bit = 1 << 24,
+ DB_PRELOAD_CONTROL = 0x00028ac8,
+ START_X_mask = 0xff << 0,
+ START_X_shift = 0,
+ START_Y_mask = 0xff << 8,
+ START_Y_shift = 8,
+ MAX_X_mask = 0xff << 16,
+ MAX_X_shift = 16,
+ MAX_Y_mask = 0xff << 24,
+ MAX_Y_shift = 24,
+ VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0,
+ VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4,
+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8,
+ VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc,
+ VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0,
+ VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4,
+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8,
+ VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec,
+ VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0,
+ VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4,
+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8,
+ VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc,
+ VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00,
+ VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04,
+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08,
+ VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c,
+ VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10,
+ VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14,
+ VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18,
+ VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c,
+ VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28,
+ VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c,
+ VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30,
+ VERTEX_STRIDE_mask = 0x1ff << 0,
+ VERTEX_STRIDE_shift = 0,
+ VGT_GS_MAX_VERT_OUT = 0x00028b38,
+ MAX_VERT_OUT_mask = 0x7ff << 0,
+ MAX_VERT_OUT_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44,
+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48,
+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c,
+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x00028b50,
+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift = 0,
+ VGT_SHADER_STAGES_EN = 0x00028b54,
+ LS_EN_mask = 0x03 << 0,
+ LS_EN_shift = 0,
+ LS_STAGE_OFF = 0x00,
+ LS_STAGE_ON = 0x01,
+ CS_STAGE_ON = 0x02,
+ HS_EN_bit = 1 << 2,
+ ES_EN_mask = 0x03 << 3,
+ ES_EN_shift = 3,
+ ES_STAGE_OFF = 0x00,
+ ES_STAGE_DS = 0x01,
+ ES_STAGE_REAL = 0x02,
+ GS_EN_bit = 1 << 5,
+ VS_EN_mask = 0x03 << 6,
+ VS_EN_shift = 6,
+ VS_STAGE_REAL = 0x00,
+ VS_STAGE_DS = 0x01,
+ VS_STAGE_COPY_SHADER = 0x02,
+ VGT_LS_HS_CONFIG = 0x00028b58,
+ NUM_PATCHES_mask = 0xff << 0,
+ NUM_PATCHES_shift = 0,
+ HS_NUM_INPUT_CP_mask = 0x3f << 8,
+ HS_NUM_INPUT_CP_shift = 8,
+ HS_NUM_OUTPUT_CP_mask = 0x3f << 14,
+ HS_NUM_OUTPUT_CP_shift = 14,
+ VGT_LS_SIZE = 0x00028b5c,
+ VGT_LS_SIZE__SIZE_mask = 0xff << 0,
+ VGT_LS_SIZE__SIZE_shift = 0,
+ PATCH_CP_SIZE_mask = 0x1fff << 8,
+ PATCH_CP_SIZE_shift = 8,
+ VGT_HS_SIZE = 0x00028b60,
+ VGT_HS_SIZE__SIZE_mask = 0xff << 0,
+ VGT_HS_SIZE__SIZE_shift = 0,
+/* PATCH_CP_SIZE_mask = 0x1fff << 8, */
+/* PATCH_CP_SIZE_shift = 8, */
+ VGT_LS_HS_ALLOC = 0x00028b64,
+ HS_TOTAL_OUTPUT_mask = 0x1fff << 0,
+ HS_TOTAL_OUTPUT_shift = 0,
+ LS_HS_TOTAL_OUTPUT_mask = 0x1fff << 13,
+ LS_HS_TOTAL_OUTPUT_shift = 13,
+ VGT_HS_PATCH_CONST = 0x00028b68,
+ VGT_HS_PATCH_CONST__SIZE_mask = 0x1fff << 0,
+ VGT_HS_PATCH_CONST__SIZE_shift = 0,
+ VGT_HS_PATCH_CONST__STRIDE_mask = 0x1fff << 13,
+ VGT_HS_PATCH_CONST__STRIDE_shift = 13,
+ DB_ALPHA_TO_MASK = 0x00028b70,
+ ALPHA_TO_MASK_ENABLE_bit = 1 << 0,
+ ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET0_shift = 8,
+ ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 10,
+ ALPHA_TO_MASK_OFFSET1_shift = 10,
+ ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 12,
+ ALPHA_TO_MASK_OFFSET2_shift = 12,
+ ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 14,
+ ALPHA_TO_MASK_OFFSET3_shift = 14,
+ OFFSET_ROUND_bit = 1 << 16,
+ PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028b78,
+ POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff << 0,
+ POLY_OFFSET_NEG_NUM_DB_BITS_shift = 0,
+ POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8,
+ PA_SU_POLY_OFFSET_CLAMP = 0x00028b7c,
+ PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028b80,
+ PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028b84,
+ PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028b88,
+ PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028b8c,
+ VGT_GS_INSTANCE_CNT = 0x00028b90,
+ VGT_GS_INSTANCE_CNT__ENABLE_bit = 1 << 0,
+ CNT_mask = 0x7f << 2,
+ CNT_shift = 2,
+ VGT_STRMOUT_CONFIG = 0x00028b94,
+ STREAMOUT_0_EN_bit = 1 << 0,
+ STREAMOUT_1_EN_bit = 1 << 1,
+ STREAMOUT_2_EN_bit = 1 << 2,
+ STREAMOUT_3_EN_bit = 1 << 3,
+ RAST_STREAM_mask = 0x07 << 4,
+ RAST_STREAM_shift = 4,
+ VGT_STRMOUT_BUFFER_CONFIG = 0x00028b98,
+ STREAM_0_BUFFER_EN_mask = 0x0f << 0,
+ STREAM_0_BUFFER_EN_shift = 0,
+ STREAM_1_BUFFER_EN_mask = 0x0f << 4,
+ STREAM_1_BUFFER_EN_shift = 4,
+ STREAM_2_BUFFER_EN_mask = 0x0f << 8,
+ STREAM_2_BUFFER_EN_shift = 8,
+ STREAM_3_BUFFER_EN_mask = 0x0f << 12,
+ STREAM_3_BUFFER_EN_shift = 12,
+ CB_IMMED0_BASE = 0x00028b9c,
+ CB_IMMED0_BASE_num = 12,
+ PA_SC_LINE_CNTL = 0x00028c00,
+ EXPAND_LINE_WIDTH_bit = 1 << 9,
+ LAST_PIXEL_bit = 1 << 10,
+ PERPENDICULAR_ENDCAP_ENA_bit = 1 << 11,
+ DX10_DIAMOND_TEST_ENA_bit = 1 << 12,
+ PA_SC_AA_CONFIG = 0x00028c04,
+ MSAA_NUM_SAMPLES_mask = 0x03 << 0,
+ MSAA_NUM_SAMPLES_shift = 0,
+ AA_MASK_CENTROID_DTMN_bit = 1 << 4,
+ MAX_SAMPLE_DIST_mask = 0x0f << 13,
+ MAX_SAMPLE_DIST_shift = 13,
+ PA_SU_VTX_CNTL = 0x00028c08,
+ PIX_CENTER_bit = 1 << 0,
+ PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1,
+ PA_SU_VTX_CNTL__ROUND_MODE_shift = 1,
+ X_TRUNCATE = 0x00,
+ X_ROUND = 0x01,
+ X_ROUND_TO_EVEN = 0x02,
+ X_ROUND_TO_ODD = 0x03,
+ QUANT_MODE_mask = 0x07 << 3,
+ QUANT_MODE_shift = 3,
+ X_1_16TH = 0x00,
+ X_1_8TH = 0x01,
+ X_1_4TH = 0x02,
+ X_1_2 = 0x03,
+ X_1 = 0x04,
+ X_1_256TH = 0x05,
+ X_1_1024TH = 0x06,
+ X_1_4096TH = 0x07,
+ PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c,
+ PA_CL_GB_VERT_DISC_ADJ = 0x00028c10,
+ PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14,
+ PA_CL_GB_HORZ_DISC_ADJ = 0x00028c18,
+ PA_SC_AA_SAMPLE_LOCS_0 = 0x00028c1c,
+ S0_X_mask = 0x0f << 0,
+ S0_X_shift = 0,
+ S0_Y_mask = 0x0f << 4,
+ S0_Y_shift = 4,
+ S1_X_mask = 0x0f << 8,
+ S1_X_shift = 8,
+ S1_Y_mask = 0x0f << 12,
+ S1_Y_shift = 12,
+ S2_X_mask = 0x0f << 16,
+ S2_X_shift = 16,
+ S2_Y_mask = 0x0f << 20,
+ S2_Y_shift = 20,
+ S3_X_mask = 0x0f << 24,
+ S3_X_shift = 24,
+ S3_Y_mask = 0x0f << 28,
+ S3_Y_shift = 28,
+ PA_SC_AA_SAMPLE_LOCS_1 = 0x00028c20,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_2 = 0x00028c24,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_3 = 0x00028c28,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_4 = 0x00028c2c,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_5 = 0x00028c30,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_6 = 0x00028c34,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_7 = 0x00028c38,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_MASK = 0x00028c3c,
+ VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58,
+ VTX_REUSE_DEPTH_mask = 0xff << 0,
+ VTX_REUSE_DEPTH_shift = 0,
+ VGT_OUT_DEALLOC_CNTL = 0x00028c5c,
+ DEALLOC_DIST_mask = 0x7f << 0,
+ DEALLOC_DIST_shift = 0,
+ CB_COLOR0_BASE = 0x00028c60,
+ CB_COLOR0_BASE_num = 12,
+ CB_COLOR0_BASE_offset = 51,
+ CB_COLOR0_PITCH = 0x00028c64,
+ CB_COLOR0_PITCH_num = 12,
+ CB_COLOR0_PITCH_offset = 51,
+ CB_COLOR0_PITCH__TILE_MAX_mask = 0x7ff << 0,
+ CB_COLOR0_PITCH__TILE_MAX_shift = 0,
+ CB_COLOR0_SLICE = 0x00028c68,
+ CB_COLOR0_SLICE_num = 12,
+ CB_COLOR0_SLICE_offset = 51,
+ CB_COLOR0_SLICE__TILE_MAX_mask = 0x3fffff << 0,
+ CB_COLOR0_SLICE__TILE_MAX_shift = 0,
+ CB_COLOR0_VIEW = 0x00028c6c,
+ CB_COLOR0_VIEW_num = 12,
+ CB_COLOR0_VIEW_offset = 51,
+/* SLICE_START_mask = 0x7ff << 0, */
+/* SLICE_START_shift = 0, */
+/* SLICE_MAX_mask = 0x7ff << 13, */
+/* SLICE_MAX_shift = 13, */
+ CB_COLOR0_INFO = 0x00028c70,
+ CB_COLOR0_INFO_num = 12,
+ CB_COLOR0_INFO_offset = 51,
+ ENDIAN_mask = 0x03 << 0,
+ ENDIAN_shift = 0,
+ ENDIAN_NONE = 0x00,
+ ENDIAN_8IN16 = 0x01,
+ ENDIAN_8IN32 = 0x02,
+ ENDIAN_8IN64 = 0x03,
+ CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2,
+ CB_COLOR0_INFO__FORMAT_shift = 2,
+ COLOR_INVALID = 0x00,
+ COLOR_8 = 0x01,
+ COLOR_16 = 0x05,
+ COLOR_16_FLOAT = 0x06,
+ COLOR_8_8 = 0x07,
+ COLOR_5_6_5 = 0x08,
+ COLOR_1_5_5_5 = 0x0a,
+ COLOR_4_4_4_4 = 0x0b,
+ COLOR_5_5_5_1 = 0x0c,
+ COLOR_32 = 0x0d,
+ COLOR_32_FLOAT = 0x0e,
+ COLOR_16_16 = 0x0f,
+ COLOR_16_16_FLOAT = 0x10,
+ COLOR_8_24 = 0x11,
+ COLOR_24_8 = 0x13,
+ COLOR_10_11_11 = 0x15,
+ COLOR_10_11_11_FLOAT = 0x16,
+ COLOR_2_10_10_10 = 0x19,
+ COLOR_8_8_8_8 = 0x1a,
+ COLOR_10_10_10_2 = 0x1b,
+ COLOR_X24_8_32_FLOAT = 0x1c,
+ COLOR_32_32 = 0x1d,
+ COLOR_32_32_FLOAT = 0x1e,
+ COLOR_16_16_16_16 = 0x1f,
+ COLOR_16_16_16_16_FLOAT = 0x20,
+ COLOR_32_32_32_32 = 0x22,
+ COLOR_32_32_32_32_FLOAT = 0x23,
+ CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8,
+ CB_COLOR0_INFO__ARRAY_MODE_shift = 8,
+/* ARRAY_LINEAR_GENERAL = 0x00, */
+/* ARRAY_LINEAR_ALIGNED = 0x01, */
+/* ARRAY_1D_TILED_THIN1 = 0x02, */
+/* ARRAY_2D_TILED_THIN1 = 0x04, */
+ NUMBER_TYPE_mask = 0x07 << 12,
+ NUMBER_TYPE_shift = 12,
+ NUMBER_UNORM = 0x00,
+ NUMBER_SNORM = 0x01,
+ NUMBER_UINT = 0x04,
+ NUMBER_SINT = 0x05,
+ NUMBER_SRGB = 0x06,
+ NUMBER_FLOAT = 0x07,
+ COMP_SWAP_mask = 0x03 << 15,
+ COMP_SWAP_shift = 15,
+ SWAP_STD = 0x00,
+ SWAP_ALT = 0x01,
+ SWAP_STD_REV = 0x02,
+ SWAP_ALT_REV = 0x03,
+ FAST_CLEAR_bit = 1 << 17,
+ COMPRESSION_bit = 1 << 18,
+ BLEND_CLAMP_bit = 1 << 19,
+ BLEND_BYPASS_bit = 1 << 20,
+ SIMPLE_FLOAT_bit = 1 << 21,
+ CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 22,
+ CB_COLOR0_INFO__TILE_COMPACT_bit = 1 << 23,
+ SOURCE_FORMAT_mask = 0x03 << 24,
+ SOURCE_FORMAT_shift = 24,
+ EXPORT_4C_32BPC = 0x00,
+ EXPORT_4C_16BPC = 0x01,
+ RAT_bit = 1 << 26,
+ RESOURCE_TYPE_mask = 0x07 << 27,
+ RESOURCE_TYPE_shift = 27,
+ BUFFER = 0x00,
+ TEXTURE1D = 0x01,
+ TEXTURE1DARRAY = 0x02,
+ TEXTURE2D = 0x03,
+ TEXTURE2DARRAY = 0x04,
+ TEXTURE3D = 0x05,
+ CB_COLOR0_ATTRIB = 0x00028c74,
+ CB_COLOR0_ATTRIB_num = 12,
+ CB_COLOR0_ATTRIB_offset = 51,
+ IGNORE_SHADER_ENGINE_TILING_bit = 1 << 3,
+ CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit = 1 << 4,
+ CB_COLOR0_ATTRIB__TILE_SPLIT_mask = 0x0f << 5,
+ CB_COLOR0_ATTRIB__TILE_SPLIT_shift = 5,
+/* ADDR_SURF_TILE_SPLIT_64B = 0x00, */
+/* ADDR_SURF_TILE_SPLIT_128B = 0x01, */
+/* ADDR_SURF_TILE_SPLIT_256B = 0x02, */
+/* ADDR_SURF_TILE_SPLIT_512B = 0x03, */
+/* ADDR_SURF_TILE_SPLIT_1KB = 0x04, */
+/* ADDR_SURF_TILE_SPLIT_2KB = 0x05, */
+/* ADDR_SURF_TILE_SPLIT_4KB = 0x06, */
+ CB_COLOR0_ATTRIB__NUM_BANKS_mask = 0x03 << 10,
+ CB_COLOR0_ATTRIB__NUM_BANKS_shift = 10,
+/* ADDR_SURF_2_BANK = 0x00, */
+/* ADDR_SURF_4_BANK = 0x01, */
+/* ADDR_SURF_8_BANK = 0x02, */
+/* ADDR_SURF_16_BANK = 0x03, */
+ CB_COLOR0_ATTRIB__BANK_WIDTH_mask = 0x03 << 13,
+ CB_COLOR0_ATTRIB__BANK_WIDTH_shift = 13,
+/* ADDR_SURF_BANK_WIDTH_1 = 0x00, */
+/* ADDR_SURF_BANK_WIDTH_2 = 0x01, */
+/* ADDR_SURF_BANK_WIDTH_4 = 0x02, */
+/* ADDR_SURF_BANK_WIDTH_8 = 0x03, */
+ CB_COLOR0_ATTRIB__BANK_HEIGHT_mask = 0x03 << 16,
+ CB_COLOR0_ATTRIB__BANK_HEIGHT_shift = 16,
+/* ADDR_SURF_BANK_HEIGHT_1 = 0x00, */
+/* ADDR_SURF_BANK_HEIGHT_2 = 0x01, */
+/* ADDR_SURF_BANK_HEIGHT_4 = 0x02, */
+/* ADDR_SURF_BANK_HEIGHT_8 = 0x03, */
+ CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_mask = 0x03 << 19,
+ CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift = 19,
+/* ADDR_SURF_MACRO_ASPECT_1 = 0x00, */
+/* ADDR_SURF_MACRO_ASPECT_2 = 0x01, */
+/* ADDR_SURF_MACRO_ASPECT_4 = 0x02, */
+/* ADDR_SURF_MACRO_ASPECT_8 = 0x03, */
+ FMASK_BANK_HEIGHT_mask = 0x03 << 22,
+ FMASK_BANK_HEIGHT_shift = 22,
+/* ADDR_SURF_BANK_HEIGHT_1 = 0x00, */
+/* ADDR_SURF_BANK_HEIGHT_2 = 0x01, */
+/* ADDR_SURF_BANK_HEIGHT_4 = 0x02, */
+/* ADDR_SURF_BANK_HEIGHT_8 = 0x03, */
+ CB_COLOR0_DIM = 0x00028c78,
+ CB_COLOR0_DIM_num = 12,
+ CB_COLOR0_DIM_offset = 51,
+ WIDTH_MAX_mask = 0xffff << 0,
+ WIDTH_MAX_shift = 0,
+ HEIGHT_MAX_mask = 0xffff << 16,
+ HEIGHT_MAX_shift = 16,
+ CB_COLOR0_CMASK = 0x00028c7c,
+ CB_COLOR0_CMASK_num = 8,
+ CB_COLOR0_CMASK_offset = 60,
+ CB_COLOR0_CMASK_SLICE = 0x00028c80,
+ CB_COLOR0_CMASK_SLICE_num = 8,
+ CB_COLOR0_CMASK_SLICE_offset = 60,
+ CB_COLOR0_CMASK_SLICE__TILE_MAX_mask = 0x3fff << 0,
+ CB_COLOR0_CMASK_SLICE__TILE_MAX_shift = 0,
+ CB_COLOR0_FMASK = 0x00028c84,
+ CB_COLOR0_FMASK_num = 8,
+ CB_COLOR0_FMASK_offset = 60,
+ CB_COLOR0_FMASK_SLICE = 0x00028c88,
+ CB_COLOR0_FMASK_SLICE_num = 8,
+ CB_COLOR0_FMASK_SLICE_offset = 60,
+ CB_COLOR0_FMASK_SLICE__TILE_MAX_mask = 0x3fffff << 0,
+ CB_COLOR0_FMASK_SLICE__TILE_MAX_shift = 0,
+ CB_COLOR0_CLEAR_WORD0 = 0x00028c8c,
+ CB_COLOR0_CLEAR_WORD0_num = 8,
+ CB_COLOR0_CLEAR_WORD0_offset = 60,
+ CB_COLOR0_CLEAR_WORD1 = 0x00028c90,
+ CB_COLOR0_CLEAR_WORD1_num = 8,
+ CB_COLOR0_CLEAR_WORD1_offset = 60,
+ CB_COLOR0_CLEAR_WORD2 = 0x00028c94,
+ CB_COLOR0_CLEAR_WORD2_num = 8,
+ CB_COLOR0_CLEAR_WORD2_offset = 60,
+ CB_COLOR0_CLEAR_WORD3 = 0x00028c98,
+ CB_COLOR0_CLEAR_WORD3_num = 8,
+ CB_COLOR0_CLEAR_WORD3_offset = 60,
+ SQ_ALU_CONST_CACHE_HS_0 = 0x00028f00,
+ SQ_ALU_CONST_CACHE_HS_0_num = 16,
+ SQ_ALU_CONST_CACHE_LS_0 = 0x00028f40,
+ SQ_ALU_CONST_CACHE_LS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_HS_0 = 0x00028f80,
+ SQ_ALU_CONST_BUFFER_SIZE_HS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_HS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_HS_0__DATA_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_LS_0 = 0x00028fc0,
+ SQ_ALU_CONST_BUFFER_SIZE_LS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_LS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_LS_0__DATA_shift = 0,
+ SQ_VTX_CONSTANT_WORD0_0 = 0x00030000,
+ SQ_TEX_RESOURCE_WORD0_0 = 0x00030000,
+ DIM_mask = 0x07 << 0,
+ DIM_shift = 0,
+ SQ_TEX_DIM_1D = 0x00,
+ SQ_TEX_DIM_2D = 0x01,
+ SQ_TEX_DIM_3D = 0x02,
+ SQ_TEX_DIM_CUBEMAP = 0x03,
+ SQ_TEX_DIM_1D_ARRAY = 0x04,
+ SQ_TEX_DIM_2D_ARRAY = 0x05,
+ SQ_TEX_DIM_2D_MSAA = 0x06,
+ SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07,
+/* IGNORE_SHADER_ENGINE_TILING_bit = 1 << 3, */
+ SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit= 1 << 5,
+ PITCH_mask = 0xfff << 6,
+ PITCH_shift = 6,
+ TEX_WIDTH_mask = 0x3fff << 18,
+ TEX_WIDTH_shift = 18,
+ SQ_VTX_CONSTANT_WORD1_0 = 0x00030004,
+ SQ_TEX_RESOURCE_WORD1_0 = 0x00030004,
+ TEX_HEIGHT_mask = 0x3fff << 0,
+ TEX_HEIGHT_shift = 0,
+ TEX_DEPTH_mask = 0x1fff << 14,
+ TEX_DEPTH_shift = 14,
+ SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask = 0x0f << 28,
+ SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift = 28,
+ SQ_VTX_CONSTANT_WORD2_0 = 0x00030008,
+ BASE_ADDRESS_HI_mask = 0xff << 0,
+ BASE_ADDRESS_HI_shift = 0,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask = 0x7ff << 8,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift = 8,
+ SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit = 1 << 19,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift = 20,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask = 0x03 << 26,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift = 26,
+/* SQ_NUM_FORMAT_NORM = 0x00, */
+/* SQ_NUM_FORMAT_INT = 0x01, */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */
+ SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit = 1 << 28,
+ SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit = 1 << 29,
+ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask = 0x03 << 30,
+ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift = 30,
+/* SQ_ENDIAN_NONE = 0x00, */
+/* SQ_ENDIAN_8IN16 = 0x01, */
+/* SQ_ENDIAN_8IN32 = 0x02, */
+ SQ_TEX_RESOURCE_WORD2_0 = 0x00030008,
+ SQ_VTX_CONSTANT_WORD3_0 = 0x0003000c,
+ SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit = 1 << 2,
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_mask = 0x07 << 3,
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift = 3,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_mask = 0x07 << 6,
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift = 6,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_mask = 0x07 << 9,
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_mask = 0x07 << 12,
+ SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD3_0 = 0x0003000c,
+ SQ_TEX_RESOURCE_WORD4_0 = 0x00030010,
+ FORMAT_COMP_X_mask = 0x03 << 0,
+ FORMAT_COMP_X_shift = 0,
+ SQ_FORMAT_COMP_UNSIGNED = 0x00,
+ SQ_FORMAT_COMP_SIGNED = 0x01,
+ SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02,
+ FORMAT_COMP_Y_mask = 0x03 << 2,
+ FORMAT_COMP_Y_shift = 2,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ FORMAT_COMP_Z_mask = 0x03 << 4,
+ FORMAT_COMP_Z_shift = 4,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ FORMAT_COMP_W_mask = 0x03 << 6,
+ FORMAT_COMP_W_shift = 6,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask = 0x03 << 8,
+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift = 8,
+/* SQ_NUM_FORMAT_NORM = 0x00, */
+/* SQ_NUM_FORMAT_INT = 0x01, */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */
+ SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit = 1 << 10,
+ SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit = 1 << 11,
+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask = 0x03 << 12,
+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift = 12,
+/* SQ_ENDIAN_NONE = 0x00, */
+/* SQ_ENDIAN_8IN16 = 0x01, */
+/* SQ_ENDIAN_8IN32 = 0x02, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask = 0x07 << 16,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift = 16,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask = 0x07 << 19,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift = 19,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask = 0x07 << 22,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift = 22,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask = 0x07 << 25,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift = 25,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ BASE_LEVEL_mask = 0x0f << 28,
+ BASE_LEVEL_shift = 28,
+ SQ_VTX_CONSTANT_WORD4_0 = 0x00030010,
+ SQ_TEX_RESOURCE_WORD5_0 = 0x00030014,
+ LAST_LEVEL_mask = 0x0f << 0,
+ LAST_LEVEL_shift = 0,
+ BASE_ARRAY_mask = 0x1fff << 4,
+ BASE_ARRAY_shift = 4,
+ LAST_ARRAY_mask = 0x1fff << 17,
+ LAST_ARRAY_shift = 17,
+ SQ_TEX_RESOURCE_WORD6_0 = 0x00030018,
+ PERF_MODULATION_mask = 0x07 << 3,
+ PERF_MODULATION_shift = 3,
+ INTERLACED_bit = 1 << 6,
+ SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_mask = 0xfff << 8,
+ SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift = 8,
+ SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_mask = 0x07 << 29,
+ SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift = 29,
+ SQ_ADDR_SURF_TILE_SPLIT_64B = 0x00,
+ SQ_ADDR_SURF_TILE_SPLIT_128B = 0x01,
+ SQ_ADDR_SURF_TILE_SPLIT_256B = 0x02,
+ SQ_ADDR_SURF_TILE_SPLIT_512B = 0x03,
+ SQ_ADDR_SURF_TILE_SPLIT_1KB = 0x04,
+ SQ_ADDR_SURF_TILE_SPLIT_2KB = 0x05,
+ SQ_ADDR_SURF_TILE_SPLIT_4KB = 0x06,
+ SQ_VTX_CONSTANT_WORD7_0 = 0x0003001c,
+ SQ_VTX_CONSTANT_WORD7_0__TYPE_mask = 0x03 << 30,
+ SQ_VTX_CONSTANT_WORD7_0__TYPE_shift = 30,
+ SQ_TEX_VTX_INVALID_TEXTURE = 0x00,
+ SQ_TEX_VTX_INVALID_BUFFER = 0x01,
+ SQ_TEX_VTX_VALID_TEXTURE = 0x02,
+ SQ_TEX_VTX_VALID_BUFFER = 0x03,
+ SQ_TEX_RESOURCE_WORD7_0 = 0x0003001c,
+ SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask = 0x3f << 0,
+ SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift = 0,
+ SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_mask = 0x03 << 6,
+ SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift = 6,
+ SQ_ADDR_SURF_MACRO_ASPECT_1 = 0x00,
+ SQ_ADDR_SURF_MACRO_ASPECT_2 = 0x01,
+ SQ_ADDR_SURF_MACRO_ASPECT_4 = 0x02,
+ SQ_ADDR_SURF_MACRO_ASPECT_8 = 0x03,
+ SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_mask = 0x03 << 8,
+ SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift = 8,
+ SQ_ADDR_SURF_BANK_WH_1 = 0x00,
+ SQ_ADDR_SURF_BANK_WH_2 = 0x01,
+ SQ_ADDR_SURF_BANK_WH_4 = 0x02,
+ SQ_ADDR_SURF_BANK_WH_8 = 0x03,
+ SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_mask = 0x03 << 10,
+ SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift = 10,
+/* SQ_ADDR_SURF_BANK_WH_1 = 0x00, */
+/* SQ_ADDR_SURF_BANK_WH_2 = 0x01, */
+/* SQ_ADDR_SURF_BANK_WH_4 = 0x02, */
+/* SQ_ADDR_SURF_BANK_WH_8 = 0x03, */
+ DEPTH_SAMPLE_ORDER_bit = 1 << 15,
+ SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_mask = 0x03 << 16,
+ SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift = 16,
+ SQ_ADDR_SURF_2_BANK = 0x00,
+ SQ_ADDR_SURF_4_BANK = 0x01,
+ SQ_ADDR_SURF_8_BANK = 0x02,
+ SQ_ADDR_SURF_16_BANK = 0x03,
+ SQ_TEX_RESOURCE_WORD7_0__TYPE_mask = 0x03 << 30,
+ SQ_TEX_RESOURCE_WORD7_0__TYPE_shift = 30,
+/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */
+/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */
+/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */
+/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */
+ SQ_LOOP_CONST_DX10_0 = 0x0003a200,
+ SQ_LOOP_CONST_0 = 0x0003a200,
+ SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0,
+ SQ_LOOP_CONST_0__COUNT_shift = 0,
+ INIT_mask = 0xfff << 12,
+ INIT_shift = 12,
+ INC_mask = 0xff << 24,
+ INC_shift = 24,
+ SQ_JUMPTABLE_CONST_0 = 0x0003a200,
+ CONST_A_mask = 0xff << 0,
+ CONST_A_shift = 0,
+ CONST_B_mask = 0xff << 8,
+ CONST_B_shift = 8,
+ CONST_C_mask = 0xff << 16,
+ CONST_C_shift = 16,
+ CONST_D_mask = 0xff << 24,
+ CONST_D_shift = 24,
+ SQ_BOOL_CONST_0 = 0x0003a500,
+ SQ_BOOL_CONST_0_num = 6,
+ SQ_TEX_SAMPLER_WORD0_0 = 0x0003c000,
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x07 << 0,
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0,
+ SQ_TEX_WRAP = 0x00,
+ SQ_TEX_MIRROR = 0x01,
+ SQ_TEX_CLAMP_LAST_TEXEL = 0x02,
+ SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03,
+ SQ_TEX_CLAMP_HALF_BORDER = 0x04,
+ SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05,
+ SQ_TEX_CLAMP_BORDER = 0x06,
+ SQ_TEX_MIRROR_ONCE_BORDER = 0x07,
+ CLAMP_Y_mask = 0x07 << 3,
+ CLAMP_Y_shift = 3,
+/* SQ_TEX_WRAP = 0x00, */
+/* SQ_TEX_MIRROR = 0x01, */
+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */
+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */
+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */
+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */
+/* SQ_TEX_CLAMP_BORDER = 0x06, */
+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */
+ CLAMP_Z_mask = 0x07 << 6,
+ CLAMP_Z_shift = 6,
+/* SQ_TEX_WRAP = 0x00, */
+/* SQ_TEX_MIRROR = 0x01, */
+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */
+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */
+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */
+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */
+/* SQ_TEX_CLAMP_BORDER = 0x06, */
+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */
+ XY_MAG_FILTER_mask = 0x03 << 9,
+ XY_MAG_FILTER_shift = 9,
+ SQ_TEX_XY_FILTER_POINT = 0x00,
+ SQ_TEX_XY_FILTER_BILINEAR = 0x01,
+ XY_MIN_FILTER_mask = 0x03 << 11,
+ XY_MIN_FILTER_shift = 11,
+/* SQ_TEX_XY_FILTER_POINT = 0x00, */
+/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */
+ Z_FILTER_mask = 0x03 << 13,
+ Z_FILTER_shift = 13,
+ SQ_TEX_Z_FILTER_NONE = 0x00,
+ SQ_TEX_Z_FILTER_POINT = 0x01,
+ SQ_TEX_Z_FILTER_LINEAR = 0x02,
+ MIP_FILTER_mask = 0x03 << 15,
+ MIP_FILTER_shift = 15,
+/* SQ_TEX_Z_FILTER_NONE = 0x00, */
+/* SQ_TEX_Z_FILTER_POINT = 0x01, */
+/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */
+ BORDER_COLOR_TYPE_mask = 0x03 << 20,
+ BORDER_COLOR_TYPE_shift = 20,
+ SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00,
+ SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01,
+ SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02,
+ SQ_TEX_BORDER_COLOR_REGISTER = 0x03,
+ DEPTH_COMPARE_FUNCTION_mask = 0x07 << 22,
+ DEPTH_COMPARE_FUNCTION_shift = 22,
+ SQ_TEX_DEPTH_COMPARE_NEVER = 0x00,
+ SQ_TEX_DEPTH_COMPARE_LESS = 0x01,
+ SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02,
+ SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03,
+ SQ_TEX_DEPTH_COMPARE_GREATER = 0x04,
+ SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05,
+ SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06,
+ SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07,
+ CHROMA_KEY_mask = 0x03 << 25,
+ CHROMA_KEY_shift = 25,
+ SQ_TEX_CHROMA_KEY_DISABLED = 0x00,
+ SQ_TEX_CHROMA_KEY_KILL = 0x01,
+ SQ_TEX_CHROMA_KEY_BLEND = 0x02,
+ SQ_TEX_SAMPLER_WORD1_0 = 0x0003c004,
+ SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_mask = 0xfff << 0,
+ SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift = 0,
+ MAX_LOD_mask = 0xfff << 12,
+ MAX_LOD_shift = 12,
+ PERF_MIP_mask = 0x0f << 24,
+ PERF_MIP_shift = 24,
+ PERF_Z_mask = 0x0f << 28,
+ PERF_Z_shift = 28,
+ SQ_TEX_SAMPLER_WORD2_0 = 0x0003c008,
+ SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_mask = 0x3fff << 0,
+ SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift = 0,
+ LOD_BIAS_SEC_mask = 0x3f << 14,
+ LOD_BIAS_SEC_shift = 14,
+ MC_COORD_TRUNCATE_bit = 1 << 20,
+ SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 21,
+ TRUNCATE_COORD_bit = 1 << 28,
+ SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit = 1 << 29,
+ SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31,
+ SQ_VTX_BASE_VTX_LOC = 0x0003cff0,
+ SQ_VTX_START_INST_LOC = 0x0003cff4,
+ SQ_TEX_SAMPLER_CLEAR = 0x0003ff00,
+ SQ_TEX_RESOURCE_CLEAR = 0x0003ff04,
+ SQ_LOOP_BOOL_CLEAR = 0x0003ff08,
+
+} ;
+
+#endif /* _EVERGREEN_REG_AUTO_H */
+
diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c
new file mode 100644
index 00000000..64e96d89
--- /dev/null
+++ b/src/evergreen_shader.c
@@ -0,0 +1,2790 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Alex Deucher <alexander.deucher@amd.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include "evergreen_shader.h"
+#include "evergreen_reg.h"
+
+/* solid vs ---------------------------------------
+ * Writes the "solid" vertex shader program into shader[] starting at
+ * index 0 and returns the number of dwords written (12), so shader[]
+ * must have room for at least that many entries.  ChipSet is not
+ * referenced in this function.
+ * The numbered comments below count 64-bit slots (two dwords each);
+ * ADDR(4) in slot 0 matches the fetch clause that begins at dword 8.
+ */
+int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 - CF: vertex fetch clause (SQ_CF_INST_VC) at slot 4, I_COUNT(1) */
+ shader[i++] = CF_DWORD0(ADDR(4),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_VC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 1 - CF: export GPR 1 as position (SQ_EXPORT_POS, CF_POS0) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 2 - always export a param whether it's used or not (GPR 0 here);
+ * this is the CF instruction that carries END_OF_PROGRAM(1) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+ /* 3 - padding; keeps the fetch clause at slot 4 to match ADDR(4) above */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+ /* 4/5 - fetch FMT_32_32_FLOAT from buffer 0, offset 0, into GPR 1;
+ * destination selects give (x, y, 0, 1) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+
+ return i;
+}
+
+/* solid ps ---------------------------------------
+ * Writes the "solid" pixel shader program into shader[] starting at
+ * index 0 and returns the number of dwords written (12), so shader[]
+ * must have room for at least that many entries.  ChipSet is not
+ * referenced in this function.
+ * The program is one ALU clause of four MOVs that copy the x/y/z/w
+ * elements of kcache0 constant 0 into GPR 0, followed by a pixel export
+ * of GPR 0.  The numbered comments count 64-bit slots (two dwords each).
+ */
+int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 - CF: ALU clause at slot 2, I_COUNT(4), kcache bank 0 in LOCK_1 mode */
+ shader[i++] = CF_ALU_DWORD0(ADDR(2),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 1 - CF: export GPR 0 to CF_PIXEL_MRT0; carries END_OF_PROGRAM(1) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+
+ /* 2 - r0.x = MOV(kcache0[0].x), with CLAMP(1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* 3 - r0.y = MOV(kcache0[0].y), with CLAMP(1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 4 - r0.z = MOV(kcache0[0].z), with CLAMP(1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* 5 - r0.w = MOV(kcache0[0].w), with CLAMP(1); LAST(1) is set on this one */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+
+ return i;
+}
+
+/* copy vs ---------------------------------------
+ * Writes the "copy" vertex shader program into shader[] starting at
+ * index 0 and returns the number of dwords written (16), so shader[]
+ * must have room for at least that many entries.  ChipSet is not
+ * referenced in this function.
+ * Two vertex fetches load GPR 1 (exported as position) and GPR 0
+ * (exported as param 0) from buffer 0 at byte offsets 0 and 8.
+ * The numbered comments count 64-bit slots (two dwords each).
+ */
+int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 - CF: vertex fetch clause (SQ_CF_INST_VC) at slot 4, I_COUNT(2) */
+ shader[i++] = CF_DWORD0(ADDR(4),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_VC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 1 - CF: export GPR 1 as position (SQ_EXPORT_POS, CF_POS0) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 2 - CF: export GPR 0 as param 0; carries END_OF_PROGRAM(1) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+ /* 3 - padding; keeps the fetch clause at slot 4 to match ADDR(4) above */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+ /* 4/5 - fetch FMT_32_32_FLOAT from buffer 0, offset 0, into GPR 1 as
+ * (x, y, 0, 1); MEGA_FETCH(1) with MEGA_FETCH_COUNT(16) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 6/7 - fetch FMT_32_32_FLOAT from buffer 0, offset 8, into GPR 0 as
+ * (x, y, 0, 1); MEGA_FETCH(0) for this second fetch */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+
+ return i;
+}
+
+/* copy ps ---------------------------------------
+ * Writes the "copy" pixel shader program into shader[] starting at
+ * index 0 and returns the number of dwords written (20), so shader[]
+ * must have room for at least that many entries.  ChipSet is not
+ * referenced in this function.
+ * Layout: three CF instructions (ALU clause, texture clause, pixel
+ * export), four INTERP_XY ALU instructions, one padding slot, and one
+ * texture SAMPLE instruction.  The numbered comments count 64-bit slots
+ * (two dwords each).
+ */
+int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* CF INST 0 - ALU clause at slot 3, I_COUNT(4), no kcache locked */
+ shader[i++] = CF_ALU_DWORD0(ADDR(3),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* CF INST 1 - texture clause (SQ_CF_INST_TC) at slot 8, I_COUNT(1) */
+ shader[i++] = CF_DWORD0(ADDR(8),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_TC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* CF INST 2 - export GPR 0 to CF_PIXEL_MRT0; carries END_OF_PROGRAM(1) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+
+ /* 3 interpolate tex coords: INTERP_XY of r0.y with param 0, written to r0.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 4 - INTERP_XY of r0.x with param 0, written to r0.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 5 - INTERP_XY targeting r0.z with WRITE_MASK(0), so the result is not written */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 6 - INTERP_XY targeting r0.w with WRITE_MASK(0); LAST(1) is set on this one */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 7 - padding; keeps the texture clause at slot 8 to match ADDR(8) above */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+
+ /* 8/9 TEX INST 0 - SAMPLE resource 0 / sampler 0 using GPR 0 as the
+ * coordinate (x, y, 0, 1), unnormalized; result goes back into GPR 0 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X), /* R */
+ DST_SEL_Y(SQ_SEL_Y), /* G */
+ DST_SEL_Z(SQ_SEL_Z), /* B */
+ DST_SEL_W(SQ_SEL_W), /* A */
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_UNNORMALIZED),
+ COORD_TYPE_Y(TEX_UNNORMALIZED),
+ COORD_TYPE_Z(TEX_UNNORMALIZED),
+ COORD_TYPE_W(TEX_UNNORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ return i;
+}
+/*
+ * xv vs ---------------------------------------
+ * Writes the "xv" vertex shader program into shader[] starting at
+ * index 0 and returns the number of dwords written (20), so shader[]
+ * must have room for at least that many entries.  ChipSet is not
+ * referenced in this function.
+ * Two vertex fetches load GPR 1 (exported as position) and GPR 0; an ALU
+ * clause then multiplies r0.x and r0.y by the x and y elements of kcache0
+ * constant 0 before GPR 0 is exported as param 0.
+ * The numbered comments count 64-bit slots (two dwords each).
+ */
+int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 - CF: vertex fetch clause (SQ_CF_INST_VC) at slot 6, I_COUNT(2) */
+ shader[i++] = CF_DWORD0(ADDR(6),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_VC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 1 - ALU clause at slot 4, I_COUNT(2), kcache bank 0 in LOCK_1 mode */
+ shader[i++] = CF_ALU_DWORD0(ADDR(4),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(2),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 2 - CF: export GPR 1 as position (SQ_EXPORT_POS, CF_POS0) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(3));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 3 - CF: export GPR 0 as param 0; carries END_OF_PROGRAM(1) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(3));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+
+
+ /* 4 texX / w: r0.x = MUL(r0.x, kcache0[0].x); the instruction is a
+ * multiply, so the constant presumably holds 1/w - confirm at the caller */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 5 texY / h: r0.y = MUL(r0.y, kcache0[0].y); the constant presumably
+ * holds 1/h - confirm at the caller.  LAST(1) is set on this one */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 6/7 - fetch FMT_32_32_FLOAT from buffer 0, offset 0, into GPR 1 as
+ * (x, y, 0, 1); MEGA_FETCH(1) with MEGA_FETCH_COUNT(16) */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 8/9 - fetch FMT_32_32_FLOAT from buffer 0, offset 8, into GPR 0 as
+ * (x, y, 0, 1); MEGA_FETCH(0) for this second fetch */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+
+ return i;
+}
+
+int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(5),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 1 */
+ shader[i++] = CF_DWORD0(ADDR(21),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 2 */
+ shader[i++] = CF_DWORD0(ADDR(30),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 3 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(9),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(12),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 4 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(3));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 5 interpolate tex coords */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 6 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 7 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 8 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 9,10,11,12 */
+ /* r2.x = MAD(c0.w, r1.x, c0.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* r2.y = MAD(c0.w, r1.x, c0.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* r2.z = MAD(c0.w, r1.x, c0.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 13,14,15,16 */
+ /* r2.x = MAD(c1.x, r1.y, pv.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* r2.y = MAD(c1.y, r1.y, pv.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* r2.z = MAD(c1.z, r1.y, pv.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+ /* 17,18,19,20 */
+ /* r2.x = MAD(c2.x, r1.z, pv.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* r2.y = MAD(c2.y, r1.z, pv.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* r2.z = MAD(c2.z, r1.z, pv.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+
+ /* 21 */
+ shader[i++] = CF_DWORD0(ADDR(24),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(3),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_TC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 22 */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 23 */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+ /* 24/25 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_MASK),
+ DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_W(SQ_SEL_1),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 26/27 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_MASK),
+ DST_SEL_Z(SQ_SEL_X),
+ DST_SEL_W(SQ_SEL_MASK),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 28/29 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(2),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_X),
+ DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_W(SQ_SEL_MASK),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(2),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 30 */
+ shader[i++] = CF_DWORD0(ADDR(32),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_TC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 31 */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 32/33 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_MASK),
+ DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_W(SQ_SEL_1),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 34/35 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_X),
+ DST_SEL_Z(SQ_SEL_Y),
+ DST_SEL_W(SQ_SEL_MASK),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ return i;
+}
+
+/* comp vs ---------------------------------------
+ * Writes the composite vertex shader program into shader[], one dword per
+ * shader[i++] store, and returns the number of dwords written.  No bounds
+ * checking is done on shader[]; ChipSet is not referenced in this function.
+ *
+ * The numbers in the comments below are instruction slots of two dwords
+ * each (slot = dword index / 2); they are what the ADDR() fields refer to:
+ *   0-2    main program: two conditional CALLs, then end of program
+ *   3-8    mask subroutine (vertex fetch, ALU, three exports, RETURN)
+ *   9-13   non-mask subroutine (vertex fetch, ALU, two exports, RETURN)
+ *   14-25  ALU instructions for the mask subroutine
+ *   26-31  ALU instructions for the non-mask subroutine
+ *   32-37  vertex fetches for the mask subroutine (two slots each)
+ *   38-41  vertex fetches for the non-mask subroutine (two slots each)
+ *
+ * Notation in the ALU comments: rN is ALU_SRC_GPR_BASE + N, cN is
+ * ALU_SRC_KCACHE0_BASE + N, MAD(src0, src1, src2) is SQ_OP3_INST_MULADD and
+ * MUL(src0, src1) is SQ_OP2_INST_MUL with those operands.
+ */
+int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 - CALL the mask subroutine at slot 3 when COND_BOOL holds */
+ shader[i++] = CF_DWORD0(ADDR(3),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 1 - CALL the non-mask subroutine at slot 9 when COND_NOT_BOOL holds */
+ shader[i++] = CF_DWORD0(ADDR(9),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 2 - NOP flagged END_OF_PROGRAM */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_NOP),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 3 - mask sub: vertex fetch clause, 3 fetches starting at slot 32 */
+ shader[i++] = CF_DWORD0(ADDR(32),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(3),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_VC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 4 - ALU clause: 12 instructions starting at slot 14 (slots 14-25) */
+ shader[i++] = CF_ALU_DWORD0(ADDR(14),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(12),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 5 - dst: export r2 as position (x, y, 0, 1) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 6 - src: export r1 as parameter 0 (x, y, 0, 1) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT),
+ MARK(0),
+ BARRIER(0));
+ /* 7 - mask: export r0 as parameter 1 (x, y, 0, 1)
+ * NOTE(review): this export passes WHOLE_QUAD_MODE(0) in the position
+ * where every other export in this function passes MARK(0); looks like a
+ * copy/paste slip - confirm the two macros expand identically.
+ */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 8 - RETURN from the mask subroutine */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 9 - non-mask sub: vertex fetch clause, 2 fetches starting at slot 38 */
+ shader[i++] = CF_DWORD0(ADDR(38),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_VC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 10 - ALU clause: 6 instructions starting at slot 26 (slots 26-31) */
+ shader[i++] = CF_ALU_DWORD0(ADDR(26),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(6),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 11 - dst: export r1 as position (x, y, 0, 1)
+ * NOTE(review): the exports in this subroutine use BURST_COUNT(0) while
+ * the mask subroutine's exports (slots 5-7) use BURST_COUNT(1) - confirm
+ * which value is intended.
+ */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 12 - src: export r0 as parameter 0 (x, y, 0, 1) */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ BURST_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(0));
+ /* 13 - RETURN from the non-mask subroutine */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* mask alu - 14 srcX MAD: r1.z = MAD(c0.y, r1.y, c0.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 15 srcY MAD: r1.w = MAD(c1.y, r1.y, c1.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 16 srcX MAD: r1.x = MAD(c0.x, r1.x, r1.z); LAST(0), grouped with 17 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 17 srcY MAD: r1.y = MAD(c1.x, r1.x, r1.w) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 18 maskX MAD: r0.z = MAD(c2.y, r0.y, c2.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 19 maskY MAD: r0.w = MAD(c3.y, r0.y, c3.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 20 maskX MAD: r0.x = MAD(c2.x, r0.x, r0.z); LAST(0), grouped with 21 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 21 maskY MAD: r0.y = MAD(c3.x, r0.x, r0.w) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 22 srcX / w: r1.x = MUL(r1.x, c0.w); presumably c0.w holds 1/w - TODO confirm */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 23 srcY / h: r1.y = MUL(r1.y, c1.w); presumably c1.w holds 1/h - TODO confirm */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 24 maskX / w: r0.x = MUL(r0.x, c2.w) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 25 maskY / h: r0.y = MUL(r0.y, c3.w) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* no mask alu - 26 srcX MAD: r0.z = MAD(c0.y, r0.y, c0.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 27 srcY MAD: r0.w = MAD(c1.y, r0.y, c1.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 28 srcX MAD: r0.x = MAD(c0.x, r0.x, r0.z); LAST(0), grouped with 29 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 29 srcY MAD: r0.y = MAD(c1.x, r0.x, r0.w) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 30 srcX / w: r0.x = MUL(r0.x, c0.w) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 31 srcY / h: r0.y = MUL(r0.y, c1.w) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* mask vfetch - 32/33 - dst: r2 = (x, y, 0, 1), FMT_32_32_FLOAT at offset 0 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(24));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 34/35 - src: r1 = (x, y, 1, 0), FMT_32_32_FLOAT at offset 8 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 36/37 - mask: r0 = (x, y, 1, 0), FMT_32_32_FLOAT at offset 16 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(16),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+
+ /* no mask vfetch - 38/39 - dst: r1 = (x, y, 0, 1), FMT_32_32_FLOAT at offset 0 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 40/41 - src: r0 = (x, y, 1, 0), FMT_32_32_FLOAT at offset 8 */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0),
+ ALT_CONST(0),
+ BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = VTX_DWORD_PAD;
+
+ return i;
+}
+
+/* comp ps --------------------------------------- */
+int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 */
+ shader[i++] = CF_DWORD0(ADDR(3),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 1 */
+ shader[i++] = CF_DWORD0(ADDR(8),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 2 */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_NOP),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 3 - mask sub */
+ shader[i++] = CF_ALU_DWORD0(ADDR(12),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(8),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 4 */
+ shader[i++] = CF_DWORD0(ADDR(28),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_TC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 5 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(20),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 6 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+ /* 7 */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 8 - non-mask sub */
+ shader[i++] = CF_ALU_DWORD0(ADDR(24),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 9 */
+ shader[i++] = CF_DWORD0(ADDR(32),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_TC),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 10 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+
+ /* 11 */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 12 interpolate src tex coords - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 13 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 14 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 15 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 16 interpolate mask tex coords */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 17 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 18 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 19 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 20 - alu 0 */
+ /* MUL gpr[2].x gpr[0].x gpr[1].x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* 21 - alu 1 */
+ /* MUL gpr[2].y gpr[0].y gpr[1].y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 22 - alu 2 */
+ /* MUL gpr[2].z gpr[0].z gpr[1].z */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* 23 - alu 3 */
+ /* MUL gpr[2].w gpr[0].w gpr[1].w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+
+ /* 24 - interpolate tex coords - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 25 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 26 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 27 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_INTERP_XY),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 28/29 - src - mask */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(1),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 30/31 - mask */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ /* 32/33 - src - non-mask */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ return i;
+}
diff --git a/src/evergreen_shader.h b/src/evergreen_shader.h
new file mode 100644
index 00000000..41066191
--- /dev/null
+++ b/src/evergreen_shader.h
@@ -0,0 +1,292 @@
+/*
+ * Evergreen shaders
+ *
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Shader macros
+ */
+
+#ifndef __SHADER_H__
+#define __SHADER_H__
+
+#include "radeon.h"
+
+/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */
+
+
+// CF insts
+// addr
+#define ADDR(x) (x)
+// jumptable
+#define JUMPTABLE_SEL(x) (x)
+// pc
+#define POP_COUNT(x) (x)
+// const
+#define CF_CONST(x) (x)
+// cond
+#define COND(x) (x) // SQ_COND_*
+// count
+#define I_COUNT(x) ((x) ? ((x) - 1) : 0)
+// vpm
+#define VALID_PIXEL_MODE(x) (x)
+// eop
+#define END_OF_PROGRAM(x) (x)
+// cf inst
+#define CF_INST(x) (x) // SQ_CF_INST_*
+// wqm
+#define WHOLE_QUAD_MODE(x) (x)
+// barrier
+#define BARRIER(x) (x)
+//kb0
+#define KCACHE_BANK0(x) (x)
+//kb1
+#define KCACHE_BANK1(x) (x)
+// km0/1
+#define KCACHE_MODE0(x) (x)
+#define KCACHE_MODE1(x) (x) // SQ_CF_KCACHE_*
+//
+#define KCACHE_ADDR0(x) (x)
+#define KCACHE_ADDR1(x) (x)
+
+#define ALT_CONST(x) (x)
+
+#define ARRAY_BASE(x) (x)
+// export pixel
+#define CF_PIXEL_MRT0 0
+#define CF_PIXEL_MRT1 1
+#define CF_PIXEL_MRT2 2
+#define CF_PIXEL_MRT3 3
+#define CF_PIXEL_MRT4 4
+#define CF_PIXEL_MRT5 5
+#define CF_PIXEL_MRT6 6
+#define CF_PIXEL_MRT7 7
+// computed Z
+#define CF_COMPUTED_Z 61
+// export pos
+#define CF_POS0 60
+#define CF_POS1 61
+#define CF_POS2 62
+#define CF_POS3 63
+// export param
+// 0...31
+#define TYPE(x) (x) // SQ_EXPORT_*
+#define RW_GPR(x) (x)
+#define RW_REL(x) (x)
+#define ABSOLUTE 0
+#define RELATIVE 1
+#define INDEX_GPR(x) (x)
+#define ELEM_SIZE(x) ((x) ? ((x) - 1) : 0) /* field holds size - 1; 0 stays 0 */
+#define BURST_COUNT(x) ((x) ? ((x) - 1) : 0) /* field holds count - 1; 0 stays 0 */
+#define MARK(x) (x)
+
+// swiz
+#define SRC_SEL_X(x) (x) // SQ_SEL_* each
+#define SRC_SEL_Y(x) (x)
+#define SRC_SEL_Z(x) (x)
+#define SRC_SEL_W(x) (x)
+
+#define CF_DWORD0(addr, jmptbl) ((addr) | ((jmptbl) << 24))
+#define CF_DWORD1(pc, cf_const, cond, count, vpm, eop, cf_inst, wqm, b) \
+ (((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | ((count) << 10) | \
+ ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | ((wqm) << 30) | ((b) << 31))
+
+#define CF_ALU_DWORD0(addr, kb0, kb1, km0) (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30))
+#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, alt_const, cf_inst, wqm, b) \
+ (((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \
+ ((count) << 18) | ((alt_const) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31))
+
+#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
+ (((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | \
+ ((index_gpr) << 23) | ((es) << 30))
+#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, bc, vpm, eop, cf_inst, m, b) \
+ (((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | \
+ ((bc) << 16) | ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | \
+ ((m) << 30) | ((b) << 31))
+
+// ALU clause insts
+#define SRC0_SEL(x) (x)
+#define SRC1_SEL(x) (x)
+#define SRC2_SEL(x) (x)
+// src[0-2]_sel
+// 0-127 GPR
+// 128-159 kcache constants bank 0
+// 160-191 kcache constants bank 1
+// 192-255 inline const values
+// 256-287 kcache constants bank 2
+// 288-319 kcache constants bank 3
+// 219-255 special SQ_ALU_SRC_* (0, 1, etc.)
+// 448-480 src param space (ALU_SRC_PARAM_BASE)
+#define ALU_SRC_GPR_BASE 0
+#define ALU_SRC_KCACHE0_BASE 128
+#define ALU_SRC_KCACHE1_BASE 160
+#define ALU_SRC_INLINE_K_BASE 192
+#define ALU_SRC_KCACHE2_BASE 256
+#define ALU_SRC_KCACHE3_BASE 288
+#define ALU_SRC_PARAM_BASE 448
+
+#define SRC0_REL(x) (x)
+#define SRC1_REL(x) (x)
+#define SRC2_REL(x) (x)
+// elem
+#define SRC0_ELEM(x) (x)
+#define SRC1_ELEM(x) (x)
+#define SRC2_ELEM(x) (x)
+#define ELEM_X 0
+#define ELEM_Y 1
+#define ELEM_Z 2
+#define ELEM_W 3
+// neg
+#define SRC0_NEG(x) (x)
+#define SRC1_NEG(x) (x)
+#define SRC2_NEG(x) (x)
+// im
+#define INDEX_MODE(x) (x) // SQ_INDEX_*
+// ps
+#define PRED_SEL(x) (x) // SQ_PRED_SEL_*
+// last
+#define LAST(x) (x)
+// abs
+#define SRC0_ABS(x) (x)
+#define SRC1_ABS(x) (x)
+// uem
+#define UPDATE_EXECUTE_MASK(x) (x)
+// up
+#define UPDATE_PRED(x) (x)
+// wm
+#define WRITE_MASK(x) (x)
+// omod
+#define OMOD(x) (x) // SQ_ALU_OMOD_*
+// alu inst
+#define ALU_INST(x) (x) // SQ_ALU_INST_*
+//bs
+#define BANK_SWIZZLE(x) (x) // SQ_ALU_VEC_*
+#define DST_GPR(x) (x)
+#define DST_REL(x) (x)
+#define DST_ELEM(x) (x)
+#define CLAMP(x) (x)
+
+#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
+ (((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \
+ ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \
+ ((im) << 26) | ((ps) << 29) | ((last) << 31))
+
+#define ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
+ (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
+ ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \
+ ((dr) << 28) | ((de) << 29) | ((clamp) << 31))
+
+#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
+ (((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \
+ ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \
+ ((de) << 29) | ((clamp) << 31))
+
+// VTX clause insts
+// vtx insts
+#define VTX_INST(x) (x) // SQ_VTX_INST_*
+
+// fetch type
+#define FETCH_TYPE(x) (x) // SQ_VTX_FETCH_*
+
+#define FETCH_WHOLE_QUAD(x) (x)
+#define BUFFER_ID(x) (x)
+#define SRC_GPR(x) (x)
+#define SRC_REL(x) (x)
+#define MEGA_FETCH_COUNT(x) ((x) ? ((x) - 1) : 0)
+
+#define DST_SEL_X(x) (x)
+#define DST_SEL_Y(x) (x)
+#define DST_SEL_Z(x) (x)
+#define DST_SEL_W(x) (x)
+#define USE_CONST_FIELDS(x) (x)
+#define DATA_FORMAT(x) (x)
+// num format
+#define NUM_FORMAT_ALL(x) (x) // SQ_NUM_FORMAT_*
+// format comp
+#define FORMAT_COMP_ALL(x) (x) // SQ_FORMAT_COMP_*
+// sma
+#define SRF_MODE_ALL(x) (x)
+#define SRF_MODE_ZERO_CLAMP_MINUS_ONE 0
+#define SRF_MODE_NO_ZERO 1
+#define OFFSET(x) (x)
+// endian swap
+#define ENDIAN_SWAP(x) (x) // SQ_ENDIAN_*
+#define CONST_BUF_NO_STRIDE(x) (x)
+// mf
+#define MEGA_FETCH(x) (x)
+#define BUFFER_INDEX_MODE(x) (x)
+
+#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \
+ (((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \
+ ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26))
+#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
+ (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+ ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))
+#define VTX_DWORD2(offset, es, cbns, mf, alt_const, bim) \
+ (((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19) | ((alt_const) << 20) | ((bim) << 21))
+#define VTX_DWORD_PAD 0x00000000
+
+// TEX clause insts
+// tex insts
+#define TEX_INST(x) (x) // SQ_TEX_INST_*
+#define INST_MOD(x) (x)
+#define FETCH_WHOLE_QUAD(x) (x)
+#define RESOURCE_ID(x) (x)
+#define RESOURCE_INDEX_MODE(x) (x)
+#define SAMPLER_INDEX_MODE(x) (x)
+
+#define LOD_BIAS(x) (x)
+//ct
+#define COORD_TYPE_X(x) (x)
+#define COORD_TYPE_Y(x) (x)
+#define COORD_TYPE_Z(x) (x)
+#define COORD_TYPE_W(x) (x)
+#define TEX_UNNORMALIZED 0
+#define TEX_NORMALIZED 1
+#define OFFSET_X(x) (((int)(x) * 2) & 0x1f) /* 4:1-bits 2's-complement fixed-point: [-8.0..7.5] */
+#define OFFSET_Y(x) (((int)(x) * 2) & 0x1f)
+#define OFFSET_Z(x) (((int)(x) * 2) & 0x1f)
+#define SAMPLER_ID(x) (x)
+
+#define TEX_DWORD0(tex_inst, im, fwq, resource_id, src_gpr, sr, ac, rim, sim) \
+ (((tex_inst) << 0) | ((im) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \
+ ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24) | ((rim) << 25) | ((sim) << 27))
+#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
+ (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+ ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31))
+#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
+ (((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \
+ ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29))
+#define TEX_DWORD_PAD 0x00000000
+
+extern int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+extern int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+extern int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
+extern int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader);
+
+extern int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+#endif
diff --git a/src/evergreen_state.h b/src/evergreen_state.h
new file mode 100644
index 00000000..5869256e
--- /dev/null
+++ b/src/evergreen_state.h
@@ -0,0 +1,338 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Alex Deucher <alexander.deucher@amd.com>
+ *
+ */
+
+#ifndef __EVERGREEN_STATE_H__
+#define __EVERGREEN_STATE_H__
+
+typedef int bool_t;
+
+#define CLEAR(x) memset (&x, 0, sizeof(x))
+
+/* Sequencer / thread handling */
+typedef struct {
+ int ps_prio;
+ int vs_prio;
+ int gs_prio;
+ int es_prio;
+ int hs_prio;
+ int ls_prio;
+ int cs_prio;
+ int num_ps_gprs;
+ int num_vs_gprs;
+ int num_gs_gprs;
+ int num_es_gprs;
+ int num_hs_gprs;
+ int num_ls_gprs;
+ int num_cs_gprs;
+ int num_temp_gprs;
+ int num_ps_threads;
+ int num_vs_threads;
+ int num_gs_threads;
+ int num_es_threads;
+ int num_hs_threads;
+ int num_ls_threads;
+ int num_ps_stack_entries;
+ int num_vs_stack_entries;
+ int num_gs_stack_entries;
+ int num_es_stack_entries;
+ int num_hs_stack_entries;
+ int num_ls_stack_entries;
+} sq_config_t;
+
+/* Color buffer / render target */
+typedef struct {
+ int id;
+ int w;
+ int h;
+ uint64_t base;
+ int format;
+ int endian;
+ int array_mode; // tiling
+ int number_type;
+ int read_size;
+ int comp_swap;
+ int tile_mode;
+ int blend_clamp;
+ int clear_color;
+ int blend_bypass;
+ int simple_float;
+ int round_mode;
+ int tile_compact;
+ int source_format;
+ int resource_type;
+ int fast_clear;
+ int compression;
+ int rat;
+ struct radeon_bo *bo;
+} cb_config_t;
+
+/* Shader */
+typedef struct {
+ uint64_t shader_addr;
+ uint32_t shader_size;
+ int num_gprs;
+ int stack_size;
+ int dx10_clamp;
+ int clamp_consts;
+ int export_mode;
+ int uncached_first_inst;
+ int single_round;
+ int double_round;
+ int allow_sdi;
+ int allow_sd0;
+ int allow_ddi;
+ int allow_ddo;
+ struct radeon_bo *bo;
+} shader_config_t;
+
+/* Shader consts */
+typedef struct {
+ int type;
+ int size_bytes;
+ uint64_t const_addr;
+ struct radeon_bo *bo;
+} const_config_t;
+
+/* Vertex buffer / vtx resource */
+typedef struct {
+ int id;
+ uint64_t vb_addr;
+ uint32_t vtx_num_entries;
+ uint32_t vtx_size_dw;
+ int clamp_x;
+ int format;
+ int num_format_all;
+ int format_comp_all;
+ int srf_mode_all;
+ int endian;
+ int mem_req_size;
+ int dst_sel_x;
+ int dst_sel_y;
+ int dst_sel_z;
+ int dst_sel_w;
+ int uncached;
+ struct radeon_bo *bo;
+} vtx_resource_t;
+
+/* Texture resource */
+typedef struct {
+ int id;
+ int w;
+ int h;
+ int pitch;
+ int depth;
+ int dim;
+ int array_mode;
+ int tile_type;
+ int format;
+ uint64_t base;
+ uint64_t mip_base;
+ uint32_t size;
+ int format_comp_x;
+ int format_comp_y;
+ int format_comp_z;
+ int format_comp_w;
+ int num_format_all;
+ int srf_mode_all;
+ int force_degamma;
+ int endian;
+ int dst_sel_x;
+ int dst_sel_y;
+ int dst_sel_z;
+ int dst_sel_w;
+ int base_level;
+ int last_level;
+ int base_array;
+ int last_array;
+ int perf_modulation;
+ int interlaced;
+ int min_lod;
+ struct radeon_bo *bo;
+ struct radeon_bo *mip_bo;
+} tex_resource_t;
+
+/* Texture sampler */
+typedef struct {
+ int id;
+ /* Clamping */
+ int clamp_x, clamp_y, clamp_z;
+ int border_color;
+ /* Filtering */
+ int xy_mag_filter, xy_min_filter;
+ int z_filter;
+ int mip_filter;
+ bool_t high_precision_filter; /* ? */
+ int perf_mip; /* ? 0-7 */
+ int perf_z; /* ? 3 */
+ /* LoD selection */
+ int min_lod, max_lod; /* 0-0x3ff */
+ int lod_bias; /* 0-0xfff (signed?) */
+ int lod_bias2; /* ? 0-0xfff (signed?) */
+ bool_t lod_uses_minor_axis; /* ? */
+ /* Other stuff */
+ bool_t point_sampling_clamp; /* ? */
+ bool_t tex_array_override; /* ? */
+ bool_t mc_coord_truncate; /* ? */
+ bool_t force_degamma; /* ? */
+ bool_t fetch_4; /* ? */
+ bool_t sample_is_pcf; /* ? */
+ bool_t type; /* ? */
+ int depth_compare; /* only depth textures? */
+ int chroma_key;
+ int truncate_coord;
+ bool_t disable_cube_wrap;
+} tex_sampler_t;
+
+/* Draw command */
+typedef struct {
+ uint32_t prim_type;
+ uint32_t vgt_draw_initiator;
+ uint32_t index_type;
+ uint32_t num_instances;
+ uint32_t num_indices;
+} draw_config_t;
+
+#define BEGIN_BATCH(n) \
+do { \
+ radeon_ddx_cs_start(pScrn, (n), __FILE__, __func__, __LINE__); \
+} while(0)
+#define END_BATCH() \
+do { \
+ radeon_cs_end(info->cs, __FILE__, __func__, __LINE__); \
+} while(0)
+#define RELOC_BATCH(bo, rd, wd) \
+do { \
+ int _ret; \
+ _ret = radeon_cs_write_reloc(info->cs, (bo), (rd), (wd), 0); \
+ if (_ret) ErrorF("reloc emit failure %d (%s %d)\n", _ret, __func__, __LINE__); \
+} while(0)
+#define E32(dword) \
+do { \
+ radeon_cs_write_dword(info->cs, (dword)); \
+} while (0)
+
+#define EFLOAT(val) \
+do { \
+ union { float f; uint32_t d; } a; \
+ a.f = (val); \
+ E32(a.d); \
+} while (0)
+
+#define PACK3(cmd, num) \
+do { \
+ E32(RADEON_CP_PACKET3 | ((cmd) << 8) | ((((num) - 1) & 0x3fff) << 16)); \
+} while (0)
+
+/* Emit the packet header for writing num consecutive registers starting at reg. */
+/* A reg inside a SET_* range gets the matching PACKET3 plus its dword offset; any other reg gets a PACKET0. */
+#define PACK0(reg, num) \
+do { \
+ if ((reg) >= SET_CONFIG_REG_offset && (reg) < SET_CONFIG_REG_end) { \
+ PACK3(IT_SET_CONFIG_REG, (num) + 1); \
+ E32(((reg) - SET_CONFIG_REG_offset) >> 2); \
+ } else if ((reg) >= SET_CONTEXT_REG_offset && (reg) < SET_CONTEXT_REG_end) { \
+ PACK3(IT_SET_CONTEXT_REG, (num) + 1); \
+ E32(((reg) - SET_CONTEXT_REG_offset) >> 2); \
+ } else if ((reg) >= SET_RESOURCE_offset && (reg) < SET_RESOURCE_end) { \
+ PACK3(IT_SET_RESOURCE, (num) + 1); \
+ E32(((reg) - SET_RESOURCE_offset) >> 2); \
+ } else if ((reg) >= SET_SAMPLER_offset && (reg) < SET_SAMPLER_end) { \
+ PACK3(IT_SET_SAMPLER, (num) + 1); \
+ E32(((reg) - SET_SAMPLER_offset) >> 2); \
+ } else if ((reg) >= SET_CTL_CONST_offset && (reg) < SET_CTL_CONST_end) { \
+ PACK3(IT_SET_CTL_CONST, (num) + 1); \
+ E32(((reg) - SET_CTL_CONST_offset) >> 2); \
+ } else if ((reg) >= SET_LOOP_CONST_offset && (reg) < SET_LOOP_CONST_end) { \
+ PACK3(IT_SET_LOOP_CONST, (num) + 1); \
+ E32(((reg) - SET_LOOP_CONST_offset) >> 2); \
+ } else if ((reg) >= SET_BOOL_CONST_offset && (reg) < SET_BOOL_CONST_end) { \
+ PACK3(IT_SET_BOOL_CONST, (num) + 1); \
+ E32(((reg) - SET_BOOL_CONST_offset) >> 2); \
+ } else { \
+ E32(CP_PACKET0 ((reg), (num) - 1)); \
+ } \
+} while (0)
+
+/* write a single register */
+#define EREG(reg, val) \
+do { \
+ PACK0((reg), 1); \
+ E32((val)); \
+} while (0)
+
+void
+evergreen_start_3d(ScrnInfoPtr pScrn);
+void
+evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain);
+void
+evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop);
+void
+evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain);
+void
+evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain);
+void
+evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain);
+void
+evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain);
+void
+evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val);
+void
+evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain);
+void
+evergreen_set_tex_sampler(ScrnInfoPtr pScrn, tex_sampler_t *s);
+void
+evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2);
+void
+evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2);
+void
+evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2);
+void
+evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2);
+void
+evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2);
+void
+evergreen_set_default_state(ScrnInfoPtr pScrn);
+void
+evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf);
+
+void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size);
+
+Bool
+EVERGREENSetAccelState(ScrnInfoPtr pScrn,
+ struct r600_accel_object *src0,
+ struct r600_accel_object *src1,
+ struct r600_accel_object *dst,
+ uint32_t vs_offset, uint32_t ps_offset,
+ int rop, Pixel planemask);
+
+extern Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index);
+extern void RADEONFinishAccess_CS(PixmapPtr pPix, int index);
+extern void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align);
+extern void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv);
+extern struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix);
+extern Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix);
+
+#endif
diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
new file mode 100644
index 00000000..a6746806
--- /dev/null
+++ b/src/evergreen_textured_videofuncs.c
@@ -0,0 +1,556 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Alex Deucher <alexander.deucher@amd.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include "exa.h"
+
+#include "radeon.h"
+#include "radeon_reg.h"
+#include "evergreen_shader.h"
+#include "evergreen_reg.h"
+#include "evergreen_state.h"
+
+#include "radeon_video.h"
+
+#include <X11/extensions/Xv.h>
+#include "fourcc.h"
+
+#include "damage.h"
+
+#include "radeon_exa_shared.h"
+#include "radeon_vbo.h"
+
+/* YUV->RGB reference coefficients for the ITU-R BT.601 and ITU-R BT.709
+ colour spaces, indexed by pPriv->transform_index. The differences from the
+ parameters used in overlay are due to 10-bit vs. float calculations. */
+static REF_TRANSFORM trans[2] =
+{
+ {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */
+ {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */
+};
+/* Draws the video frame described by pPriv onto pPriv->pPixmap, emitting 3 vertices per box of pPriv->clip. */
+void
+EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+ PixmapPtr pPixmap = pPriv->pPixmap;
+ BoxPtr pBox = REGION_RECTS(&pPriv->clip);
+ int nBox = REGION_NUM_RECTS(&pPriv->clip);
+ int dstxoff, dstyoff;
+ struct r600_accel_object src_obj, dst_obj;
+ cb_config_t cb_conf;
+ tex_resource_t tex_res;
+ tex_sampler_t tex_samp;
+ shader_config_t vs_conf, ps_conf;
+ /*
+ * y' = y - .0625
+ * u' = u - .5
+ * v' = v - .5;
+ *
+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
+ *
+ * DP3 might look like the straightforward solution
+ * but we'd need to move the texture yuv values in
+ * the same reg for this to work. Therefore use MADs.
+ * Brightness just adds to the off constant.
+ * Contrast is multiplication of luminance.
+ * Saturation and hue change the u and v coeffs.
+ * Default values (before adjustments - depend on colorspace):
+ * yco = 1.1643
+ * uco = 0, -0.39173, 2.017
+ * vco = 1.5958, -0.8129, 0
+ * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
+ * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
+ * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
+ *
+ * temp = MAD(yco, yuv.yyyy, off)
+ * temp = MAD(uco, yuv.uuuu, temp)
+ * result = MAD(vco, yuv.vvvv, temp)
+ */
+ /* TODO: calc consts in the shader */
+ const float Loff = -0.0627;
+ const float Coff = -0.502;
+ float uvcosf, uvsinf;
+ float yco;
+ float uco[3], vco[3], off[3];
+ float bright, cont, gamma;
+ int ref = pPriv->transform_index;
+ Bool needgamma = FALSE;
+ float *ps_alu_consts;
+ const_config_t ps_const_conf;
+ float *vs_alu_consts;
+ const_config_t vs_const_conf;
+
+ cont = RTFContrast(pPriv->contrast);
+ bright = RTFBrightness(pPriv->brightness);
+ gamma = (float)pPriv->gamma / 1000.0; /* overwritten with 1.0 further down */
+ uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
+ uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
+ /* overlay video also does pre-gamma contrast/sat adjust, should we? */
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
+ off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
+ off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
+
+ // XXX: gamma is forced to 1.0 here, so the needgamma branch below is never taken
+ gamma = 1.0;
+
+ if (gamma != 1.0) {
+ needgamma = TRUE;
+ /* note: gamma correction is out = in ^ gamma;
+ gpu can only do LG2/EX2 therefore we transform into
+ in ^ gamma = 2 ^ (log2(in) * gamma).
+ Lots of scalar ops, unfortunately (better solution?) -
+ without gamma that's 3 inst, with gamma it's 10...
+ could use different gamma factors per channel,
+ if that's of any use. */
+ }
+
+ CLEAR (cb_conf);
+ CLEAR (tex_res);
+ CLEAR (tex_samp);
+ CLEAR (vs_conf);
+ CLEAR (ps_conf);
+ CLEAR (vs_const_conf);
+ CLEAR (ps_const_conf);
+
+#if defined(XF86DRM_MODE)
+ if (info->cs) {
+ dst_obj.offset = 0;
+ src_obj.offset = 0;
+ dst_obj.bo = radeon_get_pixmap_bo(pPixmap);
+ } else
+#endif
+ {
+ dst_obj.offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
+ src_obj.offset = pPriv->src_offset + info->fbLocation + pScrn->fbOffset;
+ dst_obj.bo = src_obj.bo = NULL;
+ }
+ dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); /* byte pitch / bytes per pixel */
+
+ src_obj.pitch = pPriv->src_pitch;
+ src_obj.width = pPriv->w;
+ src_obj.height = pPriv->h;
+ src_obj.bpp = 16;
+ src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+ src_obj.bo = pPriv->src_bo[pPriv->currentBuffer]; /* replaces the NULL assigned in the non-CS path above */
+
+ dst_obj.width = pPixmap->drawable.width;
+ dst_obj.height = pPixmap->drawable.height;
+ dst_obj.bpp = pPixmap->drawable.bitsPerPixel;
+ dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
+
+ if (!EVERGREENSetAccelState(pScrn,
+ &src_obj,
+ NULL,
+ &dst_obj,
+ accel_state->xv_vs_offset, accel_state->xv_ps_offset,
+ 3, 0xffffffff)) /* rop = 3, planemask = all bits */
+ return;
+
+#ifdef COMPOSITE
+ dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
+ dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
+#else
+ dstxoff = 0;
+ dstyoff = 0;
+#endif
+
+ radeon_vbo_check(pScrn, &accel_state->vbo, 16);
+ radeon_vbo_check(pScrn, &accel_state->cbuf, 512);
+ radeon_cp_start(pScrn);
+
+ evergreen_set_default_state(pScrn);
+
+ evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+
+ /* PS bool constant: bit 0 set for planar (YV12/I420), clear for everything else */
+ switch(pPriv->id) {
+ case FOURCC_YV12:
+ case FOURCC_I420:
+ evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
+ break;
+ case FOURCC_UYVY:
+ case FOURCC_YUY2:
+ default:
+ evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
+ break;
+ }
+
+ /* Shader */
+ vs_conf.shader_addr = accel_state->vs_mc_addr;
+ vs_conf.shader_size = accel_state->vs_size;
+ vs_conf.num_gprs = 2;
+ vs_conf.stack_size = 0;
+ vs_conf.bo = accel_state->shaders_bo;
+ evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+ ps_conf.shader_addr = accel_state->ps_mc_addr;
+ ps_conf.shader_size = accel_state->ps_size;
+ ps_conf.num_gprs = 3;
+ ps_conf.stack_size = 1;
+ ps_conf.clamp_consts = 0;
+ ps_conf.export_mode = 2;
+ ps_conf.bo = accel_state->shaders_bo;
+ evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+
+ /* Texture */
+ switch(pPriv->id) {
+ case FOURCC_YV12:
+ case FOURCC_I420:
+ accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
+
+ /* Y texture */
+ tex_res.id = 0;
+ tex_res.w = accel_state->src_obj[0].width;
+ tex_res.h = accel_state->src_obj[0].height;
+ tex_res.pitch = accel_state->src_obj[0].pitch;
+ tex_res.depth = 0;
+ tex_res.dim = SQ_TEX_DIM_2D;
+ tex_res.base = accel_state->src_obj[0].offset;
+ tex_res.mip_base = accel_state->src_obj[0].offset;
+ tex_res.size = accel_state->src_size[0];
+ tex_res.bo = accel_state->src_obj[0].bo;
+ tex_res.mip_bo = accel_state->src_obj[0].bo;
+
+ tex_res.format = FMT_8;
+ tex_res.dst_sel_x = SQ_SEL_X; /* Y */
+ tex_res.dst_sel_y = SQ_SEL_1;
+ tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.dst_sel_w = SQ_SEL_1;
+
+ tex_res.base_level = 0;
+ tex_res.last_level = 0;
+ tex_res.perf_modulation = 0;
+ tex_res.interlaced = 0;
+ evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
+
+ /* Y sampler */
+ tex_samp.id = 0;
+ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_z = SQ_TEX_WRAP;
+
+ /* xxx: switch to bicubic */
+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;
+
+ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
+ tex_samp.mip_filter = 0; /* no mipmap */
+ evergreen_set_tex_sampler(pScrn, &tex_samp);
+
+ /* U or V texture (plane at pPriv->planev_offset) */
+ tex_res.id = 1;
+ tex_res.format = FMT_8;
+ tex_res.w = accel_state->src_obj[0].width >> 1;
+ tex_res.h = accel_state->src_obj[0].height >> 1;
+ tex_res.pitch = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, 256); /* half the Y pitch, aligned to 256 */
+ tex_res.dst_sel_x = SQ_SEL_X; /* V or U */
+ tex_res.dst_sel_y = SQ_SEL_1;
+ tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.dst_sel_w = SQ_SEL_1;
+ tex_res.interlaced = 0;
+
+ tex_res.base = accel_state->src_obj[0].offset + pPriv->planev_offset;
+ tex_res.mip_base = accel_state->src_obj[0].offset + pPriv->planev_offset;
+ tex_res.size = accel_state->src_size[0] / 4;
+ evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
+
+ /* U or V sampler */
+ tex_samp.id = 1;
+ evergreen_set_tex_sampler(pScrn, &tex_samp);
+
+ /* U or V texture (plane at pPriv->planeu_offset) */
+ tex_res.id = 2;
+ tex_res.format = FMT_8;
+ tex_res.w = accel_state->src_obj[0].width >> 1;
+ tex_res.h = accel_state->src_obj[0].height >> 1;
+ tex_res.pitch = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, 256); /* half the Y pitch, aligned to 256 */
+ tex_res.dst_sel_x = SQ_SEL_X; /* V or U */
+ tex_res.dst_sel_y = SQ_SEL_1;
+ tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.dst_sel_w = SQ_SEL_1;
+ tex_res.interlaced = 0;
+
+ tex_res.base = accel_state->src_obj[0].offset + pPriv->planeu_offset;
+ tex_res.mip_base = accel_state->src_obj[0].offset + pPriv->planeu_offset;
+ tex_res.size = accel_state->src_size[0] / 4;
+ evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
+
+ /* U or V sampler */
+ tex_samp.id = 2;
+ evergreen_set_tex_sampler(pScrn, &tex_samp);
+ break;
+ case FOURCC_UYVY:
+ case FOURCC_YUY2:
+ default:
+ accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
+
+ /* Y texture: FMT_8_8 view of the packed buffer */
+ tex_res.id = 0;
+ tex_res.w = accel_state->src_obj[0].width;
+ tex_res.h = accel_state->src_obj[0].height;
+ tex_res.pitch = accel_state->src_obj[0].pitch >> 1;
+ tex_res.depth = 0;
+ tex_res.dim = SQ_TEX_DIM_2D;
+ tex_res.base = accel_state->src_obj[0].offset;
+ tex_res.mip_base = accel_state->src_obj[0].offset;
+ tex_res.size = accel_state->src_size[0];
+ tex_res.bo = accel_state->src_obj[0].bo;
+ tex_res.mip_bo = accel_state->src_obj[0].bo;
+
+ tex_res.format = FMT_8_8;
+ if (pPriv->id == FOURCC_UYVY)
+ tex_res.dst_sel_x = SQ_SEL_Y; /* Y */
+ else
+ tex_res.dst_sel_x = SQ_SEL_X; /* Y */
+ tex_res.dst_sel_y = SQ_SEL_1;
+ tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.dst_sel_w = SQ_SEL_1;
+
+ tex_res.base_level = 0;
+ tex_res.last_level = 0;
+ tex_res.perf_modulation = 0;
+ tex_res.interlaced = 0;
+ evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
+
+ /* Y sampler */
+ tex_samp.id = 0;
+ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_z = SQ_TEX_WRAP;
+
+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;
+
+ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
+ tex_samp.mip_filter = 0; /* no mipmap */
+ evergreen_set_tex_sampler(pScrn, &tex_samp);
+
+ /* UV texture: same buffer viewed as FMT_8_8_8_8 at half the width */
+ tex_res.id = 1;
+ tex_res.format = FMT_8_8_8_8;
+ tex_res.w = accel_state->src_obj[0].width >> 1;
+ tex_res.h = accel_state->src_obj[0].height;
+ tex_res.pitch = accel_state->src_obj[0].pitch >> 2;
+ if (pPriv->id == FOURCC_UYVY) {
+ tex_res.dst_sel_x = SQ_SEL_X; /* V */
+ tex_res.dst_sel_y = SQ_SEL_Z; /* U */
+ } else {
+ tex_res.dst_sel_x = SQ_SEL_Y; /* V */
+ tex_res.dst_sel_y = SQ_SEL_W; /* U */
+ }
+ tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.dst_sel_w = SQ_SEL_1;
+ tex_res.interlaced = 0;
+
+ tex_res.base = accel_state->src_obj[0].offset;
+ tex_res.mip_base = accel_state->src_obj[0].offset;
+ tex_res.size = accel_state->src_size[0];
+ evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
+
+ /* UV sampler */
+ tex_samp.id = 1;
+ evergreen_set_tex_sampler(pScrn, &tex_samp);
+ break;
+ }
+
+ cb_conf.id = 0;
+ cb_conf.w = accel_state->dst_obj.pitch; /* programmed from the pitch, not the drawable width */
+ cb_conf.h = accel_state->dst_obj.height;
+ cb_conf.base = accel_state->dst_obj.offset;
+ cb_conf.bo = accel_state->dst_obj.bo;
+
+ switch (accel_state->dst_obj.bpp) {
+ case 16:
+ if (pPixmap->drawable.depth == 15) {
+ cb_conf.format = COLOR_1_5_5_5;
+ cb_conf.comp_swap = 1; /* ARGB */
+ } else {
+ cb_conf.format = COLOR_5_6_5;
+ cb_conf.comp_swap = 2; /* RGB */
+ }
+ break;
+ case 32:
+ cb_conf.format = COLOR_8_8_8_8;
+ cb_conf.comp_swap = 1; /* ARGB */
+ break;
+ default:
+ return; /* NOTE(review): returns after radeon_cp_start() and state emission above - confirm no cleanup is needed */
+ }
+
+ cb_conf.source_format = EXPORT_4C_16BPC;
+ cb_conf.blend_clamp = 1;
+ evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
+
+ /* Render setup */
+ BEGIN_BATCH(23);
+ EREG(CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift));
+ EREG(CB_COLOR_CONTROL, ((0xcc << ROP3_shift) |
+ (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
+ EREG(CB_BLEND0_CONTROL, 0);
+
+ /* Interpolator setup */
+ /* export tex coords from VS */
+ EREG(SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
+ EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
+ EREG(SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
+ (0x03 << DEFAULT_VAL_shift)));
+
+ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
+ PACK0(SPI_PS_IN_CONTROL_0, 3);
+ E32(((1 << NUM_INTERP_shift) |
+ LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
+ E32(0); // SPI_PS_IN_CONTROL_1
+ E32(0); // SPI_INTERP_CONTROL_0
+ END_BATCH();
+
+ /* PS alu constants: {off[0..2], yco}, {uco[0..2], gamma}, {vco[0..2], 0} */
+ ps_const_conf.size_bytes = 256;
+ ps_const_conf.type = SHADER_TYPE_PS;
+ ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
+
+ ps_alu_consts[0] = off[0];
+ ps_alu_consts[1] = off[1];
+ ps_alu_consts[2] = off[2];
+ ps_alu_consts[3] = yco;
+
+ ps_alu_consts[4] = uco[0];
+ ps_alu_consts[5] = uco[1];
+ ps_alu_consts[6] = uco[2];
+ ps_alu_consts[7] = gamma;
+
+ ps_alu_consts[8] = vco[0];
+ ps_alu_consts[9] = vco[1];
+ ps_alu_consts[10] = vco[2];
+ ps_alu_consts[11] = 0.0;
+
+ radeon_vbo_commit(pScrn, &accel_state->cbuf);
+
+ /* PS alu constants: bind the cbuf bo/address (assumes vb_start_op marks the start of the block above - TODO confirm) */
+ ps_const_conf.bo = accel_state->cbuf.vb_bo;
+ ps_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op;
+ evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
+
+ /* VS alu constants: 1/w and 1/h of the source, remaining two floats zero */
+ vs_const_conf.size_bytes = 256;
+ vs_const_conf.type = SHADER_TYPE_VS;
+ vs_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
+
+ vs_alu_consts[0] = 1.0 / pPriv->w;
+ vs_alu_consts[1] = 1.0 / pPriv->h;
+ vs_alu_consts[2] = 0.0;
+ vs_alu_consts[3] = 0.0;
+
+ radeon_vbo_commit(pScrn, &accel_state->cbuf);
+
+ /* VS alu constants: NOTE(review) the + 256 presumably skips the PS block committed above - confirm against radeon_vbo_commit */
+ vs_const_conf.bo = accel_state->cbuf.vb_bo;
+ vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op + 256;
+ evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
+
+ if (pPriv->vsync) {
+ xf86CrtcPtr crtc;
+ if (pPriv->desired_crtc)
+ crtc = pPriv->desired_crtc;
+ else
+ crtc = radeon_pick_best_crtc(pScrn,
+ pPriv->drw_x,
+ pPriv->drw_x + pPriv->dst_w,
+ pPriv->drw_y,
+ pPriv->drw_y + pPriv->dst_h);
+ if (crtc)
+ evergreen_cp_wait_vline_sync(pScrn, pPixmap,
+ crtc,
+ pPriv->drw_y - crtc->y,
+ (pPriv->drw_y - crtc->y) + pPriv->dst_h);
+ }
+
+ while (nBox--) {
+ int srcX, srcY, srcw, srch;
+ int dstX, dstY, dstw, dsth;
+ float *vb;
+
+
+ dstX = pBox->x1 + dstxoff;
+ dstY = pBox->y1 + dstyoff;
+ dstw = pBox->x2 - pBox->x1;
+ dsth = pBox->y2 - pBox->y1;
+
+ srcX = pPriv->src_x; /* source origin plus the box offset within the drawable, scaled by src/dst size */
+ srcX += ((pBox->x1 - pPriv->drw_x) *
+ pPriv->src_w) / pPriv->dst_w;
+ srcY = pPriv->src_y;
+ srcY += ((pBox->y1 - pPriv->drw_y) *
+ pPriv->src_h) / pPriv->dst_h;
+
+ srcw = (pPriv->src_w * dstw) / pPriv->dst_w; /* assumes dst_w and dst_h are non-zero - TODO confirm */
+ srch = (pPriv->src_h * dsth) / pPriv->dst_h;
+
+ vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
+
+ vb[0] = (float)dstX; /* vertex 0: box origin (dst x, dst y, src x, src y) */
+ vb[1] = (float)dstY;
+ vb[2] = (float)srcX;
+ vb[3] = (float)srcY;
+
+ vb[4] = (float)dstX; /* vertex 1: origin offset by the height */
+ vb[5] = (float)(dstY + dsth);
+ vb[6] = (float)srcX;
+ vb[7] = (float)(srcY + srch);
+
+ vb[8] = (float)(dstX + dstw); /* vertex 2: origin offset by width and height */
+ vb[9] = (float)(dstY + dsth);
+ vb[10] = (float)(srcX + srcw);
+ vb[11] = (float)(srcY + srch);
+
+ radeon_vbo_commit(pScrn, &accel_state->vbo);
+
+ pBox++;
+ }
+
+ evergreen_finish_op(pScrn, 16);
+
+ DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
+}
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 85440343..a04d66ac 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -43,7 +43,7 @@
/* #define SHOW_VERTEXES */
-uint32_t RADEON_ROP[16] = {
+uint32_t R600_ROP[16] = {
RADEON_ROP3_ZERO, /* GXclear */
RADEON_ROP3_DSa, /* Gxand */
RADEON_ROP3_SDna, /* GXandReverse */
@@ -208,14 +208,14 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
CLEAR (vs_conf);
CLEAR (ps_conf);
- radeon_vbo_check(pScrn, 16);
+ radeon_vbo_check(pScrn, &accel_state->vbo, 16);
radeon_cp_start(pScrn);
- set_default_state(pScrn, accel_state->ib);
+ r600_set_default_state(pScrn, accel_state->ib);
- set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
- set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
- set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
/* Shader */
vs_conf.shader_addr = accel_state->vs_mc_addr;
@@ -223,7 +223,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
vs_conf.num_gprs = 2;
vs_conf.stack_size = 0;
vs_conf.bo = accel_state->shaders_bo;
- vs_setup (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+ r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
ps_conf.shader_addr = accel_state->ps_mc_addr;
ps_conf.shader_size = accel_state->ps_size;
@@ -233,7 +233,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
ps_conf.clamp_consts = 0;
ps_conf.export_mode = 2;
ps_conf.bo = accel_state->shaders_bo;
- ps_setup (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+ r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
cb_conf.id = 0;
cb_conf.w = accel_state->dst_obj.pitch;
@@ -253,7 +253,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
}
cb_conf.source_format = 1;
cb_conf.blend_clamp = 1;
- set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
+ r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
/* Render setup */
if (accel_state->planemask & 0x000000ff)
@@ -266,7 +266,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
pmask |= 8; /* A */
BEGIN_BATCH(20);
EREG(accel_state->ib, CB_TARGET_MASK, (pmask << TARGET0_ENABLE_shift));
- EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[accel_state->rop]);
+ EREG(accel_state->ib, CB_COLOR_CONTROL, R600_ROP[accel_state->rop]);
/* Interpolator setup */
/* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */
@@ -312,8 +312,8 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
ps_alu_consts[2] = (float)b / 255; /* B */
ps_alu_consts[3] = (float)a / 255; /* A */
}
- set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
- sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+ r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
+ sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
if (accel_state->vsync)
RADEONVlineHelperClear(pScrn);
@@ -333,7 +333,7 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
if (accel_state->vsync)
RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
- vb = radeon_vbo_space(pScrn, 8);
+ vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
vb[0] = (float)x1;
vb[1] = (float)y1;
@@ -344,7 +344,7 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
vb[4] = (float)x2;
vb[5] = (float)y2;
- radeon_vbo_commit(pScrn);
+ radeon_vbo_commit(pScrn, &accel_state->vbo);
}
static void
@@ -355,10 +355,10 @@ R600DoneSolid(PixmapPtr pPix)
struct radeon_accel_state *accel_state = info->accel_state;
if (accel_state->vsync)
- cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
- accel_state->vline_crtc,
- accel_state->vline_y1,
- accel_state->vline_y2);
+ r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
+ accel_state->vline_crtc,
+ accel_state->vline_y1,
+ accel_state->vline_y2);
r600_finish_op(pScrn, 8);
}
@@ -380,14 +380,14 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
CLEAR (vs_conf);
CLEAR (ps_conf);
- radeon_vbo_check(pScrn, 16);
+ radeon_vbo_check(pScrn, &accel_state->vbo, 16);
radeon_cp_start(pScrn);
- set_default_state(pScrn, accel_state->ib);
+ r600_set_default_state(pScrn, accel_state->ib);
- set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
- set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
- set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
/* Shader */
vs_conf.shader_addr = accel_state->vs_mc_addr;
@@ -395,7 +395,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
vs_conf.num_gprs = 2;
vs_conf.stack_size = 0;
vs_conf.bo = accel_state->shaders_bo;
- vs_setup (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+ r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
ps_conf.shader_addr = accel_state->ps_mc_addr;
ps_conf.shader_size = accel_state->ps_size;
@@ -405,7 +405,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
ps_conf.clamp_consts = 0;
ps_conf.export_mode = 2;
ps_conf.bo = accel_state->shaders_bo;
- ps_setup (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+ r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
/* Texture */
tex_res.id = 0;
@@ -443,7 +443,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
tex_res.base_level = 0;
tex_res.last_level = 0;
tex_res.perf_modulation = 0;
- set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
+ r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
tex_samp.id = 0;
tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
@@ -453,7 +453,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT;
tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
tex_samp.mip_filter = 0; /* no mipmap */
- set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+ r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
cb_conf.id = 0;
cb_conf.w = accel_state->dst_obj.pitch;
@@ -472,7 +472,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
}
cb_conf.source_format = 1;
cb_conf.blend_clamp = 1;
- set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
+ r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
/* Render setup */
if (accel_state->planemask & 0x000000ff)
@@ -485,7 +485,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
pmask |= 8; /* A */
BEGIN_BATCH(20);
EREG(accel_state->ib, CB_TARGET_MASK, (pmask << TARGET0_ENABLE_shift));
- EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[accel_state->rop]);
+ EREG(accel_state->ib, CB_COLOR_CONTROL, R600_ROP[accel_state->rop]);
/* Interpolator setup */
/* export tex coord from VS */
@@ -521,10 +521,10 @@ R600DoCopyVline(PixmapPtr pPix)
struct radeon_accel_state *accel_state = info->accel_state;
if (accel_state->vsync)
- cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
- accel_state->vline_crtc,
- accel_state->vline_y1,
- accel_state->vline_y2);
+ r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
+ accel_state->vline_crtc,
+ accel_state->vline_y1,
+ accel_state->vline_y2);
r600_finish_op(pScrn, 16);
}
@@ -535,9 +535,11 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn,
int dstX, int dstY,
int w, int h)
{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
float *vb;
- vb = radeon_vbo_space(pScrn, 16);
+ vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
vb[0] = (float)dstX;
vb[1] = (float)dstY;
@@ -554,7 +556,7 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn,
vb[10] = (float)(srcX + w);
vb[11] = (float)(srcY + h);
- radeon_vbo_commit(pScrn);
+ radeon_vbo_commit(pScrn, &accel_state->vbo);
}
static Bool
@@ -603,7 +605,7 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst,
src_obj.height = pSrc->drawable.height;
src_obj.bpp = pSrc->drawable.bitsPerPixel;
src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
-
+
dst_obj.width = pDst->drawable.width;
dst_obj.height = pDst->drawable.height;
dst_obj.bpp = pDst->drawable.bitsPerPixel;
@@ -1060,7 +1062,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
tex_res.base_level = 0;
tex_res.last_level = 0;
tex_res.perf_modulation = 0;
- set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
+ r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
tex_samp.id = unit;
tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
@@ -1102,7 +1104,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
tex_samp.clamp_z = SQ_TEX_WRAP;
tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
tex_samp.mip_filter = 0; /* no mipmap */
- set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+ r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
if (pPict->transform != 0) {
accel_state->is_transform[unit] = TRUE;
@@ -1132,8 +1134,8 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
}
/* VS alu constants */
- set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
- sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
+ r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
+ sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
return TRUE;
}
@@ -1264,7 +1266,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
if (info->cs) {
mask_obj.offset = 0;
mask_obj.bo = radeon_get_pixmap_bo(pMask);
- } else
+ } else
#endif
{
mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
@@ -1318,39 +1320,39 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
CLEAR (ps_conf);
if (pMask)
- radeon_vbo_check(pScrn, 24);
+ radeon_vbo_check(pScrn, &accel_state->vbo, 24);
else
- radeon_vbo_check(pScrn, 16);
+ radeon_vbo_check(pScrn, &accel_state->vbo, 16);
radeon_cp_start(pScrn);
- set_default_state(pScrn, accel_state->ib);
+ r600_set_default_state(pScrn, accel_state->ib);
- set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
- set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
- set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
R600IBDiscard(pScrn, accel_state->ib);
- radeon_vb_discard(pScrn);
+ radeon_vb_discard(pScrn, &accel_state->vbo);
return FALSE;
}
if (pMask) {
if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
R600IBDiscard(pScrn, accel_state->ib);
- radeon_vb_discard(pScrn);
+ radeon_vb_discard(pScrn, &accel_state->vbo);
return FALSE;
}
} else
accel_state->is_transform[1] = FALSE;
if (pMask) {
- set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
- set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
+ r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
+ r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
} else {
- set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
- set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
+ r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
+ r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
}
/* Shader */
@@ -1359,7 +1361,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
vs_conf.num_gprs = 3;
vs_conf.stack_size = 1;
vs_conf.bo = accel_state->shaders_bo;
- vs_setup (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+ r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
ps_conf.shader_addr = accel_state->ps_mc_addr;
ps_conf.shader_size = accel_state->ps_size;
@@ -1369,7 +1371,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
ps_conf.clamp_consts = 0;
ps_conf.export_mode = 2;
ps_conf.bo = accel_state->shaders_bo;
- ps_setup (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+ r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
cb_conf.id = 0;
cb_conf.w = accel_state->dst_obj.pitch;
@@ -1405,7 +1407,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
}
cb_conf.source_format = 1;
cb_conf.blend_clamp = 1;
- set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
+ r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
BEGIN_BATCH(24);
EREG(accel_state->ib, CB_TARGET_MASK, (0xf << TARGET0_ENABLE_shift));
@@ -1414,10 +1416,10 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
if (info->ChipFamily == CHIP_FAMILY_R600) {
/* no per-MRT blend on R600 */
- EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
+ EREG(accel_state->ib, CB_COLOR_CONTROL, R600_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
EREG(accel_state->ib, CB_BLEND_CONTROL, blendcntl);
} else {
- EREG(accel_state->ib, CB_COLOR_CONTROL, (RADEON_ROP[3] |
+ EREG(accel_state->ib, CB_COLOR_CONTROL, (R600_ROP[3] |
(1 << TARGET_BLEND_ENABLE_shift) |
PER_MRT_BLEND_bit));
EREG(accel_state->ib, CB_BLEND0_CONTROL, blendcntl);
@@ -1484,7 +1486,7 @@ static void R600Composite(PixmapPtr pDst,
if (accel_state->msk_pic) {
- vb = radeon_vbo_space(pScrn, 24);
+ vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
vb[0] = (float)dstX;
vb[1] = (float)dstY;
@@ -1507,11 +1509,11 @@ static void R600Composite(PixmapPtr pDst,
vb[16] = (float)(maskX + w);
vb[17] = (float)(maskY + h);
- radeon_vbo_commit(pScrn);
+ radeon_vbo_commit(pScrn, &accel_state->vbo);
} else {
- vb = radeon_vbo_space(pScrn, 16);
+ vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
vb[0] = (float)dstX;
vb[1] = (float)dstY;
@@ -1528,7 +1530,7 @@ static void R600Composite(PixmapPtr pDst,
vb[10] = (float)(srcX + w);
vb[11] = (float)(srcY + h);
- radeon_vbo_commit(pScrn);
+ radeon_vbo_commit(pScrn, &accel_state->vbo);
}
@@ -1542,10 +1544,10 @@ static void R600DoneComposite(PixmapPtr pDst)
int vtx_size;
if (accel_state->vsync)
- cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
- accel_state->vline_crtc,
- accel_state->vline_y1,
- accel_state->vline_y2);
+ r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
+ accel_state->vline_crtc,
+ accel_state->vline_y1,
+ accel_state->vline_y2);
vtx_size = accel_state->msk_pic ? 24 : 16;
@@ -1642,7 +1644,7 @@ R600CopyToVRAM(ScrnInfoPtr pScrn,
}
R600IBDiscard(pScrn, scratch);
- radeon_vb_discard(pScrn);
+ radeon_vb_discard(pScrn, &accel_state->vbo);
return TRUE;
}
@@ -1756,7 +1758,7 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
}
R600IBDiscard(pScrn, scratch);
- radeon_vb_discard(pScrn);
+ radeon_vb_discard(pScrn, &accel_state->vbo);
return TRUE;
@@ -1941,7 +1943,7 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
src_obj.bpp = bpp;
src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
src_obj.bo = radeon_get_pixmap_bo(pSrc);
-
+
dst_obj.pitch = dst_pitch_hw;
dst_obj.width = w;
dst_obj.height = h;
@@ -2263,9 +2265,9 @@ R600DrawInit(ScreenPtr pScreen)
info->accel_state->src_obj[1].bo = NULL;
info->accel_state->dst_obj.bo = NULL;
info->accel_state->copy_area_bo = NULL;
- info->accel_state->vb_start_op = -1;
+ info->accel_state->vbo.vb_start_op = -1;
info->accel_state->finish_op = r600_finish_op;
- info->accel_state->verts_per_op = 3;
+ info->accel_state->vbo.verts_per_op = 3;
RADEONVlineHelperClear(pScrn);
#ifdef XF86DRM_MODE
diff --git a/src/r600_state.h b/src/r600_state.h
index 151f402a..1e8dea3e 100644
--- a/src/r600_state.h
+++ b/src/r600_state.h
@@ -274,48 +274,46 @@ do { \
void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib);
void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib);
-uint64_t
-upload (ScrnInfoPtr pScrn, void *shader, int size, int offset);
void
-wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib);
+r600_wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib);
void
-wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib);
+r600_wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib);
void
-start_3d(ScrnInfoPtr pScrn, drmBufPtr ib);
+r600_start_3d(ScrnInfoPtr pScrn, drmBufPtr ib);
void
-set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain);
+r600_set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain);
void
-cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop);
+r600_cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop);
void
-fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain);
+r600_fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain);
void
-vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain);
+r600_vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain);
void
-ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain);
+r600_ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain);
void
-set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf);
+r600_set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf);
void
-set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val);
+r600_set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val);
void
-set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain);
+r600_set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain);
void
-set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s);
+r600_set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s);
void
-set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
+r600_set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
void
-set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2);
+r600_set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2);
void
-set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
+r600_set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
void
-set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
+r600_set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
void
-set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2);
+r600_set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2);
void
-set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib);
+r600_set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib);
void
-draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices);
+r600_draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices);
void
-draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf);
+r600_draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf);
void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size);
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index e18a9c82..88073ac5 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -206,25 +206,25 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
dstyoff = 0;
#endif
- radeon_vbo_check(pScrn, 16);
+ radeon_vbo_check(pScrn, &accel_state->vbo, 16);
radeon_cp_start(pScrn);
- set_default_state(pScrn, accel_state->ib);
+ r600_set_default_state(pScrn, accel_state->ib);
- set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
- set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
- set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+ r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
/* PS bool constant */
switch(pPriv->id) {
case FOURCC_YV12:
case FOURCC_I420:
- set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
+ r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
break;
case FOURCC_UYVY:
case FOURCC_YUY2:
default:
- set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
+ r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
break;
}
@@ -234,7 +234,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
vs_conf.num_gprs = 2;
vs_conf.stack_size = 0;
vs_conf.bo = accel_state->shaders_bo;
- vs_setup (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+ r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
ps_conf.shader_addr = accel_state->ps_mc_addr;
ps_conf.shader_size = accel_state->ps_size;
@@ -244,11 +244,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
ps_conf.clamp_consts = 0;
ps_conf.export_mode = 2;
ps_conf.bo = accel_state->shaders_bo;
- ps_setup (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+ r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
/* PS alu constants */
- set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
- sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+ r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
+ sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
/* Texture */
switch(pPriv->id) {
@@ -280,7 +280,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_res.last_level = 0;
tex_res.perf_modulation = 0;
tex_res.interlaced = 0;
- set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
+ r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
/* Y sampler */
tex_samp.id = 0;
@@ -294,7 +294,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
tex_samp.mip_filter = 0; /* no mipmap */
- set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+ r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
/* U or V texture */
tex_res.id = 1;
@@ -311,11 +311,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_res.base = accel_state->src_obj[0].offset + pPriv->planev_offset;
tex_res.mip_base = accel_state->src_obj[0].offset + pPriv->planev_offset;
tex_res.size = accel_state->src_size[0] / 4;
- set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
+ r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
/* U or V sampler */
tex_samp.id = 1;
- set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+ r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
/* U or V texture */
tex_res.id = 2;
@@ -332,11 +332,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_res.base = accel_state->src_obj[0].offset + pPriv->planeu_offset;
tex_res.mip_base = accel_state->src_obj[0].offset + pPriv->planeu_offset;
tex_res.size = accel_state->src_size[0] / 4;
- set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
+ r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
/* UV sampler */
tex_samp.id = 2;
- set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+ r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
break;
case FOURCC_UYVY:
case FOURCC_YUY2:
@@ -370,7 +370,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_res.last_level = 0;
tex_res.perf_modulation = 0;
tex_res.interlaced = 0;
- set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
+ r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
/* Y sampler */
tex_samp.id = 0;
@@ -384,7 +384,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
tex_samp.mip_filter = 0; /* no mipmap */
- set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+ r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
/* UV texture */
tex_res.id = 1;
@@ -406,11 +406,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_res.base = accel_state->src_obj[0].offset;
tex_res.mip_base = accel_state->src_obj[0].offset;
tex_res.size = accel_state->src_size[0];
- set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
+ r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
/* UV sampler */
tex_samp.id = 1;
- set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+ r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
break;
}
@@ -440,7 +440,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
cb_conf.source_format = 1;
cb_conf.blend_clamp = 1;
- set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
+ r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
/* Render setup */
BEGIN_BATCH(20);
@@ -469,8 +469,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
vs_alu_consts[3] = 0.0;
/* VS alu constants */
- set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs,
- sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
+ r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs,
+ sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
if (pPriv->vsync) {
xf86CrtcPtr crtc;
@@ -483,10 +483,10 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
pPriv->drw_y,
pPriv->drw_y + pPriv->dst_h);
if (crtc)
- cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap,
- crtc,
- pPriv->drw_y - crtc->y,
- (pPriv->drw_y - crtc->y) + pPriv->dst_h);
+ r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap,
+ crtc,
+ pPriv->drw_y - crtc->y,
+ (pPriv->drw_y - crtc->y) + pPriv->dst_h);
}
while (nBox--) {
@@ -510,7 +510,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
srch = (pPriv->src_h * dsth) / pPriv->dst_h;
- vb = radeon_vbo_space(pScrn, 16);
+ vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
vb[0] = (float)dstX;
vb[1] = (float)dstY;
@@ -527,7 +527,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
vb[10] = (float)(srcX + srcw);
vb[11] = (float)(srcY + srch);
- radeon_vbo_commit(pScrn);
+ radeon_vbo_commit(pScrn, &accel_state->vbo);
pBox++;
}
diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c
index ff7dfda3..e4365141 100644
--- a/src/r6xx_accel.c
+++ b/src/r6xx_accel.c
@@ -98,7 +98,7 @@ void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib)
}
void
-wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
+r600_wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -113,7 +113,7 @@ wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
}
void
-wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
+r600_wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -123,7 +123,7 @@ wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
}
void
-start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
+r600_start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -147,7 +147,7 @@ start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
// asic stack/thread/gpr limits - need to query the drm
static void
-sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
+r600_sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
{
uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
@@ -198,7 +198,7 @@ sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
}
void
-set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain)
+r600_set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain)
{
uint32_t cb_color_info;
int pitch, slice, h;
@@ -276,8 +276,9 @@ set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_
}
static void
-cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr,
- struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
+r600_cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type,
+ uint32_t size, uint64_t mc_addr,
+ struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
uint32_t cp_coher_size;
@@ -297,7 +298,8 @@ cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_
}
/* inserts a wait for vline in the command stream */
-void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
+void
+r600_cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
xf86CrtcPtr crtc, int start, int stop)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -380,7 +382,7 @@ void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
}
void
-fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain)
+r600_fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
uint32_t sq_pgm_resources;
@@ -403,7 +405,7 @@ fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t dom
}
void
-vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain)
+r600_vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
uint32_t sq_pgm_resources;
@@ -419,9 +421,9 @@ vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t dom
sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
/* flush SQ cache */
- cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit,
- vs_conf->shader_size, vs_conf->shader_addr,
- vs_conf->bo, domain, 0);
+ r600_cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit,
+ vs_conf->shader_size, vs_conf->shader_addr,
+ vs_conf->bo, domain, 0);
BEGIN_BATCH(3 + 2);
EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
@@ -435,7 +437,7 @@ vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t dom
}
void
-ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain)
+r600_ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
uint32_t sq_pgm_resources;
@@ -453,9 +455,9 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t dom
sq_pgm_resources |= CLAMP_CONSTS_bit;
/* flush SQ cache */
- cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit,
- ps_conf->shader_size, ps_conf->shader_addr,
- ps_conf->bo, domain, 0);
+ r600_cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit,
+ ps_conf->shader_size, ps_conf->shader_addr,
+ ps_conf->bo, domain, 0);
BEGIN_BATCH(3 + 2);
EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
@@ -470,7 +472,7 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t dom
}
void
-set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf)
+r600_set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
int i;
@@ -484,7 +486,7 @@ set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *co
}
void
-set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
+r600_set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
/* bool register order is: ps, vs, gs; one register each
@@ -496,7 +498,7 @@ set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
}
static void
-set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t domain)
+r600_set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t domain)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
struct radeon_accel_state *accel_state = info->accel_state;
@@ -522,15 +524,15 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t
(info->ChipFamily == CHIP_FAMILY_RS780) ||
(info->ChipFamily == CHIP_FAMILY_RS880) ||
(info->ChipFamily == CHIP_FAMILY_RV710))
- cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
- accel_state->vb_offset, accel_state->vb_mc_addr,
- res->bo,
- domain, 0);
+ r600_cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
+ accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
+ res->bo,
+ domain, 0);
else
- cp_set_surface_sync(pScrn, ib, VC_ACTION_ENA_bit,
- accel_state->vb_offset, accel_state->vb_mc_addr,
- res->bo,
- domain, 0);
+ r600_cp_set_surface_sync(pScrn, ib, VC_ACTION_ENA_bit,
+ accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
+ res->bo,
+ domain, 0);
BEGIN_BATCH(9 + 2);
PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
@@ -546,7 +548,7 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t
}
void
-set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain)
+r600_set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
@@ -599,9 +601,9 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint3
sq_tex_resource_word6 |= INTERLACED_bit;
/* flush texture cache */
- cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
- tex_res->size, tex_res->base,
- tex_res->bo, domain, 0);
+ r600_cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
+ tex_res->size, tex_res->base,
+ tex_res->bo, domain, 0);
BEGIN_BATCH(9 + 4);
PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
@@ -618,7 +620,7 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint3
}
void
-set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
+r600_set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
@@ -670,7 +672,7 @@ set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
//XXX deal with clip offsets in clip setup
void
-set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
+r600_set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -684,7 +686,7 @@ set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int
}
void
-set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
+r600_set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -699,7 +701,7 @@ set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x
}
void
-set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
+r600_set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -714,7 +716,7 @@ set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int
}
void
-set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
+r600_set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -729,7 +731,7 @@ set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int
}
void
-set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
+r600_set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -747,7 +749,7 @@ set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, i
*/
void
-set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
+r600_set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
{
tex_resource_t tex_res;
shader_config_t fs_conf;
@@ -764,7 +766,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
accel_state->XInited3D = TRUE;
- start_3d(pScrn, accel_state->ib);
+ r600_start_3d(pScrn, accel_state->ib);
// SQ
sq_conf.ps_prio = 0;
@@ -888,7 +890,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
break;
}
- sq_setup(pScrn, ib, &sq_conf);
+ r600_sq_setup(pScrn, ib, &sq_conf);
/* set fake reloc for unused depth */
BEGIN_BATCH(3 + 2);
@@ -992,10 +994,10 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
/* clip boolean is set to always visible -> doesn't matter */
for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
- set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192);
+ r600_set_clip_rect(pScrn, ib, i, 0, 0, 8192, 8192);
for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
- set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192);
+ r600_set_vport_scissor(pScrn, ib, i, 0, 0, 8192, 8192);
BEGIN_BATCH(42);
PACK0(ib, PA_SC_MPASS_PS_CNTL, 2);
@@ -1051,7 +1053,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
// clear FS
fs_conf.bo = accel_state->shaders_bo;
- fs_setup(pScrn, ib, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
+ r600_fs_setup(pScrn, ib, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
// VGT
BEGIN_BATCH(43);
@@ -1102,7 +1104,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
*/
void
-draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
+r600_draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
uint32_t i, count;
@@ -1140,7 +1142,7 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i
}
void
-draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
+r600_draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -1163,27 +1165,27 @@ void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
draw_config_t draw_conf;
vtx_resource_t vtx_res;
- if (accel_state->vb_start_op == -1)
+ if (accel_state->vbo.vb_start_op == -1)
return;
CLEAR (draw_conf);
CLEAR (vtx_res);
- if (accel_state->vb_offset == accel_state->vb_start_op) {
+ if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
R600IBDiscard(pScrn, accel_state->ib);
- radeon_vb_discard(pScrn);
+ radeon_vb_discard(pScrn, &accel_state->vbo);
return;
}
/* Vertex buffer setup */
- accel_state->vb_size = accel_state->vb_offset - accel_state->vb_start_op;
+ accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
vtx_res.id = SQ_VTX_RESOURCE_vs;
vtx_res.vtx_size_dw = vtx_size / 4;
- vtx_res.vtx_num_entries = accel_state->vb_size / 4;
+ vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
vtx_res.mem_req_size = 1;
- vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op;
- vtx_res.bo = accel_state->vb_bo;
- set_vtx_resource (pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT);
+ vtx_res.vb_addr = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
+ vtx_res.bo = accel_state->vbo.vb_bo;
+ r600_set_vtx_resource(pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT);
/* Draw */
draw_conf.prim_type = DI_PT_RECTLIST;
@@ -1192,17 +1194,17 @@ void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
- draw_auto(pScrn, accel_state->ib, &draw_conf);
+ r600_draw_auto(pScrn, accel_state->ib, &draw_conf);
/* XXX drm should handle this in fence submit */
- wait_3d_idle_clean(pScrn, accel_state->ib);
+ r600_wait_3d_idle_clean(pScrn, accel_state->ib);
/* sync dst surface */
- cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
- accel_state->dst_size, accel_state->dst_obj.offset,
- accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
+ r600_cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
+ accel_state->dst_size, accel_state->dst_obj.offset,
+ accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
- accel_state->vb_start_op = -1;
+ accel_state->vbo.vb_start_op = -1;
accel_state->ib_reset_op = 0;
#if KMS_MULTI_OP
diff --git a/src/radeon.h b/src/radeon.h
index 7a3f5b66..6d5a282c 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -387,6 +387,8 @@ typedef enum {
#define IS_DCE4_VARIANT ((info->ChipFamily >= CHIP_FAMILY_CEDAR))
+#define IS_EVERGREEN_3D (info->ChipFamily >= CHIP_FAMILY_CEDAR)
+
#define IS_R600_3D (info->ChipFamily >= CHIP_FAMILY_R600)
#define IS_R500_3D ((info->ChipFamily == CHIP_FAMILY_RV515) || \
@@ -674,6 +676,18 @@ struct r600_accel_object {
struct radeon_bo *bo;
};
+struct radeon_vbo_object { /* per-buffer vertex-upload state; radeon_accel_state embeds two of these (vbo, cbuf) */
+ int vb_offset; /* current end offset; r600_finish_op treats vb_offset == vb_start_op as "nothing emitted" */
+ uint64_t vb_mc_addr; /* base address; r600_finish_op adds vb_start_op to it to form the vertex fetch address */
+ int vb_total; /* presumably the total buffer size -- TODO confirm against radeon_vbo.c */
+ void *vb_ptr; /* presumably the CPU-visible mapping of the buffer -- TODO confirm */
+ uint32_t vb_size; /* set by r600_finish_op to vb_offset - vb_start_op (extent of the current op) */
+ uint32_t vb_op_vert_size; /* NOTE(review): no use visible in this diff; looks like a per-op vertex size -- confirm */
+ int32_t vb_start_op; /* offset where the current op began; -1 means no op is pending */
+ struct radeon_bo *vb_bo; /* buffer object handed to the vertex resource setup as vtx_res.bo */
+ unsigned verts_per_op; /* R600DrawInit sets this to 3 */
+};
+
struct radeon_accel_state {
/* common accel data */
int fifo_slots; /* Free slots in the FIFO (64 max) */
@@ -721,20 +735,15 @@ struct radeon_accel_state {
uint32_t *draw_header;
unsigned vtx_count;
unsigned num_vtx;
- unsigned verts_per_op;
Bool vsync;
drmBufPtr ib;
- int vb_offset;
- uint64_t vb_mc_addr;
- int vb_total;
- void *vb_ptr;
- uint32_t vb_size;
- uint32_t vb_op_vert_size;
- int32_t vb_start_op;
+
+ struct radeon_vbo_object vbo;
+ struct radeon_vbo_object cbuf;
+
/* where to discard IB from if we cancel operation */
uint32_t ib_reset_op;
- struct radeon_bo *vb_bo;
#ifdef XF86DRM_MODE
struct radeon_dma_bo bo_free;
struct radeon_dma_bo bo_wait;
@@ -753,6 +762,16 @@ struct radeon_accel_state {
uint32_t comp_ps_offset;
uint32_t xv_vs_offset;
uint32_t xv_ps_offset;
+ // shader consts
+ uint32_t solid_vs_const_offset;
+ uint32_t solid_ps_const_offset;
+ uint32_t copy_vs_const_offset;
+ uint32_t copy_ps_const_offset;
+ uint32_t comp_vs_const_offset;
+ uint32_t comp_ps_const_offset;
+ uint32_t comp_mask_ps_const_offset;
+ uint32_t xv_vs_const_offset;
+ uint32_t xv_ps_const_offset;
//size/addr stuff
struct r600_accel_object src_obj[2];
@@ -1274,6 +1293,8 @@ extern void RADEONDoPrepareCopyMMIO(ScrnInfoPtr pScrn,
Pixel planemask);
extern Bool R600DrawInit(ScreenPtr pScreen);
extern Bool R600LoadShaders(ScrnInfoPtr pScrn);
+extern Bool EVERGREENDrawInit(ScreenPtr pScreen);
+extern Bool EVERGREENLoadShaders(ScrnInfoPtr pScrn);
#endif
#if defined(XF86DRI) && defined(USE_EXA)
diff --git a/src/radeon_accel.c b/src/radeon_accel.c
index 281bc6d4..8fc515d1 100644
--- a/src/radeon_accel.c
+++ b/src/radeon_accel.c
@@ -1072,7 +1072,10 @@ Bool RADEONAccelInit(ScreenPtr pScreen)
if (info->useEXA) {
# ifdef XF86DRI
if (info->directRenderingEnabled) {
- if (info->ChipFamily >= CHIP_FAMILY_R600) {
+ if (info->ChipFamily >= CHIP_FAMILY_CEDAR) {
+ if (!EVERGREENDrawInit(pScreen))
+ return FALSE;
+ } else if (info->ChipFamily >= CHIP_FAMILY_R600) {
if (!R600DrawInit(pScreen))
return FALSE;
} else {
diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c
index ed7fdd68..3a315a44 100644
--- a/src/radeon_dri2.c
+++ b/src/radeon_dri2.c
@@ -72,7 +72,7 @@ radeon_dri2_create_buffers(DrawablePtr drawable,
struct dri2_buffer_priv *privates;
PixmapPtr pixmap, depth_pixmap;
struct radeon_exa_pixmap_priv *driver_priv;
- int i, r;
+ int i, r, need_enlarge = 0;
int flags = 0;
buffers = calloc(count, sizeof *buffers);
@@ -101,7 +101,6 @@ radeon_dri2_create_buffers(DrawablePtr drawable,
/* tile the back buffer */
switch(attachments[i]) {
case DRI2BufferDepth:
- case DRI2BufferDepthStencil:
if (info->ChipFamily >= CHIP_FAMILY_R600)
/* macro is the preferred setting, but the 2D detiling for software
* fallbacks in mesa still has issues on some configurations
@@ -110,6 +109,17 @@ radeon_dri2_create_buffers(DrawablePtr drawable,
else
flags = RADEON_CREATE_PIXMAP_TILING_MACRO | RADEON_CREATE_PIXMAP_TILING_MICRO;
break;
+ case DRI2BufferDepthStencil:
+ if (info->ChipFamily >= CHIP_FAMILY_R600) {
+ /* macro is the preferred setting, but the 2D detiling for software
+ * fallbacks in mesa still has issues on some configurations
+ */
+ flags = RADEON_CREATE_PIXMAP_TILING_MICRO;
+ if (info->ChipFamily >= CHIP_FAMILY_CEDAR)
+ need_enlarge = 1;
+ } else
+ flags = RADEON_CREATE_PIXMAP_TILING_MACRO | RADEON_CREATE_PIXMAP_TILING_MICRO;
+ break;
case DRI2BufferBackLeft:
case DRI2BufferBackRight:
case DRI2BufferFakeFrontLeft:
@@ -125,11 +135,31 @@ radeon_dri2_create_buffers(DrawablePtr drawable,
default:
flags = 0;
}
- pixmap = (*pScreen->CreatePixmap)(pScreen,
- drawable->width,
- drawable->height,
- drawable->depth,
- flags);
+
+ if (need_enlarge) {
+ /* evergreen uses separate allocations for depth and stencil
+ * so we make an extra large depth buffer to cover stencil
+ * as well.
+ */
+ int pitch = drawable->width * (drawable->depth / 8);
+ int aligned_height = (drawable->height + 7) & ~7;
+ int size = pitch * aligned_height;
+ size = (size + 255) & ~255;
+ size += drawable->width * aligned_height;
+ aligned_height = ((size / pitch) + 7) & ~7;
+
+ pixmap = (*pScreen->CreatePixmap)(pScreen,
+ drawable->width,
+ aligned_height,
+ drawable->depth,
+ flags);
+
+ } else
+ pixmap = (*pScreen->CreatePixmap)(pScreen,
+ drawable->width,
+ drawable->height,
+ drawable->depth,
+ flags);
}
if (attachments[i] == DRI2BufferDepth) {
@@ -166,7 +196,7 @@ radeon_dri2_create_buffer(DrawablePtr drawable,
struct dri2_buffer_priv *privates;
PixmapPtr pixmap, depth_pixmap;
struct radeon_exa_pixmap_priv *driver_priv;
- int r;
+ int r, need_enlarge = 0;
int flags;
buffers = calloc(1, sizeof *buffers);
@@ -195,7 +225,6 @@ radeon_dri2_create_buffer(DrawablePtr drawable,
/* tile the back buffer */
switch(attachment) {
case DRI2BufferDepth:
- case DRI2BufferDepthStencil:
/* macro is the preferred setting, but the 2D detiling for software
* fallbacks in mesa still has issues on some configurations
*/
@@ -204,6 +233,17 @@ radeon_dri2_create_buffer(DrawablePtr drawable,
else
flags = RADEON_CREATE_PIXMAP_TILING_MACRO | RADEON_CREATE_PIXMAP_TILING_MICRO;
break;
+ case DRI2BufferDepthStencil:
+ /* macro is the preferred setting, but the 2D detiling for software
+ * fallbacks in mesa still has issues on some configurations
+ */
+ if (info->ChipFamily >= CHIP_FAMILY_R600) {
+ flags = RADEON_CREATE_PIXMAP_TILING_MICRO;
+ if (info->ChipFamily >= CHIP_FAMILY_CEDAR)
+ need_enlarge = 1;
+ } else
+ flags = RADEON_CREATE_PIXMAP_TILING_MACRO | RADEON_CREATE_PIXMAP_TILING_MICRO;
+ break;
case DRI2BufferBackLeft:
case DRI2BufferBackRight:
case DRI2BufferFakeFrontLeft:
@@ -219,11 +259,32 @@ radeon_dri2_create_buffer(DrawablePtr drawable,
default:
flags = 0;
}
- pixmap = (*pScreen->CreatePixmap)(pScreen,
- drawable->width,
- drawable->height,
- (format != 0)?format:drawable->depth,
- flags);
+
+ if (need_enlarge) {
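+ /* evergreen uses separate allocations for depth and stencil
+ * so we make an extra large depth buffer to cover stencil
+ * as well.  The depth plane (height rounded up to a multiple
+ * of 8, byte size rounded up to 256) is followed by an extra
+ * width * aligned_height bytes, presumably one byte per pixel
+ * of stencil; the total is then converted back into a pixmap
+ * height in rows of the depth pitch, rounded up to 8 again.
+ */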
+ int depth = (format != 0) ? format : drawable->depth;
+ int pitch = drawable->width * (depth / 8);
+ int aligned_height = (drawable->height + 7) & ~7;
+ int size = pitch * aligned_height;
+ size = (size + 255) & ~255;
+ size += drawable->width * aligned_height;
+ aligned_height = ((size / pitch) + 7) & ~7;
+
+ pixmap = (*pScreen->CreatePixmap)(pScreen,
+ drawable->width,
+ aligned_height,
+ (format != 0)?format:drawable->depth,
+ flags);
+
+ } else
+ pixmap = (*pScreen->CreatePixmap)(pScreen,
+ drawable->width,
+ drawable->height,
+ (format != 0)?format:drawable->depth,
+ flags);
}
if (attachment == DRI2BufferDepth) {
diff --git a/src/radeon_exa_shared.c b/src/radeon_exa_shared.c
index d1926f4e..2ef07511 100644
--- a/src/radeon_exa_shared.c
+++ b/src/radeon_exa_shared.c
@@ -131,21 +131,19 @@ static Bool radeon_vb_get(ScrnInfoPtr pScrn)
RADEONInfoPtr info = RADEONPTR(pScrn);
struct radeon_accel_state *accel_state = info->accel_state;
- accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
+ accel_state->vbo.vb_mc_addr = info->gartLocation + info->dri->bufStart +
(accel_state->ib->idx*accel_state->ib->total)+
(accel_state->ib->total / 2);
- accel_state->vb_total = (accel_state->ib->total / 2);
- accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address +
+ accel_state->vbo.vb_total = (accel_state->ib->total / 2);
+ accel_state->vbo.vb_ptr = (pointer)((char*)accel_state->ib->address +
(accel_state->ib->total / 2));
- accel_state->vb_offset = 0;
+ accel_state->vbo.vb_offset = 0;
return TRUE;
}
-void radeon_vb_discard(ScrnInfoPtr pScrn)
+void radeon_vb_discard(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo)
{
- RADEONInfoPtr info = RADEONPTR(pScrn);
-
- info->accel_state->vb_start_op = -1;
+ vbo->vb_start_op = -1;
}
int radeon_cp_start(ScrnInfoPtr pScrn)
@@ -159,7 +157,6 @@ int radeon_cp_start(ScrnInfoPtr pScrn)
radeon_cs_flush_indirect(pScrn);
}
accel_state->ib_reset_op = info->cs->cdw;
- accel_state->vb_start_op = accel_state->vb_offset;
} else
#endif
{
@@ -167,33 +164,36 @@ int radeon_cp_start(ScrnInfoPtr pScrn)
if (!radeon_vb_get(pScrn)) {
return -1;
}
- accel_state->vb_start_op = accel_state->vb_offset;
}
+ accel_state->vbo.vb_start_op = accel_state->vbo.vb_offset;
+ accel_state->cbuf.vb_start_op = accel_state->cbuf.vb_offset;
return 0;
}
-void radeon_vb_no_space(ScrnInfoPtr pScrn, int vert_size)
+void radeon_vb_no_space(ScrnInfoPtr pScrn,
+ struct radeon_vbo_object *vbo,
+ int vert_size)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
- struct radeon_accel_state *accel_state = info->accel_state;
+ struct radeon_accel_state *accel_state = info->accel_state;
#if defined(XF86DRM_MODE)
if (info->cs) {
- if (accel_state->vb_bo) {
- if (accel_state->vb_start_op != accel_state->vb_offset) {
+ if (vbo->vb_bo) {
+ if (vbo->vb_start_op != vbo->vb_offset) {
accel_state->finish_op(pScrn, vert_size);
accel_state->ib_reset_op = info->cs->cdw;
}
/* release the current VBO */
- radeon_vbo_put(pScrn);
+ radeon_vbo_put(pScrn, vbo);
}
/* get a new one */
- radeon_vbo_get(pScrn);
+ radeon_vbo_get(pScrn, vbo);
return;
}
#endif
- if (accel_state->vb_start_op != -1) {
+ if (vbo->vb_start_op != -1) {
accel_state->finish_op(pScrn, vert_size);
radeon_cp_start(pScrn);
}
@@ -213,8 +213,10 @@ void radeon_ib_discard(ScrnInfoPtr pScrn)
goto out;
}
- info->accel_state->vb_offset = 0;
- info->accel_state->vb_start_op = -1;
+ info->accel_state->vbo.vb_offset = 0;
+ info->accel_state->vbo.vb_start_op = -1;
+ info->accel_state->cbuf.vb_offset = 0;
+ info->accel_state->cbuf.vb_start_op = -1;
if (CS_FULL(info->cs)) {
radeon_cs_flush_indirect(pScrn);
@@ -222,11 +224,19 @@ void radeon_ib_discard(ScrnInfoPtr pScrn)
}
radeon_cs_erase(info->cs);
ret = radeon_cs_space_check_with_bo(info->cs,
- info->accel_state->vb_bo,
+ info->accel_state->vbo.vb_bo,
RADEON_GEM_DOMAIN_GTT, 0);
if (ret)
ErrorF("space check failed in flush\n");
+ if (info->accel_state->cbuf.vb_bo) {
+ ret = radeon_cs_space_check_with_bo(info->cs,
+ info->accel_state->cbuf.vb_bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ ErrorF("space check failed in flush\n");
+ }
+
out:
if (info->dri2.enabled) {
info->accel_state->XInited3D = FALSE;
diff --git a/src/radeon_exa_shared.h b/src/radeon_exa_shared.h
index 71068b12..489e3b0e 100644
--- a/src/radeon_exa_shared.h
+++ b/src/radeon_exa_shared.h
@@ -72,9 +72,9 @@ static inline void radeon_add_pixmap(struct radeon_cs *cs, PixmapPtr pPix, int r
extern void radeon_ib_discard(ScrnInfoPtr pScrn);
#endif /* XF86DRM_MODE */
-extern void radeon_vb_discard(ScrnInfoPtr pScrn);
+extern void radeon_vb_discard(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo);
extern int radeon_cp_start(ScrnInfoPtr pScrn);
-extern void radeon_vb_no_space(ScrnInfoPtr pScrn, int vert_size);
+extern void radeon_vb_no_space(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo, int vert_size);
extern void radeon_vbo_done_composite(PixmapPtr pDst);
#endif
diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index 0d6055dd..b94544e8 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -83,9 +83,15 @@ void radeon_cs_flush_indirect(ScrnInfoPtr pScrn)
return;
/* release the current VBO so we don't block on mapping it later */
- if (info->accel_state->vb_offset && info->accel_state->vb_bo) {
- radeon_vbo_put(pScrn);
- info->accel_state->vb_start_op = -1;
+ if (info->accel_state->vbo.vb_offset && info->accel_state->vbo.vb_bo) {
+ radeon_vbo_put(pScrn, &info->accel_state->vbo);
+ info->accel_state->vbo.vb_start_op = -1;
+ }
+
+ /* likewise release the current cbuf BO so we don't block on mapping it later */
+ if (info->accel_state->cbuf.vb_offset && info->accel_state->cbuf.vb_bo) {
+ radeon_vbo_put(pScrn, &info->accel_state->cbuf);
+ info->accel_state->cbuf.vb_start_op = -1;
}
radeon_cs_emit(info->cs);
@@ -95,11 +101,19 @@ void radeon_cs_flush_indirect(ScrnInfoPtr pScrn)
radeon_vbo_flush_bos(pScrn);
ret = radeon_cs_space_check_with_bo(info->cs,
- accel_state->vb_bo,
+ accel_state->vbo.vb_bo,
RADEON_GEM_DOMAIN_GTT, 0);
if (ret)
ErrorF("space check failed in flush\n");
+ if (accel_state->cbuf.vb_bo) {
+ ret = radeon_cs_space_check_with_bo(info->cs,
+ accel_state->cbuf.vb_bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ ErrorF("space check failed in flush\n");
+ }
+
if (info->reemit_current2d && info->state_2d.op)
info->reemit_current2d(pScrn, info->state_2d.op);
@@ -211,8 +225,18 @@ static Bool RADEONIsAccelWorking(ScrnInfoPtr pScrn)
int r;
uint32_t tmp;
+#ifndef RADEON_INFO_ACCEL_WORKING
+#define RADEON_INFO_ACCEL_WORKING 0x03
+#endif
+#ifndef RADEON_INFO_ACCEL_WORKING2
+#define RADEON_INFO_ACCEL_WORKING2 0x05
+#endif
+
memset(&ginfo, 0, sizeof(ginfo));
- ginfo.request = 0x3;
+ if (info->dri->pKernelDRMVersion->version_minor >= 5)
+ ginfo.request = RADEON_INFO_ACCEL_WORKING2;
+ else
+ ginfo.request = RADEON_INFO_ACCEL_WORKING;
ginfo.value = (uintptr_t)&tmp;
r = drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &ginfo, sizeof(ginfo));
if (r) {
@@ -239,7 +263,6 @@ static Bool RADEONPreInitAccel_KMS(ScrnInfoPtr pScrn)
}
if (xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE) ||
- (info->ChipFamily >= CHIP_FAMILY_CEDAR) ||
(!RADEONIsAccelWorking(pScrn))) {
xf86DrvMsg(pScrn->scrnIndex, X_INFO,
"GPU accel disabled or not working, using shadowfb for KMS\n");
diff --git a/src/radeon_reg.h b/src/radeon_reg.h
index 377c26bf..e61c29d2 100644
--- a/src/radeon_reg.h
+++ b/src/radeon_reg.h
@@ -4242,6 +4242,12 @@
#define EVERGREEN_DATA_FORMAT 0x6b00
# define EVERGREEN_INTERLEAVE_EN (1 << 0)
#define EVERGREEN_DESKTOP_HEIGHT 0x6b04
+#define EVERGREEN_VLINE_START_END 0x6b08
+# define EVERGREEN_VLINE_START_SHIFT 0
+# define EVERGREEN_VLINE_END_SHIFT 16
+# define EVERGREEN_VLINE_INV (1 << 31)
+#define EVERGREEN_VLINE_STATUS 0x6bb8
+# define EVERGREEN_VLINE_STAT (1 << 12)
#define EVERGREEN_VIEWPORT_START 0x6d70
#define EVERGREEN_VIEWPORT_SIZE 0x6d74
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index c19066b6..f6828111 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -36,7 +36,6 @@
#include "radeon.h"
#include "radeon_reg.h"
-#include "r600_reg.h"
#include "radeon_macros.h"
#include "radeon_probe.h"
#include "radeon_video.h"
@@ -47,6 +46,9 @@
extern void
R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv);
+extern void
+EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv);
+
extern Bool
R600CopyToVRAM(ScrnInfoPtr pScrn,
char *src, int src_pitch,
@@ -473,7 +475,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
#endif
#ifdef XF86DRI
if (info->directRenderingEnabled) {
- if (IS_R600_3D)
+ if (IS_EVERGREEN_3D)
+ EVERGREENDisplayTexturedVideo(pScrn, pPriv);
+ else if (IS_R600_3D)
R600DisplayTexturedVideo(pScrn, pPriv);
else if (IS_R500_3D)
R500DisplayTexturedVideoCP(pScrn, pPriv);
diff --git a/src/radeon_vbo.c b/src/radeon_vbo.c
index 0735540d..c0a668f6 100644
--- a/src/radeon_vbo.c
+++ b/src/radeon_vbo.c
@@ -41,31 +41,27 @@
static struct radeon_bo *radeon_vbo_get_bo(ScrnInfoPtr pScrn);
-void radeon_vbo_put(ScrnInfoPtr pScrn)
+void radeon_vbo_put(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo)
{
- RADEONInfoPtr info = RADEONPTR(pScrn);
- struct radeon_accel_state *accel_state = info->accel_state;
-
- if (accel_state->vb_bo) {
- radeon_bo_unmap(accel_state->vb_bo);
- radeon_bo_unref(accel_state->vb_bo);
- accel_state->vb_bo = NULL;
- accel_state->vb_total = 0;
+
+ if (vbo->vb_bo) {
+ radeon_bo_unmap(vbo->vb_bo);
+ radeon_bo_unref(vbo->vb_bo);
+ vbo->vb_bo = NULL;
+ vbo->vb_total = 0;
}
- accel_state->vb_offset = 0;
+ vbo->vb_offset = 0;
}
-void radeon_vbo_get(ScrnInfoPtr pScrn)
+void radeon_vbo_get(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo)
{
- RADEONInfoPtr info = RADEONPTR(pScrn);
- struct radeon_accel_state *accel_state = info->accel_state;
- accel_state->vb_bo = radeon_vbo_get_bo(pScrn);
+ vbo->vb_bo = radeon_vbo_get_bo(pScrn);
- accel_state->vb_total = VBO_SIZE;
- accel_state->vb_offset = 0;
- accel_state->vb_start_op = accel_state->vb_offset;
+ vbo->vb_total = VBO_SIZE;
+ vbo->vb_offset = 0;
+ vbo->vb_start_op = vbo->vb_offset;
}
/* these functions could migrate to libdrm and
@@ -80,7 +76,7 @@ static int radeon_bo_is_idle(struct radeon_bo *bo)
void radeon_vbo_init_lists(ScrnInfoPtr pScrn)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
- struct radeon_accel_state *accel_state = info->accel_state;
+ struct radeon_accel_state *accel_state = info->accel_state;
accel_state->use_vbos = TRUE;
make_empty_list(&accel_state->bo_free);
@@ -91,7 +87,7 @@ void radeon_vbo_init_lists(ScrnInfoPtr pScrn)
void radeon_vbo_free_lists(ScrnInfoPtr pScrn)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
- struct radeon_accel_state *accel_state = info->accel_state;
+ struct radeon_accel_state *accel_state = info->accel_state;
struct radeon_dma_bo *dma_bo, *temp;
foreach_s(dma_bo, temp, &accel_state->bo_free) {
@@ -116,7 +112,7 @@ void radeon_vbo_free_lists(ScrnInfoPtr pScrn)
void radeon_vbo_flush_bos(ScrnInfoPtr pScrn)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
- struct radeon_accel_state *accel_state = info->accel_state;
+ struct radeon_accel_state *accel_state = info->accel_state;
struct radeon_dma_bo *dma_bo, *temp;
const int expire_at = ++accel_state->bo_free.expire_counter + DMA_BO_FREE_TIME;
const int time = accel_state->bo_free.expire_counter;
@@ -164,7 +160,7 @@ void radeon_vbo_flush_bos(ScrnInfoPtr pScrn)
static struct radeon_bo *radeon_vbo_get_bo(ScrnInfoPtr pScrn)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
- struct radeon_accel_state *accel_state = info->accel_state;
+ struct radeon_accel_state *accel_state = info->accel_state;
struct radeon_dma_bo *dma_bo = NULL;
struct radeon_bo *bo;
diff --git a/src/radeon_vbo.h b/src/radeon_vbo.h
index b505f66b..21533c2e 100644
--- a/src/radeon_vbo.h
+++ b/src/radeon_vbo.h
@@ -2,39 +2,40 @@
#ifndef RADEON_VBO_H
#define RADEON_VBO_H
-extern void radeon_vb_no_space(ScrnInfoPtr pScrn, int vert_size);
+extern void radeon_vb_no_space(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo, int vert_size);
extern void radeon_vbo_init_lists(ScrnInfoPtr pScrn);
extern void radeon_vbo_free_lists(ScrnInfoPtr pScrn);
extern void radeon_vbo_flush_bos(ScrnInfoPtr pScrn);
-extern void radeon_vbo_get(ScrnInfoPtr pScrn);
-extern void radeon_vbo_put(ScrnInfoPtr pScrn);
+extern void radeon_vbo_get(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo);
+extern void radeon_vbo_put(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo);
-static inline void radeon_vbo_check(ScrnInfoPtr pScrn, int vert_size)
+static inline void radeon_vbo_check(ScrnInfoPtr pScrn,
+ struct radeon_vbo_object *vbo,
+ int vert_size)
{
- RADEONInfoPtr info = RADEONPTR(pScrn);
- struct radeon_accel_state *accel_state = info->accel_state;
- if ((accel_state->vb_offset + (accel_state->verts_per_op * vert_size)) > accel_state->vb_total) {
- radeon_vb_no_space(pScrn, vert_size);
+ if ((vbo->vb_offset + (vbo->verts_per_op * vert_size)) > vbo->vb_total) {
+ radeon_vb_no_space(pScrn, vbo, vert_size);
}
}
static inline void *
-radeon_vbo_space(ScrnInfoPtr pScrn, int vert_size)
+radeon_vbo_space(ScrnInfoPtr pScrn,
+ struct radeon_vbo_object *vbo,
+ int vert_size)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
- struct radeon_accel_state *accel_state = info->accel_state;
void *vb;
-
+
/* we've ran out of space in the vertex buffer - need to get a
new one */
- radeon_vbo_check(pScrn, vert_size);
+ radeon_vbo_check(pScrn, vbo, vert_size);
- accel_state->vb_op_vert_size = vert_size;
+ vbo->vb_op_vert_size = vert_size;
#if defined(XF86DRM_MODE)
if (info->cs) {
int ret;
- struct radeon_bo *bo = accel_state->vb_bo;
+ struct radeon_bo *bo = vbo->vb_bo;
if (!bo->ptr) {
ret = radeon_bo_map(bo, 1);
@@ -43,19 +44,18 @@ radeon_vbo_space(ScrnInfoPtr pScrn, int vert_size)
return NULL;
}
}
- vb = (pointer)((char *)bo->ptr + accel_state->vb_offset);
+ vb = (pointer)((char *)bo->ptr + vbo->vb_offset);
} else
#endif
- vb = (pointer)((char *)accel_state->vb_ptr + accel_state->vb_offset);
+ vb = (pointer)((char *)vbo->vb_ptr + vbo->vb_offset);
return vb;
}
-static inline void radeon_vbo_commit(ScrnInfoPtr pScrn)
+static inline void radeon_vbo_commit(ScrnInfoPtr pScrn,
+ struct radeon_vbo_object *vbo)
{
- RADEONInfoPtr info = RADEONPTR(pScrn);
- struct radeon_accel_state *accel_state = info->accel_state;
- accel_state->vb_offset += accel_state->verts_per_op * accel_state->vb_op_vert_size;
+ vbo->vb_offset += vbo->verts_per_op * vbo->vb_op_vert_size;
}
#endif