summaryrefslogtreecommitdiff
path: root/src/r6xx_accel.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/r6xx_accel.c')
-rw-r--r--src/r6xx_accel.c1110
1 files changed, 1110 insertions, 0 deletions
diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c
new file mode 100644
index 00000000..659d13da
--- /dev/null
+++ b/src/r6xx_accel.c
@@ -0,0 +1,1110 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Alex Deucher <alexander.deucher@amd.com>
+ * Matthias Hopf <mhopf@suse.de>
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include <errno.h>
+
+#include "radeon.h"
+#include "r600_shader.h"
+#include "radeon_reg.h"
+#include "r600_reg.h"
+#include "r600_state.h"
+
+#include "xf86drm.h"
+#include "radeon_drm.h"
+
+/*
+ * Emit uint32_t: store dword at the current write position of the indirect
+ * buffer.  ib->used is a byte count, so the slot index is used/4 and the
+ * count grows by 4 afterwards.  No check against the buffer size is made.
+ */
+inline void e32(drmBufPtr ib, uint32_t dword)
+{
+    uint32_t *words = (pointer)(char*)ib->address;
+
+    *(words + (ib->used >> 2)) = dword;
+    ib->used += 4;
+}
+
+/* Emit a float: its 32-bit object representation is written as one dword */
+inline void efloat(drmBufPtr ib, float f)
+{
+    union {
+        uint32_t bits;
+        float    value;
+    } pun;
+
+    pun.value = f;
+    e32(ib, pun.bits);
+}
+
+/*
+ * Emit a type-3 packet header: RADEON_CP_PACKET3 with the opcode cmd placed
+ * at bit 8 and (num - 1), truncated to 14 bits, placed at bit 16.
+ */
+inline void pack3(drmBufPtr ib, int cmd, unsigned num)
+{
+    uint32_t header = RADEON_CP_PACKET3;
+
+    header |= (cmd << 8);
+    header |= (((num - 1) & 0x3fff) << 16);
+    e32(ib, header);
+}
+
+/*
+ * Write num registers, start at reg.
+ *
+ * If reg falls inside one of the SET_* register windows, the matching
+ * IT_SET_* type-3 packet header is emitted with a count of num+1 (the extra
+ * dword is the offset word), followed by reg's dword offset from the start
+ * of that window.  Any other register gets a plain type-0 packet header.
+ * The caller must follow up with exactly num data dwords.
+ */
+inline void pack0 (drmBufPtr ib, uint32_t reg, int num)
+{
+    if (reg >= SET_CONFIG_REG_offset && reg < SET_CONFIG_REG_end) {
+        pack3 (ib, IT_SET_CONFIG_REG, num+1);
+        e32 (ib, (reg-SET_CONFIG_REG_offset) >> 2);
+    } else if (reg >= SET_CONTEXT_REG_offset && reg < SET_CONTEXT_REG_end) {
+        pack3 (ib, IT_SET_CONTEXT_REG, num+1);
+        /* use the window base that was range-checked above, not a literal */
+        e32 (ib, (reg-SET_CONTEXT_REG_offset) >> 2);
+    } else if (reg >= SET_ALU_CONST_offset && reg < SET_ALU_CONST_end) {
+        pack3 (ib, IT_SET_ALU_CONST, num+1);
+        e32 (ib, (reg-SET_ALU_CONST_offset) >> 2);
+    } else if (reg >= SET_RESOURCE_offset && reg < SET_RESOURCE_end) {
+        pack3 (ib, IT_SET_RESOURCE, num+1);
+        e32 (ib, (reg-SET_RESOURCE_offset) >> 2);
+    } else if (reg >= SET_SAMPLER_offset && reg < SET_SAMPLER_end) {
+        pack3 (ib, IT_SET_SAMPLER, num+1);
+        e32 (ib, (reg-SET_SAMPLER_offset) >> 2);
+    } else if (reg >= SET_CTL_CONST_offset && reg < SET_CTL_CONST_end) {
+        pack3 (ib, IT_SET_CTL_CONST, num+1);
+        e32 (ib, (reg-SET_CTL_CONST_offset) >> 2);
+    } else if (reg >= SET_LOOP_CONST_offset && reg < SET_LOOP_CONST_end) {
+        pack3 (ib, IT_SET_LOOP_CONST, num+1);
+        e32 (ib, (reg-SET_LOOP_CONST_offset) >> 2);
+    } else if (reg >= SET_BOOL_CONST_offset && reg < SET_BOOL_CONST_end) {
+        pack3 (ib, IT_SET_BOOL_CONST, num+1);
+        e32 (ib, (reg-SET_BOOL_CONST_offset) >> 2);
+    } else {
+        /* not in any special window: ordinary type-0 register write */
+        e32 (ib, CP_PACKET0 (reg, num-1));
+    }
+}
+
+/* Write a single register: a one-register pack0 header followed by val */
+inline void ereg (drmBufPtr ib, uint32_t reg, uint32_t val)
+{
+    const int one_register = 1;
+
+    pack0 (ib, reg, one_register);
+    e32 (ib, val);
+}
+
+/*
+ * Flush the indirect buffer to the kernel for submission to the card.
+ *
+ * A NULL ib is ignored.  The buffer is padded with CP_PACKET2 dwords until
+ * ib->used is a multiple of 16 dwords, then bytes [0, ib->used) are handed
+ * to the DRM_RADEON_INDIRECT command with the discard flag set.  A failing
+ * submission is reported through the X log; the function itself returns
+ * nothing, so callers cannot observe the failure directly.
+ */
+void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    drm_radeon_indirect_t indirect;
+    int ret;
+
+    if (!ib)
+        return;
+
+    /* fill up to multiple of 16 dwords */
+    while (ib->used & 0x3c)
+        e32(ib, CP_PACKET2());
+
+    indirect.idx = ib->idx;
+    indirect.start = 0;
+    indirect.end = ib->used;
+    indirect.discard = 1;
+
+    ret = drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
+                              &indirect, sizeof(drm_radeon_indirect_t));
+    if (ret)
+        xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+                   "R600CPFlushIndirect: DRM_RADEON_INDIRECT failed: %d\n", ret);
+}
+
+/*
+ * Drop whatever has been written to ib: reset its byte count to zero and
+ * push it through R600CPFlushIndirect().  A NULL ib is ignored.
+ */
+void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    if (ib) {
+        ib->used = 0;
+        R600CPFlushIndirect(pScrn, ib);
+    }
+}
+
+/*
+ * Emit a cache flush-and-invalidate event (no timestamp) followed by a
+ * WAIT_UNTIL for both the 3D-idle and 3D-idle-clean conditions.
+ */
+void
+wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    const uint32_t idle_and_clean = (WAIT_3D_IDLE_bit |
+                                     WAIT_3D_IDLECLEAN_bit);
+
+    // flush caches, don't generate timestamp
+    pack3 (ib, IT_EVENT_WRITE, 1);
+    e32 (ib, CACHE_FLUSH_AND_INV_EVENT);
+
+    // wait for 3D idle clean
+    ereg (ib, WAIT_UNTIL, idle_and_clean);
+}
+
+/* Emit a WAIT_UNTIL register write with only the 3D-idle bit set */
+void
+wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    const uint32_t idle_only = WAIT_3D_IDLE_bit;
+
+    ereg (ib, WAIT_UNTIL, idle_only);
+}
+
+/* Zero the eight consecutive registers starting at CB_COLOR0_INFO */
+static void
+reset_cb(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    int remaining = 8;
+
+    pack0 (ib, CB_COLOR0_INFO, 8);
+    while (remaining-- > 0)
+        e32 (ib, 0);
+}
+
+/*
+ * Zero the PS and VS sampler border-color registers (four dwords per
+ * sampler), with a 3D-idle wait emitted before and after the writes.
+ */
+static void
+reset_td_samplers(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    int n;
+
+    wait_3d_idle(pScrn, ib);
+
+    /* pixel shader samplers */
+    pack0 (ib, TD_PS_SAMPLER0_BORDER_RED, 4*TD_PS_SAMPLER0_BORDER_RED_num);
+    n = 0;
+    while (n < 4*TD_PS_SAMPLER0_BORDER_RED_num) {
+        e32 (ib, 0);
+        n++;
+    }
+
+    /* vertex shader samplers */
+    pack0 (ib, TD_VS_SAMPLER0_BORDER_RED, 4*TD_VS_SAMPLER0_BORDER_RED_num);
+    n = 0;
+    while (n < 4*TD_VS_SAMPLER0_BORDER_RED_num) {
+        e32 (ib, 0);
+        n++;
+    }
+
+    wait_3d_idle(pScrn, ib);
+}
+
+/*
+ * Write the same three default words to every SQ_TEX_SAMPLER_WORD slot:
+ * word 0 selects the LESSEQUAL depth compare function, word 1 is
+ * MAX_LOD_mask and word 2 is zero.
+ */
+static void
+reset_sampler_const (ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    const uint32_t defaults[3] = {
+        SQ_TEX_DEPTH_COMPARE_LESSEQUAL << DEPTH_COMPARE_FUNCTION_shift,
+        MAX_LOD_mask,
+        0
+    };
+    int slot;
+
+    for (slot = 0; slot < SQ_TEX_SAMPLER_WORD_all_num; slot++) {
+        pack0 (ib, SQ_TEX_SAMPLER_WORD + slot * SQ_TEX_SAMPLER_WORD_offset, 3);
+        e32 (ib, defaults[0]);
+        e32 (ib, defaults[1]);
+        e32 (ib, defaults[2]);
+    }
+}
+
+/*
+ * Fill the whole SQ_ALU_CONSTANT range with 0.0.  The dword count is the
+ * number of constants times the dwords per constant (byte stride / 4).
+ */
+static void
+reset_dx9_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    const int ndwords = SQ_ALU_CONSTANT_all_num * (SQ_ALU_CONSTANT_offset >> 2);
+    int left = ndwords;
+
+    pack0 (ib, SQ_ALU_CONSTANT, ndwords);
+    while (left-- > 0)
+        efloat (ib, 0.0f);
+}
+
+/*
+ * Zero the boolean constants one register at a time, then zero all loop
+ * constants with a single multi-register write.
+ */
+static void
+reset_bool_loop_const(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    int k;
+
+    k = 0;
+    while (k < SQ_BOOL_CONST_0_num) {
+        ereg (ib, SQ_BOOL_CONST_0 + (k << 2), 0);
+        k++;
+    }
+
+    pack0 (ib, SQ_LOOP_CONST, SQ_LOOP_CONST_all_num);
+    k = 0;
+    while (k < SQ_LOOP_CONST_all_num) {
+        e32 (ib, 0);
+        k++;
+    }
+}
+
+/*
+ * Open a 3D command sequence: chip families below RV770 first get an
+ * IT_START_3D_CMDBUF packet, then every family gets IT_CONTEXT_CONTROL with
+ * 0x80000000 in both dwords, followed by a flush-and-wait for 3D idle clean.
+ */
+void
+start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    const int pre_rv770 = (info->ChipFamily < CHIP_FAMILY_RV770);
+    const uint32_t ctx_ctl = 0x80000000;
+
+    if (pre_rv770) {
+        pack3 (ib, IT_START_3D_CMDBUF, 1);
+        e32 (ib, 0);
+    }
+
+    pack3 (ib, IT_CONTEXT_CONTROL, 2);
+    e32 (ib, ctx_ctl);
+    e32 (ib, ctx_ctl);
+
+    wait_3d_idle_clean (pScrn, ib);
+}
+
+/*
+ * Setup of functional groups
+ */
+
+// asic stack/thread/gpr limits - need to query the drm
+/*
+ * Pack the priorities and the GPR / thread / stack limits from sq_conf into
+ * the six consecutive registers that start at SQ_CONFIG, and emit them with
+ * one multi-register write.  VC_ENABLE is left clear on RV610, RV620, RS780
+ * and RV710 and set on every other family.
+ */
+static void
+sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t regs[6];
+    int n;
+
+    /* regs[0]: SQ_CONFIG */
+    switch (info->ChipFamily) {
+    case CHIP_FAMILY_RV610:
+    case CHIP_FAMILY_RV620:
+    case CHIP_FAMILY_RS780:
+    case CHIP_FAMILY_RV710:
+        regs[0] = 0; // no VC
+        break;
+    default:
+        regs[0] = VC_ENABLE_bit;
+        break;
+    }
+    regs[0] |= (DX9_CONSTS_bit |
+                ALU_INST_PREFER_VECTOR_bit |
+                (sq_conf->ps_prio << PS_PRIO_shift) |
+                (sq_conf->vs_prio << VS_PRIO_shift) |
+                (sq_conf->gs_prio << GS_PRIO_shift) |
+                (sq_conf->es_prio << ES_PRIO_shift));
+
+    /* regs[1], regs[2]: GPR resource management 1 and 2 */
+    regs[1] = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
+               (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
+               (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
+    regs[2] = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
+               (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
+
+    /* regs[3]: thread resource management */
+    regs[3] = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
+               (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
+               (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
+               (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
+
+    /* regs[4], regs[5]: stack resource management 1 and 2 */
+    regs[4] = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
+               (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
+    regs[5] = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
+               (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
+
+    pack0 (ib, SQ_CONFIG, 6);
+    for (n = 0; n < 6; n++)
+        e32 (ib, regs[n]);
+}
+
+/*
+ * Program color buffer cb_conf->id: base address, size (pitch and slice in
+ * units derived from 8x8 pixel tiles), view, info word and the CMASK/FMASK
+ * registers, which are all written as zero.  Families newer than R600 and
+ * older than RV770 additionally get an IT_SURFACE_BASE_UPDATE packet right
+ * after the base address write.
+ */
+void
+set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    const uint32_t reg_off = 4 * cb_conf->id;   /* per-target register stride */
+    uint32_t color_info;
+    int pitch_tiles, slice_tiles, aligned_h;
+
+    color_info = ((cb_conf->endian << ENDIAN_shift) |
+                  (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) |
+                  (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) |
+                  (cb_conf->number_type << NUMBER_TYPE_shift) |
+                  (cb_conf->comp_swap << COMP_SWAP_shift) |
+                  (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift));
+
+    /* single-bit options */
+    if (cb_conf->read_size)     color_info |= CB_COLOR0_INFO__READ_SIZE_bit;
+    if (cb_conf->blend_clamp)   color_info |= BLEND_CLAMP_bit;
+    if (cb_conf->clear_color)   color_info |= CLEAR_COLOR_bit;
+    if (cb_conf->blend_bypass)  color_info |= BLEND_BYPASS_bit;
+    if (cb_conf->blend_float32) color_info |= BLEND_FLOAT32_bit;
+    if (cb_conf->simple_float)  color_info |= SIMPLE_FLOAT_bit;
+    if (cb_conf->round_mode)    color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
+    if (cb_conf->tile_compact)  color_info |= TILE_COMPACT_bit;
+    if (cb_conf->source_format) color_info |= SOURCE_FORMAT_bit;
+
+    /* height is rounded up to a multiple of 8 before computing the slice */
+    pitch_tiles = (cb_conf->w / 8) - 1;
+    aligned_h = (cb_conf->h + 7) & ~7;
+    slice_tiles = ((cb_conf->w * aligned_h) / 64) - 1;
+
+    ereg (ib, (CB_COLOR0_BASE + reg_off), (cb_conf->base >> 8));
+
+    // rv6xx workaround
+    if ((info->ChipFamily > CHIP_FAMILY_R600) &&
+        (info->ChipFamily < CHIP_FAMILY_RV770)) {
+        pack3 (ib, IT_SURFACE_BASE_UPDATE, 1);
+        e32 (ib, (2 << cb_conf->id));
+    }
+
+    // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
+    ereg (ib, (CB_COLOR0_SIZE + reg_off), ((pitch_tiles << PITCH_TILE_MAX_shift) |
+                                           (slice_tiles << SLICE_TILE_MAX_shift)));
+    ereg (ib, (CB_COLOR0_VIEW + reg_off), 0);   // SLICE_START = 0, SLICE_MAX = 0
+    ereg (ib, (CB_COLOR0_INFO + reg_off), color_info);
+    ereg (ib, (CB_COLOR0_TILE + reg_off), 0);   // CMASK per-tile data base/256
+    ereg (ib, (CB_COLOR0_FRAG + reg_off), 0);   // FMASK per-tile data base/256
+    ereg (ib, (CB_COLOR0_MASK + reg_off), 0);   // CMASK_BLOCK_MAX = 0, FMASK_TILE_MAX = 0
+}
+
+/*
+ * Program CP_COHER_CNTL / SIZE / BASE for a surface sync and then emit an
+ * IT_WAIT_REG_MEM packet that polls CP_COHER_STATUS.  size is converted to
+ * 256-byte units rounded up, except that 0xffffffff is passed through
+ * unchanged; mc_addr is written in 256-byte units.
+ */
+void
+cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr)
+{
+    const uint32_t coher_size = (size == 0xffffffff) ? 0xffffffff
+                                                     : ((size + 255) >> 8);
+
+    ereg (ib, CP_COHER_CNTL, sync_type);
+    ereg (ib, CP_COHER_SIZE, coher_size);
+    ereg (ib, CP_COHER_BASE, (mc_addr >> 8));
+
+    pack3 (ib, IT_WAIT_REG_MEM, 6);
+    e32 (ib, 0x00000003);           // ME, Register, EqualTo
+    e32 (ib, CP_COHER_STATUS >> 2); // register to poll, as a dword offset
+    e32 (ib, 0);
+    e32 (ib, 0);                    // Ref value
+    e32 (ib, STATUS_bit);           // Ref mask
+    e32 (ib, 10);                   // Wait interval
+}
+
+/*
+ * Program the fetch shader: start address in 256-byte units, the resources
+ * word (GPR count, stack size, optional DX10 clamp) and a zero CF offset.
+ */
+void
+fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf)
+{
+    uint32_t resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
+                          (fs_conf->stack_size << STACK_SIZE_shift));
+
+    if (fs_conf->dx10_clamp)
+        resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit;
+
+    ereg (ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
+    ereg (ib, SQ_PGM_RESOURCES_FS, resources);
+    ereg (ib, SQ_PGM_CF_OFFSET_FS, 0);
+}
+
+/*
+ * Program the vertex shader: start address in 256-byte units, the resources
+ * word (GPR count, stack size, DX10 clamp, fetch cache lines, uncached
+ * first instruction) and a zero CF offset.
+ */
+void
+vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf)
+{
+    uint32_t resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
+                          (vs_conf->stack_size << STACK_SIZE_shift));
+
+    if (vs_conf->dx10_clamp)
+        resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit;
+    if (vs_conf->fetch_cache_lines)
+        resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
+    if (vs_conf->uncached_first_inst)
+        resources |= UNCACHED_FIRST_INST_bit;
+
+    ereg (ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
+    ereg (ib, SQ_PGM_RESOURCES_VS, resources);
+    ereg (ib, SQ_PGM_CF_OFFSET_VS, 0);
+}
+
+/*
+ * Program the pixel shader: start address in 256-byte units, the resources
+ * word (GPR count, stack size, DX10 clamp, fetch cache lines, uncached
+ * first instruction, constant clamping), the export mode and a zero CF
+ * offset.
+ */
+void
+ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf)
+{
+    uint32_t resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
+                          (ps_conf->stack_size << STACK_SIZE_shift));
+
+    if (ps_conf->dx10_clamp)
+        resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit;
+    if (ps_conf->fetch_cache_lines)
+        resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
+    if (ps_conf->uncached_first_inst)
+        resources |= UNCACHED_FIRST_INST_bit;
+    if (ps_conf->clamp_consts)
+        resources |= CLAMP_CONSTS_bit;
+
+    ereg (ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
+    ereg (ib, SQ_PGM_RESOURCES_PS, resources);
+    ereg (ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
+    ereg (ib, SQ_PGM_CF_OFFSET_PS, 0);
+}
+
+/*
+ * Upload count ALU constants starting at constant index offset.  Each
+ * constant occupies SQ_ALU_CONSTANT_offset bytes, so const_buf must supply
+ * count * (SQ_ALU_CONSTANT_offset / 4) floats.
+ */
+void
+set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf)
+{
+    const int nfloats = count * (SQ_ALU_CONSTANT_offset >> 2);
+    int k = 0;
+
+    pack0 (ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, nfloats);
+    while (k < nfloats) {
+        efloat (ib, const_buf[k]);
+        k++;
+    }
+}
+
+/*
+ * Emit the seven-dword vertex buffer resource for slot res->id: low address
+ * bits, size, a packed word with the high address bits / stride / format
+ * flags, the memory request size, two unused dwords and the resource type.
+ */
+void
+set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res)
+{
+    uint32_t dw[7];
+    int n;
+
+    dw[0] = res->vb_addr & 0xffffffff;                  // 0: BASE_ADDRESS
+    dw[1] = (res->vtx_num_entries << 2) - 1;            // 1: SIZE
+
+    // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
+    dw[2] = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
+             ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
+             (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
+             (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
+             (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
+    if (res->clamp_x)
+        dw[2] |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
+    if (res->format_comp_all)
+        dw[2] |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
+    if (res->srf_mode_all)
+        dw[2] |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
+
+    dw[3] = res->mem_req_size << MEM_REQUEST_SIZE_shift; // 3: MEM_REQUEST_SIZE ?!?
+    dw[4] = 0;                                           // 4: n/a
+    dw[5] = 0;                                           // 5: n/a
+    dw[6] = SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift; // 6: TYPE
+
+    pack0 (ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
+    for (n = 0; n < 7; n++)
+        e32 (ib, dw[n]);
+}
+/*
+ * Pack the fields of *tex_res into the texture resource words and emit all
+ * seven dwords to the SQ_TEX_RESOURCE slot selected by tex_res->id.
+ * Words 0, 1, 4, 5 and 6 are assembled from bitfields; words 2 and 3 are
+ * the base and mip base addresses shifted right by 8.
+ */
+void
+set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res)
+{
+ uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
+ uint32_t sq_tex_resource_word5, sq_tex_resource_word6;
+
+ sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) |
+ (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift));
+
+ if (tex_res->w) // w == 0 leaves PITCH/TEX_WIDTH as 0 instead of encoding w - 1
+ sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
+ ((tex_res->w - 1) << TEX_WIDTH_shift));
+
+ if (tex_res->tile_type)
+ sq_tex_resource_word0 |= TILE_TYPE_bit;
+
+ sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);
+
+ if (tex_res->h) // height and depth are stored minus one, and only when nonzero
+ sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
+ if (tex_res->depth)
+ sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
+
+ sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
+ (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
+ (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
+ (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
+ (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
+ (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
+ (tex_res->request_size << REQUEST_SIZE_shift) |
+ (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
+ (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
+ (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
+ (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
+ (tex_res->base_level << BASE_LEVEL_shift));
+
+ if (tex_res->srf_mode_all)
+ sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
+ if (tex_res->force_degamma)
+ sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
+
+ sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
+ (tex_res->base_array << BASE_ARRAY_shift) |
+ (tex_res->last_array << LAST_ARRAY_shift));
+
+ sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) |
+ (tex_res->perf_modulation << PERF_MODULATION_shift) |
+ (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift));
+
+ if (tex_res->interlaced)
+ sq_tex_resource_word6 |= INTERLACED_bit;
+
+ pack0 (ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
+ e32 (ib, sq_tex_resource_word0); // 0: DIM, TILE_MODE, PITCH, TEX_WIDTH, TILE_TYPE
+ e32 (ib, sq_tex_resource_word1); // 1: DATA_FORMAT, TEX_HEIGHT, TEX_DEPTH
+ e32 (ib, ((tex_res->base) >> 8)); // 2: base >> 8
+ e32 (ib, ((tex_res->mip_base) >> 8)); // 3: mip_base >> 8
+ e32 (ib, sq_tex_resource_word4); // 4: component formats, swizzle, endian, request size, base level
+ e32 (ib, sq_tex_resource_word5); // 5: LAST_LEVEL, BASE_ARRAY, LAST_ARRAY
+ e32 (ib, sq_tex_resource_word6); // 6: MPEG_CLAMP, PERF_MODULATION, INTERLACED, TYPE
+}
+/*
+ * Pack the fields of *s into the three sampler words and emit them to the
+ * SQ_TEX_SAMPLER_WORD slot selected by s->id.  Multi-bit fields are shifted
+ * into place; boolean fields set their corresponding *_bit when nonzero.
+ */
+void
+set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
+{
+ uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
+
+ sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) |
+ (s->clamp_y << CLAMP_Y_shift) |
+ (s->clamp_z << CLAMP_Z_shift) |
+ (s->xy_mag_filter << XY_MAG_FILTER_shift) |
+ (s->xy_min_filter << XY_MIN_FILTER_shift) |
+ (s->z_filter << Z_FILTER_shift) |
+ (s->mip_filter << MIP_FILTER_shift) |
+ (s->border_color << BORDER_COLOR_TYPE_shift) |
+ (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) |
+ (s->chroma_key << CHROMA_KEY_shift));
+ if (s->point_sampling_clamp)
+ sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit;
+ if (s->tex_array_override)
+ sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit;
+ if (s->lod_uses_minor_axis)
+ sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit;
+
+ sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) |
+ (s->max_lod << MAX_LOD_shift) |
+ (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift));
+
+ sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) |
+ (s->perf_mip << PERF_MIP_shift) |
+ (s->perf_z << PERF_Z_shift));
+ if (s->mc_coord_truncate)
+ sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
+ if (s->force_degamma)
+ sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
+ if (s->high_precision_filter)
+ sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit;
+ if (s->fetch_4)
+ sq_tex_sampler_word2 |= FETCH_4_bit;
+ if (s->sample_is_pcf)
+ sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit;
+ if (s->type)
+ sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
+
+ pack0 (ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
+ e32 (ib, sq_tex_sampler_word0); // 0: clamp modes, filters, border color, depth compare, chroma key
+ e32 (ib, sq_tex_sampler_word1); // 1: MIN_LOD, MAX_LOD, LOD_BIAS
+ e32 (ib, sq_tex_sampler_word2); // 2: secondary LOD bias, perf settings, misc flags
+}
+
+//XXX deal with clip offsets in clip setup
+/* Write the screen scissor rectangle: (x1, y1) top-left, (x2, y2) bottom-right */
+void
+set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
+{
+    const uint32_t top_left  = ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
+                                (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift));
+    const uint32_t bot_right = ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
+                                (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift));
+
+    ereg (ib, PA_SC_SCREEN_SCISSOR_TL, top_left);
+    ereg (ib, PA_SC_SCREEN_SCISSOR_BR, bot_right);
+}
+
+/*
+ * Write the scissor rectangle of viewport id.  The top-left word also
+ * carries WINDOW_OFFSET_DISABLE_bit.
+ */
+void
+set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
+{
+    const uint32_t top_left  = ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
+                                (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
+                                WINDOW_OFFSET_DISABLE_bit);
+    const uint32_t bot_right = ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
+                                (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift));
+
+    ereg (ib, PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, top_left);
+    ereg (ib, PA_SC_VPORT_SCISSOR_0_BR + id * PA_SC_VPORT_SCISSOR_0_BR_offset, bot_right);
+}
+
+/*
+ * Write the generic scissor rectangle: (x1, y1) top-left, (x2, y2)
+ * bottom-right.  The top-left word also carries WINDOW_OFFSET_DISABLE_bit.
+ */
+void
+set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
+{
+
+    ereg (ib, PA_SC_GENERIC_SCISSOR_TL, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
+                                         (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
+                                         WINDOW_OFFSET_DISABLE_bit));
+    /* y2 belongs to the BR register, so it takes the BR field shift */
+    ereg (ib, PA_SC_GENERIC_SCISSOR_BR, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
+                                         (y2 << PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift)));
+}
+
+/*
+ * Write the window scissor rectangle: (x1, y1) top-left, (x2, y2)
+ * bottom-right.  The top-left word also carries WINDOW_OFFSET_DISABLE_bit.
+ */
+void
+set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
+{
+    const uint32_t top_left  = ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
+                                (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
+                                WINDOW_OFFSET_DISABLE_bit);
+    const uint32_t bot_right = ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
+                                (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift));
+
+    ereg (ib, PA_SC_WINDOW_SCISSOR_TL, top_left);
+    ereg (ib, PA_SC_WINDOW_SCISSOR_BR, bot_right);
+}
+
+/* Write clip rectangle id: (x1, y1) top-left, (x2, y2) bottom-right */
+void
+set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
+{
+    const uint32_t top_left  = ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
+                                (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift));
+    const uint32_t bot_right = ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
+                                (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift));
+
+    ereg (ib, PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, top_left);
+    ereg (ib, PA_SC_CLIPRECT_0_BR + id * PA_SC_CLIPRECT_0_BR_offset, bot_right);
+}
+
+/*
+ * Setup of default state
+ *
+ * set_default_state() emits the baseline 3D register state into ib once per
+ * accel_state: it returns immediately if accel_state->XInited3D is already
+ * set, and sets that flag before emitting anything.  The sequence is: wait
+ * for 3D idle, chip-family specific TA/VC/DB setup, reset of the sampler
+ * border colors, ALU/bool/loop constants and sampler words, SQ resource
+ * limits chosen per chip family, then zeroed/default DB, SX, CB, SC, CL,
+ * SU, SPI and VGT registers, and finally cleared texture resource slots.
+ */
+
+void
+set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+ tex_resource_t tex_res;
+ shader_config_t fs_conf;
+ sq_config_t sq_conf;
+ int i;
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+
+ memset(&tex_res, 0, sizeof(tex_resource_t)); // all-zero template used to clear the texture slots at the end
+ memset(&fs_conf, 0, sizeof(shader_config_t)); // all-zero config passed to fs_setup() below
+
+#if 1
+ if (accel_state->XInited3D)
+ return;
+#endif
+
+ accel_state->XInited3D = TRUE; // mark done so later calls return early
+
+ wait_3d_idle(pScrn, ib);
+
+ // ASIC specific setup, see drm
+ if (info->ChipFamily < CHIP_FAMILY_RV770) {
+ ereg (ib, TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) |
+ (28 << TD_FIFO_CREDIT_shift)));
+ ereg (ib, VC_ENHANCE, 0);
+ ereg (ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
+ ereg (ib, DB_DEBUG, 0x82000000); /* ? */
+ ereg (ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
+ (16 << DEPTH_FLUSH_shift) |
+ (0 << FORCE_SUMMARIZE_shift) |
+ (4 << DEPTH_PENDING_FREE_shift) |
+ (16 << DEPTH_CACHELINE_FREE_shift) |
+ 0));
+ } else {
+ ereg (ib, TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) |
+ (28 << TD_FIFO_CREDIT_shift)));
+ ereg (ib, VC_ENHANCE, 0);
+ ereg (ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
+ ereg (ib, DB_DEBUG, 0);
+ ereg (ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
+ (16 << DEPTH_FLUSH_shift) |
+ (0 << FORCE_SUMMARIZE_shift) |
+ (4 << DEPTH_PENDING_FREE_shift) |
+ (4 << DEPTH_CACHELINE_FREE_shift) |
+ 0));
+ }
+
+ reset_td_samplers(pScrn, ib);
+ reset_dx9_alu_consts(pScrn, ib);
+ reset_bool_loop_const (pScrn, ib);
+ reset_sampler_const (pScrn, ib);
+
+ // SQ
+ sq_conf.ps_prio = 0;
+ sq_conf.vs_prio = 1;
+ sq_conf.gs_prio = 2;
+ sq_conf.es_prio = 3;
+ // need to set stack/thread/gpr limits based on the asic
+ // for now just set them low enough so any card will work
+ // see r600_cp.c in the drm
+ switch (info->ChipFamily) {
+ case CHIP_FAMILY_R600:
+ sq_conf.num_ps_gprs = 192;
+ sq_conf.num_vs_gprs = 56;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 136;
+ sq_conf.num_vs_threads = 48;
+ sq_conf.num_gs_threads = 4;
+ sq_conf.num_es_threads = 4;
+ sq_conf.num_ps_stack_entries = 128;
+ sq_conf.num_vs_stack_entries = 128;
+ sq_conf.num_gs_stack_entries = 0;
+ sq_conf.num_es_stack_entries = 0;
+ break;
+ case CHIP_FAMILY_RV630:
+ case CHIP_FAMILY_RV635:
+ sq_conf.num_ps_gprs = 84;
+ sq_conf.num_vs_gprs = 36;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 144;
+ sq_conf.num_vs_threads = 40;
+ sq_conf.num_gs_threads = 4;
+ sq_conf.num_es_threads = 4;
+ sq_conf.num_ps_stack_entries = 40;
+ sq_conf.num_vs_stack_entries = 40;
+ sq_conf.num_gs_stack_entries = 32;
+ sq_conf.num_es_stack_entries = 16;
+ break;
+ case CHIP_FAMILY_RV610:
+ case CHIP_FAMILY_RV620:
+ case CHIP_FAMILY_RS780:
+ default: // any family not listed in this switch uses these limits too
+ sq_conf.num_ps_gprs = 84;
+ sq_conf.num_vs_gprs = 36;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 136;
+ sq_conf.num_vs_threads = 48;
+ sq_conf.num_gs_threads = 4;
+ sq_conf.num_es_threads = 4;
+ sq_conf.num_ps_stack_entries = 40;
+ sq_conf.num_vs_stack_entries = 40;
+ sq_conf.num_gs_stack_entries = 32;
+ sq_conf.num_es_stack_entries = 16;
+ break;
+ case CHIP_FAMILY_RV670:
+ sq_conf.num_ps_gprs = 144;
+ sq_conf.num_vs_gprs = 40;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 136;
+ sq_conf.num_vs_threads = 48;
+ sq_conf.num_gs_threads = 4;
+ sq_conf.num_es_threads = 4;
+ sq_conf.num_ps_stack_entries = 40;
+ sq_conf.num_vs_stack_entries = 40;
+ sq_conf.num_gs_stack_entries = 32;
+ sq_conf.num_es_stack_entries = 16;
+ break;
+ case CHIP_FAMILY_RV770:
+ sq_conf.num_ps_gprs = 192;
+ sq_conf.num_vs_gprs = 56;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 188;
+ sq_conf.num_vs_threads = 60;
+ sq_conf.num_gs_threads = 0;
+ sq_conf.num_es_threads = 0;
+ sq_conf.num_ps_stack_entries = 256;
+ sq_conf.num_vs_stack_entries = 256;
+ sq_conf.num_gs_stack_entries = 0;
+ sq_conf.num_es_stack_entries = 0;
+ break;
+ case CHIP_FAMILY_RV730:
+ sq_conf.num_ps_gprs = 84;
+ sq_conf.num_vs_gprs = 36;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 188;
+ sq_conf.num_vs_threads = 60;
+ sq_conf.num_gs_threads = 0;
+ sq_conf.num_es_threads = 0;
+ sq_conf.num_ps_stack_entries = 128;
+ sq_conf.num_vs_stack_entries = 128;
+ sq_conf.num_gs_stack_entries = 0;
+ sq_conf.num_es_stack_entries = 0;
+ break;
+ case CHIP_FAMILY_RV710:
+ sq_conf.num_ps_gprs = 192;
+ sq_conf.num_vs_gprs = 56;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 144;
+ sq_conf.num_vs_threads = 48;
+ sq_conf.num_gs_threads = 0;
+ sq_conf.num_es_threads = 0;
+ sq_conf.num_ps_stack_entries = 128;
+ sq_conf.num_vs_stack_entries = 128;
+ sq_conf.num_gs_stack_entries = 0;
+ sq_conf.num_es_stack_entries = 0;
+ break;
+ }
+
+ sq_setup(pScrn, ib, &sq_conf);
+
+ ereg (ib, SQ_VTX_BASE_VTX_LOC, 0);
+ ereg (ib, SQ_VTX_START_INST_LOC, 0);
+
+ pack0 (ib, SQ_ESGS_RING_ITEMSIZE, 9);
+ e32 (ib, 0); // SQ_ESGS_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_GSVS_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_ESTMP_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_GSTMP_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_VSTMP_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_PSTMP_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_FBUF_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_REDUC_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_GS_VERT_ITEMSIZE
+
+ // DB
+ ereg (ib, DB_DEPTH_INFO, 0);
+ ereg (ib, DB_STENCIL_CLEAR, 0);
+ ereg (ib, DB_DEPTH_CLEAR, 0);
+ ereg (ib, DB_STENCILREFMASK, 0);
+ ereg (ib, DB_STENCILREFMASK_BF, 0);
+ ereg (ib, DB_DEPTH_CONTROL, 0);
+ ereg (ib, DB_RENDER_CONTROL, STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
+ if (info->ChipFamily < CHIP_FAMILY_RV770)
+ ereg (ib, DB_RENDER_OVERRIDE, FORCE_SHADER_Z_ORDER_bit);
+ else
+ ereg (ib, DB_RENDER_OVERRIDE, 0);
+ ereg (ib, DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) |
+ (2 << ALPHA_TO_MASK_OFFSET1_shift) |
+ (2 << ALPHA_TO_MASK_OFFSET2_shift) |
+ (2 << ALPHA_TO_MASK_OFFSET3_shift)));
+
+ // SX
+ ereg (ib, SX_ALPHA_TEST_CONTROL, 0);
+ ereg (ib, SX_ALPHA_REF, 0);
+
+ // CB
+ reset_cb(pScrn, ib);
+
+ pack0 (ib, CB_BLEND_RED, 4);
+ e32 (ib, 0x00000000);
+ e32 (ib, 0x00000000);
+ e32 (ib, 0x00000000);
+ e32 (ib, 0x00000000);
+
+ /* CB_COLOR_CONTROL.PER_MRT_BLEND is off */
+ // RV6xx+ have per-MRT blend
+ if (info->ChipFamily > CHIP_FAMILY_R600) {
+ pack0 (ib, CB_BLEND0_CONTROL, CB_BLEND0_CONTROL_num);
+ for (i = 0; i < CB_BLEND0_CONTROL_num; i++)
+ e32 (ib, 0);
+ }
+
+ ereg (ib, CB_BLEND_CONTROL, 0);
+
+ if (info->ChipFamily < CHIP_FAMILY_RV770) {
+ pack0 (ib, CB_FOG_RED, 3);
+ e32 (ib, 0x00000000);
+ e32 (ib, 0x00000000);
+ e32 (ib, 0x00000000);
+ }
+
+ ereg (ib, CB_COLOR_CONTROL, 0);
+ pack0 (ib, CB_CLRCMP_CONTROL, 4);
+ e32 (ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
+ e32 (ib, 0); // CB_CLRCMP_SRC
+ e32 (ib, 0); // CB_CLRCMP_DST
+ e32 (ib, 0); // CB_CLRCMP_MSK
+
+
+ if (info->ChipFamily < CHIP_FAMILY_RV770) {
+ pack0 (ib, CB_CLEAR_RED, 4);
+ efloat(ib, 1.0); /* WTF? */
+ efloat(ib, 0.0);
+ efloat(ib, 1.0);
+ efloat(ib, 1.0);
+ }
+ ereg (ib, CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift));
+
+ // SC
+ set_generic_scissor(pScrn, ib, 0, 0, 8192, 8192);
+ set_screen_scissor (pScrn, ib, 0, 0, 8192, 8192);
+ ereg (ib, PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) |
+ (0 << WINDOW_Y_OFFSET_shift)));
+ set_window_scissor (pScrn, ib, 0, 0, 8192, 8192);
+
+ ereg (ib, PA_SC_CLIPRECT_RULE, CLIP_RULE_mask);
+
+ /* clip boolean is set to always visible -> doesn't matter */
+ for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
+ set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192);
+
+ if (info->ChipFamily < CHIP_FAMILY_RV770)
+ ereg (ib, R7xx_PA_SC_EDGERULE, 0x00000000);
+ else
+ ereg (ib, R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); /* ? */
+
+ for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) {
+ set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192);
+ pack0 (ib, PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2);
+ efloat(ib, 0.0);
+ efloat(ib, 1.0);
+ }
+
+ if (info->ChipFamily < CHIP_FAMILY_RV770)
+ ereg (ib, PA_SC_MODE_CNTL, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
+ else
+ ereg (ib, PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
+ 0x00500000)); /* ? */
+
+ ereg (ib, PA_SC_LINE_CNTL, 0);
+ ereg (ib, PA_SC_AA_CONFIG, 0);
+ ereg (ib, PA_SC_AA_MASK, 0xFFFFFFFF);
+
+ //XXX: double check this
+ if (info->ChipFamily > CHIP_FAMILY_R600) {
+ ereg (ib, PA_SC_AA_SAMPLE_LOCS_MCTX, 0);
+ ereg (ib, PA_SC_AA_SAMPLE_LOCS_8S_WD1_M, 0);
+ }
+
+ ereg (ib, PA_SC_LINE_STIPPLE, 0);
+ ereg (ib, PA_SC_MPASS_PS_CNTL, 0);
+
+ // CL
+ pack0 (ib, PA_CL_VPORT_XSCALE_0, 6);
+ efloat (ib, 0.0f); // PA_CL_VPORT_XSCALE
+ efloat (ib, 0.0f); // PA_CL_VPORT_XOFFSET
+ efloat (ib, 0.0f); // PA_CL_VPORT_YSCALE
+ efloat (ib, 0.0f); // PA_CL_VPORT_YOFFSET
+ efloat (ib, 0.0f); // PA_CL_VPORT_ZSCALE
+ efloat (ib, 0.0f); // PA_CL_VPORT_ZOFFSET
+ ereg (ib, PA_CL_CLIP_CNTL, (CLIP_DISABLE_bit | DX_CLIP_SPACE_DEF_bit));
+ ereg (ib, PA_CL_VTE_CNTL, 0);
+ ereg (ib, PA_CL_VS_OUT_CNTL, 0);
+ ereg (ib, PA_CL_NANINF_CNTL, 0);
+ pack0 (ib, PA_CL_GB_VERT_CLIP_ADJ, 4);
+ efloat (ib, 1.0); // PA_CL_GB_VERT_CLIP_ADJ
+ efloat (ib, 1.0); // PA_CL_GB_VERT_DISC_ADJ
+ efloat (ib, 1.0); // PA_CL_GB_HORZ_CLIP_ADJ
+ efloat (ib, 1.0); // PA_CL_GB_HORZ_DISC_ADJ
+
+ /* user clipping planes are disabled by default */
+ pack0 (ib, PA_CL_UCP_0_X, 24);
+ for (i = 0; i < 24; i++)
+ efloat (ib, 0.0);
+
+ // SU
+ ereg (ib, PA_SU_SC_MODE_CNTL, FACE_bit);
+ ereg (ib, PA_SU_POINT_SIZE, 0);
+ ereg (ib, PA_SU_POINT_MINMAX, 0);
+ ereg (ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0);
+ ereg (ib, PA_SU_POLY_OFFSET_BACK_SCALE, 0);
+ ereg (ib, PA_SU_POLY_OFFSET_FRONT_SCALE, 0);
+ ereg (ib, PA_SU_POLY_OFFSET_BACK_OFFSET, 0);
+ ereg (ib, PA_SU_POLY_OFFSET_FRONT_OFFSET, 0);
+
+ ereg (ib, PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */
+ ereg (ib, PA_SU_VTX_CNTL, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
+ (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
+ ereg (ib, PA_SU_POLY_OFFSET_CLAMP, 0);
+
+ // SPI
+ if (info->ChipFamily < CHIP_FAMILY_RV770)
+ ereg (ib, R7xx_SPI_THREAD_GROUPING, 0);
+ else
+ ereg (ib, R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift));
+
+ ereg (ib, SPI_INTERP_CONTROL_0, ((2 << PNT_SPRITE_OVRD_X_shift) |
+ (3 << PNT_SPRITE_OVRD_Y_shift) |
+ (0 << PNT_SPRITE_OVRD_Z_shift) |
+ (1 << PNT_SPRITE_OVRD_W_shift))); /* s,t,0,1 */
+ ereg (ib, SPI_INPUT_Z, 0);
+ ereg (ib, SPI_FOG_CNTL, 0);
+ ereg (ib, SPI_FOG_FUNC_SCALE, 0);
+ ereg (ib, SPI_FOG_FUNC_BIAS, 0);
+
+ pack0 (ib, SPI_VS_OUT_ID_0, SPI_VS_OUT_ID_0_num);
+ for (i = 0; i < SPI_VS_OUT_ID_0_num; i++) /* identity mapping */
+ e32 (ib, 0x03020100 + i*0x04040404); // four byte-wide ids per dword: 4i, 4i+1, 4i+2, 4i+3
+ ereg (ib, SPI_VS_OUT_CONFIG, 0);
+
+ // clear FS
+ fs_setup(pScrn, ib, &fs_conf);
+
+ // VGT
+ ereg (ib, VGT_MAX_VTX_INDX, 0);
+ ereg (ib, VGT_MIN_VTX_INDX, 0);
+ ereg (ib, VGT_INDX_OFFSET, 0);
+ ereg (ib, VGT_INSTANCE_STEP_RATE_0, 0);
+ ereg (ib, VGT_INSTANCE_STEP_RATE_1, 0);
+
+ ereg (ib, VGT_MULTI_PRIM_IB_RESET_INDX, 0);
+ ereg (ib, VGT_OUTPUT_PATH_CNTL, 0);
+ ereg (ib, VGT_GS_MODE, 0);
+ ereg (ib, VGT_HOS_CNTL, 0);
+ ereg (ib, VGT_HOS_MAX_TESS_LEVEL, 0);
+ ereg (ib, VGT_HOS_MIN_TESS_LEVEL, 0);
+ ereg (ib, VGT_HOS_REUSE_DEPTH, 0);
+ ereg (ib, VGT_GROUP_PRIM_TYPE, 0);
+ ereg (ib, VGT_GROUP_FIRST_DECR, 0);
+ ereg (ib, VGT_GROUP_DECR, 0);
+ ereg (ib, VGT_GROUP_VECT_0_CNTL, 0);
+ ereg (ib, VGT_GROUP_VECT_1_CNTL, 0);
+ ereg (ib, VGT_GROUP_VECT_0_FMT_CNTL, 0);
+ ereg (ib, VGT_GROUP_VECT_1_FMT_CNTL, 0);
+ ereg (ib, VGT_PRIMITIVEID_EN, 0);
+ ereg (ib, VGT_MULTI_PRIM_IB_RESET_EN, 0);
+ ereg (ib, VGT_STRMOUT_EN, 0);
+ ereg (ib, VGT_REUSE_OFF, 0);
+ ereg (ib, VGT_VTX_CNT_EN, 0);
+ ereg (ib, VGT_STRMOUT_BUFFER_EN, 0);
+
+ // clear tex resources - PS
+ for (i = 0; i < 16; i++) {
+ tex_res.id = i;
+ set_tex_resource(pScrn, ib, &tex_res);
+ }
+
+ // clear tex resources - VS
+ for (i = 160; i < 164; i++) {
+ tex_res.id = i;
+ set_tex_resource(pScrn, ib, &tex_res);
+ }
+
+ // clear tex resources - FS
+ for (i = 320; i < 335; i++) { // NOTE(review): covers ids 320..334 only (15 slots) - confirm whether 335 should be included
+ tex_res.id = i;
+ set_tex_resource(pScrn, ib, &tex_res);
+ }
+
+}
+
+
+/*
+ * Commands
+ */
+
+/*
+ * Emit an indexed draw whose indices are carried inline in the packet.
+ * The primitive type, index type and instance count are set first.  The
+ * draw packet holds the index count, the draw initiator and the indices;
+ * with 16-bit indices two are packed per dword (low half first) and an odd
+ * trailing index is emitted on its own.
+ */
+void
+draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
+{
+    const int packed16 = (draw_conf->index_type == DI_INDEX_SIZE_16_BIT);
+    uint32_t idx, ndwords;
+
+    ereg (ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
+    pack3 (ib, IT_INDEX_TYPE, 1);
+    e32 (ib, draw_conf->index_type);
+    pack3 (ib, IT_NUM_INSTANCES, 1);
+    e32 (ib, draw_conf->num_instances);
+
+    // calculate num of packets: two fixed dwords plus the index payload
+    ndwords = 2;
+    if (!packed16)
+        ndwords += draw_conf->num_indices;
+    else
+        ndwords += (draw_conf->num_indices + 1) / 2;
+
+    pack3 (ib, IT_DRAW_INDEX_IMMD, ndwords);
+    e32 (ib, draw_conf->num_indices);
+    e32 (ib, draw_conf->vgt_draw_initiator);
+
+    if (!packed16) {
+        for (idx = 0; idx < draw_conf->num_indices; idx++)
+            e32 (ib, indices[idx]);
+        return;
+    }
+
+    idx = 0;
+    while (idx < draw_conf->num_indices) {
+        uint32_t pair = indices[idx];
+
+        /* a second index exists unless this is the odd last one */
+        if ((idx + 1) != draw_conf->num_indices)
+            pair |= (indices[idx + 1] << 16);
+        e32 (ib, pair);
+        idx += 2;
+    }
+}
+
+/*
+ * Emit an auto-indexed draw: set the primitive type, index type and
+ * instance count, then an IT_DRAW_INDEX_AUTO packet carrying the index
+ * count and the draw initiator.
+ */
+void
+draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
+{
+    /* state that applies to the draw */
+    ereg (ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
+
+    pack3 (ib, IT_INDEX_TYPE, 1);
+    e32 (ib, draw_conf->index_type);
+
+    pack3 (ib, IT_NUM_INSTANCES, 1);
+    e32 (ib, draw_conf->num_instances);
+
+    /* the draw itself */
+    pack3 (ib, IT_DRAW_INDEX_AUTO, 2);
+    e32 (ib, draw_conf->num_indices);
+    e32 (ib, draw_conf->vgt_draw_initiator);
+}