summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Deucher <alexdeucher@gmail.com>2009-02-26 11:44:13 -0500
committerAlex Deucher <alexdeucher@gmail.com>2009-02-26 11:44:13 -0500
commit000756e052a291230e5c95e48b69a5aa9c4fab0e (patch)
tree4a9a22ac48ae3081bfa70107fb175b081f1f380c
parent22d7746bfc1bcbb3269afac88971254088ea9ece (diff)
parent8373f4399b03961f2c928a9275d47e9f41bd92bb (diff)
Merge branch 'r6xx-r7xx-support' of git+ssh://agd5f@git.freedesktop.org/git/xorg/driver/xf86-video-ati
-rw-r--r--src/Makefile.am8
-rw-r--r--src/r600_exa.c4441
-rw-r--r--src/r600_reg.h132
-rw-r--r--src/r600_reg_auto_r6xx.h3087
-rw-r--r--src/r600_reg_r6xx.h494
-rw-r--r--src/r600_reg_r7xx.h149
-rw-r--r--src/r600_shader.h346
-rw-r--r--src/r600_state.h229
-rw-r--r--src/r600_textured_videofuncs.c521
-rw-r--r--src/r6xx_accel.c1160
-rw-r--r--src/radeon.h166
-rw-r--r--src/radeon_accel.c119
-rw-r--r--src/radeon_commonfuncs.c84
-rw-r--r--src/radeon_crtc.c3
-rw-r--r--src/radeon_dri.c201
-rw-r--r--src/radeon_driver.c49
-rw-r--r--src/radeon_exa.c1
-rw-r--r--src/radeon_exa_render.c2
-rw-r--r--src/radeon_modes.c22
-rw-r--r--src/radeon_reg.h33
-rw-r--r--src/radeon_textured_video.c164
21 files changed, 11177 insertions, 234 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index c15cc301..7ff7d31a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -66,7 +66,7 @@ XMODE_SRCS=\
modes/xf86DiDGA.c
if USE_EXA
-RADEON_EXA_SOURCES = radeon_exa.c
+RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c
endif
AM_CFLAGS = @XORG_CFLAGS@ @DRI_CFLAGS@ @XMODES_CFLAGS@ -DDISABLE_EASF -DENABLE_ALL_SERVICE_FUNCTIONS -DATOM_BIOS -DATOM_BIOS_PARSER -DDRIVER_PARSER
@@ -128,6 +128,12 @@ EXTRA_DIST = \
radeon_render.c \
radeon_accelfuncs.c \
radeon_textured_videofuncs.c \
+ r600_reg.h \
+ r600_reg_auto_r6xx.h \
+ r600_reg_r6xx.h \
+ r600_reg_r7xx.h \
+ r600_shader.h \
+ r600_state.h \
ati.h \
ativersion.h \
bicubic_table.h \
diff --git a/src/r600_exa.c b/src/r600_exa.c
new file mode 100644
index 00000000..17c5567e
--- /dev/null
+++ b/src/r600_exa.c
@@ -0,0 +1,4441 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Alex Deucher <alexander.deucher@amd.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include "exa.h"
+
+#include "radeon.h"
+#include "radeon_macros.h"
+#include "r600_shader.h"
+#include "r600_reg.h"
+#include "r600_state.h"
+
+extern PixmapPtr
+RADEONGetDrawablePixmap(DrawablePtr pDrawable);
+
+//#define SHOW_VERTEXES
+
+/*
+ * ROP3 codes for the 16 two-operand raster operations, stored in
+ * bits 16-23 of the value.  The names appear to use reverse-Polish
+ * notation: S = source, D = destination, a = and, o = or, x = xor,
+ * n = not (e.g. DSna pairs with GXandInverted in RADEON_ROP below).
+ */
+# define RADEON_ROP3_ZERO 0x00000000
+# define RADEON_ROP3_DSa 0x00880000
+# define RADEON_ROP3_SDna 0x00440000
+# define RADEON_ROP3_S 0x00cc0000
+# define RADEON_ROP3_DSna 0x00220000
+# define RADEON_ROP3_D 0x00aa0000
+# define RADEON_ROP3_DSx 0x00660000
+# define RADEON_ROP3_DSo 0x00ee0000
+# define RADEON_ROP3_DSon 0x00110000
+# define RADEON_ROP3_DSxn 0x00990000
+# define RADEON_ROP3_Dn 0x00550000
+# define RADEON_ROP3_SDno 0x00dd0000
+# define RADEON_ROP3_Sn 0x00330000
+# define RADEON_ROP3_DSno 0x00bb0000
+# define RADEON_ROP3_DSan 0x00770000
+# define RADEON_ROP3_ONE 0x00ff0000
+
+/*
+ * Lookup table indexed by the raster-op number handed to the
+ * Prepare* hooks (the "alu"/"rop" argument); the selected entry is
+ * written verbatim to CB_COLOR_CONTROL.  The index is not range
+ * checked by the users in this file, so it must stay within 0..15.
+ */
+uint32_t RADEON_ROP[16] = {
+ RADEON_ROP3_ZERO, /* GXclear */
+ RADEON_ROP3_DSa, /* GXand */
+ RADEON_ROP3_SDna, /* GXandReverse */
+ RADEON_ROP3_S, /* GXcopy */
+ RADEON_ROP3_DSna, /* GXandInverted */
+ RADEON_ROP3_D, /* GXnoop */
+ RADEON_ROP3_DSx, /* GXxor */
+ RADEON_ROP3_DSo, /* GXor */
+ RADEON_ROP3_DSon, /* GXnor */
+ RADEON_ROP3_DSxn, /* GXequiv */
+ RADEON_ROP3_Dn, /* GXinvert */
+ RADEON_ROP3_SDno, /* GXorReverse */
+ RADEON_ROP3_Sn, /* GXcopyInverted */
+ RADEON_ROP3_DSno, /* GXorInverted */
+ RADEON_ROP3_DSan, /* GXnand */
+ RADEON_ROP3_ONE, /* GXset */
+};
+
+static void
+R600DoneSolid(PixmapPtr pPix);
+
+static void
+R600DoneComposite(PixmapPtr pDst);
+
+
+/*
+ * Set up a solid fill of pPix.
+ *
+ * pPix - destination pixmap
+ * alu  - raster-op index into RADEON_ROP[]
+ * pm   - planemask; each byte that has any bit set enables the
+ *        corresponding colour channel in CB_SHADER_MASK
+ * fg   - fill colour in the pixmap's own pixel format
+ *
+ * Returns FALSE (without touching the command stream) when the
+ * destination pitch in pixels is not a multiple of 8, when the
+ * destination address is not 256-byte aligned, or when the pixmap is
+ * 24 bpp.  Otherwise a command buffer is obtained, the solid-fill
+ * shaders, render target and interpolator state are emitted into it,
+ * the fill colour is uploaded as four float PS constants, the vertex
+ * count is reset to zero and TRUE is returned.
+ *
+ * Note that the dst_* fields of accel_state are overwritten even on
+ * the FALSE paths.
+ */
+static Bool
+R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
+{
+ ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+ cb_config_t cb_conf;
+ shader_config_t vs_conf, ps_conf;
+ int pmask = 0;
+ uint32_t a, r, g, b;
+ float ps_alu_consts[4];
+
+ /* dst_size is in bytes; dst_pitch is converted from bytes to pixels */
+ accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
+ accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height;
+ accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
+
+ // bad pitch
+ if (accel_state->dst_pitch & 7)
+ return FALSE;
+
+ // bad offset
+ if (accel_state->dst_mc_addr & 0xff)
+ return FALSE;
+
+ if (pPix->drawable.bitsPerPixel == 24)
+ return FALSE;
+
+ CLEAR (cb_conf);
+ CLEAR (vs_conf);
+ CLEAR (ps_conf);
+
+ //return FALSE;
+
+#ifdef SHOW_VERTEXES
+ ErrorF("%dx%d @ %dbpp, 0x%08x\n", pPix->drawable.width, pPix->drawable.height,
+ pPix->drawable.bitsPerPixel, exaGetPixmapPitch(pPix));
+#endif
+
+ /* From here on every call appends state to the freshly obtained buffer */
+ accel_state->ib = RADEONCPGetBuffer(pScrn);
+
+ /* Init */
+ start_3d(pScrn, accel_state->ib);
+
+ //cp_set_surface_sync(pScrn, accel_state->ib);
+
+ set_default_state(pScrn, accel_state->ib);
+
+ /* Scissor / viewport */
+ ereg (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
+ ereg (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+
+ /* Addresses of the solid-fill vertex and pixel shaders */
+ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+ accel_state->solid_vs_offset;
+ accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+ accel_state->solid_ps_offset;
+ accel_state->vs_size = 512;
+ accel_state->ps_size = 512;
+
+ /* Shader */
+
+ /* flush SQ cache */
+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
+ accel_state->vs_size, accel_state->vs_mc_addr);
+
+ vs_conf.shader_addr = accel_state->vs_mc_addr;
+ vs_conf.num_gprs = 2;
+ vs_conf.stack_size = 0;
+ vs_setup (pScrn, accel_state->ib, &vs_conf);
+
+ /* flush SQ cache */
+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
+ accel_state->ps_size, accel_state->ps_mc_addr);
+
+ ps_conf.shader_addr = accel_state->ps_mc_addr;
+ ps_conf.num_gprs = 1;
+ ps_conf.stack_size = 0;
+ ps_conf.uncached_first_inst = 1;
+ ps_conf.clamp_consts = 0;
+ ps_conf.export_mode = 2;
+ ps_setup (pScrn, accel_state->ib, &ps_conf);
+
+ /* Render setup */
+ /* Translate the planemask into a 4-bit per-channel write mask */
+ if (pm & 0x000000ff)
+ pmask |= 4; //B
+ if (pm & 0x0000ff00)
+ pmask |= 2; //G
+ if (pm & 0x00ff0000)
+ pmask |= 1; //R
+ if (pm & 0xff000000)
+ pmask |= 8; //A
+ ereg (accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift));
+ ereg (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
+ ereg (accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[alu]);
+
+
+ /* Bind the destination pixmap as render target 0 */
+ cb_conf.id = 0;
+ cb_conf.w = accel_state->dst_pitch;
+ cb_conf.h = pPix->drawable.height;
+ cb_conf.base = accel_state->dst_mc_addr;
+
+ if (pPix->drawable.bitsPerPixel == 8) {
+ cb_conf.format = COLOR_8;
+ cb_conf.comp_swap = 3; //A
+ } else if (pPix->drawable.bitsPerPixel == 16) {
+ cb_conf.format = COLOR_5_6_5;
+ cb_conf.comp_swap = 2; //RGB
+ } else {
+ cb_conf.format = COLOR_8_8_8_8;
+ cb_conf.comp_swap = 1; //ARGB
+ }
+ cb_conf.source_format = 1;
+ cb_conf.blend_clamp = 1;
+ set_render_target(pScrn, accel_state->ib, &cb_conf);
+
+ ereg (accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
+ ereg (accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
+ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+ /* Interpolator setup */
+ // one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one)
+ ereg (accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift));
+ ereg (accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
+
+ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
+ // no VS exports as PS input (NUM_INTERP is not zero based, no minus one)
+ ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, (0 << NUM_INTERP_shift));
+ ereg (accel_state->ib, SPI_PS_IN_CONTROL_1, 0);
+ // color semantic id 0 -> GPR[0]
+ ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
+ (0x03 << DEFAULT_VAL_shift) |
+ FLAT_SHADE_bit |
+ SEL_CENTROID_bit));
+ ereg (accel_state->ib, SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit | 0);
+
+ // PS alu constants
+ /* Unpack fg per pixel format and normalise every channel to 0.0-1.0 */
+ if (pPix->drawable.bitsPerPixel == 16) {
+ r = (fg >> 11) & 0x1f;
+ g = (fg >> 5) & 0x3f;
+ b = (fg >> 0) & 0x1f;
+ ps_alu_consts[0] = (float)r / 31; //R
+ ps_alu_consts[1] = (float)g / 63; //G
+ ps_alu_consts[2] = (float)b / 31; //B
+ ps_alu_consts[3] = 1.0; //A
+ } else if (pPix->drawable.bitsPerPixel == 8) {
+ /* 8 bpp pixmaps carry a single channel, passed through alpha */
+ a = (fg >> 0) & 0xff;
+ ps_alu_consts[0] = 0.0; //R
+ ps_alu_consts[1] = 0.0; //G
+ ps_alu_consts[2] = 0.0; //B
+ ps_alu_consts[3] = (float)a / 255; //A
+ } else {
+ a = (fg >> 24) & 0xff;
+ r = (fg >> 16) & 0xff;
+ g = (fg >> 8) & 0xff;
+ b = (fg >> 0) & 0xff;
+ ps_alu_consts[0] = (float)r / 255; //R
+ ps_alu_consts[1] = (float)g / 255; //G
+ ps_alu_consts[2] = (float)b / 255; //B
+ ps_alu_consts[3] = (float)a / 255; //A
+ }
+ set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+
+ /* No vertices queued yet; R600Solid appends, R600DoneSolid draws */
+ accel_state->vb_index = 0;
+
+#ifdef SHOW_VERTEXES
+ ErrorF("PM: 0x%08x\n", pm);
+#endif
+
+ return TRUE;
+}
+
+
+/*
+ * Queue one solid rectangle (x1,y1)-(x2,y2) for the fill that was set
+ * up by R600PrepareSolid.
+ *
+ * A rectangle is described by three vertices (top-left, bottom-left,
+ * bottom-right) of 8 bytes each, stored in the upper half of the
+ * current command buffer.  When three more vertices would not fit in
+ * that half, the pending vertices are submitted via R600DoneSolid and
+ * a new buffer is obtained before appending.
+ */
+static void
+R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    struct r6xx_solid_vertex vertex[3];
+    struct r6xx_solid_vertex *solid_vb;
+
+    /* Vertex area full: draw what is queued and start a new buffer */
+    if (((accel_state->vb_index + 3) * 8) > (accel_state->ib->total / 2)) {
+        R600DoneSolid(pPix);
+        accel_state->vb_index = 0;
+        accel_state->ib = RADEONCPGetBuffer(pScrn);
+    }
+
+    /* Vertices live in the second half of the buffer */
+    solid_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2));
+
+    vertex[0].x = (float)x1;
+    vertex[0].y = (float)y1;
+
+    vertex[1].x = (float)x1;
+    vertex[1].y = (float)y2;
+
+    vertex[2].x = (float)x2;
+    vertex[2].y = (float)y2;
+
+#ifdef SHOW_VERTEXES
+    ErrorF("vertex 0: %f, %f\n", vertex[0].x, vertex[0].y);
+    ErrorF("vertex 1: %f, %f\n", vertex[1].x, vertex[1].y);
+    /* was "%f\n" with two arguments: y was silently dropped */
+    ErrorF("vertex 2: %f, %f\n", vertex[2].x, vertex[2].y);
+#endif
+
+    // append to vertex buffer
+    solid_vb[accel_state->vb_index++] = vertex[0];
+    solid_vb[accel_state->vb_index++] = vertex[1];
+    solid_vb[accel_state->vb_index++] = vertex[2];
+}
+
+/*
+ * Submit the rectangles queued by R600Solid.
+ *
+ * With no queued vertices the command buffer is simply discarded.
+ * Otherwise the vertex area is bound as a vertex resource, a rect-list
+ * draw covering all queued vertices is emitted, the destination
+ * surface is synchronised and the buffer is flushed.
+ */
+static void
+R600DoneSolid(PixmapPtr pPix)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    vtx_resource_t vtx_res;
+    draw_config_t draw_conf;
+
+    CLEAR (draw_conf);
+    CLEAR (vtx_res);
+
+    /* Nothing was drawn: throw the prepared state away */
+    if (accel_state->vb_index == 0) {
+        R600IBDiscard(pScrn, accel_state->ib);
+        return;
+    }
+
+    /* Vertices occupy the second half of the buffer, 8 bytes apiece */
+    accel_state->vb_size = accel_state->vb_index * 8;
+    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
+        (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
+
+    /* flush vertex cache; RV610/RV620/RS780/RV710 use the TC action bit */
+    if ((info->ChipFamily != CHIP_FAMILY_RV610) &&
+        (info->ChipFamily != CHIP_FAMILY_RV620) &&
+        (info->ChipFamily != CHIP_FAMILY_RS780) &&
+        (info->ChipFamily != CHIP_FAMILY_RV710)) {
+        cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
+                            accel_state->vb_size, accel_state->vb_mc_addr);
+    } else {
+        cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
+                            accel_state->vb_size, accel_state->vb_mc_addr);
+    }
+
+    /* Vertex buffer setup (sizes in dwords) */
+    vtx_res.id = SQ_VTX_RESOURCE_vs;
+    vtx_res.vb_addr = accel_state->vb_mc_addr;
+    vtx_res.vtx_size_dw = 8 / 4;
+    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
+    vtx_res.mem_req_size = 1;
+    set_vtx_resource (pScrn, accel_state->ib, &vtx_res);
+
+    /* Draw */
+    draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
+    draw_conf.num_instances = 1;
+    draw_conf.prim_type = DI_PT_RECTLIST;
+    draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
+    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+    draw_auto(pScrn, accel_state->ib, &draw_conf);
+
+    wait_3d_idle_clean(pScrn, accel_state->ib);
+
+    /* sync dst surface */
+    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
+                        accel_state->dst_size, accel_state->dst_mc_addr);
+
+    R600CPFlushIndirect(pScrn, accel_state->ib);
+}
+
+/*
+ * Emit all state needed for a textured copy between two surfaces.
+ *
+ * src_pitch/dst_pitch   - surface pitches in pixels (they are
+ *                         multiplied by bpp/8 to obtain byte sizes)
+ * src_width/src_height  - source dimensions, used for the texture
+ * dst_height            - destination height, used for the render target
+ * src_offset/dst_offset - surface addresses; the callers in this file
+ *                         pass pixmap offset + fbLocation + fbOffset
+ * src_bpp/dst_bpp       - 8, 16 or anything else (treated as 32)
+ * rop                   - raster-op index into RADEON_ROP[]
+ * planemask             - each byte with any bit set enables the
+ *                         matching channel in CB_SHADER_MASK
+ *
+ * A new command buffer is obtained, the copy shaders are bound, the
+ * source is set up as texture 0 with point sampling, the destination
+ * as render target 0, and the queued-vertex count is reset to zero.
+ * The src_* and dst_* fields of accel_state are overwritten with the
+ * arguments.
+ */
+static void
+R600DoPrepareCopy(ScrnInfoPtr pScrn,
+ int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp,
+ int dst_pitch, int dst_height, uint32_t dst_offset, int dst_bpp,
+ int rop, Pixel planemask)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+ int pmask = 0;
+ cb_config_t cb_conf;
+ tex_resource_t tex_res;
+ tex_sampler_t tex_samp;
+ shader_config_t vs_conf, ps_conf;
+
+ CLEAR (cb_conf);
+ CLEAR (tex_res);
+ CLEAR (tex_samp);
+ CLEAR (vs_conf);
+ CLEAR (ps_conf);
+
+ /* Every call below appends state to this freshly obtained buffer */
+ accel_state->ib = RADEONCPGetBuffer(pScrn);
+
+ /* Init */
+ start_3d(pScrn, accel_state->ib);
+
+ //cp_set_surface_sync(pScrn, accel_state->ib);
+
+ set_default_state(pScrn, accel_state->ib);
+
+ /* Scissor / viewport */
+ ereg (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
+ ereg (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+
+ /* Addresses of the copy vertex and pixel shaders */
+ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+ accel_state->copy_vs_offset;
+ accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+ accel_state->copy_ps_offset;
+ accel_state->vs_size = 512;
+ accel_state->ps_size = 512;
+
+ /* Shader */
+
+ /* flush SQ cache */
+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
+ accel_state->vs_size, accel_state->vs_mc_addr);
+
+ vs_conf.shader_addr = accel_state->vs_mc_addr;
+ vs_conf.num_gprs = 2;
+ vs_conf.stack_size = 0;
+ vs_setup (pScrn, accel_state->ib, &vs_conf);
+
+ /* flush SQ cache */
+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
+ accel_state->ps_size, accel_state->ps_mc_addr);
+
+ ps_conf.shader_addr = accel_state->ps_mc_addr;
+ ps_conf.num_gprs = 1;
+ ps_conf.stack_size = 0;
+ ps_conf.uncached_first_inst = 1;
+ ps_conf.clamp_consts = 0;
+ ps_conf.export_mode = 2;
+ ps_setup (pScrn, accel_state->ib, &ps_conf);
+
+ /* Record the source surface; src_size is in bytes */
+ accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8);
+ accel_state->src_mc_addr[0] = src_offset;
+ accel_state->src_pitch[0] = src_pitch;
+ accel_state->src_width[0] = src_width;
+ accel_state->src_height[0] = src_height;
+ accel_state->src_bpp[0] = src_bpp;
+
+ /* flush texture cache */
+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
+ accel_state->src_size[0], accel_state->src_mc_addr[0]);
+
+ /* Texture */
+ tex_res.id = 0;
+ tex_res.w = src_width;
+ tex_res.h = src_height;
+ tex_res.pitch = accel_state->src_pitch[0];
+ tex_res.depth = 0;
+ tex_res.dim = SQ_TEX_DIM_2D;
+ tex_res.base = accel_state->src_mc_addr[0];
+ tex_res.mip_base = accel_state->src_mc_addr[0];
+ /* Pick the texel format and the channel routing for the source depth */
+ if (src_bpp == 8) {
+ tex_res.format = FMT_8;
+ tex_res.dst_sel_x = SQ_SEL_1; //R
+ tex_res.dst_sel_y = SQ_SEL_1; //G
+ tex_res.dst_sel_z = SQ_SEL_1; //B
+ tex_res.dst_sel_w = SQ_SEL_X; //A
+ } else if (src_bpp == 16) {
+ tex_res.format = FMT_5_6_5;
+ tex_res.dst_sel_x = SQ_SEL_Z; //R
+ tex_res.dst_sel_y = SQ_SEL_Y; //G
+ tex_res.dst_sel_z = SQ_SEL_X; //B
+ tex_res.dst_sel_w = SQ_SEL_1; //A
+ } else {
+ tex_res.format = FMT_8_8_8_8;
+ tex_res.dst_sel_x = SQ_SEL_Z; //R
+ tex_res.dst_sel_y = SQ_SEL_Y; //G
+ tex_res.dst_sel_z = SQ_SEL_X; //B
+ tex_res.dst_sel_w = SQ_SEL_W; //A
+ }
+
+ tex_res.request_size = 1;
+ tex_res.base_level = 0;
+ tex_res.last_level = 0;
+ tex_res.perf_modulation = 0;
+ set_tex_resource (pScrn, accel_state->ib, &tex_res);
+
+ /* Point sampling, clamped to the last texel in x and y */
+ tex_samp.id = 0;
+ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_z = SQ_TEX_WRAP;
+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT;
+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT;
+ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
+ tex_samp.mip_filter = 0; /* no mipmap */
+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+
+
+ /* Render setup */
+ /* Translate the planemask into a 4-bit per-channel write mask */
+ if (planemask & 0x000000ff)
+ pmask |= 4; //B
+ if (planemask & 0x0000ff00)
+ pmask |= 2; //G
+ if (planemask & 0x00ff0000)
+ pmask |= 1; //R
+ if (planemask & 0xff000000)
+ pmask |= 8; //A
+ ereg (accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift));
+ ereg (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
+ ereg (accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[rop]);
+
+ /* Record the destination surface; dst_size is in bytes */
+ accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8);
+ accel_state->dst_mc_addr = dst_offset;
+ accel_state->dst_pitch = dst_pitch;
+ accel_state->dst_height = dst_height;
+ accel_state->dst_bpp = dst_bpp;
+
+ /* Bind the destination as render target 0 */
+ cb_conf.id = 0;
+ cb_conf.w = accel_state->dst_pitch;
+ cb_conf.h = dst_height;
+ cb_conf.base = accel_state->dst_mc_addr;
+ if (dst_bpp == 8) {
+ cb_conf.format = COLOR_8;
+ cb_conf.comp_swap = 3; // A
+ } else if (dst_bpp == 16) {
+ cb_conf.format = COLOR_5_6_5;
+ cb_conf.comp_swap = 2; // RGB
+ } else {
+ cb_conf.format = COLOR_8_8_8_8;
+ cb_conf.comp_swap = 1; // ARGB
+ }
+ cb_conf.source_format = 1;
+ cb_conf.blend_clamp = 1;
+ set_render_target(pScrn, accel_state->ib, &cb_conf);
+
+ ereg (accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
+ ereg (accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
+ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+ /* Interpolator setup */
+ // export tex coord from VS
+ ereg (accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
+ ereg (accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
+
+ /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+ * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
+ // input tex coord from VS
+ ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift)));
+ ereg (accel_state->ib, SPI_PS_IN_CONTROL_1, 0);
+ // color semantic id 0 -> GPR[0]
+ ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
+ (0x01 << DEFAULT_VAL_shift) |
+ SEL_CENTROID_bit));
+ ereg (accel_state->ib, SPI_INTERP_CONTROL_0, 0);
+
+ /* No vertices queued yet; R600AppendCopyVertex adds, R600DoCopy draws */
+ accel_state->vb_index = 0;
+
+}
+
+/*
+ * Submit the copy rectangles queued by R600AppendCopyVertex.
+ *
+ * With no queued vertices the command buffer is simply discarded.
+ * Otherwise the vertex area is bound as a vertex resource, a rect-list
+ * draw covering all queued vertices is emitted, the destination
+ * surface is synchronised and the buffer is flushed.
+ */
+static void
+R600DoCopy(ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    vtx_resource_t vtx_res;
+    draw_config_t draw_conf;
+
+    CLEAR (draw_conf);
+    CLEAR (vtx_res);
+
+    /* Nothing was queued: throw the prepared state away */
+    if (accel_state->vb_index == 0) {
+        R600IBDiscard(pScrn, accel_state->ib);
+        return;
+    }
+
+    /* Vertices occupy the second half of the buffer, 16 bytes apiece */
+    accel_state->vb_size = accel_state->vb_index * 16;
+    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
+        (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
+
+    /* flush vertex cache; RV610/RV620/RS780/RV710 use the TC action bit */
+    if ((info->ChipFamily != CHIP_FAMILY_RV610) &&
+        (info->ChipFamily != CHIP_FAMILY_RV620) &&
+        (info->ChipFamily != CHIP_FAMILY_RS780) &&
+        (info->ChipFamily != CHIP_FAMILY_RV710)) {
+        cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
+                            accel_state->vb_size, accel_state->vb_mc_addr);
+    } else {
+        cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
+                            accel_state->vb_size, accel_state->vb_mc_addr);
+    }
+
+    /* Vertex buffer setup (sizes in dwords) */
+    vtx_res.id = SQ_VTX_RESOURCE_vs;
+    vtx_res.vb_addr = accel_state->vb_mc_addr;
+    vtx_res.vtx_size_dw = 16 / 4;
+    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
+    vtx_res.mem_req_size = 1;
+    set_vtx_resource (pScrn, accel_state->ib, &vtx_res);
+
+    /* Draw */
+    draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
+    draw_conf.num_instances = 1;
+    draw_conf.prim_type = DI_PT_RECTLIST;
+    draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
+    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+    draw_auto(pScrn, accel_state->ib, &draw_conf);
+
+    wait_3d_idle_clean(pScrn, accel_state->ib);
+
+    /* sync dst surface */
+    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
+                        accel_state->dst_size, accel_state->dst_mc_addr);
+
+    R600CPFlushIndirect(pScrn, accel_state->ib);
+}
+
+/*
+ * Queue one w x h copy rectangle from (srcX,srcY) to (dstX,dstY).
+ *
+ * A rectangle is described by three vertices (top-left, bottom-left,
+ * bottom-right); each carries a destination position (x,y) and a
+ * source coordinate (s,t) and takes 16 bytes in the upper half of the
+ * current command buffer.  When three more vertices would not fit in
+ * that half, the pending vertices are submitted via R600DoCopy and a
+ * new buffer is obtained before appending.
+ */
+static void
+R600AppendCopyVertex(ScrnInfoPtr pScrn,
+                     int srcX, int srcY,
+                     int dstX, int dstY,
+                     int w, int h)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    struct r6xx_copy_vertex *copy_vb;
+    struct r6xx_copy_vertex vertex[3];
+
+    /* Vertex area full: draw what is queued and start a new buffer */
+    if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
+        R600DoCopy(pScrn);
+        accel_state->vb_index = 0;
+        accel_state->ib = RADEONCPGetBuffer(pScrn);
+    }
+
+    /* Vertices live in the second half of the buffer */
+    copy_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2));
+
+    vertex[0].x = (float)dstX;
+    vertex[0].y = (float)dstY;
+    vertex[0].s = (float)srcX;
+    vertex[0].t = (float)srcY;
+
+    vertex[1].x = (float)dstX;
+    vertex[1].y = (float)(dstY + h);
+    vertex[1].s = (float)srcX;
+    vertex[1].t = (float)(srcY + h);
+
+    vertex[2].x = (float)(dstX + w);
+    vertex[2].y = (float)(dstY + h);
+    vertex[2].s = (float)(srcX + w);
+    vertex[2].t = (float)(srcY + h);
+
+#ifdef SHOW_VERTEXES
+    /* t is assigned a float above, so it must be printed with %f, not %d */
+    ErrorF("vertex 0: %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].s, vertex[0].t);
+    ErrorF("vertex 1: %f, %f, %f, %f\n", vertex[1].x, vertex[1].y, vertex[1].s, vertex[1].t);
+    ErrorF("vertex 2: %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].s, vertex[2].t);
+#endif
+
+    // append to vertex buffer
+    copy_vb[accel_state->vb_index++] = vertex[0];
+    copy_vb[accel_state->vb_index++] = vertex[1];
+    copy_vb[accel_state->vb_index++] = vertex[2];
+
+}
+
+/*
+ * Set up a copy from pSrc to pDst.
+ *
+ * Returns FALSE when either pitch (in pixels) is not a multiple of 8,
+ * when either surface address is not 256-byte aligned, or when either
+ * pixmap is 24 bpp.  The surface description in accel_state is
+ * overwritten even on those paths.  xdir and ydir are not used.
+ *
+ * When source and destination share the same pixmap offset, state
+ * emission is deferred to R600Copy and a scratch area the size of the
+ * destination is requested from offscreen memory (the result may be
+ * NULL; R600Copy checks for that).  Otherwise the copy state is
+ * emitted right away.
+ */
+static Bool
+R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst,
+                int xdir, int ydir,
+                int rop,
+                Pixel planemask)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    /* Pitches are converted from bytes to pixels */
+    accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
+    accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
+
+    /* Source surface */
+    accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
+    accel_state->src_width[0] = pSrc->drawable.width;
+    accel_state->src_height[0] = pSrc->drawable.height;
+    accel_state->src_bpp[0] = pSrc->drawable.bitsPerPixel;
+
+    /* Destination surface */
+    accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
+    accel_state->dst_height = pDst->drawable.height;
+    accel_state->dst_bpp = pDst->drawable.bitsPerPixel;
+
+    // bad pitch
+    if ((accel_state->src_pitch[0] & 7) || (accel_state->dst_pitch & 7))
+        return FALSE;
+
+    // bad offset
+    if ((accel_state->src_mc_addr[0] & 0xff) || (accel_state->dst_mc_addr & 0xff))
+        return FALSE;
+
+    // 24 bpp is not handled
+    if ((pSrc->drawable.bitsPerPixel == 24) || (pDst->drawable.bitsPerPixel == 24))
+        return FALSE;
+
+#ifdef SHOW_VERTEXES
+    ErrorF("src: %dx%d @ %dbpp, 0x%08x\n", pSrc->drawable.width, pSrc->drawable.height,
+           pSrc->drawable.bitsPerPixel, exaGetPixmapPitch(pSrc));
+    ErrorF("dst: %dx%d @ %dbpp, 0x%08x\n", pDst->drawable.width, pDst->drawable.height,
+           pDst->drawable.bitsPerPixel, exaGetPixmapPitch(pDst));
+#endif
+
+    /* Remembered for the same-surface path, which prepares per rectangle */
+    accel_state->rop = rop;
+    accel_state->planemask = planemask;
+
+    if (exaGetPixmapOffset(pSrc) != exaGetPixmapOffset(pDst)) {
+        /* Distinct surfaces: emit the state once, rectangles are batched */
+        accel_state->same_surface = FALSE;
+
+        R600DoPrepareCopy(pScrn,
+                          accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height,
+                          accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel,
+                          accel_state->dst_pitch, pDst->drawable.height,
+                          accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel,
+                          rop, planemask);
+    } else {
+        /* Same surface: get a scratch area for overlapping rectangles */
+        unsigned long size = pDst->drawable.height * accel_state->dst_pitch * pDst->drawable.bitsPerPixel/8;
+
+        accel_state->same_surface = TRUE;
+
+        /* Release a scratch area left over from an earlier copy */
+        if (accel_state->copy_area) {
+            exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
+            accel_state->copy_area = NULL;
+        }
+        accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
+    }
+
+    return TRUE;
+}
+
+/*
+ * Report whether rectangle S (sx1..sx2, sy1..sy2) and rectangle D
+ * (dx1..dx2, dy1..dy2) intersect.  Bounds are treated as inclusive,
+ * so rectangles that merely share an edge count as overlapping.
+ *
+ * The test compares the x and y extents directly instead of checking
+ * whether a corner of S lies inside D.  For two rectangles of equal
+ * size (what the callers in this file pass) both tests agree; the
+ * extent test additionally catches cases where no corner of S is
+ * inside D, e.g. D lying entirely within S.
+ */
+static Bool
+is_overlap(int sx1, int sx2, int sy1, int sy2, int dx1, int dx2, int dy1, int dy2)
+{
+    if ((sx1 <= dx2) && (dx1 <= sx2) &&   /* x extents intersect */
+        (sy1 <= dy2) && (dy1 <= sy2))     /* y extents intersect */
+        return TRUE;
+    else
+        return FALSE;
+}
+
+/*
+ * Emit one complete copy (state setup, one rectangle, draw) within the
+ * surface of pDst, using the rop and planemask stored in accel_state.
+ */
+static void
+R600OverlapCopyChunk(ScrnInfoPtr pScrn, PixmapPtr pDst,
+                     uint32_t pitch, uint32_t offset,
+                     int srcX, int srcY,
+                     int dstX, int dstY,
+                     int w, int h)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    R600DoPrepareCopy(pScrn,
+                      pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel,
+                      pitch, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel,
+                      accel_state->rop, accel_state->planemask);
+    R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
+    R600DoCopy(pScrn);
+}
+
+/*
+ * Copy a w x h rectangle from (srcX,srcY) to (dstX,dstY) inside pDst
+ * without a scratch surface.
+ *
+ * If the two rectangles do not overlap a single copy is issued.
+ * Otherwise the rectangle is moved in strips no wider (or taller)
+ * than the displacement, ordered so that no strip overwrites source
+ * pixels a later strip still has to read.  A diagonal displacement is
+ * first reduced to a purely horizontal or vertical strip walk by
+ * copying one non-overlapping band.
+ *
+ * The last strip is clamped to the remaining width/height so that
+ * nothing outside the requested rectangle is read or written.
+ */
+static void
+R600OverlapCopy(PixmapPtr pDst,
+                int srcX, int srcY,
+                int dstX, int dstY,
+                int w, int h)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
+    uint32_t dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
+    int i, hchunk, vchunk;
+
+    if (!is_overlap(srcX, srcX + w, srcY, srcY + h,
+                    dstX, dstX + w, dstY, dstY + h)) {
+        R600OverlapCopyChunk(pScrn, pDst, dst_pitch, dst_offset,
+                             srcX, srcY, dstX, dstY, w, h);
+        return;
+    }
+
+    /* Calculate height/width of non-overlapping area */
+    hchunk = (srcX < dstX) ? (dstX - srcX) : (srcX - dstX);
+    vchunk = (srcY < dstY) ? (dstY - srcY) : (srcY - dstY);
+
+    /* Source and destination coincide: there is nothing to move, and a
+     * zero strip size would never advance the loops below. */
+    if (hchunk == 0 && vchunk == 0)
+        return;
+
+    /* Diagonally offset overlap is reduced to either horizontal or vertical offset-only
+     * by copying a part of the non-overlapping portion, then adjusting coordinates
+     * Choose horizontal vs vertical to minimize the total number of copy operations
+     */
+    if (vchunk != 0 && hchunk != 0) { //diagonal
+        if ((w / hchunk) <= (h / vchunk)) { // reduce to horizontal
+            if (srcY > dstY ) { // diagonal up
+                R600OverlapCopyChunk(pScrn, pDst, dst_pitch, dst_offset,
+                                     srcX, srcY, dstX, dstY, w, vchunk);
+
+                srcY = srcY + vchunk;
+                dstY = dstY + vchunk;
+            } else { // diagonal down
+                R600OverlapCopyChunk(pScrn, pDst, dst_pitch, dst_offset,
+                                     srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk);
+            }
+            h = h - vchunk;
+            vchunk = 0;
+        } else { //reduce to vertical
+            if (srcX > dstX ) { // diagonal left
+                R600OverlapCopyChunk(pScrn, pDst, dst_pitch, dst_offset,
+                                     srcX, srcY, dstX, dstY, hchunk, h);
+
+                srcX = srcX + hchunk;
+                dstX = dstX + hchunk;
+            } else { // diagonal right
+                R600OverlapCopyChunk(pScrn, pDst, dst_pitch, dst_offset,
+                                     srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h);
+            }
+            w = w - hchunk;
+            hchunk = 0;
+        }
+    }
+
+    if (vchunk == 0) { // left/right
+        if (srcX < dstX) { // right
+            // copy right to left
+            for (i = w; i > 0; i -= hchunk) {
+                /* clamp the final strip to the columns that remain */
+                if (hchunk > i) hchunk = i;
+                R600OverlapCopyChunk(pScrn, pDst, dst_pitch, dst_offset,
+                                     srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h);
+            }
+        } else { //left
+            // copy left to right
+            for (i = 0; i < w; i += hchunk) {
+                /* clamp the final strip to the columns that remain */
+                if (hchunk > w - i) hchunk = w - i;
+                R600OverlapCopyChunk(pScrn, pDst, dst_pitch, dst_offset,
+                                     srcX + i, srcY, dstX + i, dstY, hchunk, h);
+            }
+        }
+    } else { //up/down
+        if (srcY > dstY) { // up
+            // copy top to bottom
+            for (i = 0; i < h; i += vchunk) {
+                if (vchunk > h - i) vchunk = h - i;
+                R600OverlapCopyChunk(pScrn, pDst, dst_pitch, dst_offset,
+                                     srcX, srcY + i, dstX, dstY + i, w, vchunk);
+            }
+        } else { // down
+            // copy bottom to top
+            for (i = h; i > 0; i -= vchunk) {
+                if (vchunk > i) vchunk = i;
+                R600OverlapCopyChunk(pScrn, pDst, dst_pitch, dst_offset,
+                                     srcX, srcY + i - vchunk, dstX, dstY + i - vchunk, w, vchunk);
+            }
+        }
+    }
+}
+
+/*
+ * Copy one w x h rectangle from (srcX,srcY) to (dstX,dstY) for the
+ * operation set up by R600PrepareCopy.
+ *
+ * Distinct surfaces: the rectangle is only queued; R600DoneCopy draws.
+ *
+ * Same surface:
+ *  - identical source and destination position: nothing is done;
+ *  - overlapping rectangles with a scratch area available: the source
+ *    is first staged into the scratch area and then transferred to the
+ *    destination;
+ *  - overlapping rectangles without a scratch area: R600OverlapCopy
+ *    moves the data in strips;
+ *  - non-overlapping rectangles: a single direct copy.
+ *
+ * The staging pass uses GXcopy so the scratch area receives the source
+ * pixels unmodified; the requested raster operation is applied exactly
+ * once, in the second pass, against the real destination.  (Applying
+ * it in the staging pass too would combine the source with whatever
+ * the scratch area happened to contain.)
+ */
+static void
+R600Copy(PixmapPtr pDst,
+         int srcX, int srcY,
+         int dstX, int dstY,
+         int w, int h)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
+        return;
+
+    if (accel_state->same_surface && is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) {
+        if (accel_state->copy_area) {
+            uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
+            uint32_t orig_offset, tmp_offset;
+
+            tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
+            orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
+
+            /* Pass 1: stage the source verbatim at the destination position */
+            R600DoPrepareCopy(pScrn,
+                              pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel,
+                              pitch, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel,
+                              GXcopy, accel_state->planemask);
+            R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
+            R600DoCopy(pScrn);
+
+            /* Pass 2: combine the staged pixels with the destination */
+            R600DoPrepareCopy(pScrn,
+                              pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel,
+                              pitch, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel,
+                              accel_state->rop, accel_state->planemask);
+            R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
+            R600DoCopy(pScrn);
+        } else
+            R600OverlapCopy(pDst, srcX, srcY, dstX, dstY, w, h);
+    } else if (accel_state->same_surface) {
+        uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
+        uint32_t offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
+
+        /* Same surface, disjoint rectangles: one direct copy */
+        R600DoPrepareCopy(pScrn,
+                          pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel,
+                          pitch, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel,
+                          accel_state->rop, accel_state->planemask);
+        R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
+        R600DoCopy(pScrn);
+    } else {
+        /* Distinct surfaces: state is already emitted, just queue */
+        R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
+    }
+
+}
+
+/*
+ * EXA DoneCopy hook: finish the copy sequence targeting pDst.
+ *
+ * Rectangles queued by R600Copy() for the different-surface case are
+ * submitted here with R600DoCopy(); same-surface copies were already
+ * submitted by R600Copy() itself.  A scratch area held in
+ * accel_state->copy_area is released and the pointer cleared.
+ */
+static void
+R600DoneCopy(PixmapPtr pDst)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    struct radeon_accel_state *state = RADEONPTR(pScrn)->accel_state;
+
+    if (!state->same_surface) {
+        R600DoCopy(pScrn);
+    }
+
+    if (state->copy_area != NULL) {
+        exaOffscreenFree(pDst->drawable.pScreen, state->copy_area);
+        state->copy_area = NULL;
+    }
+}
+
+#define RADEON_TRACE_FALL 0 /* set to 1 to make RADEON_FALLBACK log its reason via ErrorF */
+#define RADEON_TRACE_DRAW 0
+
+#if RADEON_TRACE_FALL
+#define RADEON_FALLBACK(x) \
+do { \
+ ErrorF("%s: ", __FUNCTION__); \
+ ErrorF x; \
+ return FALSE; \
+} while (0)
+#else
+#define RADEON_FALLBACK(x) return FALSE /* x (a parenthesised ErrorF argument list) is discarded */
+#endif
+
+#define xFixedToFloat(f) (((float) (f)) / 65536) /* divide by 2^16: fixed-point value -> float */
+
+/*
+ * Run a fixed-point point through a picture transform, in place.
+ *
+ * The point is extended with a third component of xFixed1 before being
+ * handed to PictureTransformPoint(); only the first two components of the
+ * result are written back.  The return value of PictureTransformPoint() is
+ * not examined.
+ */
+static inline void transformPoint(PictTransform *transform, xPointFixed *point)
+{
+    PictVector vec = { .vector = { point->x, point->y, xFixed1 } };
+
+    PictureTransformPoint(transform, &vec);
+
+    point->x = vec.vector[0];
+    point->y = vec.vector[1];
+}
+
+struct blendinfo { /* blend description for one compositing operator */
+ Bool dst_alpha; /* the blend factors read destination alpha */
+ Bool src_alpha; /* the blend factors read source alpha */
+ uint32_t blend_cntl; /* colour SRCBLEND | DESTBLEND factor bits */
+};
+
+static struct blendinfo R600BlendOp[] = { /* indexed by the composite op number; each entry is labelled with its operator */
+ /* Clear */
+ {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
+ /* Src */
+ {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
+ /* Dst */
+ {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
+ /* Over */
+ {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+ /* OverReverse */
+ {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
+ /* In */
+ {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
+ /* InReverse */
+ {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+ /* Out */
+ {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
+ /* OutReverse */
+ {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+ /* Atop */
+ {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+ /* AtopReverse */
+ {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+ /* Xor */
+ {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+ /* Add */
+ {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
+};
+
+struct formatinfo { /* maps a picture format to the texture format programmed for it */
+ unsigned int fmt; /* PICT_* picture format, compared against PicturePtr->format */
+ uint32_t card_fmt; /* FMT_* value written to tex_resource_t.format */
+};
+
+static struct formatinfo R600TexFormats[] = { /* picture formats accepted as textures; searched linearly on .fmt */
+ {PICT_a8r8g8b8, FMT_8_8_8_8},
+ {PICT_x8r8g8b8, FMT_8_8_8_8},
+ {PICT_a8b8g8r8, FMT_8_8_8_8},
+ {PICT_x8b8g8r8, FMT_8_8_8_8},
+ {PICT_r5g6b5, FMT_5_6_5},
+ {PICT_a1r5g5b5, FMT_1_5_5_5},
+ {PICT_x1r5g5b5, FMT_1_5_5_5},
+ {PICT_a8, FMT_8},
+};
+
+/*
+ * Build the colour blend-control value for compositing operator `op`.
+ *
+ * Starts from the R600BlendOp[op] factors and adjusts them for two cases:
+ *  - the destination format has no alpha bits but the operator reads
+ *    destination alpha: treat that alpha as constant 1;
+ *  - a component-alpha mask is in use and the operator reads source alpha:
+ *    switch the destination factor from source alpha to source colour.
+ *
+ * op is used as an index into R600BlendOp without range checking; pMask may
+ * be NULL.  Returns the source and destination factor bits OR-ed together.
+ */
+static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
+{
+    const struct blendinfo *entry = &R600BlendOp[op];
+    uint32_t src_factor = entry->blend_cntl & COLOR_SRCBLEND_mask;
+    uint32_t dst_factor = entry->blend_cntl & COLOR_DESTBLEND_mask;
+
+    /* No alpha channel in the destination: DST_ALPHA behaves as 1,
+     * ONE_MINUS_DST_ALPHA as 0.
+     */
+    if (entry->dst_alpha && PICT_FORMAT_A(dst_format) == 0) {
+        if (src_factor == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift)) {
+            src_factor = BLEND_ONE << COLOR_SRCBLEND_shift;
+        } else if (src_factor == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift)) {
+            src_factor = BLEND_ZERO << COLOR_SRCBLEND_shift;
+        }
+    }
+
+    /* With component alpha the value the blender sees as "source" is the
+     * mask channels multiplied by source alpha, so the per-channel source
+     * colour takes the place of source alpha in the destination factor.
+     */
+    if (pMask && pMask->componentAlpha && entry->src_alpha) {
+        if (dst_factor == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift))
+            dst_factor = BLEND_SRC_COLOR << COLOR_DESTBLEND_shift;
+        else if (dst_factor == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift))
+            dst_factor = BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift;
+    }
+
+    return src_factor | dst_factor;
+}
+
+/*
+ * Translate the destination picture's format into a colour-buffer format.
+ *
+ * On success stores the COLOR_* value in *dst_format and returns TRUE.
+ * For any other picture format returns FALSE (via RADEON_FALLBACK) and
+ * leaves *dst_format untouched.
+ */
+static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
+{
+    uint32_t hw_format;
+
+    switch (pDstPicture->format) {
+    case PICT_a8r8g8b8:
+    case PICT_x8r8g8b8:
+        hw_format = COLOR_8_8_8_8;
+        break;
+    case PICT_r5g6b5:
+        hw_format = COLOR_5_6_5;
+        break;
+    case PICT_a1r5g5b5:
+    case PICT_x1r5g5b5:
+        hw_format = COLOR_1_5_5_5;
+        break;
+    case PICT_a8:
+        hw_format = COLOR_8;
+        break;
+    default:
+        RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
+                         (int)pDstPicture->format));
+    }
+
+    *dst_format = hw_format;
+    return TRUE;
+}
+
+/*
+ * Decide whether picture pPict can be used as a texture for operator `op`
+ * when compositing onto pDstPict.
+ *
+ * Falls back (returns FALSE) when the drawable is larger than 8192 in
+ * either dimension, when the format is not listed in R600TexFormats, when
+ * the filter is neither nearest nor bilinear, or for the transformed,
+ * non-repeating, alpha-less source case described below.  `unit` is not
+ * used by the checks.
+ */
+static Bool R600CheckCompositeTexture(PicturePtr pPict,
+                                      PicturePtr pDstPict,
+                                      int op,
+                                      int unit)
+{
+    const unsigned int num_formats = sizeof(R600TexFormats) / sizeof(R600TexFormats[0]);
+    const int max_tex_w = 8192;
+    const int max_tex_h = 8192;
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+    unsigned int fmt_idx = 0;
+
+    if ((w > max_tex_w) || (h > max_tex_h))
+        RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
+
+    /* linear search of the supported texture formats */
+    while (fmt_idx < num_formats && R600TexFormats[fmt_idx].fmt != pPict->format)
+        fmt_idx++;
+    if (fmt_idx == num_formats)
+        RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
+                         (int)pPict->format));
+
+    if (!(pPict->filter == PictFilterNearest ||
+          pPict->filter == PictFilterBilinear))
+        RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
+
+    /* Render requires that, without repeat, samples taken outside the source
+     * come back with alpha = 0.  A border colour can provide that only when
+     * the texture itself carries alpha; otherwise we must fall back.  For
+     * untransformed pictures we rely on upper layers having clipped the
+     * operation to the source drawable, which has not been verified to hold
+     * for every X server code path.  Src/Clear onto an alpha-less
+     * destination are let through.
+     */
+    //FIXME R6xx
+    if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) {
+        if ((op != PictOpSrc && op != PictOpClear) || PICT_FORMAT_A(pDstPict->format) != 0)
+            RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
+    }
+
+    return TRUE;
+}
+
+/*
+ * Program texture unit `unit` (0 = source, 1 = mask) from picture pPict,
+ * whose pixels live in pixmap pPix.
+ *
+ * Records the texture size, pitch, byte size and address in accel_state,
+ * emits a texture-cache flush, the texture resource (with per-format
+ * component swizzles) and the sampler (repeat mode and filter) into
+ * accel_state->ib, and remembers the picture transform if there is one.
+ *
+ * Returns TRUE on success.  Returns FALSE for a picture format that is not
+ * in R600TexFormats (checked before anything is modified or emitted), or
+ * for an unknown repeat type or filter.
+ */
+static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
+                             int unit)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    const unsigned int num_formats = sizeof(R600TexFormats) / sizeof(R600TexFormats[0]);
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+    unsigned int i;
+    tex_resource_t tex_res;
+    tex_sampler_t tex_samp;
+
+    CLEAR (tex_res);
+    CLEAR (tex_samp);
+
+    for (i = 0; i < num_formats; i++) {
+        if (R600TexFormats[i].fmt == pPict->format)
+            break;
+    }
+    /* Unknown format: bail out now, otherwise R600TexFormats[i] below would
+     * be read one element past the end of the table.
+     */
+    if (i == num_formats)
+        RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
+
+    accel_state->texW[unit] = w;
+    accel_state->texH[unit] = h;
+
+    //ErrorF("Tex %d setup %dx%d\n", unit, w, h);
+
+    /* pitch is kept in pixels, size in bytes */
+    accel_state->src_pitch[unit] = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
+    accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * h;
+    accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
+    /* flush texture cache */
+    cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
+                        accel_state->src_size[unit], accel_state->src_mc_addr[unit]);
+
+    /* Texture */
+    tex_res.id = unit;
+    tex_res.w = w;
+    tex_res.h = h;
+    tex_res.pitch = accel_state->src_pitch[unit];
+    tex_res.depth = 0;
+    tex_res.dim = SQ_TEX_DIM_2D;
+    tex_res.base = accel_state->src_mc_addr[unit];
+    tex_res.mip_base = accel_state->src_mc_addr[unit];
+    tex_res.format = R600TexFormats[i].card_fmt;
+    tex_res.request_size = 1;
+
+    /* component swizzles */
+    switch (pPict->format) {
+    case PICT_a1r5g5b5:
+    case PICT_a8r8g8b8:
+        tex_res.dst_sel_x = SQ_SEL_Z; //R
+        tex_res.dst_sel_y = SQ_SEL_Y; //G
+        tex_res.dst_sel_z = SQ_SEL_X; //B
+        tex_res.dst_sel_w = SQ_SEL_W; //A
+        break;
+    case PICT_a8b8g8r8:
+        tex_res.dst_sel_x = SQ_SEL_X; //R
+        tex_res.dst_sel_y = SQ_SEL_Y; //G
+        tex_res.dst_sel_z = SQ_SEL_Z; //B
+        tex_res.dst_sel_w = SQ_SEL_W; //A
+        break;
+    case PICT_x8b8g8r8:
+        tex_res.dst_sel_x = SQ_SEL_X; //R
+        tex_res.dst_sel_y = SQ_SEL_Y; //G
+        tex_res.dst_sel_z = SQ_SEL_Z; //B
+        tex_res.dst_sel_w = SQ_SEL_1; //A
+        break;
+    case PICT_x1r5g5b5:
+    case PICT_x8r8g8b8:
+        tex_res.dst_sel_x = SQ_SEL_Z; //R
+        tex_res.dst_sel_y = SQ_SEL_Y; //G
+        tex_res.dst_sel_z = SQ_SEL_X; //B
+        tex_res.dst_sel_w = SQ_SEL_1; //A
+        break;
+    case PICT_r5g6b5:
+        tex_res.dst_sel_x = SQ_SEL_Z; //R
+        tex_res.dst_sel_y = SQ_SEL_Y; //G
+        tex_res.dst_sel_z = SQ_SEL_X; //B
+        tex_res.dst_sel_w = SQ_SEL_1; //A
+        break;
+    case PICT_a8:
+        tex_res.dst_sel_x = SQ_SEL_0; //R
+        tex_res.dst_sel_y = SQ_SEL_0; //G
+        tex_res.dst_sel_z = SQ_SEL_0; //B
+        tex_res.dst_sel_w = SQ_SEL_X; //A
+        break;
+    default:
+        RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
+    }
+
+    tex_res.base_level = 0;
+    tex_res.last_level = 0;
+    tex_res.perf_modulation = 0;
+    set_tex_resource (pScrn, accel_state->ib, &tex_res);
+
+    tex_samp.id = unit;
+    tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+
+    if (pPict->repeat) {
+        switch (pPict->repeatType) {
+        case RepeatNormal:
+            tex_samp.clamp_x = SQ_TEX_WRAP;
+            tex_samp.clamp_y = SQ_TEX_WRAP;
+            break;
+        case RepeatPad:
+            tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
+            tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
+            break;
+        case RepeatReflect:
+            tex_samp.clamp_x = SQ_TEX_MIRROR;
+            tex_samp.clamp_y = SQ_TEX_MIRROR;
+            break;
+        case RepeatNone:
+            tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER;
+            tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER;
+            break;
+        default:
+            RADEON_FALLBACK(("Bad repeat 0x%x\n", pPict->repeatType));
+        }
+    } else {
+        /* no repeat: outside samples come from the border colour set above */
+        tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER;
+        tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER;
+    }
+
+    switch (pPict->filter) {
+    case PictFilterNearest:
+        tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT;
+        tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT;
+        break;
+    case PictFilterBilinear:
+        tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
+        tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;
+        break;
+    default:
+        RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
+    }
+
+    tex_samp.clamp_z = SQ_TEX_WRAP;
+    tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
+    tex_samp.mip_filter = 0; /* no mipmap */
+    set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+
+    /* remember the transform so the draw path can apply it to the coordinates */
+    if (pPict->transform != 0) {
+        accel_state->is_transform[unit] = TRUE;
+        accel_state->transform[unit] = pPict->transform;
+    } else
+        accel_state->is_transform[unit] = FALSE;
+
+    return TRUE;
+}
+
+/*
+ * EXA CheckComposite hook: decide whether the composite described by op and
+ * the three pictures can be accelerated.
+ *
+ * Returns FALSE (software fallback) when:
+ *  - op is outside the R600BlendOp table;
+ *  - the source or mask picture has no drawable (solid/gradient pictures);
+ *  - the source, mask or destination pixmap is 8192 or more in width or
+ *    height;
+ *  - a component-alpha mask is combined with an operator that needs both
+ *    source alpha and a non-zero source blend factor;
+ *  - R600CheckCompositeTexture() rejects the source or mask;
+ *  - R600GetDestFormat() rejects the destination format.
+ * pMaskPicture may be NULL.
+ */
+static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
+                               PicturePtr pDstPicture)
+{
+    uint32_t tmp1;
+    PixmapPtr pSrcPixmap, pDstPixmap;
+    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
+
+    /* Check for unsupported compositing operations (op indexes R600BlendOp). */
+    if (op < 0 || op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
+        RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
+
+    /* Source-only pictures have no backing drawable to texture from. */
+    if (!pSrcPicture->pDrawable)
+        RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
+
+    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
+
+    max_tex_w = 8192;
+    max_tex_h = 8192;
+    max_dst_w = 8192;
+    max_dst_h = 8192;
+
+    if (pSrcPixmap->drawable.width >= max_tex_w ||
+        pSrcPixmap->drawable.height >= max_tex_h) {
+        RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
+                         pSrcPixmap->drawable.width,
+                         pSrcPixmap->drawable.height));
+    }
+
+    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
+
+    if (pDstPixmap->drawable.width >= max_dst_w ||
+        pDstPixmap->drawable.height >= max_dst_h) {
+        RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
+                         pDstPixmap->drawable.width,
+                         pDstPixmap->drawable.height));
+    }
+
+    if (pMaskPicture) {
+        PixmapPtr pMaskPixmap;
+
+        if (!pMaskPicture->pDrawable)
+            RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
+
+        pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
+
+        if (pMaskPixmap->drawable.width >= max_tex_w ||
+            pMaskPixmap->drawable.height >= max_tex_h) {
+            RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
+                             pMaskPixmap->drawable.width,
+                             pMaskPixmap->drawable.height));
+        }
+
+        if (pMaskPicture->componentAlpha) {
+            /* Check if it's component alpha that relies on a source alpha and
+             * on the source value.  We can only get one of those into the
+             * single source value that we get to blend with.
+             */
+            if (R600BlendOp[op].src_alpha &&
+                (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
+                (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
+                RADEON_FALLBACK(("Component alpha not supported with source "
+                                 "alpha and source value blending.\n"));
+            }
+        }
+
+        if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
+            return FALSE;
+    }
+
+    if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
+        return FALSE;
+
+    /* only the success/failure of the format lookup matters here */
+    if (!R600GetDestFormat(pDstPicture, &tmp1))
+        return FALSE;
+
+    return TRUE;
+
+}
+
+static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
+ PicturePtr pMaskPicture, PicturePtr pDstPicture,
+ PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
+{
+ ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+ uint32_t blendcntl, dst_format;
+ cb_config_t cb_conf;
+ shader_config_t vs_conf, ps_conf;
+ int i = 0;
+ uint32_t ps[24];
+
+ //return FALSE;
+ /* EXA PrepareComposite hook.  Records the dst/src/mask addresses, pitches
+  * and sizes in accel_state, assembles the pixel shader words into ps[],
+  * then obtains an indirect buffer and emits texture, shader, blend,
+  * render-target and interpolator state into it.  pMask/pMaskPicture may be
+  * NULL.  Returns FALSE on a misaligned pitch or offset, an unsupported
+  * destination format, or a texture setup failure; TRUE otherwise. */
+ if (pMask)
+ accel_state->has_mask = TRUE;
+ else
+ accel_state->has_mask = FALSE;
+
+ accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
+ accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
+ accel_state->dst_size = exaGetPixmapPitch(pDst) * pDst->drawable.height;
+
+ accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
+ accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
+ accel_state->src_size[0] = exaGetPixmapPitch(pSrc) * pSrc->drawable.height;
+ /* pitches (in pixels) must be multiples of 8 and addresses multiples of 256 */
+ if (accel_state->dst_pitch & 7)
+ RADEON_FALLBACK(("Bad dst pitch 0x%x\n", (int)accel_state->dst_pitch));
+
+ if (accel_state->dst_mc_addr & 0xff)
+ RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)accel_state->dst_mc_addr));
+
+ if (accel_state->src_pitch[0] & 7)
+ RADEON_FALLBACK(("Bad src pitch 0x%x\n", (int)accel_state->src_pitch[0]));
+
+ if (accel_state->src_mc_addr[0] & 0xff)
+ RADEON_FALLBACK(("Bad src offset 0x%x\n", (int)accel_state->src_mc_addr[0]));
+
+ if (!R600GetDestFormat(pDstPicture, &dst_format))
+ return FALSE;
+
+ if (pMask) {
+ int src_a, src_r, src_g, src_b;
+ int mask_a, mask_r, mask_g, mask_b;
+
+ accel_state->src_mc_addr[1] = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
+ accel_state->src_pitch[1] = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
+ accel_state->src_size[1] = exaGetPixmapPitch(pMask) * pMask->drawable.height;
+
+ if (accel_state->src_pitch[1] & 7)
+ RADEON_FALLBACK(("Bad mask pitch 0x%x\n", (int)accel_state->src_pitch[1]));
+
+ if (accel_state->src_mc_addr[1] & 0xff)
+ RADEON_FALLBACK(("Bad mask offset 0x%x\n", (int)accel_state->src_mc_addr[1]));
+
+ /* setup pixel shader: pick the channel selects used when the src texture
+  * is fetched into gpr 0 and the mask texture into gpr 1 */
+ if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) {
+ //src_color = R300_ALU_RGB_0_0;
+ src_r = SQ_SEL_0;
+ src_g = SQ_SEL_0;
+ src_b = SQ_SEL_0;
+ } else {
+ //src_color = R300_ALU_RGB_SRC0_RGB;
+ src_r = SQ_SEL_X;
+ src_g = SQ_SEL_Y;
+ src_b = SQ_SEL_Z;
+ }
+
+ if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
+ //src_alpha = R300_ALU_ALPHA_1_0;
+ src_a = SQ_SEL_1;
+ } else {
+ //src_alpha = R300_ALU_ALPHA_SRC0_A;
+ src_a = SQ_SEL_W;
+ }
+
+ if (pMaskPicture->componentAlpha) {
+ if (R600BlendOp[op].src_alpha) {
+ if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
+ //src_color = R300_ALU_RGB_1_0;
+ //src_alpha = R300_ALU_ALPHA_1_0;
+ src_r = SQ_SEL_1;
+ src_g = SQ_SEL_1;
+ src_b = SQ_SEL_1;
+ src_a = SQ_SEL_1;
+ } else {
+ //src_color = R300_ALU_RGB_SRC0_AAA;
+ //src_alpha = R300_ALU_ALPHA_SRC0_A;
+ src_r = SQ_SEL_W;
+ src_g = SQ_SEL_W;
+ src_b = SQ_SEL_W;
+ src_a = SQ_SEL_W;
+ }
+
+ //mask_color = R300_ALU_RGB_SRC1_RGB;
+ mask_r = SQ_SEL_X;
+ mask_g = SQ_SEL_Y;
+ mask_b = SQ_SEL_Z;
+
+ if (PICT_FORMAT_A(pMaskPicture->format) == 0) {
+ //mask_alpha = R300_ALU_ALPHA_1_0;
+ mask_a = SQ_SEL_1;
+ } else {
+ //mask_alpha = R300_ALU_ALPHA_SRC1_A;
+ mask_a = SQ_SEL_W;
+ }
+ } else {
+ //src_color = R300_ALU_RGB_SRC0_RGB;
+ src_r = SQ_SEL_X;
+ src_g = SQ_SEL_Y;
+ src_b = SQ_SEL_Z;
+
+ if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
+ //src_alpha = R300_ALU_ALPHA_1_0;
+ src_a = SQ_SEL_1;
+ } else {
+ //src_alpha = R300_ALU_ALPHA_SRC0_A;
+ src_a = SQ_SEL_W;
+ }
+
+ //mask_color = R300_ALU_RGB_SRC1_RGB;
+ mask_r = SQ_SEL_X;
+ mask_g = SQ_SEL_Y;
+ mask_b = SQ_SEL_Z;
+
+ if (PICT_FORMAT_A(pMaskPicture->format) == 0) {
+ //mask_alpha = R300_ALU_ALPHA_1_0;
+ mask_a = SQ_SEL_1;
+ } else {
+ //mask_alpha = R300_ALU_ALPHA_SRC1_A;
+ mask_a = SQ_SEL_W;
+ }
+ }
+ } else {
+ if (PICT_FORMAT_A(pMaskPicture->format) == 0) {
+ //mask_color = R300_ALU_RGB_1_0;
+ mask_r = SQ_SEL_1;
+ mask_g = SQ_SEL_1;
+ mask_b = SQ_SEL_1;
+ } else {
+ //mask_color = R300_ALU_RGB_SRC1_AAA;
+ mask_r = SQ_SEL_W;
+ mask_g = SQ_SEL_W;
+ mask_b = SQ_SEL_W;
+ }
+ if (PICT_FORMAT_A(pMaskPicture->format) == 0) {
+ //mask_alpha = R300_ALU_ALPHA_1_0;
+ mask_a = SQ_SEL_1;
+ } else {
+ //mask_alpha = R300_ALU_ALPHA_SRC1_A;
+ mask_a = SQ_SEL_W;
+ }
+ }
+
+ //0 - CF: TEX clause at ADDR 8 (the src and mask fetches below)
+ ps[i++] = CF_DWORD0(ADDR(8));
+ ps[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ // 1 - CF: ALU clause at ADDR 3 (the four MULs below)
+ ps[i++] = CF_ALU_DWORD0(ADDR(3),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ //2 - CF: export gpr 2 as pixel output MRT0, end of program
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ // 3 - alu 0
+ // MUL gpr[2].x gpr[1].x gpr[0].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 4 - alu 1
+ // MUL gpr[2].y gpr[1].y gpr[0].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ // 5 - alu 2
+ // MUL gpr[2].z gpr[1].z gpr[0].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ // 6 - alu 3
+ // MUL gpr[2].w gpr[1].w gpr[0].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ // 7 - two zero words of padding before the TEX instructions
+ ps[i++] = 0x00000000;
+ ps[i++] = 0x00000000;
+
+ //8/9 - src
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(src_r),
+ DST_SEL_Y(src_g),
+ DST_SEL_Z(src_b),
+ DST_SEL_W(src_a),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ ps[i++] = TEX_DWORD_PAD;
+ //10/11 - mask
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(1),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(mask_r),
+ DST_SEL_Y(mask_g),
+ DST_SEL_Z(mask_b),
+ DST_SEL_W(mask_a),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ ps[i++] = TEX_DWORD_PAD;
+ } else {
+ int src_a, src_r, src_g, src_b;
+ /* setup pixel shader (no mask): one fetch into gpr 0, exported directly;
+  * only the first 8 words of ps[] are written on this path */
+ if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) {
+ //src_color = R300_ALU_RGB_0_0;
+ src_r = SQ_SEL_0;
+ src_g = SQ_SEL_0;
+ src_b = SQ_SEL_0;
+ } else {
+ //src_color = R300_ALU_RGB_SRC0_RGB;
+ src_r = SQ_SEL_X;
+ src_g = SQ_SEL_Y;
+ src_b = SQ_SEL_Z;
+ }
+
+ if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
+ //src_alpha = R300_ALU_ALPHA_1_0;
+ src_a = SQ_SEL_1;
+ } else {
+ //src_alpha = R300_ALU_ALPHA_SRC0_A;
+ src_a = SQ_SEL_W;
+ }
+
+ //0 - CF: TEX clause at ADDR 2 (the src fetch below)
+ ps[i++] = CF_DWORD0(ADDR(2));
+ ps[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //1 - CF: export gpr 0 as pixel output MRT0, end of program
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+
+ //2/3 - src
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(src_r),
+ DST_SEL_Y(src_g),
+ DST_SEL_Z(src_b),
+ DST_SEL_W(src_a),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ ps[i++] = TEX_DWORD_PAD;
+ }
+
+ CLEAR (cb_conf);
+ CLEAR (vs_conf);
+ CLEAR (ps_conf);
+
+ accel_state->ib = RADEONCPGetBuffer(pScrn);
+
+ /* Init */
+ start_3d(pScrn, accel_state->ib);
+
+ //cp_set_surface_sync(pScrn, accel_state->ib);
+
+ set_default_state(pScrn, accel_state->ib);
+
+ /* Scissor / viewport */
+ ereg (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
+ ereg (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+
+ // FIXME: on failure the indirect buffer obtained above is not discarded
+ if (!R600TextureSetup(pSrcPicture, pSrc, 0))
+ return FALSE;
+
+ if (pMask != NULL) {
+ // FIXME: on failure the indirect buffer obtained above is not discarded
+ if (!R600TextureSetup(pMaskPicture, pMask, 1))
+ return FALSE;
+ } else {
+ accel_state->is_transform[1] = FALSE;
+ }
+
+ if (pMask != NULL)
+ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+ accel_state->comp_mask_vs_offset;
+ else
+ accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+ accel_state->comp_vs_offset;
+ /* the pixel shader is copied into the indirect buffer itself, 256 bytes
+  * below its midpoint; ps_mc_addr is computed with the same offset */
+ memcpy ((char *)accel_state->ib->address + (accel_state->ib->total / 2) - 256, ps, sizeof(ps));
+ accel_state->ps_mc_addr = info->gartLocation + info->dri->bufStart +
+ (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2) - 256;
+
+ accel_state->vs_size = 512;
+ accel_state->ps_size = 512;
+
+ /* Shader */
+
+ /* flush SQ cache */
+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
+ accel_state->vs_size, accel_state->vs_mc_addr);
+
+ vs_conf.shader_addr = accel_state->vs_mc_addr;
+ vs_conf.num_gprs = 3;
+ vs_conf.stack_size = 0;
+ vs_setup (pScrn, accel_state->ib, &vs_conf);
+
+ /* flush SQ cache */
+ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
+ accel_state->ps_size, accel_state->ps_mc_addr);
+
+ ps_conf.shader_addr = accel_state->ps_mc_addr;
+ ps_conf.num_gprs = 3;
+ ps_conf.stack_size = 0;
+ ps_conf.uncached_first_inst = 1;
+ ps_conf.clamp_consts = 0;
+ ps_conf.export_mode = 2;
+ ps_setup (pScrn, accel_state->ib, &ps_conf);
+
+ ereg (accel_state->ib, CB_SHADER_MASK, (0xf << OUTPUT0_ENABLE_shift));
+ ereg (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
+
+ blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
+
+ if (info->ChipFamily == CHIP_FAMILY_R600) {
+ // no per-MRT blend on R600
+ ereg (accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
+ ereg (accel_state->ib, CB_BLEND_CONTROL, blendcntl);
+ } else {
+ ereg (accel_state->ib, CB_COLOR_CONTROL, (RADEON_ROP[3] |
+ (1 << TARGET_BLEND_ENABLE_shift) |
+ PER_MRT_BLEND_bit));
+ ereg (accel_state->ib, CB_BLEND0_CONTROL, blendcntl);
+ }
+
+ cb_conf.id = 0;
+ cb_conf.w = accel_state->dst_pitch;
+ cb_conf.h = pDst->drawable.height;
+ cb_conf.base = accel_state->dst_mc_addr;
+ cb_conf.format = dst_format;
+
+ switch (pDstPicture->format) {
+ case PICT_a8r8g8b8:
+ //ErrorF("dst: PICT_a8r8g8b8\n");
+ cb_conf.comp_swap = 1; //ARGB
+ break;
+ case PICT_x8r8g8b8:
+ //ErrorF("dst: PICT_x8r8g8b8\n");
+ cb_conf.comp_swap = 1; //ARGB
+ break;
+ case PICT_r5g6b5:
+ //ErrorF("dst: PICT_r5g6b5\n");
+ cb_conf.comp_swap = 2; //RGB
+ break;
+ case PICT_a1r5g5b5:
+ //ErrorF("dst: PICT_a1r5g5b5\n");
+ cb_conf.comp_swap = 1; //ARGB
+ break;
+ case PICT_x1r5g5b5:
+ //ErrorF("dst: PICT_x1r5g5b5\n");
+ cb_conf.comp_swap = 1; //ARGB
+ break;
+ case PICT_a8:
+ //ErrorF("dst: PICT_a8\n");
+ cb_conf.comp_swap = 3; //A
+ break;
+ default:
+ cb_conf.comp_swap = 1;
+ break;
+ }
+ cb_conf.source_format = 1;
+ cb_conf.blend_clamp = 1;
+ set_render_target(pScrn, accel_state->ib, &cb_conf);
+
+ ereg (accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+ (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
+ ereg (accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
+ DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+ /* Interpolator setup */
+ if (pMask) {
+ // export 2 tex coords from VS
+ ereg (accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift));
+ // src = semantic id 0; mask = semantic id 1
+ ereg (accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
+ (1 << SEMANTIC_1_shift)));
+ // input 2 tex coords from VS
+ ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, (2 << NUM_INTERP_shift));
+ } else {
+ // export 1 tex coords from VS
+ ereg (accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
+ // src = semantic id 0
+ ereg (accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
+ // input 1 tex coords from VS
+ ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift));
+ }
+ ereg (accel_state->ib, SPI_PS_IN_CONTROL_1, 0);
+ // SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0
+ ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
+ (0x01 << DEFAULT_VAL_shift) |
+ SEL_CENTROID_bit));
+ // SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1
+ ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (1 <<2), ((1 << SEMANTIC_shift) |
+ (0x01 << DEFAULT_VAL_shift) |
+ SEL_CENTROID_bit));
+ ereg (accel_state->ib, SPI_INTERP_CONTROL_0, 0);
+ /* start the vertex buffer for this operation from index 0 */
+ accel_state->vb_index = 0;
+
+ return TRUE;
+}
+
+/*
+ * Queue one rectangle of the composite operation currently set up in
+ * info->accel_state.
+ *
+ * pDst          destination pixmap (used here only to find the screen)
+ * srcX, srcY    top-left corner of the rectangle in the source picture
+ * maskX, maskY  top-left corner in the mask picture (only read when
+ *               accel_state->has_mask is set)
+ * dstX, dstY    top-left corner in the destination
+ * w, h          size of the rectangle
+ *
+ * The source/mask corners are built as xFixed points, run through
+ * accel_state->transform[0] / transform[1] when the matching is_transform
+ * flag is set, and divided by texW/texH to get texture coordinates.
+ * Three vertices per rectangle (top-left, bottom-left, bottom-right) are
+ * appended to the vertex array that lives in the upper half of the current
+ * indirect buffer (ib->address + ib->total / 2).  A vertex is 24 bytes with
+ * a mask and 16 bytes without.  When three more vertices would not fit in
+ * half of the buffer, the pending work is submitted with R600DoneComposite()
+ * and a new buffer is fetched.
+ *
+ * NOTE(review): after that mid-operation flush the freshly fetched buffer
+ * does not get any composite state emitted into it here -- confirm that
+ * draws issued from it are still valid.
+ */
+static void R600Composite(PixmapPtr pDst,
+                          int srcX, int srcY,
+                          int maskX, int maskY,
+                          int dstX, int dstY,
+                          int w, int h)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
+
+    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
+       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
+
+    srcTopLeft.x = IntToxFixed(srcX);
+    srcTopLeft.y = IntToxFixed(srcY);
+    srcTopRight.x = IntToxFixed(srcX + w);
+    srcTopRight.y = IntToxFixed(srcY);
+    srcBottomLeft.x = IntToxFixed(srcX);
+    srcBottomLeft.y = IntToxFixed(srcY + h);
+    srcBottomRight.x = IntToxFixed(srcX + w);
+    srcBottomRight.y = IntToxFixed(srcY + h);
+
+    //XXX do transform in vertex shader
+    if (accel_state->is_transform[0]) {
+        transformPoint(accel_state->transform[0], &srcTopLeft);
+        transformPoint(accel_state->transform[0], &srcTopRight);
+        transformPoint(accel_state->transform[0], &srcBottomLeft);
+        transformPoint(accel_state->transform[0], &srcBottomRight);
+    }
+
+    if (accel_state->has_mask) {
+        struct r6xx_comp_mask_vertex *comp_vb;
+        struct r6xx_comp_mask_vertex vertex[3];
+        xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
+
+        /* 24 bytes per vertex; the vertex array may use half of the buffer */
+        if (((accel_state->vb_index + 3) * 24) > (accel_state->ib->total / 2)) {
+            R600DoneComposite(pDst);
+            accel_state->vb_index = 0;
+            accel_state->ib = RADEONCPGetBuffer(pScrn);
+        }
+
+        /* vertex data starts at the midpoint of the indirect buffer */
+        comp_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2));
+
+        maskTopLeft.x = IntToxFixed(maskX);
+        maskTopLeft.y = IntToxFixed(maskY);
+        maskTopRight.x = IntToxFixed(maskX + w);
+        maskTopRight.y = IntToxFixed(maskY);
+        maskBottomLeft.x = IntToxFixed(maskX);
+        maskBottomLeft.y = IntToxFixed(maskY + h);
+        maskBottomRight.x = IntToxFixed(maskX + w);
+        maskBottomRight.y = IntToxFixed(maskY + h);
+
+        if (accel_state->is_transform[1]) {
+            transformPoint(accel_state->transform[1], &maskTopLeft);
+            transformPoint(accel_state->transform[1], &maskTopRight);
+            transformPoint(accel_state->transform[1], &maskBottomLeft);
+            transformPoint(accel_state->transform[1], &maskBottomRight);
+        }
+
+        /* top-left */
+        vertex[0].x = (float)dstX;
+        vertex[0].y = (float)dstY;
+        vertex[0].src_s = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0];
+        vertex[0].src_t = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0];
+        vertex[0].mask_s = xFixedToFloat(maskTopLeft.x) / accel_state->texW[1];
+        vertex[0].mask_t = xFixedToFloat(maskTopLeft.y) / accel_state->texH[1];
+
+        /* bottom-left */
+        vertex[1].x = (float)dstX;
+        vertex[1].y = (float)(dstY + h);
+        vertex[1].src_s = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0];
+        vertex[1].src_t = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0];
+        vertex[1].mask_s = xFixedToFloat(maskBottomLeft.x) / accel_state->texW[1];
+        vertex[1].mask_t = xFixedToFloat(maskBottomLeft.y) / accel_state->texH[1];
+
+        /* bottom-right */
+        vertex[2].x = (float)(dstX + w);
+        vertex[2].y = (float)(dstY + h);
+        vertex[2].src_s = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0];
+        vertex[2].src_t = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0];
+        vertex[2].mask_s = xFixedToFloat(maskBottomRight.x) / accel_state->texW[1];
+        vertex[2].mask_t = xFixedToFloat(maskBottomRight.y) / accel_state->texH[1];
+
+#ifdef SHOW_VERTEXES
+        /* x/y are assigned from (float) casts above, so print them with %f */
+        ErrorF("vertex 0: %f, %f, %f, %f, %f, %f\n", vertex[0].x, vertex[0].y,
+               vertex[0].src_s, vertex[0].src_t, vertex[0].mask_s, vertex[0].mask_t);
+        ErrorF("vertex 1: %f, %f, %f, %f, %f, %f\n", vertex[1].x, vertex[1].y,
+               vertex[1].src_s, vertex[1].src_t, vertex[1].mask_s, vertex[1].mask_t);
+        ErrorF("vertex 2: %f, %f, %f, %f, %f, %f\n", vertex[2].x, vertex[2].y,
+               vertex[2].src_s, vertex[2].src_t, vertex[2].mask_s, vertex[2].mask_t);
+#endif
+
+        // append to vertex buffer
+        comp_vb[accel_state->vb_index++] = vertex[0];
+        comp_vb[accel_state->vb_index++] = vertex[1];
+        comp_vb[accel_state->vb_index++] = vertex[2];
+
+    } else {
+        struct r6xx_comp_vertex *comp_vb;
+        struct r6xx_comp_vertex vertex[3];
+
+        /* 16 bytes per vertex; the vertex array may use half of the buffer */
+        if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
+            R600DoneComposite(pDst);
+            accel_state->vb_index = 0;
+            accel_state->ib = RADEONCPGetBuffer(pScrn);
+        }
+
+        /* vertex data starts at the midpoint of the indirect buffer */
+        comp_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2));
+
+        /* top-left */
+        vertex[0].x = (float)dstX;
+        vertex[0].y = (float)dstY;
+        vertex[0].src_s = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0];
+        vertex[0].src_t = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0];
+
+        /* bottom-left */
+        vertex[1].x = (float)dstX;
+        vertex[1].y = (float)(dstY + h);
+        vertex[1].src_s = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0];
+        vertex[1].src_t = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0];
+
+        /* bottom-right */
+        vertex[2].x = (float)(dstX + w);
+        vertex[2].y = (float)(dstY + h);
+        vertex[2].src_s = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0];
+        vertex[2].src_t = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0];
+
+        // append to vertex buffer
+        comp_vb[accel_state->vb_index++] = vertex[0];
+        comp_vb[accel_state->vb_index++] = vertex[1];
+        comp_vb[accel_state->vb_index++] = vertex[2];
+
+#ifdef SHOW_VERTEXES
+        /* x/y are assigned from (float) casts above, so print them with %f */
+        ErrorF("vertex 0: %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].src_s, vertex[0].src_t);
+        ErrorF("vertex 1: %f, %f, %f, %f\n", vertex[1].x, vertex[1].y, vertex[1].src_s, vertex[1].src_t);
+        ErrorF("vertex 2: %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].src_s, vertex[2].src_t);
+#endif
+    }
+
+
+}
+
+/*
+ * Submit the rectangles queued for the current composite operation.
+ *
+ * When no vertices were queued the indirect buffer is simply discarded.
+ * Otherwise the vertex resource is pointed at the upper half of the
+ * indirect buffer, the vertices are drawn as a rect list with auto-generated
+ * indices, and the buffer is flushed after the destination has been synced.
+ */
+static void R600DoneComposite(PixmapPtr pDst)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    draw_config_t draw_conf;
+    vtx_resource_t vtx_res;
+    uint32_t vtx_sync_flag;
+    int vtx_bytes;
+
+    CLEAR (draw_conf);
+    CLEAR (vtx_res);
+
+    /* nothing was queued: give the buffer back without submitting it */
+    if (accel_state->vb_index == 0) {
+        R600IBDiscard(pScrn, accel_state->ib);
+        return;
+    }
+
+    /* card-side address of the upper half of this indirect buffer */
+    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
+        (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
+
+    /* Vertex buffer setup: 24 bytes per vertex with a mask, 16 without */
+    vtx_bytes = accel_state->has_mask ? 24 : 16;
+    accel_state->vb_size = accel_state->vb_index * vtx_bytes;
+    vtx_res.id = SQ_VTX_RESOURCE_vs;
+    vtx_res.vtx_size_dw = vtx_bytes / 4;
+    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
+    vtx_res.mem_req_size = 1;
+    vtx_res.vb_addr = accel_state->vb_mc_addr;
+
+    /* flush vertex cache; the listed families use the TC action bit instead
+     * (presumably they fetch vertices through the texture cache -- confirm) */
+    switch (info->ChipFamily) {
+    case CHIP_FAMILY_RV610:
+    case CHIP_FAMILY_RV620:
+    case CHIP_FAMILY_RS780:
+    case CHIP_FAMILY_RV710:
+        vtx_sync_flag = TC_ACTION_ENA_bit;
+        break;
+    default:
+        vtx_sync_flag = VC_ACTION_ENA_bit;
+        break;
+    }
+    cp_set_surface_sync(pScrn, accel_state->ib, vtx_sync_flag,
+                        accel_state->vb_size, accel_state->vb_mc_addr);
+
+    set_vtx_resource (pScrn, accel_state->ib, &vtx_res);
+
+    draw_conf.prim_type = DI_PT_RECTLIST;
+    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+    draw_conf.num_instances = 1;
+    /* dwords in the buffer / dwords per vertex = number of vertices */
+    draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
+    draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
+
+    draw_auto(pScrn, accel_state->ib, &draw_conf);
+
+    wait_3d_idle_clean(pScrn, accel_state->ib);
+
+    /* sync the destination surface before handing the buffer to the CP */
+    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
+                        accel_state->dst_size, accel_state->dst_mc_addr);
+
+    R600CPFlushIndirect(pScrn, accel_state->ib);
+}
+
+/*
+ * Upload a w x h rectangle from system memory into a surface at dst_mc_addr.
+ *
+ * pScrn        screen the transfer runs on
+ * src          first byte of the source rectangle in system memory
+ * src_pitch    byte distance between source rows
+ * dst_pitch    destination pitch (must be a multiple of 8)
+ * dst_mc_addr  destination surface address (must be 256-byte aligned)
+ * dst_height   destination surface height
+ * bpp          bits per pixel of both source and destination (>= 8)
+ * x, y         destination position of the rectangle
+ * w, h         rectangle size in pixels
+ *
+ * Returns TRUE when the rectangle was uploaded (or was empty), FALSE when the
+ * request cannot be handled here and the caller has to use another path.
+ *
+ * The data is staged through one CP buffer used as two halves: while the GPU
+ * blits from one half, the next band of rows is memcpy'd into the other.
+ */
+Bool
+R600CopyToVRAM(ScrnInfoPtr pScrn,
+               char *src, int src_pitch,
+               uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_height, int bpp,
+               int x, int y, int w, int h)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t scratch_mc_addr;
+    uint32_t scratch_pitch;
+    int wpass, scratch_pitch_bytes, max_rows;
+    int scratch_offset = 0, hpass, temph;
+    char *dst;
+    drmBufPtr scratch;
+
+    /* bpp / 8 is used as a divisor below; negative sizes make no sense */
+    if (bpp < 8 || w < 0 || h < 0)
+        return FALSE;
+
+    if (dst_pitch & 7)
+        return FALSE;
+
+    if (dst_mc_addr & 0xff)
+        return FALSE;
+
+    /* empty rectangle: nothing to transfer */
+    if (w == 0 || h == 0)
+        return TRUE;
+
+    wpass = w * (bpp / 8);                          /* bytes copied per row */
+    scratch_pitch_bytes = (wpass + 255) & ~255;     /* row stride in scratch, 256-byte aligned */
+    scratch_pitch = scratch_pitch_bytes / (bpp / 8);
+
+    scratch = RADEONCPGetBuffer(pScrn);
+    if (scratch == NULL)
+        return FALSE;
+
+    /* A row that does not fit in half of the scratch buffer would give
+     * hpass == 0 on every pass and the loop below would never finish. */
+    max_rows = scratch->total / 2 / scratch_pitch_bytes;
+    if (max_rows == 0) {
+        R600IBDiscard(pScrn, scratch);
+        return FALSE;
+    }
+
+    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
+    temph = hpass = min(h, max_rows);
+    dst = (char *)scratch->address;
+
+    //memcopy from sys to scratch
+    while (temph--) {
+        memcpy (dst, src, wpass);
+        src += src_pitch;
+        dst += scratch_pitch_bytes;
+    }
+
+    while (h) {
+        uint32_t offset = scratch_mc_addr + scratch_offset;
+        int oldhpass = hpass;
+        h -= oldhpass;
+        temph = hpass = min(h, max_rows);
+
+        if (hpass) {
+            /* switch to the other half of the scratch buffer */
+            scratch_offset = scratch->total/2 - scratch_offset;
+            dst = (char *)scratch->address + scratch_offset;
+            // wait for the engine to be idle
+            RADEONWaitForIdleCP(pScrn);
+            //memcopy from sys to scratch
+            while (temph--) {
+                memcpy (dst, src, wpass);
+                src += src_pitch;
+                dst += scratch_pitch_bytes;
+            }
+        }
+        //blit from scratch to vram
+        /* trailing 3, 0xffffffff: presumably copy rop and full planemask --
+         * confirm against R600DoPrepareCopy */
+        R600DoPrepareCopy(pScrn,
+                          scratch_pitch, w, oldhpass, offset, bpp,
+                          dst_pitch, dst_height, dst_mc_addr, bpp,
+                          3, 0xffffffff);
+        R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
+        R600DoCopy(pScrn);
+        y += oldhpass;
+    }
+
+    R600IBDiscard(pScrn, scratch);
+
+    return TRUE;
+}
+
+/*
+ * Upload hook: derive the destination surface parameters from pDst and hand
+ * the transfer to R600CopyToVRAM().  Returns whatever R600CopyToVRAM()
+ * returns.
+ */
+static Bool
+R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
+                   char *src, int src_pitch)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    int bpp = pDst->drawable.bitsPerPixel;
+    /* pixmap byte pitch divided by bytes per pixel */
+    uint32_t pitch_px = exaGetPixmapPitch(pDst) / (bpp / 8);
+    /* pixmap offset plus the framebuffer location/offset */
+    uint32_t mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
+    uint32_t surf_height = pDst->drawable.height;
+
+    return R600CopyToVRAM(pScrn, src, src_pitch,
+                          pitch_px, mc_addr, surf_height, bpp,
+                          x, y, w, h);
+}
+
+/*
+ * Download hook: copy a w x h rectangle at (x, y) of pSrc into system memory.
+ *
+ * pSrc       source pixmap
+ * x, y       top-left corner of the rectangle inside pSrc
+ * w, h       rectangle size in pixels
+ * dst        first byte of the destination in system memory
+ * dst_pitch  byte distance between destination rows
+ *
+ * Returns TRUE when the rectangle was copied (or was empty), FALSE when the
+ * request cannot be handled here and the caller has to use another path.
+ *
+ * The data is staged through one CP buffer used as two halves: the GPU blits
+ * the next band of rows into one half while the previous band is memcpy'd
+ * out of the other.  The scratch row stride is derived from the transferred
+ * row width (as in R600CopyToVRAM), not from dst_pitch, so a narrow
+ * rectangle going to a wide destination still fits many rows per pass.
+ */
+static Bool
+R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
+                       char *dst, int dst_pitch)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    int bpp = pSrc->drawable.bitsPerPixel;
+    uint32_t src_pitch, src_mc_addr, src_width, src_height;
+    uint32_t scratch_mc_addr, scratch_pitch;
+    int wpass, scratch_pitch_bytes, max_rows;
+    int scratch_offset = 0, hpass;
+    drmBufPtr scratch;
+
+    /* bpp / 8 is used as a divisor below; negative sizes make no sense */
+    if (bpp < 8 || w < 0 || h < 0)
+        return FALSE;
+
+    src_pitch = exaGetPixmapPitch(pSrc) / (bpp / 8);
+    if (src_pitch & 7)
+        return FALSE;
+
+    /* empty rectangle: nothing to transfer */
+    if (w == 0 || h == 0)
+        return TRUE;
+
+    src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
+    src_width = pSrc->drawable.width;
+    src_height = pSrc->drawable.height;
+
+    wpass = w * (bpp / 8);                          /* bytes copied per row */
+    scratch_pitch_bytes = (wpass + 255) & ~255;     /* row stride in scratch, 256-byte aligned */
+    scratch_pitch = scratch_pitch_bytes / (bpp / 8);
+
+    scratch = RADEONCPGetBuffer(pScrn);
+    if (scratch == NULL)
+        return FALSE;
+
+    /* A row that does not fit in half of the scratch buffer would give
+     * hpass == 0 on every pass and the loop below would never finish. */
+    max_rows = scratch->total / 2 / scratch_pitch_bytes;
+    if (max_rows == 0) {
+        R600IBDiscard(pScrn, scratch);
+        return FALSE;
+    }
+
+    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
+    hpass = min(h, max_rows);
+
+    //blit from vram to scratch
+    /* trailing 3, 0xffffffff: presumably copy rop and full planemask --
+     * confirm against R600DoPrepareCopy */
+    R600DoPrepareCopy(pScrn,
+                      src_pitch, src_width, src_height, src_mc_addr, bpp,
+                      scratch_pitch, hpass, scratch_mc_addr, bpp,
+                      3, 0xffffffff);
+    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
+    R600DoCopy(pScrn);
+
+    while (h) {
+        char *src = (char *)scratch->address + scratch_offset;
+        int oldhpass = hpass;
+        h -= oldhpass;
+        y += oldhpass;
+        hpass = min(h, max_rows);
+
+        if (hpass) {
+            /* queue the next band into the other half of the scratch buffer */
+            scratch_offset = scratch->total/2 - scratch_offset;
+            //blit from vram to scratch
+            R600DoPrepareCopy(pScrn,
+                              src_pitch, src_width, src_height, src_mc_addr, bpp,
+                              scratch_pitch, hpass, scratch_mc_addr + scratch_offset, bpp,
+                              3, 0xffffffff);
+            R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
+            R600DoCopy(pScrn);
+        }
+
+        // wait for the engine to be idle
+        RADEONWaitForIdleCP(pScrn);
+        //memcopy from scratch to sys
+        while (oldhpass--) {
+            memcpy (dst, src, wpass);
+            dst += dst_pitch;
+            src += scratch_pitch_bytes;
+        }
+    }
+
+    R600IBDiscard(pScrn, scratch);
+
+    return TRUE;
+
+}
+
+/*
+ * Advance the per-screen sync marker counter and return its new value.
+ */
+static int
+R600MarkSync(ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *state = info->accel_state;
+
+    state->exaSyncMarker += 1;
+
+    return state->exaSyncMarker;
+}
+
+/*
+ * Wait for the CP to go idle unless `marker` is the one that was recorded by
+ * the previous wait, then remember `marker` as synced.
+ */
+static void
+R600Sync(ScreenPtr pScreen, int marker)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *state = info->accel_state;
+
+    /* already waited for this marker */
+    if (state->exaMarkerSynced == marker)
+        return;
+
+    RADEONWaitForIdleCP(pScrn);
+    state->exaMarkerSynced = marker;
+}
+
+static Bool
+R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+ uint32_t *vs;
+ uint32_t *ps;
+ // 512 bytes per shader for now
+ int size = 512 * 11;
+ int i;
+
+ accel_state->shaders = NULL;
+
+ accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
+ TRUE, NULL, NULL);
+
+ if (accel_state->shaders == NULL)
+ return FALSE;
+
+ vs = (pointer)((char *)info->FB + accel_state->shaders->offset);
+ ps = (pointer)((char *)info->FB + accel_state->shaders->offset);
+ accel_state->solid_vs_offset = 0;
+ accel_state->solid_ps_offset = 512;
+ accel_state->copy_vs_offset = 1024;
+ accel_state->copy_ps_offset = 1536;
+ accel_state->comp_vs_offset = 2048;
+ accel_state->comp_ps_offset = 2560;
+ accel_state->comp_mask_vs_offset = 3072;
+ accel_state->comp_mask_ps_offset = 3584;
+ accel_state->xv_vs_offset = 4096;
+ accel_state->xv_ps_offset_packed = 4608;
+ accel_state->xv_ps_offset_planar = 5120;
+
+ // solid vs ---------------------------------------
+ i = accel_state->solid_vs_offset / 4;
+ //0
+ vs[i++] = CF_DWORD0(ADDR(4));
+ vs[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //1
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //2 - always export a param whether it's used or not
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ //3 - padding
+ vs[i++] = 0x00000000;
+ vs[i++] = 0x00000000;
+ //4/5
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1));
+ vs[i++] = VTX_DWORD_PAD;
+
+ // solid ps ---------------------------------------
+ i = accel_state->solid_ps_offset / 4;
+ // 0
+ ps[i++] = CF_ALU_DWORD0(ADDR(2),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // 1
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ // 2
+ ps[i++] = ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 3
+ ps[i++] = ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ // 4
+ ps[i++] = ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ // 5
+ ps[i++] = ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+
+ // copy vs ---------------------------------------
+ i = accel_state->copy_vs_offset / 4;
+ //0
+ vs[i++] = CF_DWORD0(ADDR(4));
+ vs[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //1
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //2
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ //3
+ vs[i++] = 0x00000000;
+ vs[i++] = 0x00000000;
+ //4/5
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1));
+ vs[i++] = VTX_DWORD_PAD;
+ //6/7
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0));
+ vs[i++] = VTX_DWORD_PAD;
+
+ // copy ps ---------------------------------------
+ i = accel_state->copy_ps_offset / 4;
+ // CF INST 0
+ ps[i++] = CF_DWORD0(ADDR(2));
+ ps[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(1),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // CF INST 1
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // TEX INST 0
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X), //R
+ DST_SEL_Y(SQ_SEL_Y), //G
+ DST_SEL_Z(SQ_SEL_Z), //B
+ DST_SEL_W(SQ_SEL_W), //A
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_UNNORMALIZED),
+ COORD_TYPE_Y(TEX_UNNORMALIZED),
+ COORD_TYPE_Z(TEX_UNNORMALIZED),
+ COORD_TYPE_W(TEX_UNNORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ ps[i++] = TEX_DWORD_PAD;
+
+ // xv vs ---------------------------------------
+ i = accel_state->xv_vs_offset / 4;
+ //0
+ vs[i++] = CF_DWORD0(ADDR(4));
+ vs[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //1
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //2
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ //3
+ vs[i++] = 0x00000000;
+ vs[i++] = 0x00000000;
+ //4/5
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1));
+ vs[i++] = VTX_DWORD_PAD;
+ //6/7
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0));
+ vs[i++] = VTX_DWORD_PAD;
+
+ // xv ps packed ----------------------------------
+ i = accel_state->xv_ps_offset_packed / 4;
+ // 0
+ ps[i++] = CF_DWORD0(ADDR(20));
+ ps[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ // 1
+ ps[i++] = CF_ALU_DWORD0(ADDR(3),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(16),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // 2
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(3));
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* Undo scaling of Y'CbCr values
+ * Y' is scaled from 16:235
+ * Cb/Cr are scaled from 16:240
+ */
+ // 3 - alu 0
+ // MULADD gpr[1].x gpr[1].x c[3].x c[3].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(259),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 4 - alu 1
+ // MULADD gpr[1].y gpr[1].y c[3].z c[3].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(259),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ // 5 - alu 2
+ // MULADD gpr[1].z gpr[1].z c[3].z c[3].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(259),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ // 6 - alu 3
+ // MOV gpr[1].w 0.0
+ ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+ // 7 - alu 4
+ // DP4 gpr[2].x gpr[1].x c[0].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 8 - alu 5
+ // DP4 gpr[2].y gpr[1].y c[0].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ // 9 - alu 6
+ // DP4 gpr[2].z gpr[1].z c[0].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ // 10 - alu 7
+ // DP4 gpr[2].w gpr[1].w c[0].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ // 11 - alu 8
+ // DP4 gpr[2].x gpr[1].x c[1].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 12 - alu 9
+ // DP4 gpr[2].y gpr[1].y c[1].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ // 13 - alu 10
+ // DP4 gpr[2].z gpr[1].z c[1].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ // 14 - alu 11
+ // DP4 gpr[2].w gpr[1].w c[1].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ // 15 - alu 12
+ // DP4 gpr[2].x gpr[1].x c[2].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 16 - alu 13
+ // DP4 gpr[2].y gpr[1].y c[2].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ // 17 - alu 14
+ // DP4 gpr[2].z gpr[1].z c[2].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ // 18 - alu 15
+ // DP4 gpr[2].w gpr[1].w c[2].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ // 19 - alignment
+ ps[i++] = 0x00000000;
+ ps[i++] = 0x00000000;
+ // 20/21 - tex 0
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X), //R
+ DST_SEL_Y(SQ_SEL_MASK), //G
+ DST_SEL_Z(SQ_SEL_MASK), //B
+ DST_SEL_W(SQ_SEL_1), //A
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ ps[i++] = TEX_DWORD_PAD;
+ // 22/23 - tex 1
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK), //R
+ DST_SEL_Y(SQ_SEL_X), //G
+ DST_SEL_Z(SQ_SEL_Y), //B
+ DST_SEL_W(SQ_SEL_MASK), //A
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ ps[i++] = TEX_DWORD_PAD;
+
+ // xv ps planar ----------------------------------
+ i = accel_state->xv_ps_offset_planar / 4;
+ // 0
+ ps[i++] = CF_DWORD0(ADDR(20));
+ ps[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(3),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ // 1
+ ps[i++] = CF_ALU_DWORD0(ADDR(3),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(16),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // 2
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(3));
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* Undo scaling of Y'CbCr values
+ * Y' is scaled from 16:235
+ * Cb/Cr are scaled from 16:240
+ */
+ // 3 - alu 0
+ // MULADD gpr[1].x gpr[1].x c[3].x c[3].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(259),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 4 - alu 1
+ // MULADD gpr[1].y gpr[1].y c[3].z c[3].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(259),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ // 5 - alu 2
+ // MULADD gpr[1].z gpr[1].z c[3].z c[3].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(259),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ // 6 - alu 3
+ // MOV gpr[1].w 0.0
+ ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+ // 7 - alu 4
+ // DP4 gpr[2].x gpr[1].x c[0].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 8 - alu 5
+ // DP4 gpr[2].y gpr[1].y c[0].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ // 9 - alu 6
+ // DP4 gpr[2].z gpr[1].z c[0].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ // 10 - alu 7
+ // DP4 gpr[2].w gpr[1].w c[0].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ // 11 - alu 8
+ // DP4 gpr[2].x gpr[1].x c[1].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 12 - alu 9
+ // DP4 gpr[2].y gpr[1].y c[1].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ // 13 - alu 10
+ // DP4 gpr[2].z gpr[1].z c[1].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ // 14 - alu 11
+ // DP4 gpr[2].w gpr[1].w c[1].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ // 15 - alu 12
+ // DP4 gpr[2].x gpr[1].x c[2].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 16 - alu 13
+ // DP4 gpr[2].y gpr[1].y c[2].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ // 17 - alu 14
+ // DP4 gpr[2].z gpr[1].z c[2].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ // 18 - alu 15
+ // DP4 gpr[2].w gpr[1].w c[2].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ // 19 - alignment
+ ps[i++] = 0x00000000;
+ ps[i++] = 0x00000000;
+ // 20/21 - tex 0
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X), //R
+ DST_SEL_Y(SQ_SEL_MASK), //G
+ DST_SEL_Z(SQ_SEL_MASK), //B
+ DST_SEL_W(SQ_SEL_1), //A
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ ps[i++] = TEX_DWORD_PAD;
+ // 22/23 - tex 1
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK), //R
+ DST_SEL_Y(SQ_SEL_MASK), //G
+ DST_SEL_Z(SQ_SEL_X), //B
+ DST_SEL_W(SQ_SEL_MASK), //A
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ ps[i++] = TEX_DWORD_PAD;
+ // 24/25 - tex 2
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(2),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK), //R
+ DST_SEL_Y(SQ_SEL_X), //G
+ DST_SEL_Z(SQ_SEL_MASK), //B
+ DST_SEL_W(SQ_SEL_MASK), //A
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(2),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ ps[i++] = TEX_DWORD_PAD;
+
+ // comp mask vs ---------------------------------------
+ i = accel_state->comp_mask_vs_offset / 4;
+ //0
+ vs[i++] = CF_DWORD0(ADDR(4));
+ vs[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(3),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //1 - dst
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //2 - src
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ //3 - mask
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ //4/5 - dst
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(24));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(2),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1));
+ vs[i++] = VTX_DWORD_PAD;
+ //6/7 - src
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0));
+ vs[i++] = VTX_DWORD_PAD;
+ //8/9 - mask
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(16),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0));
+ vs[i++] = VTX_DWORD_PAD;
+
+ // comp mask vs ---------------------------------------
+ i = accel_state->comp_mask_vs_offset / 4;
+ //0
+ vs[i++] = CF_DWORD0(ADDR(4));
+ vs[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(3),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //1 - dst
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //2 - src
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ //3 - mask
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ //4/5 - dst
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(24));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(2),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1));
+ vs[i++] = VTX_DWORD_PAD;
+ //6/7 - src
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0));
+ vs[i++] = VTX_DWORD_PAD;
+ //8/9 - mask
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(16),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0));
+ vs[i++] = VTX_DWORD_PAD;
+
+ // comp mask ps ---------------------------------------
+ // not yet
+
+ // comp vs ---------------------------------------
+ i = accel_state->comp_vs_offset / 4;
+ //0
+ vs[i++] = CF_DWORD0(ADDR(4));
+ vs[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //1 - dst
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ //2 - src
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ //3
+ vs[i++] = 0x00000000;
+ vs[i++] = 0x00000000;
+ //4/5 - dst
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(16));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1));
+ vs[i++] = VTX_DWORD_PAD;
+ //6/7 - src
+ vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ vs[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ vs[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0));
+ vs[i++] = VTX_DWORD_PAD;
+
+ // comp ps ---------------------------------------
+ // not yet
+
+
+ return TRUE;
+}
+
+/*
+ * EXA PrepareAccess hook.
+ *
+ * Writes 1 to HDP_MEM_COHERENCY_FLUSH_CNTL to flush the HDP read/write
+ * caches.  The index argument is not used.  Always returns TRUE.
+ */
+static Bool
+R600PrepareAccess(PixmapPtr pPix, int index)
+{
+    ScreenPtr screen = pPix->drawable.pScreen;
+    RADEONInfoPtr info = RADEONPTR(xf86Screens[screen->myNum]);
+    /* Keep this local's name: the OUTREG register macro may refer to it. */
+    unsigned char *RADEONMMIO = info->MMIO;
+
+    /* flush HDP read/write caches */
+    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
+
+    return TRUE;
+}
+
+/*
+ * EXA FinishAccess hook.
+ *
+ * Writes 1 to HDP_MEM_COHERENCY_FLUSH_CNTL to flush the HDP read/write
+ * caches.  The index argument is not used.
+ */
+static void
+R600FinishAccess(PixmapPtr pPix, int index)
+{
+    ScreenPtr screen = pPix->drawable.pScreen;
+    RADEONInfoPtr info = RADEONPTR(xf86Screens[screen->myNum]);
+    /* Keep this local's name: the OUTREG register macro may refer to it. */
+    unsigned char *RADEONMMIO = info->MMIO;
+
+    /* flush HDP read/write caches */
+    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
+}
+
+
+/*
+ * R600DrawInit - fill in the EXA driver record for R6xx/R7xx and start EXA.
+ *
+ * Requires info->accel_state->exa to be allocated already; returns FALSE
+ * with an error message otherwise.  Installs the solid, copy, sync,
+ * CPU-access, upload/download and composite hooks, sets the pixmap
+ * alignment and coordinate limits, reads the EXA vsync option, calls
+ * exaDriverInit() and finally loads the shaders with R600LoadShaders().
+ *
+ * If exaDriverInit() fails, the driver record is freed and the pointer
+ * is reset to NULL so that nothing can later use or free it again.
+ *
+ * Returns TRUE on success, FALSE on any failure.
+ */
+Bool
+R600DrawInit(ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    if (info->accel_state->exa == NULL) {
+        xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
+        return FALSE;
+    }
+
+    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
+    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
+
+    /* Solid fills */
+    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
+    info->accel_state->exa->Solid = R600Solid;
+    info->accel_state->exa->DoneSolid = R600DoneSolid;
+
+    /* Screen-to-screen copies */
+    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
+    info->accel_state->exa->Copy = R600Copy;
+    info->accel_state->exa->DoneCopy = R600DoneCopy;
+
+    /* Synchronisation markers */
+    info->accel_state->exa->MarkSync = R600MarkSync;
+    info->accel_state->exa->WaitMarker = R600Sync;
+
+    /* CPU access bracketing */
+    info->accel_state->exa->PrepareAccess = R600PrepareAccess;
+    info->accel_state->exa->FinishAccess = R600FinishAccess;
+
+    /* Transfers between system memory and the screen */
+    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
+    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
+
+    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
+    info->accel_state->exa->pixmapOffsetAlign = 256;
+    info->accel_state->exa->pixmapPitchAlign = 256;
+
+    /* Render composite */
+    info->accel_state->exa->CheckComposite = R600CheckComposite;
+    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
+    info->accel_state->exa->Composite = R600Composite;
+    info->accel_state->exa->DoneComposite = R600DoneComposite;
+
+    /* maxPitchBytes only exists in the EXA driver record from EXA 2.3 on;
+     * older versions get a smaller maxX derived from the same byte limit. */
+#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
+
+    info->accel_state->exa->maxPitchBytes = 16320;
+    info->accel_state->exa->maxX = 8192;
+#else
+    info->accel_state->exa->maxX = 16320 / 4;
+#endif
+    info->accel_state->exa->maxY = 8192;
+
+    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
+        xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
+        info->accel_state->vsync = TRUE;
+    } else
+        info->accel_state->vsync = FALSE;
+
+    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
+        xfree(info->accel_state->exa);
+        /* Do not leave a dangling pointer behind: code that checks
+         * accel_state->exa for NULL must not see the freed record. */
+        info->accel_state->exa = NULL;
+        return FALSE;
+    }
+
+    /* NOTE(review): the two failure returns below leave EXA initialised
+     * with the hooks installed above - confirm the caller copes with that. */
+    if (!info->gartLocation)
+        return FALSE;
+
+    info->accel_state->XInited3D = FALSE;
+    info->accel_state->copy_area = NULL;
+
+    if (!R600LoadShaders(pScrn, pScreen))
+        return FALSE;
+
+    exaMarkSync(pScreen);
+
+    return TRUE;
+
+}
+
diff --git a/src/r600_reg.h b/src/r600_reg.h
new file mode 100644
index 00000000..9036e2a5
--- /dev/null
+++ b/src/r600_reg.h
@@ -0,0 +1,132 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_H_
+#define _R600_REG_H_
+
+/*
+ * Register definitions
+ */
+
+#include "r600_reg_auto_r6xx.h"
+#include "r600_reg_r6xx.h"
+#include "r600_reg_r7xx.h"
+
+
+/* SET_*_REG ranges: for each SET_* kind, *_offset is where its range starts and *_end is where it stops */
+enum {
+ SET_CONFIG_REG_offset = 0x00008000,
+ SET_CONFIG_REG_end = 0x0000ac00,
+ SET_CONTEXT_REG_offset = 0x00028000,
+ SET_CONTEXT_REG_end = 0x00029000,
+ SET_ALU_CONST_offset = 0x00030000,
+ SET_ALU_CONST_end = 0x00032000,
+ SET_RESOURCE_offset = 0x00038000,
+ SET_RESOURCE_end = 0x0003c000, /* same value as SET_SAMPLER_offset: the ranges from here down are back-to-back */
+ SET_SAMPLER_offset = 0x0003c000,
+ SET_SAMPLER_end = 0x0003cff0,
+ SET_CTL_CONST_offset = 0x0003cff0,
+ SET_CTL_CONST_end = 0x0003e200,
+ SET_LOOP_CONST_offset = 0x0003e200,
+ SET_LOOP_CONST_end = 0x0003e380,
+ SET_BOOL_CONST_offset = 0x0003e380,
+ SET_BOOL_CONST_end = 0x00040000, /* NOTE(review): *_end looks exclusive, since each one equals the next *_offset -- confirm */
+} ;
+
+/* Flag bits for the packet3 IT_SURFACE_BASE_UPDATE command: one distinct bit per base, so values can be OR-ed together */
+enum {
+ DEPTH_BASE = (1 << 0),
+ COLOR0_BASE = (1 << 1), /* COLOR0..COLOR7 occupy bits 1..8 */
+ COLOR1_BASE = (1 << 2),
+ COLOR2_BASE = (1 << 3),
+ COLOR3_BASE = (1 << 4),
+ COLOR4_BASE = (1 << 5),
+ COLOR5_BASE = (1 << 6),
+ COLOR6_BASE = (1 << 7),
+ COLOR7_BASE = (1 << 8),
+ STRMOUT_BASE0 = (1 << 9), /* STRMOUT_BASE0..3 occupy bits 9..12 */
+ STRMOUT_BASE1 = (1 << 10),
+ STRMOUT_BASE2 = (1 << 11),
+ STRMOUT_BASE3 = (1 << 12),
+ COHER_BASE0 = (1 << 13), /* COHER_BASE0..1 occupy bits 13..14 */
+ COHER_BASE1 = (1 << 14),
+};
+
+/* Packet3 commands: IT_* opcode values, listed in ascending order except where noted */
+enum {
+ IT_NOP = 0x10,
+ IT_INDIRECT_BUFFER_END = 0x17,
+ IT_SET_PREDICATION = 0x20,
+ IT_REG_RMW = 0x21,
+ IT_COND_EXEC = 0x22,
+ IT_PRED_EXEC = 0x23,
+ IT_START_3D_CMDBUF = 0x24,
+ IT_DRAW_INDEX_2 = 0x27,
+ IT_CONTEXT_CONTROL = 0x28,
+ IT_DRAW_INDEX_IMMD_BE = 0x29,
+ IT_INDEX_TYPE = 0x2A,
+ IT_DRAW_INDEX = 0x2B,
+ IT_DRAW_INDEX_AUTO = 0x2D,
+ IT_DRAW_INDEX_IMMD = 0x2E,
+ IT_NUM_INSTANCES = 0x2F,
+ IT_STRMOUT_BUFFER_UPDATE = 0x34,
+ IT_INDIRECT_BUFFER_MP = 0x38,
+ IT_MEM_SEMAPHORE = 0x39,
+ IT_MPEG_INDEX = 0x3A,
+ IT_WAIT_REG_MEM = 0x3C, /* operand encodings are the IT_WAIT_* macros defined after this enum */
+ IT_MEM_WRITE = 0x3D,
+ IT_INDIRECT_BUFFER = 0x32, /* listed out of numeric order; the value does not collide with any other entry */
+ IT_CP_INTERRUPT = 0x40,
+ IT_SURFACE_SYNC = 0x43,
+ IT_ME_INITIALIZE = 0x44,
+ IT_COND_WRITE = 0x45,
+ IT_EVENT_WRITE = 0x46,
+ IT_EVENT_WRITE_EOP = 0x47,
+ IT_ONE_REG_WRITE = 0x57,
+ IT_SET_CONFIG_REG = 0x68, /* IT_SET_* names mirror the SET_*_offset / SET_*_end ranges declared earlier in this header */
+ IT_SET_CONTEXT_REG = 0x69,
+ IT_SET_ALU_CONST = 0x6A,
+ IT_SET_BOOL_CONST = 0x6B,
+ IT_SET_LOOP_CONST = 0x6C,
+ IT_SET_RESOURCE = 0x6D,
+ IT_SET_SAMPLER = 0x6E,
+ IT_SET_CTL_CONST = 0x6F,
+ IT_SURFACE_BASE_UPDATE = 0x73, /* flag bits for this command: DEPTH_BASE .. COHER_BASE1 */
+} ;
+
+/* IT_WAIT_REG_MEM operation encoding: a comparison code (0..6) in the low bits, plus a register/memory selector in bit 4 */
+
+#define IT_WAIT_ALWAYS (0<<0) /* comparison codes, by name: always, <, <=, ==, !=, >=, > */
+#define IT_WAIT_LT (1<<0)
+#define IT_WAIT_LE (2<<0)
+#define IT_WAIT_EQ (3<<0)
+#define IT_WAIT_NE (4<<0)
+#define IT_WAIT_GE (5<<0)
+#define IT_WAIT_GT (6<<0)
+#define IT_WAIT_REG (0<<4) /* bit 4 clear */
+#define IT_WAIT_MEM (1<<4) /* bit 4 set */
+
+#define IT_WAIT_ADDR(x) ((x) >> 2) /* drops the two low bits of x; presumably byte address -> dword address -- confirm */
+
+#endif
diff --git a/src/r600_reg_auto_r6xx.h b/src/r600_reg_auto_r6xx.h
new file mode 100644
index 00000000..9d5aa3c7
--- /dev/null
+++ b/src/r600_reg_auto_r6xx.h
@@ -0,0 +1,3087 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _AUTOREGS
+#define _AUTOREGS
+
+enum {
+
+ VGT_VTX_VECT_EJECT_REG = 0x000088b0,
+ PRIM_COUNT_mask = 0x3ff << 0,
+ PRIM_COUNT_shift = 0,
+ VGT_LAST_COPY_STATE = 0x000088c0,
+ SRC_STATE_ID_mask = 0x07 << 0,
+ SRC_STATE_ID_shift = 0,
+ DST_STATE_ID_mask = 0x07 << 16,
+ DST_STATE_ID_shift = 16,
+ VGT_CACHE_INVALIDATION = 0x000088c4,
+ CACHE_INVALIDATION_mask = 0x03 << 0,
+ CACHE_INVALIDATION_shift = 0,
+ VC_ONLY = 0x00,
+ TC_ONLY = 0x01,
+ VC_AND_TC = 0x02,
+ VS_NO_EXTRA_BUFFER_bit = 1 << 5,
+ VGT_GS_PER_ES = 0x000088c8,
+ VGT_ES_PER_GS = 0x000088cc,
+ VGT_GS_VERTEX_REUSE = 0x000088d4,
+ VERT_REUSE_mask = 0x1f << 0,
+ VERT_REUSE_shift = 0,
+ VGT_MC_LAT_CNTL = 0x000088d8,
+ MC_TIME_STAMP_RES_mask = 0x03 << 0,
+ MC_TIME_STAMP_RES_shift = 0,
+ X_0_992_MAX_LATENCY = 0x00,
+ X_0_496_MAX_LATENCY = 0x01,
+ X_0_248_MAX_LATENCY = 0x02,
+ X_0_124_MAX_LATENCY = 0x03,
+ VGT_GS_PER_VS = 0x000088e8,
+ GS_PER_VS_mask = 0x0f << 0,
+ GS_PER_VS_shift = 0,
+ VGT_CNTL_STATUS = 0x000088f0,
+ VGT_OUT_INDX_BUSY_bit = 1 << 0,
+ VGT_OUT_BUSY_bit = 1 << 1,
+ VGT_PT_BUSY_bit = 1 << 2,
+ VGT_TE_BUSY_bit = 1 << 3,
+ VGT_VR_BUSY_bit = 1 << 4,
+ VGT_GRP_BUSY_bit = 1 << 5,
+ VGT_DMA_REQ_BUSY_bit = 1 << 6,
+ VGT_DMA_BUSY_bit = 1 << 7,
+ VGT_GS_BUSY_bit = 1 << 8,
+ VGT_BUSY_bit = 1 << 9,
+ VGT_PRIMITIVE_TYPE = 0x00008958,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift = 0,
+ DI_PT_NONE = 0x00,
+ DI_PT_POINTLIST = 0x01,
+ DI_PT_LINELIST = 0x02,
+ DI_PT_LINESTRIP = 0x03,
+ DI_PT_TRILIST = 0x04,
+ DI_PT_TRIFAN = 0x05,
+ DI_PT_TRISTRIP = 0x06,
+ DI_PT_UNUSED_0 = 0x07,
+ DI_PT_UNUSED_1 = 0x08,
+ DI_PT_UNUSED_2 = 0x09,
+ DI_PT_LINELIST_ADJ = 0x0a,
+ DI_PT_LINESTRIP_ADJ = 0x0b,
+ DI_PT_TRILIST_ADJ = 0x0c,
+ DI_PT_TRISTRIP_ADJ = 0x0d,
+ DI_PT_UNUSED_3 = 0x0e,
+ DI_PT_UNUSED_4 = 0x0f,
+ DI_PT_TRI_WITH_WFLAGS = 0x10,
+ DI_PT_RECTLIST = 0x11,
+ DI_PT_LINELOOP = 0x12,
+ DI_PT_QUADLIST = 0x13,
+ DI_PT_QUADSTRIP = 0x14,
+ DI_PT_POLYGON = 0x15,
+ DI_PT_2D_COPY_RECT_LIST_V0 = 0x16,
+ DI_PT_2D_COPY_RECT_LIST_V1 = 0x17,
+ DI_PT_2D_COPY_RECT_LIST_V2 = 0x18,
+ DI_PT_2D_COPY_RECT_LIST_V3 = 0x19,
+ DI_PT_2D_FILL_RECT_LIST = 0x1a,
+ DI_PT_2D_LINE_STRIP = 0x1b,
+ DI_PT_2D_TRI_STRIP = 0x1c,
+ VGT_INDEX_TYPE = 0x0000895c,
+ INDEX_TYPE_mask = 0x03 << 0,
+ INDEX_TYPE_shift = 0,
+ DI_INDEX_SIZE_16_BIT = 0x00,
+ DI_INDEX_SIZE_32_BIT = 0x01,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968,
+ VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c,
+ VGT_NUM_INDICES = 0x00008970,
+ VGT_NUM_INSTANCES = 0x00008974,
+ PA_CL_CNTL_STATUS = 0x00008a10,
+ CL_BUSY_bit = 1 << 31,
+ PA_CL_ENHANCE = 0x00008a14,
+ CLIP_VTX_REORDER_ENA_bit = 1 << 0,
+ NUM_CLIP_SEQ_mask = 0x03 << 1,
+ NUM_CLIP_SEQ_shift = 1,
+ CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3,
+ VE_NAN_PROC_DISABLE_bit = 1 << 4,
+ PA_SU_CNTL_STATUS = 0x00008a50,
+ SU_BUSY_bit = 1 << 31,
+ PA_SC_LINE_STIPPLE_STATE = 0x00008b10,
+ CURRENT_PTR_mask = 0x0f << 0,
+ CURRENT_PTR_shift = 0,
+ CURRENT_COUNT_mask = 0xff << 8,
+ CURRENT_COUNT_shift = 8,
+ PA_SC_MULTI_CHIP_CNTL = 0x00008b20,
+ LOG2_NUM_CHIPS_mask = 0x07 << 0,
+ LOG2_NUM_CHIPS_shift = 0,
+ MULTI_CHIP_TILE_SIZE_mask = 0x03 << 3,
+ MULTI_CHIP_TILE_SIZE_shift = 3,
+ X_16_X_16_PIXEL_TILE_PER_CHIP = 0x00,
+ X_32_X_32_PIXEL_TILE_PER_CHIP = 0x01,
+ X_64_X_64_PIXEL_TILE_PER_CHIP = 0x02,
+ X_128X128_PIXEL_TILE_PER_CHIP = 0x03,
+ CHIP_TILE_X_LOC_mask = 0x07 << 5,
+ CHIP_TILE_X_LOC_shift = 5,
+ CHIP_TILE_Y_LOC_mask = 0x07 << 8,
+ CHIP_TILE_Y_LOC_shift = 8,
+ CHIP_SUPER_TILE_B_bit = 1 << 11,
+ PA_SC_AA_SAMPLE_LOCS_2S = 0x00008b40,
+ S0_X_mask = 0x0f << 0,
+ S0_X_shift = 0,
+ S0_Y_mask = 0x0f << 4,
+ S0_Y_shift = 4,
+ S1_X_mask = 0x0f << 8,
+ S1_X_shift = 8,
+ S1_Y_mask = 0x0f << 12,
+ S1_Y_shift = 12,
+ PA_SC_AA_SAMPLE_LOCS_4S = 0x00008b44,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+ S2_X_mask = 0x0f << 16,
+ S2_X_shift = 16,
+ S2_Y_mask = 0x0f << 20,
+ S2_Y_shift = 20,
+ S3_X_mask = 0x0f << 24,
+ S3_X_shift = 24,
+ S3_Y_mask = 0x0f << 28,
+ S3_Y_shift = 28,
+ PA_SC_AA_SAMPLE_LOCS_8S_WD0 = 0x00008b48,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_8S_WD1 = 0x00008b4c,
+ S4_X_mask = 0x0f << 0,
+ S4_X_shift = 0,
+ S4_Y_mask = 0x0f << 4,
+ S4_Y_shift = 4,
+ S5_X_mask = 0x0f << 8,
+ S5_X_shift = 8,
+ S5_Y_mask = 0x0f << 12,
+ S5_Y_shift = 12,
+ S6_X_mask = 0x0f << 16,
+ S6_X_shift = 16,
+ S6_Y_mask = 0x0f << 20,
+ S6_Y_shift = 20,
+ S7_X_mask = 0x0f << 24,
+ S7_X_shift = 24,
+ S7_Y_mask = 0x0f << 28,
+ S7_Y_shift = 28,
+ PA_SC_CNTL_STATUS = 0x00008be0,
+ MPASS_OVERFLOW_bit = 1 << 30,
+ PA_SC_ENHANCE = 0x00008bf0,
+ FORCE_EOV_MAX_CLK_CNT_mask = 0xfff << 0,
+ FORCE_EOV_MAX_CLK_CNT_shift = 0,
+ FORCE_EOV_MAX_TILE_CNT_mask = 0xfff << 12,
+ FORCE_EOV_MAX_TILE_CNT_shift = 12,
+ SQ_CONFIG = 0x00008c00,
+ VC_ENABLE_bit = 1 << 0,
+ EXPORT_SRC_C_bit = 1 << 1,
+ DX9_CONSTS_bit = 1 << 2,
+ ALU_INST_PREFER_VECTOR_bit = 1 << 3,
+ SQ_CONFIG__DX10_CLAMP_bit = 1 << 4,
+ ALU_PREFER_ONE_WATERFALL_bit = 1 << 5,
+ ALU_MAX_ONE_WATERFALL_bit = 1 << 6,
+ CLAUSE_SEQ_PRIO_mask = 0x03 << 8,
+ CLAUSE_SEQ_PRIO_shift = 8,
+ SQ_CL_PRIO_RND_ROBIN = 0x00,
+ SQ_CL_PRIO_MACRO_SEQ = 0x01,
+ SQ_CL_PRIO_NONE = 0x02,
+ PS_PRIO_mask = 0x03 << 24,
+ PS_PRIO_shift = 24,
+ VS_PRIO_mask = 0x03 << 26,
+ VS_PRIO_shift = 26,
+ GS_PRIO_mask = 0x03 << 28,
+ GS_PRIO_shift = 28,
+ ES_PRIO_mask = 0x03 << 30,
+ ES_PRIO_shift = 30,
+ SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04,
+ NUM_PS_GPRS_mask = 0xff << 0,
+ NUM_PS_GPRS_shift = 0,
+ NUM_VS_GPRS_mask = 0xff << 16,
+ NUM_VS_GPRS_shift = 16,
+ NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28,
+ NUM_CLAUSE_TEMP_GPRS_shift = 28,
+ SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08,
+ NUM_GS_GPRS_mask = 0xff << 0,
+ NUM_GS_GPRS_shift = 0,
+ NUM_ES_GPRS_mask = 0xff << 16,
+ NUM_ES_GPRS_shift = 16,
+ SQ_THREAD_RESOURCE_MGMT = 0x00008c0c,
+ NUM_PS_THREADS_mask = 0xff << 0,
+ NUM_PS_THREADS_shift = 0,
+ NUM_VS_THREADS_mask = 0xff << 8,
+ NUM_VS_THREADS_shift = 8,
+ NUM_GS_THREADS_mask = 0xff << 16,
+ NUM_GS_THREADS_shift = 16,
+ NUM_ES_THREADS_mask = 0xff << 24,
+ NUM_ES_THREADS_shift = 24,
+ SQ_STACK_RESOURCE_MGMT_1 = 0x00008c10,
+ NUM_PS_STACK_ENTRIES_mask = 0xfff << 0,
+ NUM_PS_STACK_ENTRIES_shift = 0,
+ NUM_VS_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_VS_STACK_ENTRIES_shift = 16,
+ SQ_STACK_RESOURCE_MGMT_2 = 0x00008c14,
+ NUM_GS_STACK_ENTRIES_mask = 0xfff << 0,
+ NUM_GS_STACK_ENTRIES_shift = 0,
+ NUM_ES_STACK_ENTRIES_mask = 0xfff << 16,
+ NUM_ES_STACK_ENTRIES_shift = 16,
+ SQ_ESGS_RING_BASE = 0x00008c40,
+ SQ_ESGS_RING_SIZE = 0x00008c44,
+ SQ_GSVS_RING_BASE = 0x00008c48,
+ SQ_GSVS_RING_SIZE = 0x00008c4c,
+ SQ_ESTMP_RING_BASE = 0x00008c50,
+ SQ_ESTMP_RING_SIZE = 0x00008c54,
+ SQ_GSTMP_RING_BASE = 0x00008c58,
+ SQ_GSTMP_RING_SIZE = 0x00008c5c,
+ SQ_VSTMP_RING_BASE = 0x00008c60,
+ SQ_VSTMP_RING_SIZE = 0x00008c64,
+ SQ_PSTMP_RING_BASE = 0x00008c68,
+ SQ_PSTMP_RING_SIZE = 0x00008c6c,
+ SQ_FBUF_RING_BASE = 0x00008c70,
+ SQ_FBUF_RING_SIZE = 0x00008c74,
+ SQ_REDUC_RING_BASE = 0x00008c78,
+ SQ_REDUC_RING_SIZE = 0x00008c7c,
+ SQ_ALU_WORD1_OP3 = 0x00008dfc,
+ SRC2_SEL_mask = 0x1ff << 0,
+ SRC2_SEL_shift = 0,
+ SQ_ALU_SRC_0 = 0xf8,
+ SQ_ALU_SRC_1 = 0xf9,
+ SQ_ALU_SRC_1_INT = 0xfa,
+ SQ_ALU_SRC_M_1_INT = 0xfb,
+ SQ_ALU_SRC_0_5 = 0xfc,
+ SQ_ALU_SRC_LITERAL = 0xfd,
+ SQ_ALU_SRC_PV = 0xfe,
+ SQ_ALU_SRC_PS = 0xff,
+ SRC2_REL_bit = 1 << 9,
+ SRC2_CHAN_mask = 0x03 << 10,
+ SRC2_CHAN_shift = 10,
+ SQ_CHAN_X = 0x00,
+ SQ_CHAN_Y = 0x01,
+ SQ_CHAN_Z = 0x02,
+ SQ_CHAN_W = 0x03,
+ SRC2_NEG_bit = 1 << 12,
+ SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13,
+ SQ_ALU_WORD1_OP3__ALU_INST_shift = 13,
+ SQ_OP3_INST_MUL_LIT = 0x0c,
+ SQ_OP3_INST_MUL_LIT_M2 = 0x0d,
+ SQ_OP3_INST_MUL_LIT_M4 = 0x0e,
+ SQ_OP3_INST_MUL_LIT_D2 = 0x0f,
+ SQ_OP3_INST_MULADD = 0x10,
+ SQ_OP3_INST_MULADD_M2 = 0x11,
+ SQ_OP3_INST_MULADD_M4 = 0x12,
+ SQ_OP3_INST_MULADD_D2 = 0x13,
+ SQ_OP3_INST_MULADD_IEEE = 0x14,
+ SQ_OP3_INST_MULADD_IEEE_M2 = 0x15,
+ SQ_OP3_INST_MULADD_IEEE_M4 = 0x16,
+ SQ_OP3_INST_MULADD_IEEE_D2 = 0x17,
+ SQ_OP3_INST_CNDE = 0x18,
+ SQ_OP3_INST_CNDGT = 0x19,
+ SQ_OP3_INST_CNDGE = 0x1a,
+ SQ_OP3_INST_CNDE_INT = 0x1c,
+ SQ_OP3_INST_CNDGT_INT = 0x1d,
+ SQ_OP3_INST_CNDGE_INT = 0x1e,
+ SQ_TEX_WORD2 = 0x00008dfc,
+ OFFSET_X_mask = 0x1f << 0,
+ OFFSET_X_shift = 0,
+ OFFSET_Y_mask = 0x1f << 5,
+ OFFSET_Y_shift = 5,
+ OFFSET_Z_mask = 0x1f << 10,
+ OFFSET_Z_shift = 10,
+ SAMPLER_ID_mask = 0x1f << 15,
+ SAMPLER_ID_shift = 15,
+ SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20,
+ SQ_TEX_WORD2__SRC_SEL_X_shift = 20,
+ SQ_SEL_X = 0x00,
+ SQ_SEL_Y = 0x01,
+ SQ_SEL_Z = 0x02,
+ SQ_SEL_W = 0x03,
+ SQ_SEL_0 = 0x04,
+ SQ_SEL_1 = 0x05,
+ SRC_SEL_Y_mask = 0x07 << 23,
+ SRC_SEL_Y_shift = 23,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SRC_SEL_Z_mask = 0x07 << 26,
+ SRC_SEL_Z_shift = 26,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SRC_SEL_W_mask = 0x07 << 29,
+ SRC_SEL_W_shift = 29,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc,
+ BURST_COUNT_mask = 0x0f << 17,
+ BURST_COUNT_shift = 17,
+ END_OF_PROGRAM_bit = 1 << 21,
+ VALID_PIXEL_MODE_bit = 1 << 22,
+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23,
+ SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 23,
+ SQ_CF_INST_MEM_STREAM0 = 0x20,
+ SQ_CF_INST_MEM_STREAM1 = 0x21,
+ SQ_CF_INST_MEM_STREAM2 = 0x22,
+ SQ_CF_INST_MEM_STREAM3 = 0x23,
+ SQ_CF_INST_MEM_SCRATCH = 0x24,
+ SQ_CF_INST_MEM_REDUCTION = 0x25,
+ SQ_CF_INST_MEM_RING = 0x26,
+ SQ_CF_INST_EXPORT = 0x27,
+ SQ_CF_INST_EXPORT_DONE = 0x28,
+ WHOLE_QUAD_MODE_bit = 1 << 30,
+ BARRIER_bit = 1 << 31,
+ SQ_CF_ALU_WORD1 = 0x00008dfc,
+ KCACHE_MODE1_mask = 0x03 << 0,
+ KCACHE_MODE1_shift = 0,
+ SQ_CF_KCACHE_NOP = 0x00,
+ SQ_CF_KCACHE_LOCK_1 = 0x01,
+ SQ_CF_KCACHE_LOCK_2 = 0x02,
+ SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03,
+ KCACHE_ADDR0_mask = 0xff << 2,
+ KCACHE_ADDR0_shift = 2,
+ KCACHE_ADDR1_mask = 0xff << 10,
+ KCACHE_ADDR1_shift = 10,
+ SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18,
+ SQ_CF_ALU_WORD1__COUNT_shift = 18,
+ SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25,
+ SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26,
+ SQ_CF_ALU_WORD1__CF_INST_shift = 26,
+ SQ_CF_INST_ALU = 0x08,
+ SQ_CF_INST_ALU_PUSH_BEFORE = 0x09,
+ SQ_CF_INST_ALU_POP_AFTER = 0x0a,
+ SQ_CF_INST_ALU_POP2_AFTER = 0x0b,
+ SQ_CF_INST_ALU_CONTINUE = 0x0d,
+ SQ_CF_INST_ALU_BREAK = 0x0e,
+ SQ_CF_INST_ALU_ELSE_AFTER = 0x0f,
+/* WHOLE_QUAD_MODE_bit = 1 << 30, */
+/* BARRIER_bit = 1 << 31, */
+ SQ_TEX_WORD1 = 0x00008dfc,
+ SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0,
+ SQ_TEX_WORD1__DST_GPR_shift = 0,
+ SQ_TEX_WORD1__DST_REL_bit = 1 << 7,
+ SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_TEX_WORD1__DST_SEL_X_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_SEL_MASK = 0x07,
+ SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+ SQ_TEX_WORD1__DST_SEL_Y_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+ SQ_TEX_WORD1__DST_SEL_Z_shift = 15,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18,
+ SQ_TEX_WORD1__DST_SEL_W_shift = 18,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21,
+ SQ_TEX_WORD1__LOD_BIAS_shift = 21,
+ COORD_TYPE_X_bit = 1 << 28,
+ COORD_TYPE_Y_bit = 1 << 29,
+ COORD_TYPE_Z_bit = 1 << 30,
+ COORD_TYPE_W_bit = 1 << 31,
+ SQ_VTX_WORD0 = 0x00008dfc,
+ VTX_INST_mask = 0x1f << 0,
+ VTX_INST_shift = 0,
+ SQ_VTX_INST_FETCH = 0x00,
+ SQ_VTX_INST_SEMANTIC = 0x01,
+ FETCH_TYPE_mask = 0x03 << 5,
+ FETCH_TYPE_shift = 5,
+ SQ_VTX_FETCH_VERTEX_DATA = 0x00,
+ SQ_VTX_FETCH_INSTANCE_DATA = 0x01,
+ SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02,
+ FETCH_WHOLE_QUAD_bit = 1 << 7,
+ BUFFER_ID_mask = 0xff << 8,
+ BUFFER_ID_shift = 8,
+ SRC_GPR_mask = 0x7f << 16,
+ SRC_GPR_shift = 16,
+ SRC_REL_bit = 1 << 23,
+ SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24,
+ SQ_VTX_WORD0__SRC_SEL_X_shift = 24,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+ MEGA_FETCH_COUNT_mask = 0x3f << 26,
+ MEGA_FETCH_COUNT_shift = 26,
+ SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc,
+ SEL_X_mask = 0x07 << 0,
+ SEL_X_shift = 0,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_Y_mask = 0x07 << 3,
+ SEL_Y_shift = 3,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_Z_mask = 0x07 << 6,
+ SEL_Z_shift = 6,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SEL_W_mask = 0x07 << 9,
+ SEL_W_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_ALU_WORD1 = 0x00008dfc,
+ ENCODING_mask = 0x07 << 15,
+ ENCODING_shift = 15,
+ BANK_SWIZZLE_mask = 0x07 << 18,
+ BANK_SWIZZLE_shift = 18,
+ SQ_ALU_VEC_012 = 0x00,
+ SQ_ALU_VEC_021 = 0x01,
+ SQ_ALU_VEC_120 = 0x02,
+ SQ_ALU_VEC_102 = 0x03,
+ SQ_ALU_VEC_201 = 0x04,
+ SQ_ALU_VEC_210 = 0x05,
+ SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21,
+ SQ_ALU_WORD1__DST_GPR_shift = 21,
+ SQ_ALU_WORD1__DST_REL_bit = 1 << 28,
+ DST_CHAN_mask = 0x03 << 29,
+ DST_CHAN_shift = 29,
+ CHAN_X = 0x00,
+ CHAN_Y = 0x01,
+ CHAN_Z = 0x02,
+ CHAN_W = 0x03,
+ SQ_ALU_WORD1__CLAMP_bit = 1 << 31,
+ SQ_CF_ALU_WORD0 = 0x00008dfc,
+ SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0,
+ SQ_CF_ALU_WORD0__ADDR_shift = 0,
+ KCACHE_BANK0_mask = 0x0f << 22,
+ KCACHE_BANK0_shift = 22,
+ KCACHE_BANK1_mask = 0x0f << 26,
+ KCACHE_BANK1_shift = 26,
+ KCACHE_MODE0_mask = 0x03 << 30,
+ KCACHE_MODE0_shift = 30,
+/* SQ_CF_KCACHE_NOP = 0x00, */
+/* SQ_CF_KCACHE_LOCK_1 = 0x01, */
+/* SQ_CF_KCACHE_LOCK_2 = 0x02, */
+/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */
+ SQ_VTX_WORD2 = 0x00008dfc,
+ SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0,
+ SQ_VTX_WORD2__OFFSET_shift = 0,
+ SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16,
+ SQ_VTX_WORD2__ENDIAN_SWAP_shift = 16,
+ SQ_ENDIAN_NONE = 0x00,
+ SQ_ENDIAN_8IN16 = 0x01,
+ SQ_ENDIAN_8IN32 = 0x02,
+ CONST_BUF_NO_STRIDE_bit = 1 << 18,
+ MEGA_FETCH_bit = 1 << 19,
+ SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20,
+ SQ_ALU_WORD1_OP2_V2 = 0x00008dfc,
+ SRC0_ABS_bit = 1 << 0,
+ SRC1_ABS_bit = 1 << 1,
+ UPDATE_EXECUTE_MASK_bit = 1 << 2,
+ UPDATE_PRED_bit = 1 << 3,
+ WRITE_MASK_bit = 1 << 4,
+ SQ_ALU_WORD1_OP2_V2__OMOD_mask = 0x03 << 5,
+ SQ_ALU_WORD1_OP2_V2__OMOD_shift = 5,
+ SQ_ALU_OMOD_OFF = 0x00,
+ SQ_ALU_OMOD_M2 = 0x01,
+ SQ_ALU_OMOD_M4 = 0x02,
+ SQ_ALU_OMOD_D2 = 0x03,
+ SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7,
+ SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7,
+ SQ_OP2_INST_ADD = 0x00,
+ SQ_OP2_INST_MUL = 0x01,
+ SQ_OP2_INST_MUL_IEEE = 0x02,
+ SQ_OP2_INST_MAX = 0x03,
+ SQ_OP2_INST_MIN = 0x04,
+ SQ_OP2_INST_MAX_DX10 = 0x05,
+ SQ_OP2_INST_MIN_DX10 = 0x06,
+ SQ_OP2_INST_SETE = 0x08,
+ SQ_OP2_INST_SETGT = 0x09,
+ SQ_OP2_INST_SETGE = 0x0a,
+ SQ_OP2_INST_SETNE = 0x0b,
+ SQ_OP2_INST_SETE_DX10 = 0x0c,
+ SQ_OP2_INST_SETGT_DX10 = 0x0d,
+ SQ_OP2_INST_SETGE_DX10 = 0x0e,
+ SQ_OP2_INST_SETNE_DX10 = 0x0f,
+ SQ_OP2_INST_FRACT = 0x10,
+ SQ_OP2_INST_TRUNC = 0x11,
+ SQ_OP2_INST_CEIL = 0x12,
+ SQ_OP2_INST_RNDNE = 0x13,
+ SQ_OP2_INST_FLOOR = 0x14,
+ SQ_OP2_INST_MOVA = 0x15,
+ SQ_OP2_INST_MOVA_FLOOR = 0x16,
+ SQ_OP2_INST_MOVA_INT = 0x18,
+ SQ_OP2_INST_MOV = 0x19,
+ SQ_OP2_INST_NOP = 0x1a,
+ SQ_OP2_INST_PRED_SETGT_UINT = 0x1e,
+ SQ_OP2_INST_PRED_SETGE_UINT = 0x1f,
+ SQ_OP2_INST_PRED_SETE = 0x20,
+ SQ_OP2_INST_PRED_SETGT = 0x21,
+ SQ_OP2_INST_PRED_SETGE = 0x22,
+ SQ_OP2_INST_PRED_SETNE = 0x23,
+ SQ_OP2_INST_PRED_SET_INV = 0x24,
+ SQ_OP2_INST_PRED_SET_POP = 0x25,
+ SQ_OP2_INST_PRED_SET_CLR = 0x26,
+ SQ_OP2_INST_PRED_SET_RESTORE = 0x27,
+ SQ_OP2_INST_PRED_SETE_PUSH = 0x28,
+ SQ_OP2_INST_PRED_SETGT_PUSH = 0x29,
+ SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a,
+ SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b,
+ SQ_OP2_INST_KILLE = 0x2c,
+ SQ_OP2_INST_KILLGT = 0x2d,
+ SQ_OP2_INST_KILLGE = 0x2e,
+ SQ_OP2_INST_KILLNE = 0x2f,
+ SQ_OP2_INST_AND_INT = 0x30,
+ SQ_OP2_INST_OR_INT = 0x31,
+ SQ_OP2_INST_XOR_INT = 0x32,
+ SQ_OP2_INST_NOT_INT = 0x33,
+ SQ_OP2_INST_ADD_INT = 0x34,
+ SQ_OP2_INST_SUB_INT = 0x35,
+ SQ_OP2_INST_MAX_INT = 0x36,
+ SQ_OP2_INST_MIN_INT = 0x37,
+ SQ_OP2_INST_MAX_UINT = 0x38,
+ SQ_OP2_INST_MIN_UINT = 0x39,
+ SQ_OP2_INST_SETE_INT = 0x3a,
+ SQ_OP2_INST_SETGT_INT = 0x3b,
+ SQ_OP2_INST_SETGE_INT = 0x3c,
+ SQ_OP2_INST_SETNE_INT = 0x3d,
+ SQ_OP2_INST_SETGT_UINT = 0x3e,
+ SQ_OP2_INST_SETGE_UINT = 0x3f,
+ SQ_OP2_INST_KILLGT_UINT = 0x40,
+ SQ_OP2_INST_KILLGE_UINT = 0x41,
+ SQ_OP2_INST_PRED_SETE_INT = 0x42,
+ SQ_OP2_INST_PRED_SETGT_INT = 0x43,
+ SQ_OP2_INST_PRED_SETGE_INT = 0x44,
+ SQ_OP2_INST_PRED_SETNE_INT = 0x45,
+ SQ_OP2_INST_KILLE_INT = 0x46,
+ SQ_OP2_INST_KILLGT_INT = 0x47,
+ SQ_OP2_INST_KILLGE_INT = 0x48,
+ SQ_OP2_INST_KILLNE_INT = 0x49,
+ SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a,
+ SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b,
+ SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c,
+ SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d,
+ SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e,
+ SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f,
+ SQ_OP2_INST_DOT4 = 0x50,
+ SQ_OP2_INST_DOT4_IEEE = 0x51,
+ SQ_OP2_INST_CUBE = 0x52,
+ SQ_OP2_INST_MAX4 = 0x53,
+ SQ_OP2_INST_MOVA_GPR_INT = 0x60,
+ SQ_OP2_INST_EXP_IEEE = 0x61,
+ SQ_OP2_INST_LOG_CLAMPED = 0x62,
+ SQ_OP2_INST_LOG_IEEE = 0x63,
+ SQ_OP2_INST_RECIP_CLAMPED = 0x64,
+ SQ_OP2_INST_RECIP_FF = 0x65,
+ SQ_OP2_INST_RECIP_IEEE = 0x66,
+ SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67,
+ SQ_OP2_INST_RECIPSQRT_FF = 0x68,
+ SQ_OP2_INST_RECIPSQRT_IEEE = 0x69,
+ SQ_OP2_INST_SQRT_IEEE = 0x6a,
+ SQ_OP2_INST_FLT_TO_INT = 0x6b,
+ SQ_OP2_INST_INT_TO_FLT = 0x6c,
+ SQ_OP2_INST_UINT_TO_FLT = 0x6d,
+ SQ_OP2_INST_SIN = 0x6e,
+ SQ_OP2_INST_COS = 0x6f,
+ SQ_OP2_INST_ASHR_INT = 0x70,
+ SQ_OP2_INST_LSHR_INT = 0x71,
+ SQ_OP2_INST_LSHL_INT = 0x72,
+ SQ_OP2_INST_MULLO_INT = 0x73,
+ SQ_OP2_INST_MULHI_INT = 0x74,
+ SQ_OP2_INST_MULLO_UINT = 0x75,
+ SQ_OP2_INST_MULHI_UINT = 0x76,
+ SQ_OP2_INST_RECIP_INT = 0x77,
+ SQ_OP2_INST_RECIP_UINT = 0x78,
+ SQ_OP2_INST_FLT_TO_UINT = 0x79,
+ SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc,
+ ARRAY_SIZE_mask = 0xfff << 0,
+ ARRAY_SIZE_shift = 0,
+ COMP_MASK_mask = 0x0f << 12,
+ COMP_MASK_shift = 12,
+ SQ_CF_WORD0 = 0x00008dfc,
+ SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc,
+ ARRAY_BASE_mask = 0x1fff << 0,
+ ARRAY_BASE_shift = 0,
+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13,
+ SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13,
+ SQ_EXPORT_PIXEL = 0x00,
+ SQ_EXPORT_POS = 0x01,
+ SQ_EXPORT_PARAM = 0x02,
+ X_UNUSED_FOR_SX_EXPORTS = 0x03,
+ RW_GPR_mask = 0x7f << 15,
+ RW_GPR_shift = 15,
+ RW_REL_bit = 1 << 22,
+ INDEX_GPR_mask = 0x7f << 23,
+ INDEX_GPR_shift = 23,
+ ELEM_SIZE_mask = 0x03 << 30,
+ ELEM_SIZE_shift = 30,
+ SQ_VTX_WORD1 = 0x00008dfc,
+ SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9,
+ SQ_VTX_WORD1__DST_SEL_X_shift = 9,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12,
+ SQ_VTX_WORD1__DST_SEL_Y_shift = 12,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15,
+ SQ_VTX_WORD1__DST_SEL_Z_shift = 15,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18,
+ SQ_VTX_WORD1__DST_SEL_W_shift = 18,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+/* SQ_SEL_MASK = 0x07, */
+ USE_CONST_FIELDS_bit = 1 << 21,
+ SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22,
+ SQ_VTX_WORD1__DATA_FORMAT_shift = 22,
+ SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28,
+ SQ_VTX_WORD1__NUM_FORMAT_ALL_shift = 28,
+ SQ_NUM_FORMAT_NORM = 0x00,
+ SQ_NUM_FORMAT_INT = 0x01,
+ SQ_NUM_FORMAT_SCALED = 0x02,
+ SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30,
+ SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31,
+ SQ_ALU_WORD1_OP2 = 0x00008dfc,
+/* SRC0_ABS_bit = 1 << 0, */
+/* SRC1_ABS_bit = 1 << 1, */
+/* UPDATE_EXECUTE_MASK_bit = 1 << 2, */
+/* UPDATE_PRED_bit = 1 << 3, */
+/* WRITE_MASK_bit = 1 << 4, */
+ FOG_MERGE_bit = 1 << 5,
+ SQ_ALU_WORD1_OP2__OMOD_mask = 0x03 << 6,
+ SQ_ALU_WORD1_OP2__OMOD_shift = 6,
+/* SQ_ALU_OMOD_OFF = 0x00, */
+/* SQ_ALU_OMOD_M2 = 0x01, */
+/* SQ_ALU_OMOD_M4 = 0x02, */
+/* SQ_ALU_OMOD_D2 = 0x03, */
+ SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8,
+ SQ_ALU_WORD1_OP2__ALU_INST_shift = 8,
+/* SQ_OP2_INST_ADD = 0x00, */
+/* SQ_OP2_INST_MUL = 0x01, */
+/* SQ_OP2_INST_MUL_IEEE = 0x02, */
+/* SQ_OP2_INST_MAX = 0x03, */
+/* SQ_OP2_INST_MIN = 0x04, */
+/* SQ_OP2_INST_MAX_DX10 = 0x05, */
+/* SQ_OP2_INST_MIN_DX10 = 0x06, */
+/* SQ_OP2_INST_SETE = 0x08, */
+/* SQ_OP2_INST_SETGT = 0x09, */
+/* SQ_OP2_INST_SETGE = 0x0a, */
+/* SQ_OP2_INST_SETNE = 0x0b, */
+/* SQ_OP2_INST_SETE_DX10 = 0x0c, */
+/* SQ_OP2_INST_SETGT_DX10 = 0x0d, */
+/* SQ_OP2_INST_SETGE_DX10 = 0x0e, */
+/* SQ_OP2_INST_SETNE_DX10 = 0x0f, */
+/* SQ_OP2_INST_FRACT = 0x10, */
+/* SQ_OP2_INST_TRUNC = 0x11, */
+/* SQ_OP2_INST_CEIL = 0x12, */
+/* SQ_OP2_INST_RNDNE = 0x13, */
+/* SQ_OP2_INST_FLOOR = 0x14, */
+/* SQ_OP2_INST_MOVA = 0x15, */
+/* SQ_OP2_INST_MOVA_FLOOR = 0x16, */
+/* SQ_OP2_INST_MOVA_INT = 0x18, */
+/* SQ_OP2_INST_MOV = 0x19, */
+/* SQ_OP2_INST_NOP = 0x1a, */
+/* SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, */
+/* SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, */
+/* SQ_OP2_INST_PRED_SETE = 0x20, */
+/* SQ_OP2_INST_PRED_SETGT = 0x21, */
+/* SQ_OP2_INST_PRED_SETGE = 0x22, */
+/* SQ_OP2_INST_PRED_SETNE = 0x23, */
+/* SQ_OP2_INST_PRED_SET_INV = 0x24, */
+/* SQ_OP2_INST_PRED_SET_POP = 0x25, */
+/* SQ_OP2_INST_PRED_SET_CLR = 0x26, */
+/* SQ_OP2_INST_PRED_SET_RESTORE = 0x27, */
+/* SQ_OP2_INST_PRED_SETE_PUSH = 0x28, */
+/* SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, */
+/* SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, */
+/* SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, */
+/* SQ_OP2_INST_KILLE = 0x2c, */
+/* SQ_OP2_INST_KILLGT = 0x2d, */
+/* SQ_OP2_INST_KILLGE = 0x2e, */
+/* SQ_OP2_INST_KILLNE = 0x2f, */
+/* SQ_OP2_INST_AND_INT = 0x30, */
+/* SQ_OP2_INST_OR_INT = 0x31, */
+/* SQ_OP2_INST_XOR_INT = 0x32, */
+/* SQ_OP2_INST_NOT_INT = 0x33, */
+/* SQ_OP2_INST_ADD_INT = 0x34, */
+/* SQ_OP2_INST_SUB_INT = 0x35, */
+/* SQ_OP2_INST_MAX_INT = 0x36, */
+/* SQ_OP2_INST_MIN_INT = 0x37, */
+/* SQ_OP2_INST_MAX_UINT = 0x38, */
+/* SQ_OP2_INST_MIN_UINT = 0x39, */
+/* SQ_OP2_INST_SETE_INT = 0x3a, */
+/* SQ_OP2_INST_SETGT_INT = 0x3b, */
+/* SQ_OP2_INST_SETGE_INT = 0x3c, */
+/* SQ_OP2_INST_SETNE_INT = 0x3d, */
+/* SQ_OP2_INST_SETGT_UINT = 0x3e, */
+/* SQ_OP2_INST_SETGE_UINT = 0x3f, */
+/* SQ_OP2_INST_KILLGT_UINT = 0x40, */
+/* SQ_OP2_INST_KILLGE_UINT = 0x41, */
+/* SQ_OP2_INST_PRED_SETE_INT = 0x42, */
+/* SQ_OP2_INST_PRED_SETGT_INT = 0x43, */
+/* SQ_OP2_INST_PRED_SETGE_INT = 0x44, */
+/* SQ_OP2_INST_PRED_SETNE_INT = 0x45, */
+/* SQ_OP2_INST_KILLE_INT = 0x46, */
+/* SQ_OP2_INST_KILLGT_INT = 0x47, */
+/* SQ_OP2_INST_KILLGE_INT = 0x48, */
+/* SQ_OP2_INST_KILLNE_INT = 0x49, */
+/* SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, */
+/* SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, */
+/* SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, */
+/* SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, */
+/* SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, */
+/* SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, */
+/* SQ_OP2_INST_DOT4 = 0x50, */
+/* SQ_OP2_INST_DOT4_IEEE = 0x51, */
+/* SQ_OP2_INST_CUBE = 0x52, */
+/* SQ_OP2_INST_MAX4 = 0x53, */
+/* SQ_OP2_INST_MOVA_GPR_INT = 0x60, */
+/* SQ_OP2_INST_EXP_IEEE = 0x61, */
+/* SQ_OP2_INST_LOG_CLAMPED = 0x62, */
+/* SQ_OP2_INST_LOG_IEEE = 0x63, */
+/* SQ_OP2_INST_RECIP_CLAMPED = 0x64, */
+/* SQ_OP2_INST_RECIP_FF = 0x65, */
+/* SQ_OP2_INST_RECIP_IEEE = 0x66, */
+/* SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, */
+/* SQ_OP2_INST_RECIPSQRT_FF = 0x68, */
+/* SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, */
+/* SQ_OP2_INST_SQRT_IEEE = 0x6a, */
+/* SQ_OP2_INST_FLT_TO_INT = 0x6b, */
+/* SQ_OP2_INST_INT_TO_FLT = 0x6c, */
+/* SQ_OP2_INST_UINT_TO_FLT = 0x6d, */
+/* SQ_OP2_INST_SIN = 0x6e, */
+/* SQ_OP2_INST_COS = 0x6f, */
+/* SQ_OP2_INST_ASHR_INT = 0x70, */
+/* SQ_OP2_INST_LSHR_INT = 0x71, */
+/* SQ_OP2_INST_LSHL_INT = 0x72, */
+/* SQ_OP2_INST_MULLO_INT = 0x73, */
+/* SQ_OP2_INST_MULHI_INT = 0x74, */
+/* SQ_OP2_INST_MULLO_UINT = 0x75, */
+/* SQ_OP2_INST_MULHI_UINT = 0x76, */
+/* SQ_OP2_INST_RECIP_INT = 0x77, */
+/* SQ_OP2_INST_RECIP_UINT = 0x78, */
+/* SQ_OP2_INST_FLT_TO_UINT = 0x79, */
+ SQ_CF_WORD1 = 0x00008dfc,
+ POP_COUNT_mask = 0x07 << 0,
+ POP_COUNT_shift = 0,
+ CF_CONST_mask = 0x1f << 3,
+ CF_CONST_shift = 3,
+ COND_mask = 0x03 << 8,
+ COND_shift = 8,
+ SQ_CF_COND_ACTIVE = 0x00,
+ SQ_CF_COND_FALSE = 0x01,
+ SQ_CF_COND_BOOL = 0x02,
+ SQ_CF_COND_NOT_BOOL = 0x03,
+ SQ_CF_WORD1__COUNT_mask = 0x07 << 10,
+ SQ_CF_WORD1__COUNT_shift = 10,
+ CALL_COUNT_mask = 0x3f << 13,
+ CALL_COUNT_shift = 13,
+ COUNT_3_bit = 1 << 19,
+/* END_OF_PROGRAM_bit = 1 << 21, */
+/* VALID_PIXEL_MODE_bit = 1 << 22, */
+ SQ_CF_WORD1__CF_INST_mask = 0x7f << 23,
+ SQ_CF_WORD1__CF_INST_shift = 23,
+ SQ_CF_INST_NOP = 0x00,
+ SQ_CF_INST_TEX = 0x01,
+ SQ_CF_INST_VTX = 0x02,
+ SQ_CF_INST_VTX_TC = 0x03,
+ SQ_CF_INST_LOOP_START = 0x04,
+ SQ_CF_INST_LOOP_END = 0x05,
+ SQ_CF_INST_LOOP_START_DX10 = 0x06,
+ SQ_CF_INST_LOOP_START_NO_AL = 0x07,
+ SQ_CF_INST_LOOP_CONTINUE = 0x08,
+ SQ_CF_INST_LOOP_BREAK = 0x09,
+ SQ_CF_INST_JUMP = 0x0a,
+ SQ_CF_INST_PUSH = 0x0b,
+ SQ_CF_INST_PUSH_ELSE = 0x0c,
+ SQ_CF_INST_ELSE = 0x0d,
+ SQ_CF_INST_POP = 0x0e,
+ SQ_CF_INST_POP_JUMP = 0x0f,
+ SQ_CF_INST_POP_PUSH = 0x10,
+ SQ_CF_INST_POP_PUSH_ELSE = 0x11,
+ SQ_CF_INST_CALL = 0x12,
+ SQ_CF_INST_CALL_FS = 0x13,
+ SQ_CF_INST_RETURN = 0x14,
+ SQ_CF_INST_EMIT_VERTEX = 0x15,
+ SQ_CF_INST_EMIT_CUT_VERTEX = 0x16,
+ SQ_CF_INST_CUT_VERTEX = 0x17,
+ SQ_CF_INST_KILL = 0x18,
+/* WHOLE_QUAD_MODE_bit = 1 << 30, */
+/* BARRIER_bit = 1 << 31, */
+ SQ_VTX_WORD1_SEM = 0x00008dfc,
+ SEMANTIC_ID_mask = 0xff << 0,
+ SEMANTIC_ID_shift = 0,
+ SQ_TEX_WORD0 = 0x00008dfc,
+ TEX_INST_mask = 0x1f << 0,
+ TEX_INST_shift = 0,
+ SQ_TEX_INST_VTX_FETCH = 0x00,
+ SQ_TEX_INST_VTX_SEMANTIC = 0x01,
+ SQ_TEX_INST_LD = 0x03,
+ SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04,
+ SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05,
+ SQ_TEX_INST_GET_LOD = 0x06,
+ SQ_TEX_INST_GET_GRADIENTS_H = 0x07,
+ SQ_TEX_INST_GET_GRADIENTS_V = 0x08,
+ SQ_TEX_INST_GET_LERP = 0x09,
+ SQ_TEX_INST_RESERVED_10 = 0x0a,
+ SQ_TEX_INST_SET_GRADIENTS_H = 0x0b,
+ SQ_TEX_INST_SET_GRADIENTS_V = 0x0c,
+ SQ_TEX_INST_PASS = 0x0d,
+ X_Z_SET_INDEX_FOR_ARRAY_OF_CUBEMAPS = 0x0e,
+ SQ_TEX_INST_SAMPLE = 0x10,
+ SQ_TEX_INST_SAMPLE_L = 0x11,
+ SQ_TEX_INST_SAMPLE_LB = 0x12,
+ SQ_TEX_INST_SAMPLE_LZ = 0x13,
+ SQ_TEX_INST_SAMPLE_G = 0x14,
+ SQ_TEX_INST_SAMPLE_G_L = 0x15,
+ SQ_TEX_INST_SAMPLE_G_LB = 0x16,
+ SQ_TEX_INST_SAMPLE_G_LZ = 0x17,
+ SQ_TEX_INST_SAMPLE_C = 0x18,
+ SQ_TEX_INST_SAMPLE_C_L = 0x19,
+ SQ_TEX_INST_SAMPLE_C_LB = 0x1a,
+ SQ_TEX_INST_SAMPLE_C_LZ = 0x1b,
+ SQ_TEX_INST_SAMPLE_C_G = 0x1c,
+ SQ_TEX_INST_SAMPLE_C_G_L = 0x1d,
+ SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e,
+ SQ_TEX_INST_SAMPLE_C_G_LZ = 0x1f,
+ BC_FRAC_MODE_bit = 1 << 5,
+/* FETCH_WHOLE_QUAD_bit = 1 << 7, */
+ RESOURCE_ID_mask = 0xff << 8,
+ RESOURCE_ID_shift = 8,
+/* SRC_GPR_mask = 0x7f << 16, */
+/* SRC_GPR_shift = 16, */
+/* SRC_REL_bit = 1 << 23, */
+ SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24,
+ SQ_VTX_WORD1_GPR = 0x00008dfc,
+ SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0,
+ SQ_VTX_WORD1_GPR__DST_GPR_shift = 0,
+ SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7,
+ SQ_ALU_WORD0 = 0x00008dfc,
+ SRC0_SEL_mask = 0x1ff << 0,
+ SRC0_SEL_shift = 0,
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+ SRC0_REL_bit = 1 << 9,
+ SRC0_CHAN_mask = 0x03 << 10,
+ SRC0_CHAN_shift = 10,
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ SRC0_NEG_bit = 1 << 12,
+ SRC1_SEL_mask = 0x1ff << 13,
+ SRC1_SEL_shift = 13,
+/* SQ_ALU_SRC_0 = 0xf8, */
+/* SQ_ALU_SRC_1 = 0xf9, */
+/* SQ_ALU_SRC_1_INT = 0xfa, */
+/* SQ_ALU_SRC_M_1_INT = 0xfb, */
+/* SQ_ALU_SRC_0_5 = 0xfc, */
+/* SQ_ALU_SRC_LITERAL = 0xfd, */
+/* SQ_ALU_SRC_PV = 0xfe, */
+/* SQ_ALU_SRC_PS = 0xff, */
+ SRC1_REL_bit = 1 << 22,
+ SRC1_CHAN_mask = 0x03 << 23,
+ SRC1_CHAN_shift = 23,
+/* SQ_CHAN_X = 0x00, */
+/* SQ_CHAN_Y = 0x01, */
+/* SQ_CHAN_Z = 0x02, */
+/* SQ_CHAN_W = 0x03, */
+ SRC1_NEG_bit = 1 << 25,
+ INDEX_MODE_mask = 0x07 << 26,
+ INDEX_MODE_shift = 26,
+ SQ_INDEX_AR_X = 0x00,
+ SQ_INDEX_AR_Y = 0x01,
+ SQ_INDEX_AR_Z = 0x02,
+ SQ_INDEX_AR_W = 0x03,
+ SQ_INDEX_LOOP = 0x04,
+ PRED_SEL_mask = 0x03 << 29,
+ PRED_SEL_shift = 29,
+ SQ_PRED_SEL_OFF = 0x00,
+ SQ_PRED_SEL_ZERO = 0x02,
+ SQ_PRED_SEL_ONE = 0x03,
+ LAST_bit = 1 << 31,
+ SX_EXPORT_BUFFER_SIZES = 0x0000900c,
+ COLOR_BUFFER_SIZE_mask = 0xff << 0,
+ COLOR_BUFFER_SIZE_shift = 0,
+ POSITION_BUFFER_SIZE_mask = 0xff << 8,
+ POSITION_BUFFER_SIZE_shift = 8,
+ SMX_BUFFER_SIZE_mask = 0xff << 16,
+ SMX_BUFFER_SIZE_shift = 16,
+ SX_MEMORY_EXPORT_BASE = 0x00009010,
+ SX_MEMORY_EXPORT_SIZE = 0x00009014,
+ SPI_CONFIG_CNTL = 0x00009100,
+ GPR_WRITE_PRIORITY_mask = 0x1f << 0,
+ GPR_WRITE_PRIORITY_shift = 0,
+ X_PRIORITY_ORDER = 0x00,
+ X_PRIORITY_ORDER_VS = 0x01,
+ DISABLE_INTERP_1_bit = 1 << 5,
+ DEBUG_THREAD_TYPE_SEL_mask = 0x03 << 6,
+ DEBUG_THREAD_TYPE_SEL_shift = 6,
+ DEBUG_GROUP_SEL_mask = 0x1f << 8,
+ DEBUG_GROUP_SEL_shift = 8,
+ DEBUG_GRBM_OVERRIDE_bit = 1 << 13,
+ SPI_CONFIG_CNTL_1 = 0x0000913c,
+ VTX_DONE_DELAY_mask = 0x0f << 0,
+ VTX_DONE_DELAY_shift = 0,
+ X_DELAY_10_CLKS = 0x00,
+ X_DELAY_11_CLKS = 0x01,
+ X_DELAY_12_CLKS = 0x02,
+ X_DELAY_13_CLKS = 0x03,
+ X_DELAY_14_CLKS = 0x04,
+ X_DELAY_15_CLKS = 0x05,
+ X_DELAY_16_CLKS = 0x06,
+ X_DELAY_17_CLKS = 0x07,
+ X_DELAY_2_CLKS = 0x08,
+ X_DELAY_3_CLKS = 0x09,
+ X_DELAY_4_CLKS = 0x0a,
+ X_DELAY_5_CLKS = 0x0b,
+ X_DELAY_6_CLKS = 0x0c,
+ X_DELAY_7_CLKS = 0x0d,
+ X_DELAY_8_CLKS = 0x0e,
+ X_DELAY_9_CLKS = 0x0f,
+ INTERP_ONE_PRIM_PER_ROW_bit = 1 << 4,
+ TD_FILTER4 = 0x00009400,
+ WEIGHT_1_mask = 0x7ff << 0,
+ WEIGHT_1_shift = 0,
+ WEIGHT_0_mask = 0x7ff << 11,
+ WEIGHT_0_shift = 11,
+ WEIGHT_PAIR_bit = 1 << 22,
+ PHASE_mask = 0x0f << 23,
+ PHASE_shift = 23,
+ DIRECTION_bit = 1 << 27,
+ TD_FILTER4_1 = 0x00009404,
+ TD_FILTER4_1_num = 35,
+/* WEIGHT_1_mask = 0x7ff << 0, */
+/* WEIGHT_1_shift = 0, */
+/* WEIGHT_0_mask = 0x7ff << 11, */
+/* WEIGHT_0_shift = 11, */
+ TD_CNTL = 0x00009490,
+ SYNC_PHASE_SH_mask = 0x03 << 0,
+ SYNC_PHASE_SH_shift = 0,
+ SYNC_PHASE_VC_SMX_mask = 0x03 << 4,
+ SYNC_PHASE_VC_SMX_shift = 4,
+ TD0_CNTL = 0x00009494,
+ TD0_CNTL_num = 4,
+ ID_OVERRIDE_mask = 0x03 << 28,
+ ID_OVERRIDE_shift = 28,
+ TD0_STATUS = 0x000094a4,
+ TD0_STATUS_num = 4,
+ BUSY_bit = 1 << 31,
+ TA_CNTL = 0x00009504,
+ GRADIENT_CREDIT_mask = 0x1f << 0,
+ GRADIENT_CREDIT_shift = 0,
+ WALKER_CREDIT_mask = 0x1f << 8,
+ WALKER_CREDIT_shift = 8,
+ ALIGNER_CREDIT_mask = 0x1f << 16,
+ ALIGNER_CREDIT_shift = 16,
+ TD_FIFO_CREDIT_mask = 0x3ff << 22,
+ TD_FIFO_CREDIT_shift = 22,
+ TA_CNTL_AUX = 0x00009508,
+ DISABLE_CUBE_WRAP_bit = 1 << 0,
+ SYNC_GRADIENT_bit = 1 << 24,
+ SYNC_WALKER_bit = 1 << 25,
+ SYNC_ALIGNER_bit = 1 << 26,
+ BILINEAR_PRECISION_bit = 1 << 31,
+ TA0_CNTL = 0x00009510,
+/* ID_OVERRIDE_mask = 0x03 << 28, */
+/* ID_OVERRIDE_shift = 28, */
+ TA1_CNTL = 0x00009514,
+/* ID_OVERRIDE_mask = 0x03 << 28, */
+/* ID_OVERRIDE_shift = 28, */
+ TA2_CNTL = 0x00009518,
+/* ID_OVERRIDE_mask = 0x03 << 28, */
+/* ID_OVERRIDE_shift = 28, */
+ TA3_CNTL = 0x0000951c,
+/* ID_OVERRIDE_mask = 0x03 << 28, */
+/* ID_OVERRIDE_shift = 28, */
+ TA0_STATUS = 0x00009520,
+ FG_PFIFO_EMPTYB_bit = 1 << 12,
+ FG_LFIFO_EMPTYB_bit = 1 << 13,
+ FG_SFIFO_EMPTYB_bit = 1 << 14,
+ FL_PFIFO_EMPTYB_bit = 1 << 16,
+ FL_LFIFO_EMPTYB_bit = 1 << 17,
+ FL_SFIFO_EMPTYB_bit = 1 << 18,
+ FA_PFIFO_EMPTYB_bit = 1 << 20,
+ FA_LFIFO_EMPTYB_bit = 1 << 21,
+ FA_SFIFO_EMPTYB_bit = 1 << 22,
+ IN_BUSY_bit = 1 << 24,
+ FG_BUSY_bit = 1 << 25,
+ FL_BUSY_bit = 1 << 27,
+ TA_BUSY_bit = 1 << 28,
+ FA_BUSY_bit = 1 << 29,
+ AL_BUSY_bit = 1 << 30,
+/* BUSY_bit = 1 << 31, */
+ TA1_STATUS = 0x00009524,
+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */
+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */
+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */
+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */
+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */
+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */
+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */
+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */
+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */
+/* IN_BUSY_bit = 1 << 24, */
+/* FG_BUSY_bit = 1 << 25, */
+/* FL_BUSY_bit = 1 << 27, */
+/* TA_BUSY_bit = 1 << 28, */
+/* FA_BUSY_bit = 1 << 29, */
+/* AL_BUSY_bit = 1 << 30, */
+/* BUSY_bit = 1 << 31, */
+ TA2_STATUS = 0x00009528,
+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */
+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */
+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */
+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */
+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */
+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */
+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */
+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */
+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */
+/* IN_BUSY_bit = 1 << 24, */
+/* FG_BUSY_bit = 1 << 25, */
+/* FL_BUSY_bit = 1 << 27, */
+/* TA_BUSY_bit = 1 << 28, */
+/* FA_BUSY_bit = 1 << 29, */
+/* AL_BUSY_bit = 1 << 30, */
+/* BUSY_bit = 1 << 31, */
+ TA3_STATUS = 0x0000952c,
+/* FG_PFIFO_EMPTYB_bit = 1 << 12, */
+/* FG_LFIFO_EMPTYB_bit = 1 << 13, */
+/* FG_SFIFO_EMPTYB_bit = 1 << 14, */
+/* FL_PFIFO_EMPTYB_bit = 1 << 16, */
+/* FL_LFIFO_EMPTYB_bit = 1 << 17, */
+/* FL_SFIFO_EMPTYB_bit = 1 << 18, */
+/* FA_PFIFO_EMPTYB_bit = 1 << 20, */
+/* FA_LFIFO_EMPTYB_bit = 1 << 21, */
+/* FA_SFIFO_EMPTYB_bit = 1 << 22, */
+/* IN_BUSY_bit = 1 << 24, */
+/* FG_BUSY_bit = 1 << 25, */
+/* FL_BUSY_bit = 1 << 27, */
+/* TA_BUSY_bit = 1 << 28, */
+/* FA_BUSY_bit = 1 << 29, */
+/* AL_BUSY_bit = 1 << 30, */
+/* BUSY_bit = 1 << 31, */
+ TC_STATUS = 0x00009600,
+ TC_BUSY_bit = 1 << 0,
+ TC_INVALIDATE = 0x00009604,
+ START_bit = 1 << 0,
+ TC_CNTL = 0x00009608,
+ FORCE_HIT_bit = 1 << 0,
+ FORCE_MISS_bit = 1 << 1,
+ L2_SIZE_mask = 0x0f << 5,
+ L2_SIZE_shift = 5,
+ _256K = 0x00,
+ _224K = 0x01,
+ _192K = 0x02,
+ _160K = 0x03,
+ _128K = 0x04,
+ _96K = 0x05,
+ _64K = 0x06,
+ _32K = 0x07,
+ L2_DISABLE_LATE_HIT_bit = 1 << 9,
+ DISABLE_VERT_PERF_bit = 1 << 10,
+ DISABLE_INVAL_BUSY_bit = 1 << 11,
+ DISABLE_INVAL_SAME_SURFACE_bit = 1 << 12,
+ PARTITION_MODE_mask = 0x03 << 13,
+ PARTITION_MODE_shift = 13,
+ X_VERTEX = 0x00,
+ MISS_ARB_MODE_bit = 1 << 15,
+ HIT_ARB_MODE_bit = 1 << 16,
+ DISABLE_WRITE_DELAY_bit = 1 << 17,
+ HIT_FIFO_DEPTH_bit = 1 << 18,
+ VC_CNTL = 0x00009700,
+ L2_INVALIDATE_bit = 1 << 0,
+ RESERVED_bit = 1 << 1,
+ CC_FORCE_MISS_bit = 1 << 2,
+ MI_CHAN_SEL_mask = 0x03 << 3,
+ MI_CHAN_SEL_shift = 3,
+ X_MC0_USES_CH_0_1 = 0x00,
+ X_MC0_USES_CH_0_3 = 0x01,
+ X_VC_MC0_IS_ACTIVE = 0x02,
+ X_VC_MC1_IS_DISABLED = 0x03,
+ MI_STEER_DISABLE_bit = 1 << 5,
+ MI_CREDIT_CTR_mask = 0x0f << 6,
+ MI_CREDIT_CTR_shift = 6,
+ MI_CREDIT_WE_bit = 1 << 10,
+ MI_REQ_STALL_THLD_mask = 0x07 << 11,
+ MI_REQ_STALL_THLD_shift = 11,
+ X_LATENCY_EXCEEDS_399_CLOCKS = 0x00,
+ X_LATENCY_EXCEEDS_415_CLOCKS = 0x01,
+ X_LATENCY_EXCEEDS_431_CLOCKS = 0x02,
+ X_LATENCY_EXCEEDS_447_CLOCKS = 0x03,
+ X_LATENCY_EXCEEDS_463_CLOCKS = 0x04,
+ X_LATENCY_EXCEEDS_479_CLOCKS = 0x05,
+ X_LATENCY_EXCEEDS_495_CLOCKS = 0x06,
+ X_LATENCY_EXCEEDS_511_CLOCKS = 0x07,
+ VC_CNTL__MI_TIMESTAMP_RES_mask = 0x1f << 14,
+ VC_CNTL__MI_TIMESTAMP_RES_shift = 14,
+ X_1X_SYSTEM_CLOCK = 0x00,
+ X_2X_SYSTEM_CLOCK = 0x01,
+ X_4X_SYSTEM_CLOCK = 0x02,
+ X_8X_SYSTEM_CLOCK = 0x03,
+ X_16X_SYSTEM_CLOCK = 0x04,
+ X_32X_SYSTEM_CLOCK = 0x05,
+ X_64X_SYSTEM_CLOCK = 0x06,
+ X_128X_SYSTEM_CLOCK = 0x07,
+ X_256X_SYSTEM_CLOCK = 0x08,
+ X_512X_SYSTEM_CLOCK = 0x09,
+ X_1024X_SYSTEM_CLOCK = 0x0a,
+ X_2048X_SYSTEM_CLOCK = 0x0b,
+ X_4092X_SYSTEM_CLOCK = 0x0c,
+ X_8192X_SYSTEM_CLOCK = 0x0d,
+ X_16384X_SYSTEM_CLOCK = 0x0e,
+ X_32768X_SYSTEM_CLOCK = 0x0f,
+ VC_CNTL_STATUS = 0x00009704,
+ RP_BUSY_bit = 1 << 0,
+ RG_BUSY_bit = 1 << 1,
+ VC_BUSY_bit = 1 << 2,
+ CLAMP_DETECT_bit = 1 << 3,
+ VC_CONFIG = 0x00009718,
+ WRITE_DIS_bit = 1 << 0,
+ GPR_DATA_PHASE_ADJ_mask = 0x07 << 1,
+ GPR_DATA_PHASE_ADJ_shift = 1,
+ X_LATENCY_BASE_0_CYCLES = 0x00,
+ X_LATENCY_BASE_1_CYCLES = 0x01,
+ X_LATENCY_BASE_2_CYCLES = 0x02,
+ X_LATENCY_BASE_3_CYCLES = 0x03,
+ TD_SIMD_SYNC_ADJ_mask = 0x07 << 4,
+ TD_SIMD_SYNC_ADJ_shift = 4,
+ X_0_CYCLES_DELAY = 0x00,
+ X_1_CYCLES_DELAY = 0x01,
+ X_2_CYCLES_DELAY = 0x02,
+ X_3_CYCLES_DELAY = 0x03,
+ X_4_CYCLES_DELAY = 0x04,
+ X_5_CYCLES_DELAY = 0x05,
+ X_6_CYCLES_DELAY = 0x06,
+ X_7_CYCLES_DELAY = 0x07,
+ SMX_DC_CTL0 = 0x0000a020,
+ WR_GATHER_STREAM0_bit = 1 << 0,
+ WR_GATHER_STREAM1_bit = 1 << 1,
+ WR_GATHER_STREAM2_bit = 1 << 2,
+ WR_GATHER_STREAM3_bit = 1 << 3,
+ WR_GATHER_SCRATCH_bit = 1 << 4,
+ WR_GATHER_REDUC_BUF_bit = 1 << 5,
+ WR_GATHER_RING_BUF_bit = 1 << 6,
+ WR_GATHER_F_BUF_bit = 1 << 7,
+ DISABLE_CACHES_bit = 1 << 8,
+ AUTO_FLUSH_INVAL_EN_bit = 1 << 10,
+ AUTO_FLUSH_EN_bit = 1 << 11,
+ AUTO_FLUSH_CNT_mask = 0xffff << 12,
+ AUTO_FLUSH_CNT_shift = 12,
+ MC_RD_STALL_FACTOR_mask = 0x03 << 28,
+ MC_RD_STALL_FACTOR_shift = 28,
+ MC_WR_STALL_FACTOR_mask = 0x03 << 30,
+ MC_WR_STALL_FACTOR_shift = 30,
+ SMX_DC_CTL1 = 0x0000a024,
+ OP_FIFO_SKID_mask = 0x7f << 0,
+ OP_FIFO_SKID_shift = 0,
+ CACHE_LINE_SIZE_bit = 1 << 8,
+ MULTI_FLUSH_MODE_bit = 1 << 9,
+ MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_mask = 0x0f << 10,
+ MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_shift = 10,
+ DISABLE_WR_GATHER_RD_HIT_FORCE_EVICT_bit = 1 << 16,
+ DISABLE_WR_GATHER_RD_HIT_COMP_VLDS_CHECK_bit = 1 << 17,
+ DISABLE_FLUSH_ES_ALSO_INVALS_bit = 1 << 18,
+ DISABLE_FLUSH_GS_ALSO_INVALS_bit = 1 << 19,
+ SMX_DC_CTL2 = 0x0000a028,
+ INVALIDATE_CACHES_bit = 1 << 0,
+ CACHES_INVALID_bit = 1 << 1,
+ CACHES_DIRTY_bit = 1 << 2,
+ FLUSH_ALL_bit = 1 << 4,
+ FLUSH_GS_THREADS_bit = 1 << 8,
+ FLUSH_ES_THREADS_bit = 1 << 9,
+ SMX_DC_MC_INTF_CTL = 0x0000a02c,
+ MC_RD_REQ_CRED_mask = 0xff << 0,
+ MC_RD_REQ_CRED_shift = 0,
+ MC_WR_REQ_CRED_mask = 0xff << 16,
+ MC_WR_REQ_CRED_shift = 16,
+ TD_PS_SAMPLER0_BORDER_RED = 0x0000a400,
+ TD_PS_SAMPLER0_BORDER_RED_num = 18,
+ TD_PS_SAMPLER0_BORDER_RED_offset = 16,
+ TD_PS_SAMPLER0_BORDER_GREEN = 0x0000a404,
+ TD_PS_SAMPLER0_BORDER_GREEN_num = 18,
+ TD_PS_SAMPLER0_BORDER_GREEN_offset = 16,
+ TD_PS_SAMPLER0_BORDER_BLUE = 0x0000a408,
+ TD_PS_SAMPLER0_BORDER_BLUE_num = 18,
+ TD_PS_SAMPLER0_BORDER_BLUE_offset = 16,
+ TD_PS_SAMPLER0_BORDER_ALPHA = 0x0000a40c,
+ TD_PS_SAMPLER0_BORDER_ALPHA_num = 18,
+ TD_PS_SAMPLER0_BORDER_ALPHA_offset = 16,
+ TD_VS_SAMPLER0_BORDER_RED = 0x0000a600,
+ TD_VS_SAMPLER0_BORDER_RED_num = 18,
+ TD_VS_SAMPLER0_BORDER_RED_offset = 16,
+ TD_VS_SAMPLER0_BORDER_GREEN = 0x0000a604,
+ TD_VS_SAMPLER0_BORDER_GREEN_num = 18,
+ TD_VS_SAMPLER0_BORDER_GREEN_offset = 16,
+ TD_VS_SAMPLER0_BORDER_BLUE = 0x0000a608,
+ TD_VS_SAMPLER0_BORDER_BLUE_num = 18,
+ TD_VS_SAMPLER0_BORDER_BLUE_offset = 16,
+ TD_VS_SAMPLER0_BORDER_ALPHA = 0x0000a60c,
+ TD_VS_SAMPLER0_BORDER_ALPHA_num = 18,
+ TD_VS_SAMPLER0_BORDER_ALPHA_offset = 16,
+ TD_GS_SAMPLER0_BORDER_RED = 0x0000a800,
+ TD_GS_SAMPLER0_BORDER_RED_num = 18,
+ TD_GS_SAMPLER0_BORDER_RED_offset = 16,
+ TD_GS_SAMPLER0_BORDER_GREEN = 0x0000a804,
+ TD_GS_SAMPLER0_BORDER_GREEN_num = 18,
+ TD_GS_SAMPLER0_BORDER_GREEN_offset = 16,
+ TD_GS_SAMPLER0_BORDER_BLUE = 0x0000a808,
+ TD_GS_SAMPLER0_BORDER_BLUE_num = 18,
+ TD_GS_SAMPLER0_BORDER_BLUE_offset = 16,
+ TD_GS_SAMPLER0_BORDER_ALPHA = 0x0000a80c,
+ TD_GS_SAMPLER0_BORDER_ALPHA_num = 18,
+ TD_GS_SAMPLER0_BORDER_ALPHA_offset = 16,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL = 0x0000aa00,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL_num = 18,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_mask = 0x07 << 0,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_shift = 0,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_mask = 0x07 << 3,
+ TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_shift = 3,
+ DB_DEPTH_SIZE = 0x00028000,
+ PITCH_TILE_MAX_mask = 0x3ff << 0,
+ PITCH_TILE_MAX_shift = 0,
+ SLICE_TILE_MAX_mask = 0xfffff << 10,
+ SLICE_TILE_MAX_shift = 10,
+ DB_DEPTH_VIEW = 0x00028004,
+ SLICE_START_mask = 0x7ff << 0,
+ SLICE_START_shift = 0,
+ SLICE_MAX_mask = 0x7ff << 13,
+ SLICE_MAX_shift = 13,
+ DB_DEPTH_BASE = 0x0002800c,
+ DB_DEPTH_INFO = 0x00028010,
+ DB_DEPTH_INFO__FORMAT_mask = 0x07 << 0,
+ DB_DEPTH_INFO__FORMAT_shift = 0,
+ DEPTH_INVALID = 0x00,
+ DEPTH_16 = 0x01,
+ DEPTH_X8_24 = 0x02,
+ DEPTH_8_24 = 0x03,
+ DEPTH_X8_24_FLOAT = 0x04,
+ DEPTH_8_24_FLOAT = 0x05,
+ DEPTH_32_FLOAT = 0x06,
+ DEPTH_X24_8_32_FLOAT = 0x07,
+ DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3,
+ DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15,
+ DB_DEPTH_INFO__ARRAY_MODE_shift = 15,
+ ARRAY_2D_TILED_THIN1 = 0x04,
+ TILE_SURFACE_ENABLE_bit = 1 << 25,
+ TILE_COMPACT_bit = 1 << 26,
+ ZRANGE_PRECISION_bit = 1 << 31,
+ DB_HTILE_DATA_BASE = 0x00028014,
+ DB_STENCIL_CLEAR = 0x00028028,
+ DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0,
+ DB_STENCIL_CLEAR__CLEAR_shift = 0,
+ MIN_mask = 0xff << 16,
+ MIN_shift = 16,
+ DB_DEPTH_CLEAR = 0x0002802c,
+ PA_SC_SCREEN_SCISSOR_TL = 0x00028030,
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0x7fff << 0,
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0x7fff << 16,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift = 16,
+ PA_SC_SCREEN_SCISSOR_BR = 0x00028034,
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0x7fff << 0,
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0x7fff << 16,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift = 16,
+ CB_COLOR0_BASE = 0x00028040,
+ CB_COLOR0_BASE_num = 8,
+ CB_COLOR0_SIZE = 0x00028060,
+ CB_COLOR0_SIZE_num = 8,
+/* PITCH_TILE_MAX_mask = 0x3ff << 0, */
+/* PITCH_TILE_MAX_shift = 0, */
+/* SLICE_TILE_MAX_mask = 0xfffff << 10, */
+/* SLICE_TILE_MAX_shift = 10, */
+ CB_COLOR0_VIEW = 0x00028080,
+ CB_COLOR0_VIEW_num = 8,
+/* SLICE_START_mask = 0x7ff << 0, */
+/* SLICE_START_shift = 0, */
+/* SLICE_MAX_mask = 0x7ff << 13, */
+/* SLICE_MAX_shift = 13, */
+ CB_COLOR0_INFO = 0x000280a0,
+ CB_COLOR0_INFO_num = 8,
+ ENDIAN_mask = 0x03 << 0,
+ ENDIAN_shift = 0,
+ ENDIAN_NONE = 0x00,
+ ENDIAN_8IN16 = 0x01,
+ ENDIAN_8IN32 = 0x02,
+ ENDIAN_8IN64 = 0x03,
+ CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2,
+ CB_COLOR0_INFO__FORMAT_shift = 2,
+ COLOR_INVALID = 0x00,
+ COLOR_8 = 0x01,
+ COLOR_4_4 = 0x02,
+ COLOR_3_3_2 = 0x03,
+ COLOR_16 = 0x05,
+ COLOR_16_FLOAT = 0x06,
+ COLOR_8_8 = 0x07,
+ COLOR_5_6_5 = 0x08,
+ COLOR_6_5_5 = 0x09,
+ COLOR_1_5_5_5 = 0x0a,
+ COLOR_4_4_4_4 = 0x0b,
+ COLOR_5_5_5_1 = 0x0c,
+ COLOR_32 = 0x0d,
+ COLOR_32_FLOAT = 0x0e,
+ COLOR_16_16 = 0x0f,
+ COLOR_16_16_FLOAT = 0x10,
+ COLOR_8_24 = 0x11,
+ COLOR_8_24_FLOAT = 0x12,
+ COLOR_24_8 = 0x13,
+ COLOR_24_8_FLOAT = 0x14,
+ COLOR_10_11_11 = 0x15,
+ COLOR_10_11_11_FLOAT = 0x16,
+ COLOR_11_11_10 = 0x17,
+ COLOR_11_11_10_FLOAT = 0x18,
+ COLOR_2_10_10_10 = 0x19,
+ COLOR_8_8_8_8 = 0x1a,
+ COLOR_10_10_10_2 = 0x1b,
+ COLOR_X24_8_32_FLOAT = 0x1c,
+ COLOR_32_32 = 0x1d,
+ COLOR_32_32_FLOAT = 0x1e,
+ COLOR_16_16_16_16 = 0x1f,
+ COLOR_16_16_16_16_FLOAT = 0x20,
+ COLOR_32_32_32_32 = 0x22,
+ COLOR_32_32_32_32_FLOAT = 0x23,
+ CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8,
+ CB_COLOR0_INFO__ARRAY_MODE_shift = 8,
+ ARRAY_LINEAR_GENERAL = 0x00,
+ ARRAY_LINEAR_ALIGNED = 0x01,
+/* ARRAY_2D_TILED_THIN1 = 0x04, */
+ NUMBER_TYPE_mask = 0x07 << 12,
+ NUMBER_TYPE_shift = 12,
+ NUMBER_UNORM = 0x00,
+ NUMBER_SNORM = 0x01,
+ NUMBER_USCALED = 0x02,
+ NUMBER_SSCALED = 0x03,
+ NUMBER_UINT = 0x04,
+ NUMBER_SINT = 0x05,
+ NUMBER_SRGB = 0x06,
+ NUMBER_FLOAT = 0x07,
+ CB_COLOR0_INFO__READ_SIZE_bit = 1 << 15,
+ COMP_SWAP_mask = 0x03 << 16,
+ COMP_SWAP_shift = 16,
+ SWAP_STD = 0x00,
+ SWAP_ALT = 0x01,
+ SWAP_STD_REV = 0x02,
+ SWAP_ALT_REV = 0x03,
+ CB_COLOR0_INFO__TILE_MODE_mask = 0x03 << 18,
+ CB_COLOR0_INFO__TILE_MODE_shift = 18,
+ TILE_DISABLE = 0x00,
+ TILE_CLEAR_ENABLE = 0x01,
+ TILE_FRAG_ENABLE = 0x02,
+ BLEND_CLAMP_bit = 1 << 20,
+ CLEAR_COLOR_bit = 1 << 21,
+ BLEND_BYPASS_bit = 1 << 22,
+ BLEND_FLOAT32_bit = 1 << 23,
+ SIMPLE_FLOAT_bit = 1 << 24,
+ CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 25,
+/* TILE_COMPACT_bit = 1 << 26, */
+ SOURCE_FORMAT_bit = 1 << 27,
+ CB_COLOR0_TILE = 0x000280c0,
+ CB_COLOR0_TILE_num = 8,
+ CB_COLOR0_FRAG = 0x000280e0,
+ CB_COLOR0_FRAG_num = 8,
+ CB_COLOR0_MASK = 0x00028100,
+ CB_COLOR0_MASK_num = 8,
+ CMASK_BLOCK_MAX_mask = 0xfff << 0,
+ CMASK_BLOCK_MAX_shift = 0,
+ FMASK_TILE_MAX_mask = 0xfffff << 12,
+ FMASK_TILE_MAX_shift = 12,
+ CB_CLEAR_RED = 0x00028120,
+ CB_CLEAR_GREEN = 0x00028124,
+ CB_CLEAR_BLUE = 0x00028128,
+ CB_CLEAR_ALPHA = 0x0002812c,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift = 0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0_num = 16,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0,
+ SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift = 0,
+ PA_SC_WINDOW_OFFSET = 0x00028200,
+ WINDOW_X_OFFSET_mask = 0x7fff << 0,
+ WINDOW_X_OFFSET_shift = 0,
+ WINDOW_Y_OFFSET_mask = 0x7fff << 16,
+ WINDOW_Y_OFFSET_shift = 16,
+ PA_SC_WINDOW_SCISSOR_TL = 0x00028204,
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift = 16,
+ WINDOW_OFFSET_DISABLE_bit = 1 << 31,
+ PA_SC_WINDOW_SCISSOR_BR = 0x00028208,
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift = 16,
+ PA_SC_CLIPRECT_RULE = 0x0002820c,
+ CLIP_RULE_mask = 0xffff << 0,
+ CLIP_RULE_shift = 0,
+ PA_SC_CLIPRECT_0_TL = 0x00028210,
+ PA_SC_CLIPRECT_0_TL_num = 4,
+ PA_SC_CLIPRECT_0_TL_offset = 8,
+ PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_CLIPRECT_0_TL__TL_X_shift = 0,
+ PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_CLIPRECT_0_TL__TL_Y_shift = 16,
+ PA_SC_CLIPRECT_0_BR = 0x00028214,
+ PA_SC_CLIPRECT_0_BR_num = 4,
+ PA_SC_CLIPRECT_0_BR_offset = 8,
+ PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_CLIPRECT_0_BR__BR_X_shift = 0,
+ PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_CLIPRECT_0_BR__BR_Y_shift = 16,
+ CB_TARGET_MASK = 0x00028238,
+ TARGET0_ENABLE_mask = 0x0f << 0,
+ TARGET0_ENABLE_shift = 0,
+ TARGET1_ENABLE_mask = 0x0f << 4,
+ TARGET1_ENABLE_shift = 4,
+ TARGET2_ENABLE_mask = 0x0f << 8,
+ TARGET2_ENABLE_shift = 8,
+ TARGET3_ENABLE_mask = 0x0f << 12,
+ TARGET3_ENABLE_shift = 12,
+ TARGET4_ENABLE_mask = 0x0f << 16,
+ TARGET4_ENABLE_shift = 16,
+ TARGET5_ENABLE_mask = 0x0f << 20,
+ TARGET5_ENABLE_shift = 20,
+ TARGET6_ENABLE_mask = 0x0f << 24,
+ TARGET6_ENABLE_shift = 24,
+ TARGET7_ENABLE_mask = 0x0f << 28,
+ TARGET7_ENABLE_shift = 28,
+ CB_SHADER_MASK = 0x0002823c,
+ OUTPUT0_ENABLE_mask = 0x0f << 0,
+ OUTPUT0_ENABLE_shift = 0,
+ OUTPUT1_ENABLE_mask = 0x0f << 4,
+ OUTPUT1_ENABLE_shift = 4,
+ OUTPUT2_ENABLE_mask = 0x0f << 8,
+ OUTPUT2_ENABLE_shift = 8,
+ OUTPUT3_ENABLE_mask = 0x0f << 12,
+ OUTPUT3_ENABLE_shift = 12,
+ OUTPUT4_ENABLE_mask = 0x0f << 16,
+ OUTPUT4_ENABLE_shift = 16,
+ OUTPUT5_ENABLE_mask = 0x0f << 20,
+ OUTPUT5_ENABLE_shift = 20,
+ OUTPUT6_ENABLE_mask = 0x0f << 24,
+ OUTPUT6_ENABLE_shift = 24,
+ OUTPUT7_ENABLE_mask = 0x0f << 28,
+ OUTPUT7_ENABLE_shift = 28,
+ PA_SC_GENERIC_SCISSOR_TL = 0x00028240,
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_shift = 0,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift = 16,
+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */
+ PA_SC_GENERIC_SCISSOR_BR = 0x00028244,
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_shift = 0,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift = 16,
+ PA_SC_VPORT_SCISSOR_0_TL = 0x00028250,
+ PA_SC_VPORT_SCISSOR_0_TL_num = 16,
+ PA_SC_VPORT_SCISSOR_0_TL_offset = 8,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x3fff << 0,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift = 0,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x3fff << 16,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift = 16,
+/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */
+ PA_SC_VPORT_SCISSOR_0_BR = 0x00028254,
+ PA_SC_VPORT_SCISSOR_0_BR_num = 16,
+ PA_SC_VPORT_SCISSOR_0_BR_offset = 8,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x3fff << 0,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift = 0,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x3fff << 16,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift = 16,
+ PA_SC_VPORT_ZMIN_0 = 0x000282d0,
+ PA_SC_VPORT_ZMIN_0_num = 16,
+ PA_SC_VPORT_ZMIN_0_offset = 8,
+ PA_SC_VPORT_ZMAX_0 = 0x000282d4,
+ PA_SC_VPORT_ZMAX_0_num = 16,
+ PA_SC_VPORT_ZMAX_0_offset = 8,
+ SX_MISC = 0x00028350,
+ MULTIPASS_bit = 1 << 0,
+ SQ_VTX_SEMANTIC_0 = 0x00028380,
+ SQ_VTX_SEMANTIC_0_num = 32,
+/* SEMANTIC_ID_mask = 0xff << 0, */
+/* SEMANTIC_ID_shift = 0, */
+ VGT_MAX_VTX_INDX = 0x00028400,
+ VGT_MIN_VTX_INDX = 0x00028404,
+ VGT_INDX_OFFSET = 0x00028408,
+ VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c,
+ SX_ALPHA_TEST_CONTROL = 0x00028410,
+ ALPHA_FUNC_mask = 0x07 << 0,
+ ALPHA_FUNC_shift = 0,
+ REF_NEVER = 0x00,
+ REF_LESS = 0x01,
+ REF_EQUAL = 0x02,
+ REF_LEQUAL = 0x03,
+ REF_GREATER = 0x04,
+ REF_NOTEQUAL = 0x05,
+ REF_GEQUAL = 0x06,
+ REF_ALWAYS = 0x07,
+ ALPHA_TEST_ENABLE_bit = 1 << 3,
+ ALPHA_TEST_BYPASS_bit = 1 << 8,
+ CB_BLEND_RED = 0x00028414,
+ CB_BLEND_GREEN = 0x00028418,
+ CB_BLEND_BLUE = 0x0002841c,
+ CB_BLEND_ALPHA = 0x00028420,
+ CB_FOG_RED = 0x00028424,
+ CB_FOG_GREEN = 0x00028428,
+ CB_FOG_BLUE = 0x0002842c,
+ DB_STENCILREFMASK = 0x00028430,
+ STENCILREF_mask = 0xff << 0,
+ STENCILREF_shift = 0,
+ STENCILMASK_mask = 0xff << 8,
+ STENCILMASK_shift = 8,
+ STENCILWRITEMASK_mask = 0xff << 16,
+ STENCILWRITEMASK_shift = 16,
+ DB_STENCILREFMASK_BF = 0x00028434,
+ STENCILREF_BF_mask = 0xff << 0,
+ STENCILREF_BF_shift = 0,
+ STENCILMASK_BF_mask = 0xff << 8,
+ STENCILMASK_BF_shift = 8,
+ STENCILWRITEMASK_BF_mask = 0xff << 16,
+ STENCILWRITEMASK_BF_shift = 16,
+ SX_ALPHA_REF = 0x00028438,
+ PA_CL_VPORT_XSCALE_0 = 0x0002843c,
+ PA_CL_VPORT_XSCALE_0_num = 16,
+ PA_CL_VPORT_XSCALE_0_offset = 24,
+ PA_CL_VPORT_XOFFSET_0 = 0x00028440,
+ PA_CL_VPORT_XOFFSET_0_num = 16,
+ PA_CL_VPORT_XOFFSET_0_offset = 24,
+ PA_CL_VPORT_YSCALE_0 = 0x00028444,
+ PA_CL_VPORT_YSCALE_0_num = 16,
+ PA_CL_VPORT_YSCALE_0_offset = 24,
+ PA_CL_VPORT_YOFFSET_0 = 0x00028448,
+ PA_CL_VPORT_YOFFSET_0_num = 16,
+ PA_CL_VPORT_YOFFSET_0_offset = 24,
+ PA_CL_VPORT_ZSCALE_0 = 0x0002844c,
+ PA_CL_VPORT_ZSCALE_0_num = 16,
+ PA_CL_VPORT_ZSCALE_0_offset = 24,
+ PA_CL_VPORT_ZOFFSET_0 = 0x00028450,
+ PA_CL_VPORT_ZOFFSET_0_num = 16,
+ PA_CL_VPORT_ZOFFSET_0_offset = 24,
+ SPI_VS_OUT_ID_0 = 0x00028614,
+ SPI_VS_OUT_ID_0_num = 10,
+ SEMANTIC_0_mask = 0xff << 0,
+ SEMANTIC_0_shift = 0,
+ SEMANTIC_1_mask = 0xff << 8,
+ SEMANTIC_1_shift = 8,
+ SEMANTIC_2_mask = 0xff << 16,
+ SEMANTIC_2_shift = 16,
+ SEMANTIC_3_mask = 0xff << 24,
+ SEMANTIC_3_shift = 24,
+ SPI_PS_INPUT_CNTL_0 = 0x00028644,
+ SPI_PS_INPUT_CNTL_0_num = 32,
+ SEMANTIC_mask = 0xff << 0,
+ SEMANTIC_shift = 0,
+ DEFAULT_VAL_mask = 0x03 << 8,
+ DEFAULT_VAL_shift = 8,
+ X_0_0F = 0x00,
+ FLAT_SHADE_bit = 1 << 10,
+ SEL_CENTROID_bit = 1 << 11,
+ SEL_LINEAR_bit = 1 << 12,
+ CYL_WRAP_mask = 0x0f << 13,
+ CYL_WRAP_shift = 13,
+ PT_SPRITE_TEX_bit = 1 << 17,
+ SEL_SAMPLE_bit = 1 << 18,
+ SPI_VS_OUT_CONFIG = 0x000286c4,
+ VS_PER_COMPONENT_bit = 1 << 0,
+ VS_EXPORT_COUNT_mask = 0x1f << 1,
+ VS_EXPORT_COUNT_shift = 1,
+ VS_EXPORTS_FOG_bit = 1 << 8,
+ VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9,
+ VS_OUT_FOG_VEC_ADDR_shift = 9,
+ SPI_PS_IN_CONTROL_0 = 0x000286cc,
+ NUM_INTERP_mask = 0x3f << 0,
+ NUM_INTERP_shift = 0,
+ POSITION_ENA_bit = 1 << 8,
+ POSITION_CENTROID_bit = 1 << 9,
+ POSITION_ADDR_mask = 0x1f << 10,
+ POSITION_ADDR_shift = 10,
+ PARAM_GEN_mask = 0x0f << 15,
+ PARAM_GEN_shift = 15,
+ PARAM_GEN_ADDR_mask = 0x7f << 19,
+ PARAM_GEN_ADDR_shift = 19,
+ BARYC_SAMPLE_CNTL_mask = 0x03 << 26,
+ BARYC_SAMPLE_CNTL_shift = 26,
+ CENTROIDS_ONLY = 0x00,
+ CENTERS_ONLY = 0x01,
+ CENTROIDS_AND_CENTERS = 0x02,
+ UNDEF = 0x03,
+ PERSP_GRADIENT_ENA_bit = 1 << 28,
+ LINEAR_GRADIENT_ENA_bit = 1 << 29,
+ POSITION_SAMPLE_bit = 1 << 30,
+ BARYC_AT_SAMPLE_ENA_bit = 1 << 31,
+ SPI_PS_IN_CONTROL_1 = 0x000286d0,
+ GEN_INDEX_PIX_bit = 1 << 0,
+ GEN_INDEX_PIX_ADDR_mask = 0x7f << 1,
+ GEN_INDEX_PIX_ADDR_shift = 1,
+ FRONT_FACE_ENA_bit = 1 << 8,
+ FRONT_FACE_CHAN_mask = 0x03 << 9,
+ FRONT_FACE_CHAN_shift = 9,
+ FRONT_FACE_ALL_BITS_bit = 1 << 11,
+ FRONT_FACE_ADDR_mask = 0x1f << 12,
+ FRONT_FACE_ADDR_shift = 12,
+ FOG_ADDR_mask = 0x7f << 17,
+ FOG_ADDR_shift = 17,
+ FIXED_PT_POSITION_ENA_bit = 1 << 24,
+ FIXED_PT_POSITION_ADDR_mask = 0x1f << 25,
+ FIXED_PT_POSITION_ADDR_shift = 25,
+ SPI_INTERP_CONTROL_0 = 0x000286d4,
+ FLAT_SHADE_ENA_bit = 1 << 0,
+ PNT_SPRITE_ENA_bit = 1 << 1,
+ PNT_SPRITE_OVRD_X_mask = 0x07 << 2,
+ PNT_SPRITE_OVRD_X_shift = 2,
+ SPI_PNT_SPRITE_SEL_0 = 0x00,
+ SPI_PNT_SPRITE_SEL_1 = 0x01,
+ SPI_PNT_SPRITE_SEL_S = 0x02,
+ SPI_PNT_SPRITE_SEL_T = 0x03,
+ SPI_PNT_SPRITE_SEL_NONE = 0x04,
+ PNT_SPRITE_OVRD_Y_mask = 0x07 << 5,
+ PNT_SPRITE_OVRD_Y_shift = 5,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_OVRD_Z_mask = 0x07 << 8,
+ PNT_SPRITE_OVRD_Z_shift = 8,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_OVRD_W_mask = 0x07 << 11,
+ PNT_SPRITE_OVRD_W_shift = 11,
+/* SPI_PNT_SPRITE_SEL_0 = 0x00, */
+/* SPI_PNT_SPRITE_SEL_1 = 0x01, */
+/* SPI_PNT_SPRITE_SEL_S = 0x02, */
+/* SPI_PNT_SPRITE_SEL_T = 0x03, */
+/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */
+ PNT_SPRITE_TOP_1_bit = 1 << 14,
+ SPI_INPUT_Z = 0x000286d8,
+ PROVIDE_Z_TO_SPI_bit = 1 << 0,
+ SPI_FOG_CNTL = 0x000286dc,
+ PASS_FOG_THROUGH_PS_bit = 1 << 0,
+ PIXEL_FOG_FUNC_mask = 0x03 << 1,
+ PIXEL_FOG_FUNC_shift = 1,
+ SPI_FOG_NONE = 0x00,
+ SPI_FOG_EXP = 0x01,
+ SPI_FOG_EXP2 = 0x02,
+ SPI_FOG_LINEAR = 0x03,
+ PIXEL_FOG_SRC_SEL_bit = 1 << 3,
+ VS_FOG_CLAMP_DISABLE_bit = 1 << 4,
+ SPI_FOG_FUNC_SCALE = 0x000286e0,
+ SPI_FOG_FUNC_BIAS = 0x000286e4,
+ CB_BLEND0_CONTROL = 0x00028780,
+ CB_BLEND0_CONTROL_num = 8,
+ COLOR_SRCBLEND_mask = 0x1f << 0,
+ COLOR_SRCBLEND_shift = 0,
+ COLOR_COMB_FCN_mask = 0x07 << 5,
+ COLOR_COMB_FCN_shift = 5,
+ COLOR_DESTBLEND_mask = 0x1f << 8,
+ COLOR_DESTBLEND_shift = 8,
+ OPACITY_WEIGHT_bit = 1 << 13,
+ ALPHA_SRCBLEND_mask = 0x1f << 16,
+ ALPHA_SRCBLEND_shift = 16,
+ ALPHA_COMB_FCN_mask = 0x07 << 21,
+ ALPHA_COMB_FCN_shift = 21,
+ ALPHA_DESTBLEND_mask = 0x1f << 24,
+ ALPHA_DESTBLEND_shift = 24,
+ SEPARATE_ALPHA_BLEND_bit = 1 << 29,
+ VGT_DMA_BASE_HI = 0x000287e4,
+ VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0,
+ VGT_DMA_BASE_HI__BASE_ADDR_shift = 0,
+ VGT_DMA_BASE = 0x000287e8,
+ VGT_DRAW_INITIATOR = 0x000287f0,
+ SOURCE_SELECT_mask = 0x03 << 0,
+ SOURCE_SELECT_shift = 0,
+ DI_SRC_SEL_DMA = 0x00,
+ DI_SRC_SEL_IMMEDIATE = 0x01,
+ DI_SRC_SEL_AUTO_INDEX = 0x02,
+ DI_SRC_SEL_RESERVED = 0x03,
+ MAJOR_MODE_mask = 0x03 << 2,
+ MAJOR_MODE_shift = 2,
+ DI_MAJOR_MODE_0 = 0x00,
+ DI_MAJOR_MODE_1 = 0x01,
+ SPRITE_EN_bit = 1 << 4,
+ NOT_EOP_bit = 1 << 5,
+ USE_OPAQUE_bit = 1 << 6,
+ VGT_IMMED_DATA = 0x000287f4,
+ VGT_EVENT_ADDRESS_REG = 0x000287f8,
+ ADDRESS_LOW_mask = 0xfffffff << 0,
+ ADDRESS_LOW_shift = 0,
+ DB_DEPTH_CONTROL = 0x00028800,
+ STENCIL_ENABLE_bit = 1 << 0,
+ Z_ENABLE_bit = 1 << 1,
+ Z_WRITE_ENABLE_bit = 1 << 2,
+ ZFUNC_mask = 0x07 << 4,
+ ZFUNC_shift = 4,
+ FRAG_NEVER = 0x00,
+ FRAG_LESS = 0x01,
+ FRAG_EQUAL = 0x02,
+ FRAG_LEQUAL = 0x03,
+ FRAG_GREATER = 0x04,
+ FRAG_NOTEQUAL = 0x05,
+ FRAG_GEQUAL = 0x06,
+ FRAG_ALWAYS = 0x07,
+ BACKFACE_ENABLE_bit = 1 << 7,
+ STENCILFUNC_mask = 0x07 << 8,
+ STENCILFUNC_shift = 8,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ STENCILFAIL_mask = 0x07 << 11,
+ STENCILFAIL_shift = 11,
+ STENCIL_KEEP = 0x00,
+ STENCIL_ZERO = 0x01,
+ STENCIL_REPLACE = 0x02,
+ STENCIL_INCR_CLAMP = 0x03,
+ STENCIL_DECR_CLAMP = 0x04,
+ STENCIL_INVERT = 0x05,
+ STENCIL_INCR_WRAP = 0x06,
+ STENCIL_DECR_WRAP = 0x07,
+ STENCILZPASS_mask = 0x07 << 14,
+ STENCILZPASS_shift = 14,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZFAIL_mask = 0x07 << 17,
+ STENCILZFAIL_shift = 17,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILFUNC_BF_mask = 0x07 << 20,
+ STENCILFUNC_BF_shift = 20,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ STENCILFAIL_BF_mask = 0x07 << 23,
+ STENCILFAIL_BF_shift = 23,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZPASS_BF_mask = 0x07 << 26,
+ STENCILZPASS_BF_shift = 26,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ STENCILZFAIL_BF_mask = 0x07 << 29,
+ STENCILZFAIL_BF_shift = 29,
+/* STENCIL_KEEP = 0x00, */
+/* STENCIL_ZERO = 0x01, */
+/* STENCIL_REPLACE = 0x02, */
+/* STENCIL_INCR_CLAMP = 0x03, */
+/* STENCIL_DECR_CLAMP = 0x04, */
+/* STENCIL_INVERT = 0x05, */
+/* STENCIL_INCR_WRAP = 0x06, */
+/* STENCIL_DECR_WRAP = 0x07, */
+ CB_BLEND_CONTROL = 0x00028804,
+/* COLOR_SRCBLEND_mask = 0x1f << 0, */
+/* COLOR_SRCBLEND_shift = 0, */
+ BLEND_ZERO = 0x00,
+ BLEND_ONE = 0x01,
+ BLEND_SRC_COLOR = 0x02,
+ BLEND_ONE_MINUS_SRC_COLOR = 0x03,
+ BLEND_SRC_ALPHA = 0x04,
+ BLEND_ONE_MINUS_SRC_ALPHA = 0x05,
+ BLEND_DST_ALPHA = 0x06,
+ BLEND_ONE_MINUS_DST_ALPHA = 0x07,
+ BLEND_DST_COLOR = 0x08,
+ BLEND_ONE_MINUS_DST_COLOR = 0x09,
+ BLEND_SRC_ALPHA_SATURATE = 0x0a,
+ BLEND_BOTH_SRC_ALPHA = 0x0b,
+ BLEND_BOTH_INV_SRC_ALPHA = 0x0c,
+ BLEND_CONSTANT_COLOR = 0x0d,
+ BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e,
+ BLEND_SRC1_COLOR = 0x0f,
+ BLEND_INV_SRC1_COLOR = 0x10,
+ BLEND_SRC1_ALPHA = 0x11,
+ BLEND_INV_SRC1_ALPHA = 0x12,
+ BLEND_CONSTANT_ALPHA = 0x13,
+ BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14,
+/* COLOR_COMB_FCN_mask = 0x07 << 5, */
+/* COLOR_COMB_FCN_shift = 5, */
+ COMB_DST_PLUS_SRC = 0x00,
+ COMB_SRC_MINUS_DST = 0x01,
+ COMB_MIN_DST_SRC = 0x02,
+ COMB_MAX_DST_SRC = 0x03,
+ COMB_DST_MINUS_SRC = 0x04,
+/* COLOR_DESTBLEND_mask = 0x1f << 8, */
+/* COLOR_DESTBLEND_shift = 8, */
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+/* OPACITY_WEIGHT_bit = 1 << 13, */
+/* ALPHA_SRCBLEND_mask = 0x1f << 16, */
+/* ALPHA_SRCBLEND_shift = 16, */
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+/* ALPHA_COMB_FCN_mask = 0x07 << 21, */
+/* ALPHA_COMB_FCN_shift = 21, */
+/* COMB_DST_PLUS_SRC = 0x00, */
+/* COMB_SRC_MINUS_DST = 0x01, */
+/* COMB_MIN_DST_SRC = 0x02, */
+/* COMB_MAX_DST_SRC = 0x03, */
+/* COMB_DST_MINUS_SRC = 0x04, */
+/* ALPHA_DESTBLEND_mask = 0x1f << 24, */
+/* ALPHA_DESTBLEND_shift = 24, */
+/* BLEND_ZERO = 0x00, */
+/* BLEND_ONE = 0x01, */
+/* BLEND_SRC_COLOR = 0x02, */
+/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */
+/* BLEND_SRC_ALPHA = 0x04, */
+/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */
+/* BLEND_DST_ALPHA = 0x06, */
+/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */
+/* BLEND_DST_COLOR = 0x08, */
+/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */
+/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */
+/* BLEND_BOTH_SRC_ALPHA = 0x0b, */
+/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */
+/* BLEND_CONSTANT_COLOR = 0x0d, */
+/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */
+/* BLEND_SRC1_COLOR = 0x0f, */
+/* BLEND_INV_SRC1_COLOR = 0x10, */
+/* BLEND_SRC1_ALPHA = 0x11, */
+/* BLEND_INV_SRC1_ALPHA = 0x12, */
+/* BLEND_CONSTANT_ALPHA = 0x13, */
+/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */
+/* SEPARATE_ALPHA_BLEND_bit = 1 << 29, */
+ CB_COLOR_CONTROL = 0x00028808,
+ FOG_ENABLE_bit = 1 << 0,
+ MULTIWRITE_ENABLE_bit = 1 << 1,
+ DITHER_ENABLE_bit = 1 << 2,
+ DEGAMMA_ENABLE_bit = 1 << 3,
+ SPECIAL_OP_mask = 0x07 << 4,
+ SPECIAL_OP_shift = 4,
+ SPECIAL_NORMAL = 0x00,
+ SPECIAL_DISABLE = 0x01,
+ SPECIAL_FAST_CLEAR = 0x02,
+ SPECIAL_FORCE_CLEAR = 0x03,
+ SPECIAL_EXPAND_COLOR = 0x04,
+ SPECIAL_EXPAND_TEXTURE = 0x05,
+ SPECIAL_EXPAND_SAMPLES = 0x06,
+ SPECIAL_RESOLVE_BOX = 0x07,
+ PER_MRT_BLEND_bit = 1 << 7,
+ TARGET_BLEND_ENABLE_mask = 0xff << 8,
+ TARGET_BLEND_ENABLE_shift = 8,
+ ROP3_mask = 0xff << 16,
+ ROP3_shift = 16,
+ DB_SHADER_CONTROL = 0x0002880c,
+ Z_EXPORT_ENABLE_bit = 1 << 0,
+ STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1,
+ Z_ORDER_mask = 0x03 << 4,
+ Z_ORDER_shift = 4,
+ LATE_Z = 0x00,
+ EARLY_Z_THEN_LATE_Z = 0x01,
+ RE_Z = 0x02,
+ EARLY_Z_THEN_RE_Z = 0x03,
+ KILL_ENABLE_bit = 1 << 6,
+ COVERAGE_TO_MASK_ENABLE_bit = 1 << 7,
+ MASK_EXPORT_ENABLE_bit = 1 << 8,
+ DUAL_EXPORT_ENABLE_bit = 1 << 9,
+ EXEC_ON_HIER_FAIL_bit = 1 << 10,
+ EXEC_ON_NOOP_bit = 1 << 11,
+ PA_CL_CLIP_CNTL = 0x00028810,
+ UCP_ENA_0_bit = 1 << 0,
+ UCP_ENA_1_bit = 1 << 1,
+ UCP_ENA_2_bit = 1 << 2,
+ UCP_ENA_3_bit = 1 << 3,
+ UCP_ENA_4_bit = 1 << 4,
+ UCP_ENA_5_bit = 1 << 5,
+ PS_UCP_Y_SCALE_NEG_bit = 1 << 13,
+ PS_UCP_MODE_mask = 0x03 << 14,
+ PS_UCP_MODE_shift = 14,
+ CLIP_DISABLE_bit = 1 << 16,
+ UCP_CULL_ONLY_ENA_bit = 1 << 17,
+ BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18,
+ DX_CLIP_SPACE_DEF_bit = 1 << 19,
+ DIS_CLIP_ERR_DETECT_bit = 1 << 20,
+ VTX_KILL_OR_bit = 1 << 21,
+ DX_LINEAR_ATTR_CLIP_ENA_bit = 1 << 24,
+ VTE_VPORT_PROVOKE_DISABLE_bit = 1 << 25,
+ ZCLIP_NEAR_DISABLE_bit = 1 << 26,
+ ZCLIP_FAR_DISABLE_bit = 1 << 27,
+ PA_SU_SC_MODE_CNTL = 0x00028814,
+ CULL_FRONT_bit = 1 << 0,
+ CULL_BACK_bit = 1 << 1,
+ FACE_bit = 1 << 2,
+ POLY_MODE_mask = 0x03 << 3,
+ POLY_MODE_shift = 3,
+ X_DISABLE_POLY_MODE = 0x00,
+ X_DUAL_MODE = 0x01,
+ POLYMODE_FRONT_PTYPE_mask = 0x07 << 5,
+ POLYMODE_FRONT_PTYPE_shift = 5,
+ X_DRAW_POINTS = 0x00,
+ X_DRAW_LINES = 0x01,
+ X_DRAW_TRIANGLES = 0x02,
+ POLYMODE_BACK_PTYPE_mask = 0x07 << 8,
+ POLYMODE_BACK_PTYPE_shift = 8,
+/* X_DRAW_POINTS = 0x00, */
+/* X_DRAW_LINES = 0x01, */
+/* X_DRAW_TRIANGLES = 0x02, */
+ POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11,
+ POLY_OFFSET_BACK_ENABLE_bit = 1 << 12,
+ POLY_OFFSET_PARA_ENABLE_bit = 1 << 13,
+ VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16,
+ PROVOKING_VTX_LAST_bit = 1 << 19,
+ PERSP_CORR_DIS_bit = 1 << 20,
+ MULTI_PRIM_IB_ENA_bit = 1 << 21,
+ PA_CL_VTE_CNTL = 0x00028818,
+ VPORT_X_SCALE_ENA_bit = 1 << 0,
+ VPORT_X_OFFSET_ENA_bit = 1 << 1,
+ VPORT_Y_SCALE_ENA_bit = 1 << 2,
+ VPORT_Y_OFFSET_ENA_bit = 1 << 3,
+ VPORT_Z_SCALE_ENA_bit = 1 << 4,
+ VPORT_Z_OFFSET_ENA_bit = 1 << 5,
+ VTX_XY_FMT_bit = 1 << 8,
+ VTX_Z_FMT_bit = 1 << 9,
+ VTX_W0_FMT_bit = 1 << 10,
+ PERFCOUNTER_REF_bit = 1 << 11,
+ PA_CL_VS_OUT_CNTL = 0x0002881c,
+ CLIP_DIST_ENA_0_bit = 1 << 0,
+ CLIP_DIST_ENA_1_bit = 1 << 1,
+ CLIP_DIST_ENA_2_bit = 1 << 2,
+ CLIP_DIST_ENA_3_bit = 1 << 3,
+ CLIP_DIST_ENA_4_bit = 1 << 4,
+ CLIP_DIST_ENA_5_bit = 1 << 5,
+ CLIP_DIST_ENA_6_bit = 1 << 6,
+ CLIP_DIST_ENA_7_bit = 1 << 7,
+ CULL_DIST_ENA_0_bit = 1 << 8,
+ CULL_DIST_ENA_1_bit = 1 << 9,
+ CULL_DIST_ENA_2_bit = 1 << 10,
+ CULL_DIST_ENA_3_bit = 1 << 11,
+ CULL_DIST_ENA_4_bit = 1 << 12,
+ CULL_DIST_ENA_5_bit = 1 << 13,
+ CULL_DIST_ENA_6_bit = 1 << 14,
+ CULL_DIST_ENA_7_bit = 1 << 15,
+ USE_VTX_POINT_SIZE_bit = 1 << 16,
+ USE_VTX_EDGE_FLAG_bit = 1 << 17,
+ USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18,
+ USE_VTX_VIEWPORT_INDX_bit = 1 << 19,
+ USE_VTX_KILL_FLAG_bit = 1 << 20,
+ VS_OUT_MISC_VEC_ENA_bit = 1 << 21,
+ VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22,
+ VS_OUT_CCDIST1_VEC_ENA_bit = 1 << 23,
+ PA_CL_NANINF_CNTL = 0x00028820,
+ VTE_XY_INF_DISCARD_bit = 1 << 0,
+ VTE_Z_INF_DISCARD_bit = 1 << 1,
+ VTE_W_INF_DISCARD_bit = 1 << 2,
+ VTE_0XNANINF_IS_0_bit = 1 << 3,
+ VTE_XY_NAN_RETAIN_bit = 1 << 4,
+ VTE_Z_NAN_RETAIN_bit = 1 << 5,
+ VTE_W_NAN_RETAIN_bit = 1 << 6,
+ VTE_W_RECIP_NAN_IS_0_bit = 1 << 7,
+ VS_XY_NAN_TO_INF_bit = 1 << 8,
+ VS_XY_INF_RETAIN_bit = 1 << 9,
+ VS_Z_NAN_TO_INF_bit = 1 << 10,
+ VS_Z_INF_RETAIN_bit = 1 << 11,
+ VS_W_NAN_TO_INF_bit = 1 << 12,
+ VS_W_INF_RETAIN_bit = 1 << 13,
+ VS_CLIP_DIST_INF_DISCARD_bit = 1 << 14,
+ VTE_NO_OUTPUT_NEG_0_bit = 1 << 20,
+ SQ_PGM_START_PS = 0x00028840,
+ SQ_PGM_RESOURCES_PS = 0x00028850,
+ NUM_GPRS_mask = 0xff << 0,
+ NUM_GPRS_shift = 0,
+ STACK_SIZE_mask = 0xff << 8,
+ STACK_SIZE_shift = 8,
+ SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit = 1 << 21,
+ FETCH_CACHE_LINES_mask = 0x07 << 24,
+ FETCH_CACHE_LINES_shift = 24,
+ UNCACHED_FIRST_INST_bit = 1 << 28,
+ CLAMP_CONSTS_bit = 1 << 31,
+ SQ_PGM_EXPORTS_PS = 0x00028854,
+ EXPORT_MODE_mask = 0x1f << 0,
+ EXPORT_MODE_shift = 0,
+ SQ_PGM_START_VS = 0x00028858,
+ SQ_PGM_RESOURCES_VS = 0x00028868,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+ SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit = 1 << 21,
+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */
+/* FETCH_CACHE_LINES_shift = 24, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_START_GS = 0x0002886c,
+ SQ_PGM_RESOURCES_GS = 0x0002887c,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+ SQ_PGM_RESOURCES_GS__DX10_CLAMP_bit = 1 << 21,
+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */
+/* FETCH_CACHE_LINES_shift = 24, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_START_ES = 0x00028880,
+ SQ_PGM_RESOURCES_ES = 0x00028890,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+ SQ_PGM_RESOURCES_ES__DX10_CLAMP_bit = 1 << 21,
+/* FETCH_CACHE_LINES_mask = 0x07 << 24, */
+/* FETCH_CACHE_LINES_shift = 24, */
+/* UNCACHED_FIRST_INST_bit = 1 << 28, */
+ SQ_PGM_START_FS = 0x00028894,
+ SQ_PGM_RESOURCES_FS = 0x000288a4,
+/* NUM_GPRS_mask = 0xff << 0, */
+/* NUM_GPRS_shift = 0, */
+/* STACK_SIZE_mask = 0xff << 8, */
+/* STACK_SIZE_shift = 8, */
+ SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit = 1 << 21,
+ SQ_ESGS_RING_ITEMSIZE = 0x000288a8,
+ ITEMSIZE_mask = 0x7fff << 0,
+ ITEMSIZE_shift = 0,
+ SQ_GSVS_RING_ITEMSIZE = 0x000288ac,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_ESTMP_RING_ITEMSIZE = 0x000288b0,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GSTMP_RING_ITEMSIZE = 0x000288b4,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_VSTMP_RING_ITEMSIZE = 0x000288b8,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_PSTMP_RING_ITEMSIZE = 0x000288bc,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_FBUF_RING_ITEMSIZE = 0x000288c0,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_REDUC_RING_ITEMSIZE = 0x000288c4,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_GS_VERT_ITEMSIZE = 0x000288c8,
+/* ITEMSIZE_mask = 0x7fff << 0, */
+/* ITEMSIZE_shift = 0, */
+ SQ_PGM_CF_OFFSET_PS = 0x000288cc,
+ PGM_CF_OFFSET_mask = 0xfffff << 0,
+ PGM_CF_OFFSET_shift = 0,
+ SQ_PGM_CF_OFFSET_VS = 0x000288d0,
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+/* PGM_CF_OFFSET_shift = 0, */
+ SQ_PGM_CF_OFFSET_GS = 0x000288d4,
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+/* PGM_CF_OFFSET_shift = 0, */
+ SQ_PGM_CF_OFFSET_ES = 0x000288d8,
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+/* PGM_CF_OFFSET_shift = 0, */
+ SQ_PGM_CF_OFFSET_FS = 0x000288dc,
+/* PGM_CF_OFFSET_mask = 0xfffff << 0, */
+/* PGM_CF_OFFSET_shift = 0, */
+ SQ_VTX_SEMANTIC_CLEAR = 0x000288e0,
+ SQ_ALU_CONST_CACHE_PS_0 = 0x00028940,
+ SQ_ALU_CONST_CACHE_PS_0_num = 16,
+ SQ_ALU_CONST_CACHE_VS_0 = 0x00028980,
+ SQ_ALU_CONST_CACHE_VS_0_num = 16,
+ SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0,
+ SQ_ALU_CONST_CACHE_GS_0_num = 16,
+ PA_SU_POINT_SIZE = 0x00028a00,
+ PA_SU_POINT_SIZE__HEIGHT_mask = 0xffff << 0,
+ PA_SU_POINT_SIZE__HEIGHT_shift = 0,
+ PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16,
+ PA_SU_POINT_SIZE__WIDTH_shift = 16,
+ PA_SU_POINT_MINMAX = 0x00028a04,
+ MIN_SIZE_mask = 0xffff << 0,
+ MIN_SIZE_shift = 0,
+ MAX_SIZE_mask = 0xffff << 16,
+ MAX_SIZE_shift = 16,
+ PA_SU_LINE_CNTL = 0x00028a08,
+ PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0,
+ PA_SU_LINE_CNTL__WIDTH_shift = 0,
+ PA_SC_LINE_STIPPLE = 0x00028a0c,
+ LINE_PATTERN_mask = 0xffff << 0,
+ LINE_PATTERN_shift = 0,
+ REPEAT_COUNT_mask = 0xff << 16,
+ REPEAT_COUNT_shift = 16,
+ PATTERN_BIT_ORDER_bit = 1 << 28,
+ AUTO_RESET_CNTL_mask = 0x03 << 29,
+ AUTO_RESET_CNTL_shift = 29,
+ VGT_OUTPUT_PATH_CNTL = 0x00028a10,
+ PATH_SELECT_mask = 0x03 << 0,
+ PATH_SELECT_shift = 0,
+ VGT_OUTPATH_VTX_REUSE = 0x00,
+ VGT_OUTPATH_TESS_EN = 0x01,
+ VGT_OUTPATH_PASSTHRU = 0x02,
+ VGT_OUTPATH_GS_BLOCK = 0x03,
+ VGT_HOS_CNTL = 0x00028a14,
+ TESS_MODE_mask = 0x03 << 0,
+ TESS_MODE_shift = 0,
+ VGT_HOS_MAX_TESS_LEVEL = 0x00028a18,
+ VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c,
+ VGT_HOS_REUSE_DEPTH = 0x00028a20,
+ REUSE_DEPTH_mask = 0xff << 0,
+ REUSE_DEPTH_shift = 0,
+ VGT_GROUP_PRIM_TYPE = 0x00028a24,
+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0,
+ VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift = 0,
+ VGT_GRP_3D_POINT = 0x00,
+ VGT_GRP_3D_LINE = 0x01,
+ VGT_GRP_3D_TRI = 0x02,
+ VGT_GRP_3D_RECT = 0x03,
+ VGT_GRP_3D_QUAD = 0x04,
+ VGT_GRP_2D_COPY_RECT_V0 = 0x05,
+ VGT_GRP_2D_COPY_RECT_V1 = 0x06,
+ VGT_GRP_2D_COPY_RECT_V2 = 0x07,
+ VGT_GRP_2D_COPY_RECT_V3 = 0x08,
+ VGT_GRP_2D_FILL_RECT = 0x09,
+ VGT_GRP_2D_LINE = 0x0a,
+ VGT_GRP_2D_TRI = 0x0b,
+ VGT_GRP_PRIM_INDEX_LINE = 0x0c,
+ VGT_GRP_PRIM_INDEX_TRI = 0x0d,
+ VGT_GRP_PRIM_INDEX_QUAD = 0x0e,
+ VGT_GRP_3D_LINE_ADJ = 0x0f,
+ VGT_GRP_3D_TRI_ADJ = 0x10,
+ RETAIN_ORDER_bit = 1 << 14,
+ RETAIN_QUADS_bit = 1 << 15,
+ PRIM_ORDER_mask = 0x07 << 16,
+ PRIM_ORDER_shift = 16,
+ VGT_GRP_LIST = 0x00,
+ VGT_GRP_STRIP = 0x01,
+ VGT_GRP_FAN = 0x02,
+ VGT_GRP_LOOP = 0x03,
+ VGT_GRP_POLYGON = 0x04,
+ VGT_GROUP_FIRST_DECR = 0x00028a28,
+ FIRST_DECR_mask = 0x0f << 0,
+ FIRST_DECR_shift = 0,
+ VGT_GROUP_DECR = 0x00028a2c,
+ DECR_mask = 0x0f << 0,
+ DECR_shift = 0,
+ VGT_GROUP_VECT_0_CNTL = 0x00028a30,
+ COMP_X_EN_bit = 1 << 0,
+ COMP_Y_EN_bit = 1 << 1,
+ COMP_Z_EN_bit = 1 << 2,
+ COMP_W_EN_bit = 1 << 3,
+ VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8,
+ VGT_GROUP_VECT_0_CNTL__STRIDE_shift = 8,
+ SHIFT_mask = 0xff << 16,
+ SHIFT_shift = 16,
+ VGT_GROUP_VECT_1_CNTL = 0x00028a34,
+/* COMP_X_EN_bit = 1 << 0, */
+/* COMP_Y_EN_bit = 1 << 1, */
+/* COMP_Z_EN_bit = 1 << 2, */
+/* COMP_W_EN_bit = 1 << 3, */
+ VGT_GROUP_VECT_1_CNTL__STRIDE_mask = 0xff << 8,
+ VGT_GROUP_VECT_1_CNTL__STRIDE_shift = 8,
+/* SHIFT_mask = 0xff << 16, */
+/* SHIFT_shift = 16, */
+ VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38,
+ X_CONV_mask = 0x0f << 0,
+ X_CONV_shift = 0,
+ VGT_GRP_INDEX_16 = 0x00,
+ VGT_GRP_INDEX_32 = 0x01,
+ VGT_GRP_UINT_16 = 0x02,
+ VGT_GRP_UINT_32 = 0x03,
+ VGT_GRP_SINT_16 = 0x04,
+ VGT_GRP_SINT_32 = 0x05,
+ VGT_GRP_FLOAT_32 = 0x06,
+ VGT_GRP_AUTO_PRIM = 0x07,
+ VGT_GRP_FIX_1_23_TO_FLOAT = 0x08,
+ X_OFFSET_mask = 0x0f << 4,
+ X_OFFSET_shift = 4,
+ Y_CONV_mask = 0x0f << 8,
+ Y_CONV_shift = 8,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ Y_OFFSET_mask = 0x0f << 12,
+ Y_OFFSET_shift = 12,
+ Z_CONV_mask = 0x0f << 16,
+ Z_CONV_shift = 16,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ Z_OFFSET_mask = 0x0f << 20,
+ Z_OFFSET_shift = 20,
+ W_CONV_mask = 0x0f << 24,
+ W_CONV_shift = 24,
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+ W_OFFSET_mask = 0x0f << 28,
+ W_OFFSET_shift = 28,
+ VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c,
+/* X_CONV_mask = 0x0f << 0, */
+/* X_CONV_shift = 0, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* X_OFFSET_mask = 0x0f << 4, */
+/* X_OFFSET_shift = 4, */
+/* Y_CONV_mask = 0x0f << 8, */
+/* Y_CONV_shift = 8, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* Y_OFFSET_mask = 0x0f << 12, */
+/* Y_OFFSET_shift = 12, */
+/* Z_CONV_mask = 0x0f << 16, */
+/* Z_CONV_shift = 16, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* Z_OFFSET_mask = 0x0f << 20, */
+/* Z_OFFSET_shift = 20, */
+/* W_CONV_mask = 0x0f << 24, */
+/* W_CONV_shift = 24, */
+/* VGT_GRP_INDEX_16 = 0x00, */
+/* VGT_GRP_INDEX_32 = 0x01, */
+/* VGT_GRP_UINT_16 = 0x02, */
+/* VGT_GRP_UINT_32 = 0x03, */
+/* VGT_GRP_SINT_16 = 0x04, */
+/* VGT_GRP_SINT_32 = 0x05, */
+/* VGT_GRP_FLOAT_32 = 0x06, */
+/* VGT_GRP_AUTO_PRIM = 0x07, */
+/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */
+/* W_OFFSET_mask = 0x0f << 28, */
+/* W_OFFSET_shift = 28, */
+ VGT_GS_MODE = 0x00028a40,
+ MODE_mask = 0x03 << 0,
+ MODE_shift = 0,
+ GS_OFF = 0x00,
+ GS_SCENARIO_A = 0x01,
+ GS_SCENARIO_B = 0x02,
+ GS_SCENARIO_G = 0x03,
+ ES_PASSTHRU_bit = 1 << 2,
+ CUT_MODE_mask = 0x03 << 3,
+ CUT_MODE_shift = 3,
+ GS_CUT_1024 = 0x00,
+ GS_CUT_512 = 0x01,
+ GS_CUT_256 = 0x02,
+ GS_CUT_128 = 0x03,
+ PA_SC_MPASS_PS_CNTL = 0x00028a48,
+ MPASS_PIX_VEC_PER_PASS_mask = 0xfffff << 0,
+ MPASS_PIX_VEC_PER_PASS_shift = 0,
+ MPASS_PS_ENA_bit = 1 << 31,
+ PA_SC_MODE_CNTL = 0x00028a4c,
+ MSAA_ENABLE_bit = 1 << 0,
+ CLIPRECT_ENABLE_bit = 1 << 1,
+ LINE_STIPPLE_ENABLE_bit = 1 << 2,
+ MULTI_CHIP_PRIM_DISCARD_ENAB_bit = 1 << 3,
+ WALK_ORDER_ENABLE_bit = 1 << 4,
+ HALVE_DETAIL_SAMPLE_PERF_bit = 1 << 5,
+ WALK_SIZE_bit = 1 << 6,
+ WALK_ALIGNMENT_bit = 1 << 7,
+ WALK_ALIGN8_PRIM_FITS_ST_bit = 1 << 8,
+ TILE_COVER_NO_SCISSOR_bit = 1 << 9,
+ KILL_PIX_POST_HI_Z_bit = 1 << 10,
+ KILL_PIX_POST_DETAIL_MASK_bit = 1 << 11,
+ MULTI_CHIP_SUPERTILE_ENABLE_bit = 1 << 12,
+ TILE_COVER_DISABLE_bit = 1 << 13,
+ FORCE_EOV_CNTDWN_ENABLE_bit = 1 << 14,
+ FORCE_EOV_TILE_ENABLE_bit = 1 << 15,
+ FORCE_EOV_REZ_ENABLE_bit = 1 << 16,
+ PS_ITER_SAMPLE_bit = 1 << 17,
+ VGT_ENHANCE = 0x00028a50,
+ VGT_ENHANCE__MI_TIMESTAMP_RES_mask = 0x03 << 0,
+ VGT_ENHANCE__MI_TIMESTAMP_RES_shift = 0,
+ X_0_992_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_32 = 0x00,
+ X_0_496_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_16 = 0x01,
+ X_0_248_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_8 = 0x02,
+ X_0_124_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_4 = 0x03,
+ MISC_mask = 0x3fffffff << 2,
+ MISC_shift = 2,
+ VGT_GS_OUT_PRIM_TYPE = 0x00028a6c,
+ OUTPRIM_TYPE_mask = 0x3f << 0,
+ OUTPRIM_TYPE_shift = 0,
+ POINTLIST = 0x00,
+ LINESTRIP = 0x01,
+ TRISTRIP = 0x02,
+ VGT_DMA_SIZE = 0x00028a74,
+ VGT_DMA_INDEX_TYPE = 0x00028a7c,
+/* INDEX_TYPE_mask = 0x03 << 0, */
+/* INDEX_TYPE_shift = 0, */
+ VGT_INDEX_16 = 0x00,
+ VGT_INDEX_32 = 0x01,
+ SWAP_MODE_mask = 0x03 << 2,
+ SWAP_MODE_shift = 2,
+ VGT_DMA_SWAP_NONE = 0x00,
+ VGT_DMA_SWAP_16_BIT = 0x01,
+ VGT_DMA_SWAP_32_BIT = 0x02,
+ VGT_DMA_SWAP_WORD = 0x03,
+ VGT_PRIMITIVEID_EN = 0x00028a84,
+ PRIMITIVEID_EN_bit = 1 << 0,
+ VGT_DMA_NUM_INSTANCES = 0x00028a88,
+ VGT_EVENT_INITIATOR = 0x00028a90,
+ EVENT_TYPE_mask = 0x3f << 0,
+ EVENT_TYPE_shift = 0,
+ CACHE_FLUSH_TS = 0x04,
+ CONTEXT_DONE = 0x05,
+ CACHE_FLUSH = 0x06,
+ VIZQUERY_START = 0x07,
+ VIZQUERY_END = 0x08,
+ SC_WAIT_WC = 0x09,
+ MPASS_PS_CP_REFETCH = 0x0a,
+ MPASS_PS_RST_START = 0x0b,
+ MPASS_PS_INCR_START = 0x0c,
+ RST_PIX_CNT = 0x0d,
+ RST_VTX_CNT = 0x0e,
+ VS_PARTIAL_FLUSH = 0x0f,
+ PS_PARTIAL_FLUSH = 0x10,
+ CACHE_FLUSH_AND_INV_TS_EVENT = 0x14,
+ ZPASS_DONE = 0x15,
+ CACHE_FLUSH_AND_INV_EVENT = 0x16,
+ PERFCOUNTER_START = 0x17,
+ PERFCOUNTER_STOP = 0x18,
+ PIPELINESTAT_START = 0x19,
+ PIPELINESTAT_STOP = 0x1a,
+ PERFCOUNTER_SAMPLE = 0x1b,
+ FLUSH_ES_OUTPUT = 0x1c,
+ FLUSH_GS_OUTPUT = 0x1d,
+ SAMPLE_PIPELINESTAT = 0x1e,
+ SO_VGTSTREAMOUT_FLUSH = 0x1f,
+ SAMPLE_STREAMOUTSTATS = 0x20,
+ RESET_VTX_CNT = 0x21,
+ BLOCK_CONTEXT_DONE = 0x22,
+ CR_CONTEXT_DONE = 0x23,
+ VGT_FLUSH = 0x24,
+ CR_DONE_TS = 0x25,
+ SQ_NON_EVENT = 0x26,
+ SC_SEND_DB_VPZ = 0x27,
+ BOTTOM_OF_PIPE_TS = 0x28,
+ DB_CACHE_FLUSH_AND_INV = 0x2a,
+ ADDRESS_HI_mask = 0xff << 19,
+ ADDRESS_HI_shift = 19,
+ EXTENDED_EVENT_bit = 1 << 27,
+ VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94,
+ RESET_EN_bit = 1 << 0,
+ VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0,
+ VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4,
+ VGT_STRMOUT_EN = 0x00028ab0,
+ STREAMOUT_bit = 1 << 0,
+ VGT_REUSE_OFF = 0x00028ab4,
+ REUSE_OFF_bit = 1 << 0,
+ VGT_VTX_CNT_EN = 0x00028ab8,
+ VTX_CNT_EN_bit = 1 << 0,
+ VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0,
+ VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4,
+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8,
+ VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc,
+ VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0,
+ VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4,
+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8,
+ VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec,
+ VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0,
+ VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4,
+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8,
+ VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc,
+ VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00,
+ VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04,
+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0,
+ VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift = 0,
+ VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08,
+ VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c,
+ VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10,
+ VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14,
+ VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18,
+ VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c,
+ VGT_STRMOUT_BUFFER_EN = 0x00028b20,
+ BUFFER_0_EN_bit = 1 << 0,
+ BUFFER_1_EN_bit = 1 << 1,
+ BUFFER_2_EN_bit = 1 << 2,
+ BUFFER_3_EN_bit = 1 << 3,
+ VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28,
+ VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c,
+ VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30,
+ VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44,
+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48,
+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c,
+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift = 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x00028b50,
+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 0x3f << 0,
+ VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift = 0,
+ PA_SC_LINE_CNTL = 0x00028c00,
+ BRES_CNTL_mask = 0xff << 0,
+ BRES_CNTL_shift = 0,
+ USE_BRES_CNTL_bit = 1 << 8,
+ EXPAND_LINE_WIDTH_bit = 1 << 9,
+ LAST_PIXEL_bit = 1 << 10,
+ PA_SC_AA_CONFIG = 0x00028c04,
+ MSAA_NUM_SAMPLES_mask = 0x03 << 0,
+ MSAA_NUM_SAMPLES_shift = 0,
+ AA_MASK_CENTROID_DTMN_bit = 1 << 4,
+ MAX_SAMPLE_DIST_mask = 0x0f << 13,
+ MAX_SAMPLE_DIST_shift = 13,
+ PA_SU_VTX_CNTL = 0x00028c08,
+ PIX_CENTER_bit = 1 << 0,
+ PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1,
+ PA_SU_VTX_CNTL__ROUND_MODE_shift = 1,
+ X_TRUNCATE = 0x00,
+ X_ROUND = 0x01,
+ X_ROUND_TO_EVEN = 0x02,
+ X_ROUND_TO_ODD = 0x03,
+ QUANT_MODE_mask = 0x07 << 3,
+ QUANT_MODE_shift = 3,
+ X_1_16TH = 0x00,
+ X_1_8TH = 0x01,
+ X_1_4TH = 0x02,
+ X_1_2 = 0x03,
+ X_1 = 0x04,
+ X_1_256TH = 0x05,
+ PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c,
+ PA_CL_GB_VERT_DISC_ADJ = 0x00028c10,
+ PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14,
+ PA_CL_GB_HORZ_DISC_ADJ = 0x00028c18,
+ PA_SC_AA_SAMPLE_LOCS_MCTX = 0x00028c1c,
+/* S0_X_mask = 0x0f << 0, */
+/* S0_X_shift = 0, */
+/* S0_Y_mask = 0x0f << 4, */
+/* S0_Y_shift = 4, */
+/* S1_X_mask = 0x0f << 8, */
+/* S1_X_shift = 8, */
+/* S1_Y_mask = 0x0f << 12, */
+/* S1_Y_shift = 12, */
+/* S2_X_mask = 0x0f << 16, */
+/* S2_X_shift = 16, */
+/* S2_Y_mask = 0x0f << 20, */
+/* S2_Y_shift = 20, */
+/* S3_X_mask = 0x0f << 24, */
+/* S3_X_shift = 24, */
+/* S3_Y_mask = 0x0f << 28, */
+/* S3_Y_shift = 28, */
+ PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX = 0x00028c20,
+/* S4_X_mask = 0x0f << 0, */
+/* S4_X_shift = 0, */
+/* S4_Y_mask = 0x0f << 4, */
+/* S4_Y_shift = 4, */
+/* S5_X_mask = 0x0f << 8, */
+/* S5_X_shift = 8, */
+/* S5_Y_mask = 0x0f << 12, */
+/* S5_Y_shift = 12, */
+/* S6_X_mask = 0x0f << 16, */
+/* S6_X_shift = 16, */
+/* S6_Y_mask = 0x0f << 20, */
+/* S6_Y_shift = 20, */
+/* S7_X_mask = 0x0f << 24, */
+/* S7_X_shift = 24, */
+/* S7_Y_mask = 0x0f << 28, */
+/* S7_Y_shift = 28, */
+ CB_CLRCMP_CONTROL = 0x00028c30,
+ CLRCMP_FCN_SRC_mask = 0x07 << 0,
+ CLRCMP_FCN_SRC_shift = 0,
+ CLRCMP_DRAW_ALWAYS = 0x00,
+ CLRCMP_DRAW_NEVER = 0x01,
+ CLRCMP_DRAW_ON_NEQ = 0x04,
+ CLRCMP_DRAW_ON_EQ = 0x05,
+ CLRCMP_FCN_DST_mask = 0x07 << 8,
+ CLRCMP_FCN_DST_shift = 8,
+/* CLRCMP_DRAW_ALWAYS = 0x00, */
+/* CLRCMP_DRAW_NEVER = 0x01, */
+/* CLRCMP_DRAW_ON_NEQ = 0x04, */
+/* CLRCMP_DRAW_ON_EQ = 0x05, */
+ CLRCMP_FCN_SEL_mask = 0x03 << 24,
+ CLRCMP_FCN_SEL_shift = 24,
+ CLRCMP_SEL_DST = 0x00,
+ CLRCMP_SEL_SRC = 0x01,
+ CLRCMP_SEL_AND = 0x02,
+ CB_CLRCMP_SRC = 0x00028c34,
+ CB_CLRCMP_DST = 0x00028c38,
+ CB_CLRCMP_MSK = 0x00028c3c,
+ PA_SC_AA_MASK = 0x00028c48,
+ VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58,
+ VTX_REUSE_DEPTH_mask = 0xff << 0,
+ VTX_REUSE_DEPTH_shift = 0,
+ VGT_OUT_DEALLOC_CNTL = 0x00028c5c,
+ DEALLOC_DIST_mask = 0x7f << 0,
+ DEALLOC_DIST_shift = 0,
+ DB_RENDER_CONTROL = 0x00028d0c,
+ DEPTH_CLEAR_ENABLE_bit = 1 << 0,
+ STENCIL_CLEAR_ENABLE_bit = 1 << 1,
+ DEPTH_COPY_bit = 1 << 2,
+ STENCIL_COPY_bit = 1 << 3,
+ RESUMMARIZE_ENABLE_bit = 1 << 4,
+ STENCIL_COMPRESS_DISABLE_bit = 1 << 5,
+ DEPTH_COMPRESS_DISABLE_bit = 1 << 6,
+ COPY_CENTROID_bit = 1 << 7,
+ COPY_SAMPLE_mask = 0x07 << 8,
+ COPY_SAMPLE_shift = 8,
+ ZPASS_INCREMENT_DISABLE_bit = 1 << 11,
+ DB_RENDER_OVERRIDE = 0x00028d10,
+ FORCE_HIZ_ENABLE_mask = 0x03 << 0,
+ FORCE_HIZ_ENABLE_shift = 0,
+ FORCE_OFF = 0x00,
+ FORCE_ENABLE = 0x01,
+ FORCE_DISABLE = 0x02,
+ FORCE_RESERVED = 0x03,
+ FORCE_HIS_ENABLE0_mask = 0x03 << 2,
+ FORCE_HIS_ENABLE0_shift = 2,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_HIS_ENABLE1_mask = 0x03 << 4,
+ FORCE_HIS_ENABLE1_shift = 4,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_SHADER_Z_ORDER_bit = 1 << 6,
+ FAST_Z_DISABLE_bit = 1 << 7,
+ FAST_STENCIL_DISABLE_bit = 1 << 8,
+ NOOP_CULL_DISABLE_bit = 1 << 9,
+ FORCE_COLOR_KILL_bit = 1 << 10,
+ FORCE_Z_READ_bit = 1 << 11,
+ FORCE_STENCIL_READ_bit = 1 << 12,
+ FORCE_FULL_Z_RANGE_mask = 0x03 << 13,
+ FORCE_FULL_Z_RANGE_shift = 13,
+/* FORCE_OFF = 0x00, */
+/* FORCE_ENABLE = 0x01, */
+/* FORCE_DISABLE = 0x02, */
+/* FORCE_RESERVED = 0x03, */
+ FORCE_QC_SMASK_CONFLICT_bit = 1 << 15,
+ DISABLE_VIEWPORT_CLAMP_bit = 1 << 16,
+ IGNORE_SC_ZRANGE_bit = 1 << 17,
+ DB_HTILE_SURFACE = 0x00028d24,
+ HTILE_WIDTH_bit = 1 << 0,
+ HTILE_HEIGHT_bit = 1 << 1,
+ LINEAR_bit = 1 << 2,
+ FULL_CACHE_bit = 1 << 3,
+ HTILE_USES_PRELOAD_WIN_bit = 1 << 4,
+ PRELOAD_bit = 1 << 5,
+ PREFETCH_WIDTH_mask = 0x3f << 6,
+ PREFETCH_WIDTH_shift = 6,
+ PREFETCH_HEIGHT_mask = 0x3f << 12,
+ PREFETCH_HEIGHT_shift = 12,
+ DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c,
+ COMPAREFUNC1_mask = 0x07 << 0,
+ COMPAREFUNC1_shift = 0,
+/* REF_NEVER = 0x00, */
+/* REF_LESS = 0x01, */
+/* REF_EQUAL = 0x02, */
+/* REF_LEQUAL = 0x03, */
+/* REF_GREATER = 0x04, */
+/* REF_NOTEQUAL = 0x05, */
+/* REF_GEQUAL = 0x06, */
+/* REF_ALWAYS = 0x07, */
+ COMPAREVALUE1_mask = 0xff << 4,
+ COMPAREVALUE1_shift = 4,
+ COMPAREMASK1_mask = 0xff << 12,
+ COMPAREMASK1_shift = 12,
+ ENABLE1_bit = 1 << 24,
+ DB_PRELOAD_CONTROL = 0x00028d30,
+ START_X_mask = 0xff << 0,
+ START_X_shift = 0,
+ START_Y_mask = 0xff << 8,
+ START_Y_shift = 8,
+ MAX_X_mask = 0xff << 16,
+ MAX_X_shift = 16,
+ MAX_Y_mask = 0xff << 24,
+ MAX_Y_shift = 24,
+ DB_PREFETCH_LIMIT = 0x00028d34,
+ DEPTH_HEIGHT_TILE_MAX_mask = 0x3ff << 0,
+ DEPTH_HEIGHT_TILE_MAX_shift = 0,
+ PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028df8,
+ POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff << 0,
+ POLY_OFFSET_NEG_NUM_DB_BITS_shift = 0,
+ POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8,
+ PA_SU_POLY_OFFSET_CLAMP = 0x00028dfc,
+ PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028e00,
+ PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028e04,
+ PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028e08,
+ PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028e0c,
+ PA_CL_POINT_X_RAD = 0x00028e10,
+ PA_CL_POINT_Y_RAD = 0x00028e14,
+ PA_CL_POINT_SIZE = 0x00028e18,
+ PA_CL_POINT_CULL_RAD = 0x00028e1c,
+ PA_CL_UCP_0_X = 0x00028e20,
+ PA_CL_UCP_0_X_num = 6,
+ PA_CL_UCP_0_X_offset = 16,
+ PA_CL_UCP_0_Y = 0x00028e24,
+ PA_CL_UCP_0_Y_num = 6,
+ PA_CL_UCP_0_Y_offset = 16,
+ PA_CL_UCP_0_Z = 0x00028e28,
+ PA_CL_UCP_0_Z_num = 6,
+ PA_CL_UCP_0_Z_offset = 16,
+ SQ_ALU_CONSTANT0_0 = 0x00030000,
+ SQ_ALU_CONSTANT1_0 = 0x00030004,
+ SQ_ALU_CONSTANT2_0 = 0x00030008,
+ SQ_ALU_CONSTANT3_0 = 0x0003000c,
+ SQ_VTX_CONSTANT_WORD0_0 = 0x00038000,
+ SQ_TEX_RESOURCE_WORD0_0 = 0x00038000,
+ DIM_mask = 0x07 << 0,
+ DIM_shift = 0,
+ SQ_TEX_DIM_1D = 0x00,
+ SQ_TEX_DIM_2D = 0x01,
+ SQ_TEX_DIM_3D = 0x02,
+ SQ_TEX_DIM_CUBEMAP = 0x03,
+ SQ_TEX_DIM_1D_ARRAY = 0x04,
+ SQ_TEX_DIM_2D_ARRAY = 0x05,
+ SQ_TEX_DIM_2D_MSAA = 0x06,
+ SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask = 0x0f << 3,
+ SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift = 3,
+ TILE_TYPE_bit = 1 << 7,
+ PITCH_mask = 0x7ff << 8,
+ PITCH_shift = 8,
+ TEX_WIDTH_mask = 0x1fff << 19,
+ TEX_WIDTH_shift = 19,
+ SQ_VTX_CONSTANT_WORD1_0 = 0x00038004,
+ SQ_TEX_RESOURCE_WORD1_0 = 0x00038004,
+ TEX_HEIGHT_mask = 0x1fff << 0,
+ TEX_HEIGHT_shift = 0,
+ TEX_DEPTH_mask = 0x1fff << 13,
+ TEX_DEPTH_shift = 13,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask = 0x3f << 26,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift = 26,
+ SQ_VTX_CONSTANT_WORD2_0 = 0x00038008,
+ BASE_ADDRESS_HI_mask = 0xff << 0,
+ BASE_ADDRESS_HI_shift = 0,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask = 0x7ff << 8,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift = 8,
+ SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit = 1 << 19,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift = 20,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask = 0x03 << 26,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift = 26,
+/* SQ_NUM_FORMAT_NORM = 0x00, */
+/* SQ_NUM_FORMAT_INT = 0x01, */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */
+ SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit = 1 << 28,
+ SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit = 1 << 29,
+ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask = 0x03 << 30,
+ SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift = 30,
+/* SQ_ENDIAN_NONE = 0x00, */
+/* SQ_ENDIAN_8IN16 = 0x01, */
+/* SQ_ENDIAN_8IN32 = 0x02, */
+ SQ_TEX_RESOURCE_WORD2_0 = 0x00038008,
+ SQ_VTX_CONSTANT_WORD3_0 = 0x0003800c,
+ MEM_REQUEST_SIZE_mask = 0x03 << 0,
+ MEM_REQUEST_SIZE_shift = 0,
+ SQ_TEX_RESOURCE_WORD3_0 = 0x0003800c,
+ SQ_TEX_RESOURCE_WORD4_0 = 0x00038010,
+ FORMAT_COMP_X_mask = 0x03 << 0,
+ FORMAT_COMP_X_shift = 0,
+ SQ_FORMAT_COMP_UNSIGNED = 0x00,
+ SQ_FORMAT_COMP_SIGNED = 0x01,
+ SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02,
+ FORMAT_COMP_Y_mask = 0x03 << 2,
+ FORMAT_COMP_Y_shift = 2,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ FORMAT_COMP_Z_mask = 0x03 << 4,
+ FORMAT_COMP_Z_shift = 4,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ FORMAT_COMP_W_mask = 0x03 << 6,
+ FORMAT_COMP_W_shift = 6,
+/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */
+/* SQ_FORMAT_COMP_SIGNED = 0x01, */
+/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */
+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask = 0x03 << 8,
+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift = 8,
+/* SQ_NUM_FORMAT_NORM = 0x00, */
+/* SQ_NUM_FORMAT_INT = 0x01, */
+/* SQ_NUM_FORMAT_SCALED = 0x02, */
+ SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit = 1 << 10,
+ SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit = 1 << 11,
+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask = 0x03 << 12,
+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift = 12,
+/* SQ_ENDIAN_NONE = 0x00, */
+/* SQ_ENDIAN_8IN16 = 0x01, */
+/* SQ_ENDIAN_8IN32 = 0x02, */
+ REQUEST_SIZE_mask = 0x03 << 14,
+ REQUEST_SIZE_shift = 14,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask = 0x07 << 16,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift = 16,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask = 0x07 << 19,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift = 19,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask = 0x07 << 22,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift = 22,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask = 0x07 << 25,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift = 25,
+/* SQ_SEL_X = 0x00, */
+/* SQ_SEL_Y = 0x01, */
+/* SQ_SEL_Z = 0x02, */
+/* SQ_SEL_W = 0x03, */
+/* SQ_SEL_0 = 0x04, */
+/* SQ_SEL_1 = 0x05, */
+ BASE_LEVEL_mask = 0x0f << 28,
+ BASE_LEVEL_shift = 28,
+ SQ_TEX_RESOURCE_WORD5_0 = 0x00038014,
+ LAST_LEVEL_mask = 0x0f << 0,
+ LAST_LEVEL_shift = 0,
+ BASE_ARRAY_mask = 0x1fff << 4,
+ BASE_ARRAY_shift = 4,
+ LAST_ARRAY_mask = 0x1fff << 17,
+ LAST_ARRAY_shift = 17,
+ SQ_TEX_RESOURCE_WORD6_0 = 0x00038018,
+ MPEG_CLAMP_mask = 0x03 << 0,
+ MPEG_CLAMP_shift = 0,
+ SQ_TEX_MPEG_CLAMP_OFF = 0x00,
+ SQ_TEX_MPEG_9 = 0x01,
+ SQ_TEX_MPEG_10 = 0x02,
+ PERF_MODULATION_mask = 0x07 << 5,
+ PERF_MODULATION_shift = 5,
+ INTERLACED_bit = 1 << 8,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_mask = 0x03 << 30,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_shift = 30,
+ SQ_TEX_VTX_INVALID_TEXTURE = 0x00,
+ SQ_TEX_VTX_INVALID_BUFFER = 0x01,
+ SQ_TEX_VTX_VALID_TEXTURE = 0x02,
+ SQ_TEX_VTX_VALID_BUFFER = 0x03,
+ SQ_VTX_CONSTANT_WORD6_0 = 0x00038018,
+ SQ_VTX_CONSTANT_WORD6_0__TYPE_mask = 0x03 << 30,
+ SQ_VTX_CONSTANT_WORD6_0__TYPE_shift = 30,
+/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */
+/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */
+/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */
+/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */
+ SQ_TEX_SAMPLER_WORD0_0 = 0x0003c000,
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x07 << 0,
+ SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0,
+ SQ_TEX_WRAP = 0x00,
+ SQ_TEX_MIRROR = 0x01,
+ SQ_TEX_CLAMP_LAST_TEXEL = 0x02,
+ SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03,
+ SQ_TEX_CLAMP_HALF_BORDER = 0x04,
+ SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05,
+ SQ_TEX_CLAMP_BORDER = 0x06,
+ SQ_TEX_MIRROR_ONCE_BORDER = 0x07,
+ CLAMP_Y_mask = 0x07 << 3,
+ CLAMP_Y_shift = 3,
+/* SQ_TEX_WRAP = 0x00, */
+/* SQ_TEX_MIRROR = 0x01, */
+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */
+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */
+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */
+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */
+/* SQ_TEX_CLAMP_BORDER = 0x06, */
+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */
+ CLAMP_Z_mask = 0x07 << 6,
+ CLAMP_Z_shift = 6,
+/* SQ_TEX_WRAP = 0x00, */
+/* SQ_TEX_MIRROR = 0x01, */
+/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */
+/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */
+/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */
+/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */
+/* SQ_TEX_CLAMP_BORDER = 0x06, */
+/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */
+ XY_MAG_FILTER_mask = 0x07 << 9,
+ XY_MAG_FILTER_shift = 9,
+ SQ_TEX_XY_FILTER_POINT = 0x00,
+ SQ_TEX_XY_FILTER_BILINEAR = 0x01,
+ SQ_TEX_XY_FILTER_BICUBIC = 0x02,
+ XY_MIN_FILTER_mask = 0x07 << 12,
+ XY_MIN_FILTER_shift = 12,
+/* SQ_TEX_XY_FILTER_POINT = 0x00, */
+/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */
+/* SQ_TEX_XY_FILTER_BICUBIC = 0x02, */
+ Z_FILTER_mask = 0x03 << 15,
+ Z_FILTER_shift = 15,
+ SQ_TEX_Z_FILTER_NONE = 0x00,
+ SQ_TEX_Z_FILTER_POINT = 0x01,
+ SQ_TEX_Z_FILTER_LINEAR = 0x02,
+ MIP_FILTER_mask = 0x03 << 17,
+ MIP_FILTER_shift = 17,
+/* SQ_TEX_Z_FILTER_NONE = 0x00, */
+/* SQ_TEX_Z_FILTER_POINT = 0x01, */
+/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */
+ BORDER_COLOR_TYPE_mask = 0x03 << 22,
+ BORDER_COLOR_TYPE_shift = 22,
+ SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00,
+ SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01,
+ SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02,
+ SQ_TEX_BORDER_COLOR_REGISTER = 0x03,
+ POINT_SAMPLING_CLAMP_bit = 1 << 24,
+ TEX_ARRAY_OVERRIDE_bit = 1 << 25,
+ DEPTH_COMPARE_FUNCTION_mask = 0x07 << 26,
+ DEPTH_COMPARE_FUNCTION_shift = 26,
+ SQ_TEX_DEPTH_COMPARE_NEVER = 0x00,
+ SQ_TEX_DEPTH_COMPARE_LESS = 0x01,
+ SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02,
+ SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03,
+ SQ_TEX_DEPTH_COMPARE_GREATER = 0x04,
+ SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05,
+ SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06,
+ SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07,
+ CHROMA_KEY_mask = 0x03 << 29,
+ CHROMA_KEY_shift = 29,
+ SQ_TEX_CHROMA_KEY_DISABLED = 0x00,
+ SQ_TEX_CHROMA_KEY_KILL = 0x01,
+ SQ_TEX_CHROMA_KEY_BLEND = 0x02,
+ LOD_USES_MINOR_AXIS_bit = 1 << 31,
+ SQ_TEX_SAMPLER_WORD1_0 = 0x0003c004,
+ MIN_LOD_mask = 0x3ff << 0,
+ MIN_LOD_shift = 0,
+ MAX_LOD_mask = 0x3ff << 10,
+ MAX_LOD_shift = 10,
+ SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_mask = 0xfff << 20,
+ SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift = 20,
+ SQ_TEX_SAMPLER_WORD2_0 = 0x0003c008,
+ LOD_BIAS_SEC_mask = 0xfff << 0,
+ LOD_BIAS_SEC_shift = 0,
+ MC_COORD_TRUNCATE_bit = 1 << 12,
+ SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 13,
+ HIGH_PRECISION_FILTER_bit = 1 << 14,
+ PERF_MIP_mask = 0x07 << 15,
+ PERF_MIP_shift = 15,
+ PERF_Z_mask = 0x03 << 18,
+ PERF_Z_shift = 18,
+ FETCH_4_bit = 1 << 26,
+ SAMPLE_IS_PCF_bit = 1 << 27,
+ SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31,
+ SQ_VTX_BASE_VTX_LOC = 0x0003cff0,
+ SQ_VTX_START_INST_LOC = 0x0003cff4,
+ SQ_LOOP_CONST_DX10_0 = 0x0003e200,
+ SQ_LOOP_CONST_0 = 0x0003e200,
+ SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0,
+ SQ_LOOP_CONST_0__COUNT_shift = 0,
+ INIT_mask = 0xfff << 12,
+ INIT_shift = 12,
+ INC_mask = 0xff << 24,
+ INC_shift = 24,
+ SQ_BOOL_CONST_0 = 0x0003e380,
+ SQ_BOOL_CONST_0_num = 3,
+
+} ;
+
+#endif /* _AUTOREGS */
+
diff --git a/src/r600_reg_r6xx.h b/src/r600_reg_r6xx.h
new file mode 100644
index 00000000..2e7dfa94
--- /dev/null
+++ b/src/r600_reg_r6xx.h
@@ -0,0 +1,494 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_R6xx_H_
+#define _R600_REG_R6xx_H_
+
+/*
+ * Registers for R6xx chips that are not documented yet
+ */
+
+enum {
+/* Layout: each register offset is followed by its fields; NAME_bit is a single-bit flag, NAME_mask/NAME_shift give a multi-bit field's in-place mask and its bit position. */
+ MM_INDEX = 0x0000,
+ MM_DATA = 0x0004,
+
+ SRBM_STATUS = 0x0e50,
+ RLC_RQ_PENDING_bit = 1 << 3,
+ RCU_RQ_PENDING_bit = 1 << 4,
+ GRBM_RQ_PENDING_bit = 1 << 5,
+ HI_RQ_PENDING_bit = 1 << 6,
+ IO_EXTERN_SIGNAL_bit = 1 << 7,
+ VMC_BUSY_bit = 1 << 8,
+ MCB_BUSY_bit = 1 << 9,
+ MCDZ_BUSY_bit = 1 << 10,
+ MCDY_BUSY_bit = 1 << 11,
+ MCDX_BUSY_bit = 1 << 12,
+ MCDW_BUSY_bit = 1 << 13,
+ SEM_BUSY_bit = 1 << 14,
+ SRBM_STATUS__RLC_BUSY_bit = 1 << 15,
+ PDMA_BUSY_bit = 1 << 16,
+ IH_BUSY_bit = 1 << 17,
+ CSC_BUSY_bit = 1 << 20,
+ CMC7_BUSY_bit = 1 << 21,
+ CMC6_BUSY_bit = 1 << 22,
+ CMC5_BUSY_bit = 1 << 23,
+ CMC4_BUSY_bit = 1 << 24,
+ CMC3_BUSY_bit = 1 << 25,
+ CMC2_BUSY_bit = 1 << 26,
+ CMC1_BUSY_bit = 1 << 27,
+ CMC0_BUSY_bit = 1 << 28,
+ BIF_BUSY_bit = 1 << 29,
+ IDCT_BUSY_bit = 1 << 30,
+
+ SRBM_READ_ERROR = 0x0e98,
+ READ_ADDRESS_mask = 0xffff << 2,
+ READ_ADDRESS_shift = 2,
+ READ_REQUESTER_HI_bit = 1 << 24,
+ READ_REQUESTER_GRBM_bit = 1 << 25,
+ READ_REQUESTER_RCU_bit = 1 << 26,
+ READ_REQUESTER_RLC_bit = 1 << 27,
+ READ_ERROR_bit = 1 << 31,
+
+ SRBM_INT_STATUS = 0x0ea4,
+ RDERR_INT_STAT_bit = 1 << 0,
+ GFX_CNTX_SWITCH_INT_STAT_bit = 1 << 1,
+ SRBM_INT_ACK = 0x0ea8,
+ RDERR_INT_ACK_bit = 1 << 0,
+ GFX_CNTX_SWITCH_INT_ACK_bit = 1 << 1,
+
+ R6XX_MC_VM_FB_LOCATION = 0x2180,
+
+ VENDOR_DEVICE_ID = 0x4000,
+
+ HDP_MEM_COHERENCY_FLUSH_CNTL = 0x5480,
+
+ D1GRPH_PRIMARY_SURFACE_ADDRESS = 0x6110,
+ D1GRPH_PITCH = 0x6120,
+ D1GRPH_Y_END = 0x6138,
+
+ GRBM_STATUS = 0x8010,
+ CMDFIFO_AVAIL_mask = 0x1f << 0,
+ CMDFIFO_AVAIL_shift = 0,
+ SRBM_RQ_PENDING_bit = 1 << 5,
+ CP_RQ_PENDING_bit = 1 << 6,
+ CF_RQ_PENDING_bit = 1 << 7,
+ PF_RQ_PENDING_bit = 1 << 8,
+ GRBM_EE_BUSY_bit = 1 << 10,
+ GRBM_STATUS__VC_BUSY_bit = 1 << 11,
+ DB03_CLEAN_bit = 1 << 12,
+ CB03_CLEAN_bit = 1 << 13,
+ VGT_BUSY_NO_DMA_bit = 1 << 16,
+ GRBM_STATUS__VGT_BUSY_bit = 1 << 17,
+ TA03_BUSY_bit = 1 << 18,
+ GRBM_STATUS__TC_BUSY_bit = 1 << 19,
+ SX_BUSY_bit = 1 << 20,
+ SH_BUSY_bit = 1 << 21,
+ SPI03_BUSY_bit = 1 << 22,
+ SMX_BUSY_bit = 1 << 23,
+ SC_BUSY_bit = 1 << 24,
+ PA_BUSY_bit = 1 << 25,
+ DB03_BUSY_bit = 1 << 26,
+ CR_BUSY_bit = 1 << 27,
+ CP_COHERENCY_BUSY_bit = 1 << 28,
+ GRBM_STATUS__CP_BUSY_bit = 1 << 29,
+ CB03_BUSY_bit = 1 << 30,
+ GUI_ACTIVE_bit = 1 << 31,
+ GRBM_STATUS2 = 0x8014,
+ CR_CLEAN_bit = 1 << 0,
+ SMX_CLEAN_bit = 1 << 1,
+ SPI0_BUSY_bit = 1 << 8,
+ SPI1_BUSY_bit = 1 << 9,
+ SPI2_BUSY_bit = 1 << 10,
+ SPI3_BUSY_bit = 1 << 11,
+ TA0_BUSY_bit = 1 << 12,
+ TA1_BUSY_bit = 1 << 13,
+ TA2_BUSY_bit = 1 << 14,
+ TA3_BUSY_bit = 1 << 15,
+ DB0_BUSY_bit = 1 << 16,
+ DB1_BUSY_bit = 1 << 17,
+ DB2_BUSY_bit = 1 << 18,
+ DB3_BUSY_bit = 1 << 19,
+ CB0_BUSY_bit = 1 << 20,
+ CB1_BUSY_bit = 1 << 21,
+ CB2_BUSY_bit = 1 << 22,
+ CB3_BUSY_bit = 1 << 23,
+ GRBM_SOFT_RESET = 0x8020,
+ SOFT_RESET_CP_bit = 1 << 0,
+ SOFT_RESET_CB_bit = 1 << 1,
+ SOFT_RESET_CR_bit = 1 << 2,
+ SOFT_RESET_DB_bit = 1 << 3,
+ SOFT_RESET_PA_bit = 1 << 5,
+ SOFT_RESET_SC_bit = 1 << 6,
+ SOFT_RESET_SMX_bit = 1 << 7,
+ SOFT_RESET_SPI_bit = 1 << 8,
+ SOFT_RESET_SH_bit = 1 << 9,
+ SOFT_RESET_SX_bit = 1 << 10,
+ SOFT_RESET_TC_bit = 1 << 11,
+ SOFT_RESET_TA_bit = 1 << 12,
+ SOFT_RESET_VC_bit = 1 << 13,
+ SOFT_RESET_VGT_bit = 1 << 14,
+ SOFT_RESET_GRBM_GCA_bit = 1 << 15,
+
+ WAIT_UNTIL = 0x8040,
+ WAIT_CP_DMA_IDLE_bit = 1 << 8,
+ WAIT_CMDFIFO_bit = 1 << 10,
+ WAIT_2D_IDLE_bit = 1 << 14,
+ WAIT_3D_IDLE_bit = 1 << 15,
+ WAIT_2D_IDLECLEAN_bit = 1 << 16,
+ WAIT_3D_IDLECLEAN_bit = 1 << 17,
+ WAIT_EXTERN_SIG_bit = 1 << 19,
+ CMDFIFO_ENTRIES_mask = 0x1f << 20,
+ CMDFIFO_ENTRIES_shift = 20,
+
+ GRBM_READ_ERROR = 0x8058,
+/* READ_ADDRESS_mask = 0xffff << 2, */
+/* READ_ADDRESS_shift = 2, */
+ READ_REQUESTER_SRBM_bit = 1 << 28,
+ READ_REQUESTER_CP_bit = 1 << 29,
+ READ_REQUESTER_WU_POLL_bit = 1 << 30,
+/* READ_ERROR_bit = 1 << 31, */
+
+ SCRATCH_REG0 = 0x8500,
+ SCRATCH_REG1 = 0x8504,
+ SCRATCH_REG2 = 0x8508,
+ SCRATCH_REG3 = 0x850c,
+ SCRATCH_REG4 = 0x8510,
+ SCRATCH_REG5 = 0x8514,
+ SCRATCH_REG6 = 0x8518,
+ SCRATCH_REG7 = 0x851c,
+ SCRATCH_UMSK = 0x8540,
+ SCRATCH_ADDR = 0x8544,
+
+ CP_COHER_CNTL = 0x85f0,
+ DEST_BASE_0_ENA_bit = 1 << 0,
+ DEST_BASE_1_ENA_bit = 1 << 1,
+ SO0_DEST_BASE_ENA_bit = 1 << 2,
+ SO1_DEST_BASE_ENA_bit = 1 << 3,
+ SO2_DEST_BASE_ENA_bit = 1 << 4,
+ SO3_DEST_BASE_ENA_bit = 1 << 5,
+ CB0_DEST_BASE_ENA_bit = 1 << 6,
+ CB1_DEST_BASE_ENA_bit = 1 << 7,
+ CB2_DEST_BASE_ENA_bit = 1 << 8,
+ CB3_DEST_BASE_ENA_bit = 1 << 9,
+ CB4_DEST_BASE_ENA_bit = 1 << 10,
+ CB5_DEST_BASE_ENA_bit = 1 << 11,
+ CB6_DEST_BASE_ENA_bit = 1 << 12,
+ CB7_DEST_BASE_ENA_bit = 1 << 13,
+ DB_DEST_BASE_ENA_bit = 1 << 14,
+ CR_DEST_BASE_ENA_bit = 1 << 15,
+ TC_ACTION_ENA_bit = 1 << 23,
+ VC_ACTION_ENA_bit = 1 << 24,
+ CB_ACTION_ENA_bit = 1 << 25,
+ DB_ACTION_ENA_bit = 1 << 26,
+ SH_ACTION_ENA_bit = 1 << 27,
+ SMX_ACTION_ENA_bit = 1 << 28,
+ CR0_ACTION_ENA_bit = 1 << 29,
+ CR1_ACTION_ENA_bit = 1 << 30,
+ CR2_ACTION_ENA_bit = 1 << 31,
+ CP_COHER_SIZE = 0x85f4,
+ CP_COHER_BASE = 0x85f8,
+ CP_COHER_STATUS = 0x85fc,
+ MATCHING_GFX_CNTX_mask = 0xff << 0,
+ MATCHING_GFX_CNTX_shift = 0,
+ MATCHING_CR_CNTX_mask = 0xffff << 8,
+ MATCHING_CR_CNTX_shift = 8,
+ STATUS_bit = 1 << 31,
+
+ CP_STALLED_STAT1 = 0x8674,
+ RBIU_TO_DMA_NOT_RDY_TO_RCV_bit = 1 << 0,
+ RBIU_TO_IBS_NOT_RDY_TO_RCV_bit = 1 << 1,
+ RBIU_TO_SEM_NOT_RDY_TO_RCV_bit = 1 << 2,
+ RBIU_TO_2DREGS_NOT_RDY_TO_RCV_bit = 1 << 3,
+ RBIU_TO_MEMWR_NOT_RDY_TO_RCV_bit = 1 << 4,
+ RBIU_TO_MEMRD_NOT_RDY_TO_RCV_bit = 1 << 5,
+ RBIU_TO_EOPD_NOT_RDY_TO_RCV_bit = 1 << 6,
+ RBIU_TO_RECT_NOT_RDY_TO_RCV_bit = 1 << 7,
+ RBIU_TO_STRMO_NOT_RDY_TO_RCV_bit = 1 << 8,
+ RBIU_TO_PSTAT_NOT_RDY_TO_RCV_bit = 1 << 9,
+ MIU_WAITING_ON_RDREQ_FREE_bit = 1 << 16,
+ MIU_WAITING_ON_WRREQ_FREE_bit = 1 << 17,
+ MIU_NEEDS_AVAIL_WRREQ_PHASE_bit = 1 << 18,
+ RCIU_WAITING_ON_GRBM_FREE_bit = 1 << 24,
+ RCIU_WAITING_ON_VGT_FREE_bit = 1 << 25,
+ RCIU_STALLED_ON_ME_READ_bit = 1 << 26,
+ RCIU_STALLED_ON_DMA_READ_bit = 1 << 27,
+ RCIU_HALTED_BY_REG_VIOLATION_bit = 1 << 28,
+ CP_STALLED_STAT2 = 0x8678,
+ PFP_TO_CSF_NOT_RDY_TO_RCV_bit = 1 << 0,
+ PFP_TO_MEQ_NOT_RDY_TO_RCV_bit = 1 << 1,
+ PFP_TO_VGT_NOT_RDY_TO_RCV_bit = 1 << 2,
+ PFP_HALTED_BY_INSTR_VIOLATION_bit = 1 << 3,
+ MULTIPASS_IB_PENDING_IN_PFP_bit = 1 << 4,
+ ME_BRUSH_WC_NOT_RDY_TO_RCV_bit = 1 << 8,
+ ME_STALLED_ON_BRUSH_LOGIC_bit = 1 << 9,
+ CR_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 10,
+ GFX_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 11,
+ ME_RCIU_NOT_RDY_TO_RCV_bit = 1 << 12,
+ ME_TO_CONST_NOT_RDY_TO_RCV_bit = 1 << 13,
+ ME_WAITING_DATA_FROM_PFP_bit = 1 << 14,
+ ME_WAITING_ON_PARTIAL_FLUSH_bit = 1 << 15,
+ RECT_FIFO_NEEDS_CR_RECT_DONE_bit = 1 << 16,
+ RECT_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 17,
+ EOPD_FIFO_NEEDS_SC_EOP_DONE_bit = 1 << 18,
+ EOPD_FIFO_NEEDS_SMX_EOP_DONE_bit = 1 << 19,
+ EOPD_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 20,
+ EOPD_FIFO_NEEDS_SIGNAL_SEM_bit = 1 << 21,
+ SO_NUMPRIM_FIFO_NEEDS_SOADDR_bit = 1 << 22,
+ SO_NUMPRIM_FIFO_NEEDS_NUMPRIM_bit = 1 << 23,
+ PIPE_STATS_FIFO_NEEDS_SAMPLE_bit = 1 << 24,
+ SURF_SYNC_NEEDS_IDLE_CNTXS_bit = 1 << 30,
+ SURF_SYNC_NEEDS_ALL_CLEAN_bit = 1 << 31,
+ CP_BUSY_STAT = 0x867c,
+ REG_BUS_FIFO_BUSY_bit = 1 << 0,
+ RING_FETCHING_DATA_bit = 1 << 1,
+ INDR1_FETCHING_DATA_bit = 1 << 2,
+ INDR2_FETCHING_DATA_bit = 1 << 3,
+ STATE_FETCHING_DATA_bit = 1 << 4,
+ PRED_FETCHING_DATA_bit = 1 << 5,
+ COHER_CNTR_NEQ_ZERO_bit = 1 << 6,
+ PFP_PARSING_PACKETS_bit = 1 << 7,
+ ME_PARSING_PACKETS_bit = 1 << 8,
+ RCIU_PFP_BUSY_bit = 1 << 9,
+ RCIU_ME_BUSY_bit = 1 << 10,
+ OUTSTANDING_READ_TAGS_bit = 1 << 11,
+ SEM_CMDFIFO_NOT_EMPTY_bit = 1 << 12,
+ SEM_FAILED_AND_HOLDING_bit = 1 << 13,
+ SEM_POLLING_FOR_PASS_bit = 1 << 14,
+ _3D_BUSY_bit = 1 << 15,
+ _2D_BUSY_bit = 1 << 16,
+ CP_STAT = 0x8680,
+ CSF_RING_BUSY_bit = 1 << 0,
+ CSF_WPTR_POLL_BUSY_bit = 1 << 1,
+ CSF_INDIRECT1_BUSY_bit = 1 << 2,
+ CSF_INDIRECT2_BUSY_bit = 1 << 3,
+ CSF_STATE_BUSY_bit = 1 << 4,
+ CSF_PREDICATE_BUSY_bit = 1 << 5,
+ CSF_BUSY_bit = 1 << 6,
+ MIU_RDREQ_BUSY_bit = 1 << 7,
+ MIU_WRREQ_BUSY_bit = 1 << 8,
+ ROQ_RING_BUSY_bit = 1 << 9,
+ ROQ_INDIRECT1_BUSY_bit = 1 << 10,
+ ROQ_INDIRECT2_BUSY_bit = 1 << 11,
+ ROQ_STATE_BUSY_bit = 1 << 12,
+ ROQ_PREDICATE_BUSY_bit = 1 << 13,
+ ROQ_ALIGN_BUSY_bit = 1 << 14,
+ PFP_BUSY_bit = 1 << 15,
+ MEQ_BUSY_bit = 1 << 16,
+ ME_BUSY_bit = 1 << 17,
+ QUERY_BUSY_bit = 1 << 18,
+ SEMAPHORE_BUSY_bit = 1 << 19,
+ INTERRUPT_BUSY_bit = 1 << 20,
+ SURFACE_SYNC_BUSY_bit = 1 << 21,
+ DMA_BUSY_bit = 1 << 22,
+ RCIU_BUSY_bit = 1 << 23,
+ CP_STAT__CP_BUSY_bit = 1 << 31,
+
+ CP_ME_CNTL = 0x86d8,
+ ME_STATMUX_mask = 0xff << 0,
+ ME_STATMUX_shift = 0,
+ ME_HALT_bit = 1 << 28,
+ CP_ME_STATUS = 0x86dc,
+
+ CP_RB_RPTR = 0x8700,
+ RB_RPTR_mask = 0xfffff << 0,
+ RB_RPTR_shift = 0,
+ CP_RB_WPTR_DELAY = 0x8704,
+ PRE_WRITE_TIMER_mask = 0xfffffff << 0,
+ PRE_WRITE_TIMER_shift = 0,
+ PRE_WRITE_LIMIT_mask = 0x0f << 28,
+ PRE_WRITE_LIMIT_shift = 28,
+
+ CP_ROQ_RB_STAT = 0x8780,
+ ROQ_RPTR_PRIMARY_mask = 0x3ff << 0,
+ ROQ_RPTR_PRIMARY_shift = 0,
+ ROQ_WPTR_PRIMARY_mask = 0x3ff << 16,
+ ROQ_WPTR_PRIMARY_shift = 16,
+ CP_ROQ_IB1_STAT = 0x8784,
+ ROQ_RPTR_INDIRECT1_mask = 0x3ff << 0,
+ ROQ_RPTR_INDIRECT1_shift = 0,
+ ROQ_WPTR_INDIRECT1_mask = 0x3ff << 16,
+ ROQ_WPTR_INDIRECT1_shift = 16,
+ CP_ROQ_IB2_STAT = 0x8788,
+ ROQ_RPTR_INDIRECT2_mask = 0x3ff << 0,
+ ROQ_RPTR_INDIRECT2_shift = 0,
+ ROQ_WPTR_INDIRECT2_mask = 0x3ff << 16,
+ ROQ_WPTR_INDIRECT2_shift = 16,
+
+ CP_MEQ_STAT = 0x8794,
+ MEQ_RPTR_mask = 0x3ff << 0,
+ MEQ_RPTR_shift = 0,
+ MEQ_WPTR_mask = 0x3ff << 16,
+ MEQ_WPTR_shift = 16,
+
+ CC_GC_SHADER_PIPE_CONFIG = 0x8950,
+ INACTIVE_QD_PIPES_mask = 0xff << 8,
+ INACTIVE_QD_PIPES_shift = 8,
+ R6XX_MAX_QD_PIPES = 8,
+ INACTIVE_SIMDS_mask = 0xff << 16,
+ INACTIVE_SIMDS_shift = 16,
+ R6XX_MAX_SIMDS = 8,
+ GC_USER_SHADER_PIPE_CONFIG = 0x8954,
+
+ VC_ENHANCE = 0x9714,
+ DB_DEBUG = 0x9830,
+ PREZ_MUST_WAIT_FOR_POSTZ_DONE = 1 << 31,
+
+ DB_WATERMARKS = 0x00009838,
+ DEPTH_FREE_mask = 0x1f << 0,
+ DEPTH_FREE_shift = 0,
+ DEPTH_FLUSH_mask = 0x3f << 5,
+ DEPTH_FLUSH_shift = 5,
+ FORCE_SUMMARIZE_mask = 0x0f << 11,
+ FORCE_SUMMARIZE_shift = 11,
+ DEPTH_PENDING_FREE_mask = 0x1f << 15,
+ DEPTH_PENDING_FREE_shift = 15,
+ DEPTH_CACHELINE_FREE_mask = 0x1f << 20,
+ DEPTH_CACHELINE_FREE_shift = 20,
+ EARLY_Z_PANIC_DISABLE_bit = 1 << 25,
+ LATE_Z_PANIC_DISABLE_bit = 1 << 26,
+ RE_Z_PANIC_DISABLE_bit = 1 << 27,
+ DB_EXTRA_DEBUG_mask = 0x0f << 28,
+ DB_EXTRA_DEBUG_shift = 28,
+
+ CP_RB_BASE = 0xc100,
+ CP_RB_CNTL = 0xc104,
+ RB_BUFSZ_mask = 0x3f << 0,
+ CP_RB_WPTR = 0xc114,
+ RB_WPTR_mask = 0xfffff << 0,
+ RB_WPTR_shift = 0,
+ CP_RB_RPTR_WR = 0xc108,
+ RB_RPTR_WR_mask = 0xfffff << 0,
+ RB_RPTR_WR_shift = 0,
+
+ CP_INT_STATUS = 0xc128,
+ DISABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 0,
+ ENABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 1,
+ SEM_SIGNAL_INT_STAT_bit = 1 << 18,
+ CNTX_BUSY_INT_STAT_bit = 1 << 19,
+ CNTX_EMPTY_INT_STAT_bit = 1 << 20,
+ WAITMEM_SEM_INT_STAT_bit = 1 << 21,
+ PRIV_INSTR_INT_STAT_bit = 1 << 22,
+ PRIV_REG_INT_STAT_bit = 1 << 23,
+ OPCODE_ERROR_INT_STAT_bit = 1 << 24,
+ SCRATCH_INT_STAT_bit = 1 << 25,
+ TIME_STAMP_INT_STAT_bit = 1 << 26,
+ RESERVED_BIT_ERROR_INT_STAT_bit = 1 << 27,
+ DMA_INT_STAT_bit = 1 << 28,
+ IB2_INT_STAT_bit = 1 << 29,
+ IB1_INT_STAT_bit = 1 << 30,
+ RB_INT_STAT_bit = 1 << 31,
+
+// SX_ALPHA_TEST_CONTROL = 0x00028410,
+ ALPHA_FUNC__REF_NEVER = 0,
+ ALPHA_FUNC__REF_ALWAYS = 7,
+// DB_SHADER_CONTROL = 0x0002880c,
+ Z_ORDER__EARLY_Z_THEN_LATE_Z = 2,
+// PA_SU_SC_MODE_CNTL = 0x00028814,
+// POLY_MODE_mask = 0x03 << 3,
+ POLY_MODE__TRIANGLES = 0, POLY_MODE__DUAL_MODE, /* DUAL_MODE = 1 (implicit) */
+// POLYMODE_FRONT_PTYPE_mask = 0x07 << 5,
+ POLYMODE_PTYPE__POINTS = 0, POLYMODE_PTYPE__LINES, POLYMODE_PTYPE__TRIANGLES, /* LINES = 1, TRIANGLES = 2 (implicit) */
+ PA_SC_AA_SAMPLE_LOCS_8S_WD1_M = 0x00028c20,
+ DB_SRESULTS_COMPARE_STATE0 = 0x00028d28, /* See autoregs: DB_SRESULTS_COMPARE_STATE1 */
+// DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c,
+ DB_ALPHA_TO_MASK = 0x00028d44, /* four 2-bit OFFSETn fields at bits 8, 10, 12, 14; each OFFSETn_mask must equal 0x03 << OFFSETn_shift */
+ ALPHA_TO_MASK_ENABLE = 1 << 0,
+ ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET0_shift = 8,
+ ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 10,
+ ALPHA_TO_MASK_OFFSET1_shift = 10,
+ ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 12,
+ ALPHA_TO_MASK_OFFSET2_shift = 12,
+ ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 14,
+ ALPHA_TO_MASK_OFFSET3_shift = 14,
+
+// SQ_VTX_CONSTANT_WORD2_0 = 0x00038008,
+// SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20,
+ FMT_INVALID=0, FMT_8, FMT_4_4, FMT_3_3_2, /* FMT_* values rely on implicit numbering; explicit "=N" entries skip gaps, so keep the order */
+ FMT_16=5, FMT_16_FLOAT, FMT_8_8,
+ FMT_5_6_5, FMT_6_5_5, FMT_1_5_5_5, FMT_4_4_4_4,
+ FMT_5_5_5_1, FMT_32, FMT_32_FLOAT, FMT_16_16,
+ FMT_16_16_FLOAT=16, FMT_8_24, FMT_8_24_FLOAT, FMT_24_8,
+ FMT_24_8_FLOAT, FMT_10_11_11, FMT_10_11_11_FLOAT, FMT_11_11_10,
+ FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8, FMT_10_10_10_2,
+ FMT_X24_8_32_FLOAT, FMT_32_32, FMT_32_32_FLOAT, FMT_16_16_16_16,
+ FMT_16_16_16_16_FLOAT=32, FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT,
+ FMT_1 = 37, FMT_GB_GR=39,
+ FMT_BG_RG, FMT_32_AS_8, FMT_32_AS_8_8, FMT_5_9_9_9_SHAREDEXP,
+ FMT_8_8_8, FMT_16_16_16, FMT_16_16_16_FLOAT, FMT_32_32_32,
+ FMT_32_32_32_FLOAT=48,
+
+// High level register file lengths: X_<stage>_num = entries for that shader stage, X_<stage> = index of the stage's first entry, X_all_num = sum of the per-stage counts; X_offset is presumably the byte stride between entries (TODO confirm)
+ SQ_ALU_CONSTANT = SQ_ALU_CONSTANT0_0, /* 256 PS, 256 VS */
+ SQ_ALU_CONSTANT_ps_num = 256,
+ SQ_ALU_CONSTANT_vs_num = 256,
+ SQ_ALU_CONSTANT_all_num = 512,
+ SQ_ALU_CONSTANT_offset = 16,
+ SQ_ALU_CONSTANT_ps = 0,
+ SQ_ALU_CONSTANT_vs = SQ_ALU_CONSTANT_ps + SQ_ALU_CONSTANT_ps_num,
+ SQ_TEX_RESOURCE = SQ_TEX_RESOURCE_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */
+ SQ_TEX_RESOURCE_ps_num = 160,
+ SQ_TEX_RESOURCE_vs_num = 160,
+ SQ_TEX_RESOURCE_fs_num = 16,
+ SQ_TEX_RESOURCE_gs_num = 160,
+ SQ_TEX_RESOURCE_all_num = 496,
+ SQ_TEX_RESOURCE_offset = 28,
+ SQ_TEX_RESOURCE_ps = 0,
+ SQ_TEX_RESOURCE_vs = SQ_TEX_RESOURCE_ps + SQ_TEX_RESOURCE_ps_num,
+ SQ_TEX_RESOURCE_fs = SQ_TEX_RESOURCE_vs + SQ_TEX_RESOURCE_vs_num,
+ SQ_TEX_RESOURCE_gs = SQ_TEX_RESOURCE_fs + SQ_TEX_RESOURCE_fs_num,
+ SQ_VTX_RESOURCE = SQ_VTX_CONSTANT_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */
+ SQ_VTX_RESOURCE_ps_num = 160,
+ SQ_VTX_RESOURCE_vs_num = 160,
+ SQ_VTX_RESOURCE_fs_num = 16,
+ SQ_VTX_RESOURCE_gs_num = 160,
+ SQ_VTX_RESOURCE_all_num = 496,
+ SQ_VTX_RESOURCE_offset = 28,
+ SQ_VTX_RESOURCE_ps = 0,
+ SQ_VTX_RESOURCE_vs = SQ_VTX_RESOURCE_ps + SQ_VTX_RESOURCE_ps_num,
+ SQ_VTX_RESOURCE_fs = SQ_VTX_RESOURCE_vs + SQ_VTX_RESOURCE_vs_num,
+ SQ_VTX_RESOURCE_gs = SQ_VTX_RESOURCE_fs + SQ_VTX_RESOURCE_fs_num,
+ SQ_TEX_SAMPLER_WORD = SQ_TEX_SAMPLER_WORD0_0, /* 18 per PS, VS, GS */
+ SQ_TEX_SAMPLER_WORD_ps_num = 18,
+ SQ_TEX_SAMPLER_WORD_vs_num = 18,
+ SQ_TEX_SAMPLER_WORD_gs_num = 18,
+ SQ_TEX_SAMPLER_WORD_all_num = 54,
+ SQ_TEX_SAMPLER_WORD_offset = 12,
+ SQ_TEX_SAMPLER_WORD_ps = 0,
+ SQ_TEX_SAMPLER_WORD_vs = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num,
+ SQ_TEX_SAMPLER_WORD_gs = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num,
+ SQ_LOOP_CONST = SQ_LOOP_CONST_0, /* 32 per PS, VS, GS */
+ SQ_LOOP_CONST_ps_num = 32,
+ SQ_LOOP_CONST_vs_num = 32,
+ SQ_LOOP_CONST_gs_num = 32,
+ SQ_LOOP_CONST_all_num = 96,
+ SQ_LOOP_CONST_offset = 4,
+ SQ_LOOP_CONST_ps = 0,
+ SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num,
+ SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num,
+} ;
+
+
+#endif
diff --git a/src/r600_reg_r7xx.h b/src/r600_reg_r7xx.h
new file mode 100644
index 00000000..e5c01c86
--- /dev/null
+++ b/src/r600_reg_r7xx.h
@@ -0,0 +1,149 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_R7xx_H_
+#define _R600_REG_R7xx_H_
+
+/*
+ * Register update for R7xx chips
+ */
+
+enum {
+/* R7xx register additions/overrides. Commented-out "// NAME = ..." lines are context markers (not definitions) naming the register or field that the constants following them belong to. */
+ R7XX_MC_VM_FB_LOCATION = 0x00002024,
+
+// GRBM_STATUS = 0x00008010,
+ R7XX_TA_BUSY_bit = 1 << 14,
+
+ R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x00008d8c,
+ RING0_OFFSET_mask = 0xff << 0,
+ RING0_OFFSET_shift = 0,
+ ISOLATE_ES_ENABLE_bit = 1 << 12,
+ ISOLATE_GS_ENABLE_bit = 1 << 13,
+ VS_PC_LIMIT_ENABLE_bit = 1 << 14,
+
+// SQ_ALU_WORD0 = 0x00008dfc,
+// SRC0_SEL_mask = 0x1ff << 0,
+// SRC1_SEL_mask = 0x1ff << 13,
+ R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4,
+ R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5,
+ R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6,
+ R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7,
+// INDEX_MODE_mask = 0x07 << 26,
+ R7xx_SQ_INDEX_GLOBAL = 0x05,
+ R7xx_SQ_INDEX_GLOBAL_AR_X = 0x06,
+ R6xx_SQ_ALU_WORD1_OP2 = 0x00008dfc,
+ R7xx_SQ_ALU_WORD1_OP2_V2 = 0x00008dfc, /* same value as the R6xx entry above; the R6xx_/R7xx_ field constants below differ in position and width */
+ R6xx_FOG_MERGE_bit = 1 << 5,
+ R6xx_OMOD_mask = 0x03 << 6,
+ R7xx_OMOD_mask = 0x03 << 5,
+ R6xx_OMOD_shift = 6,
+ R7xx_OMOD_shift = 5,
+ R6xx_SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8,
+ R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7,
+ R6xx_SQ_ALU_WORD1_OP2__ALU_INST_shift = 8,
+ R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7,
+ R7xx_SQ_OP2_INST_FREXP_64 = 0x07,
+ R7xx_SQ_OP2_INST_ADD_64 = 0x17,
+ R7xx_SQ_OP2_INST_MUL_64 = 0x1b,
+ R7xx_SQ_OP2_INST_FLT64_TO_FLT32 = 0x1c,
+ R7xx_SQ_OP2_INST_FLT32_TO_FLT64 = 0x1d,
+ R7xx_SQ_OP2_INST_LDEXP_64 = 0x7a,
+ R7xx_SQ_OP2_INST_FRACT_64 = 0x7b,
+ R7xx_SQ_OP2_INST_PRED_SETGT_64 = 0x7c,
+ R7xx_SQ_OP2_INST_PRED_SETE_64 = 0x7d,
+ R7xx_SQ_OP2_INST_PRED_SETGE_64 = 0x7e,
+// SQ_ALU_WORD1_OP3 = 0x00008dfc,
+// SRC2_SEL_mask = 0x1ff << 0,
+// R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4,
+// R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5,
+// R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6,
+// R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7,
+// SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13,
+ R7xx_SQ_OP3_INST_MULADD_64 = 0x08,
+ R7xx_SQ_OP3_INST_MULADD_64_M2 = 0x09,
+ R7xx_SQ_OP3_INST_MULADD_64_M4 = 0x0a,
+ R7xx_SQ_OP3_INST_MULADD_64_D2 = 0x0b,
+// SQ_CF_ALU_WORD1 = 0x00008dfc,
+ R6xx_USES_WATERFALL_bit = 1 << 25,
+ R7xx_SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, /* same bit position as R6xx_USES_WATERFALL_bit */
+// SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc,
+// ARRAY_BASE_mask = 0x1fff << 0,
+// TYPE_mask = 0x03 << 13,
+// SQ_EXPORT_PARAM = 0x02,
+// X_UNUSED_FOR_SX_EXPORTS = 0x03,
+// ELEM_SIZE_mask = 0x03 << 30,
+// SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc,
+// SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23,
+ R7xx_SQ_CF_INST_MEM_EXPORT = 0x3a,
+// SQ_CF_WORD1 = 0x00008dfc,
+// SQ_CF_WORD1__COUNT_mask = 0x07 << 10,
+ R7xx_COUNT_3_bit = 1 << 19,
+// SQ_CF_WORD1__CF_INST_mask = 0x7f << 23,
+ R7xx_SQ_CF_INST_END_PROGRAM = 0x19,
+ R7xx_SQ_CF_INST_WAIT_ACK = 0x1a,
+ R7xx_SQ_CF_INST_TEX_ACK = 0x1b,
+ R7xx_SQ_CF_INST_VTX_ACK = 0x1c,
+ R7xx_SQ_CF_INST_VTX_TC_ACK = 0x1d,
+// SQ_VTX_WORD0 = 0x00008dfc,
+// VTX_INST_mask = 0x1f << 0,
+ R7xx_SQ_VTX_INST_MEM = 0x02,
+// SQ_VTX_WORD2 = 0x00008dfc,
+ R7xx_SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20,
+
+// SQ_TEX_WORD0 = 0x00008dfc,
+// TEX_INST_mask = 0x1f << 0,
+ R7xx_X_MEMORY_READ = 0x02,
+ R7xx_SQ_TEX_INST_KEEP_GRADIENTS = 0x0a,
+ R7xx_X_FETCH4_LOAD4_INSTRUCTION_FOR_DX10_1 = 0x0f,
+ R7xx_SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24,
+
+ R7xx_PA_SC_EDGERULE = 0x00028230,
+ R7xx_SPI_THREAD_GROUPING = 0x000286c8, /* four 5-bit *_GROUPING fields at bits 0, 8, 16, 24 */
+ PS_GROUPING_mask = 0x1f << 0,
+ PS_GROUPING_shift = 0,
+ VS_GROUPING_mask = 0x1f << 8,
+ VS_GROUPING_shift = 8,
+ GS_GROUPING_mask = 0x1f << 16,
+ GS_GROUPING_shift = 16,
+ ES_GROUPING_mask = 0x1f << 24,
+ ES_GROUPING_shift = 24,
+ R7xx_CB_SHADER_CONTROL = 0x000287a0, /* RTn_ENABLE_bit is bit n, n = 0..7 */
+ RT0_ENABLE_bit = 1 << 0,
+ RT1_ENABLE_bit = 1 << 1,
+ RT2_ENABLE_bit = 1 << 2,
+ RT3_ENABLE_bit = 1 << 3,
+ RT4_ENABLE_bit = 1 << 4,
+ RT5_ENABLE_bit = 1 << 5,
+ RT6_ENABLE_bit = 1 << 6,
+ RT7_ENABLE_bit = 1 << 7,
+// DB_ALPHA_TO_MASK = 0x00028d44,
+ R7xx_OFFSET_ROUND_bit = 1 << 16,
+// SQ_TEX_SAMPLER_MISC_0 = 0x0003d03c,
+ R7xx_TRUNCATE_COORD_bit = 1 << 9,
+ R7xx_DISABLE_CUBE_WRAP_bit = 1 << 10,
+
+} ;
+
+#endif /* _R600_REG_R7xx_H_ */
diff --git a/src/r600_shader.h b/src/r600_shader.h
new file mode 100644
index 00000000..58f5a528
--- /dev/null
+++ b/src/r600_shader.h
@@ -0,0 +1,346 @@
+/*
+ * RadeonHD R6xx, R7xx DRI driver
+ *
+ * Copyright (C) 2008-2009 Alexander Deucher
+ * Copyright (C) 2008-2009 Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Shader macros
+ */
+
+#ifndef __SHADER_H__
+#define __SHADER_H__
+
+
+/* Restrictions of ALU instructions
+ * order of scalar ops is always x,y,z,w,t(rans), last to be indicated by last==1.
+ * max of 3 different src GPRs per instr.
+ * max of 4 different cfile constant components per instr.
+ * max of 2 (different) constants (any type) for t.
+ * bank swizzle (see below).
+ * GPR write stalls read of same register. Auto-replaced by PV/PS, NOP needed if registers are relative to
+ * different indices (gpr,loop,nothing).
+ * may use constant registers or constant cache, but not both.
+ */
+
+/* Bank_swizzle: (pp. 297ff)
+ * Only one of each x,y,z,w GPR component can be loaded per cycle (3 cycles per instr, called 0-2).
+ * per scalar instruction bank_swizzle can select which cycle each operand comes from. e.g.:
+ * SRC0 SRC1 SRC2 SWIZZLE cycle0 cycle1 cycle2
+ * 1.x 2.x 012 1.x 2.x -
+ * 3.x 1.y 201 1.y - 3.x
+ * 2.x 1.y 102 (1.y) (2.x) -
+ * If data is read in a cycle, multiple scalar instructions can reference it.
+ * Special case: square() - i.e. same component in src0+src1 doesn't need read port -> ignores swizzle for src1.
+ * No restrictions for constants or PV/PS.
+ * t can load multiple components in a single cycle slot, but has to share cycles with xyzw.
+ * t with single constant may not load GPRs or PV/PS in cycle 0 (careful with ALU_TRANS_210).
+ * t with two constants may only load GPRs or PV/PS in cycle 2.
+ */
+
+
+/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */
+
+
+// CF insts
+//
+// Field helpers for the control-flow (CF) instruction words. Apart from
+// I_COUNT they expand to their argument unchanged; they exist to label a
+// value at the call site of the CF_*DWORD* packing macros. The short tag
+// above each helper is the abbreviation used for that field.
+// addr
+#define ADDR(x) (x)
+// pc
+#define POP_COUNT(x) (x)
+// const
+#define CF_CONST(x) (x)
+// cond
+#define COND(x) (x) // SQ_COND_*
+// count: encoded as (count - 1); an argument of 0 stays 0
+#define I_COUNT(x) ((x) ? ((x) - 1) : 0)
+//r7xx
+#define COUNT_3(x) (x)
+// call count
+#define CALL_COUNT(x) (x)
+// eop
+#define END_OF_PROGRAM(x) (x)
+// vpm
+#define VALID_PIXEL_MODE(x) (x)
+// cf inst
+#define CF_INST(x) (x) // SQ_CF_INST_*
+
+// wqm
+#define WHOLE_QUAD_MODE(x) (x)
+// barrier
+#define BARRIER(x) (x)
+//kb0
+#define KCACHE_BANK0(x) (x)
+//kb1
+#define KCACHE_BANK1(x) (x)
+// km0/1
+#define KCACHE_MODE0(x) (x)
+#define KCACHE_MODE1(x) (x) // SQ_CF_KCACHE_*
+// kcache_addr0/1
+#define KCACHE_ADDR0(x) (x)
+#define KCACHE_ADDR1(x) (x)
+// uw
+#define USES_WATERFALL(x) (x)
+
+// Export target numbers and export type helper.
+// The CF_PIXEL_* / CF_POS* constants are presumably meant to be passed through
+// ARRAY_BASE() -- TODO confirm against the shader tables that use them.
+#define ARRAY_BASE(x) (x)
+// export pixel
+#define CF_PIXEL_MRT0 0
+#define CF_PIXEL_MRT1 1
+#define CF_PIXEL_MRT2 2
+#define CF_PIXEL_MRT3 3
+#define CF_PIXEL_MRT4 4
+#define CF_PIXEL_MRT5 5
+#define CF_PIXEL_MRT6 6
+#define CF_PIXEL_MRT7 7
+// *_FOG: r6xx only
+#define CF_PIXEL_MRT0_FOG 16
+#define CF_PIXEL_MRT1_FOG 17
+#define CF_PIXEL_MRT2_FOG 18
+#define CF_PIXEL_MRT3_FOG 19
+#define CF_PIXEL_MRT4_FOG 20
+#define CF_PIXEL_MRT5_FOG 21
+#define CF_PIXEL_MRT6_FOG 22
+#define CF_PIXEL_MRT7_FOG 23
+// NOTE: same numeric value as CF_POS1 below
+#define CF_PIXEL_Z 61
+// export pos
+#define CF_POS0 60
+#define CF_POS1 61
+#define CF_POS2 62
+#define CF_POS3 63
+// export param
+// 0...31
+#define TYPE(x) (x) // SQ_EXPORT_*
+// Disabled on purpose: the SQ_EXPORT_* names are presumably supplied by the
+// register headers instead -- verify before re-enabling.
+#if 0
+// type export
+#define SQ_EXPORT_PIXEL 0
+#define SQ_EXPORT_POS 1
+#define SQ_EXPORT_PARAM 2
+// reserved 3
+// type mem
+#define SQ_EXPORT_WRITE 0
+#define SQ_EXPORT_WRITE_IND 1
+#define SQ_EXPORT_WRITE_ACK 2
+#define SQ_EXPORT_WRITE_IND_ACK 3
+#endif
+
+// Export/import field helpers. ELEM_SIZE and BURST_COUNT are encoded as
+// (value - 1), with 0 staying 0; everything else is passed through unchanged.
+#define RW_GPR(x) (x)
+#define RW_REL(x) (x)
+#define ABSOLUTE 0
+#define RELATIVE 1
+#define INDEX_GPR(x) (x)
+// argument is parenthesized so that expressions such as (a & b) encode correctly
+#define ELEM_SIZE(x) ((x) ? ((x) - 1) : 0)
+#define COMP_MASK(x) (x)
+#define R6xx_ELEM_LOOP(x) (x)
+#define BURST_COUNT(x) ((x) ? ((x) - 1) : 0)
+
+// swiz
+#define SRC_SEL_X(x) (x) // SQ_SEL_* each
+#define SRC_SEL_Y(x) (x)
+#define SRC_SEL_Z(x) (x)
+#define SRC_SEL_W(x) (x)
+
+// Packers for the control-flow instruction words.
+// Every argument is shifted to its bit position and OR-ed together. Fields
+// whose top bit can land on bit 31 are converted to unsigned first, because
+// left-shifting a set bit into the sign bit of an int is undefined behaviour.
+#define CF_DWORD0(addr) (addr)
+// R7xx has another entry (COUNT3), but that is only used for adding a bit to count.
+// We allow one more bit for count in the argument of the macro on R7xx instead.
+// R6xx: [0,7] R7xx: [1,16]
+#define CF_DWORD1(pc, cf_const, cond, count, call_count, eop, vpm, cf_inst, wqm, b) \
+    (((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | (((count) & 7) << 10) | (((count) >> 3) << 19) | \
+     ((call_count) << 13) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | \
+     ((unsigned)(b) << 31))
+
+#define CF_ALU_DWORD0(addr, kb0, kb1, km0) \
+    (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((unsigned)(km0) << 30))
+#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, uw, cf_inst, wqm, b) \
+    (((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \
+     ((count) << 18) | ((uw) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((unsigned)(b) << 31))
+
+#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
+    (((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | ((index_gpr) << 23) | \
+     ((unsigned)(es) << 30))
+// R7xx apparently doesn't have the ELEM_LOOP entry any more
+// We still expose it, but ELEM_LOOP is explicitly R6xx now.
+// TODO: is this just forgotten in the docs, or really not available any more?
+#define CF_ALLOC_IMP_EXP_DWORD1_BUF(array_size, comp_mask, el, bc, eop, vpm, cf_inst, wqm, b) \
+    (((array_size) << 0) | ((comp_mask) << 12) | ((el) << 16) | ((bc) << 17) | \
+     ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((unsigned)(b) << 31))
+#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, el, bc, eop, vpm, cf_inst, wqm, b) \
+    (((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | ((el) << 16) | \
+     ((bc) << 17) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | \
+     ((wqm) << 30) | ((unsigned)(b) << 31))
+
+// ALU clause insts
+//
+// Field helpers for the ALU instruction words. All of them expand to their
+// argument unchanged and only label a value at the call site of the
+// ALU_DWORD* packing macros.
+#define SRC0_SEL(x) (x)
+#define SRC1_SEL(x) (x)
+#define SRC2_SEL(x) (x)
+// src[0-2]_sel
+// 0-127 GPR
+// 128-159 kcache constants bank 0
+// 160-191 kcache constants bank 1
+// 248-255 special SQ_ALU_SRC_* (0, 1, etc.)
+
+#define SRC0_REL(x) (x)
+#define SRC1_REL(x) (x)
+#define SRC2_REL(x) (x)
+// elem
+#define SRC0_ELEM(x) (x)
+#define SRC1_ELEM(x) (x)
+#define SRC2_ELEM(x) (x)
+#define ELEM_X 0
+#define ELEM_Y 1
+#define ELEM_Z 2
+#define ELEM_W 3
+// neg
+#define SRC0_NEG(x) (x)
+#define SRC1_NEG(x) (x)
+#define SRC2_NEG(x) (x)
+// im
+#define INDEX_MODE(x) (x) // SQ_INDEX_*
+// ps
+#define PRED_SEL(x) (x) // SQ_PRED_SEL_*
+// last
+#define LAST(x) (x)
+// abs
+#define SRC0_ABS(x) (x)
+#define SRC1_ABS(x) (x)
+// uem
+#define UPDATE_EXECUTE_MASK(x) (x)
+// up
+#define UPDATE_PRED(x) (x)
+// wm
+#define WRITE_MASK(x) (x)
+// fm
+#define FOG_MERGE(x) (x)
+// omod
+#define OMOD(x) (x) // SQ_ALU_OMOD_*
+// alu inst
+#define ALU_INST(x) (x) // SQ_ALU_INST_*
+//bs
+#define BANK_SWIZZLE(x) (x) // SQ_ALU_VEC_*
+#define DST_GPR(x) (x)
+#define DST_REL(x) (x)
+#define DST_ELEM(x) (x)
+#define CLAMP(x) (x)
+
+// Packers for the ALU instruction words.
+// The bit-31 fields (last, clamp) are converted to unsigned before shifting:
+// moving a set bit into the sign bit of an int is undefined behaviour.
+#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
+    (((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \
+     ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \
+     ((im) << 26) | ((ps) << 29) | ((unsigned)(last) << 31))
+// R7xx has alu_inst at a different slot, and no fog merge any more (no fixed-function fog any more)
+#define R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
+    (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
+     ((fm) << 5) | ((omod) << 6) | ((alu_inst) << 8) | ((bs) << 18) | ((dst_gpr) << 21) | \
+     ((dr) << 28) | ((de) << 29) | ((unsigned)(clamp) << 31))
+#define R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
+    (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
+     ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \
+     ((dr) << 28) | ((de) << 29) | ((unsigned)(clamp) << 31))
+// This is a general chipset macro, but due to selection by chipid typically not usable in static arrays
+// Fog is NOT USED on R7xx, even if specified.
+#define ALU_DWORD1_OP2(chipfamily, s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
+    ((chipfamily) < CHIP_FAMILY_RV770 ? \
+     R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) : \
+     R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp))
+#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
+    (((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \
+     ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \
+     ((de) << 29) | ((unsigned)(clamp) << 31))
+
+// VTX clause insts
+//
+// Field helpers and packers for the vertex-fetch instruction words.
+// Fields that can reach bit 31 (mfc, sma) are converted to unsigned before
+// shifting, since overflowing a signed left shift is undefined behaviour.
+// vtx insts
+#define VTX_INST(x) (x) // SQ_VTX_INST_*
+
+// fetch type
+#define FETCH_TYPE(x) (x) // SQ_VTX_FETCH_*
+
+#define FETCH_WHOLE_QUAD(x) (x)
+#define BUFFER_ID(x) (x)
+#define SRC_GPR(x) (x)
+#define SRC_REL(x) (x)
+// encoded as (count - 1); an argument of 0 stays 0
+#define MEGA_FETCH_COUNT(x) ((x) ? ((x) - 1) : 0)
+
+#define SEMANTIC_ID(x) (x)
+#define DST_SEL_X(x) (x)
+#define DST_SEL_Y(x) (x)
+#define DST_SEL_Z(x) (x)
+#define DST_SEL_W(x) (x)
+#define USE_CONST_FIELDS(x) (x)
+#define DATA_FORMAT(x) (x)
+// num format
+#define NUM_FORMAT_ALL(x) (x) // SQ_NUM_FORMAT_*
+// format comp
+#define FORMAT_COMP_ALL(x) (x) // SQ_FORMAT_COMP_*
+// sma
+#define SRF_MODE_ALL(x) (x)
+#define SRF_MODE_ZERO_CLAMP_MINUS_ONE 0
+#define SRF_MODE_NO_ZERO 1
+#define OFFSET(x) (x)
+// endian swap
+#define ENDIAN_SWAP(x) (x) // SQ_ENDIAN_*
+#define CONST_BUF_NO_STRIDE(x) (x)
+// mf
+#define MEGA_FETCH(x) (x)
+
+#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \
+    (((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \
+     ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((unsigned)(mfc) << 26))
+#define VTX_DWORD1_SEM(semantic_id, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
+    (((semantic_id) << 0) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+     ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((unsigned)(sma) << 31))
+#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
+    (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+     ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((unsigned)(sma) << 31))
+#define VTX_DWORD2(offset, es, cbns, mf) \
+    (((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19))
+#define VTX_DWORD_PAD 0x00000000
+
+// TEX clause insts
+//
+// Field helpers and packers for the texture-fetch instruction words.
+// Fields that can reach bit 31 (ctw, ssw) are converted to unsigned before
+// shifting, since overflowing a signed left shift is undefined behaviour.
+// tex insts
+#define TEX_INST(x) (x) // SQ_TEX_INST_*
+
+#define BC_FRAC_MODE(x) (x)
+// identical to the definition in the VTX section (benign redefinition)
+#define FETCH_WHOLE_QUAD(x) (x)
+#define RESOURCE_ID(x) (x)
+#define R7xx_ALT_CONST(x) (x)
+
+#define LOD_BIAS(x) (x)
+//ct
+#define COORD_TYPE_X(x) (x)
+#define COORD_TYPE_Y(x) (x)
+#define COORD_TYPE_Z(x) (x)
+#define COORD_TYPE_W(x) (x)
+#define TEX_UNNORMALIZED 0
+#define TEX_NORMALIZED 1
+#define OFFSET_X(x) (x)
+#define OFFSET_Y(x) (x)
+#define OFFSET_Z(x) (x)
+#define SAMPLER_ID(x) (x)
+
+// R7xx has an additional parameter ALT_CONST. We always expose it, but ALT_CONST is R7xx only
+#define TEX_DWORD0(tex_inst, bfm, fwq, resource_id, src_gpr, sr, ac) \
+    (((tex_inst) << 0) | ((bfm) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \
+     ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24))
+#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
+    (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+     ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((unsigned)(ctw) << 31))
+#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
+    (((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \
+     ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((unsigned)(ssw) << 29))
+#define TEX_DWORD_PAD 0x00000000
+
+
+#endif
diff --git a/src/r600_state.h b/src/r600_state.h
new file mode 100644
index 00000000..9efd557c
--- /dev/null
+++ b/src/r600_state.h
@@ -0,0 +1,229 @@
+#ifndef __R600_STATE_H__
+#define __R600_STATE_H__
+
+#include "xf86drm.h"
+
+/* Plain-int boolean used by the state structures below. */
+typedef int bool_t;
+
+/* seriously ?! @#$%% */
+/* Map the fixed-width names onto the X server's CARD32/CARD64 types. */
+# define uint32_t CARD32
+# define uint64_t CARD64
+
+/* Zero an object in place; x must be an lvalue (it is parenthesized for macro hygiene). */
+#define CLEAR(x) memset (&(x), 0, sizeof(x))
+
+/* Sequencer / thread handling */
+/* One value per shader-stage prefix (ps/vs/gs/es) for priority, GPR count,
+ * thread count and stack entries, plus num_temp_gprs.
+ * NOTE(review): no consumer of this structure is visible in this part of the
+ * patch -- confirm field units against the code that fills it in. */
+typedef struct {
+ int ps_prio;
+ int vs_prio;
+ int gs_prio;
+ int es_prio;
+ int num_ps_gprs;
+ int num_vs_gprs;
+ int num_gs_gprs;
+ int num_es_gprs;
+ int num_temp_gprs;
+ int num_ps_threads;
+ int num_vs_threads;
+ int num_gs_threads;
+ int num_es_threads;
+ int num_ps_stack_entries;
+ int num_vs_stack_entries;
+ int num_gs_stack_entries;
+ int num_es_stack_entries;
+} sq_config_t;
+
+/* Color buffer / render target */
+/* Passed to set_render_target(). The textured-video path zeroes the structure
+ * with CLEAR() and then fills id, w (destination pitch in pixels), h, base,
+ * format, comp_swap (1 for ARGB, 2 for RGB), source_format and blend_clamp. */
+typedef struct {
+ int id;
+ int w;
+ int h;
+ uint64_t base;
+ int format;
+ int endian;
+ int array_mode; // tiling
+ int number_type;
+ int read_size;
+ int comp_swap;
+ int tile_mode;
+ int blend_clamp;
+ int clear_color;
+ int blend_bypass;
+ int blend_float32;
+ int simple_float;
+ int round_mode;
+ int tile_compact;
+ int source_format;
+} cb_config_t;
+
+/* Depth buffer */
+/* NOTE(review): no consumer of this structure is visible in this part of the
+ * patch; w/h/base presumably follow the same conventions as cb_config_t --
+ * confirm before relying on it. */
+typedef struct {
+ int w;
+ int h;
+ uint64_t base;
+ int format;
+ int read_size;
+ int array_mode; // tiling
+ int tile_surface_en;
+ int tile_compact;
+ int zrange_precision;
+} db_config_t;
+
+/* Shader */
+/* Passed to fs_setup(), vs_setup() and ps_setup(). The textured-video path
+ * sets shader_addr, num_gprs and stack_size for both stages, and additionally
+ * uncached_first_inst, clamp_consts and export_mode for the pixel shader;
+ * all other fields stay zero from CLEAR(). */
+typedef struct {
+ uint64_t shader_addr;
+ int num_gprs;
+ int stack_size;
+ int dx10_clamp;
+ int prime_cache_pgm_en;
+ int prime_cache_on_draw;
+ int fetch_cache_lines;
+ int prime_cache_en;
+ int prime_cache_on_const;
+ int clamp_consts;
+ int export_mode;
+ int uncached_first_inst;
+} shader_config_t;
+
+/* Vertex buffer / vtx resource */
+/* Passed to set_vtx_resource(). In the textured-video path vtx_size_dw is the
+ * per-vertex size in dwords (16 / 4) and vtx_num_entries is the vertex buffer
+ * size in bytes divided by 4; vb_addr receives the buffer's MC address. */
+typedef struct {
+ int id;
+ uint64_t vb_addr;
+ uint32_t vtx_num_entries;
+ uint32_t vtx_size_dw;
+ int clamp_x;
+ int format;
+ int num_format_all;
+ int format_comp_all;
+ int srf_mode_all;
+ int endian;
+ int mem_req_size;
+} vtx_resource_t;
+
+/* Texture resource */
+/* Passed to set_tex_resource(). The textured-video path fills id, w, h, pitch,
+ * depth, dim, base, mip_base, format, the four dst_sel_* swizzles,
+ * request_size, base_level, last_level, perf_modulation and interlaced, and
+ * reuses one instance for several resources by changing only the fields that
+ * differ between them. */
+typedef struct {
+ int id;
+ int w;
+ int h;
+ int pitch;
+ int depth;
+ int dim;
+ int tile_mode;
+ int tile_type;
+ int format;
+ uint64_t base;
+ uint64_t mip_base;
+ int format_comp_x;
+ int format_comp_y;
+ int format_comp_z;
+ int format_comp_w;
+ int num_format_all;
+ int srf_mode_all;
+ int force_degamma;
+ int endian;
+ int request_size;
+ int dst_sel_x;
+ int dst_sel_y;
+ int dst_sel_z;
+ int dst_sel_w;
+ int base_level;
+ int last_level;
+ int base_array;
+ int last_array;
+ int mpeg_clamp;
+ int perf_modulation;
+ int interlaced;
+} tex_resource_t;
+
+/* Texture sampler */
+/* Passed to set_tex_sampler(). The textured-video path sets id, the three
+ * clamp modes, the xy mag/min filters, z_filter and mip_filter (0 = no mipmap)
+ * and leaves everything else zeroed. Fields marked "?" were already flagged as
+ * uncertain by the original authors. */
+typedef struct {
+ int id;
+ /* Clamping */
+ int clamp_x, clamp_y, clamp_z;
+ int border_color;
+ /* Filtering */
+ int xy_mag_filter, xy_min_filter;
+ int z_filter;
+ int mip_filter;
+ bool_t high_precision_filter; /* ? */
+ int perf_mip; /* ? 0-7 */
+ int perf_z; /* ? 3 */
+ /* LoD selection */
+ int min_lod, max_lod; /* 0-0x3ff */
+ int lod_bias; /* 0-0xfff (signed?) */
+ int lod_bias2; /* ? 0-0xfff (signed?) */
+ bool_t lod_uses_minor_axis; /* ? */
+ /* Other stuff */
+ bool_t point_sampling_clamp; /* ? */
+ bool_t tex_array_override; /* ? */
+ bool_t mc_coord_truncate; /* ? */
+ bool_t force_degamma; /* ? */
+ bool_t fetch_4; /* ? */
+ bool_t sample_is_pcf; /* ? */
+ bool_t type; /* ? */
+ int depth_compare; /* only depth textures? */
+ int chroma_key;
+} tex_sampler_t;
+
+/* Draw command */
+/* Passed to draw_immd() and draw_auto(). The textured-video path uses
+ * prim_type = DI_PT_RECTLIST, vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX,
+ * index_type = DI_INDEX_SIZE_16_BIT, num_instances = 1 and sets num_indices
+ * to the number of queued vertices. */
+typedef struct {
+ uint32_t prim_type;
+ uint32_t vgt_draw_initiator;
+ uint32_t index_type;
+ uint32_t num_instances;
+ uint32_t num_indices;
+} draw_config_t;
+
+/*
+ * Indirect-buffer emit helpers (e32, efloat and pack3 are defined in
+ * r6xx_accel.c).
+ *
+ * They are deliberately declared WITHOUT `inline`. With C99 inline semantics
+ * a function whose file-scope declarations are all `inline` (and not
+ * `extern`) only gets an inline definition, so no external symbol is emitted
+ * and callers in other translation units fail to link. A plain prototype
+ * here makes the definition an ordinary external one under both gnu89 and
+ * C99 rules.
+ */
+void e32(drmBufPtr ib, uint32_t dword);
+void efloat(drmBufPtr ib, float f);
+void pack3(drmBufPtr ib, int cmd, unsigned num);
+void pack0 (drmBufPtr ib, uint32_t reg, int num);
+void ereg (drmBufPtr ib, uint32_t reg, uint32_t val);
+
+/* Indirect-buffer submission / release */
+void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib);
+void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib);
+
+/* Shader upload */
+uint64_t
+upload (ScrnInfoPtr pScrn, void *shader, int size, int offset);
+
+/* Engine synchronisation and start-up */
+void
+wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib);
+void
+wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib);
+void
+start_3d(ScrnInfoPtr pScrn, drmBufPtr ib);
+
+/* State emission; each call appends commands to the given indirect buffer */
+void
+set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf);
+void
+cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr);
+void
+cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, int crtc, int start, int stop, Bool enable);
+void
+fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf);
+void
+vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf);
+void
+ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf);
+void
+set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf);
+void
+set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res);
+void
+set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res);
+void
+set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s);
+
+/* Scissors and clip rectangles */
+void
+set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
+void
+set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2);
+void
+set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
+void
+set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
+void
+set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2);
+
+/* Default state and draw commands */
+void
+set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib);
+void
+draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices);
+void
+draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf);
+
+#endif
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
new file mode 100644
index 00000000..f03fb7df
--- /dev/null
+++ b/src/r600_textured_videofuncs.c
@@ -0,0 +1,521 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Alex Deucher <alexander.deucher@amd.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include "exa.h"
+
+#include "radeon.h"
+#include "r600_shader.h"
+#include "r600_reg.h"
+#include "r600_state.h"
+
+#include "radeon_video.h"
+
+#include <X11/extensions/Xv.h>
+#include "fourcc.h"
+
+#include "damage.h"
+
+/*
+ * Finish the current batch of textured-video rectangles.
+ *
+ * If no vertices were queued the indirect buffer is discarded. Otherwise the
+ * vertex buffer (stored in the upper half of the IB) is bound, one auto-index
+ * RECTLIST draw is emitted, the destination surface is synced and the IB is
+ * flushed to the CP.
+ */
+static void
+R600DoneTexturedVideo(ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    draw_config_t draw_conf;
+    vtx_resource_t vtx_res;
+    uint32_t vtx_sync;
+
+    /* nothing queued: hand the buffer back unused */
+    if (accel_state->vb_index == 0) {
+        R600IBDiscard(pScrn, accel_state->ib);
+        return;
+    }
+
+    CLEAR (draw_conf);
+    CLEAR (vtx_res);
+
+    /* the vertices start half way into the indirect buffer; 16 bytes each */
+    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
+        (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
+    accel_state->vb_size = accel_state->vb_index * 16;
+
+    /* flush vertex cache: these four families sync through TC instead of VC */
+    switch (info->ChipFamily) {
+    case CHIP_FAMILY_RV610:
+    case CHIP_FAMILY_RV620:
+    case CHIP_FAMILY_RS780:
+    case CHIP_FAMILY_RV710:
+        vtx_sync = TC_ACTION_ENA_bit;
+        break;
+    default:
+        vtx_sync = VC_ACTION_ENA_bit;
+        break;
+    }
+    cp_set_surface_sync(pScrn, accel_state->ib, vtx_sync,
+                        accel_state->vb_size, accel_state->vb_mc_addr);
+
+    /* Vertex buffer setup */
+    vtx_res.id = SQ_VTX_RESOURCE_vs;
+    vtx_res.vb_addr = accel_state->vb_mc_addr;
+    vtx_res.vtx_size_dw = 16 / 4;
+    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
+    vtx_res.mem_req_size = 1;
+    set_vtx_resource (pScrn, accel_state->ib, &vtx_res);
+
+    /* one auto-indexed rect list covering every queued vertex */
+    draw_conf.prim_type = DI_PT_RECTLIST;
+    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+    draw_conf.index_type = DI_INDEX_SIZE_16_BIT;
+    draw_conf.num_instances = 1;
+    draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
+    draw_auto(pScrn, accel_state->ib, &draw_conf);
+
+    wait_3d_idle_clean(pScrn, accel_state->ib);
+
+    /* sync destination surface */
+    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
+                        accel_state->dst_size, accel_state->dst_mc_addr);
+
+    R600CPFlushIndirect(pScrn, accel_state->ib);
+}
+
+/*
+ * Draw the Xv image described by pPriv into pPriv->pPixmap with the 3D engine.
+ *
+ * Emits shader, texture, render-target and interpolator state into a fresh
+ * indirect buffer, then queues one rectangle (three vertices) per clip box
+ * and submits the batch through R600DoneTexturedVideo(). Planar formats
+ * (YV12/I420) use three textures, packed formats (UYVY/YUY2 and anything
+ * else) use two views of the same surface.
+ *
+ * Returns silently, without drawing, if the source or destination pitch is
+ * not a multiple of 8 or if the destination is neither 16 nor 32 bpp. In the
+ * latter case the indirect buffer that was already acquired is discarded
+ * rather than leaked.
+ */
+void
+R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    PixmapPtr pPixmap = pPriv->pPixmap;
+    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
+    int nBox = REGION_NUM_RECTS(&pPriv->clip);
+    int dstxoff, dstyoff;
+    cb_config_t cb_conf;
+    tex_resource_t tex_res;
+    tex_sampler_t tex_samp;
+    shader_config_t vs_conf, ps_conf;
+    int uv_offset;
+
+    static float ps_alu_consts[] = {
+        1.0, 0.0, 1.4020, 0, // r - c[0]
+        1.0, -0.34414, -0.71414, 0, // g - c[1]
+        1.0, 1.7720, 0.0, 0, // b - c[2]
+        /* Constants for undoing Y'CbCr scaling
+         * - Y' is scaled from 16:235
+         * - Cb/Cr are scaled from 16:240
+         * Unscaled value N' = N * N_mul + N_shift (N' in range [-0.5, 0.5])
+         * Vector is [Y_mul, Y_shift, C_mul, C_shift]
+         */
+        256.0/219.0, -16.0/219.0, 256.0/224.0, -128.0/224.0,
+    };
+
+    CLEAR (cb_conf);
+    CLEAR (tex_res);
+    CLEAR (tex_samp);
+    CLEAR (vs_conf);
+    CLEAR (ps_conf);
+
+    accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);
+    accel_state->src_pitch[0] = pPriv->src_pitch;
+
+    // bad pitch
+    if (accel_state->src_pitch[0] & 7)
+        return;
+    if (accel_state->dst_pitch & 7)
+        return;
+
+#ifdef COMPOSITE
+    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
+    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
+#else
+    dstxoff = 0;
+    dstyoff = 0;
+#endif
+
+    /* from here on every exit path must flush or discard this buffer */
+    accel_state->ib = RADEONCPGetBuffer(pScrn);
+
+    /* Init */
+    start_3d(pScrn, accel_state->ib);
+
+    //cp_set_surface_sync(pScrn, accel_state->ib);
+
+    set_default_state(pScrn, accel_state->ib);
+
+    /* Scissor / viewport */
+    ereg (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
+    ereg (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+
+    accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+        accel_state->xv_vs_offset;
+
+    switch(pPriv->id) {
+    case FOURCC_YV12:
+    case FOURCC_I420:
+        accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+            accel_state->xv_ps_offset_planar;
+        break;
+    case FOURCC_UYVY:
+    case FOURCC_YUY2:
+    default:
+        accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+            accel_state->xv_ps_offset_packed;
+        break;
+    }
+
+    accel_state->vs_size = 512;
+    accel_state->ps_size = 512;
+
+    /* Shader */
+
+    /* flush SQ cache */
+    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
+                        accel_state->vs_size, accel_state->vs_mc_addr);
+
+    vs_conf.shader_addr = accel_state->vs_mc_addr;
+    vs_conf.num_gprs = 2;
+    vs_conf.stack_size = 0;
+    vs_setup (pScrn, accel_state->ib, &vs_conf);
+
+    /* flush SQ cache */
+    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
+                        accel_state->ps_size, accel_state->ps_mc_addr);
+
+    ps_conf.shader_addr = accel_state->ps_mc_addr;
+    ps_conf.num_gprs = 3;
+    ps_conf.stack_size = 0;
+    ps_conf.uncached_first_inst = 1;
+    ps_conf.clamp_consts = 0;
+    ps_conf.export_mode = 2;
+    ps_setup (pScrn, accel_state->ib, &ps_conf);
+
+    // PS alu constants
+    set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+
+    /* Texture */
+    switch(pPriv->id) {
+    case FOURCC_YV12:
+    case FOURCC_I420:
+        accel_state->src_mc_addr[0] = pPriv->src_offset;
+        accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h;
+
+        /* flush texture cache */
+        cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0],
+                            accel_state->src_mc_addr[0]);
+
+        // Y texture
+        tex_res.id = 0;
+        tex_res.w = pPriv->w;
+        tex_res.h = pPriv->h;
+        tex_res.pitch = accel_state->src_pitch[0];
+        tex_res.depth = 0;
+        tex_res.dim = SQ_TEX_DIM_2D;
+        tex_res.base = accel_state->src_mc_addr[0];
+        tex_res.mip_base = accel_state->src_mc_addr[0];
+
+        tex_res.format = FMT_8;
+        tex_res.dst_sel_x = SQ_SEL_X; //Y
+        tex_res.dst_sel_y = SQ_SEL_1;
+        tex_res.dst_sel_z = SQ_SEL_1;
+        tex_res.dst_sel_w = SQ_SEL_1;
+
+        tex_res.request_size = 1;
+        tex_res.base_level = 0;
+        tex_res.last_level = 0;
+        tex_res.perf_modulation = 0;
+        tex_res.interlaced = 0;
+        set_tex_resource (pScrn, accel_state->ib, &tex_res);
+
+        // Y sampler
+        tex_samp.id = 0;
+        tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
+        tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
+        tex_samp.clamp_z = SQ_TEX_WRAP;
+
+        // xxx: switch to bicubic
+        tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
+        tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;
+
+        tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
+        tex_samp.mip_filter = 0; /* no mipmap */
+        set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+
+        // U or V texture
+        uv_offset = accel_state->src_pitch[0] * pPriv->h;
+        uv_offset = (uv_offset + 255) & ~255;
+
+        cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
+                            accel_state->src_size[0] / 4,
+                            accel_state->src_mc_addr[0] + uv_offset);
+
+        tex_res.id = 1;
+        tex_res.format = FMT_8;
+        tex_res.w = pPriv->w >> 1;
+        tex_res.h = pPriv->h >> 1;
+        tex_res.pitch = accel_state->src_pitch[0] >> 1;
+        tex_res.dst_sel_x = SQ_SEL_X; //V or U
+        tex_res.dst_sel_y = SQ_SEL_1;
+        tex_res.dst_sel_z = SQ_SEL_1;
+        tex_res.dst_sel_w = SQ_SEL_1;
+        tex_res.interlaced = 0;
+        // XXX tex bases need to be 256B aligned
+        tex_res.base = accel_state->src_mc_addr[0] + uv_offset;
+        tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset;
+        set_tex_resource (pScrn, accel_state->ib, &tex_res);
+
+        // U or V sampler
+        tex_samp.id = 1;
+        set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+
+        // U or V texture
+        uv_offset += ((accel_state->src_pitch[0] >> 1) * (pPriv->h >> 1));
+        uv_offset = (uv_offset + 255) & ~255;
+
+        cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
+                            accel_state->src_size[0] / 4,
+                            accel_state->src_mc_addr[0] + uv_offset);
+
+        tex_res.id = 2;
+        tex_res.format = FMT_8;
+        tex_res.w = pPriv->w >> 1;
+        tex_res.h = pPriv->h >> 1;
+        tex_res.pitch = accel_state->src_pitch[0] >> 1;
+        tex_res.dst_sel_x = SQ_SEL_X; //V or U
+        tex_res.dst_sel_y = SQ_SEL_1;
+        tex_res.dst_sel_z = SQ_SEL_1;
+        tex_res.dst_sel_w = SQ_SEL_1;
+        tex_res.interlaced = 0;
+        // XXX tex bases need to be 256B aligned
+        tex_res.base = accel_state->src_mc_addr[0] + uv_offset;
+        tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset;
+        set_tex_resource (pScrn, accel_state->ib, &tex_res);
+
+        // UV sampler
+        tex_samp.id = 2;
+        set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+        break;
+    case FOURCC_UYVY:
+    case FOURCC_YUY2:
+    default:
+        accel_state->src_mc_addr[0] = pPriv->src_offset;
+        accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h;
+
+        /* flush texture cache */
+        cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0],
+                            accel_state->src_mc_addr[0]);
+
+        // Y texture
+        tex_res.id = 0;
+        tex_res.w = pPriv->w;
+        tex_res.h = pPriv->h;
+        tex_res.pitch = accel_state->src_pitch[0] >> 1;
+        tex_res.depth = 0;
+        tex_res.dim = SQ_TEX_DIM_2D;
+        tex_res.base = accel_state->src_mc_addr[0];
+        tex_res.mip_base = accel_state->src_mc_addr[0];
+
+        tex_res.format = FMT_8_8;
+        if (pPriv->id == FOURCC_UYVY)
+            tex_res.dst_sel_x = SQ_SEL_Y; //Y
+        else
+            tex_res.dst_sel_x = SQ_SEL_X; //Y
+        tex_res.dst_sel_y = SQ_SEL_1;
+        tex_res.dst_sel_z = SQ_SEL_1;
+        tex_res.dst_sel_w = SQ_SEL_1;
+
+        tex_res.request_size = 1;
+        tex_res.base_level = 0;
+        tex_res.last_level = 0;
+        tex_res.perf_modulation = 0;
+        tex_res.interlaced = 0;
+        set_tex_resource (pScrn, accel_state->ib, &tex_res);
+
+        // Y sampler
+        tex_samp.id = 0;
+        tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
+        tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
+        tex_samp.clamp_z = SQ_TEX_WRAP;
+
+        // xxx: switch to bicubic
+        tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
+        tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;
+
+        tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
+        tex_samp.mip_filter = 0; /* no mipmap */
+        set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+
+        // UV texture
+        tex_res.id = 1;
+        tex_res.format = FMT_8_8_8_8;
+        tex_res.w = pPriv->w >> 1;
+        tex_res.h = pPriv->h;
+        tex_res.pitch = accel_state->src_pitch[0] >> 2;
+        if (pPriv->id == FOURCC_UYVY) {
+            tex_res.dst_sel_x = SQ_SEL_X; //V
+            tex_res.dst_sel_y = SQ_SEL_Z; //U
+        } else {
+            tex_res.dst_sel_x = SQ_SEL_Y; //V
+            tex_res.dst_sel_y = SQ_SEL_W; //U
+        }
+        tex_res.dst_sel_z = SQ_SEL_1;
+        tex_res.dst_sel_w = SQ_SEL_1;
+        tex_res.interlaced = 0;
+        // XXX tex bases need to be 256B aligned
+        tex_res.base = accel_state->src_mc_addr[0];
+        tex_res.mip_base = accel_state->src_mc_addr[0];
+        set_tex_resource (pScrn, accel_state->ib, &tex_res);
+
+        // UV sampler
+        tex_samp.id = 1;
+        set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+        break;
+    }
+
+    /* Render setup */
+    ereg (accel_state->ib, CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift));
+    ereg (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
+    ereg (accel_state->ib, CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */
+
+    cb_conf.id = 0;
+
+    accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
+
+    cb_conf.w = accel_state->dst_pitch;
+    cb_conf.h = pPixmap->drawable.height;
+    cb_conf.base = accel_state->dst_mc_addr;
+
+    switch (pPixmap->drawable.bitsPerPixel) {
+    case 16:
+        if (pPixmap->drawable.depth == 15) {
+            cb_conf.format = COLOR_1_5_5_5;
+            cb_conf.comp_swap = 1; //ARGB
+        } else {
+            cb_conf.format = COLOR_5_6_5;
+            cb_conf.comp_swap = 2; //RGB
+        }
+        break;
+    case 32:
+        cb_conf.format = COLOR_8_8_8_8;
+        cb_conf.comp_swap = 1; //ARGB
+        break;
+    default:
+        /* unsupported destination format: give the indirect buffer back
+         * instead of leaking it */
+        R600IBDiscard(pScrn, accel_state->ib);
+        return;
+    }
+
+    cb_conf.source_format = 1;
+    cb_conf.blend_clamp = 1;
+    set_render_target(pScrn, accel_state->ib, &cb_conf);
+
+    ereg (accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit |
+                                                (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+                                                (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
+    ereg (accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */
+                                               DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+    /* Interpolator setup */
+    // export tex coords from VS
+    ereg (accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
+    ereg (accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
+
+    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
+    ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift)));
+    ereg (accel_state->ib, SPI_PS_IN_CONTROL_1, 0);
+    ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
+                                                           (0x03 << DEFAULT_VAL_shift) |
+                                                           SEL_CENTROID_bit));
+    ereg (accel_state->ib, SPI_INTERP_CONTROL_0, 0);
+
+
+    cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap,
+                       radeon_covering_crtc_num(pScrn,
+                                                pPriv->drw_x,
+                                                pPriv->drw_x + pPriv->dst_w,
+                                                pPriv->drw_y,
+                                                pPriv->drw_y + pPriv->dst_h,
+                                                pPriv->desired_crtc),
+                       pPriv->drw_y,
+                       pPriv->drw_y + pPriv->dst_h,
+                       pPriv->vsync);
+
+
+    accel_state->vb_index = 0;
+
+    while (nBox--) {
+        int srcX, srcY, srcw, srch;
+        int dstX, dstY, dstw, dsth;
+        struct r6xx_copy_vertex *xv_vb;
+        struct r6xx_copy_vertex vertex[3];
+
+        /* vertex area (upper half of the IB) is full: submit and start over.
+         * NOTE(review): the replacement buffer does not get the state emitted
+         * above replayed into it -- confirm that the hardware state persists
+         * across indirect buffers. */
+        if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
+            R600DoneTexturedVideo(pScrn);
+            accel_state->vb_index = 0;
+            accel_state->ib = RADEONCPGetBuffer(pScrn);
+        }
+
+        xv_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2));
+
+        dstX = pBox->x1 + dstxoff;
+        dstY = pBox->y1 + dstyoff;
+        dstw = pBox->x2 - pBox->x1;
+        dsth = pBox->y2 - pBox->y1;
+
+        /* map the clip box back into source-image coordinates */
+        srcX = ((pBox->x1 - pPriv->drw_x) *
+                pPriv->src_w) / pPriv->dst_w;
+        srcY = ((pBox->y1 - pPriv->drw_y) *
+                pPriv->src_h) / pPriv->dst_h;
+
+        srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
+        srch = (pPriv->src_h * dsth) / pPriv->dst_h;
+
+        /* three corners of the rectangle; s/t are normalised by the image size */
+        vertex[0].x = (float)dstX;
+        vertex[0].y = (float)dstY;
+        vertex[0].s = (float)srcX / pPriv->w;
+        vertex[0].t = (float)srcY / pPriv->h;
+
+        vertex[1].x = (float)dstX;
+        vertex[1].y = (float)(dstY + dsth);
+        vertex[1].s = (float)srcX / pPriv->w;
+        vertex[1].t = (float)(srcY + srch) / pPriv->h;
+
+        vertex[2].x = (float)(dstX + dstw);
+        vertex[2].y = (float)(dstY + dsth);
+        vertex[2].s = (float)(srcX + srcw) / pPriv->w;
+        vertex[2].t = (float)(srcY + srch) / pPriv->h;
+
+#if 0
+        ErrorF("vertex 0: %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].s, vertex[0].t);
+        ErrorF("vertex 1: %f, %f, %f, %f\n", vertex[1].x, vertex[1].y, vertex[1].s, vertex[1].t);
+        ErrorF("vertex 2: %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].s, vertex[2].t);
+#endif
+
+        // append to vertex buffer
+        xv_vb[accel_state->vb_index++] = vertex[0];
+        xv_vb[accel_state->vb_index++] = vertex[1];
+        xv_vb[accel_state->vb_index++] = vertex[2];
+
+        pBox++;
+    }
+
+    R600DoneTexturedVideo(pScrn);
+
+    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
+}
diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c
new file mode 100644
index 00000000..267a7b01
--- /dev/null
+++ b/src/r6xx_accel.c
@@ -0,0 +1,1160 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Alex Deucher <alexander.deucher@amd.com>
+ * Matthias Hopf <mhopf@suse.de>
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include <errno.h>
+
+#include "radeon.h"
+#include "r600_shader.h"
+#include "radeon_reg.h"
+#include "r600_reg.h"
+#include "r600_state.h"
+
+#include "xf86drm.h"
+#include "radeon_drm.h"
+
+/*
+ * Emit uint32_t: append one dword to the indirect buffer.
+ *
+ * ib->used is treated as a byte count: the value is stored at dword index
+ * (ib->used >> 2) from ib->address, then ib->used grows by 4.  No check is
+ * made against the size of the buffer.
+ */
+inline void e32(drmBufPtr ib, uint32_t dword)
+{
+    uint32_t *dwords = (pointer)(char*)ib->address;
+
+    *(dwords + (ib->used >> 2)) = dword;
+    ib->used += 4;
+}
+
+/*
+ * Emit a float: the 32 bits of f are reinterpreted through a union and
+ * written with e32(); no numeric conversion takes place.
+ */
+inline void efloat(drmBufPtr ib, float f)
+{
+    union {
+        float    as_float;
+        uint32_t as_dword;
+    } bits;
+
+    bits.as_float = f;
+    e32(ib, bits.as_dword);
+}
+
+/*
+ * Emit a type-3 packet header for opcode cmd.
+ *
+ * The count field (bits 16 and up) holds num - 1 masked to 14 bits; callers
+ * in this file follow the header with num dwords.
+ */
+inline void pack3(drmBufPtr ib, int cmd, unsigned num)
+{
+    const unsigned count_field = ((num-1) & 0x3fff) << 16;
+
+    e32 (ib, RADEON_CP_PACKET3 | (cmd << 8) | count_field);
+}
+
+/*
+ * Write num registers, start at reg.
+ *
+ * If reg falls in one of the special register windows (config, context,
+ * ALU const, resource, sampler, ctl/loop/bool const), a type-3 SET_* packet
+ * header is emitted, followed by the dword offset of reg inside that window.
+ * Any other register gets a plain type-0 packet header.  In both cases the
+ * caller is expected to follow up with num value dwords (see ereg()).
+ */
+inline void pack0 (drmBufPtr ib, uint32_t reg, int num)
+{
+    if (reg >= SET_CONFIG_REG_offset && reg < SET_CONFIG_REG_end) {
+        pack3 (ib, IT_SET_CONFIG_REG, num+1);
+        e32 (ib, (reg-SET_CONFIG_REG_offset) >> 2);
+    } else if (reg >= SET_CONTEXT_REG_offset && reg < SET_CONTEXT_REG_end) {
+        pack3 (ib, IT_SET_CONTEXT_REG, num+1);
+        /* use the same window base as the range test, not a literal */
+        e32 (ib, (reg-SET_CONTEXT_REG_offset) >> 2);
+    } else if (reg >= SET_ALU_CONST_offset && reg < SET_ALU_CONST_end) {
+        pack3 (ib, IT_SET_ALU_CONST, num+1);
+        e32 (ib, (reg-SET_ALU_CONST_offset) >> 2);
+    } else if (reg >= SET_RESOURCE_offset && reg < SET_RESOURCE_end) {
+        pack3 (ib, IT_SET_RESOURCE, num+1);
+        e32 (ib, (reg-SET_RESOURCE_offset) >> 2);
+    } else if (reg >= SET_SAMPLER_offset && reg < SET_SAMPLER_end) {
+        pack3 (ib, IT_SET_SAMPLER, num+1);
+        e32 (ib, (reg-SET_SAMPLER_offset) >> 2);
+    } else if (reg >= SET_CTL_CONST_offset && reg < SET_CTL_CONST_end) {
+        pack3 (ib, IT_SET_CTL_CONST, num+1);
+        e32 (ib, (reg-SET_CTL_CONST_offset) >> 2);
+    } else if (reg >= SET_LOOP_CONST_offset && reg < SET_LOOP_CONST_end) {
+        pack3 (ib, IT_SET_LOOP_CONST, num+1);
+        e32 (ib, (reg-SET_LOOP_CONST_offset) >> 2);
+    } else if (reg >= SET_BOOL_CONST_offset && reg < SET_BOOL_CONST_end) {
+        pack3 (ib, IT_SET_BOOL_CONST, num+1);
+        e32 (ib, (reg-SET_BOOL_CONST_offset) >> 2);
+    } else {
+        /* not in any special window: ordinary type-0 register write */
+        e32 (ib, CP_PACKET0 (reg, num-1));
+    }
+}
+
+/*
+ * Write a single register: a pack0() header announcing one register,
+ * immediately followed by the value dword.
+ */
+inline void ereg (drmBufPtr ib, uint32_t reg, uint32_t val)
+{
+    const int one_register = 1;
+
+    pack0 (ib, reg, one_register);
+    e32 (ib, val);
+}
+
+/*
+ * Flush the indirect buffer to the kernel for submission to the card.
+ *
+ * The buffer is first padded with CP_PACKET2() dwords until ib->used has
+ * bits 0x3c clear (a multiple of 16 dwords), then submitted through the
+ * DRM_RADEON_INDIRECT command covering bytes [0, ib->used) with discard set.
+ * A NULL ib is ignored.  The result of the DRM call is not inspected.
+ */
+void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    drm_radeon_indirect_t request;
+
+    if (!ib)
+        return;
+
+    /* fill up to multiple of 16 dwords */
+    while ((ib->used & 0x3c) != 0)
+        e32(ib, CP_PACKET2());
+
+    request.idx     = ib->idx;
+    request.start   = 0;
+    request.end     = ib->used;
+    request.discard = 1;
+
+    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
+                        &request, sizeof(drm_radeon_indirect_t));
+}
+
+/*
+ * Give an indirect buffer back with nothing in it: the byte count is reset
+ * to 0 before the buffer goes through R600CPFlushIndirect(), so the flush
+ * submits an empty range.  A NULL ib is ignored.
+ */
+void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    if (ib) {
+        ib->used = 0;
+        R600CPFlushIndirect(pScrn, ib);
+    }
+}
+
+/*
+ * Queue a cache flush-and-invalidate event (no timestamp is generated) and
+ * then a WAIT_UNTIL with both the 3D-idle and 3D-idle-clean bits set.
+ * pScrn is not used.
+ */
+void
+wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    const uint32_t idle_and_clean = (WAIT_3D_IDLE_bit |
+                                     WAIT_3D_IDLECLEAN_bit);
+
+    /* flush caches */
+    pack3 (ib, IT_EVENT_WRITE, 1);
+    e32 (ib, CACHE_FLUSH_AND_INV_EVENT);
+
+    /* wait for 3D idle clean */
+    ereg (ib, WAIT_UNTIL, idle_and_clean);
+}
+
+/*
+ * Queue a WAIT_UNTIL with only the 3D-idle bit set.  pScrn is not used.
+ */
+void
+wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    const uint32_t idle_only = WAIT_3D_IDLE_bit;
+
+    ereg (ib, WAIT_UNTIL, idle_only);
+}
+
+/*
+ * Zero eight consecutive registers starting at CB_COLOR0_INFO with a single
+ * multi-register write.  pScrn is not used.
+ */
+static void
+reset_cb(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    int remaining = 8;
+
+    pack0 (ib, CB_COLOR0_INFO, 8);
+    while (remaining > 0) {
+        e32 (ib, 0);
+        remaining--;
+    }
+}
+
+/*
+ * Zero the TD border-colour registers of all pixel-shader and vertex-shader
+ * samplers (four registers per sampler, starting at the *_BORDER_RED
+ * register).  The writes are bracketed by wait_3d_idle() on both sides.
+ */
+static void
+reset_td_samplers(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    int left;
+
+    wait_3d_idle(pScrn, ib);
+
+    /* pixel-shader samplers */
+    pack0 (ib, TD_PS_SAMPLER0_BORDER_RED, 4*TD_PS_SAMPLER0_BORDER_RED_num);
+    left = 4*TD_PS_SAMPLER0_BORDER_RED_num;
+    while (left > 0) {
+        e32 (ib, 0);
+        left--;
+    }
+
+    /* vertex-shader samplers */
+    pack0 (ib, TD_VS_SAMPLER0_BORDER_RED, 4*TD_VS_SAMPLER0_BORDER_RED_num);
+    left = 4*TD_VS_SAMPLER0_BORDER_RED_num;
+    while (left > 0) {
+        e32 (ib, 0);
+        left--;
+    }
+
+    wait_3d_idle(pScrn, ib);
+}
+
+/*
+ * Give every texture sampler the same three-word default:
+ *   word 0: depth compare function LESSEQUAL
+ *   word 1: MAX_LOD_mask
+ *   word 2: 0
+ * pScrn is not used.
+ */
+static void
+reset_sampler_const (ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    int sampler = 0;
+
+    while (sampler < SQ_TEX_SAMPLER_WORD_all_num) {
+        pack0 (ib, SQ_TEX_SAMPLER_WORD + sampler * SQ_TEX_SAMPLER_WORD_offset, 3);
+        e32 (ib, SQ_TEX_DEPTH_COMPARE_LESSEQUAL << DEPTH_COMPARE_FUNCTION_shift);
+        e32 (ib, MAX_LOD_mask);
+        e32 (ib, 0);
+        sampler++;
+    }
+}
+
+/*
+ * Clear the whole ALU constant file to 0.0 with one multi-register write.
+ * The dword count is SQ_ALU_CONSTANT_all_num constants times the dwords per
+ * constant (SQ_ALU_CONSTANT_offset >> 2).  pScrn is not used.
+ */
+static void
+reset_dx9_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    const int total_dwords = SQ_ALU_CONSTANT_all_num * (SQ_ALU_CONSTANT_offset >> 2);
+    int emitted = 0;
+
+    pack0 (ib, SQ_ALU_CONSTANT, total_dwords);
+    while (emitted < total_dwords) {
+        efloat (ib, 0.0);
+        emitted++;
+    }
+}
+
+/*
+ * Zero the boolean constants (one single-register write each) and then all
+ * loop constants (one multi-register write).  pScrn is not used.
+ */
+static void
+reset_bool_loop_const(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    int idx;
+
+    idx = 0;
+    while (idx < SQ_BOOL_CONST_0_num) {
+        ereg (ib, SQ_BOOL_CONST_0 + (idx << 2), 0);
+        idx++;
+    }
+
+    pack0 (ib, SQ_LOOP_CONST, SQ_LOOP_CONST_all_num);
+    idx = 0;
+    while (idx < SQ_LOOP_CONST_all_num) {
+        e32 (ib, 0);
+        idx++;
+    }
+}
+
+/*
+ * Open a 3D command sequence in ib.
+ *
+ * Chip families below RV770 first get an IT_START_3D_CMDBUF packet.  All
+ * chips then get an IT_CONTEXT_CONTROL packet with both payload dwords set
+ * to 0x80000000, followed by wait_3d_idle_clean().
+ */
+void
+start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    const int before_rv770 = (info->ChipFamily < CHIP_FAMILY_RV770);
+
+    if (before_rv770) {
+        pack3 (ib, IT_START_3D_CMDBUF, 1);
+        e32 (ib, 0);
+    }
+
+    pack3 (ib, IT_CONTEXT_CONTROL, 2);
+    e32 (ib, 0x80000000);
+    e32 (ib, 0x80000000);
+
+    wait_3d_idle_clean (pScrn, ib);
+}
+
+/*
+ * Setup of functional groups
+ */
+
+// asic stack/thread/gpr limits - need to query the drm
+/*
+ * Program SQ_CONFIG and the five registers that follow it (two GPR, one
+ * thread and two stack resource-management words) from *sq_conf in a single
+ * six-register write.
+ *
+ * VC_ENABLE_bit is left out of SQ_CONFIG on RV610, RV620, RS780 and RV710
+ * and set on every other chip family.
+ */
+static void
+sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t config;
+    uint32_t gpr_mgmt_1, gpr_mgmt_2;
+    uint32_t thread_mgmt;
+    uint32_t stack_mgmt_1, stack_mgmt_2;
+
+    switch (info->ChipFamily) {
+    case CHIP_FAMILY_RV610:
+    case CHIP_FAMILY_RV620:
+    case CHIP_FAMILY_RS780:
+    case CHIP_FAMILY_RV710:
+        config = 0; // no VC
+        break;
+    default:
+        config = VC_ENABLE_bit;
+        break;
+    }
+
+    config |= (DX9_CONSTS_bit |
+               ALU_INST_PREFER_VECTOR_bit |
+               (sq_conf->ps_prio << PS_PRIO_shift) |
+               (sq_conf->vs_prio << VS_PRIO_shift) |
+               (sq_conf->gs_prio << GS_PRIO_shift) |
+               (sq_conf->es_prio << ES_PRIO_shift));
+
+    gpr_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
+                  (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
+                  (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
+    gpr_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
+                  (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
+
+    thread_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
+                   (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
+                   (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
+                   (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
+
+    stack_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
+                    (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
+    stack_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
+                    (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
+
+    /* six consecutive registers, in this order */
+    pack0 (ib, SQ_CONFIG, 6);
+    e32 (ib, config);
+    e32 (ib, gpr_mgmt_1);
+    e32 (ib, gpr_mgmt_2);
+    e32 (ib, thread_mgmt);
+    e32 (ib, stack_mgmt_1);
+    e32 (ib, stack_mgmt_2);
+}
+/*
+ * Program the colour-buffer registers of render target cb_conf->id.
+ *
+ * The per-target registers are addressed as CB_COLOR0_* + 4 * id.  The
+ * function writes, in order: BASE (cb_conf->base >> 8), an
+ * IT_SURFACE_BASE_UPDATE packet on chip families strictly between R600 and
+ * RV770, SIZE (pitch and slice tile maxima derived from w and h), VIEW (0),
+ * INFO (format/flag word built from cb_conf), and TILE, FRAG and MASK
+ * (all 0).
+ */
+void
+set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf)
+{
+ uint32_t cb_color_info;
+ int pitch, slice, h;
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+
+ cb_color_info = ((cb_conf->endian << ENDIAN_shift) |
+ (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) |
+ (cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) |
+ (cb_conf->number_type << NUMBER_TYPE_shift) |
+ (cb_conf->comp_swap << COMP_SWAP_shift) |
+ (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift));
+ if (cb_conf->read_size) /* each flag below sets one bit when non-zero */
+ cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit;
+ if (cb_conf->blend_clamp)
+ cb_color_info |= BLEND_CLAMP_bit;
+ if (cb_conf->clear_color)
+ cb_color_info |= CLEAR_COLOR_bit;
+ if (cb_conf->blend_bypass)
+ cb_color_info |= BLEND_BYPASS_bit;
+ if (cb_conf->blend_float32)
+ cb_color_info |= BLEND_FLOAT32_bit;
+ if (cb_conf->simple_float)
+ cb_color_info |= SIMPLE_FLOAT_bit;
+ if (cb_conf->round_mode)
+ cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
+ if (cb_conf->tile_compact)
+ cb_color_info |= TILE_COMPACT_bit;
+ if (cb_conf->source_format)
+ cb_color_info |= SOURCE_FORMAT_bit;
+
+ pitch = (cb_conf->w / 8) - 1; /* width in units of 8, minus one */
+ h = (cb_conf->h + 7) & ~7; /* height rounded up to a multiple of 8 */
+ slice = ((cb_conf->w * h) / 64) - 1; /* w * h in units of 64, minus one */
+
+ ereg (ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8));
+
+ // rv6xx workaround: only for families strictly between R600 and RV770
+ if ((info->ChipFamily > CHIP_FAMILY_R600) &&
+ (info->ChipFamily < CHIP_FAMILY_RV770)) {
+ pack3 (ib, IT_SURFACE_BASE_UPDATE, 1);
+ e32 (ib, (2 << cb_conf->id));
+ }
+
+ // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
+ ereg (ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) |
+ (slice << SLICE_TILE_MAX_shift)));
+ ereg (ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) |
+ (0 << SLICE_MAX_shift)));
+ ereg (ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info);
+ ereg (ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256
+ ereg (ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256
+ ereg (ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) |
+ (0 << FMASK_TILE_MAX_shift)));
+}
+
+/*
+ * Emit an IT_SURFACE_SYNC packet.
+ *
+ * sync_type is passed through as the first payload dword.  size is in bytes
+ * and is converted to 256-byte units rounded up, except that 0xffffffff is
+ * passed through unchanged.  mc_addr is written as mc_addr >> 8.  The poll
+ * interval is fixed at 10.  pScrn is not used.
+ */
+void
+cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr)
+{
+    const uint32_t coher_size = (size == 0xffffffff) ? 0xffffffff
+                                                     : ((size + 255) >> 8);
+
+    pack3 (ib, IT_SURFACE_SYNC, 4);
+    e32 (ib, sync_type);
+    e32 (ib, coher_size);
+    e32 (ib, (mc_addr >> 8));
+    e32 (ib, 10); /* poll interval */
+}
+
+/* inserts a wait for vline in the command stream
+ *
+ * Nothing is emitted when enable is false, crtc is not 0 or 1, stop < start,
+ * the CRTC is not enabled, the pixmap offset is non-zero, or start lies
+ * beyond the mode's VDisplay.  Otherwise start/stop are clamped to
+ * [0, VDisplay], the CRTC's VLINE_START_END register is programmed, and an
+ * IT_WAIT_REG_MEM packet is queued that polls the CRTC's VLINE_STATUS
+ * register under the AVIVO_D1MODE_VLINE_STAT mask with reference value 0.
+ */
+void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
+ int crtc, int start, int stop, Bool enable)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(pScrn);
+ uint32_t offset;
+ RADEONCrtcPrivatePtr radeon_crtc;
+
+ if (!enable)
+ return;
+
+ if ((crtc < 0) || (crtc > 1))
+ return;
+
+ if (stop < start)
+ return;
+
+ if (!xf86_config->crtc[crtc]->enabled)
+ return;
+
+#ifdef USE_EXA
+ if (info->useEXA)
+ offset = exaGetPixmapOffset(pPix);
+ else
+#endif
+ offset = pPix->devPrivate.ptr - info->FB;
+
+ /* only continue when the pixmap sits at offset 0 (per the original note: the front buffer) */
+ if (offset != 0)
+ return;
+
+ start = max(start, 0);
+ stop = min(stop, xf86_config->crtc[crtc]->mode.VDisplay);
+
+ if (start > xf86_config->crtc[crtc]->mode.VDisplay)
+ return;
+
+ radeon_crtc = xf86_config->crtc[crtc]->driver_private;
+
+ /* set the VLINE range */
+ ereg(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset,
+ (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
+ (stop << AVIVO_D1MODE_VLINE_END_SHIFT));
+
+ /* tell the CP to poll the VLINE state register */
+ pack3 (ib, IT_WAIT_REG_MEM, 6);
+ e32 (ib, IT_WAIT_REG | IT_WAIT_EQ);
+ e32 (ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS + radeon_crtc->crtc_offset));
+ e32 (ib, 0);
+ e32 (ib, 0); // Ref value
+ e32 (ib, AVIVO_D1MODE_VLINE_STAT); // Mask
+ e32 (ib, 10); // Wait interval
+}
+
+/*
+ * Program the fetch-shader registers from *fs_conf: program start address
+ * (shader_addr >> 8), the resources word (GPR count, stack size, optional
+ * DX10 clamp bit) and a CF offset of 0.  pScrn is not used.
+ */
+void
+fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf)
+{
+    uint32_t resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
+                          (fs_conf->stack_size << STACK_SIZE_shift));
+
+    if (fs_conf->dx10_clamp)
+        resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit;
+
+    ereg (ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
+    ereg (ib, SQ_PGM_RESOURCES_FS, resources);
+    ereg (ib, SQ_PGM_CF_OFFSET_FS, 0);
+}
+
+/*
+ * Program the vertex-shader registers from *vs_conf: program start address
+ * (shader_addr >> 8), the resources word (GPR count, stack size, optional
+ * DX10 clamp, fetch cache lines, uncached-first-instruction bit) and a CF
+ * offset of 0.  pScrn is not used.
+ */
+void
+vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf)
+{
+    uint32_t resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
+                          (vs_conf->stack_size << STACK_SIZE_shift));
+
+    if (vs_conf->dx10_clamp)
+        resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit;
+    if (vs_conf->fetch_cache_lines)
+        resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
+    if (vs_conf->uncached_first_inst)
+        resources |= UNCACHED_FIRST_INST_bit;
+
+    ereg (ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
+    ereg (ib, SQ_PGM_RESOURCES_VS, resources);
+    ereg (ib, SQ_PGM_CF_OFFSET_VS, 0);
+}
+
+/*
+ * Program the pixel-shader registers from *ps_conf: program start address
+ * (shader_addr >> 8), the resources word (GPR count, stack size, optional
+ * DX10 clamp, fetch cache lines, uncached-first-instruction and
+ * clamp-consts bits), the export mode and a CF offset of 0.
+ * pScrn is not used.
+ */
+void
+ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf)
+{
+    uint32_t resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
+                          (ps_conf->stack_size << STACK_SIZE_shift));
+
+    if (ps_conf->dx10_clamp)
+        resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit;
+    if (ps_conf->fetch_cache_lines)
+        resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
+    if (ps_conf->uncached_first_inst)
+        resources |= UNCACHED_FIRST_INST_bit;
+    if (ps_conf->clamp_consts)
+        resources |= CLAMP_CONSTS_bit;
+
+    ereg (ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
+    ereg (ib, SQ_PGM_RESOURCES_PS, resources);
+    ereg (ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
+    ereg (ib, SQ_PGM_CF_OFFSET_PS, 0);
+}
+
+/*
+ * Upload count ALU constants starting at constant slot offset.
+ *
+ * const_buf must hold count * (SQ_ALU_CONSTANT_offset >> 2) floats; they are
+ * emitted in order after a single multi-register header.  pScrn is not used.
+ */
+void
+set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf)
+{
+    const int total_dwords = count * (SQ_ALU_CONSTANT_offset >> 2);
+    int k = 0;
+
+    pack0 (ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, total_dwords);
+    while (k < total_dwords) {
+        efloat (ib, const_buf[k]);
+        k++;
+    }
+}
+
+/*
+ * Program the seven-dword vertex-buffer resource descriptor for slot
+ * res->id.
+ *
+ * Word 2 combines the high address bits (vb_addr >> 32), the stride in bytes
+ * (vtx_size_dw << 2), data/number format, endian swap and three optional
+ * flag bits.  The size word is (vtx_num_entries << 2) - 1.  pScrn is not
+ * used.
+ */
+void
+set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res)
+{
+    uint32_t word2;
+
+    word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
+             ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
+             (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
+             (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
+             (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
+
+    if (res->clamp_x)
+        word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
+    if (res->format_comp_all)
+        word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
+    if (res->srf_mode_all)
+        word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
+
+    pack0 (ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
+    e32 (ib, res->vb_addr & 0xffffffff);                        // 0: BASE_ADDRESS
+    e32 (ib, (res->vtx_num_entries << 2) - 1);                  // 1: SIZE
+    e32 (ib, word2);                                            // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
+    e32 (ib, res->mem_req_size << MEM_REQUEST_SIZE_shift);      // 3: MEM_REQUEST_SIZE ?!?
+    e32 (ib, 0);                                                // 4: n/a
+    e32 (ib, 0);                                                // 5: n/a
+    e32 (ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE
+}
+/*
+ * Program the seven-dword texture resource descriptor for slot tex_res->id.
+ *
+ * Words 0, 1, 4, 5 and 6 are assembled from the tex_res fields; words 2 and
+ * 3 are tex_res->base >> 8 and tex_res->mip_base >> 8.  Width, height and
+ * depth are stored minus one and only when non-zero; the pitch field is
+ * ((pitch + 7) >> 3) - 1 and is only set when w is non-zero.  Word 6 always
+ * carries the SQ_TEX_VTX_VALID_TEXTURE type.  pScrn is not used.
+ */
+void
+set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res)
+{
+ uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
+ uint32_t sq_tex_resource_word5, sq_tex_resource_word6;
+
+ sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) |
+ (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift));
+
+ if (tex_res->w) /* pitch and width are left at 0 when w is 0 */
+ sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
+ ((tex_res->w - 1) << TEX_WIDTH_shift));
+
+ if (tex_res->tile_type)
+ sq_tex_resource_word0 |= TILE_TYPE_bit;
+
+ sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);
+
+ if (tex_res->h)
+ sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
+ if (tex_res->depth)
+ sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
+
+ sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
+ (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
+ (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
+ (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
+ (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
+ (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
+ (tex_res->request_size << REQUEST_SIZE_shift) |
+ (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
+ (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
+ (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
+ (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
+ (tex_res->base_level << BASE_LEVEL_shift));
+
+ if (tex_res->srf_mode_all)
+ sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
+ if (tex_res->force_degamma)
+ sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
+
+ sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
+ (tex_res->base_array << BASE_ARRAY_shift) |
+ (tex_res->last_array << LAST_ARRAY_shift));
+
+ sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) |
+ (tex_res->perf_modulation << PERF_MODULATION_shift) |
+ (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift));
+
+ if (tex_res->interlaced)
+ sq_tex_resource_word6 |= INTERLACED_bit;
+
+ pack0 (ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
+ e32 (ib, sq_tex_resource_word0);
+ e32 (ib, sq_tex_resource_word1);
+ e32 (ib, ((tex_res->base) >> 8)); /* word 2: base >> 8 */
+ e32 (ib, ((tex_res->mip_base) >> 8)); /* word 3: mip base >> 8 */
+ e32 (ib, sq_tex_resource_word4);
+ e32 (ib, sq_tex_resource_word5);
+ e32 (ib, sq_tex_resource_word6);
+}
+/*
+ * Program the three-dword sampler state for sampler slot s->id.
+ *
+ * Word 0 holds clamp modes, filters, border colour type, depth compare
+ * function and chroma key plus three optional flag bits; word 1 holds
+ * min/max LOD and LOD bias; word 2 holds the secondary LOD bias, perf
+ * settings and six optional flag bits.  pScrn is not used.
+ */
+void
+set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
+{
+ uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
+
+ sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) |
+ (s->clamp_y << CLAMP_Y_shift) |
+ (s->clamp_z << CLAMP_Z_shift) |
+ (s->xy_mag_filter << XY_MAG_FILTER_shift) |
+ (s->xy_min_filter << XY_MIN_FILTER_shift) |
+ (s->z_filter << Z_FILTER_shift) |
+ (s->mip_filter << MIP_FILTER_shift) |
+ (s->border_color << BORDER_COLOR_TYPE_shift) |
+ (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) |
+ (s->chroma_key << CHROMA_KEY_shift));
+ if (s->point_sampling_clamp) /* each flag below sets one bit when non-zero */
+ sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit;
+ if (s->tex_array_override)
+ sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit;
+ if (s->lod_uses_minor_axis)
+ sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit;
+
+ sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) |
+ (s->max_lod << MAX_LOD_shift) |
+ (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift));
+
+ sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) |
+ (s->perf_mip << PERF_MIP_shift) |
+ (s->perf_z << PERF_Z_shift));
+ if (s->mc_coord_truncate)
+ sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
+ if (s->force_degamma)
+ sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
+ if (s->high_precision_filter)
+ sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit;
+ if (s->fetch_4)
+ sq_tex_sampler_word2 |= FETCH_4_bit;
+ if (s->sample_is_pcf)
+ sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit;
+ if (s->type)
+ sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
+
+ pack0 (ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
+ e32 (ib, sq_tex_sampler_word0);
+ e32 (ib, sq_tex_sampler_word1);
+ e32 (ib, sq_tex_sampler_word2);
+}
+
+//XXX deal with clip offsets in clip setup
+/*
+ * Program the screen scissor: (x1, y1) goes into the top-left register and
+ * (x2, y2) into the bottom-right register.  pScrn is not used.
+ */
+void
+set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
+{
+    const uint32_t top_left     = ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
+                                   (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift));
+    const uint32_t bottom_right = ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
+                                   (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift));
+
+    ereg (ib, PA_SC_SCREEN_SCISSOR_TL, top_left);
+    ereg (ib, PA_SC_SCREEN_SCISSOR_BR, bottom_right);
+}
+
+/*
+ * Program the scissor of viewport id: (x1, y1) plus
+ * WINDOW_OFFSET_DISABLE_bit goes into the top-left register, (x2, y2) into
+ * the bottom-right register.  pScrn is not used.
+ */
+void
+set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
+{
+    const uint32_t top_left     = ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
+                                   (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
+                                   WINDOW_OFFSET_DISABLE_bit);
+    const uint32_t bottom_right = ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
+                                   (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift));
+
+    ereg (ib, PA_SC_VPORT_SCISSOR_0_TL +
+          id * PA_SC_VPORT_SCISSOR_0_TL_offset, top_left);
+    ereg (ib, PA_SC_VPORT_SCISSOR_0_BR +
+          id * PA_SC_VPORT_SCISSOR_0_BR_offset, bottom_right);
+}
+
+/*
+ * Program the generic scissor: (x1, y1) plus WINDOW_OFFSET_DISABLE_bit goes
+ * into the top-left register, (x2, y2) into the bottom-right register.
+ * pScrn is not used.
+ */
+void
+set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
+{
+
+    ereg (ib, PA_SC_GENERIC_SCISSOR_TL, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
+                                         (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
+                                         WINDOW_OFFSET_DISABLE_bit));
+    /* the BR register takes the BR field shifts for both coordinates */
+    ereg (ib, PA_SC_GENERIC_SCISSOR_BR, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
+                                         (y2 << PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift)));
+}
+
+/*
+ * Program the window scissor: (x1, y1) plus WINDOW_OFFSET_DISABLE_bit goes
+ * into the top-left register, (x2, y2) into the bottom-right register.
+ * pScrn is not used.
+ */
+void
+set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
+{
+    const uint32_t top_left     = ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
+                                   (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
+                                   WINDOW_OFFSET_DISABLE_bit);
+    const uint32_t bottom_right = ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
+                                   (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift));
+
+    ereg (ib, PA_SC_WINDOW_SCISSOR_TL, top_left);
+    ereg (ib, PA_SC_WINDOW_SCISSOR_BR, bottom_right);
+}
+
+/*
+ * Program clip rectangle id: (x1, y1) goes into its top-left register and
+ * (x2, y2) into its bottom-right register.  pScrn is not used.
+ */
+void
+set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
+{
+    const uint32_t top_left     = ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
+                                   (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift));
+    const uint32_t bottom_right = ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
+                                   (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift));
+
+    ereg (ib, PA_SC_CLIPRECT_0_TL +
+          id * PA_SC_CLIPRECT_0_TL_offset, top_left);
+    ereg (ib, PA_SC_CLIPRECT_0_BR +
+          id * PA_SC_CLIPRECT_0_BR_offset, bottom_right);
+}
+
+/*
+ * Setup of default state
+ */
+
+void
+set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
+{
+ tex_resource_t tex_res;
+ shader_config_t fs_conf;
+ sq_config_t sq_conf;
+ int i;
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+
+ memset(&tex_res, 0, sizeof(tex_resource_t));
+ memset(&fs_conf, 0, sizeof(shader_config_t));
+
+#if 1
+ if (accel_state->XInited3D)
+ return;
+#endif
+
+ accel_state->XInited3D = TRUE;
+
+ wait_3d_idle(pScrn, ib);
+
+ // ASIC specific setup, see drm
+ if (info->ChipFamily < CHIP_FAMILY_RV770) {
+ ereg (ib, TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) |
+ (28 << TD_FIFO_CREDIT_shift)));
+ ereg (ib, VC_ENHANCE, 0);
+ ereg (ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
+ ereg (ib, DB_DEBUG, 0x82000000); /* ? */
+ ereg (ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
+ (16 << DEPTH_FLUSH_shift) |
+ (0 << FORCE_SUMMARIZE_shift) |
+ (4 << DEPTH_PENDING_FREE_shift) |
+ (16 << DEPTH_CACHELINE_FREE_shift) |
+ 0));
+ } else {
+ ereg (ib, TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) |
+ (28 << TD_FIFO_CREDIT_shift)));
+ ereg (ib, VC_ENHANCE, 0);
+ ereg (ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
+ ereg (ib, DB_DEBUG, 0);
+ ereg (ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) |
+ (16 << DEPTH_FLUSH_shift) |
+ (0 << FORCE_SUMMARIZE_shift) |
+ (4 << DEPTH_PENDING_FREE_shift) |
+ (4 << DEPTH_CACHELINE_FREE_shift) |
+ 0));
+ }
+
+ reset_td_samplers(pScrn, ib);
+ reset_dx9_alu_consts(pScrn, ib);
+ reset_bool_loop_const (pScrn, ib);
+ reset_sampler_const (pScrn, ib);
+
+ // SQ
+ sq_conf.ps_prio = 0;
+ sq_conf.vs_prio = 1;
+ sq_conf.gs_prio = 2;
+ sq_conf.es_prio = 3;
+ // need to set stack/thread/gpr limits based on the asic
+ // for now just set them low enough so any card will work
+ // see r600_cp.c in the drm
+ switch (info->ChipFamily) {
+ case CHIP_FAMILY_R600:
+ sq_conf.num_ps_gprs = 192;
+ sq_conf.num_vs_gprs = 56;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 136;
+ sq_conf.num_vs_threads = 48;
+ sq_conf.num_gs_threads = 4;
+ sq_conf.num_es_threads = 4;
+ sq_conf.num_ps_stack_entries = 128;
+ sq_conf.num_vs_stack_entries = 128;
+ sq_conf.num_gs_stack_entries = 0;
+ sq_conf.num_es_stack_entries = 0;
+ break;
+ case CHIP_FAMILY_RV630:
+ case CHIP_FAMILY_RV635:
+ sq_conf.num_ps_gprs = 84;
+ sq_conf.num_vs_gprs = 36;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 144;
+ sq_conf.num_vs_threads = 40;
+ sq_conf.num_gs_threads = 4;
+ sq_conf.num_es_threads = 4;
+ sq_conf.num_ps_stack_entries = 40;
+ sq_conf.num_vs_stack_entries = 40;
+ sq_conf.num_gs_stack_entries = 32;
+ sq_conf.num_es_stack_entries = 16;
+ break;
+ case CHIP_FAMILY_RV610:
+ case CHIP_FAMILY_RV620:
+ case CHIP_FAMILY_RS780:
+ default:
+ sq_conf.num_ps_gprs = 84;
+ sq_conf.num_vs_gprs = 36;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 136;
+ sq_conf.num_vs_threads = 48;
+ sq_conf.num_gs_threads = 4;
+ sq_conf.num_es_threads = 4;
+ sq_conf.num_ps_stack_entries = 40;
+ sq_conf.num_vs_stack_entries = 40;
+ sq_conf.num_gs_stack_entries = 32;
+ sq_conf.num_es_stack_entries = 16;
+ break;
+ case CHIP_FAMILY_RV670:
+ sq_conf.num_ps_gprs = 144;
+ sq_conf.num_vs_gprs = 40;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 136;
+ sq_conf.num_vs_threads = 48;
+ sq_conf.num_gs_threads = 4;
+ sq_conf.num_es_threads = 4;
+ sq_conf.num_ps_stack_entries = 40;
+ sq_conf.num_vs_stack_entries = 40;
+ sq_conf.num_gs_stack_entries = 32;
+ sq_conf.num_es_stack_entries = 16;
+ break;
+ case CHIP_FAMILY_RV770:
+ sq_conf.num_ps_gprs = 192;
+ sq_conf.num_vs_gprs = 56;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 188;
+ sq_conf.num_vs_threads = 60;
+ sq_conf.num_gs_threads = 0;
+ sq_conf.num_es_threads = 0;
+ sq_conf.num_ps_stack_entries = 256;
+ sq_conf.num_vs_stack_entries = 256;
+ sq_conf.num_gs_stack_entries = 0;
+ sq_conf.num_es_stack_entries = 0;
+ break;
+ case CHIP_FAMILY_RV730:
+ sq_conf.num_ps_gprs = 84;
+ sq_conf.num_vs_gprs = 36;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 188;
+ sq_conf.num_vs_threads = 60;
+ sq_conf.num_gs_threads = 0;
+ sq_conf.num_es_threads = 0;
+ sq_conf.num_ps_stack_entries = 128;
+ sq_conf.num_vs_stack_entries = 128;
+ sq_conf.num_gs_stack_entries = 0;
+ sq_conf.num_es_stack_entries = 0;
+ break;
+ case CHIP_FAMILY_RV710:
+ sq_conf.num_ps_gprs = 192;
+ sq_conf.num_vs_gprs = 56;
+ sq_conf.num_temp_gprs = 4;
+ sq_conf.num_gs_gprs = 0;
+ sq_conf.num_es_gprs = 0;
+ sq_conf.num_ps_threads = 144;
+ sq_conf.num_vs_threads = 48;
+ sq_conf.num_gs_threads = 0;
+ sq_conf.num_es_threads = 0;
+ sq_conf.num_ps_stack_entries = 128;
+ sq_conf.num_vs_stack_entries = 128;
+ sq_conf.num_gs_stack_entries = 0;
+ sq_conf.num_es_stack_entries = 0;
+ break;
+ }
+
+ sq_setup(pScrn, ib, &sq_conf);
+
+ ereg (ib, SQ_VTX_BASE_VTX_LOC, 0);
+ ereg (ib, SQ_VTX_START_INST_LOC, 0);
+
+ pack0 (ib, SQ_ESGS_RING_ITEMSIZE, 9);
+ e32 (ib, 0); // SQ_ESGS_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_GSVS_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_ESTMP_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_GSTMP_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_VSTMP_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_PSTMP_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_FBUF_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_REDUC_RING_ITEMSIZE
+ e32 (ib, 0); // SQ_GS_VERT_ITEMSIZE
+
+ // DB
+ ereg (ib, DB_DEPTH_INFO, 0);
+ ereg (ib, DB_STENCIL_CLEAR, 0);
+ ereg (ib, DB_DEPTH_CLEAR, 0);
+ ereg (ib, DB_STENCILREFMASK, 0);
+ ereg (ib, DB_STENCILREFMASK_BF, 0);
+ ereg (ib, DB_DEPTH_CONTROL, 0);
+ ereg (ib, DB_RENDER_CONTROL, STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
+ if (info->ChipFamily < CHIP_FAMILY_RV770)
+ ereg (ib, DB_RENDER_OVERRIDE, FORCE_SHADER_Z_ORDER_bit);
+ else
+ ereg (ib, DB_RENDER_OVERRIDE, 0);
+ ereg (ib, DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) |
+ (2 << ALPHA_TO_MASK_OFFSET1_shift) |
+ (2 << ALPHA_TO_MASK_OFFSET2_shift) |
+ (2 << ALPHA_TO_MASK_OFFSET3_shift)));
+
+ // SX
+ ereg (ib, SX_ALPHA_TEST_CONTROL, 0);
+ ereg (ib, SX_ALPHA_REF, 0);
+
+ // CB
+ reset_cb(pScrn, ib);
+
+ pack0 (ib, CB_BLEND_RED, 4);
+ e32 (ib, 0x00000000);
+ e32 (ib, 0x00000000);
+ e32 (ib, 0x00000000);
+ e32 (ib, 0x00000000);
+
+ /* CB_COLOR_CONTROL.PER_MRT_BLEND is off */
+ // RV6xx+ have per-MRT blend
+ if (info->ChipFamily > CHIP_FAMILY_R600) {
+ pack0 (ib, CB_BLEND0_CONTROL, CB_BLEND0_CONTROL_num);
+ for (i = 0; i < CB_BLEND0_CONTROL_num; i++)
+ e32 (ib, 0);
+ }
+
+ ereg (ib, CB_BLEND_CONTROL, 0);
+
+ if (info->ChipFamily < CHIP_FAMILY_RV770) {
+ pack0 (ib, CB_FOG_RED, 3);
+ e32 (ib, 0x00000000);
+ e32 (ib, 0x00000000);
+ e32 (ib, 0x00000000);
+ }
+
+ ereg (ib, CB_COLOR_CONTROL, 0);
+ pack0 (ib, CB_CLRCMP_CONTROL, 4);
+ e32 (ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
+ e32 (ib, 0); // CB_CLRCMP_SRC
+ e32 (ib, 0); // CB_CLRCMP_DST
+ e32 (ib, 0); // CB_CLRCMP_MSK
+
+
+ if (info->ChipFamily < CHIP_FAMILY_RV770) {
+ pack0 (ib, CB_CLEAR_RED, 4);
+ efloat(ib, 1.0); /* WTF? */
+ efloat(ib, 0.0);
+ efloat(ib, 1.0);
+ efloat(ib, 1.0);
+ }
+ ereg (ib, CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift));
+
+ // SC
+ set_generic_scissor(pScrn, ib, 0, 0, 8192, 8192);
+ set_screen_scissor (pScrn, ib, 0, 0, 8192, 8192);
+ ereg (ib, PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) |
+ (0 << WINDOW_Y_OFFSET_shift)));
+ set_window_scissor (pScrn, ib, 0, 0, 8192, 8192);
+
+ ereg (ib, PA_SC_CLIPRECT_RULE, CLIP_RULE_mask);
+
+ /* clip boolean is set to always visible -> doesn't matter */
+ for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
+ set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192);
+
+ if (info->ChipFamily < CHIP_FAMILY_RV770)
+ ereg (ib, R7xx_PA_SC_EDGERULE, 0x00000000);
+ else
+ ereg (ib, R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); /* ? */
+
+ for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) {
+ set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192);
+ pack0 (ib, PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2);
+ efloat(ib, 0.0);
+ efloat(ib, 1.0);
+ }
+
+ if (info->ChipFamily < CHIP_FAMILY_RV770)
+ ereg (ib, PA_SC_MODE_CNTL, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
+ else
+ ereg (ib, PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
+ 0x00500000)); /* ? */
+
+ ereg (ib, PA_SC_LINE_CNTL, 0);
+ ereg (ib, PA_SC_AA_CONFIG, 0);
+ ereg (ib, PA_SC_AA_MASK, 0xFFFFFFFF);
+
+ //XXX: double check this
+ if (info->ChipFamily > CHIP_FAMILY_R600) {
+ ereg (ib, PA_SC_AA_SAMPLE_LOCS_MCTX, 0);
+ ereg (ib, PA_SC_AA_SAMPLE_LOCS_8S_WD1_M, 0);
+ }
+
+ ereg (ib, PA_SC_LINE_STIPPLE, 0);
+ ereg (ib, PA_SC_MPASS_PS_CNTL, 0);
+
+ // CL
+ pack0 (ib, PA_CL_VPORT_XSCALE_0, 6);
+ efloat (ib, 0.0f); // PA_CL_VPORT_XSCALE
+ efloat (ib, 0.0f); // PA_CL_VPORT_XOFFSET
+ efloat (ib, 0.0f); // PA_CL_VPORT_YSCALE
+ efloat (ib, 0.0f); // PA_CL_VPORT_YOFFSET
+ efloat (ib, 0.0f); // PA_CL_VPORT_ZSCALE
+ efloat (ib, 0.0f); // PA_CL_VPORT_ZOFFSET
+ ereg (ib, PA_CL_CLIP_CNTL, (CLIP_DISABLE_bit | DX_CLIP_SPACE_DEF_bit));
+ ereg (ib, PA_CL_VTE_CNTL, 0);
+ ereg (ib, PA_CL_VS_OUT_CNTL, 0);
+ ereg (ib, PA_CL_NANINF_CNTL, 0);
+ pack0 (ib, PA_CL_GB_VERT_CLIP_ADJ, 4);
+ efloat (ib, 1.0); // PA_CL_GB_VERT_CLIP_ADJ
+ efloat (ib, 1.0); // PA_CL_GB_VERT_DISC_ADJ
+ efloat (ib, 1.0); // PA_CL_GB_HORZ_CLIP_ADJ
+ efloat (ib, 1.0); // PA_CL_GB_HORZ_DISC_ADJ
+
+ /* user clipping planes are disabled by default */
+ pack0 (ib, PA_CL_UCP_0_X, 24);
+ for (i = 0; i < 24; i++)
+ efloat (ib, 0.0);
+
+ // SU
+ ereg (ib, PA_SU_SC_MODE_CNTL, FACE_bit);
+ ereg (ib, PA_SU_POINT_SIZE, 0);
+ ereg (ib, PA_SU_POINT_MINMAX, 0);
+ ereg (ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0);
+ ereg (ib, PA_SU_POLY_OFFSET_BACK_SCALE, 0);
+ ereg (ib, PA_SU_POLY_OFFSET_FRONT_SCALE, 0);
+ ereg (ib, PA_SU_POLY_OFFSET_BACK_OFFSET, 0);
+ ereg (ib, PA_SU_POLY_OFFSET_FRONT_OFFSET, 0);
+
+ ereg (ib, PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */
+ ereg (ib, PA_SU_VTX_CNTL, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
+ (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
+ ereg (ib, PA_SU_POLY_OFFSET_CLAMP, 0);
+
+ // SPI
+ if (info->ChipFamily < CHIP_FAMILY_RV770)
+ ereg (ib, R7xx_SPI_THREAD_GROUPING, 0);
+ else
+ ereg (ib, R7xx_SPI_THREAD_GROUPING, (1 << PS_GROUPING_shift));
+
+ ereg (ib, SPI_INTERP_CONTROL_0, ((2 << PNT_SPRITE_OVRD_X_shift) |
+ (3 << PNT_SPRITE_OVRD_Y_shift) |
+ (0 << PNT_SPRITE_OVRD_Z_shift) |
+ (1 << PNT_SPRITE_OVRD_W_shift))); /* s,t,0,1 */
+ ereg (ib, SPI_INPUT_Z, 0);
+ ereg (ib, SPI_FOG_CNTL, 0);
+ ereg (ib, SPI_FOG_FUNC_SCALE, 0);
+ ereg (ib, SPI_FOG_FUNC_BIAS, 0);
+
+ pack0 (ib, SPI_VS_OUT_ID_0, SPI_VS_OUT_ID_0_num);
+ for (i = 0; i < SPI_VS_OUT_ID_0_num; i++) /* identity mapping */
+ e32 (ib, 0x03020100 + i*0x04040404);
+ ereg (ib, SPI_VS_OUT_CONFIG, 0);
+
+ // clear FS
+ fs_setup(pScrn, ib, &fs_conf);
+
+ // VGT
+ ereg (ib, VGT_MAX_VTX_INDX, 2048); /* XXX set to a reasonably large number of indices */
+ ereg (ib, VGT_MIN_VTX_INDX, 0);
+ ereg (ib, VGT_INDX_OFFSET, 0);
+ ereg (ib, VGT_INSTANCE_STEP_RATE_0, 0);
+ ereg (ib, VGT_INSTANCE_STEP_RATE_1, 0);
+
+ ereg (ib, VGT_MULTI_PRIM_IB_RESET_INDX, 0);
+ ereg (ib, VGT_OUTPUT_PATH_CNTL, 0);
+ ereg (ib, VGT_GS_MODE, 0);
+ ereg (ib, VGT_HOS_CNTL, 0);
+ ereg (ib, VGT_HOS_MAX_TESS_LEVEL, 0);
+ ereg (ib, VGT_HOS_MIN_TESS_LEVEL, 0);
+ ereg (ib, VGT_HOS_REUSE_DEPTH, 0);
+ ereg (ib, VGT_GROUP_PRIM_TYPE, 0);
+ ereg (ib, VGT_GROUP_FIRST_DECR, 0);
+ ereg (ib, VGT_GROUP_DECR, 0);
+ ereg (ib, VGT_GROUP_VECT_0_CNTL, 0);
+ ereg (ib, VGT_GROUP_VECT_1_CNTL, 0);
+ ereg (ib, VGT_GROUP_VECT_0_FMT_CNTL, 0);
+ ereg (ib, VGT_GROUP_VECT_1_FMT_CNTL, 0);
+ ereg (ib, VGT_PRIMITIVEID_EN, 0);
+ ereg (ib, VGT_MULTI_PRIM_IB_RESET_EN, 0);
+ ereg (ib, VGT_STRMOUT_EN, 0);
+ ereg (ib, VGT_REUSE_OFF, 0);
+ ereg (ib, VGT_VTX_CNT_EN, 0);
+ ereg (ib, VGT_STRMOUT_BUFFER_EN, 0);
+
+ // clear tex resources - PS
+ for (i = 0; i < 16; i++) {
+ tex_res.id = i;
+ set_tex_resource(pScrn, ib, &tex_res);
+ }
+
+ // clear tex resources - VS
+ for (i = 160; i < 164; i++) {
+ tex_res.id = i;
+ set_tex_resource(pScrn, ib, &tex_res);
+ }
+
+ // clear tex resources - FS
+ for (i = 320; i < 335; i++) {
+ tex_res.id = i;
+ set_tex_resource(pScrn, ib, &tex_res);
+ }
+
+}
+
+
+/*
+ * Commands
+ */
+
+/*
+ * Emit an indexed draw whose index list is carried inline in the command
+ * stream.
+ *
+ * Written to ib, in order: the VGT_PRIMITIVE_TYPE register, an IT_INDEX_TYPE
+ * packet, an IT_NUM_INSTANCES packet, and an IT_DRAW_INDEX_IMMD packet holding
+ * the index count, the draw initiator and the indices themselves.
+ *
+ * indices must provide draw_conf->num_indices entries.  With
+ * DI_INDEX_SIZE_16_BIT two consecutive entries share one dword (even entry
+ * in the low half, odd entry shifted into the high half); a trailing odd
+ * entry is emitted on its own.  Any other index type emits one dword per
+ * entry.  pScrn is not used.
+ */
+void
+draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
+{
+    const uint32_t num_indices = draw_conf->num_indices;
+    const int      packed16    = (draw_conf->index_type == DI_INDEX_SIZE_16_BIT);
+    uint32_t       payload_dwords;
+    uint32_t       n;
+
+    ereg  (ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
+
+    pack3 (ib, IT_INDEX_TYPE, 1);
+    e32   (ib, draw_conf->index_type);
+
+    pack3 (ib, IT_NUM_INSTANCES, 1);
+    e32   (ib, draw_conf->num_instances);
+
+    /*
+     * Packet payload size in dwords: index count + draw initiator, followed
+     * by the index data (two 16-bit indices per dword, rounded up, or one
+     * index per dword).
+     */
+    payload_dwords = 2;
+    payload_dwords += packed16 ? (num_indices + 1) / 2 : num_indices;
+
+    pack3 (ib, IT_DRAW_INDEX_IMMD, payload_dwords);
+    e32   (ib, num_indices);
+    e32   (ib, draw_conf->vgt_draw_initiator);
+
+    if (!packed16) {
+        for (n = 0; n < num_indices; n++)
+            e32 (ib, indices[n]);
+        return;
+    }
+
+    for (n = 0; n < num_indices; n += 2) {
+        uint32_t dword = indices[n];
+
+        /* pair up with the following index unless this is the last one */
+        if ((n + 1) != num_indices)
+            dword |= indices[n + 1] << 16;
+
+        e32 (ib, dword);
+    }
+}
+
+void
+draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
+{
+ /*
+ * Emit a draw that carries no index list of its own.  Written to ib, in
+ * order: the VGT_PRIMITIVE_TYPE register, an IT_INDEX_TYPE packet, an
+ * IT_NUM_INSTANCES packet, and a two-dword IT_DRAW_INDEX_AUTO packet
+ * (index count, draw initiator).  Every value comes from draw_conf;
+ * pScrn is not used.
+ */
+ ereg (ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
+ pack3 (ib, IT_INDEX_TYPE, 1);
+ e32 (ib, draw_conf->index_type);
+ pack3 (ib, IT_NUM_INSTANCES, 1);
+ e32 (ib, draw_conf->num_instances);
+ pack3 (ib, IT_DRAW_INDEX_AUTO, 2);
+ e32 (ib, draw_conf->num_indices);
+ e32 (ib, draw_conf->vgt_draw_initiator);
+}
diff --git a/src/radeon.h b/src/radeon.h
index af9c7c2e..a7ed95e4 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -354,6 +354,8 @@ typedef enum {
#define IS_DCE32_VARIANT ((info->ChipFamily >= CHIP_FAMILY_RV730))
+#define IS_R600_3D (info->ChipFamily >= CHIP_FAMILY_R600)
+
#define IS_R500_3D ((info->ChipFamily == CHIP_FAMILY_RV515) || \
(info->ChipFamily == CHIP_FAMILY_R520) || \
(info->ChipFamily == CHIP_FAMILY_RV530) || \
@@ -569,6 +571,38 @@ struct radeon_dri {
};
#endif
+#ifdef XF86DRI
+#ifdef USE_EXA
+struct r6xx_solid_vertex { /* two floats per vertex; NOTE(review): presumably the solid-fill vertex layout - confirm against r600_exa.c */
+ float x; /* NOTE(review): presumably the destination X position - confirm */
+ float y; /* NOTE(review): presumably the destination Y position - confirm */
+};
+
+struct r6xx_copy_vertex { /* four floats per vertex: a position pair plus an s/t pair */
+ float x;
+ float y;
+ float s; /* NOTE(review): presumably the source texture coordinate (horizontal) - confirm */
+ float t; /* NOTE(review): presumably the source texture coordinate (vertical) - confirm */
+};
+
+struct r6xx_comp_vertex { /* four floats per vertex; same member layout as r6xx_copy_vertex */
+ float x;
+ float y;
+ float src_s;
+ float src_t;
+};
+
+struct r6xx_comp_mask_vertex { /* six floats per vertex: r6xx_comp_vertex members followed by a mask s/t pair */
+ float x;
+ float y;
+ float src_s;
+ float src_t;
+ float mask_s; /* NOTE(review): presumably the mask picture coordinate - confirm */
+ float mask_t;
+};
+#endif
+#endif
+
struct radeon_accel_state {
/* common accel data */
int fifo_slots; /* Free slots in the FIFO (64 max) */
@@ -609,6 +643,51 @@ struct radeon_accel_state {
Bool src_tile_height;
Bool vsync;
+
+ drmBufPtr ib;
+ int vb_index;
+
+ // shader storage
+ ExaOffscreenArea *shaders;
+ uint32_t solid_vs_offset;
+ uint32_t solid_ps_offset;
+ uint32_t copy_vs_offset;
+ uint32_t copy_ps_offset;
+ uint32_t comp_vs_offset;
+ uint32_t comp_ps_offset;
+ uint32_t comp_mask_vs_offset;
+ uint32_t comp_mask_ps_offset;
+ uint32_t xv_vs_offset;
+ uint32_t xv_ps_offset_packed;
+ uint32_t xv_ps_offset_planar;
+
+ //size/addr stuff
+ uint32_t src_size[2];
+ uint64_t src_mc_addr[2];
+ uint32_t src_pitch[2];
+ uint32_t src_width[2];
+ uint32_t src_height[2];
+ uint32_t src_bpp[2];
+ uint32_t dst_size;
+ uint64_t dst_mc_addr;
+ uint32_t dst_pitch;
+ uint32_t dst_height;
+ uint32_t dst_bpp;
+ uint32_t vs_size;
+ uint64_t vs_mc_addr;
+ uint32_t ps_size;
+ uint64_t ps_mc_addr;
+ uint32_t vb_size;
+ uint64_t vb_mc_addr;
+
+ // UTS/DFS
+ drmBufPtr scratch;
+
+ // copy
+ ExaOffscreenArea *copy_area;
+ Bool same_surface;
+ int rop;
+ uint32_t planemask;
#endif
#ifdef USE_XAA
@@ -1035,6 +1114,7 @@ extern void RADEONDoPrepareCopyMMIO(ScrnInfoPtr pScrn,
uint32_t dst_pitch_offset,
uint32_t datatype, int rop,
Pixel planemask);
+extern Bool R600DrawInit(ScreenPtr pScreen);
#endif
#if defined(XF86DRI) && defined(USE_EXA)
@@ -1119,15 +1199,16 @@ do { \
#define RADEONCP_STOP(pScrn, info) \
do { \
int _ret; \
- if (info->cp->CPStarted) { \
+ if (info->cp->CPStarted) { \
_ret = RADEONCPStop(pScrn, info); \
if (_ret) { \
xf86DrvMsg(pScrn->scrnIndex, X_ERROR, \
"%s: CP stop %d\n", __FUNCTION__, _ret); \
} \
info->cp->CPStarted = FALSE; \
- } \
- RADEONEngineRestore(pScrn); \
+ } \
+ if (info->ChipFamily < CHIP_FAMILY_R600) \
+ RADEONEngineRestore(pScrn); \
info->cp->CPRuns = FALSE; \
} while (0)
@@ -1235,28 +1316,31 @@ do { \
if (RADEON_VERBOSE) \
xf86DrvMsg(pScrn->scrnIndex, X_INFO, \
"FLUSH_RING in %s\n", __FUNCTION__); \
- if (info->cp->indirectBuffer) { \
+ if (info->cp->indirectBuffer) \
RADEONCPFlushIndirect(pScrn, 0); \
- } \
} while (0)
#define RADEON_WAIT_UNTIL_2D_IDLE() \
do { \
- BEGIN_RING(2); \
- OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \
- OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \
- RADEON_WAIT_HOST_IDLECLEAN)); \
- ADVANCE_RING(); \
+ if (info->ChipFamily < CHIP_FAMILY_R600) { \
+ BEGIN_RING(2); \
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \
+ OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \
+ RADEON_WAIT_HOST_IDLECLEAN)); \
+ ADVANCE_RING(); \
+ } \
} while (0)
#define RADEON_WAIT_UNTIL_3D_IDLE() \
do { \
- BEGIN_RING(2); \
- OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \
- OUT_RING((RADEON_WAIT_3D_IDLECLEAN | \
- RADEON_WAIT_HOST_IDLECLEAN)); \
- ADVANCE_RING(); \
+ if (info->ChipFamily < CHIP_FAMILY_R600) { \
+ BEGIN_RING(2); \
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \
+ OUT_RING((RADEON_WAIT_3D_IDLECLEAN | \
+ RADEON_WAIT_HOST_IDLECLEAN)); \
+ ADVANCE_RING(); \
+ } \
} while (0)
#define RADEON_WAIT_UNTIL_IDLE() \
@@ -1265,38 +1349,44 @@ do { \
xf86DrvMsg(pScrn->scrnIndex, X_INFO, \
"WAIT_UNTIL_IDLE() in %s\n", __FUNCTION__); \
} \
- BEGIN_RING(2); \
- OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \
- OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \
- RADEON_WAIT_3D_IDLECLEAN | \
- RADEON_WAIT_HOST_IDLECLEAN)); \
- ADVANCE_RING(); \
+ if (info->ChipFamily < CHIP_FAMILY_R600) { \
+ BEGIN_RING(2); \
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \
+ OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \
+ RADEON_WAIT_3D_IDLECLEAN | \
+ RADEON_WAIT_HOST_IDLECLEAN)); \
+ ADVANCE_RING(); \
+ } \
} while (0)
#define RADEON_PURGE_CACHE() \
do { \
- BEGIN_RING(2); \
- if (info->ChipFamily <= CHIP_FAMILY_RV280) { \
- OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \
- OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \
- } else { \
- OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \
- OUT_RING(R300_RB3D_DC_FLUSH_ALL); \
- } \
- ADVANCE_RING(); \
+ if (info->ChipFamily < CHIP_FAMILY_R600) { \
+ BEGIN_RING(2); \
+ if (info->ChipFamily <= CHIP_FAMILY_RV280) { \
+ OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \
+ OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \
+ } else { \
+ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \
+ OUT_RING(R300_RB3D_DC_FLUSH_ALL); \
+ } \
+ ADVANCE_RING(); \
+ } \
} while (0)
#define RADEON_PURGE_ZCACHE() \
do { \
- BEGIN_RING(2); \
- if (info->ChipFamily <= CHIP_FAMILY_RV280) { \
- OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \
- OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \
- } else { \
- OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); \
- OUT_RING(R300_ZC_FLUSH_ALL); \
+ if (info->ChipFamily < CHIP_FAMILY_R600) { \
+ BEGIN_RING(2); \
+ if (info->ChipFamily <= CHIP_FAMILY_RV280) { \
+ OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \
+ OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \
+ } else { \
+ OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); \
+ OUT_RING(R300_ZC_FLUSH_ALL); \
+ } \
+ ADVANCE_RING(); \
} \
- ADVANCE_RING(); \
} while (0)
#endif /* XF86DRI */
diff --git a/src/radeon_accel.c b/src/radeon_accel.c
index 96570e8c..dffbc576 100644
--- a/src/radeon_accel.c
+++ b/src/radeon_accel.c
@@ -78,6 +78,7 @@
/* Driver data structures */
#include "radeon.h"
#include "radeon_reg.h"
+#include "r600_reg.h"
#include "radeon_macros.h"
#include "radeon_probe.h"
#include "radeon_version.h"
@@ -92,6 +93,7 @@
/* X and server generic header files */
#include "xf86.h"
+static void R600EngineReset(ScrnInfoPtr pScrn);
#ifdef USE_XAA
static struct {
@@ -149,6 +151,37 @@ void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
}
}
+/*
+ * Busy-wait until the command FIFO field of R600_GRBM_STATUS reports at
+ * least `entries` free slots.
+ *
+ * The most recent reading is stored in info->accel_state->fifo_slots.  After
+ * RADEON_TIMEOUT unsuccessful polls the timeout is logged, the engine is
+ * reset (and, with DRI enabled, the CP is reset and restarted), and polling
+ * starts over; the function only returns once enough slots are available.
+ */
+void R600WaitForFifoFunction(ScrnInfoPtr pScrn, int entries)
+{
+    RADEONInfoPtr  info       = RADEONPTR(pScrn);
+    unsigned char *RADEONMMIO = info->MMIO;
+    /* RV770 and newer use a different mask for the FIFO-available field */
+    uint32_t       avail_mask = (info->ChipFamily >= CHIP_FAMILY_RV770)
+                                ? R700_CMDFIFO_AVAIL_MASK
+                                : R600_CMDFIFO_AVAIL_MASK;
+    int            poll;
+
+    for (;;) {
+        for (poll = 0; poll < RADEON_TIMEOUT; poll++) {
+            info->accel_state->fifo_slots =
+                INREG(R600_GRBM_STATUS) & avail_mask;
+            if (info->accel_state->fifo_slots >= entries)
+                return;
+        }
+
+        xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
+                       "FIFO timed out: stat=0x%08x\n",
+                       (unsigned int)INREG(R600_GRBM_STATUS));
+        xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+                   "FIFO timed out, resetting engine...\n");
+
+        R600EngineReset(pScrn);
+#ifdef XF86DRI
+        if (info->directRenderingEnabled) {
+            RADEONCP_RESET(pScrn, info);
+            RADEONCP_START(pScrn, info);
+        }
+#endif
+    }
+}
+
/* Flush all dirty data in the Pixel Cache to memory */
void RADEONEngineFlush(ScrnInfoPtr pScrn)
{
@@ -156,9 +189,6 @@ void RADEONEngineFlush(ScrnInfoPtr pScrn)
unsigned char *RADEONMMIO = info->MMIO;
int i;
- if (info->ChipFamily >= CHIP_FAMILY_R600)
- return;
-
if (info->ChipFamily <= CHIP_FAMILY_RV280) {
OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
RADEON_RB3D_DC_FLUSH_ALL,
@@ -198,8 +228,6 @@ void RADEONEngineReset(ScrnInfoPtr pScrn)
uint32_t rbbm_soft_reset;
uint32_t host_path_cntl;
- if (info->ChipFamily >= CHIP_FAMILY_R600)
- return;
/* The following RBBM_SOFT_RESET sequence can help un-wedge
* an R300 after the command processor got stuck.
*/
@@ -310,6 +338,35 @@ void RADEONEngineReset(ScrnInfoPtr pScrn)
OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl);
}
+/*
+ * Reset graphics card to known state (R6xx/R7xx path).
+ *
+ * Pulses R600_GRBM_SOFT_RESET through MMIO.  CP_ME_CNTL and CP_RB_CNTL are
+ * read before being overwritten and are written back at the end; the ring
+ * read and write pointers are both set to the write pointer sampled on
+ * entry.  The statement order below is the reset sequence - do not reorder.
+ */
+static void R600EngineReset(ScrnInfoPtr pScrn)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ unsigned char *RADEONMMIO = info->MMIO; /* not referenced by name below; presumably consumed by the INREG/OUTREG macros */
+ uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl;
+
+ cp_ptr = INREG(R600_CP_RB_WPTR); /* sample the ring write pointer before the reset */
+
+ cp_me_cntl = INREG(R600_CP_ME_CNTL); /* saved, restored as the last step */
+ OUTREG(R600_CP_ME_CNTL, 0x10000000); /* NOTE(review): presumably the ME halt bit - confirm against r600_reg.h */
+
+ OUTREG(R600_GRBM_SOFT_RESET, 0x7fff); /* assert reset bits 0..14 */
+ INREG(R600_GRBM_SOFT_RESET); /* read back, value discarded; presumably posts the write */
+ usleep (50); /* hold the reset for 50 microseconds */
+ OUTREG(R600_GRBM_SOFT_RESET, 0); /* release the reset */
+ INREG(R600_GRBM_SOFT_RESET); /* read back again, value discarded */
+
+ OUTREG(R600_CP_RB_WPTR_DELAY, 0);
+ cp_rb_cntl = INREG(R600_CP_RB_CNTL); /* saved, restored below */
+ OUTREG(R600_CP_RB_CNTL, 0x80000000); /* NOTE(review): presumably enables writes to the read pointer - confirm */
+
+ OUTREG(R600_CP_RB_RPTR_WR, cp_ptr); /* read pointer := saved write pointer */
+ OUTREG(R600_CP_RB_WPTR, cp_ptr); /* write pointer back to its pre-reset value */
+ OUTREG(R600_CP_RB_CNTL, cp_rb_cntl);
+ OUTREG(R600_CP_ME_CNTL, cp_me_cntl);
+
+}
+
/* Restore the acceleration hardware to its previous state */
void RADEONEngineRestore(ScrnInfoPtr pScrn)
{
@@ -611,8 +668,12 @@ drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
"GetBuffer timed out, resetting engine...\n");
- RADEONEngineReset(pScrn);
- RADEONEngineRestore(pScrn);
+
+ if (info->ChipFamily < CHIP_FAMILY_R600) {
+ RADEONEngineReset(pScrn);
+ RADEONEngineRestore(pScrn);
+ } else
+ R600EngineReset(pScrn);
/* Always restart the engine when doing CP 2D acceleration */
RADEONCP_RESET(pScrn, info);
@@ -627,6 +688,8 @@ void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
drmBufPtr buffer = info->cp->indirectBuffer;
int start = info->cp->indirectStart;
drm_radeon_indirect_t indirect;
+ RING_LOCALS;
+ RADEONCP_REFRESH(pScrn, info);
if (!buffer) return;
if (start == buffer->used && !discard) return;
@@ -636,6 +699,14 @@ void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
buffer->idx);
}
+ if (info->ChipFamily >= CHIP_FAMILY_R600) {
+ while (buffer->used & 0x3c){
+ BEGIN_RING(1);
+ OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
+ ADVANCE_RING();
+ }
+ }
+
indirect.idx = buffer->idx;
indirect.start = start;
indirect.end = buffer->used;
@@ -664,6 +735,19 @@ void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
drmBufPtr buffer = info->cp->indirectBuffer;
int start = info->cp->indirectStart;
drm_radeon_indirect_t indirect;
+ RING_LOCALS;
+ RADEONCP_REFRESH(pScrn, info);
+
+
+ if (info->ChipFamily >= CHIP_FAMILY_R600) {
+ if (buffer) {
+ while (buffer->used & 0x3c) {
+ BEGIN_RING(1);
+ OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */
+ ADVANCE_RING();
+ }
+ }
+ }
info->cp->indirectBuffer = NULL;
info->cp->indirectStart = 0;
@@ -926,20 +1010,26 @@ Bool RADEONAccelInit(ScreenPtr pScreen)
ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
RADEONInfoPtr info = RADEONPTR(pScrn);
- if (info->ChipFamily >= CHIP_FAMILY_R600)
- return FALSE;
-
#ifdef USE_EXA
if (info->useEXA) {
# ifdef XF86DRI
if (info->directRenderingEnabled) {
- if (!RADEONDrawInitCP(pScreen))
- return FALSE;
+ if (info->ChipFamily >= CHIP_FAMILY_R600) {
+ if (!R600DrawInit(pScreen))
+ return FALSE;
+ } else {
+ if (!RADEONDrawInitCP(pScreen))
+ return FALSE;
+ }
} else
# endif /* XF86DRI */
{
- if (!RADEONDrawInitMMIO(pScreen))
+ if (info->ChipFamily >= CHIP_FAMILY_R600)
return FALSE;
+ else {
+ if (!RADEONDrawInitMMIO(pScreen))
+ return FALSE;
+ }
}
}
#endif /* USE_EXA */
@@ -947,6 +1037,9 @@ Bool RADEONAccelInit(ScreenPtr pScreen)
if (!info->useEXA) {
XAAInfoRecPtr a;
+ if (info->ChipFamily >= CHIP_FAMILY_R600)
+ return FALSE;
+
if (!(a = info->accel_state->accel = XAACreateInfoRec())) {
xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
return FALSE;
diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c
index 3aa61bcc..eabd87df 100644
--- a/src/radeon_commonfuncs.c
+++ b/src/radeon_commonfuncs.c
@@ -656,7 +656,7 @@ void FUNC_NAME(RADEONWaitForVLine)(ScrnInfoPtr pScrn, PixmapPtr pPix,
return;
start = max(start, 0);
- stop = max(stop, xf86_config->crtc[crtc]->mode.VDisplay);
+ stop = min(stop, xf86_config->crtc[crtc]->mode.VDisplay);
if (start > xf86_config->crtc[crtc]->mode.VDisplay)
return;
@@ -730,8 +730,11 @@ void FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn)
xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
"Idle timed out, resetting engine...\n");
- RADEONEngineReset(pScrn);
- RADEONEngineRestore(pScrn);
+ if (info->ChipFamily < CHIP_FAMILY_R600) {
+ RADEONEngineReset(pScrn);
+ RADEONEngineRestore(pScrn);
+ } else
+ R600EngineReset(pScrn);
/* Always restart the engine when doing CP 2D acceleration */
RADEONCP_RESET(pScrn, info);
@@ -740,39 +743,56 @@ void FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn)
}
#endif
-#if 0
- xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
- "WaitForIdle (entering): %d entries, stat=0x%08x\n",
- INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
- INREG(RADEON_RBBM_STATUS));
-#endif
-
- if (info->ChipFamily >= CHIP_FAMILY_R600)
- return;
-
- /* Wait for the engine to go idle */
- RADEONWaitForFifoFunction(pScrn, 64);
+ if (info->ChipFamily >= CHIP_FAMILY_R600) {
+ /* Wait for the engine to go idle */
+ if (info->ChipFamily >= CHIP_FAMILY_RV770)
+ R600WaitForFifoFunction(pScrn, 8);
+ else
+ R600WaitForFifoFunction(pScrn, 16);
- for (;;) {
- for (i = 0; i < RADEON_TIMEOUT; i++) {
- if (!(INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)) {
- RADEONEngineFlush(pScrn);
- return;
+ for (;;) {
+ for (i = 0; i < RADEON_TIMEOUT; i++) {
+ if (!(INREG(R600_GRBM_STATUS) & R600_GUI_ACTIVE))
+ return;
}
- }
- xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
- "Idle timed out: %u entries, stat=0x%08x\n",
- (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
- (unsigned int)INREG(RADEON_RBBM_STATUS));
- xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
- "Idle timed out, resetting engine...\n");
- RADEONEngineReset(pScrn);
- RADEONEngineRestore(pScrn);
+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
+ "Idle timed out: stat=0x%08x\n",
+ (unsigned int)INREG(R600_GRBM_STATUS));
+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+ "Idle timed out, resetting engine...\n");
+ R600EngineReset(pScrn);
#ifdef XF86DRI
- if (info->directRenderingEnabled) {
- RADEONCP_RESET(pScrn, info);
- RADEONCP_START(pScrn, info);
+ if (info->directRenderingEnabled) {
+ RADEONCP_RESET(pScrn, info);
+ RADEONCP_START(pScrn, info);
+ }
+#endif
}
+ } else {
+ /* Wait for the engine to go idle */
+ RADEONWaitForFifoFunction(pScrn, 64);
+
+ for (;;) {
+ for (i = 0; i < RADEON_TIMEOUT; i++) {
+ if (!(INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)) {
+ RADEONEngineFlush(pScrn);
+ return;
+ }
+ }
+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
+ "Idle timed out: %u entries, stat=0x%08x\n",
+ (unsigned int)INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
+ (unsigned int)INREG(RADEON_RBBM_STATUS));
+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+ "Idle timed out, resetting engine...\n");
+ RADEONEngineReset(pScrn);
+ RADEONEngineRestore(pScrn);
+#ifdef XF86DRI
+ if (info->directRenderingEnabled) {
+ RADEONCP_RESET(pScrn, info);
+ RADEONCP_START(pScrn, info);
+ }
#endif
+ }
}
}
diff --git a/src/radeon_crtc.c b/src/radeon_crtc.c
index 5a7c730a..60140d6b 100644
--- a/src/radeon_crtc.c
+++ b/src/radeon_crtc.c
@@ -587,8 +587,7 @@ Bool RADEONAllocateControllers(ScrnInfoPtr pScrn, int mask)
RADEONEntPtr pRADEONEnt = RADEONEntPriv(pScrn);
RADEONInfoPtr info = RADEONPTR(pScrn);
- if ((info->ChipFamily < CHIP_FAMILY_R600) &&
- (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE))) {
+ if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) {
radeon_crtc_funcs.shadow_create = radeon_crtc_shadow_create;
radeon_crtc_funcs.shadow_allocate = radeon_crtc_shadow_allocate;
radeon_crtc_funcs.shadow_destroy = radeon_crtc_shadow_destroy;
diff --git a/src/radeon_dri.c b/src/radeon_dri.c
index 59d9a832..45c927f2 100644
--- a/src/radeon_dri.c
+++ b/src/radeon_dri.c
@@ -45,6 +45,7 @@
#include "radeon.h"
#include "radeon_video.h"
#include "radeon_reg.h"
+#include "r600_reg.h"
#include "radeon_macros.h"
#include "radeon_drm.h"
#include "radeon_dri.h"
@@ -790,92 +791,96 @@ static Bool RADEONSetAgpMode(RADEONInfoPtr info, ScreenPtr pScreen)
unsigned long mode = drmAgpGetMode(info->dri->drmFD); /* Default mode */
unsigned int vendor = drmAgpVendorId(info->dri->drmFD);
unsigned int device = drmAgpDeviceId(info->dri->drmFD);
- /* ignore agp 3.0 mode bit from the chip as it's buggy on some cards with
- pcie-agp rialto bridge chip - use the one from bridge which must match */
- uint32_t agp_status = (INREG(RADEON_AGP_STATUS) | RADEON_AGPv3_MODE) & mode;
- Bool is_v3 = (agp_status & RADEON_AGPv3_MODE);
- unsigned int defaultMode;
- MessageType from;
- if (is_v3) {
- defaultMode = (agp_status & RADEON_AGPv3_8X_MODE) ? 8 : 4;
- } else {
- if (agp_status & RADEON_AGP_4X_MODE) defaultMode = 4;
- else if (agp_status & RADEON_AGP_2X_MODE) defaultMode = 2;
- else defaultMode = 1;
- }
-
- /* Apply AGPMode Quirks */
- radeon_agpmode_quirk_ptr p = radeon_agpmode_quirk_list;
- while (p && p->chipDevice != 0) {
- if (vendor == p->hostbridgeVendor &&
- device == p->hostbridgeDevice &&
- PCI_DEV_VENDOR_ID(info->PciInfo) == p->chipVendor &&
- PCI_DEV_DEVICE_ID(info->PciInfo) == p->chipDevice &&
- PCI_SUB_VENDOR_ID(info->PciInfo) == p->subsysVendor &&
- PCI_SUB_DEVICE_ID(info->PciInfo) == p->subsysDevice)
- {
- defaultMode = p->defaultMode;
- }
- ++p;
- }
+ if (info->ChipFamily < CHIP_FAMILY_R600) {
+ /* ignore agp 3.0 mode bit from the chip as it's buggy on some cards with
+ pcie-agp rialto bridge chip - use the one from bridge which must match */
+ uint32_t agp_status = (INREG(RADEON_AGP_STATUS) | RADEON_AGPv3_MODE) & mode;
+ Bool is_v3 = (agp_status & RADEON_AGPv3_MODE);
+ unsigned int defaultMode;
+ MessageType from;
- from = X_DEFAULT;
+ if (is_v3) {
+ defaultMode = (agp_status & RADEON_AGPv3_8X_MODE) ? 8 : 4;
+ } else {
+ if (agp_status & RADEON_AGP_4X_MODE) defaultMode = 4;
+ else if (agp_status & RADEON_AGP_2X_MODE) defaultMode = 2;
+ else defaultMode = 1;
+ }
- if (xf86GetOptValInteger(info->Options, OPTION_AGP_MODE, &info->dri->agpMode)) {
- if ((info->dri->agpMode < (is_v3 ? 4 : 1)) ||
- (info->dri->agpMode > (is_v3 ? 8 : 4)) ||
- (info->dri->agpMode & (info->dri->agpMode - 1))) {
- xf86DrvMsg(pScreen->myNum, X_ERROR,
- "Illegal AGP Mode: %d (valid values: %s), leaving at "
- "%dx\n", info->dri->agpMode, is_v3 ? "4, 8" : "1, 2, 4",
- defaultMode);
- info->dri->agpMode = defaultMode;
+ /* Apply AGPMode Quirks */
+ radeon_agpmode_quirk_ptr p = radeon_agpmode_quirk_list;
+ while (p && p->chipDevice != 0) {
+ if (vendor == p->hostbridgeVendor &&
+ device == p->hostbridgeDevice &&
+ PCI_DEV_VENDOR_ID(info->PciInfo) == p->chipVendor &&
+ PCI_DEV_DEVICE_ID(info->PciInfo) == p->chipDevice &&
+ PCI_SUB_VENDOR_ID(info->PciInfo) == p->subsysVendor &&
+ PCI_SUB_DEVICE_ID(info->PciInfo) == p->subsysDevice)
+ {
+ defaultMode = p->defaultMode;
+ }
+ ++p;
+ }
+
+ from = X_DEFAULT;
+
+ if (xf86GetOptValInteger(info->Options, OPTION_AGP_MODE, &info->dri->agpMode)) {
+ if ((info->dri->agpMode < (is_v3 ? 4 : 1)) ||
+ (info->dri->agpMode > (is_v3 ? 8 : 4)) ||
+ (info->dri->agpMode & (info->dri->agpMode - 1))) {
+ xf86DrvMsg(pScreen->myNum, X_ERROR,
+ "Illegal AGP Mode: %d (valid values: %s), leaving at "
+ "%dx\n", info->dri->agpMode, is_v3 ? "4, 8" : "1, 2, 4",
+ defaultMode);
+ info->dri->agpMode = defaultMode;
+ } else
+ from = X_CONFIG;
} else
- from = X_CONFIG;
- } else
- info->dri->agpMode = defaultMode;
+ info->dri->agpMode = defaultMode;
- xf86DrvMsg(pScreen->myNum, from, "Using AGP %dx\n", info->dri->agpMode);
+ xf86DrvMsg(pScreen->myNum, from, "Using AGP %dx\n", info->dri->agpMode);
- mode &= ~RADEON_AGP_MODE_MASK;
- if (is_v3) {
- /* only set one mode bit for AGPv3 */
- switch (info->dri->agpMode) {
- case 8: mode |= RADEON_AGPv3_8X_MODE; break;
- case 4: default: mode |= RADEON_AGPv3_4X_MODE;
- }
- /*TODO: need to take care of other bits valid for v3 mode
- * currently these bits are not used in all tested cards.
- */
- } else {
- switch (info->dri->agpMode) {
- case 4: mode |= RADEON_AGP_4X_MODE;
- case 2: mode |= RADEON_AGP_2X_MODE;
- case 1: default: mode |= RADEON_AGP_1X_MODE;
+ mode &= ~RADEON_AGP_MODE_MASK;
+ if (is_v3) {
+ /* only set one mode bit for AGPv3 */
+ switch (info->dri->agpMode) {
+ case 8: mode |= RADEON_AGPv3_8X_MODE; break;
+ case 4: default: mode |= RADEON_AGPv3_4X_MODE;
+ }
+ /*TODO: need to take care of other bits valid for v3 mode
+ * currently these bits are not used in all tested cards.
+ */
+ } else {
+ switch (info->dri->agpMode) {
+ case 4: mode |= RADEON_AGP_4X_MODE;
+ case 2: mode |= RADEON_AGP_2X_MODE;
+ case 1: default: mode |= RADEON_AGP_1X_MODE;
+ }
}
- }
- /* AGP Fast Writes.
- * TODO: take into account that certain agp modes don't support fast
- * writes at all */
- mode &= ~RADEON_AGP_FW_MODE; /* Disable per default */
- if (xf86ReturnOptValBool(info->Options, OPTION_AGP_FW, FALSE)) {
- xf86DrvMsg(pScreen->myNum, X_WARNING,
- "WARNING: Using the AGPFastWrite option is not recommended.\n");
- xf86Msg(X_NONE, "\tThis option does not provide much of a noticable speed"
- " boost, while it\n\twill probably hard lock your machine."
- " All bets are off!\n");
-
- /* Black list some host/AGP bridges. */
- if ((vendor == PCI_VENDOR_AMD) && (device == PCI_CHIP_AMD761))
- xf86DrvMsg(pScreen->myNum, X_PROBED, "Ignoring AGPFastWrite option "
- "for the AMD 761 northbridge.\n");
- else {
- xf86DrvMsg(pScreen->myNum, X_CONFIG, "Enabling AGP Fast Writes.\n");
- mode |= RADEON_AGP_FW_MODE;
- }
- } /* Don't mention this otherwise, so that people don't get funny ideas */
+ /* AGP Fast Writes.
+ * TODO: take into account that certain agp modes don't support fast
+ * writes at all */
+ mode &= ~RADEON_AGP_FW_MODE; /* Disable per default */
+ if (xf86ReturnOptValBool(info->Options, OPTION_AGP_FW, FALSE)) {
+ xf86DrvMsg(pScreen->myNum, X_WARNING,
+ "WARNING: Using the AGPFastWrite option is not recommended.\n");
+ xf86Msg(X_NONE, "\tThis option does not provide much of a noticable speed"
+ " boost, while it\n\twill probably hard lock your machine."
+ " All bets are off!\n");
+
+ /* Black list some host/AGP bridges. */
+ if ((vendor == PCI_VENDOR_AMD) && (device == PCI_CHIP_AMD761))
+ xf86DrvMsg(pScreen->myNum, X_PROBED, "Ignoring AGPFastWrite option "
+ "for the AMD 761 northbridge.\n");
+ else {
+ xf86DrvMsg(pScreen->myNum, X_CONFIG, "Enabling AGP Fast Writes.\n");
+ mode |= RADEON_AGP_FW_MODE;
+ }
+ } /* Don't mention this otherwise, so that people don't get funny ideas */
+ } else
+ info->dri->agpMode = 8; /* doesn't matter at this point */
xf86DrvMsg(pScreen->myNum, X_INFO,
"[agp] Mode 0x%08lx [AGP 0x%04x/0x%04x; Card 0x%04x/0x%04x 0x%04x/0x%04x]\n",
@@ -910,6 +915,9 @@ static void RADEONSetAgpBase(RADEONInfoPtr info, ScreenPtr pScreen)
ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
unsigned char *RADEONMMIO = info->MMIO;
+ if (info->ChipFamily >= CHIP_FAMILY_R600)
+ return;
+
/* drm already does this, so we can probably remove this.
* agp_base_2 ?
*/
@@ -1183,13 +1191,14 @@ static int RADEONDRIKernelInit(RADEONInfoPtr info, ScreenPtr pScreen)
drm_radeon_init_t drmInfo;
memset(&drmInfo, 0, sizeof(drm_radeon_init_t));
- if ( info->ChipFamily >= CHIP_FAMILY_R300 )
- drmInfo.func = RADEON_INIT_R300_CP;
+ if ( info->ChipFamily >= CHIP_FAMILY_R600 )
+ drmInfo.func = RADEON_INIT_R600_CP;
+ else if ( info->ChipFamily >= CHIP_FAMILY_R300 )
+ drmInfo.func = RADEON_INIT_R300_CP;
+ else if ( info->ChipFamily >= CHIP_FAMILY_R200 )
+ drmInfo.func = RADEON_INIT_R200_CP;
else
- if ( info->ChipFamily >= CHIP_FAMILY_R200 )
- drmInfo.func = RADEON_INIT_R200_CP;
- else
- drmInfo.func = RADEON_INIT_CP;
+ drmInfo.func = RADEON_INIT_CP;
drmInfo.sarea_priv_offset = sizeof(XF86DRISAREARec);
drmInfo.is_pci = (info->cardType!=CARD_AGP);
@@ -1223,7 +1232,8 @@ static int RADEONDRIKernelInit(RADEONInfoPtr info, ScreenPtr pScreen)
* registers back to their default values, so we need to restore
* those engine register here.
*/
- RADEONEngineRestore(pScrn);
+ if (info->ChipFamily < CHIP_FAMILY_R600)
+ RADEONEngineRestore(pScrn);
return TRUE;
}
@@ -1299,14 +1309,16 @@ static void RADEONDRIIrqInit(RADEONInfoPtr info, ScreenPtr pScreen)
"[drm] falling back to irq-free operation\n");
info->dri->irq = 0;
} else {
- unsigned char *RADEONMMIO = info->MMIO;
- info->ModeReg->gen_int_cntl = INREG( RADEON_GEN_INT_CNTL );
-
- /* Let the DRM know it can safely disable the vblank interrupts */
- radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0],
- FALSE);
- radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0],
- TRUE);
+ if (info->ChipFamily < CHIP_FAMILY_R600) {
+ unsigned char *RADEONMMIO = info->MMIO;
+ info->ModeReg->gen_int_cntl = INREG( RADEON_GEN_INT_CNTL );
+
+ /* Let the DRM know it can safely disable the vblank interrupts */
+ radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0],
+ FALSE);
+ radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0],
+ TRUE);
+ }
}
}
@@ -1840,7 +1852,8 @@ void RADEONDRIResume(ScreenPtr pScreen)
/* FIXME: return? */
}
- RADEONEngineRestore(pScrn);
+ if (info->ChipFamily < CHIP_FAMILY_R600)
+ RADEONEngineRestore(pScrn);
RADEONDRICPInit(pScrn);
}
diff --git a/src/radeon_driver.c b/src/radeon_driver.c
index 8bf2a02e..1171de48 100644
--- a/src/radeon_driver.c
+++ b/src/radeon_driver.c
@@ -677,8 +677,8 @@ static void radeon_write_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, uint32_
if (mask & LOC_FB)
OUTREG(R700_MC_VM_FB_LOCATION, fb_loc);
if (mask & LOC_AGP) {
- OUTREG(R600_MC_VM_AGP_BOT, agp_loc);
- OUTREG(R600_MC_VM_AGP_TOP, agp_loc_hi);
+ OUTREG(R700_MC_VM_AGP_BOT, agp_loc);
+ OUTREG(R700_MC_VM_AGP_TOP, agp_loc_hi);
}
} else if (info->ChipFamily >= CHIP_FAMILY_R600) {
if (mask & LOC_FB)
@@ -727,8 +727,8 @@ static void radeon_read_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, uint32_t
if (mask & LOC_FB)
*fb_loc = INREG(R700_MC_VM_FB_LOCATION);
if (mask & LOC_AGP) {
- *agp_loc = INREG(R600_MC_VM_AGP_BOT);
- *agp_loc_hi = INREG(R600_MC_VM_AGP_TOP);
+ *agp_loc = INREG(R700_MC_VM_AGP_BOT);
+ *agp_loc_hi = INREG(R700_MC_VM_AGP_TOP);
}
} else if (info->ChipFamily >= CHIP_FAMILY_R600) {
if (mask & LOC_FB)
@@ -1879,7 +1879,10 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn)
/* treat PCIE IGP cards as PCI */
if (info->cardType == CARD_PCIE && info->IsIGP)
- info->cardType = CARD_PCI;
+ info->cardType = CARD_PCI;
+
+ if ((info->ChipFamily >= CHIP_FAMILY_R600) && info->IsIGP)
+ info->cardType = CARD_PCIE;
/* not sure about gart table requirements */
if ((info->ChipFamily == CHIP_FAMILY_RS600) && info->IsIGP)
@@ -1912,6 +1915,7 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn)
info->Chipset != PCI_CHIP_RN50_5969);
#endif
+#if 0
if (info->ChipFamily >= CHIP_FAMILY_R600) {
info->r600_shadow_fb = TRUE;
xf86DrvMsg(pScrn->scrnIndex, X_INFO,
@@ -1919,6 +1923,7 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn)
if (!xf86LoadSubModule(pScrn, "shadow"))
return FALSE;
}
+#endif
return TRUE;
}
@@ -1996,8 +2001,8 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn)
if (info->ChipFamily >= CHIP_FAMILY_R600) {
xf86DrvMsg(pScrn->scrnIndex, X_DEFAULT,
- "No acceleration support available on R600 yet.\n");
- return TRUE;
+ "Experimental R6xx/R7xx EXA support.\n");
+ info->useEXA = TRUE;
}
if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) {
@@ -2342,7 +2347,10 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn)
xf86DrvMsg(pScrn->scrnIndex, from, "Page Flipping %sabled%s\n",
info->dri->allowPageFlip ? "en" : "dis", reason);
- info->DMAForXv = TRUE;
+ if (info->ChipFamily >= CHIP_FAMILY_R600)
+ info->DMAForXv = FALSE;
+ else
+ info->DMAForXv = TRUE;
from = xf86GetOptValBool(info->Options, OPTION_XV_DMA, &info->DMAForXv)
? X_CONFIG : X_INFO;
xf86DrvMsg(pScrn->scrnIndex, from,
@@ -3650,11 +3658,9 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen,
RADEONDGAInit(pScreen);
/* Init Xv */
- if (info->ChipFamily < CHIP_FAMILY_R600) {
- xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
- "Initializing Xv\n");
- RADEONInitVideo(pScreen);
- }
+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
+ "Initializing Xv\n");
+ RADEONInitVideo(pScreen);
if (info->r600_shadow_fb == TRUE) {
if (!shadowSetup(pScreen)) {
@@ -3779,7 +3785,8 @@ void RADEONRestoreMemMapRegisters(ScrnInfoPtr pScrn,
}
/* Reset the engine and HDP */
- RADEONEngineReset(pScrn);
+ if (info->ChipFamily < CHIP_FAMILY_R600)
+ RADEONEngineReset(pScrn);
}
} else {
@@ -3964,7 +3971,7 @@ static void RADEONAdjustMemMapRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save)
}
#ifdef USE_EXA
- if (info->accelDFS)
+ if (info->accelDFS || (info->ChipFamily >= CHIP_FAMILY_R600))
{
drm_radeon_getparam_t gp;
int gart_base;
@@ -5228,7 +5235,8 @@ Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags)
if (info->accelOn) {
RADEON_SYNC(info, pScrn);
- RADEONEngineRestore(pScrn);
+ if (info->ChipFamily < CHIP_FAMILY_R600)
+ RADEONEngineRestore(pScrn);
}
#ifdef XF86DRI
@@ -5432,6 +5440,10 @@ void RADEONAdjustFrame(int scrnIndex, int x, int y, int flags)
xf86OutputPtr output = config->output[config->compat_output];
xf86CrtcPtr crtc = output->crtc;
+ /* not handled */
+ if (IS_AVIVO_VARIANT)
+ return;
+
#ifdef XF86DRI
if (info->cp->CPStarted && pScrn->pScreen) DRILock(pScrn->pScreen, 0);
#endif
@@ -5544,9 +5556,12 @@ Bool RADEONEnterVT(int scrnIndex, int flags)
if (info->adaptor)
RADEONResetVideo(pScrn);
- if (info->accelOn)
+ if (info->accelOn && (info->ChipFamily < CHIP_FAMILY_R600))
RADEONEngineRestore(pScrn);
+ if (info->accelOn && info->accel_state)
+ info->accel_state->XInited3D = FALSE;
+
#ifdef XF86DRI
if (info->directRenderingEnabled) {
RADEONCP_START(pScrn, info);
diff --git a/src/radeon_exa.c b/src/radeon_exa.c
index 31a60c21..ae681462 100644
--- a/src/radeon_exa.c
+++ b/src/radeon_exa.c
@@ -35,6 +35,7 @@
#include "radeon.h"
#include "radeon_reg.h"
+#include "r600_reg.h"
#ifdef XF86DRI
#include "radeon_drm.h"
#endif
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 2cc55db6..571204af 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -458,7 +458,7 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
#ifdef ONLY_ONCE
-static PixmapPtr
+PixmapPtr
RADEONGetDrawablePixmap(DrawablePtr pDrawable)
{
if (pDrawable->type == DRAWABLE_WINDOW)
diff --git a/src/radeon_modes.c b/src/radeon_modes.c
index e06f8ddf..0a8fa001 100644
--- a/src/radeon_modes.c
+++ b/src/radeon_modes.c
@@ -65,15 +65,19 @@ void RADEONSetPitch (ScrnInfoPtr pScrn)
align_large = info->allowColorTiling || IS_AVIVO_VARIANT;
/* FIXME: May need to validate line pitch here */
- switch (pScrn->depth / 8) {
- case 1: pitch_mask = align_large ? 255 : 127;
- break;
- case 2: pitch_mask = align_large ? 127 : 31;
- break;
- case 3:
- case 4: pitch_mask = align_large ? 63 : 15;
- break;
- }
+ if (info->ChipFamily < CHIP_FAMILY_R600) {
+ switch (pScrn->depth / 8) {
+ case 1: pitch_mask = align_large ? 255 : 127;
+ break;
+ case 2: pitch_mask = align_large ? 127 : 31;
+ break;
+ case 3:
+ case 4: pitch_mask = align_large ? 63 : 15;
+ break;
+ }
+ } else
+ pitch_mask = 255; /* r6xx/r7xx need 256B alignment for accel */
+
dummy = (pScrn->virtualX + pitch_mask) & ~pitch_mask;
pScrn->displayWidth = dummy;
info->CurrentLayout.displayWidth = pScrn->displayWidth;
diff --git a/src/radeon_reg.h b/src/radeon_reg.h
index f0755e53..0af88597 100644
--- a/src/radeon_reg.h
+++ b/src/radeon_reg.h
@@ -3673,6 +3673,8 @@
# define AVIVO_D1MODE_VLINE_START_SHIFT 0
# define AVIVO_D1MODE_VLINE_END_SHIFT 16
# define AVIVO_D1MODE_VLINE_INV (1 << 31)
+#define AVIVO_D1MODE_VLINE_STATUS 0x653c
+# define AVIVO_D1MODE_VLINE_STAT (1 << 12) /* NOTE(review): presumably a bit in AVIVO_D1MODE_VLINE_STATUS -- confirm */
#define AVIVO_D1MODE_VIEWPORT_START 0x6580
#define AVIVO_D1MODE_VIEWPORT_SIZE 0x6584
#define AVIVO_D1MODE_EXT_OVERSCAN_LEFT_RIGHT 0x6588
@@ -3995,6 +3997,9 @@
#define R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198
#define R700_MC_VM_FB_LOCATION 0x2024
+#define R700_MC_VM_AGP_TOP 0x2028 /* read into agp_loc_hi by radeon_read_mc_fb_agp_location() */
+#define R700_MC_VM_AGP_BOT 0x202c /* read into agp_loc by radeon_read_mc_fb_agp_location() */
+#define R700_MC_VM_AGP_BASE 0x2030
#define R600_HDP_NONSURFACE_BASE 0x2c04
@@ -5373,4 +5378,32 @@
#define R500_DYN_SCLK_PWMEM_PIPE 0x000d /* PLL */
+/* r6xx/r7xx stuff */
+#define R600_GRBM_STATUS 0x8010
+# define R600_CMDFIFO_AVAIL_MASK 0x1f /* low 5 bits */
+# define R700_CMDFIFO_AVAIL_MASK 0xf /* low 4 bits */
+# define R600_GUI_ACTIVE (1U << 31) /* unsigned: 1 << 31 overflows a 32-bit int */
+
+#define R600_GRBM_SOFT_RESET 0x8020
+# define R600_SOFT_RESET_CP (1 << 0)
+
+#define R600_WAIT_UNTIL 0x8040
+
+#define R600_CP_ME_CNTL 0x86d8
+# define R600_CP_ME_HALT (1 << 28)
+
+#define R600_CP_RB_BASE 0xc100
+#define R600_CP_RB_CNTL 0xc104
+# define R600_RB_NO_UPDATE (1 << 27)
+# define R600_RB_RPTR_WR_ENA (1U << 31) /* unsigned: 1 << 31 overflows a 32-bit int */
+#define R600_CP_RB_RPTR_WR 0xc108
+#define R600_CP_RB_RPTR_ADDR 0xc10c
+#define R600_CP_RB_RPTR_ADDR_HI 0xc110
+#define R600_CP_RB_WPTR 0xc114
+#define R600_CP_RB_WPTR_ADDR 0xc118
+#define R600_CP_RB_WPTR_ADDR_HI 0xc11c
+
+#define R600_CP_RB_RPTR 0x8700
+#define R600_CP_RB_WPTR_DELAY 0x8704
+
#endif
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index 7712344b..cbedb7ea 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -36,6 +36,7 @@
#include "radeon.h"
#include "radeon_reg.h"
+#include "r600_reg.h"
#include "radeon_macros.h"
#include "radeon_probe.h"
#include "radeon_video.h"
@@ -43,12 +44,24 @@
#include <X11/extensions/Xv.h>
#include "fourcc.h"
+/* R6xx/R7xx helpers defined outside this file. */
+void R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv);
+
+Bool R600CopyToVRAM(ScrnInfoPtr pScrn,
+                    char *src, int src_pitch,
+                    uint32_t dst_pitch, uint32_t dst_mc_addr,
+                    uint32_t dst_height, int bpp,
+                    int x, int y, int w, int h);
+
#define IMAGE_MAX_WIDTH 2048
#define IMAGE_MAX_HEIGHT 2048
#define IMAGE_MAX_WIDTH_R500 4096
#define IMAGE_MAX_HEIGHT_R500 4096
+#define IMAGE_MAX_WIDTH_R600 8192 /* dimensions listed in DummyEncodingR600 */
+#define IMAGE_MAX_HEIGHT_R600 8192
+
static Bool
RADEONTilingEnabled(ScrnInfoPtr pScrn, PixmapPtr pPix)
{
@@ -146,6 +159,56 @@ static __inline__ uint32_t F_TO_24(float val)
#endif /* XF86DRI */
+/*
+ * Upload a planar YUV image as three R600CopyToVRAM() calls (Y, V, U).
+ * Y lands at dst_mc_addr; the V and U offsets from dst_mc_addr are each
+ * rounded up to a multiple of 256, and both use half of dstPitch.
+ */
+static void
+R600CopyPlanar(ScrnInfoPtr pScrn,
+               unsigned char *y_src, unsigned char *u_src, unsigned char *v_src,
+               uint32_t dst_mc_addr,
+               int srcPitch, int srcPitch2, int dstPitch,
+               int w, int h)
+{
+    const int chroma_pitch = dstPitch >> 1;
+    const int chroma_w = w >> 1;
+    const int chroma_h = h >> 1;
+    /* V follows the Y plane, U follows the V plane. */
+    const int v_start = ((dstPitch * h) + 255) & ~255;
+    const int u_start = (v_start + (chroma_pitch * chroma_h) + 255) & ~255;
+
+    /* Y: full-size plane, 8 bpp */
+    R600CopyToVRAM(pScrn, (char *)y_src, srcPitch,
+                   dstPitch, dst_mc_addr, h, 8,
+                   0, 0, w, h);
+
+    /* V: half-size plane at v_start */
+    R600CopyToVRAM(pScrn, (char *)v_src, srcPitch2,
+                   chroma_pitch, dst_mc_addr + v_start, chroma_h, 8,
+                   0, 0, chroma_w, chroma_h);
+
+    /* U: half-size plane at u_start */
+    R600CopyToVRAM(pScrn, (char *)u_src, srcPitch2,
+                   chroma_pitch, dst_mc_addr + u_start, chroma_h, 8,
+                   0, 0, chroma_w, chroma_h);
+}
+
+/* Upload a packed YUV image with one R600CopyToVRAM() call at 32 bpp,
+ * passing dstPitch >> 2 as the pitch and w >> 1 as the width. */
+static void
+R600CopyPacked(ScrnInfoPtr pScrn,
+               unsigned char *src, uint32_t dst_mc_addr,
+               int srcPitch, int dstPitch,
+               int w, int h)
+{
+    const int pitch32 = dstPitch >> 2;
+    const int width32 = w >> 1;
+
+    R600CopyToVRAM(pScrn, (char *)src, srcPitch,
+                   pitch32, dst_mc_addr, h, 32, 0, 0, width32, h);
+}
+
static int
RADEONPutImageTextured(ScrnInfoPtr pScrn,
short src_x, short src_y,
@@ -214,7 +277,10 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
break;
}
- dstPitch = (dstPitch + 63) & ~63;
+ if (info->ChipFamily >= CHIP_FAMILY_R600)
+ dstPitch = (dstPitch + 255) & ~255;
+ else
+ dstPitch = (dstPitch + 63) & ~63;
if (pPriv->video_memory != NULL && size != pPriv->size) {
radeon_legacy_free_memory(pScrn, pPriv->video_memory);
@@ -222,16 +288,21 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
}
if (pPriv->video_memory == NULL) {
- pPriv->video_offset = radeon_legacy_allocate_memory(pScrn,
- &pPriv->video_memory,
- size * 2, 64);
+ if (info->ChipFamily >= CHIP_FAMILY_R600)
+ pPriv->video_offset = radeon_legacy_allocate_memory(pScrn,
+ &pPriv->video_memory,
+ size * 2, 256);
+ else
+ pPriv->video_offset = radeon_legacy_allocate_memory(pScrn,
+ &pPriv->video_memory,
+ size * 2, 64);
if (pPriv->video_offset == 0)
return BadAlloc;
}
/* Bicubic filter setup */
pPriv->bicubic_enabled = (pPriv->bicubic_state != BICUBIC_OFF);
- if (!(IS_R300_3D || IS_R500_3D))
+ if (!(IS_R300_3D || IS_R500_3D || IS_R600_3D))
pPriv->bicubic_enabled = FALSE;
if (pPriv->bicubic_enabled && (pPriv->bicubic_state == BICUBIC_AUTO)) {
/*
@@ -280,7 +351,10 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
npixels = ((((x2 + 0xffff) >> 16) + 1) & ~1) - left;
pPriv->src_offset = pPriv->video_offset + info->fbLocation + pScrn->fbOffset;
- pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch));
+ if (info->ChipFamily >= CHIP_FAMILY_R600)
+ pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset);
+ else
+ pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch));
pPriv->src_pitch = dstPitch;
pPriv->size = size;
pPriv->pDraw = pDraw;
@@ -294,29 +368,51 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
switch(id) {
case FOURCC_YV12:
case FOURCC_I420:
- top &= ~1;
- nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
- s2offset = srcPitch * height;
- s3offset = (srcPitch2 * (height >> 1)) + s2offset;
- top &= ~1;
- pPriv->src_addr += left << 1;
- tmp = ((top >> 1) * srcPitch2) + (left >> 1);
- s2offset += tmp;
- s3offset += tmp;
- if (id == FOURCC_I420) {
- tmp = s2offset;
- s2offset = s3offset;
- s3offset = tmp;
+ if (info->ChipFamily >= CHIP_FAMILY_R600) {
+ s2offset = srcPitch * height;
+ s3offset = (srcPitch2 * (height >> 1)) + s2offset;
+ if (id == FOURCC_YV12)
+ R600CopyPlanar(pScrn, buf, buf + s3offset, buf + s2offset,
+ pPriv->src_offset,
+ srcPitch, srcPitch2, pPriv->src_pitch,
+ width, height);
+ else
+ R600CopyPlanar(pScrn, buf, buf + s2offset, buf + s3offset,
+ pPriv->src_offset,
+ srcPitch, srcPitch2, pPriv->src_pitch,
+ width, height);
+
+ } else {
+ top &= ~1;
+ nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
+ s2offset = srcPitch * height;
+ s3offset = (srcPitch2 * (height >> 1)) + s2offset;
+ top &= ~1;
+ pPriv->src_addr += left << 1;
+ tmp = ((top >> 1) * srcPitch2) + (left >> 1);
+ s2offset += tmp;
+ s3offset += tmp;
+ if (id == FOURCC_I420) {
+ tmp = s2offset;
+ s2offset = s3offset;
+ s3offset = tmp;
+ }
+ RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left,
+ buf + s2offset, buf + s3offset, pPriv->src_addr,
+ srcPitch, srcPitch2, dstPitch, nlines, npixels);
}
- RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left,
- buf + s2offset, buf + s3offset, pPriv->src_addr,
- srcPitch, srcPitch2, dstPitch, nlines, npixels);
break;
case FOURCC_UYVY:
case FOURCC_YUY2:
default:
- nlines = ((y2 + 0xffff) >> 16) - top;
- RADEONCopyData(pScrn, buf, pPriv->src_addr, srcPitch, dstPitch, nlines, npixels, 2);
+ if (info->ChipFamily >= CHIP_FAMILY_R600) {
+ R600CopyPacked(pScrn, buf, pPriv->src_offset,
+ 2 * width, pPriv->src_pitch,
+ width, height);
+ } else {
+ nlines = ((y2 + 0xffff) >> 16) - top;
+ RADEONCopyData(pScrn, buf, pPriv->src_addr, srcPitch, dstPitch, nlines, npixels, 2);
+ }
break;
}
@@ -340,7 +436,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
pPriv->h = height;
#ifdef XF86DRI
- if (info->directRenderingEnabled)
+ if (IS_R600_3D)
+ R600DisplayTexturedVideo(pScrn, pPriv);
+ else if (info->directRenderingEnabled)
RADEONDisplayTexturedVideoCP(pScrn, pPriv);
else
#endif
@@ -370,6 +468,16 @@ static XF86VideoEncodingRec DummyEncodingR500[1] =
}
};
+static XF86VideoEncodingRec DummyEncodingR600[1] = /* selected in RADEONSetupImageTexturedVideo() when IS_R600_3D */
+{
+ {
+ 0,
+ "XV_IMAGE",
+ IMAGE_MAX_WIDTH_R600, IMAGE_MAX_HEIGHT_R600, /* 8192 x 8192 */
+ {1, 1}
+ }
+};
+
#define NUM_FORMATS 3
static XF86VideoFormatRec Formats[NUM_FORMATS] =
@@ -471,7 +579,9 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
adapt->flags = 0;
adapt->name = "Radeon Textured Video";
adapt->nEncodings = 1;
- if (IS_R500_3D)
+ if (IS_R600_3D)
+ adapt->pEncodings = DummyEncodingR600;
+ else if (IS_R500_3D)
adapt->pEncodings = DummyEncodingR500;
else
adapt->pEncodings = DummyEncoding;
@@ -483,7 +593,7 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
pPortPriv =
(RADEONPortPrivPtr)(&adapt->pPortPrivates[num_texture_ports]);
- if (IS_R300_3D || IS_R500_3D) {
+ if (IS_R300_3D || IS_R500_3D || IS_R600_3D) {
adapt->pAttributes = Attributes_r300;
adapt->nAttributes = NUM_ATTRIBUTES_R300;
} else {