diff options
author | Alex Deucher <alexdeucher@gmail.com> | 2009-02-03 10:44:10 -0500 |
---|---|---|
committer | Alex Deucher <alexdeucher@gmail.com> | 2009-02-03 10:44:10 -0500 |
commit | d1f071c7f1dad6babfbcfcc2cb2b722a4987f372 (patch) | |
tree | 8b1d683610782ab59c382c611f8df474c79da91a /src | |
parent | c88c3ef6f3db266c1aacba5297b8dfc8b66bf00e (diff) |
Initial R6xx/R7xx EXA and textured video support
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 8 | ||||
-rw-r--r-- | src/r600_exa.c | 3663 | ||||
-rw-r--r-- | src/r600_reg.h | 118 | ||||
-rw-r--r-- | src/r600_reg_auto_r6xx.h | 3087 | ||||
-rw-r--r-- | src/r600_reg_r6xx.h | 494 | ||||
-rw-r--r-- | src/r600_reg_r7xx.h | 149 | ||||
-rw-r--r-- | src/r600_shader.h | 346 | ||||
-rw-r--r-- | src/r600_state.h | 227 | ||||
-rw-r--r-- | src/r600_textured_videofuncs.c | 374 | ||||
-rw-r--r-- | src/r6xx_accel.c | 1110 | ||||
-rw-r--r-- | src/radeon.h | 139 | ||||
-rw-r--r-- | src/radeon_accel.c | 87 | ||||
-rw-r--r-- | src/radeon_commonfuncs.c | 9 | ||||
-rw-r--r-- | src/radeon_dri.c | 198 | ||||
-rw-r--r-- | src/radeon_driver.c | 34 | ||||
-rw-r--r-- | src/radeon_exa.c | 1 | ||||
-rw-r--r-- | src/radeon_exa_render.c | 2 | ||||
-rw-r--r-- | src/radeon_modes.c | 22 | ||||
-rw-r--r-- | src/radeon_reg.h | 26 | ||||
-rw-r--r-- | src/radeon_textured_video.c | 184 |
20 files changed, 10093 insertions, 185 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index c15cc301..7ff7d31a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -66,7 +66,7 @@ XMODE_SRCS=\ modes/xf86DiDGA.c if USE_EXA -RADEON_EXA_SOURCES = radeon_exa.c +RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c endif AM_CFLAGS = @XORG_CFLAGS@ @DRI_CFLAGS@ @XMODES_CFLAGS@ -DDISABLE_EASF -DENABLE_ALL_SERVICE_FUNCTIONS -DATOM_BIOS -DATOM_BIOS_PARSER -DDRIVER_PARSER @@ -128,6 +128,12 @@ EXTRA_DIST = \ radeon_render.c \ radeon_accelfuncs.c \ radeon_textured_videofuncs.c \ + r600_reg.h \ + r600_reg_auto_r6xx.h \ + r600_reg_r6xx.h \ + r600_reg_r7xx.h \ + r600_shader.h \ + r600_state.h \ ati.h \ ativersion.h \ bicubic_table.h \ diff --git a/src/r600_exa.c b/src/r600_exa.c new file mode 100644 index 00000000..b9c228fd --- /dev/null +++ b/src/r600_exa.c @@ -0,0 +1,3663 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Author: Alex Deucher <alexander.deucher@amd.com> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" + +#include "exa.h" + +#include "radeon.h" +#include "radeon_macros.h" +#include "r600_shader.h" +#include "r600_reg.h" +#include "r600_state.h" + +extern PixmapPtr +RADEONGetDrawablePixmap(DrawablePtr pDrawable); + +//#define SHOW_VERTEXES + +# define RADEON_ROP3_ZERO 0x00000000 +# define RADEON_ROP3_DSa 0x00880000 +# define RADEON_ROP3_SDna 0x00440000 +# define RADEON_ROP3_S 0x00cc0000 +# define RADEON_ROP3_DSna 0x00220000 +# define RADEON_ROP3_D 0x00aa0000 +# define RADEON_ROP3_DSx 0x00660000 +# define RADEON_ROP3_DSo 0x00ee0000 +# define RADEON_ROP3_DSon 0x00110000 +# define RADEON_ROP3_DSxn 0x00990000 +# define RADEON_ROP3_Dn 0x00550000 +# define RADEON_ROP3_SDno 0x00dd0000 +# define RADEON_ROP3_Sn 0x00330000 +# define RADEON_ROP3_DSno 0x00bb0000 +# define RADEON_ROP3_DSan 0x00770000 +# define RADEON_ROP3_ONE 0x00ff0000 + +uint32_t RADEON_ROP[16] = { + RADEON_ROP3_ZERO, /* GXclear */ + RADEON_ROP3_DSa, /* Gxand */ + RADEON_ROP3_SDna, /* GXandReverse */ + RADEON_ROP3_S, /* GXcopy */ + RADEON_ROP3_DSna, /* GXandInverted */ + RADEON_ROP3_D, /* GXnoop */ + RADEON_ROP3_DSx, /* GXxor */ + RADEON_ROP3_DSo, /* GXor */ + RADEON_ROP3_DSon, /* GXnor */ + RADEON_ROP3_DSxn, /* GXequiv */ + RADEON_ROP3_Dn, /* GXinvert */ + RADEON_ROP3_SDno, /* GXorReverse */ + RADEON_ROP3_Sn, /* GXcopyInverted */ + RADEON_ROP3_DSno, /* GXorInverted */ + RADEON_ROP3_DSan, /* GXnand */ + RADEON_ROP3_ONE, /* GXset */ +}; + +static Bool +R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) +{ + ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + RADEONInfoPtr info 
= RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + cb_config_t cb_conf; + shader_config_t vs_conf, ps_conf; + int pmask = 0; + uint32_t a, r, g, b; + float ps_alu_consts[4]; + + // FIXME + // R7xx seems to hang when using PS constants for fg color + // sending the color as a vertex attribute works + if (info->ChipFamily >= CHIP_FAMILY_RV770) + return FALSE; + + accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; + accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height; + accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); + + // bad pitch + if (accel_state->dst_pitch & 7) + return FALSE; + + // bad offset + if (accel_state->dst_mc_addr & 0xff) + return FALSE; + + if (pPix->drawable.bitsPerPixel == 24) + return FALSE; + + CLEAR (cb_conf); + CLEAR (vs_conf); + CLEAR (ps_conf); + + //return FALSE; + +#ifdef SHOW_VERTEXES + ErrorF("%dx%d @ %dbpp, 0x%08x\n", pPix->drawable.width, pPix->drawable.height, + pPix->drawable.bitsPerPixel, exaGetPixmapPitch(pPix)); +#endif + + accel_state->ib = RADEONCPGetBuffer(pScrn); + + /* Init */ + start_3d(pScrn, accel_state->ib); + + //cp_set_surface_sync(pScrn, accel_state->ib); + + set_default_state(pScrn, accel_state->ib); + + /* Scissor / viewport */ + ereg (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); + ereg (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); + + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->solid_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->solid_ps_offset; + accel_state->vs_size = 512; + accel_state->ps_size = 512; + + /* Shader */ + + /* flush SQ cache */ + cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, + accel_state->vs_size, accel_state->vs_mc_addr); + + vs_conf.shader_addr = accel_state->vs_mc_addr; + vs_conf.num_gprs = 2; + 
vs_conf.stack_size = 0; + vs_setup (pScrn, accel_state->ib, &vs_conf); + + /* flush SQ cache */ + cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, + accel_state->ps_size, accel_state->ps_mc_addr); + + ps_conf.shader_addr = accel_state->ps_mc_addr; + ps_conf.num_gprs = 1; + ps_conf.stack_size = 0; + ps_conf.uncached_first_inst = 1; + ps_conf.clamp_consts = 0; + ps_conf.export_mode = 2; + ps_setup (pScrn, accel_state->ib, &ps_conf); + + /* Render setup */ + if (pm & 0x000000ff) + pmask |= 4; //B + if (pm & 0x0000ff00) + pmask |= 2; //G + if (pm & 0x00ff0000) + pmask |= 1; //R + if (pm & 0xff000000) + pmask |= 8; //A + ereg (accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); + ereg (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); + ereg (accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[alu]); + + + cb_conf.id = 0; + cb_conf.w = accel_state->dst_pitch; + cb_conf.h = pPix->drawable.height; + cb_conf.base = accel_state->dst_mc_addr; + + if (pPix->drawable.bitsPerPixel == 8) { + cb_conf.format = COLOR_8; + cb_conf.comp_swap = 3; //A + } else if (pPix->drawable.bitsPerPixel == 16) { + cb_conf.format = COLOR_5_6_5; + cb_conf.comp_swap = 2; //RGB + } else { + cb_conf.format = COLOR_8_8_8_8; + cb_conf.comp_swap = 1; //ARGB + } + cb_conf.source_format = 1; + cb_conf.blend_clamp = 1; + set_render_target(pScrn, accel_state->ib, &cb_conf); + + ereg (accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); + ereg (accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ + DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ + + /* Interpolator setup */ + // one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) + ereg (accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift)); + ereg (accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); + + /* Enabling 
flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x + * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ + // no VS exports as PS input (NUM_INTERP is not zero based, no minus one) + ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, (0 << NUM_INTERP_shift)); + ereg (accel_state->ib, SPI_PS_IN_CONTROL_1, 0); + // color semantic id 0 -> GPR[0] + ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | + (0x03 << DEFAULT_VAL_shift) | + FLAT_SHADE_bit | + SEL_CENTROID_bit)); + ereg (accel_state->ib, SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit | 0); + + // PS alu constants + if (pPix->drawable.bitsPerPixel == 16) { + r = (fg >> 11) & 0x1f; + g = (fg >> 5) & 0x3f; + b = (fg >> 0) & 0x1f; + ps_alu_consts[0] = (float)r / 31; //R + ps_alu_consts[1] = (float)g / 63; //G + ps_alu_consts[2] = (float)b / 31; //B + ps_alu_consts[3] = 1.0; //A + } else if (pPix->drawable.bitsPerPixel == 8) { + a = (fg >> 0) & 0xff; + ps_alu_consts[0] = 0.0; //R + ps_alu_consts[1] = 0.0; //G + ps_alu_consts[2] = 0.0; //B + ps_alu_consts[3] = (float)a / 255; //A + } else { + a = (fg >> 24) & 0xff; + r = (fg >> 16) & 0xff; + g = (fg >> 8) & 0xff; + b = (fg >> 0) & 0xff; + ps_alu_consts[0] = (float)r / 255; //R + ps_alu_consts[1] = (float)g / 255; //G + ps_alu_consts[2] = (float)b / 255; //B + ps_alu_consts[3] = (float)a / 255; //A + } + set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); + + accel_state->vb_index = 0; + +#ifdef SHOW_VERTEXES + ErrorF("PM: 0x%08x\n", pm); +#endif + + return TRUE; +} + + +static void +R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) +{ + ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + struct r6xx_solid_vertex vertex[3]; + struct r6xx_solid_vertex *solid_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); + + vertex[0].x = (float)x1; + 
vertex[0].y = (float)y1; + + vertex[1].x = (float)x1; + vertex[1].y = (float)y2; + + vertex[2].x = (float)x2; + vertex[2].y = (float)y2; + +#ifdef SHOW_VERTEXES + ErrorF("vertex 0: %f, %f\n", vertex[0].x, vertex[0].y); + ErrorF("vertex 1: %f, %f\n", vertex[1].x, vertex[1].y); + ErrorF("vertex 2: %f, %f\n", vertex[2].x, vertex[2].y); +#endif + + // append to vertex buffer + solid_vb[accel_state->vb_index++] = vertex[0]; + solid_vb[accel_state->vb_index++] = vertex[1]; + solid_vb[accel_state->vb_index++] = vertex[2]; +} + +static void +R600DoneSolid(PixmapPtr pPix) +{ + ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + draw_config_t draw_conf; + vtx_resource_t vtx_res; + + CLEAR (draw_conf); + CLEAR (vtx_res); + + if (accel_state->vb_index == 0) { + R600IBDiscard(pScrn, accel_state->ib); + return; + } + + accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + + (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); + accel_state->vb_size = accel_state->vb_index * 8; + + /* flush vertex cache */ + if ((info->ChipFamily == CHIP_FAMILY_RV610) || + (info->ChipFamily == CHIP_FAMILY_RV620) || + (info->ChipFamily == CHIP_FAMILY_RS780) || + (info->ChipFamily == CHIP_FAMILY_RV710)) + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, + accel_state->vb_size, accel_state->vb_mc_addr); + else + cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, + accel_state->vb_size, accel_state->vb_mc_addr); + + /* Vertex buffer setup */ + vtx_res.id = SQ_VTX_RESOURCE_vs; + vtx_res.vtx_size_dw = 8 / 4; + vtx_res.vtx_num_entries = accel_state->vb_size / 4; + vtx_res.mem_req_size = 1; + vtx_res.vb_addr = accel_state->vb_mc_addr; + set_vtx_resource (pScrn, accel_state->ib, &vtx_res); + + /* Draw */ + draw_conf.prim_type = DI_PT_RECTLIST; + draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; + draw_conf.num_instances = 
1; + draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; + draw_conf.index_type = DI_INDEX_SIZE_16_BIT; + + ereg (accel_state->ib, VGT_INSTANCE_STEP_RATE_0, 0); /* ? */ + ereg (accel_state->ib, VGT_INSTANCE_STEP_RATE_1, 0); + + ereg (accel_state->ib, VGT_MAX_VTX_INDX, draw_conf.num_indices); + ereg (accel_state->ib, VGT_MIN_VTX_INDX, 0); + ereg (accel_state->ib, VGT_INDX_OFFSET, 0); + + draw_auto(pScrn, accel_state->ib, &draw_conf); + + wait_3d_idle_clean(pScrn, accel_state->ib); + + /* sync dst surface */ + cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), + accel_state->dst_size, accel_state->dst_mc_addr); + + R600CPFlushIndirect(pScrn, accel_state->ib); +} + +static void +R600DoPrepareCopy(ScrnInfoPtr pScrn, + int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp, + int dst_pitch, int dst_height, uint32_t dst_offset, int dst_bpp, + int rop, Pixel planemask) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + int pmask = 0; + cb_config_t cb_conf; + tex_resource_t tex_res; + tex_sampler_t tex_samp; + shader_config_t vs_conf, ps_conf; + + CLEAR (cb_conf); + CLEAR (tex_res); + CLEAR (tex_samp); + CLEAR (vs_conf); + CLEAR (ps_conf); + + accel_state->ib = RADEONCPGetBuffer(pScrn); + + /* Init */ + start_3d(pScrn, accel_state->ib); + + //cp_set_surface_sync(pScrn, accel_state->ib); + + set_default_state(pScrn, accel_state->ib); + + /* Scissor / viewport */ + ereg (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); + ereg (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); + + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->copy_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->copy_ps_offset; + accel_state->vs_size = 512; + accel_state->ps_size = 512; + + /* Shader */ + + /* flush SQ cache */ + 
cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, + accel_state->vs_size, accel_state->vs_mc_addr); + + vs_conf.shader_addr = accel_state->vs_mc_addr; + vs_conf.num_gprs = 2; + vs_conf.stack_size = 0; + vs_setup (pScrn, accel_state->ib, &vs_conf); + + /* flush SQ cache */ + cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, + accel_state->ps_size, accel_state->ps_mc_addr); + + ps_conf.shader_addr = accel_state->ps_mc_addr; + ps_conf.num_gprs = 1; + ps_conf.stack_size = 0; + ps_conf.uncached_first_inst = 1; + ps_conf.clamp_consts = 0; + ps_conf.export_mode = 2; + ps_setup (pScrn, accel_state->ib, &ps_conf); + + accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8); + accel_state->src_mc_addr[0] = src_offset; + accel_state->src_pitch[0] = src_pitch; + + /* flush texture cache */ + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, + accel_state->src_size[0], accel_state->src_mc_addr[0]); + + /* Texture */ + tex_res.id = 0; + tex_res.w = src_width; + tex_res.h = src_height; + tex_res.pitch = accel_state->src_pitch[0]; + tex_res.depth = 0; + tex_res.dim = SQ_TEX_DIM_2D; + tex_res.base = accel_state->src_mc_addr[0]; + tex_res.mip_base = accel_state->src_mc_addr[0]; + if (src_bpp == 8) { + tex_res.format = FMT_8; + tex_res.dst_sel_x = SQ_SEL_1; //R + tex_res.dst_sel_y = SQ_SEL_1; //G + tex_res.dst_sel_z = SQ_SEL_1; //B + tex_res.dst_sel_w = SQ_SEL_X; //A + } else if (src_bpp == 16) { + tex_res.format = FMT_5_6_5; + tex_res.dst_sel_x = SQ_SEL_Z; //R + tex_res.dst_sel_y = SQ_SEL_Y; //G + tex_res.dst_sel_z = SQ_SEL_X; //B + tex_res.dst_sel_w = SQ_SEL_1; //A + } else { + tex_res.format = FMT_8_8_8_8; + tex_res.dst_sel_x = SQ_SEL_Z; //R + tex_res.dst_sel_y = SQ_SEL_Y; //G + tex_res.dst_sel_z = SQ_SEL_X; //B + tex_res.dst_sel_w = SQ_SEL_W; //A + } + + tex_res.request_size = 1; + tex_res.base_level = 0; + tex_res.last_level = 0; + tex_res.perf_modulation = 0; + set_tex_resource (pScrn, accel_state->ib, &tex_res); + + tex_samp.id = 
0; + tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_z = SQ_TEX_WRAP; + tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; + tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; + tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; + tex_samp.mip_filter = 0; /* no mipmap */ + set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + + + /* Render setup */ + if (planemask & 0x000000ff) + pmask |= 4; //B + if (planemask & 0x0000ff00) + pmask |= 2; //G + if (planemask & 0x00ff0000) + pmask |= 1; //R + if (planemask & 0xff000000) + pmask |= 8; //A + ereg (accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); + ereg (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); + ereg (accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[rop]); + + accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); + accel_state->dst_mc_addr = dst_offset; + accel_state->dst_pitch = dst_pitch; + + cb_conf.id = 0; + cb_conf.w = accel_state->dst_pitch; + cb_conf.h = dst_height; + cb_conf.base = accel_state->dst_mc_addr; + if (dst_bpp == 8) { + cb_conf.format = COLOR_8; + cb_conf.comp_swap = 3; // A + } else if (dst_bpp == 16) { + cb_conf.format = COLOR_5_6_5; + cb_conf.comp_swap = 2; // RGB + } else { + cb_conf.format = COLOR_8_8_8_8; + cb_conf.comp_swap = 1; // ARGB + } + cb_conf.source_format = 1; + cb_conf.blend_clamp = 1; + set_render_target(pScrn, accel_state->ib, &cb_conf); + + ereg (accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); + ereg (accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ + DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ + + /* Interpolator setup */ + // export tex coord from VS + ereg (accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); + ereg (accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); + + /* Enabling 
flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x + * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ + // input tex coord from VS + ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift))); + ereg (accel_state->ib, SPI_PS_IN_CONTROL_1, 0); + // color semantic id 0 -> GPR[0] + ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | + (0x01 << DEFAULT_VAL_shift) | + SEL_CENTROID_bit)); + ereg (accel_state->ib, SPI_INTERP_CONTROL_0, 0); + + accel_state->vb_index = 0; + +} + +static void +R600DoCopy(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + draw_config_t draw_conf; + vtx_resource_t vtx_res; + + CLEAR (draw_conf); + CLEAR (vtx_res); + + if (accel_state->vb_index == 0) { + R600IBDiscard(pScrn, accel_state->ib); + return; + } + + accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + + (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); + accel_state->vb_size = accel_state->vb_index * 16; + + /* flush vertex cache */ + if ((info->ChipFamily == CHIP_FAMILY_RV610) || + (info->ChipFamily == CHIP_FAMILY_RV620) || + (info->ChipFamily == CHIP_FAMILY_RS780) || + (info->ChipFamily == CHIP_FAMILY_RV710)) + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, + accel_state->vb_size, accel_state->vb_mc_addr); + else + cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, + accel_state->vb_size, accel_state->vb_mc_addr); + + /* Vertex buffer setup */ + vtx_res.id = SQ_VTX_RESOURCE_vs; + vtx_res.vtx_size_dw = 16 / 4; + vtx_res.vtx_num_entries = accel_state->vb_size / 4; + vtx_res.mem_req_size = 1; + vtx_res.vb_addr = accel_state->vb_mc_addr; + set_vtx_resource (pScrn, accel_state->ib, &vtx_res); + + draw_conf.prim_type = DI_PT_RECTLIST; + draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; + draw_conf.num_instances = 1; + draw_conf.num_indices = vtx_res.vtx_num_entries / 
vtx_res.vtx_size_dw; + draw_conf.index_type = DI_INDEX_SIZE_16_BIT; + + ereg (accel_state->ib, VGT_INSTANCE_STEP_RATE_0, 0); /* ? */ + ereg (accel_state->ib, VGT_INSTANCE_STEP_RATE_1, 0); + + ereg (accel_state->ib, VGT_MAX_VTX_INDX, draw_conf.num_indices); + ereg (accel_state->ib, VGT_MIN_VTX_INDX, 0); + ereg (accel_state->ib, VGT_INDX_OFFSET, 0); + + draw_auto(pScrn, accel_state->ib, &draw_conf); + + wait_3d_idle_clean(pScrn, accel_state->ib); + + /* sync dst surface */ + cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), + accel_state->dst_size, accel_state->dst_mc_addr); + + R600CPFlushIndirect(pScrn, accel_state->ib); +} + +static void +R600AppendCopyVertex(ScrnInfoPtr pScrn, + int srcX, int srcY, + int dstX, int dstY, + int w, int h) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + struct r6xx_copy_vertex *copy_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); + struct r6xx_copy_vertex vertex[3]; + + vertex[0].x = (float)dstX; + vertex[0].y = (float)dstY; + vertex[0].s = (float)srcX; + vertex[0].t = (float)srcY; + + vertex[1].x = (float)dstX; + vertex[1].y = (float)(dstY + h); + vertex[1].s = (float)srcX; + vertex[1].t = (float)(srcY + h); + + vertex[2].x = (float)(dstX + w); + vertex[2].y = (float)(dstY + h); + vertex[2].s = (float)(srcX + w); + vertex[2].t = (float)(srcY + h); + +#ifdef SHOW_VERTEXES + ErrorF("vertex 0: %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].s, vertex[0].t); + ErrorF("vertex 1: %f, %f, %f, %f\n", vertex[1].x, vertex[1].y, vertex[1].s, vertex[1].t); + ErrorF("vertex 2: %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].s, vertex[2].t); +#endif + + // append to vertex buffer + copy_vb[accel_state->vb_index++] = vertex[0]; + copy_vb[accel_state->vb_index++] = vertex[1]; + copy_vb[accel_state->vb_index++] = vertex[2]; + +} + +static Bool +R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, + int xdir, int 
ydir, + int rop, + Pixel planemask) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + + accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); + accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); + + accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; + accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + + // bad pitch + if (accel_state->src_pitch[0] & 7) + return FALSE; + if (accel_state->dst_pitch & 7) + return FALSE; + + // bad offset + if (accel_state->src_mc_addr[0] & 0xff) + return FALSE; + if (accel_state->dst_mc_addr & 0xff) + return FALSE; + + if (pSrc->drawable.bitsPerPixel == 24) + return FALSE; + if (pDst->drawable.bitsPerPixel == 24) + return FALSE; + + //return FALSE; + +#ifdef SHOW_VERTEXES + ErrorF("src: %dx%d @ %dbpp, 0x%08x\n", pSrc->drawable.width, pSrc->drawable.height, + pSrc->drawable.bitsPerPixel, exaGetPixmapPitch(pSrc)); + ErrorF("dst: %dx%d @ %dbpp, 0x%08x\n", pDst->drawable.width, pDst->drawable.height, + pDst->drawable.bitsPerPixel, exaGetPixmapPitch(pDst)); +#endif + + if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) { + accel_state->same_surface = TRUE; + accel_state->rop = rop; + accel_state->planemask = planemask; + +#ifdef SHOW_VERTEXES + ErrorF("same surface!\n"); +#endif + } else { + + accel_state->same_surface = FALSE; + + R600DoPrepareCopy(pScrn, + accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height, + accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel, + accel_state->dst_pitch, pDst->drawable.height, + accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel, + rop, planemask); + + } + + return TRUE; +} + +static Bool +is_overlap(int sx1, int sx2, int sy1, int sy2, int dx1, int dx2, int dy1, int dy2) +{ + if (((sx1 >= dx1) && 
(sx1 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || // TL x1, y1 + ((sx2 >= dx1) && (sx2 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || // TR x2, y1 + ((sx1 >= dx1) && (sx1 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)) || // BL x1, y2 + ((sx2 >= dx1) && (sx2 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2))) // BR x2, y2 + return TRUE; + else + return FALSE; +} + +static void +R600OverlapCopy(PixmapPtr pDst, + int srcX, int srcY, + int dstX, int dstY, + int w, int h) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); + uint32_t dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + struct r6xx_copy_vertex *copy_vb; + struct r6xx_copy_vertex vertex[3]; + int i; + + if (is_overlap(srcX, srcX + w, srcY, srcY + h, + dstX, dstX + w, dstY, dstY + h)) { + if (srcY == dstY) { // left/right + if (srcX < dstX) { // right + // copy right to left + for (i = w; i > 0; i--) { + R600DoPrepareCopy(pScrn, + dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + accel_state->rop, accel_state->planemask); + + copy_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); + + vertex[0].x = (float)(dstX + i - 1); + vertex[0].y = (float)dstY; + vertex[0].s = (float)(srcX + i - 1); + vertex[0].t = (float)srcY; + + vertex[1].x = (float)(dstX + i - 1); + vertex[1].y = (float)(dstY + h); + vertex[1].s = (float)(srcX + i - 1); + vertex[1].t = (float)(srcY + h); + + vertex[2].x = (float)(dstX + i); + vertex[2].y = (float)(dstY + h); + vertex[2].s = (float)(srcX + i); + vertex[2].t = (float)(srcY + h); + +#ifdef SHOW_VERTEXES + ErrorF("vertex 0: %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].s, vertex[0].t); + ErrorF("vertex 1: %f, %f, %f, 
%f\n", vertex[1].x, vertex[1].y, vertex[1].s, vertex[1].t); + ErrorF("vertex 2: %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].s, vertex[2].t); +#endif + + // append to vertex buffer + copy_vb[accel_state->vb_index++] = vertex[0]; + copy_vb[accel_state->vb_index++] = vertex[1]; + copy_vb[accel_state->vb_index++] = vertex[2]; + + // do the blit + R600DoCopy(pScrn); + } + } else { //left + // copy left to right + for (i = 0; i < w; i++) { + R600DoPrepareCopy(pScrn, + dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + accel_state->rop, accel_state->planemask); + + copy_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); + + vertex[0].x = (float)(dstX + i); + vertex[0].y = (float)(dstY); + vertex[0].s = (float)(srcX + i); + vertex[0].t = (float)srcY; + + vertex[1].x = (float)(dstX + i); + vertex[1].y = (float)(dstY + h); + vertex[1].s = (float)(srcX + i); + vertex[1].t = (float)(srcY + h); + + vertex[2].x = (float)(dstX + i + 1); + vertex[2].y = (float)(dstY + h); + vertex[2].s = (float)(srcX + i + 1); + vertex[2].t = (float)(srcY + h); + +#ifdef SHOW_VERTEXES + ErrorF("vertex 0: %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].s, vertex[0].t); + ErrorF("vertex 1: %f, %f, %f, %f\n", vertex[1].x, vertex[1].y, vertex[1].s, vertex[1].t); + ErrorF("vertex 2: %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].s, vertex[2].t); +#endif + + // append to vertex buffer + copy_vb[accel_state->vb_index++] = vertex[0]; + copy_vb[accel_state->vb_index++] = vertex[1]; + copy_vb[accel_state->vb_index++] = vertex[2]; + + // do the blit + R600DoCopy(pScrn); + } + } + } else { //up/down + if (srcY > dstY) { // up + // copy top to bottom + for (i = 0; i < h; i++) { + R600DoPrepareCopy(pScrn, + dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, 
pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + accel_state->rop, accel_state->planemask); + + copy_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); + + vertex[0].x = (float)dstX; + vertex[0].y = (float)(dstY + i); + vertex[0].s = (float)srcX; + vertex[0].t = (float)(srcY + i); + + vertex[1].x = (float)dstX; + vertex[1].y = (float)(dstY + i + 1); + vertex[1].s = (float)srcX; + vertex[1].t = (float)(srcY + i + 1); + + vertex[2].x = (float)(dstX + w); + vertex[2].y = (float)(dstY + i + 1); + vertex[2].s = (float)(srcX + w); + vertex[2].t = (float)(srcY + i + 1); + +#ifdef SHOW_VERTEXES + ErrorF("vertex 0: %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].s, vertex[0].t); + ErrorF("vertex 1: %f, %f, %f, %f\n", vertex[1].x, vertex[1].y, vertex[1].s, vertex[1].t); + ErrorF("vertex 2: %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].s, vertex[2].t); +#endif + + // append to vertex buffer + copy_vb[accel_state->vb_index++] = vertex[0]; + copy_vb[accel_state->vb_index++] = vertex[1]; + copy_vb[accel_state->vb_index++] = vertex[2]; + + // do the blit + R600DoCopy(pScrn); + } + } else { // down + // copy bottom to top + for (i = h; i > 0; i--) { + R600DoPrepareCopy(pScrn, + dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + accel_state->rop, accel_state->planemask); + + copy_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); + + vertex[0].x = (float)dstX; + vertex[0].y = (float)(dstY + i - 1); + vertex[0].s = (float)(srcX); + vertex[0].t = (float)(srcY + i - 1); + + vertex[1].x = (float)dstX; + vertex[1].y = (float)(dstY + i); + vertex[1].s = (float)srcX; + vertex[1].t = (float)srcY + i; + + vertex[2].x = (float)(dstX + w); + vertex[2].y = (float)(dstY + i); + vertex[2].s = (float)(srcX + w); + vertex[2].t = (float)(srcY + i); + +#ifdef SHOW_VERTEXES + 
ErrorF("vertex 0: %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].s, vertex[0].t); + ErrorF("vertex 1: %f, %f, %f, %f\n", vertex[1].x, vertex[1].y, vertex[1].s, vertex[1].t); + ErrorF("vertex 2: %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].s, vertex[2].t); +#endif + + // append to vertex buffer + copy_vb[accel_state->vb_index++] = vertex[0]; + copy_vb[accel_state->vb_index++] = vertex[1]; + copy_vb[accel_state->vb_index++] = vertex[2]; + + // do the blit + R600DoCopy(pScrn); + } + } + } + } else { + R600DoPrepareCopy(pScrn, + dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + accel_state->rop, accel_state->planemask); + + copy_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); + + vertex[0].x = (float)dstX; + vertex[0].y = (float)dstY; + vertex[0].s = (float)srcX; + vertex[0].t = (float)srcY; + + vertex[1].x = (float)dstX; + vertex[1].y = (float)(dstY + h); + vertex[1].s = (float)srcX; + vertex[1].t = (float)(srcY + h); + + vertex[2].x = (float)(dstX + w); + vertex[2].y = (float)(dstY + h); + vertex[2].s = (float)(srcX + w); + vertex[2].t = (float)(srcY + h); + +#ifdef SHOW_VERTEXES + ErrorF("vertex 0: %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].s, vertex[0].t); + ErrorF("vertex 1: %f, %f, %f, %f\n", vertex[1].x, vertex[1].y, vertex[1].s, vertex[1].t); + ErrorF("vertex 2: %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].s, vertex[2].t); +#endif + + // append to vertex buffer + copy_vb[accel_state->vb_index++] = vertex[0]; + copy_vb[accel_state->vb_index++] = vertex[1]; + copy_vb[accel_state->vb_index++] = vertex[2]; + + // do the blit + R600DoCopy(pScrn); + } +} + +static void +R600Copy(PixmapPtr pDst, + int srcX, int srcY, + int dstX, int dstY, + int w, int h) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct 
radeon_accel_state *accel_state = info->accel_state; + + //blit to/from same surfacce + if (accel_state->same_surface) + R600OverlapCopy(pDst, srcX, srcY, dstX, dstY, w, h); + else + R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); +} + +static void +R600DoneCopy(PixmapPtr pDst) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + + if (accel_state->same_surface) + return; + else + R600DoCopy(pScrn); +} + +#define RADEON_TRACE_FALL 0 +#define RADEON_TRACE_DRAW 0 + +#if RADEON_TRACE_FALL +#define RADEON_FALLBACK(x) \ +do { \ + ErrorF("%s: ", __FUNCTION__); \ + ErrorF x; \ + return FALSE; \ +} while (0) +#else +#define RADEON_FALLBACK(x) return FALSE +#endif + +#define xFixedToFloat(f) (((float) (f)) / 65536) + +static inline void transformPoint(PictTransform *transform, xPointFixed *point) +{ + PictVector v; + v.vector[0] = point->x; + v.vector[1] = point->y; + v.vector[2] = xFixed1; + PictureTransformPoint(transform, &v); + point->x = v.vector[0]; + point->y = v.vector[1]; +} + +struct blendinfo { + Bool dst_alpha; + Bool src_alpha; + uint32_t blend_cntl; +}; + +static struct blendinfo R600BlendOp[] = { + /* Clear */ + {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, + /* Src */ + {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, + /* Dst */ + {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, + /* Over */ + {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, + /* OverReverse */ + {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, + /* In */ + {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, + /* InReverse */ + {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << 
COLOR_DESTBLEND_shift)}, + /* Out */ + {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, + /* OutReverse */ + {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, + /* Atop */ + {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, + /* AtopReverse */ + {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, + /* Xor */ + {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, + /* Add */ + {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, +}; + +struct formatinfo { + unsigned int fmt; + uint32_t card_fmt; +}; + +static struct formatinfo R600TexFormats[] = { + {PICT_a8r8g8b8, FMT_8_8_8_8}, + {PICT_x8r8g8b8, FMT_8_8_8_8}, + {PICT_a8b8g8r8, FMT_8_8_8_8}, + {PICT_x8b8g8r8, FMT_8_8_8_8}, + {PICT_r5g6b5, FMT_5_6_5}, + {PICT_a1r5g5b5, FMT_1_5_5_5}, + {PICT_x1r5g5b5, FMT_1_5_5_5}, + {PICT_a8, FMT_8}, +}; + +static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format) +{ + uint32_t sblend, dblend; + + sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask; + dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask; + + /* If there's no dst alpha channel, adjust the blend op so that we'll treat + * it as always 1. + */ + if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) { + if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift)) + sblend = (BLEND_ONE << COLOR_SRCBLEND_shift); + else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift)) + sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift); + } + + /* If the source alpha is being used, then we should only be in a case where + * the source blend factor is 0, and the source blend value is the mask + * channels multiplied by the source picture's alpha. 
+ */ + if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) { + if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) { + dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift); + } else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) { + dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift); + } + } + + return sblend | dblend; +} + +static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format) +{ + switch (pDstPicture->format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + *dst_format = COLOR_8_8_8_8; + break; + case PICT_r5g6b5: + *dst_format = COLOR_5_6_5; + break; + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + *dst_format = COLOR_1_5_5_5; + break; + case PICT_a8: + *dst_format = COLOR_8; + break; + default: + RADEON_FALLBACK(("Unsupported dest format 0x%x\n", + (int)pDstPicture->format)); + } + return TRUE; +} + +static Bool R600CheckCompositeTexture(PicturePtr pPict, + PicturePtr pDstPict, + int op, + int unit) +{ + int w = pPict->pDrawable->width; + int h = pPict->pDrawable->height; + unsigned int i; + int max_tex_w, max_tex_h; + + max_tex_w = 8192; + max_tex_h = 8192; + + if ((w > max_tex_w) || (h > max_tex_h)) + RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h)); + + for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { + if (R600TexFormats[i].fmt == pPict->format) + break; + } + if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0])) + RADEON_FALLBACK(("Unsupported picture format 0x%x\n", + (int)pPict->format)); + + if (pPict->filter != PictFilterNearest && + pPict->filter != PictFilterBilinear) + RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter)); + + /* for REPEAT_NONE, Render semantics are that sampling outside the source + * picture results in alpha=0 pixels. We can implement this with a border color + * *if* our source texture has an alpha channel, otherwise we need to fall + * back. 
If we're not transformed then we hope that upper layers have clipped + * rendering to the bounds of the source drawable, in which case it doesn't + * matter. I have not, however, verified that the X server always does such + * clipping. + */ + //FIXME R6xx + if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) { + if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0))) + RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n")); + } + + return TRUE; +} + +static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, + int unit) +{ + ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + int w = pPict->pDrawable->width; + int h = pPict->pDrawable->height; + unsigned int i; + tex_resource_t tex_res; + tex_sampler_t tex_samp; + + CLEAR (tex_res); + CLEAR (tex_samp); + + for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { + if (R600TexFormats[i].fmt == pPict->format) + break; + } + + accel_state->texW[unit] = w; + accel_state->texH[unit] = h; + + //ErrorF("Tex %d setup %dx%d\n", unit, w, h); + + accel_state->src_pitch[unit] = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); + accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * h; + accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; + /* flush texture cache */ + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, + accel_state->src_size[unit], accel_state->src_mc_addr[unit]); + + /* Texture */ + tex_res.id = unit; + tex_res.w = w; + tex_res.h = h; + tex_res.pitch = accel_state->src_pitch[unit]; + tex_res.depth = 0; + tex_res.dim = SQ_TEX_DIM_2D; + tex_res.base = accel_state->src_mc_addr[unit]; + tex_res.mip_base = accel_state->src_mc_addr[unit]; + tex_res.format = R600TexFormats[i].card_fmt; + tex_res.request_size = 1; + + /* 
component swizzles */ + // XXX double check these + switch (pPict->format) { + case PICT_a1r5g5b5: + case PICT_a8r8g8b8: + //ErrorF("%s: PICT_a8r8g8b8\n", unit ? "mask" : "src"); + tex_res.dst_sel_x = SQ_SEL_Z; //R + tex_res.dst_sel_y = SQ_SEL_Y; //G + tex_res.dst_sel_z = SQ_SEL_X; //B + tex_res.dst_sel_w = SQ_SEL_W; //A + break; + case PICT_a8b8g8r8: + //ErrorF("%s: PICT_a8b8g8r8\n", unit ? "mask" : "src"); + tex_res.dst_sel_x = SQ_SEL_X; //R + tex_res.dst_sel_y = SQ_SEL_Y; //G + tex_res.dst_sel_z = SQ_SEL_Z; //B + tex_res.dst_sel_w = SQ_SEL_W; //A + break; + case PICT_x8b8g8r8: + //ErrorF("%s: PICT_x8b8g8r8\n", unit ? "mask" : "src"); + tex_res.dst_sel_x = SQ_SEL_X; //R + tex_res.dst_sel_y = SQ_SEL_Y; //G + tex_res.dst_sel_z = SQ_SEL_Z; //B + tex_res.dst_sel_w = SQ_SEL_1; //A + break; + case PICT_x1r5g5b5: + case PICT_x8r8g8b8: + //ErrorF("%s: PICT_x8r8g8b8\n", unit ? "mask" : "src"); + tex_res.dst_sel_x = SQ_SEL_Z; //R + tex_res.dst_sel_y = SQ_SEL_Y; //G + tex_res.dst_sel_z = SQ_SEL_X; //B + tex_res.dst_sel_w = SQ_SEL_1; //A + break; + case PICT_r5g6b5: + //ErrorF("%s: PICT_r5g6b5\n", unit ? "mask" : "src"); + tex_res.dst_sel_x = SQ_SEL_Z; //R + tex_res.dst_sel_y = SQ_SEL_Y; //G + tex_res.dst_sel_z = SQ_SEL_X; //B + tex_res.dst_sel_w = SQ_SEL_1; //A + break; + case PICT_a8: + //ErrorF("%s: PICT_a8\n", unit ? 
"mask" : "src"); + tex_res.dst_sel_x = SQ_SEL_0; //R + tex_res.dst_sel_y = SQ_SEL_0; //G + tex_res.dst_sel_z = SQ_SEL_0; //B + tex_res.dst_sel_w = SQ_SEL_X; //A + break; + default: + RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format)); + } + + tex_res.base_level = 0; + tex_res.last_level = 0; + tex_res.perf_modulation = 0; + set_tex_resource (pScrn, accel_state->ib, &tex_res); + + tex_samp.id = unit; + tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK; + + switch (pPict->repeatType) { + case RepeatNormal: + tex_samp.clamp_x = SQ_TEX_WRAP; + tex_samp.clamp_y = SQ_TEX_WRAP; + break; + case RepeatPad: + tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; + break; + case RepeatReflect: + tex_samp.clamp_x = SQ_TEX_MIRROR; + tex_samp.clamp_y = SQ_TEX_MIRROR; + break; + case RepeatNone: + tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER; + tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER; + break; + default: + RADEON_FALLBACK(("Bad repeat 0x%x\n", pPict->repeatType)); + } + + switch (pPict->filter) { + case PictFilterNearest: + tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; + tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; + break; + case PictFilterBilinear: + tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; + tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; + break; + default: + RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); + } + + tex_samp.clamp_z = SQ_TEX_WRAP; + tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; + tex_samp.mip_filter = 0; /* no mipmap */ + set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + + if (pPict->transform != 0) { + accel_state->is_transform[unit] = TRUE; + accel_state->transform[unit] = pPict->transform; + } else + accel_state->is_transform[unit] = FALSE; + + return TRUE; +} + +static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, + PicturePtr pDstPicture) +{ + uint32_t tmp1; +// ScreenPtr pScreen = pDstPicture->pDrawable->pScreen; + PixmapPtr pSrcPixmap, pDstPixmap; +// 
ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; +// RADEONInfoPtr info = RADEONPTR(pScrn); + int max_tex_w, max_tex_h, max_dst_w, max_dst_h; + + /* Check for unsupported compositing operations. */ + if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0]))) + RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); + + pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable); + + max_tex_w = 8192; + max_tex_h = 8192; + max_dst_w = 8192; + max_dst_h = 8192; + + if (pSrcPixmap->drawable.width >= max_tex_w || + pSrcPixmap->drawable.height >= max_tex_h) { + RADEON_FALLBACK(("Source w/h too large (%d,%d).\n", + pSrcPixmap->drawable.width, + pSrcPixmap->drawable.height)); + } + + pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable); + + if (pDstPixmap->drawable.width >= max_dst_w || + pDstPixmap->drawable.height >= max_dst_h) { + RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n", + pDstPixmap->drawable.width, + pDstPixmap->drawable.height)); + } + + if (pMaskPicture) { + PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable); + + if (pMaskPixmap->drawable.width >= max_tex_w || + pMaskPixmap->drawable.height >= max_tex_h) { + RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n", + pMaskPixmap->drawable.width, + pMaskPixmap->drawable.height)); + } + + if (pMaskPicture->componentAlpha) { + /* Check if it's component alpha that relies on a source alpha and + * on the source value. We can only get one of those into the + * single source value that we get to blend with. 
+ */ + if (R600BlendOp[op].src_alpha && + (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) != + (BLEND_ZERO << COLOR_SRCBLEND_shift)) { + RADEON_FALLBACK(("Component alpha not supported with source " + "alpha and source value blending.\n")); + } + } + + if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1)) + return FALSE; + } + + if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0)) + return FALSE; + + if (!R600GetDestFormat(pDstPicture, &tmp1)) + return FALSE; + + return TRUE; + +} + +static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, + PicturePtr pMaskPicture, PicturePtr pDstPicture, + PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) +{ + ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + uint32_t blendcntl, dst_format; + cb_config_t cb_conf; + shader_config_t vs_conf, ps_conf; + int i = 0; + uint32_t ps[24]; + + //return FALSE; + + if (pMask) + accel_state->has_mask = TRUE; + else + accel_state->has_mask = FALSE; + + accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); + accel_state->dst_size = exaGetPixmapPitch(pDst) * pDst->drawable.height; + + accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; + accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); + accel_state->src_size[0] = exaGetPixmapPitch(pSrc) * pSrc->drawable.height; + + if (accel_state->dst_pitch & 7) + RADEON_FALLBACK(("Bad dst pitch 0x%x\n", (int)accel_state->dst_pitch)); + + if (accel_state->dst_mc_addr & 0xff) + RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)accel_state->dst_mc_addr)); + + if (accel_state->src_pitch[0] & 7) + RADEON_FALLBACK(("Bad src pitch 0x%x\n", (int)accel_state->src_pitch[0])); + + if (accel_state->src_mc_addr[0] & 
0xff) + RADEON_FALLBACK(("Bad src offset 0x%x\n", (int)accel_state->src_mc_addr[0])); + + if (!R600GetDestFormat(pDstPicture, &dst_format)) + return FALSE; + + if (pMask) { + int src_a, src_r, src_g, src_b; + int mask_a, mask_r, mask_g, mask_b; + + accel_state->src_mc_addr[1] = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset; + accel_state->src_pitch[1] = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8); + accel_state->src_size[1] = exaGetPixmapPitch(pMask) * pMask->drawable.height; + + if (accel_state->src_pitch[1] & 7) + RADEON_FALLBACK(("Bad mask pitch 0x%x\n", (int)accel_state->src_pitch[1])); + + if (accel_state->src_mc_addr[1] & 0xff) + RADEON_FALLBACK(("Bad mask offset 0x%x\n", (int)accel_state->src_mc_addr[1])); + + /* setup pixel shader */ + if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) { + //src_color = R300_ALU_RGB_0_0; + src_r = SQ_SEL_0; + src_g = SQ_SEL_0; + src_b = SQ_SEL_0; + } else { + //src_color = R300_ALU_RGB_SRC0_RGB; + src_r = SQ_SEL_X; + src_g = SQ_SEL_Y; + src_b = SQ_SEL_Z; + } + + if (PICT_FORMAT_A(pSrcPicture->format) == 0) { + //src_alpha = R300_ALU_ALPHA_1_0; + src_a = SQ_SEL_1; + } else { + //src_alpha = R300_ALU_ALPHA_SRC0_A; + src_a = SQ_SEL_W; + } + + if (pMaskPicture->componentAlpha) { + if (R600BlendOp[op].src_alpha) { + if (PICT_FORMAT_A(pSrcPicture->format) == 0) { + //src_color = R300_ALU_RGB_1_0; + //src_alpha = R300_ALU_ALPHA_1_0; + src_r = SQ_SEL_1; + src_g = SQ_SEL_1; + src_b = SQ_SEL_1; + src_a = SQ_SEL_1; + } else { + //src_color = R300_ALU_RGB_SRC0_AAA; + //src_alpha = R300_ALU_ALPHA_SRC0_A; + src_r = SQ_SEL_W; + src_g = SQ_SEL_W; + src_b = SQ_SEL_W; + src_a = SQ_SEL_W; + } + + //mask_color = R300_ALU_RGB_SRC1_RGB; + mask_r = SQ_SEL_X; + mask_g = SQ_SEL_Y; + mask_b = SQ_SEL_Z; + + if (PICT_FORMAT_A(pMaskPicture->format) == 0) { + //mask_alpha = R300_ALU_ALPHA_1_0; + mask_a = SQ_SEL_1; + } else { + //mask_alpha = R300_ALU_ALPHA_SRC1_A; + mask_a = SQ_SEL_W; + } + } else { + //src_color = 
R300_ALU_RGB_SRC0_RGB; + src_r = SQ_SEL_X; + src_g = SQ_SEL_Y; + src_b = SQ_SEL_Z; + + if (PICT_FORMAT_A(pSrcPicture->format) == 0) { + //src_alpha = R300_ALU_ALPHA_1_0; + src_a = SQ_SEL_1; + } else { + //src_alpha = R300_ALU_ALPHA_SRC0_A; + src_a = SQ_SEL_W; + } + + //mask_color = R300_ALU_RGB_SRC1_RGB; + mask_r = SQ_SEL_X; + mask_g = SQ_SEL_Y; + mask_b = SQ_SEL_Z; + + if (PICT_FORMAT_A(pMaskPicture->format) == 0) { + //mask_alpha = R300_ALU_ALPHA_1_0; + mask_a = SQ_SEL_1; + } else { + //mask_alpha = R300_ALU_ALPHA_SRC1_A; + mask_a = SQ_SEL_W; + } + } + } else { + if (PICT_FORMAT_A(pMaskPicture->format) == 0) { + //mask_color = R300_ALU_RGB_1_0; + mask_r = SQ_SEL_1; + mask_g = SQ_SEL_1; + mask_b = SQ_SEL_1; + } else { + //mask_color = R300_ALU_RGB_SRC1_AAA; + mask_r = SQ_SEL_W; + mask_g = SQ_SEL_W; + mask_b = SQ_SEL_W; + } + if (PICT_FORMAT_A(pMaskPicture->format) == 0) { + //mask_alpha = R300_ALU_ALPHA_1_0; + mask_a = SQ_SEL_1; + } else { + //mask_alpha = R300_ALU_ALPHA_SRC1_A; + mask_a = SQ_SEL_W; + } + } + + //0 + ps[i++] = CF_DWORD0(ADDR(8)); + ps[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_TEX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + // 1 + ps[i++] = CF_ALU_DWORD0(ADDR(3), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + USES_WATERFALL(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + //2 + ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(2), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + + ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + 
CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + // 3 - alu 0 + // MUL gpr[2].x gpr[1].x gpr[0].x + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + // 4 - alu 1 + // MUL gpr[2].y gpr[1].y gpr[0].y + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + // 5 - alu 2 + // MUL gpr[2].z gpr[1].z gpr[0].z + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + // 6 - alu 3 + // MUL gpr[2].w gpr[1].w gpr[0].w + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), 
+ SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + // 7 + ps[i++] = 0x00000000; + ps[i++] = 0x00000000; + + //8/9 - src + ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + BC_FRAC_MODE(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + R7xx_ALT_CONST(0)); + ps[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(src_r), + DST_SEL_Y(src_g), + DST_SEL_Z(src_b), + DST_SEL_W(src_a), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + ps[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + ps[i++] = TEX_DWORD_PAD; + //10/11 - mask + ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + BC_FRAC_MODE(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(1), + SRC_GPR(1), + SRC_REL(ABSOLUTE), + R7xx_ALT_CONST(0)); + ps[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(mask_r), + DST_SEL_Y(mask_g), + DST_SEL_Z(mask_b), + DST_SEL_W(mask_a), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + ps[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(1), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + ps[i++] = TEX_DWORD_PAD; + } else { + int src_a, src_r, src_g, src_b; + /* setup pixel shader */ + if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) { + //src_color = 
R300_ALU_RGB_0_0; + src_r = SQ_SEL_0; + src_g = SQ_SEL_0; + src_b = SQ_SEL_0; + } else { + //src_color = R300_ALU_RGB_SRC0_RGB; + src_r = SQ_SEL_X; + src_g = SQ_SEL_Y; + src_b = SQ_SEL_Z; + } + + if (PICT_FORMAT_A(pSrcPicture->format) == 0) { + //src_alpha = R300_ALU_ALPHA_1_0; + src_a = SQ_SEL_1; + } else { + //src_alpha = R300_ALU_ALPHA_SRC0_A; + src_a = SQ_SEL_W; + } + + //0 + ps[i++] = CF_DWORD0(ADDR(2)); + ps[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_TEX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + //1 + ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + + ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + + //2/3 - src + ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + BC_FRAC_MODE(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + R7xx_ALT_CONST(0)); + ps[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(src_r), + DST_SEL_Y(src_g), + DST_SEL_Z(src_b), + DST_SEL_W(src_a), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + ps[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + ps[i++] = TEX_DWORD_PAD; + } + + CLEAR (cb_conf); + CLEAR (vs_conf); + CLEAR (ps_conf); + + accel_state->ib = RADEONCPGetBuffer(pScrn); + + /* Init */ + start_3d(pScrn, accel_state->ib); + + //cp_set_surface_sync(pScrn, accel_state->ib); + + set_default_state(pScrn, 
accel_state->ib); + + /* Scissor / viewport */ + ereg (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); + ereg (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); + + // fix me if false discard buffer! + if (!R600TextureSetup(pSrcPicture, pSrc, 0)) + return FALSE; + + if (pMask != NULL) { + // fix me if false discard buffer! + if (!R600TextureSetup(pMaskPicture, pMask, 1)) + return FALSE; + } else { + accel_state->is_transform[1] = FALSE; + } + + if (pMask != NULL) + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->comp_mask_vs_offset; + else + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->comp_vs_offset; + + memcpy ((char *)accel_state->ib->address + (accel_state->ib->total / 2) - 256, ps, sizeof(ps)); + accel_state->ps_mc_addr = info->gartLocation + info->dri->bufStart + + (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2) - 256; + + accel_state->vs_size = 512; + accel_state->ps_size = 512; + + /* Shader */ + + /* flush SQ cache */ + cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, + accel_state->vs_size, accel_state->vs_mc_addr); + + vs_conf.shader_addr = accel_state->vs_mc_addr; + vs_conf.num_gprs = 3; + vs_conf.stack_size = 0; + vs_setup (pScrn, accel_state->ib, &vs_conf); + + /* flush SQ cache */ + cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, + accel_state->ps_size, accel_state->ps_mc_addr); + + ps_conf.shader_addr = accel_state->ps_mc_addr; + ps_conf.num_gprs = 3; + ps_conf.stack_size = 0; + ps_conf.uncached_first_inst = 1; + ps_conf.clamp_consts = 0; + ps_conf.export_mode = 2; + ps_setup (pScrn, accel_state->ib, &ps_conf); + + ereg (accel_state->ib, CB_SHADER_MASK, (0xf << OUTPUT0_ENABLE_shift)); + ereg (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); + + blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format); + + if (info->ChipFamily == 
CHIP_FAMILY_R600) { + // no per-MRT blend on R600 + ereg (accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift)); + ereg (accel_state->ib, CB_BLEND_CONTROL, blendcntl); + } else { + ereg (accel_state->ib, CB_COLOR_CONTROL, (RADEON_ROP[3] | + (1 << TARGET_BLEND_ENABLE_shift) | + PER_MRT_BLEND_bit)); + ereg (accel_state->ib, CB_BLEND0_CONTROL, blendcntl); + } + + cb_conf.id = 0; + cb_conf.w = accel_state->dst_pitch; + cb_conf.h = pDst->drawable.height; + cb_conf.base = accel_state->dst_mc_addr; + cb_conf.format = dst_format; + + switch (pDstPicture->format) { + case PICT_a8r8g8b8: + //ErrorF("dst: PICT_a8r8g8b8\n"); + cb_conf.comp_swap = 1; //ARGB + break; + case PICT_x8r8g8b8: + //ErrorF("dst: PICT_x8r8g8b8\n"); + cb_conf.comp_swap = 1; //ARGB + break; + case PICT_r5g6b5: + //ErrorF("dst: PICT_r5g6b5\n"); + cb_conf.comp_swap = 2; //RGB + break; + case PICT_a1r5g5b5: + //ErrorF("dst: PICT_a1r5g5b5\n"); + cb_conf.comp_swap = 1; //ARGB + break; + case PICT_x1r5g5b5: + //ErrorF("dst: PICT_x1r5g5b5\n"); + cb_conf.comp_swap = 1; //ARGB + break; + case PICT_a8: + //ErrorF("dst: PICT_a8\n"); + cb_conf.comp_swap = 3; //A + break; + default: + cb_conf.comp_swap = 1; + break; + } + cb_conf.source_format = 1; + cb_conf.blend_clamp = 1; + set_render_target(pScrn, accel_state->ib, &cb_conf); + + ereg (accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); + ereg (accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ + DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ + + /* Interpolator setup */ + if (pMask) { + // export 2 tex coords from VS + ereg (accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift)); + // src = semantic id 0; mask = semantic id 1 + ereg (accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | + (1 << SEMANTIC_1_shift))); + // input 2 tex coords 
from VS + ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, (2 << NUM_INTERP_shift)); + } else { + // export 1 tex coords from VS + ereg (accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); + // src = semantic id 0 + ereg (accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); + // input 1 tex coords from VS + ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift)); + } + ereg (accel_state->ib, SPI_PS_IN_CONTROL_1, 0); + // SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 + ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | + (0x01 << DEFAULT_VAL_shift) | + SEL_CENTROID_bit)); + // SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 + ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (1 <<2), ((1 << SEMANTIC_shift) | + (0x01 << DEFAULT_VAL_shift) | + SEL_CENTROID_bit)); + ereg (accel_state->ib, SPI_INTERP_CONTROL_0, 0); + + accel_state->vb_index = 0; + + return TRUE; +} + +static void R600Composite(PixmapPtr pDst, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight; + + /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n", + srcX, srcY, maskX, maskY,dstX, dstY, w, h); */ + + srcTopLeft.x = IntToxFixed(srcX); + srcTopLeft.y = IntToxFixed(srcY); + srcTopRight.x = IntToxFixed(srcX + w); + srcTopRight.y = IntToxFixed(srcY); + srcBottomLeft.x = IntToxFixed(srcX); + srcBottomLeft.y = IntToxFixed(srcY + h); + srcBottomRight.x = IntToxFixed(srcX + w); + srcBottomRight.y = IntToxFixed(srcY + h); + + //XXX do transform in vertex shader + if (accel_state->is_transform[0]) { + transformPoint(accel_state->transform[0], &srcTopLeft); + transformPoint(accel_state->transform[0], &srcTopRight); + 
transformPoint(accel_state->transform[0], &srcBottomLeft); + transformPoint(accel_state->transform[0], &srcBottomRight); + } + + if (accel_state->has_mask) { + struct r6xx_comp_mask_vertex *comp_vb = + (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); + struct r6xx_comp_mask_vertex vertex[3]; + xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight; + + maskTopLeft.x = IntToxFixed(maskX); + maskTopLeft.y = IntToxFixed(maskY); + maskTopRight.x = IntToxFixed(maskX + w); + maskTopRight.y = IntToxFixed(maskY); + maskBottomLeft.x = IntToxFixed(maskX); + maskBottomLeft.y = IntToxFixed(maskY + h); + maskBottomRight.x = IntToxFixed(maskX + w); + maskBottomRight.y = IntToxFixed(maskY + h); + + if (accel_state->is_transform[1]) { + transformPoint(accel_state->transform[1], &maskTopLeft); + transformPoint(accel_state->transform[1], &maskTopRight); + transformPoint(accel_state->transform[1], &maskBottomLeft); + transformPoint(accel_state->transform[1], &maskBottomRight); + } + + vertex[0].x = (float)dstX; + vertex[0].y = (float)dstY; + vertex[0].src_s = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0]; + vertex[0].src_t = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]; + vertex[0].mask_s = xFixedToFloat(maskTopLeft.x) / accel_state->texW[1]; + vertex[0].mask_t = xFixedToFloat(maskTopLeft.y) / accel_state->texH[1]; + + vertex[1].x = (float)dstX; + vertex[1].y = (float)(dstY + h); + vertex[1].src_s = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0]; + vertex[1].src_t = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]; + vertex[1].mask_s = xFixedToFloat(maskBottomLeft.x) / accel_state->texW[1]; + vertex[1].mask_t = xFixedToFloat(maskBottomLeft.y) / accel_state->texH[1]; + + vertex[2].x = (float)(dstX + w); + vertex[2].y = (float)(dstY + h); + vertex[2].src_s = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0]; + vertex[2].src_t = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]; + vertex[2].mask_s = 
xFixedToFloat(maskBottomRight.x) / accel_state->texW[1]; + vertex[2].mask_t = xFixedToFloat(maskBottomRight.y) / accel_state->texH[1]; + +#ifdef SHOW_VERTEXES + ErrorF("vertex 0: %d, %d, %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, + vertex[0].src_s, vertex[0].src_t, vertex[0].mask_s, vertex[0].mask_t); + ErrorF("vertex 1: %d, %d, %f, %f, %f, %f\n", vertex[1].x, vertex[1].y, + vertex[1].src_s, vertex[1].src_t, vertex[1].mask_s, vertex[1].mask_t); + ErrorF("vertex 2: %d, %d, %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, + vertex[2].src_s, vertex[2].src_t, vertex[2].mask_s, vertex[2].mask_t); +#endif + + // append to vertex buffer + comp_vb[accel_state->vb_index++] = vertex[0]; + comp_vb[accel_state->vb_index++] = vertex[1]; + comp_vb[accel_state->vb_index++] = vertex[2]; + + } else { + struct r6xx_comp_vertex *comp_vb = + (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); + struct r6xx_comp_vertex vertex[3]; + + vertex[0].x = (float)dstX; + vertex[0].y = (float)dstY; + vertex[0].src_s = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0]; + vertex[0].src_t = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]; + + vertex[1].x = (float)dstX; + vertex[1].y = (float)(dstY + h); + vertex[1].src_s = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0]; + vertex[1].src_t = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]; + + vertex[2].x = (float)(dstX + w); + vertex[2].y = (float)(dstY + h); + vertex[2].src_s = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0]; + vertex[2].src_t = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]; + + // append to vertex buffer + comp_vb[accel_state->vb_index++] = vertex[0]; + comp_vb[accel_state->vb_index++] = vertex[1]; + comp_vb[accel_state->vb_index++] = vertex[2]; + +#ifdef SHOW_VERTEXES + ErrorF("vertex 0: %d, %d, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].src_s, vertex[0].src_t); + ErrorF("vertex 1: %d, %d, %f, %f\n", vertex[1].x, vertex[1].y, vertex[1].src_s, vertex[1].src_t); + 
ErrorF("vertex 2: %d, %d, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].src_s, vertex[2].src_t); +#endif + } + + +} + +static void R600DoneComposite(PixmapPtr pDst) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + draw_config_t draw_conf; + vtx_resource_t vtx_res; + + CLEAR (draw_conf); + CLEAR (vtx_res); + + if (accel_state->vb_index == 0) { + R600IBDiscard(pScrn, accel_state->ib); + return; + } + + accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + + (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); + + + /* Vertex buffer setup */ + if (accel_state->has_mask) { + accel_state->vb_size = accel_state->vb_index * 24; + vtx_res.id = SQ_VTX_RESOURCE_vs; + vtx_res.vtx_size_dw = 24 / 4; + vtx_res.vtx_num_entries = accel_state->vb_size / 4; + vtx_res.mem_req_size = 1; + vtx_res.vb_addr = accel_state->vb_mc_addr; + } else { + accel_state->vb_size = accel_state->vb_index * 16; + vtx_res.id = SQ_VTX_RESOURCE_vs; + vtx_res.vtx_size_dw = 16 / 4; + vtx_res.vtx_num_entries = accel_state->vb_size / 4; + vtx_res.mem_req_size = 1; + vtx_res.vb_addr = accel_state->vb_mc_addr; + } + /* flush vertex cache */ + if ((info->ChipFamily == CHIP_FAMILY_RV610) || + (info->ChipFamily == CHIP_FAMILY_RV620) || + (info->ChipFamily == CHIP_FAMILY_RS780) || + (info->ChipFamily == CHIP_FAMILY_RV710)) + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, + accel_state->vb_size, accel_state->vb_mc_addr); + else + cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, + accel_state->vb_size, accel_state->vb_mc_addr); + + set_vtx_resource (pScrn, accel_state->ib, &vtx_res); + + draw_conf.prim_type = DI_PT_RECTLIST; + draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; + draw_conf.num_instances = 1; + draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; + draw_conf.index_type = 
DI_INDEX_SIZE_16_BIT; + + ereg (accel_state->ib, VGT_INSTANCE_STEP_RATE_0, 0); /* ? */ + ereg (accel_state->ib, VGT_INSTANCE_STEP_RATE_1, 0); + + ereg (accel_state->ib, VGT_MAX_VTX_INDX, draw_conf.num_indices); + ereg (accel_state->ib, VGT_MIN_VTX_INDX, 0); + ereg (accel_state->ib, VGT_INDX_OFFSET, 0); + + draw_auto(pScrn, accel_state->ib, &draw_conf); + + wait_3d_idle_clean(pScrn, accel_state->ib); + + cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), + accel_state->dst_size, accel_state->dst_mc_addr); + + R600CPFlushIndirect(pScrn, accel_state->ib); +} + +static Bool +R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, + char *src, int src_pitch) +{ + ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); +// struct radeon_accel_state *accel_state = info->accel_state; + uint8_t *dst = (pointer)((char *)info->FB + exaGetPixmapOffset(pDst)); + int dst_pitch = exaGetPixmapPitch(pDst); + int bpp = pDst->drawable.bitsPerPixel; + + + //return FALSE; + + dst += (x * bpp / 8) + (y * dst_pitch); + w *= bpp / 8; + + while (h--) { + memcpy(dst, src, w); + src += src_pitch; + dst += dst_pitch; + } + + return TRUE; +} + +static Bool +R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, + char *dst, int dst_pitch) +{ + ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); +// struct radeon_accel_state *accel_state = info->accel_state; + uint8_t *src = (pointer)((char *)info->FB + exaGetPixmapOffset(pSrc)); + int src_pitch = exaGetPixmapPitch(pSrc); + int bpp = pSrc->drawable.bitsPerPixel; + + //return FALSE; + + src += (x * bpp / 8) + (y * src_pitch); + w *= bpp / 8; + + while (h--) { + memcpy(dst, src, w); + src += src_pitch; + dst += dst_pitch; + } + + return TRUE; +} + +static int +R600MarkSync(ScreenPtr pScreen) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + 
struct radeon_accel_state *accel_state = info->accel_state; + + accel_state->exaSyncMarker++; + + return accel_state->exaSyncMarker; +} + +static void +R600Sync(ScreenPtr pScreen, int marker) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + + if (accel_state->exaMarkerSynced != marker) + accel_state->exaMarkerSynced = marker; +} + +static Bool +R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + uint32_t *vs; + uint32_t *ps; + // 512 bytes per shader for now + int size = 512 * 10; + int i; + + accel_state->shaders = NULL; + + accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, + TRUE, NULL, NULL); + + if (accel_state->shaders == NULL) + return FALSE; + + vs = (pointer)((char *)info->FB + accel_state->shaders->offset); + ps = (pointer)((char *)info->FB + accel_state->shaders->offset); + accel_state->solid_vs_offset = 0; + accel_state->solid_ps_offset = 512; + accel_state->copy_vs_offset = 1024; + accel_state->copy_ps_offset = 1536; + accel_state->comp_vs_offset = 2048; + accel_state->comp_ps_offset = 2560; + accel_state->comp_mask_vs_offset = 3072; + accel_state->comp_mask_ps_offset = 3584; + accel_state->xv_vs_offset = 4096; + accel_state->xv_ps_offset = 4608; + + // solid vs --------------------------------------- + i = accel_state->solid_vs_offset / 4; + //0 + vs[i++] = CF_DWORD0(ADDR(4)); + vs[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_VTX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + //1 + vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + 
SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + //2 - always export a param whether it's used or not + vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + //3 - padding + vs[i++] = 0x00000000; + vs[i++] = 0x00000000; + //4/5 + vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + vs[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), //xxx + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + vs[i++] = VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1)); + vs[i++] = VTX_DWORD_PAD; + + // solid ps --------------------------------------- + i = accel_state->solid_ps_offset / 4; + // 0 + ps[i++] = CF_ALU_DWORD0(ADDR(2), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(0)); + ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(0), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + USES_WATERFALL(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + // 1 + ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + ps[i++] = 
CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + // 2 + ps[i++] = ALU_DWORD0(SRC0_SEL(256), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + // 3 + ps[i++] = ALU_DWORD0(SRC0_SEL(256), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + // 4 + ps[i++] = ALU_DWORD0(SRC0_SEL(256), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + // 5 + ps[i++] = ALU_DWORD0(SRC0_SEL(256), + SRC0_REL(ABSOLUTE), 
+ SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + + // copy vs --------------------------------------- + i = accel_state->copy_vs_offset / 4; + //0 + vs[i++] = CF_DWORD0(ADDR(4)); + vs[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_VTX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + //1 + vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + //2 + vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + //3 + vs[i++] = 0x00000000; + vs[i++] = 0x00000000; + //4/5 + vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(16)); + vs[i++] = 
VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), //xxx + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + vs[i++] = VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1)); + vs[i++] = VTX_DWORD_PAD; + //6/7 + vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + vs[i++] = VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), //xxx + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + vs[i++] = VTX_DWORD2(OFFSET(8), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0)); + vs[i++] = VTX_DWORD_PAD; + + // copy ps --------------------------------------- + i = accel_state->copy_ps_offset / 4; + // CF INST 0 + ps[i++] = CF_DWORD0(ADDR(2)); + ps[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_TEX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + // CF INST 1 + ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + // TEX INST 0 + 
ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + BC_FRAC_MODE(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + R7xx_ALT_CONST(0)); + ps[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), //R + DST_SEL_Y(SQ_SEL_Y), //G + DST_SEL_Z(SQ_SEL_Z), //B + DST_SEL_W(SQ_SEL_W), //A + LOD_BIAS(0), + COORD_TYPE_X(TEX_UNNORMALIZED), + COORD_TYPE_Y(TEX_UNNORMALIZED), + COORD_TYPE_Z(TEX_UNNORMALIZED), + COORD_TYPE_W(TEX_UNNORMALIZED)); + ps[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + ps[i++] = TEX_DWORD_PAD; + + // xv vs --------------------------------------- + i = accel_state->xv_vs_offset / 4; + //0 + vs[i++] = CF_DWORD0(ADDR(4)); + vs[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_VTX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + //1 + vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + //2 + vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + //3 + vs[i++] = 0x00000000; + vs[i++] = 0x00000000; + //4/5 + vs[i++] = 
VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(16)); + vs[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), //xxx + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + vs[i++] = VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1)); + vs[i++] = VTX_DWORD_PAD; + //6/7 + vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + vs[i++] = VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), //xxx + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + vs[i++] = VTX_DWORD2(OFFSET(8), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0)); + vs[i++] = VTX_DWORD_PAD; + + // xv ps --------------------------------------- + i = accel_state->xv_ps_offset / 4; + // 0 + ps[i++] = CF_DWORD0(ADDR(20)); + ps[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_TEX), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + // 1 + ps[i++] = CF_ALU_DWORD0(ADDR(3), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(16), + USES_WATERFALL(0), + 
CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + // 2 + ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(3), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(3)); + ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + // 3 - alu 0 + // DP4 gpr[2].x gpr[1].x c[0].x + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(256), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + // 4 - alu 1 + // DP4 gpr[2].y gpr[1].y c[0].y + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(256), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + // 5 - alu 2 + // DP4 gpr[2].z gpr[1].z c[0].z + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(256), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = 
ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + // 6 - alu 3 + // DP4 gpr[2].w gpr[1].w c[0].w + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(256), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_021), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + // 7 - alu 4 + // DP4 gpr[2].x gpr[1].x c[1].x + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(257), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + // 8 - alu 5 + // DP4 gpr[2].y gpr[1].y c[1].y + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(257), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + 
BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + // 9 - alu 6 + // DP4 gpr[2].z gpr[1].z c[1].z + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(257), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + // 10 - alu 7 + // DP4 gpr[2].w gpr[1].w c[1].w + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(257), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_021), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + // 11 - alu 8 + // DP4 gpr[2].x gpr[1].x c[2].x + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(258), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + // 12 - alu 9 + // DP4 gpr[2].y gpr[1].y c[2].y + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + 
SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(258), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + // 13 - alu 10 + // DP4 gpr[2].z gpr[1].z c[2].z + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(258), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + // 14 - alu 11 + // DP4 gpr[2].w gpr[1].w c[2].w + ps[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(258), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_021), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + // 15 - alu 12 + // MOV gpr[3].x gpr[2].x + ps[i++] = ALU_DWORD0(SRC0_SEL(2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = 
ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + // 16 - alu 13 + // MOV gpr[3].y gpr[2].y + ps[i++] = ALU_DWORD0(SRC0_SEL(2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + // 17 - alu 14 + // MOV gpr[3].z gpr[2].z + ps[i++] = ALU_DWORD0(SRC0_SEL(2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_210), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + // 18 - alu 15 + // MOV gpr[3].w gpr[2].w + ps[i++] = ALU_DWORD0(SRC0_SEL(2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + 
DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + // 19 - alignment + ps[i++] = 0x00000000; + ps[i++] = 0x00000000; + // 20/21 - tex 0 + ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + BC_FRAC_MODE(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + R7xx_ALT_CONST(0)); + ps[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), //R + DST_SEL_Y(SQ_SEL_MASK), //G + DST_SEL_Z(SQ_SEL_MASK), //B + DST_SEL_W(SQ_SEL_1), //A + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + ps[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + ps[i++] = TEX_DWORD_PAD; + // 22/23 - tex 1 + ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + BC_FRAC_MODE(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(1), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + R7xx_ALT_CONST(0)); + ps[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_MASK), //R + DST_SEL_Y(SQ_SEL_X), //G + DST_SEL_Z(SQ_SEL_Y), //B + DST_SEL_W(SQ_SEL_MASK), //A + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + ps[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(1), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + ps[i++] = TEX_DWORD_PAD; + + // comp mask vs --------------------------------------- + i = accel_state->comp_mask_vs_offset / 4; + //0 + vs[i++] = CF_DWORD0(ADDR(4)); + vs[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(3), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_VTX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + //1 - dst + vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + 
RW_GPR(2), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + //2 - src + vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + //3 - mask + vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + //4/5 - dst + vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(24)); + vs[i++] = VTX_DWORD1_GPR(DST_GPR(2), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), //xxx + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + vs[i++] = VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1)); + vs[i++] = VTX_DWORD_PAD; + //6/7 - src + vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), 
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + vs[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), //xxx + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + vs[i++] = VTX_DWORD2(OFFSET(8), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0)); + vs[i++] = VTX_DWORD_PAD; + //8/9 - mask + vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + vs[i++] = VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), //xxx + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + vs[i++] = VTX_DWORD2(OFFSET(16), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0)); + vs[i++] = VTX_DWORD_PAD; + + // comp mask ps --------------------------------------- + // not yet + + // comp vs --------------------------------------- + i = accel_state->comp_vs_offset / 4; + //0 + vs[i++] = CF_DWORD0(ADDR(4)); + vs[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_VTX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + //1 - dst + vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + 
SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + //2 - src + vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + //3 + vs[i++] = 0x00000000; + vs[i++] = 0x00000000; + //4/5 - dst + vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(16)); + vs[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), //xxx + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + vs[i++] = VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1)); + vs[i++] = VTX_DWORD_PAD; + //6/7 - src + vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + vs[i++] = VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), //xxx + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + 
vs[i++] = VTX_DWORD2(OFFSET(8), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0)); + vs[i++] = VTX_DWORD_PAD; + + // comp ps --------------------------------------- + // not yet + + + return TRUE; +} + +static Bool +R600PrepareAccess(PixmapPtr pPix, int index) +{ + ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + unsigned char *RADEONMMIO = info->MMIO; + + //flush HDP read/write caches + OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); + + return TRUE; +} + +static void +R600FinishAccess(PixmapPtr pPix, int index) +{ + ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + unsigned char *RADEONMMIO = info->MMIO; + + //flush HDP read/write caches + OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); + +} + + +Bool +R600DrawInit(ScreenPtr pScreen) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + + if (info->accel_state->exa == NULL) { + xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n"); + return FALSE; + } + + info->accel_state->exa->exa_major = EXA_VERSION_MAJOR; + info->accel_state->exa->exa_minor = EXA_VERSION_MINOR; + + info->accel_state->exa->PrepareSolid = R600PrepareSolid; + info->accel_state->exa->Solid = R600Solid; + info->accel_state->exa->DoneSolid = R600DoneSolid; + + info->accel_state->exa->PrepareCopy = R600PrepareCopy; + info->accel_state->exa->Copy = R600Copy; + info->accel_state->exa->DoneCopy = R600DoneCopy; + + info->accel_state->exa->MarkSync = R600MarkSync; + info->accel_state->exa->WaitMarker = R600Sync; + + info->accel_state->exa->PrepareAccess = R600PrepareAccess; + info->accel_state->exa->FinishAccess = R600FinishAccess; + + info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; + info->accel_state->exa->pixmapOffsetAlign = 256; + info->accel_state->exa->pixmapPitchAlign = 256; + + info->accel_state->exa->CheckComposite = R600CheckComposite; + 
info->accel_state->exa->PrepareComposite = R600PrepareComposite; + info->accel_state->exa->Composite = R600Composite; + info->accel_state->exa->DoneComposite = R600DoneComposite; + +#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3) + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n"); + + info->accel_state->exa->maxPitchBytes = 16320; + info->accel_state->exa->maxX = 8192; +#else + info->accel_state->exa->maxX = 16320 / 4; +#endif + info->accel_state->exa->maxY = 8192; + + if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) { + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n"); + info->accel_state->vsync = TRUE; + } else + info->accel_state->vsync = FALSE; + + if (!exaDriverInit(pScreen, info->accel_state->exa)) { + xfree(info->accel_state->exa); + return FALSE; + } + + if (!info->gartLocation) + return FALSE; + + info->accel_state->XInited3D = FALSE; + + if (!R600LoadShaders(pScrn, pScreen)) + return FALSE; + + exaMarkSync(pScreen); + + return TRUE; + +} + diff --git a/src/r600_reg.h b/src/r600_reg.h new file mode 100644 index 00000000..dfe47039 --- /dev/null +++ b/src/r600_reg.h @@ -0,0 +1,118 @@ +/* + * RadeonHD R6xx, R7xx Register documentation + * + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
/*
 * SET_*_REG offsets + ends.
 *
 * Each SET_* kind has an `_offset` (start) and an `_end` value.  Several
 * `_end` values equal the next kind's `_offset`, so `_end` is presumably
 * exclusive — confirm against the code that consumes these.
 */
enum {
    SET_CONFIG_REG_offset  = 0x08000,
    SET_CONFIG_REG_end     = 0x0ac00,

    SET_CONTEXT_REG_offset = 0x28000,
    SET_CONTEXT_REG_end    = 0x29000,

    SET_ALU_CONST_offset   = 0x30000,
    SET_ALU_CONST_end      = 0x32000,

    SET_RESOURCE_offset    = 0x38000,
    SET_RESOURCE_end       = 0x3c000,

    SET_SAMPLER_offset     = 0x3c000,
    SET_SAMPLER_end        = 0x3cff0,

    SET_CTL_CONST_offset   = 0x3cff0,
    SET_CTL_CONST_end      = 0x3e200,

    SET_LOOP_CONST_offset  = 0x3e200,
    SET_LOOP_CONST_end     = 0x3e380,

    SET_BOOL_CONST_offset  = 0x3e380,
    SET_BOOL_CONST_end     = 0x40000,
};

/* packet3 IT_SURFACE_BASE_UPDATE bits: one flag per base, bits 0..14 */
enum {
    DEPTH_BASE    = 0x0001,
    COLOR0_BASE   = 0x0002,
    COLOR1_BASE   = 0x0004,
    COLOR2_BASE   = 0x0008,
    COLOR3_BASE   = 0x0010,
    COLOR4_BASE   = 0x0020,
    COLOR5_BASE   = 0x0040,
    COLOR6_BASE   = 0x0080,
    COLOR7_BASE   = 0x0100,
    STRMOUT_BASE0 = 0x0200,
    STRMOUT_BASE1 = 0x0400,
    STRMOUT_BASE2 = 0x0800,
    STRMOUT_BASE3 = 0x1000,
    COHER_BASE0   = 0x2000,
    COHER_BASE1   = 0x4000,
};

/* Packet3 commands, listed in ascending opcode order */
enum {
    IT_NOP                   = 0x10,
    IT_INDIRECT_BUFFER_END   = 0x17,
    IT_SET_PREDICATION       = 0x20,
    IT_REG_RMW               = 0x21,
    IT_COND_EXEC             = 0x22,
    IT_PRED_EXEC             = 0x23,
    IT_START_3D_CMDBUF       = 0x24,
    IT_DRAW_INDEX_2          = 0x27,
    IT_CONTEXT_CONTROL       = 0x28,
    IT_DRAW_INDEX_IMMD_BE    = 0x29,
    IT_INDEX_TYPE            = 0x2A,
    IT_DRAW_INDEX            = 0x2B,
    IT_DRAW_INDEX_AUTO       = 0x2D,
    IT_DRAW_INDEX_IMMD       = 0x2E,
    IT_NUM_INSTANCES         = 0x2F,
    IT_INDIRECT_BUFFER       = 0x32,
    IT_STRMOUT_BUFFER_UPDATE = 0x34,
    IT_INDIRECT_BUFFER_MP    = 0x38,
    IT_MEM_SEMAPHORE         = 0x39,
    IT_MPEG_INDEX            = 0x3A,
    IT_WAIT_REG_MEM          = 0x3C,
    IT_MEM_WRITE             = 0x3D,
    IT_CP_INTERRUPT          = 0x40,
    IT_SURFACE_SYNC          = 0x43,
    IT_ME_INITIALIZE         = 0x44,
    IT_COND_WRITE            = 0x45,
    IT_EVENT_WRITE           = 0x46,
    IT_EVENT_WRITE_EOP       = 0x47,
    IT_ONE_REG_WRITE         = 0x57,
    IT_SET_CONFIG_REG        = 0x68,
    IT_SET_CONTEXT_REG       = 0x69,
    IT_SET_ALU_CONST         = 0x6A,
    IT_SET_BOOL_CONST        = 0x6B,
    IT_SET_LOOP_CONST        = 0x6C,
    IT_SET_RESOURCE          = 0x6D,
    IT_SET_SAMPLER           = 0x6E,
    IT_SET_CTL_CONST         = 0x6F,
    IT_SURFACE_BASE_UPDATE   = 0x73,
};
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _AUTOREGS +#define _AUTOREGS + +enum { + + VGT_VTX_VECT_EJECT_REG = 0x000088b0, + PRIM_COUNT_mask = 0x3ff << 0, + PRIM_COUNT_shift = 0, + VGT_LAST_COPY_STATE = 0x000088c0, + SRC_STATE_ID_mask = 0x07 << 0, + SRC_STATE_ID_shift = 0, + DST_STATE_ID_mask = 0x07 << 16, + DST_STATE_ID_shift = 16, + VGT_CACHE_INVALIDATION = 0x000088c4, + CACHE_INVALIDATION_mask = 0x03 << 0, + CACHE_INVALIDATION_shift = 0, + VC_ONLY = 0x00, + TC_ONLY = 0x01, + VC_AND_TC = 0x02, + VS_NO_EXTRA_BUFFER_bit = 1 << 5, + VGT_GS_PER_ES = 0x000088c8, + VGT_ES_PER_GS = 0x000088cc, + VGT_GS_VERTEX_REUSE = 0x000088d4, + VERT_REUSE_mask = 0x1f << 0, + VERT_REUSE_shift = 0, + VGT_MC_LAT_CNTL = 0x000088d8, + MC_TIME_STAMP_RES_mask = 0x03 << 0, + MC_TIME_STAMP_RES_shift = 0, + X_0_992_MAX_LATENCY = 0x00, + X_0_496_MAX_LATENCY = 0x01, + X_0_248_MAX_LATENCY = 0x02, + X_0_124_MAX_LATENCY = 0x03, + VGT_GS_PER_VS = 0x000088e8, + GS_PER_VS_mask = 0x0f << 0, + GS_PER_VS_shift = 0, + VGT_CNTL_STATUS = 0x000088f0, + VGT_OUT_INDX_BUSY_bit = 1 << 0, + VGT_OUT_BUSY_bit = 1 << 1, + VGT_PT_BUSY_bit = 1 << 2, + VGT_TE_BUSY_bit = 1 << 3, + VGT_VR_BUSY_bit = 1 << 4, + VGT_GRP_BUSY_bit = 1 << 5, + VGT_DMA_REQ_BUSY_bit = 1 << 6, + VGT_DMA_BUSY_bit = 1 << 7, + VGT_GS_BUSY_bit = 1 << 8, + VGT_BUSY_bit = 1 << 9, + VGT_PRIMITIVE_TYPE = 0x00008958, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask = 0x3f << 0, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift = 0, + DI_PT_NONE = 0x00, + DI_PT_POINTLIST = 0x01, + DI_PT_LINELIST = 0x02, + DI_PT_LINESTRIP = 0x03, + DI_PT_TRILIST = 0x04, + DI_PT_TRIFAN = 0x05, + DI_PT_TRISTRIP = 0x06, + DI_PT_UNUSED_0 = 0x07, + DI_PT_UNUSED_1 = 0x08, + DI_PT_UNUSED_2 = 0x09, + DI_PT_LINELIST_ADJ = 0x0a, + DI_PT_LINESTRIP_ADJ = 0x0b, + 
DI_PT_TRILIST_ADJ = 0x0c, + DI_PT_TRISTRIP_ADJ = 0x0d, + DI_PT_UNUSED_3 = 0x0e, + DI_PT_UNUSED_4 = 0x0f, + DI_PT_TRI_WITH_WFLAGS = 0x10, + DI_PT_RECTLIST = 0x11, + DI_PT_LINELOOP = 0x12, + DI_PT_QUADLIST = 0x13, + DI_PT_QUADSTRIP = 0x14, + DI_PT_POLYGON = 0x15, + DI_PT_2D_COPY_RECT_LIST_V0 = 0x16, + DI_PT_2D_COPY_RECT_LIST_V1 = 0x17, + DI_PT_2D_COPY_RECT_LIST_V2 = 0x18, + DI_PT_2D_COPY_RECT_LIST_V3 = 0x19, + DI_PT_2D_FILL_RECT_LIST = 0x1a, + DI_PT_2D_LINE_STRIP = 0x1b, + DI_PT_2D_TRI_STRIP = 0x1c, + VGT_INDEX_TYPE = 0x0000895c, + INDEX_TYPE_mask = 0x03 << 0, + INDEX_TYPE_shift = 0, + DI_INDEX_SIZE_16_BIT = 0x00, + DI_INDEX_SIZE_32_BIT = 0x01, + VGT_STRMOUT_BUFFER_FILLED_SIZE_0 = 0x00008960, + VGT_STRMOUT_BUFFER_FILLED_SIZE_1 = 0x00008964, + VGT_STRMOUT_BUFFER_FILLED_SIZE_2 = 0x00008968, + VGT_STRMOUT_BUFFER_FILLED_SIZE_3 = 0x0000896c, + VGT_NUM_INDICES = 0x00008970, + VGT_NUM_INSTANCES = 0x00008974, + PA_CL_CNTL_STATUS = 0x00008a10, + CL_BUSY_bit = 1 << 31, + PA_CL_ENHANCE = 0x00008a14, + CLIP_VTX_REORDER_ENA_bit = 1 << 0, + NUM_CLIP_SEQ_mask = 0x03 << 1, + NUM_CLIP_SEQ_shift = 1, + CLIPPED_PRIM_SEQ_STALL_bit = 1 << 3, + VE_NAN_PROC_DISABLE_bit = 1 << 4, + PA_SU_CNTL_STATUS = 0x00008a50, + SU_BUSY_bit = 1 << 31, + PA_SC_LINE_STIPPLE_STATE = 0x00008b10, + CURRENT_PTR_mask = 0x0f << 0, + CURRENT_PTR_shift = 0, + CURRENT_COUNT_mask = 0xff << 8, + CURRENT_COUNT_shift = 8, + PA_SC_MULTI_CHIP_CNTL = 0x00008b20, + LOG2_NUM_CHIPS_mask = 0x07 << 0, + LOG2_NUM_CHIPS_shift = 0, + MULTI_CHIP_TILE_SIZE_mask = 0x03 << 3, + MULTI_CHIP_TILE_SIZE_shift = 3, + X_16_X_16_PIXEL_TILE_PER_CHIP = 0x00, + X_32_X_32_PIXEL_TILE_PER_CHIP = 0x01, + X_64_X_64_PIXEL_TILE_PER_CHIP = 0x02, + X_128X128_PIXEL_TILE_PER_CHIP = 0x03, + CHIP_TILE_X_LOC_mask = 0x07 << 5, + CHIP_TILE_X_LOC_shift = 5, + CHIP_TILE_Y_LOC_mask = 0x07 << 8, + CHIP_TILE_Y_LOC_shift = 8, + CHIP_SUPER_TILE_B_bit = 1 << 11, + PA_SC_AA_SAMPLE_LOCS_2S = 0x00008b40, + S0_X_mask = 0x0f << 0, + S0_X_shift = 0, + S0_Y_mask = 0x0f << 4, 
+ S0_Y_shift = 4, + S1_X_mask = 0x0f << 8, + S1_X_shift = 8, + S1_Y_mask = 0x0f << 12, + S1_Y_shift = 12, + PA_SC_AA_SAMPLE_LOCS_4S = 0x00008b44, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ + S2_X_mask = 0x0f << 16, + S2_X_shift = 16, + S2_Y_mask = 0x0f << 20, + S2_Y_shift = 20, + S3_X_mask = 0x0f << 24, + S3_X_shift = 24, + S3_Y_mask = 0x0f << 28, + S3_Y_shift = 28, + PA_SC_AA_SAMPLE_LOCS_8S_WD0 = 0x00008b48, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_8S_WD1 = 0x00008b4c, + S4_X_mask = 0x0f << 0, + S4_X_shift = 0, + S4_Y_mask = 0x0f << 4, + S4_Y_shift = 4, + S5_X_mask = 0x0f << 8, + S5_X_shift = 8, + S5_Y_mask = 0x0f << 12, + S5_Y_shift = 12, + S6_X_mask = 0x0f << 16, + S6_X_shift = 16, + S6_Y_mask = 0x0f << 20, + S6_Y_shift = 20, + S7_X_mask = 0x0f << 24, + S7_X_shift = 24, + S7_Y_mask = 0x0f << 28, + S7_Y_shift = 28, + PA_SC_CNTL_STATUS = 0x00008be0, + MPASS_OVERFLOW_bit = 1 << 30, + PA_SC_ENHANCE = 0x00008bf0, + FORCE_EOV_MAX_CLK_CNT_mask = 0xfff << 0, + FORCE_EOV_MAX_CLK_CNT_shift = 0, + FORCE_EOV_MAX_TILE_CNT_mask = 0xfff << 12, + FORCE_EOV_MAX_TILE_CNT_shift = 12, + SQ_CONFIG = 0x00008c00, + VC_ENABLE_bit = 1 << 0, + EXPORT_SRC_C_bit = 1 << 1, + DX9_CONSTS_bit = 1 << 2, + ALU_INST_PREFER_VECTOR_bit = 1 << 3, + SQ_CONFIG__DX10_CLAMP_bit = 1 << 4, + ALU_PREFER_ONE_WATERFALL_bit = 1 << 5, + ALU_MAX_ONE_WATERFALL_bit = 1 << 6, + CLAUSE_SEQ_PRIO_mask = 0x03 << 8, + CLAUSE_SEQ_PRIO_shift = 
8, + SQ_CL_PRIO_RND_ROBIN = 0x00, + SQ_CL_PRIO_MACRO_SEQ = 0x01, + SQ_CL_PRIO_NONE = 0x02, + PS_PRIO_mask = 0x03 << 24, + PS_PRIO_shift = 24, + VS_PRIO_mask = 0x03 << 26, + VS_PRIO_shift = 26, + GS_PRIO_mask = 0x03 << 28, + GS_PRIO_shift = 28, + ES_PRIO_mask = 0x03 << 30, + ES_PRIO_shift = 30, + SQ_GPR_RESOURCE_MGMT_1 = 0x00008c04, + NUM_PS_GPRS_mask = 0xff << 0, + NUM_PS_GPRS_shift = 0, + NUM_VS_GPRS_mask = 0xff << 16, + NUM_VS_GPRS_shift = 16, + NUM_CLAUSE_TEMP_GPRS_mask = 0x0f << 28, + NUM_CLAUSE_TEMP_GPRS_shift = 28, + SQ_GPR_RESOURCE_MGMT_2 = 0x00008c08, + NUM_GS_GPRS_mask = 0xff << 0, + NUM_GS_GPRS_shift = 0, + NUM_ES_GPRS_mask = 0xff << 16, + NUM_ES_GPRS_shift = 16, + SQ_THREAD_RESOURCE_MGMT = 0x00008c0c, + NUM_PS_THREADS_mask = 0xff << 0, + NUM_PS_THREADS_shift = 0, + NUM_VS_THREADS_mask = 0xff << 8, + NUM_VS_THREADS_shift = 8, + NUM_GS_THREADS_mask = 0xff << 16, + NUM_GS_THREADS_shift = 16, + NUM_ES_THREADS_mask = 0xff << 24, + NUM_ES_THREADS_shift = 24, + SQ_STACK_RESOURCE_MGMT_1 = 0x00008c10, + NUM_PS_STACK_ENTRIES_mask = 0xfff << 0, + NUM_PS_STACK_ENTRIES_shift = 0, + NUM_VS_STACK_ENTRIES_mask = 0xfff << 16, + NUM_VS_STACK_ENTRIES_shift = 16, + SQ_STACK_RESOURCE_MGMT_2 = 0x00008c14, + NUM_GS_STACK_ENTRIES_mask = 0xfff << 0, + NUM_GS_STACK_ENTRIES_shift = 0, + NUM_ES_STACK_ENTRIES_mask = 0xfff << 16, + NUM_ES_STACK_ENTRIES_shift = 16, + SQ_ESGS_RING_BASE = 0x00008c40, + SQ_ESGS_RING_SIZE = 0x00008c44, + SQ_GSVS_RING_BASE = 0x00008c48, + SQ_GSVS_RING_SIZE = 0x00008c4c, + SQ_ESTMP_RING_BASE = 0x00008c50, + SQ_ESTMP_RING_SIZE = 0x00008c54, + SQ_GSTMP_RING_BASE = 0x00008c58, + SQ_GSTMP_RING_SIZE = 0x00008c5c, + SQ_VSTMP_RING_BASE = 0x00008c60, + SQ_VSTMP_RING_SIZE = 0x00008c64, + SQ_PSTMP_RING_BASE = 0x00008c68, + SQ_PSTMP_RING_SIZE = 0x00008c6c, + SQ_FBUF_RING_BASE = 0x00008c70, + SQ_FBUF_RING_SIZE = 0x00008c74, + SQ_REDUC_RING_BASE = 0x00008c78, + SQ_REDUC_RING_SIZE = 0x00008c7c, + SQ_ALU_WORD1_OP3 = 0x00008dfc, + SRC2_SEL_mask = 0x1ff << 0, + 
SRC2_SEL_shift = 0, + SQ_ALU_SRC_0 = 0xf8, + SQ_ALU_SRC_1 = 0xf9, + SQ_ALU_SRC_1_INT = 0xfa, + SQ_ALU_SRC_M_1_INT = 0xfb, + SQ_ALU_SRC_0_5 = 0xfc, + SQ_ALU_SRC_LITERAL = 0xfd, + SQ_ALU_SRC_PV = 0xfe, + SQ_ALU_SRC_PS = 0xff, + SRC2_REL_bit = 1 << 9, + SRC2_CHAN_mask = 0x03 << 10, + SRC2_CHAN_shift = 10, + SQ_CHAN_X = 0x00, + SQ_CHAN_Y = 0x01, + SQ_CHAN_Z = 0x02, + SQ_CHAN_W = 0x03, + SRC2_NEG_bit = 1 << 12, + SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, + SQ_ALU_WORD1_OP3__ALU_INST_shift = 13, + SQ_OP3_INST_MUL_LIT = 0x0c, + SQ_OP3_INST_MUL_LIT_M2 = 0x0d, + SQ_OP3_INST_MUL_LIT_M4 = 0x0e, + SQ_OP3_INST_MUL_LIT_D2 = 0x0f, + SQ_OP3_INST_MULADD = 0x10, + SQ_OP3_INST_MULADD_M2 = 0x11, + SQ_OP3_INST_MULADD_M4 = 0x12, + SQ_OP3_INST_MULADD_D2 = 0x13, + SQ_OP3_INST_MULADD_IEEE = 0x14, + SQ_OP3_INST_MULADD_IEEE_M2 = 0x15, + SQ_OP3_INST_MULADD_IEEE_M4 = 0x16, + SQ_OP3_INST_MULADD_IEEE_D2 = 0x17, + SQ_OP3_INST_CNDE = 0x18, + SQ_OP3_INST_CNDGT = 0x19, + SQ_OP3_INST_CNDGE = 0x1a, + SQ_OP3_INST_CNDE_INT = 0x1c, + SQ_OP3_INST_CNDGT_INT = 0x1d, + SQ_OP3_INST_CNDGE_INT = 0x1e, + SQ_TEX_WORD2 = 0x00008dfc, + OFFSET_X_mask = 0x1f << 0, + OFFSET_X_shift = 0, + OFFSET_Y_mask = 0x1f << 5, + OFFSET_Y_shift = 5, + OFFSET_Z_mask = 0x1f << 10, + OFFSET_Z_shift = 10, + SAMPLER_ID_mask = 0x1f << 15, + SAMPLER_ID_shift = 15, + SQ_TEX_WORD2__SRC_SEL_X_mask = 0x07 << 20, + SQ_TEX_WORD2__SRC_SEL_X_shift = 20, + SQ_SEL_X = 0x00, + SQ_SEL_Y = 0x01, + SQ_SEL_Z = 0x02, + SQ_SEL_W = 0x03, + SQ_SEL_0 = 0x04, + SQ_SEL_1 = 0x05, + SRC_SEL_Y_mask = 0x07 << 23, + SRC_SEL_Y_shift = 23, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SRC_SEL_Z_mask = 0x07 << 26, + SRC_SEL_Z_shift = 26, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SRC_SEL_W_mask = 0x07 << 29, + SRC_SEL_W_shift = 29, +/* SQ_SEL_X = 0x00, */ +/* 
SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc, + BURST_COUNT_mask = 0x0f << 17, + BURST_COUNT_shift = 17, + END_OF_PROGRAM_bit = 1 << 21, + VALID_PIXEL_MODE_bit = 1 << 22, + SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23, + SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 23, + SQ_CF_INST_MEM_STREAM0 = 0x20, + SQ_CF_INST_MEM_STREAM1 = 0x21, + SQ_CF_INST_MEM_STREAM2 = 0x22, + SQ_CF_INST_MEM_STREAM3 = 0x23, + SQ_CF_INST_MEM_SCRATCH = 0x24, + SQ_CF_INST_MEM_REDUCTION = 0x25, + SQ_CF_INST_MEM_RING = 0x26, + SQ_CF_INST_EXPORT = 0x27, + SQ_CF_INST_EXPORT_DONE = 0x28, + WHOLE_QUAD_MODE_bit = 1 << 30, + BARRIER_bit = 1 << 31, + SQ_CF_ALU_WORD1 = 0x00008dfc, + KCACHE_MODE1_mask = 0x03 << 0, + KCACHE_MODE1_shift = 0, + SQ_CF_KCACHE_NOP = 0x00, + SQ_CF_KCACHE_LOCK_1 = 0x01, + SQ_CF_KCACHE_LOCK_2 = 0x02, + SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, + KCACHE_ADDR0_mask = 0xff << 2, + KCACHE_ADDR0_shift = 2, + KCACHE_ADDR1_mask = 0xff << 10, + KCACHE_ADDR1_shift = 10, + SQ_CF_ALU_WORD1__COUNT_mask = 0x7f << 18, + SQ_CF_ALU_WORD1__COUNT_shift = 18, + SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, + SQ_CF_ALU_WORD1__CF_INST_mask = 0x0f << 26, + SQ_CF_ALU_WORD1__CF_INST_shift = 26, + SQ_CF_INST_ALU = 0x08, + SQ_CF_INST_ALU_PUSH_BEFORE = 0x09, + SQ_CF_INST_ALU_POP_AFTER = 0x0a, + SQ_CF_INST_ALU_POP2_AFTER = 0x0b, + SQ_CF_INST_ALU_CONTINUE = 0x0d, + SQ_CF_INST_ALU_BREAK = 0x0e, + SQ_CF_INST_ALU_ELSE_AFTER = 0x0f, +/* WHOLE_QUAD_MODE_bit = 1 << 30, */ +/* BARRIER_bit = 1 << 31, */ + SQ_TEX_WORD1 = 0x00008dfc, + SQ_TEX_WORD1__DST_GPR_mask = 0x7f << 0, + SQ_TEX_WORD1__DST_GPR_shift = 0, + SQ_TEX_WORD1__DST_REL_bit = 1 << 7, + SQ_TEX_WORD1__DST_SEL_X_mask = 0x07 << 9, + SQ_TEX_WORD1__DST_SEL_X_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_SEL_MASK = 0x07, + 
SQ_TEX_WORD1__DST_SEL_Y_mask = 0x07 << 12, + SQ_TEX_WORD1__DST_SEL_Y_shift = 12, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_TEX_WORD1__DST_SEL_Z_mask = 0x07 << 15, + SQ_TEX_WORD1__DST_SEL_Z_shift = 15, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_TEX_WORD1__DST_SEL_W_mask = 0x07 << 18, + SQ_TEX_WORD1__DST_SEL_W_shift = 18, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_TEX_WORD1__LOD_BIAS_mask = 0x7f << 21, + SQ_TEX_WORD1__LOD_BIAS_shift = 21, + COORD_TYPE_X_bit = 1 << 28, + COORD_TYPE_Y_bit = 1 << 29, + COORD_TYPE_Z_bit = 1 << 30, + COORD_TYPE_W_bit = 1 << 31, + SQ_VTX_WORD0 = 0x00008dfc, + VTX_INST_mask = 0x1f << 0, + VTX_INST_shift = 0, + SQ_VTX_INST_FETCH = 0x00, + SQ_VTX_INST_SEMANTIC = 0x01, + FETCH_TYPE_mask = 0x03 << 5, + FETCH_TYPE_shift = 5, + SQ_VTX_FETCH_VERTEX_DATA = 0x00, + SQ_VTX_FETCH_INSTANCE_DATA = 0x01, + SQ_VTX_FETCH_NO_INDEX_OFFSET = 0x02, + FETCH_WHOLE_QUAD_bit = 1 << 7, + BUFFER_ID_mask = 0xff << 8, + BUFFER_ID_shift = 8, + SRC_GPR_mask = 0x7f << 16, + SRC_GPR_shift = 16, + SRC_REL_bit = 1 << 23, + SQ_VTX_WORD0__SRC_SEL_X_mask = 0x03 << 24, + SQ_VTX_WORD0__SRC_SEL_X_shift = 24, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ + MEGA_FETCH_COUNT_mask = 0x3f << 26, + MEGA_FETCH_COUNT_shift = 26, + SQ_CF_ALLOC_EXPORT_WORD1_SWIZ = 0x00008dfc, + SEL_X_mask = 0x07 << 0, + SEL_X_shift = 0, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SEL_Y_mask = 0x07 << 3, + SEL_Y_shift = 3, +/* SQ_SEL_X = 
0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SEL_Z_mask = 0x07 << 6, + SEL_Z_shift = 6, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SEL_W_mask = 0x07 << 9, + SEL_W_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_ALU_WORD1 = 0x00008dfc, + ENCODING_mask = 0x07 << 15, + ENCODING_shift = 15, + BANK_SWIZZLE_mask = 0x07 << 18, + BANK_SWIZZLE_shift = 18, + SQ_ALU_VEC_012 = 0x00, + SQ_ALU_VEC_021 = 0x01, + SQ_ALU_VEC_120 = 0x02, + SQ_ALU_VEC_102 = 0x03, + SQ_ALU_VEC_201 = 0x04, + SQ_ALU_VEC_210 = 0x05, + SQ_ALU_WORD1__DST_GPR_mask = 0x7f << 21, + SQ_ALU_WORD1__DST_GPR_shift = 21, + SQ_ALU_WORD1__DST_REL_bit = 1 << 28, + DST_CHAN_mask = 0x03 << 29, + DST_CHAN_shift = 29, + CHAN_X = 0x00, + CHAN_Y = 0x01, + CHAN_Z = 0x02, + CHAN_W = 0x03, + SQ_ALU_WORD1__CLAMP_bit = 1 << 31, + SQ_CF_ALU_WORD0 = 0x00008dfc, + SQ_CF_ALU_WORD0__ADDR_mask = 0x3fffff << 0, + SQ_CF_ALU_WORD0__ADDR_shift = 0, + KCACHE_BANK0_mask = 0x0f << 22, + KCACHE_BANK0_shift = 22, + KCACHE_BANK1_mask = 0x0f << 26, + KCACHE_BANK1_shift = 26, + KCACHE_MODE0_mask = 0x03 << 30, + KCACHE_MODE0_shift = 30, +/* SQ_CF_KCACHE_NOP = 0x00, */ +/* SQ_CF_KCACHE_LOCK_1 = 0x01, */ +/* SQ_CF_KCACHE_LOCK_2 = 0x02, */ +/* SQ_CF_KCACHE_LOCK_LOOP_INDEX = 0x03, */ + SQ_VTX_WORD2 = 0x00008dfc, + SQ_VTX_WORD2__OFFSET_mask = 0xffff << 0, + SQ_VTX_WORD2__OFFSET_shift = 0, + SQ_VTX_WORD2__ENDIAN_SWAP_mask = 0x03 << 16, + SQ_VTX_WORD2__ENDIAN_SWAP_shift = 16, + SQ_ENDIAN_NONE = 0x00, + SQ_ENDIAN_8IN16 = 0x01, + SQ_ENDIAN_8IN32 = 0x02, + CONST_BUF_NO_STRIDE_bit = 1 << 18, + MEGA_FETCH_bit = 1 << 19, + SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20, + SQ_ALU_WORD1_OP2_V2 = 
0x00008dfc, + SRC0_ABS_bit = 1 << 0, + SRC1_ABS_bit = 1 << 1, + UPDATE_EXECUTE_MASK_bit = 1 << 2, + UPDATE_PRED_bit = 1 << 3, + WRITE_MASK_bit = 1 << 4, + SQ_ALU_WORD1_OP2_V2__OMOD_mask = 0x03 << 5, + SQ_ALU_WORD1_OP2_V2__OMOD_shift = 5, + SQ_ALU_OMOD_OFF = 0x00, + SQ_ALU_OMOD_M2 = 0x01, + SQ_ALU_OMOD_M4 = 0x02, + SQ_ALU_OMOD_D2 = 0x03, + SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7, + SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7, + SQ_OP2_INST_ADD = 0x00, + SQ_OP2_INST_MUL = 0x01, + SQ_OP2_INST_MUL_IEEE = 0x02, + SQ_OP2_INST_MAX = 0x03, + SQ_OP2_INST_MIN = 0x04, + SQ_OP2_INST_MAX_DX10 = 0x05, + SQ_OP2_INST_MIN_DX10 = 0x06, + SQ_OP2_INST_SETE = 0x08, + SQ_OP2_INST_SETGT = 0x09, + SQ_OP2_INST_SETGE = 0x0a, + SQ_OP2_INST_SETNE = 0x0b, + SQ_OP2_INST_SETE_DX10 = 0x0c, + SQ_OP2_INST_SETGT_DX10 = 0x0d, + SQ_OP2_INST_SETGE_DX10 = 0x0e, + SQ_OP2_INST_SETNE_DX10 = 0x0f, + SQ_OP2_INST_FRACT = 0x10, + SQ_OP2_INST_TRUNC = 0x11, + SQ_OP2_INST_CEIL = 0x12, + SQ_OP2_INST_RNDNE = 0x13, + SQ_OP2_INST_FLOOR = 0x14, + SQ_OP2_INST_MOVA = 0x15, + SQ_OP2_INST_MOVA_FLOOR = 0x16, + SQ_OP2_INST_MOVA_INT = 0x18, + SQ_OP2_INST_MOV = 0x19, + SQ_OP2_INST_NOP = 0x1a, + SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, + SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, + SQ_OP2_INST_PRED_SETE = 0x20, + SQ_OP2_INST_PRED_SETGT = 0x21, + SQ_OP2_INST_PRED_SETGE = 0x22, + SQ_OP2_INST_PRED_SETNE = 0x23, + SQ_OP2_INST_PRED_SET_INV = 0x24, + SQ_OP2_INST_PRED_SET_POP = 0x25, + SQ_OP2_INST_PRED_SET_CLR = 0x26, + SQ_OP2_INST_PRED_SET_RESTORE = 0x27, + SQ_OP2_INST_PRED_SETE_PUSH = 0x28, + SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, + SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, + SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, + SQ_OP2_INST_KILLE = 0x2c, + SQ_OP2_INST_KILLGT = 0x2d, + SQ_OP2_INST_KILLGE = 0x2e, + SQ_OP2_INST_KILLNE = 0x2f, + SQ_OP2_INST_AND_INT = 0x30, + SQ_OP2_INST_OR_INT = 0x31, + SQ_OP2_INST_XOR_INT = 0x32, + SQ_OP2_INST_NOT_INT = 0x33, + SQ_OP2_INST_ADD_INT = 0x34, + SQ_OP2_INST_SUB_INT = 0x35, + SQ_OP2_INST_MAX_INT = 0x36, + 
SQ_OP2_INST_MIN_INT = 0x37, + SQ_OP2_INST_MAX_UINT = 0x38, + SQ_OP2_INST_MIN_UINT = 0x39, + SQ_OP2_INST_SETE_INT = 0x3a, + SQ_OP2_INST_SETGT_INT = 0x3b, + SQ_OP2_INST_SETGE_INT = 0x3c, + SQ_OP2_INST_SETNE_INT = 0x3d, + SQ_OP2_INST_SETGT_UINT = 0x3e, + SQ_OP2_INST_SETGE_UINT = 0x3f, + SQ_OP2_INST_KILLGT_UINT = 0x40, + SQ_OP2_INST_KILLGE_UINT = 0x41, + SQ_OP2_INST_PRED_SETE_INT = 0x42, + SQ_OP2_INST_PRED_SETGT_INT = 0x43, + SQ_OP2_INST_PRED_SETGE_INT = 0x44, + SQ_OP2_INST_PRED_SETNE_INT = 0x45, + SQ_OP2_INST_KILLE_INT = 0x46, + SQ_OP2_INST_KILLGT_INT = 0x47, + SQ_OP2_INST_KILLGE_INT = 0x48, + SQ_OP2_INST_KILLNE_INT = 0x49, + SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, + SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, + SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, + SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, + SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, + SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, + SQ_OP2_INST_DOT4 = 0x50, + SQ_OP2_INST_DOT4_IEEE = 0x51, + SQ_OP2_INST_CUBE = 0x52, + SQ_OP2_INST_MAX4 = 0x53, + SQ_OP2_INST_MOVA_GPR_INT = 0x60, + SQ_OP2_INST_EXP_IEEE = 0x61, + SQ_OP2_INST_LOG_CLAMPED = 0x62, + SQ_OP2_INST_LOG_IEEE = 0x63, + SQ_OP2_INST_RECIP_CLAMPED = 0x64, + SQ_OP2_INST_RECIP_FF = 0x65, + SQ_OP2_INST_RECIP_IEEE = 0x66, + SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, + SQ_OP2_INST_RECIPSQRT_FF = 0x68, + SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, + SQ_OP2_INST_SQRT_IEEE = 0x6a, + SQ_OP2_INST_FLT_TO_INT = 0x6b, + SQ_OP2_INST_INT_TO_FLT = 0x6c, + SQ_OP2_INST_UINT_TO_FLT = 0x6d, + SQ_OP2_INST_SIN = 0x6e, + SQ_OP2_INST_COS = 0x6f, + SQ_OP2_INST_ASHR_INT = 0x70, + SQ_OP2_INST_LSHR_INT = 0x71, + SQ_OP2_INST_LSHL_INT = 0x72, + SQ_OP2_INST_MULLO_INT = 0x73, + SQ_OP2_INST_MULHI_INT = 0x74, + SQ_OP2_INST_MULLO_UINT = 0x75, + SQ_OP2_INST_MULHI_UINT = 0x76, + SQ_OP2_INST_RECIP_INT = 0x77, + SQ_OP2_INST_RECIP_UINT = 0x78, + SQ_OP2_INST_FLT_TO_UINT = 0x79, + SQ_CF_ALLOC_EXPORT_WORD1_BUF = 0x00008dfc, + ARRAY_SIZE_mask = 0xfff << 0, + ARRAY_SIZE_shift = 0, + COMP_MASK_mask = 0x0f << 12, + COMP_MASK_shift = 12, + 
SQ_CF_WORD0 = 0x00008dfc, + SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc, + ARRAY_BASE_mask = 0x1fff << 0, + ARRAY_BASE_shift = 0, + SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x03 << 13, + SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13, + SQ_EXPORT_PIXEL = 0x00, + SQ_EXPORT_POS = 0x01, + SQ_EXPORT_PARAM = 0x02, + X_UNUSED_FOR_SX_EXPORTS = 0x03, + RW_GPR_mask = 0x7f << 15, + RW_GPR_shift = 15, + RW_REL_bit = 1 << 22, + INDEX_GPR_mask = 0x7f << 23, + INDEX_GPR_shift = 23, + ELEM_SIZE_mask = 0x03 << 30, + ELEM_SIZE_shift = 30, + SQ_VTX_WORD1 = 0x00008dfc, + SQ_VTX_WORD1__DST_SEL_X_mask = 0x07 << 9, + SQ_VTX_WORD1__DST_SEL_X_shift = 9, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_VTX_WORD1__DST_SEL_Y_mask = 0x07 << 12, + SQ_VTX_WORD1__DST_SEL_Y_shift = 12, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_VTX_WORD1__DST_SEL_Z_mask = 0x07 << 15, + SQ_VTX_WORD1__DST_SEL_Z_shift = 15, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + SQ_VTX_WORD1__DST_SEL_W_mask = 0x07 << 18, + SQ_VTX_WORD1__DST_SEL_W_shift = 18, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ +/* SQ_SEL_MASK = 0x07, */ + USE_CONST_FIELDS_bit = 1 << 21, + SQ_VTX_WORD1__DATA_FORMAT_mask = 0x3f << 22, + SQ_VTX_WORD1__DATA_FORMAT_shift = 22, + SQ_VTX_WORD1__NUM_FORMAT_ALL_mask = 0x03 << 28, + SQ_VTX_WORD1__NUM_FORMAT_ALL_shift = 28, + SQ_NUM_FORMAT_NORM = 0x00, + SQ_NUM_FORMAT_INT = 0x01, + SQ_NUM_FORMAT_SCALED = 0x02, + SQ_VTX_WORD1__FORMAT_COMP_ALL_bit = 1 << 30, + SQ_VTX_WORD1__SRF_MODE_ALL_bit = 1 << 31, + SQ_ALU_WORD1_OP2 = 0x00008dfc, +/* SRC0_ABS_bit 
= 1 << 0, */ +/* SRC1_ABS_bit = 1 << 1, */ +/* UPDATE_EXECUTE_MASK_bit = 1 << 2, */ +/* UPDATE_PRED_bit = 1 << 3, */ +/* WRITE_MASK_bit = 1 << 4, */ + FOG_MERGE_bit = 1 << 5, + SQ_ALU_WORD1_OP2__OMOD_mask = 0x03 << 6, + SQ_ALU_WORD1_OP2__OMOD_shift = 6, +/* SQ_ALU_OMOD_OFF = 0x00, */ +/* SQ_ALU_OMOD_M2 = 0x01, */ +/* SQ_ALU_OMOD_M4 = 0x02, */ +/* SQ_ALU_OMOD_D2 = 0x03, */ + SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8, + SQ_ALU_WORD1_OP2__ALU_INST_shift = 8, +/* SQ_OP2_INST_ADD = 0x00, */ +/* SQ_OP2_INST_MUL = 0x01, */ +/* SQ_OP2_INST_MUL_IEEE = 0x02, */ +/* SQ_OP2_INST_MAX = 0x03, */ +/* SQ_OP2_INST_MIN = 0x04, */ +/* SQ_OP2_INST_MAX_DX10 = 0x05, */ +/* SQ_OP2_INST_MIN_DX10 = 0x06, */ +/* SQ_OP2_INST_SETE = 0x08, */ +/* SQ_OP2_INST_SETGT = 0x09, */ +/* SQ_OP2_INST_SETGE = 0x0a, */ +/* SQ_OP2_INST_SETNE = 0x0b, */ +/* SQ_OP2_INST_SETE_DX10 = 0x0c, */ +/* SQ_OP2_INST_SETGT_DX10 = 0x0d, */ +/* SQ_OP2_INST_SETGE_DX10 = 0x0e, */ +/* SQ_OP2_INST_SETNE_DX10 = 0x0f, */ +/* SQ_OP2_INST_FRACT = 0x10, */ +/* SQ_OP2_INST_TRUNC = 0x11, */ +/* SQ_OP2_INST_CEIL = 0x12, */ +/* SQ_OP2_INST_RNDNE = 0x13, */ +/* SQ_OP2_INST_FLOOR = 0x14, */ +/* SQ_OP2_INST_MOVA = 0x15, */ +/* SQ_OP2_INST_MOVA_FLOOR = 0x16, */ +/* SQ_OP2_INST_MOVA_INT = 0x18, */ +/* SQ_OP2_INST_MOV = 0x19, */ +/* SQ_OP2_INST_NOP = 0x1a, */ +/* SQ_OP2_INST_PRED_SETGT_UINT = 0x1e, */ +/* SQ_OP2_INST_PRED_SETGE_UINT = 0x1f, */ +/* SQ_OP2_INST_PRED_SETE = 0x20, */ +/* SQ_OP2_INST_PRED_SETGT = 0x21, */ +/* SQ_OP2_INST_PRED_SETGE = 0x22, */ +/* SQ_OP2_INST_PRED_SETNE = 0x23, */ +/* SQ_OP2_INST_PRED_SET_INV = 0x24, */ +/* SQ_OP2_INST_PRED_SET_POP = 0x25, */ +/* SQ_OP2_INST_PRED_SET_CLR = 0x26, */ +/* SQ_OP2_INST_PRED_SET_RESTORE = 0x27, */ +/* SQ_OP2_INST_PRED_SETE_PUSH = 0x28, */ +/* SQ_OP2_INST_PRED_SETGT_PUSH = 0x29, */ +/* SQ_OP2_INST_PRED_SETGE_PUSH = 0x2a, */ +/* SQ_OP2_INST_PRED_SETNE_PUSH = 0x2b, */ +/* SQ_OP2_INST_KILLE = 0x2c, */ +/* SQ_OP2_INST_KILLGT = 0x2d, */ +/* SQ_OP2_INST_KILLGE = 0x2e, */ +/* 
SQ_OP2_INST_KILLNE = 0x2f, */ +/* SQ_OP2_INST_AND_INT = 0x30, */ +/* SQ_OP2_INST_OR_INT = 0x31, */ +/* SQ_OP2_INST_XOR_INT = 0x32, */ +/* SQ_OP2_INST_NOT_INT = 0x33, */ +/* SQ_OP2_INST_ADD_INT = 0x34, */ +/* SQ_OP2_INST_SUB_INT = 0x35, */ +/* SQ_OP2_INST_MAX_INT = 0x36, */ +/* SQ_OP2_INST_MIN_INT = 0x37, */ +/* SQ_OP2_INST_MAX_UINT = 0x38, */ +/* SQ_OP2_INST_MIN_UINT = 0x39, */ +/* SQ_OP2_INST_SETE_INT = 0x3a, */ +/* SQ_OP2_INST_SETGT_INT = 0x3b, */ +/* SQ_OP2_INST_SETGE_INT = 0x3c, */ +/* SQ_OP2_INST_SETNE_INT = 0x3d, */ +/* SQ_OP2_INST_SETGT_UINT = 0x3e, */ +/* SQ_OP2_INST_SETGE_UINT = 0x3f, */ +/* SQ_OP2_INST_KILLGT_UINT = 0x40, */ +/* SQ_OP2_INST_KILLGE_UINT = 0x41, */ +/* SQ_OP2_INST_PRED_SETE_INT = 0x42, */ +/* SQ_OP2_INST_PRED_SETGT_INT = 0x43, */ +/* SQ_OP2_INST_PRED_SETGE_INT = 0x44, */ +/* SQ_OP2_INST_PRED_SETNE_INT = 0x45, */ +/* SQ_OP2_INST_KILLE_INT = 0x46, */ +/* SQ_OP2_INST_KILLGT_INT = 0x47, */ +/* SQ_OP2_INST_KILLGE_INT = 0x48, */ +/* SQ_OP2_INST_KILLNE_INT = 0x49, */ +/* SQ_OP2_INST_PRED_SETE_PUSH_INT = 0x4a, */ +/* SQ_OP2_INST_PRED_SETGT_PUSH_INT = 0x4b, */ +/* SQ_OP2_INST_PRED_SETGE_PUSH_INT = 0x4c, */ +/* SQ_OP2_INST_PRED_SETNE_PUSH_INT = 0x4d, */ +/* SQ_OP2_INST_PRED_SETLT_PUSH_INT = 0x4e, */ +/* SQ_OP2_INST_PRED_SETLE_PUSH_INT = 0x4f, */ +/* SQ_OP2_INST_DOT4 = 0x50, */ +/* SQ_OP2_INST_DOT4_IEEE = 0x51, */ +/* SQ_OP2_INST_CUBE = 0x52, */ +/* SQ_OP2_INST_MAX4 = 0x53, */ +/* SQ_OP2_INST_MOVA_GPR_INT = 0x60, */ +/* SQ_OP2_INST_EXP_IEEE = 0x61, */ +/* SQ_OP2_INST_LOG_CLAMPED = 0x62, */ +/* SQ_OP2_INST_LOG_IEEE = 0x63, */ +/* SQ_OP2_INST_RECIP_CLAMPED = 0x64, */ +/* SQ_OP2_INST_RECIP_FF = 0x65, */ +/* SQ_OP2_INST_RECIP_IEEE = 0x66, */ +/* SQ_OP2_INST_RECIPSQRT_CLAMPED = 0x67, */ +/* SQ_OP2_INST_RECIPSQRT_FF = 0x68, */ +/* SQ_OP2_INST_RECIPSQRT_IEEE = 0x69, */ +/* SQ_OP2_INST_SQRT_IEEE = 0x6a, */ +/* SQ_OP2_INST_FLT_TO_INT = 0x6b, */ +/* SQ_OP2_INST_INT_TO_FLT = 0x6c, */ +/* SQ_OP2_INST_UINT_TO_FLT = 0x6d, */ +/* SQ_OP2_INST_SIN = 0x6e, */ +/* 
SQ_OP2_INST_COS = 0x6f, */ +/* SQ_OP2_INST_ASHR_INT = 0x70, */ +/* SQ_OP2_INST_LSHR_INT = 0x71, */ +/* SQ_OP2_INST_LSHL_INT = 0x72, */ +/* SQ_OP2_INST_MULLO_INT = 0x73, */ +/* SQ_OP2_INST_MULHI_INT = 0x74, */ +/* SQ_OP2_INST_MULLO_UINT = 0x75, */ +/* SQ_OP2_INST_MULHI_UINT = 0x76, */ +/* SQ_OP2_INST_RECIP_INT = 0x77, */ +/* SQ_OP2_INST_RECIP_UINT = 0x78, */ +/* SQ_OP2_INST_FLT_TO_UINT = 0x79, */ + SQ_CF_WORD1 = 0x00008dfc, + POP_COUNT_mask = 0x07 << 0, + POP_COUNT_shift = 0, + CF_CONST_mask = 0x1f << 3, + CF_CONST_shift = 3, + COND_mask = 0x03 << 8, + COND_shift = 8, + SQ_CF_COND_ACTIVE = 0x00, + SQ_CF_COND_FALSE = 0x01, + SQ_CF_COND_BOOL = 0x02, + SQ_CF_COND_NOT_BOOL = 0x03, + SQ_CF_WORD1__COUNT_mask = 0x07 << 10, + SQ_CF_WORD1__COUNT_shift = 10, + CALL_COUNT_mask = 0x3f << 13, + CALL_COUNT_shift = 13, + COUNT_3_bit = 1 << 19, +/* END_OF_PROGRAM_bit = 1 << 21, */ +/* VALID_PIXEL_MODE_bit = 1 << 22, */ + SQ_CF_WORD1__CF_INST_mask = 0x7f << 23, + SQ_CF_WORD1__CF_INST_shift = 23, + SQ_CF_INST_NOP = 0x00, + SQ_CF_INST_TEX = 0x01, + SQ_CF_INST_VTX = 0x02, + SQ_CF_INST_VTX_TC = 0x03, + SQ_CF_INST_LOOP_START = 0x04, + SQ_CF_INST_LOOP_END = 0x05, + SQ_CF_INST_LOOP_START_DX10 = 0x06, + SQ_CF_INST_LOOP_START_NO_AL = 0x07, + SQ_CF_INST_LOOP_CONTINUE = 0x08, + SQ_CF_INST_LOOP_BREAK = 0x09, + SQ_CF_INST_JUMP = 0x0a, + SQ_CF_INST_PUSH = 0x0b, + SQ_CF_INST_PUSH_ELSE = 0x0c, + SQ_CF_INST_ELSE = 0x0d, + SQ_CF_INST_POP = 0x0e, + SQ_CF_INST_POP_JUMP = 0x0f, + SQ_CF_INST_POP_PUSH = 0x10, + SQ_CF_INST_POP_PUSH_ELSE = 0x11, + SQ_CF_INST_CALL = 0x12, + SQ_CF_INST_CALL_FS = 0x13, + SQ_CF_INST_RETURN = 0x14, + SQ_CF_INST_EMIT_VERTEX = 0x15, + SQ_CF_INST_EMIT_CUT_VERTEX = 0x16, + SQ_CF_INST_CUT_VERTEX = 0x17, + SQ_CF_INST_KILL = 0x18, +/* WHOLE_QUAD_MODE_bit = 1 << 30, */ +/* BARRIER_bit = 1 << 31, */ + SQ_VTX_WORD1_SEM = 0x00008dfc, + SEMANTIC_ID_mask = 0xff << 0, + SEMANTIC_ID_shift = 0, + SQ_TEX_WORD0 = 0x00008dfc, + TEX_INST_mask = 0x1f << 0, + TEX_INST_shift = 0, + 
SQ_TEX_INST_VTX_FETCH = 0x00, + SQ_TEX_INST_VTX_SEMANTIC = 0x01, + SQ_TEX_INST_LD = 0x03, + SQ_TEX_INST_GET_TEXTURE_RESINFO = 0x04, + SQ_TEX_INST_GET_NUMBER_OF_SAMPLES = 0x05, + SQ_TEX_INST_GET_LOD = 0x06, + SQ_TEX_INST_GET_GRADIENTS_H = 0x07, + SQ_TEX_INST_GET_GRADIENTS_V = 0x08, + SQ_TEX_INST_GET_LERP = 0x09, + SQ_TEX_INST_RESERVED_10 = 0x0a, + SQ_TEX_INST_SET_GRADIENTS_H = 0x0b, + SQ_TEX_INST_SET_GRADIENTS_V = 0x0c, + SQ_TEX_INST_PASS = 0x0d, + X_Z_SET_INDEX_FOR_ARRAY_OF_CUBEMAPS = 0x0e, + SQ_TEX_INST_SAMPLE = 0x10, + SQ_TEX_INST_SAMPLE_L = 0x11, + SQ_TEX_INST_SAMPLE_LB = 0x12, + SQ_TEX_INST_SAMPLE_LZ = 0x13, + SQ_TEX_INST_SAMPLE_G = 0x14, + SQ_TEX_INST_SAMPLE_G_L = 0x15, + SQ_TEX_INST_SAMPLE_G_LB = 0x16, + SQ_TEX_INST_SAMPLE_G_LZ = 0x17, + SQ_TEX_INST_SAMPLE_C = 0x18, + SQ_TEX_INST_SAMPLE_C_L = 0x19, + SQ_TEX_INST_SAMPLE_C_LB = 0x1a, + SQ_TEX_INST_SAMPLE_C_LZ = 0x1b, + SQ_TEX_INST_SAMPLE_C_G = 0x1c, + SQ_TEX_INST_SAMPLE_C_G_L = 0x1d, + SQ_TEX_INST_SAMPLE_C_G_LB = 0x1e, + SQ_TEX_INST_SAMPLE_C_G_LZ = 0x1f, + BC_FRAC_MODE_bit = 1 << 5, +/* FETCH_WHOLE_QUAD_bit = 1 << 7, */ + RESOURCE_ID_mask = 0xff << 8, + RESOURCE_ID_shift = 8, +/* SRC_GPR_mask = 0x7f << 16, */ +/* SRC_GPR_shift = 16, */ +/* SRC_REL_bit = 1 << 23, */ + SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24, + SQ_VTX_WORD1_GPR = 0x00008dfc, + SQ_VTX_WORD1_GPR__DST_GPR_mask = 0x7f << 0, + SQ_VTX_WORD1_GPR__DST_GPR_shift = 0, + SQ_VTX_WORD1_GPR__DST_REL_bit = 1 << 7, + SQ_ALU_WORD0 = 0x00008dfc, + SRC0_SEL_mask = 0x1ff << 0, + SRC0_SEL_shift = 0, +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 = 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ + SRC0_REL_bit = 1 << 9, + SRC0_CHAN_mask = 0x03 << 10, + SRC0_CHAN_shift = 10, +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + SRC0_NEG_bit = 1 << 12, + SRC1_SEL_mask = 0x1ff << 
13, + SRC1_SEL_shift = 13, +/* SQ_ALU_SRC_0 = 0xf8, */ +/* SQ_ALU_SRC_1 = 0xf9, */ +/* SQ_ALU_SRC_1_INT = 0xfa, */ +/* SQ_ALU_SRC_M_1_INT = 0xfb, */ +/* SQ_ALU_SRC_0_5 = 0xfc, */ +/* SQ_ALU_SRC_LITERAL = 0xfd, */ +/* SQ_ALU_SRC_PV = 0xfe, */ +/* SQ_ALU_SRC_PS = 0xff, */ + SRC1_REL_bit = 1 << 22, + SRC1_CHAN_mask = 0x03 << 23, + SRC1_CHAN_shift = 23, +/* SQ_CHAN_X = 0x00, */ +/* SQ_CHAN_Y = 0x01, */ +/* SQ_CHAN_Z = 0x02, */ +/* SQ_CHAN_W = 0x03, */ + SRC1_NEG_bit = 1 << 25, + INDEX_MODE_mask = 0x07 << 26, + INDEX_MODE_shift = 26, + SQ_INDEX_AR_X = 0x00, + SQ_INDEX_AR_Y = 0x01, + SQ_INDEX_AR_Z = 0x02, + SQ_INDEX_AR_W = 0x03, + SQ_INDEX_LOOP = 0x04, + PRED_SEL_mask = 0x03 << 29, + PRED_SEL_shift = 29, + SQ_PRED_SEL_OFF = 0x00, + SQ_PRED_SEL_ZERO = 0x02, + SQ_PRED_SEL_ONE = 0x03, + LAST_bit = 1 << 31, + SX_EXPORT_BUFFER_SIZES = 0x0000900c, + COLOR_BUFFER_SIZE_mask = 0xff << 0, + COLOR_BUFFER_SIZE_shift = 0, + POSITION_BUFFER_SIZE_mask = 0xff << 8, + POSITION_BUFFER_SIZE_shift = 8, + SMX_BUFFER_SIZE_mask = 0xff << 16, + SMX_BUFFER_SIZE_shift = 16, + SX_MEMORY_EXPORT_BASE = 0x00009010, + SX_MEMORY_EXPORT_SIZE = 0x00009014, + SPI_CONFIG_CNTL = 0x00009100, + GPR_WRITE_PRIORITY_mask = 0x1f << 0, + GPR_WRITE_PRIORITY_shift = 0, + X_PRIORITY_ORDER = 0x00, + X_PRIORITY_ORDER_VS = 0x01, + DISABLE_INTERP_1_bit = 1 << 5, + DEBUG_THREAD_TYPE_SEL_mask = 0x03 << 6, + DEBUG_THREAD_TYPE_SEL_shift = 6, + DEBUG_GROUP_SEL_mask = 0x1f << 8, + DEBUG_GROUP_SEL_shift = 8, + DEBUG_GRBM_OVERRIDE_bit = 1 << 13, + SPI_CONFIG_CNTL_1 = 0x0000913c, + VTX_DONE_DELAY_mask = 0x0f << 0, + VTX_DONE_DELAY_shift = 0, + X_DELAY_10_CLKS = 0x00, + X_DELAY_11_CLKS = 0x01, + X_DELAY_12_CLKS = 0x02, + X_DELAY_13_CLKS = 0x03, + X_DELAY_14_CLKS = 0x04, + X_DELAY_15_CLKS = 0x05, + X_DELAY_16_CLKS = 0x06, + X_DELAY_17_CLKS = 0x07, + X_DELAY_2_CLKS = 0x08, + X_DELAY_3_CLKS = 0x09, + X_DELAY_4_CLKS = 0x0a, + X_DELAY_5_CLKS = 0x0b, + X_DELAY_6_CLKS = 0x0c, + X_DELAY_7_CLKS = 0x0d, + X_DELAY_8_CLKS = 0x0e, + 
X_DELAY_9_CLKS = 0x0f, + INTERP_ONE_PRIM_PER_ROW_bit = 1 << 4, + TD_FILTER4 = 0x00009400, + WEIGHT_1_mask = 0x7ff << 0, + WEIGHT_1_shift = 0, + WEIGHT_0_mask = 0x7ff << 11, + WEIGHT_0_shift = 11, + WEIGHT_PAIR_bit = 1 << 22, + PHASE_mask = 0x0f << 23, + PHASE_shift = 23, + DIRECTION_bit = 1 << 27, + TD_FILTER4_1 = 0x00009404, + TD_FILTER4_1_num = 35, +/* WEIGHT_1_mask = 0x7ff << 0, */ +/* WEIGHT_1_shift = 0, */ +/* WEIGHT_0_mask = 0x7ff << 11, */ +/* WEIGHT_0_shift = 11, */ + TD_CNTL = 0x00009490, + SYNC_PHASE_SH_mask = 0x03 << 0, + SYNC_PHASE_SH_shift = 0, + SYNC_PHASE_VC_SMX_mask = 0x03 << 4, + SYNC_PHASE_VC_SMX_shift = 4, + TD0_CNTL = 0x00009494, + TD0_CNTL_num = 4, + ID_OVERRIDE_mask = 0x03 << 28, + ID_OVERRIDE_shift = 28, + TD0_STATUS = 0x000094a4, + TD0_STATUS_num = 4, + BUSY_bit = 1 << 31, + TA_CNTL = 0x00009504, + GRADIENT_CREDIT_mask = 0x1f << 0, + GRADIENT_CREDIT_shift = 0, + WALKER_CREDIT_mask = 0x1f << 8, + WALKER_CREDIT_shift = 8, + ALIGNER_CREDIT_mask = 0x1f << 16, + ALIGNER_CREDIT_shift = 16, + TD_FIFO_CREDIT_mask = 0x3ff << 22, + TD_FIFO_CREDIT_shift = 22, + TA_CNTL_AUX = 0x00009508, + DISABLE_CUBE_WRAP_bit = 1 << 0, + SYNC_GRADIENT_bit = 1 << 24, + SYNC_WALKER_bit = 1 << 25, + SYNC_ALIGNER_bit = 1 << 26, + BILINEAR_PRECISION_bit = 1 << 31, + TA0_CNTL = 0x00009510, +/* ID_OVERRIDE_mask = 0x03 << 28, */ +/* ID_OVERRIDE_shift = 28, */ + TA1_CNTL = 0x00009514, +/* ID_OVERRIDE_mask = 0x03 << 28, */ +/* ID_OVERRIDE_shift = 28, */ + TA2_CNTL = 0x00009518, +/* ID_OVERRIDE_mask = 0x03 << 28, */ +/* ID_OVERRIDE_shift = 28, */ + TA3_CNTL = 0x0000951c, +/* ID_OVERRIDE_mask = 0x03 << 28, */ +/* ID_OVERRIDE_shift = 28, */ + TA0_STATUS = 0x00009520, + FG_PFIFO_EMPTYB_bit = 1 << 12, + FG_LFIFO_EMPTYB_bit = 1 << 13, + FG_SFIFO_EMPTYB_bit = 1 << 14, + FL_PFIFO_EMPTYB_bit = 1 << 16, + FL_LFIFO_EMPTYB_bit = 1 << 17, + FL_SFIFO_EMPTYB_bit = 1 << 18, + FA_PFIFO_EMPTYB_bit = 1 << 20, + FA_LFIFO_EMPTYB_bit = 1 << 21, + FA_SFIFO_EMPTYB_bit = 1 << 22, + IN_BUSY_bit = 1 << 
24, + FG_BUSY_bit = 1 << 25, + FL_BUSY_bit = 1 << 27, + TA_BUSY_bit = 1 << 28, + FA_BUSY_bit = 1 << 29, + AL_BUSY_bit = 1 << 30, +/* BUSY_bit = 1 << 31, */ + TA1_STATUS = 0x00009524, +/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ +/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ +/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ +/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ +/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ +/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ +/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ +/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ +/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ +/* IN_BUSY_bit = 1 << 24, */ +/* FG_BUSY_bit = 1 << 25, */ +/* FL_BUSY_bit = 1 << 27, */ +/* TA_BUSY_bit = 1 << 28, */ +/* FA_BUSY_bit = 1 << 29, */ +/* AL_BUSY_bit = 1 << 30, */ +/* BUSY_bit = 1 << 31, */ + TA2_STATUS = 0x00009528, +/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ +/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ +/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ +/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ +/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ +/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ +/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ +/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ +/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ +/* IN_BUSY_bit = 1 << 24, */ +/* FG_BUSY_bit = 1 << 25, */ +/* FL_BUSY_bit = 1 << 27, */ +/* TA_BUSY_bit = 1 << 28, */ +/* FA_BUSY_bit = 1 << 29, */ +/* AL_BUSY_bit = 1 << 30, */ +/* BUSY_bit = 1 << 31, */ + TA3_STATUS = 0x0000952c, +/* FG_PFIFO_EMPTYB_bit = 1 << 12, */ +/* FG_LFIFO_EMPTYB_bit = 1 << 13, */ +/* FG_SFIFO_EMPTYB_bit = 1 << 14, */ +/* FL_PFIFO_EMPTYB_bit = 1 << 16, */ +/* FL_LFIFO_EMPTYB_bit = 1 << 17, */ +/* FL_SFIFO_EMPTYB_bit = 1 << 18, */ +/* FA_PFIFO_EMPTYB_bit = 1 << 20, */ +/* FA_LFIFO_EMPTYB_bit = 1 << 21, */ +/* FA_SFIFO_EMPTYB_bit = 1 << 22, */ +/* IN_BUSY_bit = 1 << 24, */ +/* FG_BUSY_bit = 1 << 25, */ +/* FL_BUSY_bit = 1 << 27, */ +/* TA_BUSY_bit = 1 << 28, */ +/* FA_BUSY_bit = 1 << 29, */ +/* AL_BUSY_bit = 1 << 30, */ +/* BUSY_bit = 1 << 31, */ + TC_STATUS = 0x00009600, + TC_BUSY_bit = 1 << 0, + TC_INVALIDATE = 0x00009604, + START_bit = 1 << 0, + TC_CNTL = 
0x00009608, + FORCE_HIT_bit = 1 << 0, + FORCE_MISS_bit = 1 << 1, + L2_SIZE_mask = 0x0f << 5, + L2_SIZE_shift = 5, + _256K = 0x00, + _224K = 0x01, + _192K = 0x02, + _160K = 0x03, + _128K = 0x04, + _96K = 0x05, + _64K = 0x06, + _32K = 0x07, + L2_DISABLE_LATE_HIT_bit = 1 << 9, + DISABLE_VERT_PERF_bit = 1 << 10, + DISABLE_INVAL_BUSY_bit = 1 << 11, + DISABLE_INVAL_SAME_SURFACE_bit = 1 << 12, + PARTITION_MODE_mask = 0x03 << 13, + PARTITION_MODE_shift = 13, + X_VERTEX = 0x00, + MISS_ARB_MODE_bit = 1 << 15, + HIT_ARB_MODE_bit = 1 << 16, + DISABLE_WRITE_DELAY_bit = 1 << 17, + HIT_FIFO_DEPTH_bit = 1 << 18, + VC_CNTL = 0x00009700, + L2_INVALIDATE_bit = 1 << 0, + RESERVED_bit = 1 << 1, + CC_FORCE_MISS_bit = 1 << 2, + MI_CHAN_SEL_mask = 0x03 << 3, + MI_CHAN_SEL_shift = 3, + X_MC0_USES_CH_0_1 = 0x00, + X_MC0_USES_CH_0_3 = 0x01, + X_VC_MC0_IS_ACTIVE = 0x02, + X_VC_MC1_IS_DISABLED = 0x03, + MI_STEER_DISABLE_bit = 1 << 5, + MI_CREDIT_CTR_mask = 0x0f << 6, + MI_CREDIT_CTR_shift = 6, + MI_CREDIT_WE_bit = 1 << 10, + MI_REQ_STALL_THLD_mask = 0x07 << 11, + MI_REQ_STALL_THLD_shift = 11, + X_LATENCY_EXCEEDS_399_CLOCKS = 0x00, + X_LATENCY_EXCEEDS_415_CLOCKS = 0x01, + X_LATENCY_EXCEEDS_431_CLOCKS = 0x02, + X_LATENCY_EXCEEDS_447_CLOCKS = 0x03, + X_LATENCY_EXCEEDS_463_CLOCKS = 0x04, + X_LATENCY_EXCEEDS_479_CLOCKS = 0x05, + X_LATENCY_EXCEEDS_495_CLOCKS = 0x06, + X_LATENCY_EXCEEDS_511_CLOCKS = 0x07, + VC_CNTL__MI_TIMESTAMP_RES_mask = 0x1f << 14, + VC_CNTL__MI_TIMESTAMP_RES_shift = 14, + X_1X_SYSTEM_CLOCK = 0x00, + X_2X_SYSTEM_CLOCK = 0x01, + X_4X_SYSTEM_CLOCK = 0x02, + X_8X_SYSTEM_CLOCK = 0x03, + X_16X_SYSTEM_CLOCK = 0x04, + X_32X_SYSTEM_CLOCK = 0x05, + X_64X_SYSTEM_CLOCK = 0x06, + X_128X_SYSTEM_CLOCK = 0x07, + X_256X_SYSTEM_CLOCK = 0x08, + X_512X_SYSTEM_CLOCK = 0x09, + X_1024X_SYSTEM_CLOCK = 0x0a, + X_2048X_SYSTEM_CLOCK = 0x0b, + X_4092X_SYSTEM_CLOCK = 0x0c, + X_8192X_SYSTEM_CLOCK = 0x0d, + X_16384X_SYSTEM_CLOCK = 0x0e, + X_32768X_SYSTEM_CLOCK = 0x0f, + VC_CNTL_STATUS = 0x00009704, + 
RP_BUSY_bit = 1 << 0, + RG_BUSY_bit = 1 << 1, + VC_BUSY_bit = 1 << 2, + CLAMP_DETECT_bit = 1 << 3, + VC_CONFIG = 0x00009718, + WRITE_DIS_bit = 1 << 0, + GPR_DATA_PHASE_ADJ_mask = 0x07 << 1, + GPR_DATA_PHASE_ADJ_shift = 1, + X_LATENCY_BASE_0_CYCLES = 0x00, + X_LATENCY_BASE_1_CYCLES = 0x01, + X_LATENCY_BASE_2_CYCLES = 0x02, + X_LATENCY_BASE_3_CYCLES = 0x03, + TD_SIMD_SYNC_ADJ_mask = 0x07 << 4, + TD_SIMD_SYNC_ADJ_shift = 4, + X_0_CYCLES_DELAY = 0x00, + X_1_CYCLES_DELAY = 0x01, + X_2_CYCLES_DELAY = 0x02, + X_3_CYCLES_DELAY = 0x03, + X_4_CYCLES_DELAY = 0x04, + X_5_CYCLES_DELAY = 0x05, + X_6_CYCLES_DELAY = 0x06, + X_7_CYCLES_DELAY = 0x07, + SMX_DC_CTL0 = 0x0000a020, + WR_GATHER_STREAM0_bit = 1 << 0, + WR_GATHER_STREAM1_bit = 1 << 1, + WR_GATHER_STREAM2_bit = 1 << 2, + WR_GATHER_STREAM3_bit = 1 << 3, + WR_GATHER_SCRATCH_bit = 1 << 4, + WR_GATHER_REDUC_BUF_bit = 1 << 5, + WR_GATHER_RING_BUF_bit = 1 << 6, + WR_GATHER_F_BUF_bit = 1 << 7, + DISABLE_CACHES_bit = 1 << 8, + AUTO_FLUSH_INVAL_EN_bit = 1 << 10, + AUTO_FLUSH_EN_bit = 1 << 11, + AUTO_FLUSH_CNT_mask = 0xffff << 12, + AUTO_FLUSH_CNT_shift = 12, + MC_RD_STALL_FACTOR_mask = 0x03 << 28, + MC_RD_STALL_FACTOR_shift = 28, + MC_WR_STALL_FACTOR_mask = 0x03 << 30, + MC_WR_STALL_FACTOR_shift = 30, + SMX_DC_CTL1 = 0x0000a024, + OP_FIFO_SKID_mask = 0x7f << 0, + OP_FIFO_SKID_shift = 0, + CACHE_LINE_SIZE_bit = 1 << 8, + MULTI_FLUSH_MODE_bit = 1 << 9, + MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_mask = 0x0f << 10, + MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_shift = 10, + DISABLE_WR_GATHER_RD_HIT_FORCE_EVICT_bit = 1 << 16, + DISABLE_WR_GATHER_RD_HIT_COMP_VLDS_CHECK_bit = 1 << 17, + DISABLE_FLUSH_ES_ALSO_INVALS_bit = 1 << 18, + DISABLE_FLUSH_GS_ALSO_INVALS_bit = 1 << 19, + SMX_DC_CTL2 = 0x0000a028, + INVALIDATE_CACHES_bit = 1 << 0, + CACHES_INVALID_bit = 1 << 1, + CACHES_DIRTY_bit = 1 << 2, + FLUSH_ALL_bit = 1 << 4, + FLUSH_GS_THREADS_bit = 1 << 8, + FLUSH_ES_THREADS_bit = 1 << 9, + SMX_DC_MC_INTF_CTL = 0x0000a02c, + MC_RD_REQ_CRED_mask = 0xff << 
0, + MC_RD_REQ_CRED_shift = 0, + MC_WR_REQ_CRED_mask = 0xff << 16, + MC_WR_REQ_CRED_shift = 16, + TD_PS_SAMPLER0_BORDER_RED = 0x0000a400, + TD_PS_SAMPLER0_BORDER_RED_num = 18, + TD_PS_SAMPLER0_BORDER_RED_offset = 16, + TD_PS_SAMPLER0_BORDER_GREEN = 0x0000a404, + TD_PS_SAMPLER0_BORDER_GREEN_num = 18, + TD_PS_SAMPLER0_BORDER_GREEN_offset = 16, + TD_PS_SAMPLER0_BORDER_BLUE = 0x0000a408, + TD_PS_SAMPLER0_BORDER_BLUE_num = 18, + TD_PS_SAMPLER0_BORDER_BLUE_offset = 16, + TD_PS_SAMPLER0_BORDER_ALPHA = 0x0000a40c, + TD_PS_SAMPLER0_BORDER_ALPHA_num = 18, + TD_PS_SAMPLER0_BORDER_ALPHA_offset = 16, + TD_VS_SAMPLER0_BORDER_RED = 0x0000a600, + TD_VS_SAMPLER0_BORDER_RED_num = 18, + TD_VS_SAMPLER0_BORDER_RED_offset = 16, + TD_VS_SAMPLER0_BORDER_GREEN = 0x0000a604, + TD_VS_SAMPLER0_BORDER_GREEN_num = 18, + TD_VS_SAMPLER0_BORDER_GREEN_offset = 16, + TD_VS_SAMPLER0_BORDER_BLUE = 0x0000a608, + TD_VS_SAMPLER0_BORDER_BLUE_num = 18, + TD_VS_SAMPLER0_BORDER_BLUE_offset = 16, + TD_VS_SAMPLER0_BORDER_ALPHA = 0x0000a60c, + TD_VS_SAMPLER0_BORDER_ALPHA_num = 18, + TD_VS_SAMPLER0_BORDER_ALPHA_offset = 16, + TD_GS_SAMPLER0_BORDER_RED = 0x0000a800, + TD_GS_SAMPLER0_BORDER_RED_num = 18, + TD_GS_SAMPLER0_BORDER_RED_offset = 16, + TD_GS_SAMPLER0_BORDER_GREEN = 0x0000a804, + TD_GS_SAMPLER0_BORDER_GREEN_num = 18, + TD_GS_SAMPLER0_BORDER_GREEN_offset = 16, + TD_GS_SAMPLER0_BORDER_BLUE = 0x0000a808, + TD_GS_SAMPLER0_BORDER_BLUE_num = 18, + TD_GS_SAMPLER0_BORDER_BLUE_offset = 16, + TD_GS_SAMPLER0_BORDER_ALPHA = 0x0000a80c, + TD_GS_SAMPLER0_BORDER_ALPHA_num = 18, + TD_GS_SAMPLER0_BORDER_ALPHA_offset = 16, + TD_PS_SAMPLER0_CLEARTYPE_KERNEL = 0x0000aa00, + TD_PS_SAMPLER0_CLEARTYPE_KERNEL_num = 18, + TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_mask = 0x07 << 0, + TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_shift = 0, + TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_mask = 0x07 << 3, + TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_shift = 3, + DB_DEPTH_SIZE = 0x00028000, + PITCH_TILE_MAX_mask = 0x3ff << 0, + PITCH_TILE_MAX_shift = 
0, + SLICE_TILE_MAX_mask = 0xfffff << 10, + SLICE_TILE_MAX_shift = 10, + DB_DEPTH_VIEW = 0x00028004, + SLICE_START_mask = 0x7ff << 0, + SLICE_START_shift = 0, + SLICE_MAX_mask = 0x7ff << 13, + SLICE_MAX_shift = 13, + DB_DEPTH_BASE = 0x0002800c, + DB_DEPTH_INFO = 0x00028010, + DB_DEPTH_INFO__FORMAT_mask = 0x07 << 0, + DB_DEPTH_INFO__FORMAT_shift = 0, + DEPTH_INVALID = 0x00, + DEPTH_16 = 0x01, + DEPTH_X8_24 = 0x02, + DEPTH_8_24 = 0x03, + DEPTH_X8_24_FLOAT = 0x04, + DEPTH_8_24_FLOAT = 0x05, + DEPTH_32_FLOAT = 0x06, + DEPTH_X24_8_32_FLOAT = 0x07, + DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3, + DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15, + DB_DEPTH_INFO__ARRAY_MODE_shift = 15, + ARRAY_2D_TILED_THIN1 = 0x04, + TILE_SURFACE_ENABLE_bit = 1 << 25, + TILE_COMPACT_bit = 1 << 26, + ZRANGE_PRECISION_bit = 1 << 31, + DB_HTILE_DATA_BASE = 0x00028014, + DB_STENCIL_CLEAR = 0x00028028, + DB_STENCIL_CLEAR__CLEAR_mask = 0xff << 0, + DB_STENCIL_CLEAR__CLEAR_shift = 0, + MIN_mask = 0xff << 16, + MIN_shift = 16, + DB_DEPTH_CLEAR = 0x0002802c, + PA_SC_SCREEN_SCISSOR_TL = 0x00028030, + PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0x7fff << 0, + PA_SC_SCREEN_SCISSOR_TL__TL_X_shift = 0, + PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0x7fff << 16, + PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift = 16, + PA_SC_SCREEN_SCISSOR_BR = 0x00028034, + PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0x7fff << 0, + PA_SC_SCREEN_SCISSOR_BR__BR_X_shift = 0, + PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0x7fff << 16, + PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift = 16, + CB_COLOR0_BASE = 0x00028040, + CB_COLOR0_BASE_num = 8, + CB_COLOR0_SIZE = 0x00028060, + CB_COLOR0_SIZE_num = 8, +/* PITCH_TILE_MAX_mask = 0x3ff << 0, */ +/* PITCH_TILE_MAX_shift = 0, */ +/* SLICE_TILE_MAX_mask = 0xfffff << 10, */ +/* SLICE_TILE_MAX_shift = 10, */ + CB_COLOR0_VIEW = 0x00028080, + CB_COLOR0_VIEW_num = 8, +/* SLICE_START_mask = 0x7ff << 0, */ +/* SLICE_START_shift = 0, */ +/* SLICE_MAX_mask = 0x7ff << 13, */ +/* SLICE_MAX_shift = 13, */ + CB_COLOR0_INFO = 0x000280a0, + 
CB_COLOR0_INFO_num = 8, + ENDIAN_mask = 0x03 << 0, + ENDIAN_shift = 0, + ENDIAN_NONE = 0x00, + ENDIAN_8IN16 = 0x01, + ENDIAN_8IN32 = 0x02, + ENDIAN_8IN64 = 0x03, + CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2, + CB_COLOR0_INFO__FORMAT_shift = 2, + COLOR_INVALID = 0x00, + COLOR_8 = 0x01, + COLOR_4_4 = 0x02, + COLOR_3_3_2 = 0x03, + COLOR_16 = 0x05, + COLOR_16_FLOAT = 0x06, + COLOR_8_8 = 0x07, + COLOR_5_6_5 = 0x08, + COLOR_6_5_5 = 0x09, + COLOR_1_5_5_5 = 0x0a, + COLOR_4_4_4_4 = 0x0b, + COLOR_5_5_5_1 = 0x0c, + COLOR_32 = 0x0d, + COLOR_32_FLOAT = 0x0e, + COLOR_16_16 = 0x0f, + COLOR_16_16_FLOAT = 0x10, + COLOR_8_24 = 0x11, + COLOR_8_24_FLOAT = 0x12, + COLOR_24_8 = 0x13, + COLOR_24_8_FLOAT = 0x14, + COLOR_10_11_11 = 0x15, + COLOR_10_11_11_FLOAT = 0x16, + COLOR_11_11_10 = 0x17, + COLOR_11_11_10_FLOAT = 0x18, + COLOR_2_10_10_10 = 0x19, + COLOR_8_8_8_8 = 0x1a, + COLOR_10_10_10_2 = 0x1b, + COLOR_X24_8_32_FLOAT = 0x1c, + COLOR_32_32 = 0x1d, + COLOR_32_32_FLOAT = 0x1e, + COLOR_16_16_16_16 = 0x1f, + COLOR_16_16_16_16_FLOAT = 0x20, + COLOR_32_32_32_32 = 0x22, + COLOR_32_32_32_32_FLOAT = 0x23, + CB_COLOR0_INFO__ARRAY_MODE_mask = 0x0f << 8, + CB_COLOR0_INFO__ARRAY_MODE_shift = 8, + ARRAY_LINEAR_GENERAL = 0x00, + ARRAY_LINEAR_ALIGNED = 0x01, +/* ARRAY_2D_TILED_THIN1 = 0x04, */ + NUMBER_TYPE_mask = 0x07 << 12, + NUMBER_TYPE_shift = 12, + NUMBER_UNORM = 0x00, + NUMBER_SNORM = 0x01, + NUMBER_USCALED = 0x02, + NUMBER_SSCALED = 0x03, + NUMBER_UINT = 0x04, + NUMBER_SINT = 0x05, + NUMBER_SRGB = 0x06, + NUMBER_FLOAT = 0x07, + CB_COLOR0_INFO__READ_SIZE_bit = 1 << 15, + COMP_SWAP_mask = 0x03 << 16, + COMP_SWAP_shift = 16, + SWAP_STD = 0x00, + SWAP_ALT = 0x01, + SWAP_STD_REV = 0x02, + SWAP_ALT_REV = 0x03, + CB_COLOR0_INFO__TILE_MODE_mask = 0x03 << 18, + CB_COLOR0_INFO__TILE_MODE_shift = 18, + TILE_DISABLE = 0x00, + TILE_CLEAR_ENABLE = 0x01, + TILE_FRAG_ENABLE = 0x02, + BLEND_CLAMP_bit = 1 << 20, + CLEAR_COLOR_bit = 1 << 21, + BLEND_BYPASS_bit = 1 << 22, + BLEND_FLOAT32_bit = 1 << 23, + 
SIMPLE_FLOAT_bit = 1 << 24, + CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 25, +/* TILE_COMPACT_bit = 1 << 26, */ + SOURCE_FORMAT_bit = 1 << 27, + CB_COLOR0_TILE = 0x000280c0, + CB_COLOR0_TILE_num = 8, + CB_COLOR0_FRAG = 0x000280e0, + CB_COLOR0_FRAG_num = 8, + CB_COLOR0_MASK = 0x00028100, + CB_COLOR0_MASK_num = 8, + CMASK_BLOCK_MAX_mask = 0xfff << 0, + CMASK_BLOCK_MAX_shift = 0, + FMASK_TILE_MAX_mask = 0xfffff << 12, + FMASK_TILE_MAX_shift = 12, + CB_CLEAR_RED = 0x00028120, + CB_CLEAR_GREEN = 0x00028124, + CB_CLEAR_BLUE = 0x00028128, + CB_CLEAR_ALPHA = 0x0002812c, + SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x00028140, + SQ_ALU_CONST_BUFFER_SIZE_PS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift = 0, + SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x00028180, + SQ_ALU_CONST_BUFFER_SIZE_VS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift = 0, + SQ_ALU_CONST_BUFFER_SIZE_GS_0 = 0x000281c0, + SQ_ALU_CONST_BUFFER_SIZE_GS_0_num = 16, + SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask = 0x1ff << 0, + SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift = 0, + PA_SC_WINDOW_OFFSET = 0x00028200, + WINDOW_X_OFFSET_mask = 0x7fff << 0, + WINDOW_X_OFFSET_shift = 0, + WINDOW_Y_OFFSET_mask = 0x7fff << 16, + WINDOW_Y_OFFSET_shift = 16, + PA_SC_WINDOW_SCISSOR_TL = 0x00028204, + PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x3fff << 0, + PA_SC_WINDOW_SCISSOR_TL__TL_X_shift = 0, + PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x3fff << 16, + PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift = 16, + WINDOW_OFFSET_DISABLE_bit = 1 << 31, + PA_SC_WINDOW_SCISSOR_BR = 0x00028208, + PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x3fff << 0, + PA_SC_WINDOW_SCISSOR_BR__BR_X_shift = 0, + PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x3fff << 16, + PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift = 16, + PA_SC_CLIPRECT_RULE = 0x0002820c, + CLIP_RULE_mask = 0xffff << 0, + CLIP_RULE_shift = 0, + PA_SC_CLIPRECT_0_TL = 0x00028210, + PA_SC_CLIPRECT_0_TL_num = 4, + 
PA_SC_CLIPRECT_0_TL_offset = 8, + PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x3fff << 0, + PA_SC_CLIPRECT_0_TL__TL_X_shift = 0, + PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x3fff << 16, + PA_SC_CLIPRECT_0_TL__TL_Y_shift = 16, + PA_SC_CLIPRECT_0_BR = 0x00028214, + PA_SC_CLIPRECT_0_BR_num = 4, + PA_SC_CLIPRECT_0_BR_offset = 8, + PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x3fff << 0, + PA_SC_CLIPRECT_0_BR__BR_X_shift = 0, + PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x3fff << 16, + PA_SC_CLIPRECT_0_BR__BR_Y_shift = 16, + CB_TARGET_MASK = 0x00028238, + TARGET0_ENABLE_mask = 0x0f << 0, + TARGET0_ENABLE_shift = 0, + TARGET1_ENABLE_mask = 0x0f << 4, + TARGET1_ENABLE_shift = 4, + TARGET2_ENABLE_mask = 0x0f << 8, + TARGET2_ENABLE_shift = 8, + TARGET3_ENABLE_mask = 0x0f << 12, + TARGET3_ENABLE_shift = 12, + TARGET4_ENABLE_mask = 0x0f << 16, + TARGET4_ENABLE_shift = 16, + TARGET5_ENABLE_mask = 0x0f << 20, + TARGET5_ENABLE_shift = 20, + TARGET6_ENABLE_mask = 0x0f << 24, + TARGET6_ENABLE_shift = 24, + TARGET7_ENABLE_mask = 0x0f << 28, + TARGET7_ENABLE_shift = 28, + CB_SHADER_MASK = 0x0002823c, + OUTPUT0_ENABLE_mask = 0x0f << 0, + OUTPUT0_ENABLE_shift = 0, + OUTPUT1_ENABLE_mask = 0x0f << 4, + OUTPUT1_ENABLE_shift = 4, + OUTPUT2_ENABLE_mask = 0x0f << 8, + OUTPUT2_ENABLE_shift = 8, + OUTPUT3_ENABLE_mask = 0x0f << 12, + OUTPUT3_ENABLE_shift = 12, + OUTPUT4_ENABLE_mask = 0x0f << 16, + OUTPUT4_ENABLE_shift = 16, + OUTPUT5_ENABLE_mask = 0x0f << 20, + OUTPUT5_ENABLE_shift = 20, + OUTPUT6_ENABLE_mask = 0x0f << 24, + OUTPUT6_ENABLE_shift = 24, + OUTPUT7_ENABLE_mask = 0x0f << 28, + OUTPUT7_ENABLE_shift = 28, + PA_SC_GENERIC_SCISSOR_TL = 0x00028240, + PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x3fff << 0, + PA_SC_GENERIC_SCISSOR_TL__TL_X_shift = 0, + PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x3fff << 16, + PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift = 16, +/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ + PA_SC_GENERIC_SCISSOR_BR = 0x00028244, + PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x3fff << 0, + PA_SC_GENERIC_SCISSOR_BR__BR_X_shift = 0, + 
PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x3fff << 16, + PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift = 16, + PA_SC_VPORT_SCISSOR_0_TL = 0x00028250, + PA_SC_VPORT_SCISSOR_0_TL_num = 16, + PA_SC_VPORT_SCISSOR_0_TL_offset = 8, + PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x3fff << 0, + PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift = 0, + PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x3fff << 16, + PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift = 16, +/* WINDOW_OFFSET_DISABLE_bit = 1 << 31, */ + PA_SC_VPORT_SCISSOR_0_BR = 0x00028254, + PA_SC_VPORT_SCISSOR_0_BR_num = 16, + PA_SC_VPORT_SCISSOR_0_BR_offset = 8, + PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x3fff << 0, + PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift = 0, + PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x3fff << 16, + PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift = 16, + PA_SC_VPORT_ZMIN_0 = 0x000282d0, + PA_SC_VPORT_ZMIN_0_num = 16, + PA_SC_VPORT_ZMIN_0_offset = 8, + PA_SC_VPORT_ZMAX_0 = 0x000282d4, + PA_SC_VPORT_ZMAX_0_num = 16, + PA_SC_VPORT_ZMAX_0_offset = 8, + SX_MISC = 0x00028350, + MULTIPASS_bit = 1 << 0, + SQ_VTX_SEMANTIC_0 = 0x00028380, + SQ_VTX_SEMANTIC_0_num = 32, +/* SEMANTIC_ID_mask = 0xff << 0, */ +/* SEMANTIC_ID_shift = 0, */ + VGT_MAX_VTX_INDX = 0x00028400, + VGT_MIN_VTX_INDX = 0x00028404, + VGT_INDX_OFFSET = 0x00028408, + VGT_MULTI_PRIM_IB_RESET_INDX = 0x0002840c, + SX_ALPHA_TEST_CONTROL = 0x00028410, + ALPHA_FUNC_mask = 0x07 << 0, + ALPHA_FUNC_shift = 0, + REF_NEVER = 0x00, + REF_LESS = 0x01, + REF_EQUAL = 0x02, + REF_LEQUAL = 0x03, + REF_GREATER = 0x04, + REF_NOTEQUAL = 0x05, + REF_GEQUAL = 0x06, + REF_ALWAYS = 0x07, + ALPHA_TEST_ENABLE_bit = 1 << 3, + ALPHA_TEST_BYPASS_bit = 1 << 8, + CB_BLEND_RED = 0x00028414, + CB_BLEND_GREEN = 0x00028418, + CB_BLEND_BLUE = 0x0002841c, + CB_BLEND_ALPHA = 0x00028420, + CB_FOG_RED = 0x00028424, + CB_FOG_GREEN = 0x00028428, + CB_FOG_BLUE = 0x0002842c, + DB_STENCILREFMASK = 0x00028430, + STENCILREF_mask = 0xff << 0, + STENCILREF_shift = 0, + STENCILMASK_mask = 0xff << 8, + STENCILMASK_shift = 8, + STENCILWRITEMASK_mask = 0xff 
<< 16, + STENCILWRITEMASK_shift = 16, + DB_STENCILREFMASK_BF = 0x00028434, + STENCILREF_BF_mask = 0xff << 0, + STENCILREF_BF_shift = 0, + STENCILMASK_BF_mask = 0xff << 8, + STENCILMASK_BF_shift = 8, + STENCILWRITEMASK_BF_mask = 0xff << 16, + STENCILWRITEMASK_BF_shift = 16, + SX_ALPHA_REF = 0x00028438, + PA_CL_VPORT_XSCALE_0 = 0x0002843c, + PA_CL_VPORT_XSCALE_0_num = 16, + PA_CL_VPORT_XSCALE_0_offset = 24, + PA_CL_VPORT_XOFFSET_0 = 0x00028440, + PA_CL_VPORT_XOFFSET_0_num = 16, + PA_CL_VPORT_XOFFSET_0_offset = 24, + PA_CL_VPORT_YSCALE_0 = 0x00028444, + PA_CL_VPORT_YSCALE_0_num = 16, + PA_CL_VPORT_YSCALE_0_offset = 24, + PA_CL_VPORT_YOFFSET_0 = 0x00028448, + PA_CL_VPORT_YOFFSET_0_num = 16, + PA_CL_VPORT_YOFFSET_0_offset = 24, + PA_CL_VPORT_ZSCALE_0 = 0x0002844c, + PA_CL_VPORT_ZSCALE_0_num = 16, + PA_CL_VPORT_ZSCALE_0_offset = 24, + PA_CL_VPORT_ZOFFSET_0 = 0x00028450, + PA_CL_VPORT_ZOFFSET_0_num = 16, + PA_CL_VPORT_ZOFFSET_0_offset = 24, + SPI_VS_OUT_ID_0 = 0x00028614, + SPI_VS_OUT_ID_0_num = 10, + SEMANTIC_0_mask = 0xff << 0, + SEMANTIC_0_shift = 0, + SEMANTIC_1_mask = 0xff << 8, + SEMANTIC_1_shift = 8, + SEMANTIC_2_mask = 0xff << 16, + SEMANTIC_2_shift = 16, + SEMANTIC_3_mask = 0xff << 24, + SEMANTIC_3_shift = 24, + SPI_PS_INPUT_CNTL_0 = 0x00028644, + SPI_PS_INPUT_CNTL_0_num = 32, + SEMANTIC_mask = 0xff << 0, + SEMANTIC_shift = 0, + DEFAULT_VAL_mask = 0x03 << 8, + DEFAULT_VAL_shift = 8, + X_0_0F = 0x00, + FLAT_SHADE_bit = 1 << 10, + SEL_CENTROID_bit = 1 << 11, + SEL_LINEAR_bit = 1 << 12, + CYL_WRAP_mask = 0x0f << 13, + CYL_WRAP_shift = 13, + PT_SPRITE_TEX_bit = 1 << 17, + SEL_SAMPLE_bit = 1 << 18, + SPI_VS_OUT_CONFIG = 0x000286c4, + VS_PER_COMPONENT_bit = 1 << 0, + VS_EXPORT_COUNT_mask = 0x1f << 1, + VS_EXPORT_COUNT_shift = 1, + VS_EXPORTS_FOG_bit = 1 << 8, + VS_OUT_FOG_VEC_ADDR_mask = 0x1f << 9, + VS_OUT_FOG_VEC_ADDR_shift = 9, + SPI_PS_IN_CONTROL_0 = 0x000286cc, + NUM_INTERP_mask = 0x3f << 0, + NUM_INTERP_shift = 0, + POSITION_ENA_bit = 1 << 8, + 
POSITION_CENTROID_bit = 1 << 9, + POSITION_ADDR_mask = 0x1f << 10, + POSITION_ADDR_shift = 10, + PARAM_GEN_mask = 0x0f << 15, + PARAM_GEN_shift = 15, + PARAM_GEN_ADDR_mask = 0x7f << 19, + PARAM_GEN_ADDR_shift = 19, + BARYC_SAMPLE_CNTL_mask = 0x03 << 26, + BARYC_SAMPLE_CNTL_shift = 26, + CENTROIDS_ONLY = 0x00, + CENTERS_ONLY = 0x01, + CENTROIDS_AND_CENTERS = 0x02, + UNDEF = 0x03, + PERSP_GRADIENT_ENA_bit = 1 << 28, + LINEAR_GRADIENT_ENA_bit = 1 << 29, + POSITION_SAMPLE_bit = 1 << 30, + BARYC_AT_SAMPLE_ENA_bit = 1 << 31, + SPI_PS_IN_CONTROL_1 = 0x000286d0, + GEN_INDEX_PIX_bit = 1 << 0, + GEN_INDEX_PIX_ADDR_mask = 0x7f << 1, + GEN_INDEX_PIX_ADDR_shift = 1, + FRONT_FACE_ENA_bit = 1 << 8, + FRONT_FACE_CHAN_mask = 0x03 << 9, + FRONT_FACE_CHAN_shift = 9, + FRONT_FACE_ALL_BITS_bit = 1 << 11, + FRONT_FACE_ADDR_mask = 0x1f << 12, + FRONT_FACE_ADDR_shift = 12, + FOG_ADDR_mask = 0x7f << 17, + FOG_ADDR_shift = 17, + FIXED_PT_POSITION_ENA_bit = 1 << 24, + FIXED_PT_POSITION_ADDR_mask = 0x1f << 25, + FIXED_PT_POSITION_ADDR_shift = 25, + SPI_INTERP_CONTROL_0 = 0x000286d4, + FLAT_SHADE_ENA_bit = 1 << 0, + PNT_SPRITE_ENA_bit = 1 << 1, + PNT_SPRITE_OVRD_X_mask = 0x07 << 2, + PNT_SPRITE_OVRD_X_shift = 2, + SPI_PNT_SPRITE_SEL_0 = 0x00, + SPI_PNT_SPRITE_SEL_1 = 0x01, + SPI_PNT_SPRITE_SEL_S = 0x02, + SPI_PNT_SPRITE_SEL_T = 0x03, + SPI_PNT_SPRITE_SEL_NONE = 0x04, + PNT_SPRITE_OVRD_Y_mask = 0x07 << 5, + PNT_SPRITE_OVRD_Y_shift = 5, +/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ +/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ +/* SPI_PNT_SPRITE_SEL_S = 0x02, */ +/* SPI_PNT_SPRITE_SEL_T = 0x03, */ +/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ + PNT_SPRITE_OVRD_Z_mask = 0x07 << 8, + PNT_SPRITE_OVRD_Z_shift = 8, +/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ +/* SPI_PNT_SPRITE_SEL_1 = 0x01, */ +/* SPI_PNT_SPRITE_SEL_S = 0x02, */ +/* SPI_PNT_SPRITE_SEL_T = 0x03, */ +/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ + PNT_SPRITE_OVRD_W_mask = 0x07 << 11, + PNT_SPRITE_OVRD_W_shift = 11, +/* SPI_PNT_SPRITE_SEL_0 = 0x00, */ +/* SPI_PNT_SPRITE_SEL_1 = 
0x01, */ +/* SPI_PNT_SPRITE_SEL_S = 0x02, */ +/* SPI_PNT_SPRITE_SEL_T = 0x03, */ +/* SPI_PNT_SPRITE_SEL_NONE = 0x04, */ + PNT_SPRITE_TOP_1_bit = 1 << 14, + SPI_INPUT_Z = 0x000286d8, + PROVIDE_Z_TO_SPI_bit = 1 << 0, + SPI_FOG_CNTL = 0x000286dc, + PASS_FOG_THROUGH_PS_bit = 1 << 0, + PIXEL_FOG_FUNC_mask = 0x03 << 1, + PIXEL_FOG_FUNC_shift = 1, + SPI_FOG_NONE = 0x00, + SPI_FOG_EXP = 0x01, + SPI_FOG_EXP2 = 0x02, + SPI_FOG_LINEAR = 0x03, + PIXEL_FOG_SRC_SEL_bit = 1 << 3, + VS_FOG_CLAMP_DISABLE_bit = 1 << 4, + SPI_FOG_FUNC_SCALE = 0x000286e0, + SPI_FOG_FUNC_BIAS = 0x000286e4, + CB_BLEND0_CONTROL = 0x00028780, + CB_BLEND0_CONTROL_num = 8, + COLOR_SRCBLEND_mask = 0x1f << 0, + COLOR_SRCBLEND_shift = 0, + COLOR_COMB_FCN_mask = 0x07 << 5, + COLOR_COMB_FCN_shift = 5, + COLOR_DESTBLEND_mask = 0x1f << 8, + COLOR_DESTBLEND_shift = 8, + OPACITY_WEIGHT_bit = 1 << 13, + ALPHA_SRCBLEND_mask = 0x1f << 16, + ALPHA_SRCBLEND_shift = 16, + ALPHA_COMB_FCN_mask = 0x07 << 21, + ALPHA_COMB_FCN_shift = 21, + ALPHA_DESTBLEND_mask = 0x1f << 24, + ALPHA_DESTBLEND_shift = 24, + SEPARATE_ALPHA_BLEND_bit = 1 << 29, + VGT_DMA_BASE_HI = 0x000287e4, + VGT_DMA_BASE_HI__BASE_ADDR_mask = 0xff << 0, + VGT_DMA_BASE_HI__BASE_ADDR_shift = 0, + VGT_DMA_BASE = 0x000287e8, + VGT_DRAW_INITIATOR = 0x000287f0, + SOURCE_SELECT_mask = 0x03 << 0, + SOURCE_SELECT_shift = 0, + DI_SRC_SEL_DMA = 0x00, + DI_SRC_SEL_IMMEDIATE = 0x01, + DI_SRC_SEL_AUTO_INDEX = 0x02, + DI_SRC_SEL_RESERVED = 0x03, + MAJOR_MODE_mask = 0x03 << 2, + MAJOR_MODE_shift = 2, + DI_MAJOR_MODE_0 = 0x00, + DI_MAJOR_MODE_1 = 0x01, + SPRITE_EN_bit = 1 << 4, + NOT_EOP_bit = 1 << 5, + USE_OPAQUE_bit = 1 << 6, + VGT_IMMED_DATA = 0x000287f4, + VGT_EVENT_ADDRESS_REG = 0x000287f8, + ADDRESS_LOW_mask = 0xfffffff << 0, + ADDRESS_LOW_shift = 0, + DB_DEPTH_CONTROL = 0x00028800, + STENCIL_ENABLE_bit = 1 << 0, + Z_ENABLE_bit = 1 << 1, + Z_WRITE_ENABLE_bit = 1 << 2, + ZFUNC_mask = 0x07 << 4, + ZFUNC_shift = 4, + FRAG_NEVER = 0x00, + FRAG_LESS = 0x01, + FRAG_EQUAL = 
0x02, + FRAG_LEQUAL = 0x03, + FRAG_GREATER = 0x04, + FRAG_NOTEQUAL = 0x05, + FRAG_GEQUAL = 0x06, + FRAG_ALWAYS = 0x07, + BACKFACE_ENABLE_bit = 1 << 7, + STENCILFUNC_mask = 0x07 << 8, + STENCILFUNC_shift = 8, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + STENCILFAIL_mask = 0x07 << 11, + STENCILFAIL_shift = 11, + STENCIL_KEEP = 0x00, + STENCIL_ZERO = 0x01, + STENCIL_REPLACE = 0x02, + STENCIL_INCR_CLAMP = 0x03, + STENCIL_DECR_CLAMP = 0x04, + STENCIL_INVERT = 0x05, + STENCIL_INCR_WRAP = 0x06, + STENCIL_DECR_WRAP = 0x07, + STENCILZPASS_mask = 0x07 << 14, + STENCILZPASS_shift = 14, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILZFAIL_mask = 0x07 << 17, + STENCILZFAIL_shift = 17, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILFUNC_BF_mask = 0x07 << 20, + STENCILFUNC_BF_shift = 20, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + STENCILFAIL_BF_mask = 0x07 << 23, + STENCILFAIL_BF_shift = 23, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILZPASS_BF_mask = 0x07 << 26, + STENCILZPASS_BF_shift = 26, +/* STENCIL_KEEP = 0x00, */ +/* 
STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + STENCILZFAIL_BF_mask = 0x07 << 29, + STENCILZFAIL_BF_shift = 29, +/* STENCIL_KEEP = 0x00, */ +/* STENCIL_ZERO = 0x01, */ +/* STENCIL_REPLACE = 0x02, */ +/* STENCIL_INCR_CLAMP = 0x03, */ +/* STENCIL_DECR_CLAMP = 0x04, */ +/* STENCIL_INVERT = 0x05, */ +/* STENCIL_INCR_WRAP = 0x06, */ +/* STENCIL_DECR_WRAP = 0x07, */ + CB_BLEND_CONTROL = 0x00028804, +/* COLOR_SRCBLEND_mask = 0x1f << 0, */ +/* COLOR_SRCBLEND_shift = 0, */ + BLEND_ZERO = 0x00, + BLEND_ONE = 0x01, + BLEND_SRC_COLOR = 0x02, + BLEND_ONE_MINUS_SRC_COLOR = 0x03, + BLEND_SRC_ALPHA = 0x04, + BLEND_ONE_MINUS_SRC_ALPHA = 0x05, + BLEND_DST_ALPHA = 0x06, + BLEND_ONE_MINUS_DST_ALPHA = 0x07, + BLEND_DST_COLOR = 0x08, + BLEND_ONE_MINUS_DST_COLOR = 0x09, + BLEND_SRC_ALPHA_SATURATE = 0x0a, + BLEND_BOTH_SRC_ALPHA = 0x0b, + BLEND_BOTH_INV_SRC_ALPHA = 0x0c, + BLEND_CONSTANT_COLOR = 0x0d, + BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, + BLEND_SRC1_COLOR = 0x0f, + BLEND_INV_SRC1_COLOR = 0x10, + BLEND_SRC1_ALPHA = 0x11, + BLEND_INV_SRC1_ALPHA = 0x12, + BLEND_CONSTANT_ALPHA = 0x13, + BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, +/* COLOR_COMB_FCN_mask = 0x07 << 5, */ +/* COLOR_COMB_FCN_shift = 5, */ + COMB_DST_PLUS_SRC = 0x00, + COMB_SRC_MINUS_DST = 0x01, + COMB_MIN_DST_SRC = 0x02, + COMB_MAX_DST_SRC = 0x03, + COMB_DST_MINUS_SRC = 0x04, +/* COLOR_DESTBLEND_mask = 0x1f << 8, */ +/* COLOR_DESTBLEND_shift = 8, */ +/* BLEND_ZERO = 0x00, */ +/* BLEND_ONE = 0x01, */ +/* BLEND_SRC_COLOR = 0x02, */ +/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ +/* BLEND_SRC_ALPHA = 0x04, */ +/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ +/* BLEND_DST_ALPHA = 0x06, */ +/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ +/* BLEND_DST_COLOR = 0x08, */ +/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ +/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ +/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ +/* 
BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ +/* BLEND_CONSTANT_COLOR = 0x0d, */ +/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ +/* BLEND_SRC1_COLOR = 0x0f, */ +/* BLEND_INV_SRC1_COLOR = 0x10, */ +/* BLEND_SRC1_ALPHA = 0x11, */ +/* BLEND_INV_SRC1_ALPHA = 0x12, */ +/* BLEND_CONSTANT_ALPHA = 0x13, */ +/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ +/* OPACITY_WEIGHT_bit = 1 << 13, */ +/* ALPHA_SRCBLEND_mask = 0x1f << 16, */ +/* ALPHA_SRCBLEND_shift = 16, */ +/* BLEND_ZERO = 0x00, */ +/* BLEND_ONE = 0x01, */ +/* BLEND_SRC_COLOR = 0x02, */ +/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ +/* BLEND_SRC_ALPHA = 0x04, */ +/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ +/* BLEND_DST_ALPHA = 0x06, */ +/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ +/* BLEND_DST_COLOR = 0x08, */ +/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ +/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ +/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ +/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ +/* BLEND_CONSTANT_COLOR = 0x0d, */ +/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ +/* BLEND_SRC1_COLOR = 0x0f, */ +/* BLEND_INV_SRC1_COLOR = 0x10, */ +/* BLEND_SRC1_ALPHA = 0x11, */ +/* BLEND_INV_SRC1_ALPHA = 0x12, */ +/* BLEND_CONSTANT_ALPHA = 0x13, */ +/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ +/* ALPHA_COMB_FCN_mask = 0x07 << 21, */ +/* ALPHA_COMB_FCN_shift = 21, */ +/* COMB_DST_PLUS_SRC = 0x00, */ +/* COMB_SRC_MINUS_DST = 0x01, */ +/* COMB_MIN_DST_SRC = 0x02, */ +/* COMB_MAX_DST_SRC = 0x03, */ +/* COMB_DST_MINUS_SRC = 0x04, */ +/* ALPHA_DESTBLEND_mask = 0x1f << 24, */ +/* ALPHA_DESTBLEND_shift = 24, */ +/* BLEND_ZERO = 0x00, */ +/* BLEND_ONE = 0x01, */ +/* BLEND_SRC_COLOR = 0x02, */ +/* BLEND_ONE_MINUS_SRC_COLOR = 0x03, */ +/* BLEND_SRC_ALPHA = 0x04, */ +/* BLEND_ONE_MINUS_SRC_ALPHA = 0x05, */ +/* BLEND_DST_ALPHA = 0x06, */ +/* BLEND_ONE_MINUS_DST_ALPHA = 0x07, */ +/* BLEND_DST_COLOR = 0x08, */ +/* BLEND_ONE_MINUS_DST_COLOR = 0x09, */ +/* BLEND_SRC_ALPHA_SATURATE = 0x0a, */ +/* BLEND_BOTH_SRC_ALPHA = 0x0b, */ +/* BLEND_BOTH_INV_SRC_ALPHA = 0x0c, */ +/* BLEND_CONSTANT_COLOR 
= 0x0d, */ +/* BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0e, */ +/* BLEND_SRC1_COLOR = 0x0f, */ +/* BLEND_INV_SRC1_COLOR = 0x10, */ +/* BLEND_SRC1_ALPHA = 0x11, */ +/* BLEND_INV_SRC1_ALPHA = 0x12, */ +/* BLEND_CONSTANT_ALPHA = 0x13, */ +/* BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x14, */ +/* SEPARATE_ALPHA_BLEND_bit = 1 << 29, */ + CB_COLOR_CONTROL = 0x00028808, + FOG_ENABLE_bit = 1 << 0, + MULTIWRITE_ENABLE_bit = 1 << 1, + DITHER_ENABLE_bit = 1 << 2, + DEGAMMA_ENABLE_bit = 1 << 3, + SPECIAL_OP_mask = 0x07 << 4, + SPECIAL_OP_shift = 4, + SPECIAL_NORMAL = 0x00, + SPECIAL_DISABLE = 0x01, + SPECIAL_FAST_CLEAR = 0x02, + SPECIAL_FORCE_CLEAR = 0x03, + SPECIAL_EXPAND_COLOR = 0x04, + SPECIAL_EXPAND_TEXTURE = 0x05, + SPECIAL_EXPAND_SAMPLES = 0x06, + SPECIAL_RESOLVE_BOX = 0x07, + PER_MRT_BLEND_bit = 1 << 7, + TARGET_BLEND_ENABLE_mask = 0xff << 8, + TARGET_BLEND_ENABLE_shift = 8, + ROP3_mask = 0xff << 16, + ROP3_shift = 16, + DB_SHADER_CONTROL = 0x0002880c, + Z_EXPORT_ENABLE_bit = 1 << 0, + STENCIL_REF_EXPORT_ENABLE_bit = 1 << 1, + Z_ORDER_mask = 0x03 << 4, + Z_ORDER_shift = 4, + LATE_Z = 0x00, + EARLY_Z_THEN_LATE_Z = 0x01, + RE_Z = 0x02, + EARLY_Z_THEN_RE_Z = 0x03, + KILL_ENABLE_bit = 1 << 6, + COVERAGE_TO_MASK_ENABLE_bit = 1 << 7, + MASK_EXPORT_ENABLE_bit = 1 << 8, + DUAL_EXPORT_ENABLE_bit = 1 << 9, + EXEC_ON_HIER_FAIL_bit = 1 << 10, + EXEC_ON_NOOP_bit = 1 << 11, + PA_CL_CLIP_CNTL = 0x00028810, + UCP_ENA_0_bit = 1 << 0, + UCP_ENA_1_bit = 1 << 1, + UCP_ENA_2_bit = 1 << 2, + UCP_ENA_3_bit = 1 << 3, + UCP_ENA_4_bit = 1 << 4, + UCP_ENA_5_bit = 1 << 5, + PS_UCP_Y_SCALE_NEG_bit = 1 << 13, + PS_UCP_MODE_mask = 0x03 << 14, + PS_UCP_MODE_shift = 14, + CLIP_DISABLE_bit = 1 << 16, + UCP_CULL_ONLY_ENA_bit = 1 << 17, + BOUNDARY_EDGE_FLAG_ENA_bit = 1 << 18, + DX_CLIP_SPACE_DEF_bit = 1 << 19, + DIS_CLIP_ERR_DETECT_bit = 1 << 20, + VTX_KILL_OR_bit = 1 << 21, + DX_LINEAR_ATTR_CLIP_ENA_bit = 1 << 24, + VTE_VPORT_PROVOKE_DISABLE_bit = 1 << 25, + ZCLIP_NEAR_DISABLE_bit = 1 << 26, + ZCLIP_FAR_DISABLE_bit = 1 
<< 27, + PA_SU_SC_MODE_CNTL = 0x00028814, + CULL_FRONT_bit = 1 << 0, + CULL_BACK_bit = 1 << 1, + FACE_bit = 1 << 2, + POLY_MODE_mask = 0x03 << 3, + POLY_MODE_shift = 3, + X_DISABLE_POLY_MODE = 0x00, + X_DUAL_MODE = 0x01, + POLYMODE_FRONT_PTYPE_mask = 0x07 << 5, + POLYMODE_FRONT_PTYPE_shift = 5, + X_DRAW_POINTS = 0x00, + X_DRAW_LINES = 0x01, + X_DRAW_TRIANGLES = 0x02, + POLYMODE_BACK_PTYPE_mask = 0x07 << 8, + POLYMODE_BACK_PTYPE_shift = 8, +/* X_DRAW_POINTS = 0x00, */ +/* X_DRAW_LINES = 0x01, */ +/* X_DRAW_TRIANGLES = 0x02, */ + POLY_OFFSET_FRONT_ENABLE_bit = 1 << 11, + POLY_OFFSET_BACK_ENABLE_bit = 1 << 12, + POLY_OFFSET_PARA_ENABLE_bit = 1 << 13, + VTX_WINDOW_OFFSET_ENABLE_bit = 1 << 16, + PROVOKING_VTX_LAST_bit = 1 << 19, + PERSP_CORR_DIS_bit = 1 << 20, + MULTI_PRIM_IB_ENA_bit = 1 << 21, + PA_CL_VTE_CNTL = 0x00028818, + VPORT_X_SCALE_ENA_bit = 1 << 0, + VPORT_X_OFFSET_ENA_bit = 1 << 1, + VPORT_Y_SCALE_ENA_bit = 1 << 2, + VPORT_Y_OFFSET_ENA_bit = 1 << 3, + VPORT_Z_SCALE_ENA_bit = 1 << 4, + VPORT_Z_OFFSET_ENA_bit = 1 << 5, + VTX_XY_FMT_bit = 1 << 8, + VTX_Z_FMT_bit = 1 << 9, + VTX_W0_FMT_bit = 1 << 10, + PERFCOUNTER_REF_bit = 1 << 11, + PA_CL_VS_OUT_CNTL = 0x0002881c, + CLIP_DIST_ENA_0_bit = 1 << 0, + CLIP_DIST_ENA_1_bit = 1 << 1, + CLIP_DIST_ENA_2_bit = 1 << 2, + CLIP_DIST_ENA_3_bit = 1 << 3, + CLIP_DIST_ENA_4_bit = 1 << 4, + CLIP_DIST_ENA_5_bit = 1 << 5, + CLIP_DIST_ENA_6_bit = 1 << 6, + CLIP_DIST_ENA_7_bit = 1 << 7, + CULL_DIST_ENA_0_bit = 1 << 8, + CULL_DIST_ENA_1_bit = 1 << 9, + CULL_DIST_ENA_2_bit = 1 << 10, + CULL_DIST_ENA_3_bit = 1 << 11, + CULL_DIST_ENA_4_bit = 1 << 12, + CULL_DIST_ENA_5_bit = 1 << 13, + CULL_DIST_ENA_6_bit = 1 << 14, + CULL_DIST_ENA_7_bit = 1 << 15, + USE_VTX_POINT_SIZE_bit = 1 << 16, + USE_VTX_EDGE_FLAG_bit = 1 << 17, + USE_VTX_RENDER_TARGET_INDX_bit = 1 << 18, + USE_VTX_VIEWPORT_INDX_bit = 1 << 19, + USE_VTX_KILL_FLAG_bit = 1 << 20, + VS_OUT_MISC_VEC_ENA_bit = 1 << 21, + VS_OUT_CCDIST0_VEC_ENA_bit = 1 << 22, + VS_OUT_CCDIST1_VEC_ENA_bit 
= 1 << 23, + PA_CL_NANINF_CNTL = 0x00028820, + VTE_XY_INF_DISCARD_bit = 1 << 0, + VTE_Z_INF_DISCARD_bit = 1 << 1, + VTE_W_INF_DISCARD_bit = 1 << 2, + VTE_0XNANINF_IS_0_bit = 1 << 3, + VTE_XY_NAN_RETAIN_bit = 1 << 4, + VTE_Z_NAN_RETAIN_bit = 1 << 5, + VTE_W_NAN_RETAIN_bit = 1 << 6, + VTE_W_RECIP_NAN_IS_0_bit = 1 << 7, + VS_XY_NAN_TO_INF_bit = 1 << 8, + VS_XY_INF_RETAIN_bit = 1 << 9, + VS_Z_NAN_TO_INF_bit = 1 << 10, + VS_Z_INF_RETAIN_bit = 1 << 11, + VS_W_NAN_TO_INF_bit = 1 << 12, + VS_W_INF_RETAIN_bit = 1 << 13, + VS_CLIP_DIST_INF_DISCARD_bit = 1 << 14, + VTE_NO_OUTPUT_NEG_0_bit = 1 << 20, + SQ_PGM_START_PS = 0x00028840, + SQ_PGM_RESOURCES_PS = 0x00028850, + NUM_GPRS_mask = 0xff << 0, + NUM_GPRS_shift = 0, + STACK_SIZE_mask = 0xff << 8, + STACK_SIZE_shift = 8, + SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit = 1 << 21, + FETCH_CACHE_LINES_mask = 0x07 << 24, + FETCH_CACHE_LINES_shift = 24, + UNCACHED_FIRST_INST_bit = 1 << 28, + CLAMP_CONSTS_bit = 1 << 31, + SQ_PGM_EXPORTS_PS = 0x00028854, + EXPORT_MODE_mask = 0x1f << 0, + EXPORT_MODE_shift = 0, + SQ_PGM_START_VS = 0x00028858, + SQ_PGM_RESOURCES_VS = 0x00028868, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ + SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit = 1 << 21, +/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ +/* FETCH_CACHE_LINES_shift = 24, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_START_GS = 0x0002886c, + SQ_PGM_RESOURCES_GS = 0x0002887c, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ + SQ_PGM_RESOURCES_GS__DX10_CLAMP_bit = 1 << 21, +/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ +/* FETCH_CACHE_LINES_shift = 24, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_START_ES = 0x00028880, + SQ_PGM_RESOURCES_ES = 0x00028890, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ + 
SQ_PGM_RESOURCES_ES__DX10_CLAMP_bit = 1 << 21, +/* FETCH_CACHE_LINES_mask = 0x07 << 24, */ +/* FETCH_CACHE_LINES_shift = 24, */ +/* UNCACHED_FIRST_INST_bit = 1 << 28, */ + SQ_PGM_START_FS = 0x00028894, + SQ_PGM_RESOURCES_FS = 0x000288a4, +/* NUM_GPRS_mask = 0xff << 0, */ +/* NUM_GPRS_shift = 0, */ +/* STACK_SIZE_mask = 0xff << 8, */ +/* STACK_SIZE_shift = 8, */ + SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit = 1 << 21, + SQ_ESGS_RING_ITEMSIZE = 0x000288a8, + ITEMSIZE_mask = 0x7fff << 0, + ITEMSIZE_shift = 0, + SQ_GSVS_RING_ITEMSIZE = 0x000288ac, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_ESTMP_RING_ITEMSIZE = 0x000288b0, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GSTMP_RING_ITEMSIZE = 0x000288b4, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_VSTMP_RING_ITEMSIZE = 0x000288b8, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_PSTMP_RING_ITEMSIZE = 0x000288bc, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_FBUF_RING_ITEMSIZE = 0x000288c0, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_REDUC_RING_ITEMSIZE = 0x000288c4, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_GS_VERT_ITEMSIZE = 0x000288c8, +/* ITEMSIZE_mask = 0x7fff << 0, */ +/* ITEMSIZE_shift = 0, */ + SQ_PGM_CF_OFFSET_PS = 0x000288cc, + PGM_CF_OFFSET_mask = 0xfffff << 0, + PGM_CF_OFFSET_shift = 0, + SQ_PGM_CF_OFFSET_VS = 0x000288d0, +/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ +/* PGM_CF_OFFSET_shift = 0, */ + SQ_PGM_CF_OFFSET_GS = 0x000288d4, +/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ +/* PGM_CF_OFFSET_shift = 0, */ + SQ_PGM_CF_OFFSET_ES = 0x000288d8, +/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ +/* PGM_CF_OFFSET_shift = 0, */ + SQ_PGM_CF_OFFSET_FS = 0x000288dc, +/* PGM_CF_OFFSET_mask = 0xfffff << 0, */ +/* PGM_CF_OFFSET_shift = 0, */ + SQ_VTX_SEMANTIC_CLEAR = 0x000288e0, + SQ_ALU_CONST_CACHE_PS_0 = 0x00028940, + SQ_ALU_CONST_CACHE_PS_0_num = 16, + SQ_ALU_CONST_CACHE_VS_0 = 
0x00028980, + SQ_ALU_CONST_CACHE_VS_0_num = 16, + SQ_ALU_CONST_CACHE_GS_0 = 0x000289c0, + SQ_ALU_CONST_CACHE_GS_0_num = 16, + PA_SU_POINT_SIZE = 0x00028a00, + PA_SU_POINT_SIZE__HEIGHT_mask = 0xffff << 0, + PA_SU_POINT_SIZE__HEIGHT_shift = 0, + PA_SU_POINT_SIZE__WIDTH_mask = 0xffff << 16, + PA_SU_POINT_SIZE__WIDTH_shift = 16, + PA_SU_POINT_MINMAX = 0x00028a04, + MIN_SIZE_mask = 0xffff << 0, + MIN_SIZE_shift = 0, + MAX_SIZE_mask = 0xffff << 16, + MAX_SIZE_shift = 16, + PA_SU_LINE_CNTL = 0x00028a08, + PA_SU_LINE_CNTL__WIDTH_mask = 0xffff << 0, + PA_SU_LINE_CNTL__WIDTH_shift = 0, + PA_SC_LINE_STIPPLE = 0x00028a0c, + LINE_PATTERN_mask = 0xffff << 0, + LINE_PATTERN_shift = 0, + REPEAT_COUNT_mask = 0xff << 16, + REPEAT_COUNT_shift = 16, + PATTERN_BIT_ORDER_bit = 1 << 28, + AUTO_RESET_CNTL_mask = 0x03 << 29, + AUTO_RESET_CNTL_shift = 29, + VGT_OUTPUT_PATH_CNTL = 0x00028a10, + PATH_SELECT_mask = 0x03 << 0, + PATH_SELECT_shift = 0, + VGT_OUTPATH_VTX_REUSE = 0x00, + VGT_OUTPATH_TESS_EN = 0x01, + VGT_OUTPATH_PASSTHRU = 0x02, + VGT_OUTPATH_GS_BLOCK = 0x03, + VGT_HOS_CNTL = 0x00028a14, + TESS_MODE_mask = 0x03 << 0, + TESS_MODE_shift = 0, + VGT_HOS_MAX_TESS_LEVEL = 0x00028a18, + VGT_HOS_MIN_TESS_LEVEL = 0x00028a1c, + VGT_HOS_REUSE_DEPTH = 0x00028a20, + REUSE_DEPTH_mask = 0xff << 0, + REUSE_DEPTH_shift = 0, + VGT_GROUP_PRIM_TYPE = 0x00028a24, + VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask = 0x1f << 0, + VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift = 0, + VGT_GRP_3D_POINT = 0x00, + VGT_GRP_3D_LINE = 0x01, + VGT_GRP_3D_TRI = 0x02, + VGT_GRP_3D_RECT = 0x03, + VGT_GRP_3D_QUAD = 0x04, + VGT_GRP_2D_COPY_RECT_V0 = 0x05, + VGT_GRP_2D_COPY_RECT_V1 = 0x06, + VGT_GRP_2D_COPY_RECT_V2 = 0x07, + VGT_GRP_2D_COPY_RECT_V3 = 0x08, + VGT_GRP_2D_FILL_RECT = 0x09, + VGT_GRP_2D_LINE = 0x0a, + VGT_GRP_2D_TRI = 0x0b, + VGT_GRP_PRIM_INDEX_LINE = 0x0c, + VGT_GRP_PRIM_INDEX_TRI = 0x0d, + VGT_GRP_PRIM_INDEX_QUAD = 0x0e, + VGT_GRP_3D_LINE_ADJ = 0x0f, + VGT_GRP_3D_TRI_ADJ = 0x10, + RETAIN_ORDER_bit = 1 << 14, + 
RETAIN_QUADS_bit = 1 << 15, + PRIM_ORDER_mask = 0x07 << 16, + PRIM_ORDER_shift = 16, + VGT_GRP_LIST = 0x00, + VGT_GRP_STRIP = 0x01, + VGT_GRP_FAN = 0x02, + VGT_GRP_LOOP = 0x03, + VGT_GRP_POLYGON = 0x04, + VGT_GROUP_FIRST_DECR = 0x00028a28, + FIRST_DECR_mask = 0x0f << 0, + FIRST_DECR_shift = 0, + VGT_GROUP_DECR = 0x00028a2c, + DECR_mask = 0x0f << 0, + DECR_shift = 0, + VGT_GROUP_VECT_0_CNTL = 0x00028a30, + COMP_X_EN_bit = 1 << 0, + COMP_Y_EN_bit = 1 << 1, + COMP_Z_EN_bit = 1 << 2, + COMP_W_EN_bit = 1 << 3, + VGT_GROUP_VECT_0_CNTL__STRIDE_mask = 0xff << 8, + VGT_GROUP_VECT_0_CNTL__STRIDE_shift = 8, + SHIFT_mask = 0xff << 16, + SHIFT_shift = 16, + VGT_GROUP_VECT_1_CNTL = 0x00028a34, +/* COMP_X_EN_bit = 1 << 0, */ +/* COMP_Y_EN_bit = 1 << 1, */ +/* COMP_Z_EN_bit = 1 << 2, */ +/* COMP_W_EN_bit = 1 << 3, */ + VGT_GROUP_VECT_1_CNTL__STRIDE_mask = 0xff << 8, + VGT_GROUP_VECT_1_CNTL__STRIDE_shift = 8, +/* SHIFT_mask = 0xff << 16, */ +/* SHIFT_shift = 16, */ + VGT_GROUP_VECT_0_FMT_CNTL = 0x00028a38, + X_CONV_mask = 0x0f << 0, + X_CONV_shift = 0, + VGT_GRP_INDEX_16 = 0x00, + VGT_GRP_INDEX_32 = 0x01, + VGT_GRP_UINT_16 = 0x02, + VGT_GRP_UINT_32 = 0x03, + VGT_GRP_SINT_16 = 0x04, + VGT_GRP_SINT_32 = 0x05, + VGT_GRP_FLOAT_32 = 0x06, + VGT_GRP_AUTO_PRIM = 0x07, + VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, + X_OFFSET_mask = 0x0f << 4, + X_OFFSET_shift = 4, + Y_CONV_mask = 0x0f << 8, + Y_CONV_shift = 8, +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ + Y_OFFSET_mask = 0x0f << 12, + Y_OFFSET_shift = 12, + Z_CONV_mask = 0x0f << 16, + Z_CONV_shift = 16, +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* 
VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ + Z_OFFSET_mask = 0x0f << 20, + Z_OFFSET_shift = 20, + W_CONV_mask = 0x0f << 24, + W_CONV_shift = 24, +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ + W_OFFSET_mask = 0x0f << 28, + W_OFFSET_shift = 28, + VGT_GROUP_VECT_1_FMT_CNTL = 0x00028a3c, +/* X_CONV_mask = 0x0f << 0, */ +/* X_CONV_shift = 0, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* X_OFFSET_mask = 0x0f << 4, */ +/* X_OFFSET_shift = 4, */ +/* Y_CONV_mask = 0x0f << 8, */ +/* Y_CONV_shift = 8, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* Y_OFFSET_mask = 0x0f << 12, */ +/* Y_OFFSET_shift = 12, */ +/* Z_CONV_mask = 0x0f << 16, */ +/* Z_CONV_shift = 16, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* Z_OFFSET_mask = 0x0f << 20, */ +/* Z_OFFSET_shift = 20, */ +/* W_CONV_mask = 0x0f << 24, */ +/* W_CONV_shift = 24, */ +/* VGT_GRP_INDEX_16 = 0x00, */ +/* VGT_GRP_INDEX_32 = 0x01, */ +/* VGT_GRP_UINT_16 = 0x02, */ +/* 
VGT_GRP_UINT_32 = 0x03, */ +/* VGT_GRP_SINT_16 = 0x04, */ +/* VGT_GRP_SINT_32 = 0x05, */ +/* VGT_GRP_FLOAT_32 = 0x06, */ +/* VGT_GRP_AUTO_PRIM = 0x07, */ +/* VGT_GRP_FIX_1_23_TO_FLOAT = 0x08, */ +/* W_OFFSET_mask = 0x0f << 28, */ +/* W_OFFSET_shift = 28, */ + VGT_GS_MODE = 0x00028a40, + MODE_mask = 0x03 << 0, + MODE_shift = 0, + GS_OFF = 0x00, + GS_SCENARIO_A = 0x01, + GS_SCENARIO_B = 0x02, + GS_SCENARIO_G = 0x03, + ES_PASSTHRU_bit = 1 << 2, + CUT_MODE_mask = 0x03 << 3, + CUT_MODE_shift = 3, + GS_CUT_1024 = 0x00, + GS_CUT_512 = 0x01, + GS_CUT_256 = 0x02, + GS_CUT_128 = 0x03, + PA_SC_MPASS_PS_CNTL = 0x00028a48, + MPASS_PIX_VEC_PER_PASS_mask = 0xfffff << 0, + MPASS_PIX_VEC_PER_PASS_shift = 0, + MPASS_PS_ENA_bit = 1 << 31, + PA_SC_MODE_CNTL = 0x00028a4c, + MSAA_ENABLE_bit = 1 << 0, + CLIPRECT_ENABLE_bit = 1 << 1, + LINE_STIPPLE_ENABLE_bit = 1 << 2, + MULTI_CHIP_PRIM_DISCARD_ENAB_bit = 1 << 3, + WALK_ORDER_ENABLE_bit = 1 << 4, + HALVE_DETAIL_SAMPLE_PERF_bit = 1 << 5, + WALK_SIZE_bit = 1 << 6, + WALK_ALIGNMENT_bit = 1 << 7, + WALK_ALIGN8_PRIM_FITS_ST_bit = 1 << 8, + TILE_COVER_NO_SCISSOR_bit = 1 << 9, + KILL_PIX_POST_HI_Z_bit = 1 << 10, + KILL_PIX_POST_DETAIL_MASK_bit = 1 << 11, + MULTI_CHIP_SUPERTILE_ENABLE_bit = 1 << 12, + TILE_COVER_DISABLE_bit = 1 << 13, + FORCE_EOV_CNTDWN_ENABLE_bit = 1 << 14, + FORCE_EOV_TILE_ENABLE_bit = 1 << 15, + FORCE_EOV_REZ_ENABLE_bit = 1 << 16, + PS_ITER_SAMPLE_bit = 1 << 17, + VGT_ENHANCE = 0x00028a50, + VGT_ENHANCE__MI_TIMESTAMP_RES_mask = 0x03 << 0, + VGT_ENHANCE__MI_TIMESTAMP_RES_shift = 0, + X_0_992_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_32 = 0x00, + X_0_496_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_16 = 0x01, + X_0_248_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_8 = 0x02, + X_0_124_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_4 = 0x03, + MISC_mask = 0x3fffffff << 2, + MISC_shift = 2, + VGT_GS_OUT_PRIM_TYPE = 0x00028a6c, + OUTPRIM_TYPE_mask = 0x3f << 0, + OUTPRIM_TYPE_shift = 0, + POINTLIST = 0x00, + LINESTRIP = 0x01, + TRISTRIP = 0x02, + VGT_DMA_SIZE = 0x00028a74, + 
VGT_DMA_INDEX_TYPE = 0x00028a7c, +/* INDEX_TYPE_mask = 0x03 << 0, */ +/* INDEX_TYPE_shift = 0, */ + VGT_INDEX_16 = 0x00, + VGT_INDEX_32 = 0x01, + SWAP_MODE_mask = 0x03 << 2, + SWAP_MODE_shift = 2, + VGT_DMA_SWAP_NONE = 0x00, + VGT_DMA_SWAP_16_BIT = 0x01, + VGT_DMA_SWAP_32_BIT = 0x02, + VGT_DMA_SWAP_WORD = 0x03, + VGT_PRIMITIVEID_EN = 0x00028a84, + PRIMITIVEID_EN_bit = 1 << 0, + VGT_DMA_NUM_INSTANCES = 0x00028a88, + VGT_EVENT_INITIATOR = 0x00028a90, + EVENT_TYPE_mask = 0x3f << 0, + EVENT_TYPE_shift = 0, + CACHE_FLUSH_TS = 0x04, + CONTEXT_DONE = 0x05, + CACHE_FLUSH = 0x06, + VIZQUERY_START = 0x07, + VIZQUERY_END = 0x08, + SC_WAIT_WC = 0x09, + MPASS_PS_CP_REFETCH = 0x0a, + MPASS_PS_RST_START = 0x0b, + MPASS_PS_INCR_START = 0x0c, + RST_PIX_CNT = 0x0d, + RST_VTX_CNT = 0x0e, + VS_PARTIAL_FLUSH = 0x0f, + PS_PARTIAL_FLUSH = 0x10, + CACHE_FLUSH_AND_INV_TS_EVENT = 0x14, + ZPASS_DONE = 0x15, + CACHE_FLUSH_AND_INV_EVENT = 0x16, + PERFCOUNTER_START = 0x17, + PERFCOUNTER_STOP = 0x18, + PIPELINESTAT_START = 0x19, + PIPELINESTAT_STOP = 0x1a, + PERFCOUNTER_SAMPLE = 0x1b, + FLUSH_ES_OUTPUT = 0x1c, + FLUSH_GS_OUTPUT = 0x1d, + SAMPLE_PIPELINESTAT = 0x1e, + SO_VGTSTREAMOUT_FLUSH = 0x1f, + SAMPLE_STREAMOUTSTATS = 0x20, + RESET_VTX_CNT = 0x21, + BLOCK_CONTEXT_DONE = 0x22, + CR_CONTEXT_DONE = 0x23, + VGT_FLUSH = 0x24, + CR_DONE_TS = 0x25, + SQ_NON_EVENT = 0x26, + SC_SEND_DB_VPZ = 0x27, + BOTTOM_OF_PIPE_TS = 0x28, + DB_CACHE_FLUSH_AND_INV = 0x2a, + ADDRESS_HI_mask = 0xff << 19, + ADDRESS_HI_shift = 19, + EXTENDED_EVENT_bit = 1 << 27, + VGT_MULTI_PRIM_IB_RESET_EN = 0x00028a94, + RESET_EN_bit = 1 << 0, + VGT_INSTANCE_STEP_RATE_0 = 0x00028aa0, + VGT_INSTANCE_STEP_RATE_1 = 0x00028aa4, + VGT_STRMOUT_EN = 0x00028ab0, + STREAMOUT_bit = 1 << 0, + VGT_REUSE_OFF = 0x00028ab4, + REUSE_OFF_bit = 1 << 0, + VGT_VTX_CNT_EN = 0x00028ab8, + VTX_CNT_EN_bit = 1 << 0, + VGT_STRMOUT_BUFFER_SIZE_0 = 0x00028ad0, + VGT_STRMOUT_VTX_STRIDE_0 = 0x00028ad4, + VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask = 0x3ff << 0, + 
VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_0 = 0x00028ad8, + VGT_STRMOUT_BUFFER_OFFSET_0 = 0x00028adc, + VGT_STRMOUT_BUFFER_SIZE_1 = 0x00028ae0, + VGT_STRMOUT_VTX_STRIDE_1 = 0x00028ae4, + VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_1 = 0x00028ae8, + VGT_STRMOUT_BUFFER_OFFSET_1 = 0x00028aec, + VGT_STRMOUT_BUFFER_SIZE_2 = 0x00028af0, + VGT_STRMOUT_VTX_STRIDE_2 = 0x00028af4, + VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_2 = 0x00028af8, + VGT_STRMOUT_BUFFER_OFFSET_2 = 0x00028afc, + VGT_STRMOUT_BUFFER_SIZE_3 = 0x00028b00, + VGT_STRMOUT_VTX_STRIDE_3 = 0x00028b04, + VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask = 0x3ff << 0, + VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift = 0, + VGT_STRMOUT_BUFFER_BASE_3 = 0x00028b08, + VGT_STRMOUT_BUFFER_OFFSET_3 = 0x00028b0c, + VGT_STRMOUT_BASE_OFFSET_0 = 0x00028b10, + VGT_STRMOUT_BASE_OFFSET_1 = 0x00028b14, + VGT_STRMOUT_BASE_OFFSET_2 = 0x00028b18, + VGT_STRMOUT_BASE_OFFSET_3 = 0x00028b1c, + VGT_STRMOUT_BUFFER_EN = 0x00028b20, + BUFFER_0_EN_bit = 1 << 0, + BUFFER_1_EN_bit = 1 << 1, + BUFFER_2_EN_bit = 1 << 2, + BUFFER_3_EN_bit = 1 << 3, + VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x00028b28, + VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x00028b2c, + VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x00028b30, + VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x00028b44, + VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x00028b48, + VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x00028b4c, + VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask = 0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift = 0, + VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x00028b50, + VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask = 
0x3f << 0, + VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift = 0, + PA_SC_LINE_CNTL = 0x00028c00, + BRES_CNTL_mask = 0xff << 0, + BRES_CNTL_shift = 0, + USE_BRES_CNTL_bit = 1 << 8, + EXPAND_LINE_WIDTH_bit = 1 << 9, + LAST_PIXEL_bit = 1 << 10, + PA_SC_AA_CONFIG = 0x00028c04, + MSAA_NUM_SAMPLES_mask = 0x03 << 0, + MSAA_NUM_SAMPLES_shift = 0, + AA_MASK_CENTROID_DTMN_bit = 1 << 4, + MAX_SAMPLE_DIST_mask = 0x0f << 13, + MAX_SAMPLE_DIST_shift = 13, + PA_SU_VTX_CNTL = 0x00028c08, + PIX_CENTER_bit = 1 << 0, + PA_SU_VTX_CNTL__ROUND_MODE_mask = 0x03 << 1, + PA_SU_VTX_CNTL__ROUND_MODE_shift = 1, + X_TRUNCATE = 0x00, + X_ROUND = 0x01, + X_ROUND_TO_EVEN = 0x02, + X_ROUND_TO_ODD = 0x03, + QUANT_MODE_mask = 0x07 << 3, + QUANT_MODE_shift = 3, + X_1_16TH = 0x00, + X_1_8TH = 0x01, + X_1_4TH = 0x02, + X_1_2 = 0x03, + X_1 = 0x04, + X_1_256TH = 0x05, + PA_CL_GB_VERT_CLIP_ADJ = 0x00028c0c, + PA_CL_GB_VERT_DISC_ADJ = 0x00028c10, + PA_CL_GB_HORZ_CLIP_ADJ = 0x00028c14, + PA_CL_GB_HORZ_DISC_ADJ = 0x00028c18, + PA_SC_AA_SAMPLE_LOCS_MCTX = 0x00028c1c, +/* S0_X_mask = 0x0f << 0, */ +/* S0_X_shift = 0, */ +/* S0_Y_mask = 0x0f << 4, */ +/* S0_Y_shift = 4, */ +/* S1_X_mask = 0x0f << 8, */ +/* S1_X_shift = 8, */ +/* S1_Y_mask = 0x0f << 12, */ +/* S1_Y_shift = 12, */ +/* S2_X_mask = 0x0f << 16, */ +/* S2_X_shift = 16, */ +/* S2_Y_mask = 0x0f << 20, */ +/* S2_Y_shift = 20, */ +/* S3_X_mask = 0x0f << 24, */ +/* S3_X_shift = 24, */ +/* S3_Y_mask = 0x0f << 28, */ +/* S3_Y_shift = 28, */ + PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX = 0x00028c20, +/* S4_X_mask = 0x0f << 0, */ +/* S4_X_shift = 0, */ +/* S4_Y_mask = 0x0f << 4, */ +/* S4_Y_shift = 4, */ +/* S5_X_mask = 0x0f << 8, */ +/* S5_X_shift = 8, */ +/* S5_Y_mask = 0x0f << 12, */ +/* S5_Y_shift = 12, */ +/* S6_X_mask = 0x0f << 16, */ +/* S6_X_shift = 16, */ +/* S6_Y_mask = 0x0f << 20, */ +/* S6_Y_shift = 20, */ +/* S7_X_mask = 0x0f << 24, */ +/* S7_X_shift = 24, */ +/* S7_Y_mask = 0x0f << 28, */ +/* S7_Y_shift = 28, */ + CB_CLRCMP_CONTROL = 0x00028c30, + 
CLRCMP_FCN_SRC_mask = 0x07 << 0, + CLRCMP_FCN_SRC_shift = 0, + CLRCMP_DRAW_ALWAYS = 0x00, + CLRCMP_DRAW_NEVER = 0x01, + CLRCMP_DRAW_ON_NEQ = 0x04, + CLRCMP_DRAW_ON_EQ = 0x05, + CLRCMP_FCN_DST_mask = 0x07 << 8, + CLRCMP_FCN_DST_shift = 8, +/* CLRCMP_DRAW_ALWAYS = 0x00, */ +/* CLRCMP_DRAW_NEVER = 0x01, */ +/* CLRCMP_DRAW_ON_NEQ = 0x04, */ +/* CLRCMP_DRAW_ON_EQ = 0x05, */ + CLRCMP_FCN_SEL_mask = 0x03 << 24, + CLRCMP_FCN_SEL_shift = 24, + CLRCMP_SEL_DST = 0x00, + CLRCMP_SEL_SRC = 0x01, + CLRCMP_SEL_AND = 0x02, + CB_CLRCMP_SRC = 0x00028c34, + CB_CLRCMP_DST = 0x00028c38, + CB_CLRCMP_MSK = 0x00028c3c, + PA_SC_AA_MASK = 0x00028c48, + VGT_VERTEX_REUSE_BLOCK_CNTL = 0x00028c58, + VTX_REUSE_DEPTH_mask = 0xff << 0, + VTX_REUSE_DEPTH_shift = 0, + VGT_OUT_DEALLOC_CNTL = 0x00028c5c, + DEALLOC_DIST_mask = 0x7f << 0, + DEALLOC_DIST_shift = 0, + DB_RENDER_CONTROL = 0x00028d0c, + DEPTH_CLEAR_ENABLE_bit = 1 << 0, + STENCIL_CLEAR_ENABLE_bit = 1 << 1, + DEPTH_COPY_bit = 1 << 2, + STENCIL_COPY_bit = 1 << 3, + RESUMMARIZE_ENABLE_bit = 1 << 4, + STENCIL_COMPRESS_DISABLE_bit = 1 << 5, + DEPTH_COMPRESS_DISABLE_bit = 1 << 6, + COPY_CENTROID_bit = 1 << 7, + COPY_SAMPLE_mask = 0x07 << 8, + COPY_SAMPLE_shift = 8, + ZPASS_INCREMENT_DISABLE_bit = 1 << 11, + DB_RENDER_OVERRIDE = 0x00028d10, + FORCE_HIZ_ENABLE_mask = 0x03 << 0, + FORCE_HIZ_ENABLE_shift = 0, + FORCE_OFF = 0x00, + FORCE_ENABLE = 0x01, + FORCE_DISABLE = 0x02, + FORCE_RESERVED = 0x03, + FORCE_HIS_ENABLE0_mask = 0x03 << 2, + FORCE_HIS_ENABLE0_shift = 2, +/* FORCE_OFF = 0x00, */ +/* FORCE_ENABLE = 0x01, */ +/* FORCE_DISABLE = 0x02, */ +/* FORCE_RESERVED = 0x03, */ + FORCE_HIS_ENABLE1_mask = 0x03 << 4, + FORCE_HIS_ENABLE1_shift = 4, +/* FORCE_OFF = 0x00, */ +/* FORCE_ENABLE = 0x01, */ +/* FORCE_DISABLE = 0x02, */ +/* FORCE_RESERVED = 0x03, */ + FORCE_SHADER_Z_ORDER_bit = 1 << 6, + FAST_Z_DISABLE_bit = 1 << 7, + FAST_STENCIL_DISABLE_bit = 1 << 8, + NOOP_CULL_DISABLE_bit = 1 << 9, + FORCE_COLOR_KILL_bit = 1 << 10, + FORCE_Z_READ_bit = 1 << 
11, + FORCE_STENCIL_READ_bit = 1 << 12, + FORCE_FULL_Z_RANGE_mask = 0x03 << 13, + FORCE_FULL_Z_RANGE_shift = 13, +/* FORCE_OFF = 0x00, */ +/* FORCE_ENABLE = 0x01, */ +/* FORCE_DISABLE = 0x02, */ +/* FORCE_RESERVED = 0x03, */ + FORCE_QC_SMASK_CONFLICT_bit = 1 << 15, + DISABLE_VIEWPORT_CLAMP_bit = 1 << 16, + IGNORE_SC_ZRANGE_bit = 1 << 17, + DB_HTILE_SURFACE = 0x00028d24, + HTILE_WIDTH_bit = 1 << 0, + HTILE_HEIGHT_bit = 1 << 1, + LINEAR_bit = 1 << 2, + FULL_CACHE_bit = 1 << 3, + HTILE_USES_PRELOAD_WIN_bit = 1 << 4, + PRELOAD_bit = 1 << 5, + PREFETCH_WIDTH_mask = 0x3f << 6, + PREFETCH_WIDTH_shift = 6, + PREFETCH_HEIGHT_mask = 0x3f << 12, + PREFETCH_HEIGHT_shift = 12, + DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c, + COMPAREFUNC1_mask = 0x07 << 0, + COMPAREFUNC1_shift = 0, +/* REF_NEVER = 0x00, */ +/* REF_LESS = 0x01, */ +/* REF_EQUAL = 0x02, */ +/* REF_LEQUAL = 0x03, */ +/* REF_GREATER = 0x04, */ +/* REF_NOTEQUAL = 0x05, */ +/* REF_GEQUAL = 0x06, */ +/* REF_ALWAYS = 0x07, */ + COMPAREVALUE1_mask = 0xff << 4, + COMPAREVALUE1_shift = 4, + COMPAREMASK1_mask = 0xff << 12, + COMPAREMASK1_shift = 12, + ENABLE1_bit = 1 << 24, + DB_PRELOAD_CONTROL = 0x00028d30, + START_X_mask = 0xff << 0, + START_X_shift = 0, + START_Y_mask = 0xff << 8, + START_Y_shift = 8, + MAX_X_mask = 0xff << 16, + MAX_X_shift = 16, + MAX_Y_mask = 0xff << 24, + MAX_Y_shift = 24, + DB_PREFETCH_LIMIT = 0x00028d34, + DEPTH_HEIGHT_TILE_MAX_mask = 0x3ff << 0, + DEPTH_HEIGHT_TILE_MAX_shift = 0, + PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x00028df8, + POLY_OFFSET_NEG_NUM_DB_BITS_mask = 0xff << 0, + POLY_OFFSET_NEG_NUM_DB_BITS_shift = 0, + POLY_OFFSET_DB_IS_FLOAT_FMT_bit = 1 << 8, + PA_SU_POLY_OFFSET_CLAMP = 0x00028dfc, + PA_SU_POLY_OFFSET_FRONT_SCALE = 0x00028e00, + PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x00028e04, + PA_SU_POLY_OFFSET_BACK_SCALE = 0x00028e08, + PA_SU_POLY_OFFSET_BACK_OFFSET = 0x00028e0c, + PA_CL_POINT_X_RAD = 0x00028e10, + PA_CL_POINT_Y_RAD = 0x00028e14, + PA_CL_POINT_SIZE = 0x00028e18, + PA_CL_POINT_CULL_RAD = 
0x00028e1c, + PA_CL_UCP_0_X = 0x00028e20, + PA_CL_UCP_0_X_num = 6, + PA_CL_UCP_0_X_offset = 16, + PA_CL_UCP_0_Y = 0x00028e24, + PA_CL_UCP_0_Y_num = 6, + PA_CL_UCP_0_Y_offset = 16, + PA_CL_UCP_0_Z = 0x00028e28, + PA_CL_UCP_0_Z_num = 6, + PA_CL_UCP_0_Z_offset = 16, + SQ_ALU_CONSTANT0_0 = 0x00030000, + SQ_ALU_CONSTANT1_0 = 0x00030004, + SQ_ALU_CONSTANT2_0 = 0x00030008, + SQ_ALU_CONSTANT3_0 = 0x0003000c, + SQ_VTX_CONSTANT_WORD0_0 = 0x00038000, + SQ_TEX_RESOURCE_WORD0_0 = 0x00038000, + DIM_mask = 0x07 << 0, + DIM_shift = 0, + SQ_TEX_DIM_1D = 0x00, + SQ_TEX_DIM_2D = 0x01, + SQ_TEX_DIM_3D = 0x02, + SQ_TEX_DIM_CUBEMAP = 0x03, + SQ_TEX_DIM_1D_ARRAY = 0x04, + SQ_TEX_DIM_2D_ARRAY = 0x05, + SQ_TEX_DIM_2D_MSAA = 0x06, + SQ_TEX_DIM_2D_ARRAY_MSAA = 0x07, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask = 0x0f << 3, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift = 3, + TILE_TYPE_bit = 1 << 7, + PITCH_mask = 0x7ff << 8, + PITCH_shift = 8, + TEX_WIDTH_mask = 0x1fff << 19, + TEX_WIDTH_shift = 19, + SQ_VTX_CONSTANT_WORD1_0 = 0x00038004, + SQ_TEX_RESOURCE_WORD1_0 = 0x00038004, + TEX_HEIGHT_mask = 0x1fff << 0, + TEX_HEIGHT_shift = 0, + TEX_DEPTH_mask = 0x1fff << 13, + TEX_DEPTH_shift = 13, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask = 0x3f << 26, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift = 26, + SQ_VTX_CONSTANT_WORD2_0 = 0x00038008, + BASE_ADDRESS_HI_mask = 0xff << 0, + BASE_ADDRESS_HI_shift = 0, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask = 0x7ff << 8, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift = 8, + SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit = 1 << 19, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift = 20, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask = 0x03 << 26, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift = 26, +/* SQ_NUM_FORMAT_NORM = 0x00, */ +/* SQ_NUM_FORMAT_INT = 0x01, */ +/* SQ_NUM_FORMAT_SCALED = 0x02, */ + SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit = 1 << 28, + SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit = 1 << 29, + 
SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask = 0x03 << 30, + SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift = 30, +/* SQ_ENDIAN_NONE = 0x00, */ +/* SQ_ENDIAN_8IN16 = 0x01, */ +/* SQ_ENDIAN_8IN32 = 0x02, */ + SQ_TEX_RESOURCE_WORD2_0 = 0x00038008, + SQ_VTX_CONSTANT_WORD3_0 = 0x0003800c, + MEM_REQUEST_SIZE_mask = 0x03 << 0, + MEM_REQUEST_SIZE_shift = 0, + SQ_TEX_RESOURCE_WORD3_0 = 0x0003800c, + SQ_TEX_RESOURCE_WORD4_0 = 0x00038010, + FORMAT_COMP_X_mask = 0x03 << 0, + FORMAT_COMP_X_shift = 0, + SQ_FORMAT_COMP_UNSIGNED = 0x00, + SQ_FORMAT_COMP_SIGNED = 0x01, + SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, + FORMAT_COMP_Y_mask = 0x03 << 2, + FORMAT_COMP_Y_shift = 2, +/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ +/* SQ_FORMAT_COMP_SIGNED = 0x01, */ +/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ + FORMAT_COMP_Z_mask = 0x03 << 4, + FORMAT_COMP_Z_shift = 4, +/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ +/* SQ_FORMAT_COMP_SIGNED = 0x01, */ +/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ + FORMAT_COMP_W_mask = 0x03 << 6, + FORMAT_COMP_W_shift = 6, +/* SQ_FORMAT_COMP_UNSIGNED = 0x00, */ +/* SQ_FORMAT_COMP_SIGNED = 0x01, */ +/* SQ_FORMAT_COMP_UNSIGNED_BIASED = 0x02, */ + SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask = 0x03 << 8, + SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift = 8, +/* SQ_NUM_FORMAT_NORM = 0x00, */ +/* SQ_NUM_FORMAT_INT = 0x01, */ +/* SQ_NUM_FORMAT_SCALED = 0x02, */ + SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit = 1 << 10, + SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit = 1 << 11, + SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask = 0x03 << 12, + SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift = 12, +/* SQ_ENDIAN_NONE = 0x00, */ +/* SQ_ENDIAN_8IN16 = 0x01, */ +/* SQ_ENDIAN_8IN32 = 0x02, */ + REQUEST_SIZE_mask = 0x03 << 14, + REQUEST_SIZE_shift = 14, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask = 0x07 << 16, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift = 16, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask = 0x07 << 19, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift = 19, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask = 0x07 << 22, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift = 22, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask = 0x07 << 25, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift = 25, +/* SQ_SEL_X = 0x00, */ +/* SQ_SEL_Y = 0x01, */ +/* SQ_SEL_Z = 0x02, */ +/* SQ_SEL_W = 0x03, */ +/* SQ_SEL_0 = 0x04, */ +/* SQ_SEL_1 = 0x05, */ + BASE_LEVEL_mask = 0x0f << 28, + BASE_LEVEL_shift = 28, + SQ_TEX_RESOURCE_WORD5_0 = 0x00038014, + LAST_LEVEL_mask = 0x0f << 0, + LAST_LEVEL_shift = 0, + BASE_ARRAY_mask = 0x1fff << 4, + BASE_ARRAY_shift = 4, + LAST_ARRAY_mask = 0x1fff << 17, + LAST_ARRAY_shift = 17, + SQ_TEX_RESOURCE_WORD6_0 = 0x00038018, + MPEG_CLAMP_mask = 0x03 << 0, + MPEG_CLAMP_shift = 0, + SQ_TEX_MPEG_CLAMP_OFF = 0x00, + SQ_TEX_MPEG_9 = 0x01, + SQ_TEX_MPEG_10 = 0x02, + PERF_MODULATION_mask = 0x07 << 5, + PERF_MODULATION_shift = 5, + INTERLACED_bit = 1 << 8, + SQ_TEX_RESOURCE_WORD6_0__TYPE_mask = 0x03 << 30, + SQ_TEX_RESOURCE_WORD6_0__TYPE_shift = 30, + SQ_TEX_VTX_INVALID_TEXTURE = 0x00, + SQ_TEX_VTX_INVALID_BUFFER = 0x01, + SQ_TEX_VTX_VALID_TEXTURE = 0x02, + SQ_TEX_VTX_VALID_BUFFER = 0x03, + SQ_VTX_CONSTANT_WORD6_0 = 0x00038018, + SQ_VTX_CONSTANT_WORD6_0__TYPE_mask = 0x03 << 30, + SQ_VTX_CONSTANT_WORD6_0__TYPE_shift = 30, +/* SQ_TEX_VTX_INVALID_TEXTURE = 0x00, */ +/* SQ_TEX_VTX_INVALID_BUFFER = 0x01, */ +/* SQ_TEX_VTX_VALID_TEXTURE = 0x02, */ +/* SQ_TEX_VTX_VALID_BUFFER = 0x03, */ + SQ_TEX_SAMPLER_WORD0_0 = 0x0003c000, + SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x07 << 0, + SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0, + SQ_TEX_WRAP = 0x00, + SQ_TEX_MIRROR = 0x01, + 
SQ_TEX_CLAMP_LAST_TEXEL = 0x02, + SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, + SQ_TEX_CLAMP_HALF_BORDER = 0x04, + SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, + SQ_TEX_CLAMP_BORDER = 0x06, + SQ_TEX_MIRROR_ONCE_BORDER = 0x07, + CLAMP_Y_mask = 0x07 << 3, + CLAMP_Y_shift = 3, +/* SQ_TEX_WRAP = 0x00, */ +/* SQ_TEX_MIRROR = 0x01, */ +/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ +/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ +/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ +/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ +/* SQ_TEX_CLAMP_BORDER = 0x06, */ +/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ + CLAMP_Z_mask = 0x07 << 6, + CLAMP_Z_shift = 6, +/* SQ_TEX_WRAP = 0x00, */ +/* SQ_TEX_MIRROR = 0x01, */ +/* SQ_TEX_CLAMP_LAST_TEXEL = 0x02, */ +/* SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x03, */ +/* SQ_TEX_CLAMP_HALF_BORDER = 0x04, */ +/* SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x05, */ +/* SQ_TEX_CLAMP_BORDER = 0x06, */ +/* SQ_TEX_MIRROR_ONCE_BORDER = 0x07, */ + XY_MAG_FILTER_mask = 0x07 << 9, + XY_MAG_FILTER_shift = 9, + SQ_TEX_XY_FILTER_POINT = 0x00, + SQ_TEX_XY_FILTER_BILINEAR = 0x01, + SQ_TEX_XY_FILTER_BICUBIC = 0x02, + XY_MIN_FILTER_mask = 0x07 << 12, + XY_MIN_FILTER_shift = 12, +/* SQ_TEX_XY_FILTER_POINT = 0x00, */ +/* SQ_TEX_XY_FILTER_BILINEAR = 0x01, */ +/* SQ_TEX_XY_FILTER_BICUBIC = 0x02, */ + Z_FILTER_mask = 0x03 << 15, + Z_FILTER_shift = 15, + SQ_TEX_Z_FILTER_NONE = 0x00, + SQ_TEX_Z_FILTER_POINT = 0x01, + SQ_TEX_Z_FILTER_LINEAR = 0x02, + MIP_FILTER_mask = 0x03 << 17, + MIP_FILTER_shift = 17, +/* SQ_TEX_Z_FILTER_NONE = 0x00, */ +/* SQ_TEX_Z_FILTER_POINT = 0x01, */ +/* SQ_TEX_Z_FILTER_LINEAR = 0x02, */ + BORDER_COLOR_TYPE_mask = 0x03 << 22, + BORDER_COLOR_TYPE_shift = 22, + SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00, + SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x01, + SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x02, + SQ_TEX_BORDER_COLOR_REGISTER = 0x03, + POINT_SAMPLING_CLAMP_bit = 1 << 24, + TEX_ARRAY_OVERRIDE_bit = 1 << 25, + DEPTH_COMPARE_FUNCTION_mask = 0x07 << 26, + DEPTH_COMPARE_FUNCTION_shift = 26, + 
SQ_TEX_DEPTH_COMPARE_NEVER = 0x00, + SQ_TEX_DEPTH_COMPARE_LESS = 0x01, + SQ_TEX_DEPTH_COMPARE_EQUAL = 0x02, + SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x03, + SQ_TEX_DEPTH_COMPARE_GREATER = 0x04, + SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x05, + SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x06, + SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x07, + CHROMA_KEY_mask = 0x03 << 29, + CHROMA_KEY_shift = 29, + SQ_TEX_CHROMA_KEY_DISABLED = 0x00, + SQ_TEX_CHROMA_KEY_KILL = 0x01, + SQ_TEX_CHROMA_KEY_BLEND = 0x02, + LOD_USES_MINOR_AXIS_bit = 1 << 31, + SQ_TEX_SAMPLER_WORD1_0 = 0x0003c004, + MIN_LOD_mask = 0x3ff << 0, + MIN_LOD_shift = 0, + MAX_LOD_mask = 0x3ff << 10, + MAX_LOD_shift = 10, + SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_mask = 0xfff << 20, + SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift = 20, + SQ_TEX_SAMPLER_WORD2_0 = 0x0003c008, + LOD_BIAS_SEC_mask = 0xfff << 0, + LOD_BIAS_SEC_shift = 0, + MC_COORD_TRUNCATE_bit = 1 << 12, + SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 13, + HIGH_PRECISION_FILTER_bit = 1 << 14, + PERF_MIP_mask = 0x07 << 15, + PERF_MIP_shift = 15, + PERF_Z_mask = 0x03 << 18, + PERF_Z_shift = 18, + FETCH_4_bit = 1 << 26, + SAMPLE_IS_PCF_bit = 1 << 27, + SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31, + SQ_VTX_BASE_VTX_LOC = 0x0003cff0, + SQ_VTX_START_INST_LOC = 0x0003cff4, + SQ_LOOP_CONST_DX10_0 = 0x0003e200, + SQ_LOOP_CONST_0 = 0x0003e200, + SQ_LOOP_CONST_0__COUNT_mask = 0xfff << 0, + SQ_LOOP_CONST_0__COUNT_shift = 0, + INIT_mask = 0xfff << 12, + INIT_shift = 12, + INC_mask = 0xff << 24, + INC_shift = 24, + SQ_BOOL_CONST_0 = 0x0003e380, + SQ_BOOL_CONST_0_num = 3, + +} ; + +#endif /* _AUTOREGS */ + diff --git a/src/r600_reg_r6xx.h b/src/r600_reg_r6xx.h new file mode 100644 index 00000000..2e7dfa94 --- /dev/null +++ b/src/r600_reg_r6xx.h @@ -0,0 +1,494 @@ +/* + * RadeonHD R6xx, R7xx Register documentation + * + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. 
+ * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _R600_REG_R6xx_H_ +#define _R600_REG_R6xx_H_ + +/* + * Registers for R6xx chips that are not documented yet + */ + +enum { + + MM_INDEX = 0x0000, + MM_DATA = 0x0004, + + SRBM_STATUS = 0x0e50, + RLC_RQ_PENDING_bit = 1 << 3, + RCU_RQ_PENDING_bit = 1 << 4, + GRBM_RQ_PENDING_bit = 1 << 5, + HI_RQ_PENDING_bit = 1 << 6, + IO_EXTERN_SIGNAL_bit = 1 << 7, + VMC_BUSY_bit = 1 << 8, + MCB_BUSY_bit = 1 << 9, + MCDZ_BUSY_bit = 1 << 10, + MCDY_BUSY_bit = 1 << 11, + MCDX_BUSY_bit = 1 << 12, + MCDW_BUSY_bit = 1 << 13, + SEM_BUSY_bit = 1 << 14, + SRBM_STATUS__RLC_BUSY_bit = 1 << 15, + PDMA_BUSY_bit = 1 << 16, + IH_BUSY_bit = 1 << 17, + CSC_BUSY_bit = 1 << 20, + CMC7_BUSY_bit = 1 << 21, + CMC6_BUSY_bit = 1 << 22, + CMC5_BUSY_bit = 1 << 23, + CMC4_BUSY_bit = 1 << 24, + CMC3_BUSY_bit = 1 << 25, + CMC2_BUSY_bit = 1 << 26, + CMC1_BUSY_bit = 1 << 27, + CMC0_BUSY_bit = 1 << 28, + BIF_BUSY_bit = 1 << 29, + IDCT_BUSY_bit = 1 << 30, + + SRBM_READ_ERROR = 0x0e98, + READ_ADDRESS_mask = 0xffff << 2, + READ_ADDRESS_shift = 2, + READ_REQUESTER_HI_bit = 1 << 24, + READ_REQUESTER_GRBM_bit = 1 << 25, + READ_REQUESTER_RCU_bit = 1 << 26, + READ_REQUESTER_RLC_bit = 1 << 27, + READ_ERROR_bit = 1 << 31, + + SRBM_INT_STATUS = 0x0ea4, + RDERR_INT_STAT_bit = 1 << 0, + GFX_CNTX_SWITCH_INT_STAT_bit = 1 << 1, + SRBM_INT_ACK = 0x0ea8, + RDERR_INT_ACK_bit = 1 << 0, + GFX_CNTX_SWITCH_INT_ACK_bit = 1 << 1, + + R6XX_MC_VM_FB_LOCATION = 0x2180, + + VENDOR_DEVICE_ID = 0x4000, + + HDP_MEM_COHERENCY_FLUSH_CNTL = 0x5480, + + D1GRPH_PRIMARY_SURFACE_ADDRESS = 0x6110, + D1GRPH_PITCH = 0x6120, + D1GRPH_Y_END = 0x6138, + + GRBM_STATUS = 0x8010, + CMDFIFO_AVAIL_mask = 0x1f << 0, + CMDFIFO_AVAIL_shift = 0, + SRBM_RQ_PENDING_bit = 1 << 5, + CP_RQ_PENDING_bit = 1 << 6, + CF_RQ_PENDING_bit = 1 << 7, + PF_RQ_PENDING_bit = 1 << 8, + GRBM_EE_BUSY_bit = 1 << 10, + GRBM_STATUS__VC_BUSY_bit = 1 << 11, + DB03_CLEAN_bit = 1 << 12, + CB03_CLEAN_bit = 1 << 13, + VGT_BUSY_NO_DMA_bit = 1 << 16, + GRBM_STATUS__VGT_BUSY_bit = 1 
<< 17, + TA03_BUSY_bit = 1 << 18, + GRBM_STATUS__TC_BUSY_bit = 1 << 19, + SX_BUSY_bit = 1 << 20, + SH_BUSY_bit = 1 << 21, + SPI03_BUSY_bit = 1 << 22, + SMX_BUSY_bit = 1 << 23, + SC_BUSY_bit = 1 << 24, + PA_BUSY_bit = 1 << 25, + DB03_BUSY_bit = 1 << 26, + CR_BUSY_bit = 1 << 27, + CP_COHERENCY_BUSY_bit = 1 << 28, + GRBM_STATUS__CP_BUSY_bit = 1 << 29, + CB03_BUSY_bit = 1 << 30, + GUI_ACTIVE_bit = 1 << 31, + GRBM_STATUS2 = 0x8014, + CR_CLEAN_bit = 1 << 0, + SMX_CLEAN_bit = 1 << 1, + SPI0_BUSY_bit = 1 << 8, + SPI1_BUSY_bit = 1 << 9, + SPI2_BUSY_bit = 1 << 10, + SPI3_BUSY_bit = 1 << 11, + TA0_BUSY_bit = 1 << 12, + TA1_BUSY_bit = 1 << 13, + TA2_BUSY_bit = 1 << 14, + TA3_BUSY_bit = 1 << 15, + DB0_BUSY_bit = 1 << 16, + DB1_BUSY_bit = 1 << 17, + DB2_BUSY_bit = 1 << 18, + DB3_BUSY_bit = 1 << 19, + CB0_BUSY_bit = 1 << 20, + CB1_BUSY_bit = 1 << 21, + CB2_BUSY_bit = 1 << 22, + CB3_BUSY_bit = 1 << 23, + GRBM_SOFT_RESET = 0x8020, + SOFT_RESET_CP_bit = 1 << 0, + SOFT_RESET_CB_bit = 1 << 1, + SOFT_RESET_CR_bit = 1 << 2, + SOFT_RESET_DB_bit = 1 << 3, + SOFT_RESET_PA_bit = 1 << 5, + SOFT_RESET_SC_bit = 1 << 6, + SOFT_RESET_SMX_bit = 1 << 7, + SOFT_RESET_SPI_bit = 1 << 8, + SOFT_RESET_SH_bit = 1 << 9, + SOFT_RESET_SX_bit = 1 << 10, + SOFT_RESET_TC_bit = 1 << 11, + SOFT_RESET_TA_bit = 1 << 12, + SOFT_RESET_VC_bit = 1 << 13, + SOFT_RESET_VGT_bit = 1 << 14, + SOFT_RESET_GRBM_GCA_bit = 1 << 15, + + WAIT_UNTIL = 0x8040, + WAIT_CP_DMA_IDLE_bit = 1 << 8, + WAIT_CMDFIFO_bit = 1 << 10, + WAIT_2D_IDLE_bit = 1 << 14, + WAIT_3D_IDLE_bit = 1 << 15, + WAIT_2D_IDLECLEAN_bit = 1 << 16, + WAIT_3D_IDLECLEAN_bit = 1 << 17, + WAIT_EXTERN_SIG_bit = 1 << 19, + CMDFIFO_ENTRIES_mask = 0x1f << 20, + CMDFIFO_ENTRIES_shift = 20, + + GRBM_READ_ERROR = 0x8058, +/* READ_ADDRESS_mask = 0xffff << 2, */ +/* READ_ADDRESS_shift = 2, */ + READ_REQUESTER_SRBM_bit = 1 << 28, + READ_REQUESTER_CP_bit = 1 << 29, + READ_REQUESTER_WU_POLL_bit = 1 << 30, +/* READ_ERROR_bit = 1 << 31, */ + + SCRATCH_REG0 = 0x8500, + SCRATCH_REG1 
= 0x8504, + SCRATCH_REG2 = 0x8508, + SCRATCH_REG3 = 0x850c, + SCRATCH_REG4 = 0x8510, + SCRATCH_REG5 = 0x8514, + SCRATCH_REG6 = 0x8518, + SCRATCH_REG7 = 0x851c, + SCRATCH_UMSK = 0x8540, + SCRATCH_ADDR = 0x8544, + + CP_COHER_CNTL = 0x85f0, + DEST_BASE_0_ENA_bit = 1 << 0, + DEST_BASE_1_ENA_bit = 1 << 1, + SO0_DEST_BASE_ENA_bit = 1 << 2, + SO1_DEST_BASE_ENA_bit = 1 << 3, + SO2_DEST_BASE_ENA_bit = 1 << 4, + SO3_DEST_BASE_ENA_bit = 1 << 5, + CB0_DEST_BASE_ENA_bit = 1 << 6, + CB1_DEST_BASE_ENA_bit = 1 << 7, + CB2_DEST_BASE_ENA_bit = 1 << 8, + CB3_DEST_BASE_ENA_bit = 1 << 9, + CB4_DEST_BASE_ENA_bit = 1 << 10, + CB5_DEST_BASE_ENA_bit = 1 << 11, + CB6_DEST_BASE_ENA_bit = 1 << 12, + CB7_DEST_BASE_ENA_bit = 1 << 13, + DB_DEST_BASE_ENA_bit = 1 << 14, + CR_DEST_BASE_ENA_bit = 1 << 15, + TC_ACTION_ENA_bit = 1 << 23, + VC_ACTION_ENA_bit = 1 << 24, + CB_ACTION_ENA_bit = 1 << 25, + DB_ACTION_ENA_bit = 1 << 26, + SH_ACTION_ENA_bit = 1 << 27, + SMX_ACTION_ENA_bit = 1 << 28, + CR0_ACTION_ENA_bit = 1 << 29, + CR1_ACTION_ENA_bit = 1 << 30, + CR2_ACTION_ENA_bit = 1 << 31, + CP_COHER_SIZE = 0x85f4, + CP_COHER_BASE = 0x85f8, + CP_COHER_STATUS = 0x85fc, + MATCHING_GFX_CNTX_mask = 0xff << 0, + MATCHING_GFX_CNTX_shift = 0, + MATCHING_CR_CNTX_mask = 0xffff << 8, + MATCHING_CR_CNTX_shift = 8, + STATUS_bit = 1 << 31, + + CP_STALLED_STAT1 = 0x8674, + RBIU_TO_DMA_NOT_RDY_TO_RCV_bit = 1 << 0, + RBIU_TO_IBS_NOT_RDY_TO_RCV_bit = 1 << 1, + RBIU_TO_SEM_NOT_RDY_TO_RCV_bit = 1 << 2, + RBIU_TO_2DREGS_NOT_RDY_TO_RCV_bit = 1 << 3, + RBIU_TO_MEMWR_NOT_RDY_TO_RCV_bit = 1 << 4, + RBIU_TO_MEMRD_NOT_RDY_TO_RCV_bit = 1 << 5, + RBIU_TO_EOPD_NOT_RDY_TO_RCV_bit = 1 << 6, + RBIU_TO_RECT_NOT_RDY_TO_RCV_bit = 1 << 7, + RBIU_TO_STRMO_NOT_RDY_TO_RCV_bit = 1 << 8, + RBIU_TO_PSTAT_NOT_RDY_TO_RCV_bit = 1 << 9, + MIU_WAITING_ON_RDREQ_FREE_bit = 1 << 16, + MIU_WAITING_ON_WRREQ_FREE_bit = 1 << 17, + MIU_NEEDS_AVAIL_WRREQ_PHASE_bit = 1 << 18, + RCIU_WAITING_ON_GRBM_FREE_bit = 1 << 24, + RCIU_WAITING_ON_VGT_FREE_bit = 1 << 25, + 
RCIU_STALLED_ON_ME_READ_bit = 1 << 26, + RCIU_STALLED_ON_DMA_READ_bit = 1 << 27, + RCIU_HALTED_BY_REG_VIOLATION_bit = 1 << 28, + CP_STALLED_STAT2 = 0x8678, + PFP_TO_CSF_NOT_RDY_TO_RCV_bit = 1 << 0, + PFP_TO_MEQ_NOT_RDY_TO_RCV_bit = 1 << 1, + PFP_TO_VGT_NOT_RDY_TO_RCV_bit = 1 << 2, + PFP_HALTED_BY_INSTR_VIOLATION_bit = 1 << 3, + MULTIPASS_IB_PENDING_IN_PFP_bit = 1 << 4, + ME_BRUSH_WC_NOT_RDY_TO_RCV_bit = 1 << 8, + ME_STALLED_ON_BRUSH_LOGIC_bit = 1 << 9, + CR_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 10, + GFX_CNTX_NOT_AVAIL_TO_ME_bit = 1 << 11, + ME_RCIU_NOT_RDY_TO_RCV_bit = 1 << 12, + ME_TO_CONST_NOT_RDY_TO_RCV_bit = 1 << 13, + ME_WAITING_DATA_FROM_PFP_bit = 1 << 14, + ME_WAITING_ON_PARTIAL_FLUSH_bit = 1 << 15, + RECT_FIFO_NEEDS_CR_RECT_DONE_bit = 1 << 16, + RECT_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 17, + EOPD_FIFO_NEEDS_SC_EOP_DONE_bit = 1 << 18, + EOPD_FIFO_NEEDS_SMX_EOP_DONE_bit = 1 << 19, + EOPD_FIFO_NEEDS_WR_CONFIRM_bit = 1 << 20, + EOPD_FIFO_NEEDS_SIGNAL_SEM_bit = 1 << 21, + SO_NUMPRIM_FIFO_NEEDS_SOADDR_bit = 1 << 22, + SO_NUMPRIM_FIFO_NEEDS_NUMPRIM_bit = 1 << 23, + PIPE_STATS_FIFO_NEEDS_SAMPLE_bit = 1 << 24, + SURF_SYNC_NEEDS_IDLE_CNTXS_bit = 1 << 30, + SURF_SYNC_NEEDS_ALL_CLEAN_bit = 1 << 31, + CP_BUSY_STAT = 0x867c, + REG_BUS_FIFO_BUSY_bit = 1 << 0, + RING_FETCHING_DATA_bit = 1 << 1, + INDR1_FETCHING_DATA_bit = 1 << 2, + INDR2_FETCHING_DATA_bit = 1 << 3, + STATE_FETCHING_DATA_bit = 1 << 4, + PRED_FETCHING_DATA_bit = 1 << 5, + COHER_CNTR_NEQ_ZERO_bit = 1 << 6, + PFP_PARSING_PACKETS_bit = 1 << 7, + ME_PARSING_PACKETS_bit = 1 << 8, + RCIU_PFP_BUSY_bit = 1 << 9, + RCIU_ME_BUSY_bit = 1 << 10, + OUTSTANDING_READ_TAGS_bit = 1 << 11, + SEM_CMDFIFO_NOT_EMPTY_bit = 1 << 12, + SEM_FAILED_AND_HOLDING_bit = 1 << 13, + SEM_POLLING_FOR_PASS_bit = 1 << 14, + _3D_BUSY_bit = 1 << 15, + _2D_BUSY_bit = 1 << 16, + CP_STAT = 0x8680, + CSF_RING_BUSY_bit = 1 << 0, + CSF_WPTR_POLL_BUSY_bit = 1 << 1, + CSF_INDIRECT1_BUSY_bit = 1 << 2, + CSF_INDIRECT2_BUSY_bit = 1 << 3, + CSF_STATE_BUSY_bit = 1 
<< 4, + CSF_PREDICATE_BUSY_bit = 1 << 5, + CSF_BUSY_bit = 1 << 6, + MIU_RDREQ_BUSY_bit = 1 << 7, + MIU_WRREQ_BUSY_bit = 1 << 8, + ROQ_RING_BUSY_bit = 1 << 9, + ROQ_INDIRECT1_BUSY_bit = 1 << 10, + ROQ_INDIRECT2_BUSY_bit = 1 << 11, + ROQ_STATE_BUSY_bit = 1 << 12, + ROQ_PREDICATE_BUSY_bit = 1 << 13, + ROQ_ALIGN_BUSY_bit = 1 << 14, + PFP_BUSY_bit = 1 << 15, + MEQ_BUSY_bit = 1 << 16, + ME_BUSY_bit = 1 << 17, + QUERY_BUSY_bit = 1 << 18, + SEMAPHORE_BUSY_bit = 1 << 19, + INTERRUPT_BUSY_bit = 1 << 20, + SURFACE_SYNC_BUSY_bit = 1 << 21, + DMA_BUSY_bit = 1 << 22, + RCIU_BUSY_bit = 1 << 23, + CP_STAT__CP_BUSY_bit = 1 << 31, + + CP_ME_CNTL = 0x86d8, + ME_STATMUX_mask = 0xff << 0, + ME_STATMUX_shift = 0, + ME_HALT_bit = 1 << 28, + CP_ME_STATUS = 0x86dc, + + CP_RB_RPTR = 0x8700, + RB_RPTR_mask = 0xfffff << 0, + RB_RPTR_shift = 0, + CP_RB_WPTR_DELAY = 0x8704, + PRE_WRITE_TIMER_mask = 0xfffffff << 0, + PRE_WRITE_TIMER_shift = 0, + PRE_WRITE_LIMIT_mask = 0x0f << 28, + PRE_WRITE_LIMIT_shift = 28, + + CP_ROQ_RB_STAT = 0x8780, + ROQ_RPTR_PRIMARY_mask = 0x3ff << 0, + ROQ_RPTR_PRIMARY_shift = 0, + ROQ_WPTR_PRIMARY_mask = 0x3ff << 16, + ROQ_WPTR_PRIMARY_shift = 16, + CP_ROQ_IB1_STAT = 0x8784, + ROQ_RPTR_INDIRECT1_mask = 0x3ff << 0, + ROQ_RPTR_INDIRECT1_shift = 0, + ROQ_WPTR_INDIRECT1_mask = 0x3ff << 16, + ROQ_WPTR_INDIRECT1_shift = 16, + CP_ROQ_IB2_STAT = 0x8788, + ROQ_RPTR_INDIRECT2_mask = 0x3ff << 0, + ROQ_RPTR_INDIRECT2_shift = 0, + ROQ_WPTR_INDIRECT2_mask = 0x3ff << 16, + ROQ_WPTR_INDIRECT2_shift = 16, + + CP_MEQ_STAT = 0x8794, + MEQ_RPTR_mask = 0x3ff << 0, + MEQ_RPTR_shift = 0, + MEQ_WPTR_mask = 0x3ff << 16, + MEQ_WPTR_shift = 16, + + CC_GC_SHADER_PIPE_CONFIG = 0x8950, + INACTIVE_QD_PIPES_mask = 0xff << 8, + INACTIVE_QD_PIPES_shift = 8, + R6XX_MAX_QD_PIPES = 8, + INACTIVE_SIMDS_mask = 0xff << 16, + INACTIVE_SIMDS_shift = 16, + R6XX_MAX_SIMDS = 8, + GC_USER_SHADER_PIPE_CONFIG = 0x8954, + + VC_ENHANCE = 0x9714, + DB_DEBUG = 0x9830, + PREZ_MUST_WAIT_FOR_POSTZ_DONE = 1 << 31, + + 
DB_WATERMARKS = 0x00009838, + DEPTH_FREE_mask = 0x1f << 0, + DEPTH_FREE_shift = 0, + DEPTH_FLUSH_mask = 0x3f << 5, + DEPTH_FLUSH_shift = 5, + FORCE_SUMMARIZE_mask = 0x0f << 11, + FORCE_SUMMARIZE_shift = 11, + DEPTH_PENDING_FREE_mask = 0x1f << 15, + DEPTH_PENDING_FREE_shift = 15, + DEPTH_CACHELINE_FREE_mask = 0x1f << 20, + DEPTH_CACHELINE_FREE_shift = 20, + EARLY_Z_PANIC_DISABLE_bit = 1 << 25, + LATE_Z_PANIC_DISABLE_bit = 1 << 26, + RE_Z_PANIC_DISABLE_bit = 1 << 27, + DB_EXTRA_DEBUG_mask = 0x0f << 28, + DB_EXTRA_DEBUG_shift = 28, + + CP_RB_BASE = 0xc100, + CP_RB_CNTL = 0xc104, + RB_BUFSZ_mask = 0x3f << 0, + CP_RB_WPTR = 0xc114, + RB_WPTR_mask = 0xfffff << 0, + RB_WPTR_shift = 0, + CP_RB_RPTR_WR = 0xc108, + RB_RPTR_WR_mask = 0xfffff << 0, + RB_RPTR_WR_shift = 0, + + CP_INT_STATUS = 0xc128, + DISABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 0, + ENABLE_CNTX_SWITCH_INT_STAT_bit = 1 << 1, + SEM_SIGNAL_INT_STAT_bit = 1 << 18, + CNTX_BUSY_INT_STAT_bit = 1 << 19, + CNTX_EMPTY_INT_STAT_bit = 1 << 20, + WAITMEM_SEM_INT_STAT_bit = 1 << 21, + PRIV_INSTR_INT_STAT_bit = 1 << 22, + PRIV_REG_INT_STAT_bit = 1 << 23, + OPCODE_ERROR_INT_STAT_bit = 1 << 24, + SCRATCH_INT_STAT_bit = 1 << 25, + TIME_STAMP_INT_STAT_bit = 1 << 26, + RESERVED_BIT_ERROR_INT_STAT_bit = 1 << 27, + DMA_INT_STAT_bit = 1 << 28, + IB2_INT_STAT_bit = 1 << 29, + IB1_INT_STAT_bit = 1 << 30, + RB_INT_STAT_bit = 1 << 31, + +// SX_ALPHA_TEST_CONTROL = 0x00028410, + ALPHA_FUNC__REF_NEVER = 0, + ALPHA_FUNC__REF_ALWAYS = 7, +// DB_SHADER_CONTROL = 0x0002880c, + Z_ORDER__EARLY_Z_THEN_LATE_Z = 2, +// PA_SU_SC_MODE_CNTL = 0x00028814, +// POLY_MODE_mask = 0x03 << 3, + POLY_MODE__TRIANGLES = 0, POLY_MODE__DUAL_MODE, +// POLYMODE_FRONT_PTYPE_mask = 0x07 << 5, + POLYMODE_PTYPE__POINTS = 0, POLYMODE_PTYPE__LINES, POLYMODE_PTYPE__TRIANGLES, + PA_SC_AA_SAMPLE_LOCS_8S_WD1_M = 0x00028c20, + DB_SRESULTS_COMPARE_STATE0 = 0x00028d28, /* See autoregs: DB_SRESULTS_COMPARE_STATE1 */ +// DB_SRESULTS_COMPARE_STATE1 = 0x00028d2c, + DB_ALPHA_TO_MASK = 
0x00028d44, + ALPHA_TO_MASK_ENABLE = 1 << 0, + /* OFFSET0..3 are four separate 2-bit fields; each mask must sit at its own _shift (8, 10, 12, 14). The OFFSET1..3 masks previously reused << 8 and so aliased OFFSET0. */ + ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET0_shift = 8, + ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 10, + ALPHA_TO_MASK_OFFSET1_shift = 10, + ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 12, + ALPHA_TO_MASK_OFFSET2_shift = 12, + ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 14, + ALPHA_TO_MASK_OFFSET3_shift = 14, + +// SQ_VTX_CONSTANT_WORD2_0 = 0x00038008, +// SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask = 0x3f << 20, + FMT_INVALID=0, FMT_8, FMT_4_4, FMT_3_3_2, + FMT_16=5, FMT_16_FLOAT, FMT_8_8, + FMT_5_6_5, FMT_6_5_5, FMT_1_5_5_5, FMT_4_4_4_4, + FMT_5_5_5_1, FMT_32, FMT_32_FLOAT, FMT_16_16, + FMT_16_16_FLOAT=16, FMT_8_24, FMT_8_24_FLOAT, FMT_24_8, + FMT_24_8_FLOAT, FMT_10_11_11, FMT_10_11_11_FLOAT, FMT_11_11_10, + FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8, FMT_10_10_10_2, + FMT_X24_8_32_FLOAT, FMT_32_32, FMT_32_32_FLOAT, FMT_16_16_16_16, + FMT_16_16_16_16_FLOAT=32, FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT, + FMT_1 = 37, FMT_GB_GR=39, + FMT_BG_RG, FMT_32_AS_8, FMT_32_AS_8_8, FMT_5_9_9_9_SHAREDEXP, + FMT_8_8_8, FMT_16_16_16, FMT_16_16_16_FLOAT, FMT_32_32_32, + FMT_32_32_32_FLOAT=48, + +// High level register file lengths + SQ_ALU_CONSTANT = SQ_ALU_CONSTANT0_0, /* 256 PS, 256 VS */ + SQ_ALU_CONSTANT_ps_num = 256, + SQ_ALU_CONSTANT_vs_num = 256, + SQ_ALU_CONSTANT_all_num = 512, + SQ_ALU_CONSTANT_offset = 16, + SQ_ALU_CONSTANT_ps = 0, + SQ_ALU_CONSTANT_vs = SQ_ALU_CONSTANT_ps + SQ_ALU_CONSTANT_ps_num, + SQ_TEX_RESOURCE = SQ_TEX_RESOURCE_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */ + SQ_TEX_RESOURCE_ps_num = 160, + SQ_TEX_RESOURCE_vs_num = 160, + SQ_TEX_RESOURCE_fs_num = 16, + SQ_TEX_RESOURCE_gs_num = 160, + SQ_TEX_RESOURCE_all_num = 496, + SQ_TEX_RESOURCE_offset = 28, + SQ_TEX_RESOURCE_ps = 0, + SQ_TEX_RESOURCE_vs = SQ_TEX_RESOURCE_ps + SQ_TEX_RESOURCE_ps_num, + SQ_TEX_RESOURCE_fs = SQ_TEX_RESOURCE_vs + SQ_TEX_RESOURCE_vs_num, + SQ_TEX_RESOURCE_gs = SQ_TEX_RESOURCE_fs + SQ_TEX_RESOURCE_fs_num, + SQ_VTX_RESOURCE = 
SQ_VTX_CONSTANT_WORD0_0, /* 160 PS, 160 VS, 16 FS, 160 GS */ + SQ_VTX_RESOURCE_ps_num = 160, + SQ_VTX_RESOURCE_vs_num = 160, + SQ_VTX_RESOURCE_fs_num = 16, + SQ_VTX_RESOURCE_gs_num = 160, + SQ_VTX_RESOURCE_all_num = 496, + SQ_VTX_RESOURCE_offset = 28, + SQ_VTX_RESOURCE_ps = 0, + SQ_VTX_RESOURCE_vs = SQ_VTX_RESOURCE_ps + SQ_VTX_RESOURCE_ps_num, + SQ_VTX_RESOURCE_fs = SQ_VTX_RESOURCE_vs + SQ_VTX_RESOURCE_vs_num, + SQ_VTX_RESOURCE_gs = SQ_VTX_RESOURCE_fs + SQ_VTX_RESOURCE_fs_num, + SQ_TEX_SAMPLER_WORD = SQ_TEX_SAMPLER_WORD0_0, /* 18 per PS, VS, GS */ + SQ_TEX_SAMPLER_WORD_ps_num = 18, + SQ_TEX_SAMPLER_WORD_vs_num = 18, + SQ_TEX_SAMPLER_WORD_gs_num = 18, + SQ_TEX_SAMPLER_WORD_all_num = 54, + SQ_TEX_SAMPLER_WORD_offset = 12, + SQ_TEX_SAMPLER_WORD_ps = 0, + SQ_TEX_SAMPLER_WORD_vs = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num, + SQ_TEX_SAMPLER_WORD_gs = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num, + SQ_LOOP_CONST = SQ_LOOP_CONST_0, /* 32 per PS, VS, GS */ + SQ_LOOP_CONST_ps_num = 32, + SQ_LOOP_CONST_vs_num = 32, + SQ_LOOP_CONST_gs_num = 32, + SQ_LOOP_CONST_all_num = 96, + SQ_LOOP_CONST_offset = 4, + SQ_LOOP_CONST_ps = 0, + SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num, + SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num, +} ; + + +#endif diff --git a/src/r600_reg_r7xx.h b/src/r600_reg_r7xx.h new file mode 100644 index 00000000..e5c01c86 --- /dev/null +++ b/src/r600_reg_r7xx.h @@ -0,0 +1,149 @@ +/* + * RadeonHD R6xx, R7xx Register documentation + * + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. 
+ * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _R600_REG_R7xx_H_ +#define _R600_REG_R7xx_H_ + +/* + * Register update for R7xx chips + * + * Commented-out names (e.g. GRBM_STATUS, DB_ALPHA_TO_MASK) appear to label the register or field group that the entries following them extend -- TODO confirm against the R6xx headers. + * Where an R6xx_ and an R7xx_ entry are listed side by side, the two carry the per-family value of the same field (e.g. OMOD: shift 6 on R6xx, shift 5 on R7xx). + * NOTE(review): every instruction-word name in this enum is given the same value 0x00008dfc; this looks like a placeholder rather than a distinct register offset -- confirm before using it as an address. + */ + +enum { + + R7XX_MC_VM_FB_LOCATION = 0x00002024, + +// GRBM_STATUS = 0x00008010, + R7XX_TA_BUSY_bit = 1 << 14, + + R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x00008d8c, + RING0_OFFSET_mask = 0xff << 0, + RING0_OFFSET_shift = 0, + ISOLATE_ES_ENABLE_bit = 1 << 12, + ISOLATE_GS_ENABLE_bit = 1 << 13, + VS_PC_LIMIT_ENABLE_bit = 1 << 14, + +// SQ_ALU_WORD0 = 0x00008dfc, +// SRC0_SEL_mask = 0x1ff << 0, +// SRC1_SEL_mask = 0x1ff << 13, + R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4, + R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5, + R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6, + R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7, +// INDEX_MODE_mask = 0x07 << 26, + R7xx_SQ_INDEX_GLOBAL = 0x05, + R7xx_SQ_INDEX_GLOBAL_AR_X = 0x06, + R6xx_SQ_ALU_WORD1_OP2 = 0x00008dfc, + R7xx_SQ_ALU_WORD1_OP2_V2 = 0x00008dfc, + R6xx_FOG_MERGE_bit = 1 << 5, + R6xx_OMOD_mask = 0x03 << 6, + R7xx_OMOD_mask = 0x03 << 5, + R6xx_OMOD_shift = 6, + R7xx_OMOD_shift = 5, + R6xx_SQ_ALU_WORD1_OP2__ALU_INST_mask = 0x3ff << 8, + R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_mask = 0x7ff << 7, + R6xx_SQ_ALU_WORD1_OP2__ALU_INST_shift = 8, + R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_shift = 7, + R7xx_SQ_OP2_INST_FREXP_64 = 0x07, + R7xx_SQ_OP2_INST_ADD_64 = 0x17, + R7xx_SQ_OP2_INST_MUL_64 = 0x1b, + R7xx_SQ_OP2_INST_FLT64_TO_FLT32 = 0x1c, + R7xx_SQ_OP2_INST_FLT32_TO_FLT64 = 0x1d, + R7xx_SQ_OP2_INST_LDEXP_64 = 0x7a, + R7xx_SQ_OP2_INST_FRACT_64 = 0x7b, + R7xx_SQ_OP2_INST_PRED_SETGT_64 = 0x7c, + R7xx_SQ_OP2_INST_PRED_SETE_64 = 0x7d, + R7xx_SQ_OP2_INST_PRED_SETGE_64 = 0x7e, +// SQ_ALU_WORD1_OP3 = 0x00008dfc, +// SRC2_SEL_mask = 0x1ff << 0, +// R7xx_SQ_ALU_SRC_1_DBL_L = 0xf4, +// R7xx_SQ_ALU_SRC_1_DBL_M = 0xf5, +// R7xx_SQ_ALU_SRC_0_5_DBL_L = 0xf6, +// R7xx_SQ_ALU_SRC_0_5_DBL_M = 0xf7, +// SQ_ALU_WORD1_OP3__ALU_INST_mask = 0x1f << 13, + R7xx_SQ_OP3_INST_MULADD_64 = 0x08, + R7xx_SQ_OP3_INST_MULADD_64_M2 = 0x09, + R7xx_SQ_OP3_INST_MULADD_64_M4 = 0x0a, + R7xx_SQ_OP3_INST_MULADD_64_D2 = 0x0b, 
+// SQ_CF_ALU_WORD1 = 0x00008dfc, + R6xx_USES_WATERFALL_bit = 1 << 25, + R7xx_SQ_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25, +// SQ_CF_ALLOC_EXPORT_WORD0 = 0x00008dfc, +// ARRAY_BASE_mask = 0x1fff << 0, +// TYPE_mask = 0x03 << 13, +// SQ_EXPORT_PARAM = 0x02, +// X_UNUSED_FOR_SX_EXPORTS = 0x03, +// ELEM_SIZE_mask = 0x03 << 30, +// SQ_CF_ALLOC_EXPORT_WORD1 = 0x00008dfc, +// SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0x7f << 23, + R7xx_SQ_CF_INST_MEM_EXPORT = 0x3a, +// SQ_CF_WORD1 = 0x00008dfc, +// SQ_CF_WORD1__COUNT_mask = 0x07 << 10, + R7xx_COUNT_3_bit = 1 << 19, +// SQ_CF_WORD1__CF_INST_mask = 0x7f << 23, + R7xx_SQ_CF_INST_END_PROGRAM = 0x19, + R7xx_SQ_CF_INST_WAIT_ACK = 0x1a, + R7xx_SQ_CF_INST_TEX_ACK = 0x1b, + R7xx_SQ_CF_INST_VTX_ACK = 0x1c, + R7xx_SQ_CF_INST_VTX_TC_ACK = 0x1d, +// SQ_VTX_WORD0 = 0x00008dfc, +// VTX_INST_mask = 0x1f << 0, + R7xx_SQ_VTX_INST_MEM = 0x02, +// SQ_VTX_WORD2 = 0x00008dfc, + R7xx_SQ_VTX_WORD2__ALT_CONST_bit = 1 << 20, + +// SQ_TEX_WORD0 = 0x00008dfc, +// TEX_INST_mask = 0x1f << 0, + R7xx_X_MEMORY_READ = 0x02, + R7xx_SQ_TEX_INST_KEEP_GRADIENTS = 0x0a, + R7xx_X_FETCH4_LOAD4_INSTRUCTION_FOR_DX10_1 = 0x0f, + R7xx_SQ_TEX_WORD0__ALT_CONST_bit = 1 << 24, + + R7xx_PA_SC_EDGERULE = 0x00028230, + R7xx_SPI_THREAD_GROUPING = 0x000286c8, + PS_GROUPING_mask = 0x1f << 0, + PS_GROUPING_shift = 0, + VS_GROUPING_mask = 0x1f << 8, + VS_GROUPING_shift = 8, + GS_GROUPING_mask = 0x1f << 16, + GS_GROUPING_shift = 16, + ES_GROUPING_mask = 0x1f << 24, + ES_GROUPING_shift = 24, + R7xx_CB_SHADER_CONTROL = 0x000287a0, + RT0_ENABLE_bit = 1 << 0, + RT1_ENABLE_bit = 1 << 1, + RT2_ENABLE_bit = 1 << 2, + RT3_ENABLE_bit = 1 << 3, + RT4_ENABLE_bit = 1 << 4, + RT5_ENABLE_bit = 1 << 5, + RT6_ENABLE_bit = 1 << 6, + RT7_ENABLE_bit = 1 << 7, +// DB_ALPHA_TO_MASK = 0x00028d44, + R7xx_OFFSET_ROUND_bit = 1 << 16, +// SQ_TEX_SAMPLER_MISC_0 = 0x0003d03c, + R7xx_TRUNCATE_COORD_bit = 1 << 9, + R7xx_DISABLE_CUBE_WRAP_bit = 1 << 10, + +} ; + +#endif /* _R600_REG_R7xx_H_ */ diff --git 
a/src/r600_shader.h b/src/r600_shader.h new file mode 100644 index 00000000..58f5a528 --- /dev/null +++ b/src/r600_shader.h @@ -0,0 +1,346 @@ +/* + * RadeonHD R6xx, R7xx DRI driver + * + * Copyright (C) 2008-2009 Alexander Deucher + * Copyright (C) 2008-2009 Matthias Hopf + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Shader macros + */ + +#ifndef __SHADER_H__ +#define __SHADER_H__ + + +/* Restrictions of ALU instructions + * order of scalar ops is always x,y,z,w,t(rans), last to be indicated by last==1. + * max of 3 different src GPRs per instr. + * max of 4 different cfile constant components per instr. + * max of 2 (different) constants (any type) for t. + * bank swizzle (see below). + * GPR write stalls read of same register. Auto-replaced by PV/PS, NOP needed if registers are relative to + * different indices (gpr,loop,nothing). + * may use constant registers or constant cache, but not both. + */ + +/* Bank_swizzle: (pp. 
297ff) + * Only one of each x,y,z,w GPR component can be loaded per cycle (3 cycles per instr, called 0-2). + * per scalar instruction bank_swizzle can select which cycle each operand comes from. e.g.: + * SRC0 SRC1 SRC2 SWIZZLE cycle0 cycle1 cycle2 + * 1.x 2.x 012 1.x 2.x - + * 3.x 1.y 201 1.y - 3.x + * 2.x 1.y 102 (1.y) (2.x) - + * If data is read in a cycle, multiple scalar instructions can reference it. + * Special case: square() - i.e. same component in src0+src1 doesn't need read port -> ignores swizzle for src1. + * No restrictions for constants or PV/PS. + * t can load multiple components in a single cycle slot, but has to share cycles with xyzw. + * t with single constant may not load GPRs or PV/PS in cycle 0 (careful with ALU_TRANS_210). + * t with two constants may only load GPRs or PV/PS in cycle 2. + */ + + +/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */ + + +// CF insts +// addr +#define ADDR(x) (x) +// pc +#define POP_COUNT(x) (x) +// const +#define CF_CONST(x) (x) +// cond +#define COND(x) (x) // SQ_COND_* +// count +#define I_COUNT(x) ((x) ? 
((x) - 1) : 0) +//r7xx +#define COUNT_3(x) (x) +// call count +#define CALL_COUNT(x) (x) +// eop +#define END_OF_PROGRAM(x) (x) +// vpm +#define VALID_PIXEL_MODE(x) (x) +// cf inst +#define CF_INST(x) (x) // SQ_CF_INST_* + +// wqm +#define WHOLE_QUAD_MODE(x) (x) +// barrier +#define BARRIER(x) (x) +//kb0 +#define KCACHE_BANK0(x) (x) +//kb1 +#define KCACHE_BANK1(x) (x) +// km0/1 +#define KCACHE_MODE0(x) (x) +#define KCACHE_MODE1(x) (x) // SQ_CF_KCACHE_* +// +#define KCACHE_ADDR0(x) (x) +#define KCACHE_ADDR1(x) (x) +// uw +#define USES_WATERFALL(x) (x) + +#define ARRAY_BASE(x) (x) +// export pixel +#define CF_PIXEL_MRT0 0 +#define CF_PIXEL_MRT1 1 +#define CF_PIXEL_MRT2 2 +#define CF_PIXEL_MRT3 3 +#define CF_PIXEL_MRT4 4 +#define CF_PIXEL_MRT5 5 +#define CF_PIXEL_MRT6 6 +#define CF_PIXEL_MRT7 7 +// *_FOG: r6xx only +#define CF_PIXEL_MRT0_FOG 16 +#define CF_PIXEL_MRT1_FOG 17 +#define CF_PIXEL_MRT2_FOG 18 +#define CF_PIXEL_MRT3_FOG 19 +#define CF_PIXEL_MRT4_FOG 20 +#define CF_PIXEL_MRT5_FOG 21 +#define CF_PIXEL_MRT6_FOG 22 +#define CF_PIXEL_MRT7_FOG 23 +#define CF_PIXEL_Z 61 +// export pos +#define CF_POS0 60 +#define CF_POS1 61 +#define CF_POS2 62 +#define CF_POS3 63 +// export param +// 0...31 +#define TYPE(x) (x) // SQ_EXPORT_* +#if 0 +// type export +#define SQ_EXPORT_PIXEL 0 +#define SQ_EXPORT_POS 1 +#define SQ_EXPORT_PARAM 2 +// reserved 3 +// type mem +#define SQ_EXPORT_WRITE 0 +#define SQ_EXPORT_WRITE_IND 1 +#define SQ_EXPORT_WRITE_ACK 2 +#define SQ_EXPORT_WRITE_IND_ACK 3 +#endif + +#define RW_GPR(x) (x) +#define RW_REL(x) (x) +#define ABSOLUTE 0 +#define RELATIVE 1 +#define INDEX_GPR(x) (x) +// encoded as value-1 (0 stays 0); argument parenthesised so expressions like (a | b) expand correctly +#define ELEM_SIZE(x) ((x) ? ((x) - 1) : 0) +#define COMP_MASK(x) (x) +#define R6xx_ELEM_LOOP(x) (x) +// encoded as value-1 (0 stays 0), same form as I_COUNT / MEGA_FETCH_COUNT +#define BURST_COUNT(x) ((x) ? 
((x) - 1) : 0) + +// swiz +#define SRC_SEL_X(x) (x) // SQ_SEL_* each +#define SRC_SEL_Y(x) (x) +#define SRC_SEL_Z(x) (x) +#define SRC_SEL_W(x) (x) + +#define CF_DWORD0(addr) (addr) +// R7xx has another entry (COUNT3), but that is only used for adding a bit to count. +// We allow one more bit for count in the argument of the macro on R7xx instead. +// R6xx: [0,7] R7xx: [1,16] +#define CF_DWORD1(pc, cf_const, cond, count, call_count, eop, vpm, cf_inst, wqm, b) \ + (((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | (((count) & 7) << 10) | (((count) >> 3) << 19) | \ + ((call_count) << 13) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31)) + +#define CF_ALU_DWORD0(addr, kb0, kb1, km0) (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30)) +#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, uw, cf_inst, wqm, b) \ + (((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \ + ((count) << 18) | ((uw) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31)) + +#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \ + (((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | ((index_gpr) << 23) | \ + ((es) << 30)) +// R7xx apparently doesn't have the ELEM_LOOP entry any more +// We still expose it, but ELEM_LOOP is explicitly R6xx now. +// TODO: is this just forgotten in the docs, or really not available any more? 
+#define CF_ALLOC_IMP_EXP_DWORD1_BUF(array_size, comp_mask, el, bc, eop, vpm, cf_inst, wqm, b) \ + (((array_size) << 0) | ((comp_mask) << 12) | ((el) << 16) | ((bc) << 17) | \ + ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | ((wqm) << 30) | ((b) << 31)) +#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, el, bc, eop, vpm, cf_inst, wqm, b) \ + (((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | ((el) << 16) | \ + ((bc) << 17) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | \ + ((wqm) << 30) | ((b) << 31)) + +// ALU clause insts +#define SRC0_SEL(x) (x) +#define SRC1_SEL(x) (x) +#define SRC2_SEL(x) (x) +// src[0-2]_sel +// 0-127 GPR +// 128-159 kcache constants bank 0 +// 160-191 kcache constants bank 1 +// 248-255 special SQ_ALU_SRC_* (0, 1, etc.) + +#define SRC0_REL(x) (x) +#define SRC1_REL(x) (x) +#define SRC2_REL(x) (x) +// elem +#define SRC0_ELEM(x) (x) +#define SRC1_ELEM(x) (x) +#define SRC2_ELEM(x) (x) +#define ELEM_X 0 +#define ELEM_Y 1 +#define ELEM_Z 2 +#define ELEM_W 3 +// neg +#define SRC0_NEG(x) (x) +#define SRC1_NEG(x) (x) +#define SRC2_NEG(x) (x) +// im +#define INDEX_MODE(x) (x) // SQ_INDEX_* +// ps +#define PRED_SEL(x) (x) // SQ_PRED_SEL_* +// last +#define LAST(x) (x) +// abs +#define SRC0_ABS(x) (x) +#define SRC1_ABS(x) (x) +// uem +#define UPDATE_EXECUTE_MASK(x) (x) +// up +#define UPDATE_PRED(x) (x) +// wm +#define WRITE_MASK(x) (x) +// fm +#define FOG_MERGE(x) (x) +// omod +#define OMOD(x) (x) // SQ_ALU_OMOD_* +// alu inst +#define ALU_INST(x) (x) // SQ_ALU_INST_* +//bs +#define BANK_SWIZZLE(x) (x) // SQ_ALU_VEC_* +#define DST_GPR(x) (x) +#define DST_REL(x) (x) +#define DST_ELEM(x) (x) +#define CLAMP(x) (x) + +#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \ + (((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \ + ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \ + ((im) << 26) | ((ps) << 29) | ((last) << 31)) +// R7xx has alu_inst 
at a different slot, and no fog merge any more (no fix function fog any more) +#define R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ + (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ + ((fm) << 5) | ((omod) << 6) | ((alu_inst) << 8) | ((bs) << 18) | ((dst_gpr) << 21) | \ + ((dr) << 28) | ((de) << 29) | ((clamp) << 31)) +#define R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ + (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \ + ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \ + ((dr) << 28) | ((de) << 29) | ((clamp) << 31)) +// This is a general chipset macro, but due to selection by chipid typically not usable in static arrays +// Fog is NOT USED on R7xx, even if specified. +#define ALU_DWORD1_OP2(chipfamily, s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \ + ((chipfamily) < CHIP_FAMILY_RV770 ? \ + R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) : \ + R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp)) +#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \ + (((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \ + ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \ + ((de) << 29) | ((clamp) << 31)) + +// VTX clause insts +// vxt insts +#define VTX_INST(x) (x) // SQ_VTX_INST_* + +// fetch type +#define FETCH_TYPE(x) (x) // SQ_VTX_FETCH_* + +#define FETCH_WHOLE_QUAD(x) (x) +#define BUFFER_ID(x) (x) +#define SRC_GPR(x) (x) +#define SRC_REL(x) (x) +#define MEGA_FETCH_COUNT(x) ((x) ? 
((x) - 1) : 0) + +#define SEMANTIC_ID(x) (x) +#define DST_SEL_X(x) (x) +#define DST_SEL_Y(x) (x) +#define DST_SEL_Z(x) (x) +#define DST_SEL_W(x) (x) +#define USE_CONST_FIELDS(x) (x) +#define DATA_FORMAT(x) (x) +// num format +#define NUM_FORMAT_ALL(x) (x) // SQ_NUM_FORMAT_* +// format comp +#define FORMAT_COMP_ALL(x) (x) // SQ_FORMAT_COMP_* +// sma +#define SRF_MODE_ALL(x) (x) +#define SRF_MODE_ZERO_CLAMP_MINUS_ONE 0 +#define SRF_MODE_NO_ZERO 1 +#define OFFSET(x) (x) +// endian swap +#define ENDIAN_SWAP(x) (x) // SQ_ENDIAN_* +#define CONST_BUF_NO_STRIDE(x) (x) +// mf +#define MEGA_FETCH(x) (x) + +#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \ + (((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \ + ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26)) +#define VTX_DWORD1_SEM(semantic_id, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \ + (((semantic_id) << 0) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)) +#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \ + (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31)) +#define VTX_DWORD2(offset, es, cbns, mf) \ + (((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19)) +#define VTX_DWORD_PAD 0x00000000 + +// TEX clause insts +// tex insts +#define TEX_INST(x) (x) // SQ_TEX_INST_* + +#define BC_FRAC_MODE(x) (x) +#define FETCH_WHOLE_QUAD(x) (x) +#define RESOURCE_ID(x) (x) +#define R7xx_ALT_CONST(x) (x) + +#define LOD_BIAS(x) (x) +//ct +#define COORD_TYPE_X(x) (x) +#define COORD_TYPE_Y(x) (x) +#define COORD_TYPE_Z(x) (x) +#define COORD_TYPE_W(x) (x) +#define TEX_UNNORMALIZED 0 +#define TEX_NORMALIZED 1 +#define OFFSET_X(x) (x) +#define OFFSET_Y(x) (x) +#define OFFSET_Z(x) (x) 
+#define SAMPLER_ID(x) (x) + +// R7xx has an additional parameter ALT_CONST. We always expose it, but ALT_CONST is R7xx only +#define TEX_DWORD0(tex_inst, bfm, fwq, resource_id, src_gpr, sr, ac) \ + (((tex_inst) << 0) | ((bfm) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \ + ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24)) +#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \ + (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \ + ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31)) +#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \ + (((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \ + ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29)) +#define TEX_DWORD_PAD 0x00000000 + + +#endif diff --git a/src/r600_state.h b/src/r600_state.h new file mode 100644 index 00000000..bf9cdb5b --- /dev/null +++ b/src/r600_state.h @@ -0,0 +1,227 @@ +#ifndef __R600_STATE_H__ +#define __R600_STATE_H__ + +#include "xf86drm.h" + +typedef int bool_t; + +/* seriously ?! 
@#$%% */ +# define uint32_t CARD32 +# define uint64_t CARD64 +/* Zero an object in place; x must be an lvalue because its address is taken */ +#define CLEAR(x) memset (&(x), 0, sizeof(x)) + +/* Sequencer / thread handling */ +typedef struct { + int ps_prio; + int vs_prio; + int gs_prio; + int es_prio; + int num_ps_gprs; + int num_vs_gprs; + int num_gs_gprs; + int num_es_gprs; + int num_temp_gprs; + int num_ps_threads; + int num_vs_threads; + int num_gs_threads; + int num_es_threads; + int num_ps_stack_entries; + int num_vs_stack_entries; + int num_gs_stack_entries; + int num_es_stack_entries; +} sq_config_t; + +/* Color buffer / render target */ +typedef struct { + int id; + int w; + int h; + uint64_t base; + int format; + int endian; + int array_mode; // tiling + int number_type; + int read_size; + int comp_swap; + int tile_mode; + int blend_clamp; + int clear_color; + int blend_bypass; + int blend_float32; + int simple_float; + int round_mode; + int tile_compact; + int source_format; +} cb_config_t; + +/* Depth buffer */ +typedef struct { + int w; + int h; + uint64_t base; + int format; + int read_size; + int array_mode; // tiling + int tile_surface_en; + int tile_compact; + int zrange_precision; +} db_config_t; + +/* Shader */ +typedef struct { + uint64_t shader_addr; + int num_gprs; + int stack_size; + int dx10_clamp; + int prime_cache_pgm_en; + int prime_cache_on_draw; + int fetch_cache_lines; + int prime_cache_en; + int prime_cache_on_const; + int clamp_consts; + int export_mode; + int uncached_first_inst; +} shader_config_t; + +/* Vertex buffer / vtx resource */ +typedef struct { + int id; + uint64_t vb_addr; + uint32_t vtx_num_entries; + uint32_t vtx_size_dw; + int clamp_x; + int format; + int num_format_all; + int format_comp_all; + int srf_mode_all; + int endian; + int mem_req_size; +} vtx_resource_t; + +/* Texture resource */ +typedef struct { + int id; + int w; + int h; + int pitch; + int depth; + int dim; + int tile_mode; + int tile_type; + int format; + uint64_t base; + uint64_t mip_base; + int format_comp_x; + int format_comp_y; 
+ int format_comp_z; + int format_comp_w; + int num_format_all; + int srf_mode_all; + int force_degamma; + int endian; + int request_size; + int dst_sel_x; + int dst_sel_y; + int dst_sel_z; + int dst_sel_w; + int base_level; + int last_level; + int base_array; + int last_array; + int mpeg_clamp; + int perf_modulation; + int interlaced; +} tex_resource_t; + +/* Texture sampler */ +typedef struct { + int id; + /* Clamping */ + int clamp_x, clamp_y, clamp_z; + int border_color; + /* Filtering */ + int xy_mag_filter, xy_min_filter; + int z_filter; + int mip_filter; + bool_t high_precision_filter; /* ? */ + int perf_mip; /* ? 0-7 */ + int perf_z; /* ? 3 */ + /* LoD selection */ + int min_lod, max_lod; /* 0-0x3ff */ + int lod_bias; /* 0-0xfff (signed?) */ + int lod_bias2; /* ? 0-0xfff (signed?) */ + bool_t lod_uses_minor_axis; /* ? */ + /* Other stuff */ + bool_t point_sampling_clamp; /* ? */ + bool_t tex_array_override; /* ? */ + bool_t mc_coord_truncate; /* ? */ + bool_t force_degamma; /* ? */ + bool_t fetch_4; /* ? */ + bool_t sample_is_pcf; /* ? */ + bool_t type; /* ? */ + int depth_compare; /* only depth textures? 
*/ + int chroma_key; +} tex_sampler_t; + +/* Draw command */ +typedef struct { + uint32_t prim_type; + uint32_t vgt_draw_initiator; + uint32_t index_type; + uint32_t num_instances; + uint32_t num_indices; +} draw_config_t; + +inline void e32(drmBufPtr ib, uint32_t dword); +inline void efloat(drmBufPtr ib, float f); +inline void pack3(drmBufPtr ib, int cmd, unsigned num); +inline void pack0 (drmBufPtr ib, uint32_t reg, int num); +inline void ereg (drmBufPtr ib, uint32_t reg, uint32_t val); +void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib); +void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib); + +uint64_t +upload (ScrnInfoPtr pScrn, void *shader, int size, int offset); +void +wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib); +void +wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib); +void +start_3d(ScrnInfoPtr pScrn, drmBufPtr ib); +void +set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf); +void +cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr); +void +fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf); +void +vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf); +void +ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf); +void +set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf); +void +set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res); +void +set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res); +void +set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s); +void +set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); +void +set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2); +void +set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); +void +set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2); +void 
+set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2); +void +set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib); +void +draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices); +void +draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf); + +#endif diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c new file mode 100644 index 00000000..b1cd4f1d --- /dev/null +++ b/src/r600_textured_videofuncs.c @@ -0,0 +1,374 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Author: Alex Deucher <alexander.deucher@amd.com> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" + +#include "exa.h" + +#include "radeon.h" +#include "r600_shader.h" +#include "r600_reg.h" +#include "r600_state.h" + +#include "radeon_video.h" + +#include <X11/extensions/Xv.h> +#include "fourcc.h" + +#include "damage.h" + +/* + * Draw the Xv image described by pPriv onto pPriv->pPixmap with the 3D engine. + * + * The Y plane is bound as texture 0 (FMT_8) and the interleaved UV plane as + * texture 1 (FMT_8_8, half width/height); ps_alu_consts holds the per-channel + * YUV->RGB coefficients. One DI_PT_RECTLIST rectangle (three vertices) is + * appended to the vertex buffer for every box in pPriv->clip. + * + * Returns without drawing when the source or destination pitch is not a + * multiple of 8, or when the destination is neither 16 nor 32 bits per pixel. + */ +void +R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + PixmapPtr pPixmap = pPriv->pPixmap; + BoxPtr pBox = REGION_RECTS(&pPriv->clip); + int nBox = REGION_NUM_RECTS(&pPriv->clip); + int dstxoff, dstyoff; + cb_config_t cb_conf; + tex_resource_t tex_res; + tex_sampler_t tex_samp; + shader_config_t vs_conf, ps_conf; + draw_config_t draw_conf; + vtx_resource_t vtx_res; + int uv_offset; + + static float ps_alu_consts[] = { + 1.0, 0.0, 1.13983, -1.13983/2, // r - c[0] + 1.0, -0.39465, -0.5806, (0.39465+0.5806)/2, // g - c[1] + 1.0, 2.03211, 0.0, -2.03211/2, // b - c[2] + }; + + CLEAR (cb_conf); + CLEAR (tex_res); + CLEAR (tex_samp); + CLEAR (vs_conf); + CLEAR (ps_conf); + CLEAR (draw_conf); + CLEAR (vtx_res); + + accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); + accel_state->src_pitch[0] = pPriv->src_pitch; + + // bad pitch + if (accel_state->src_pitch[0] & 7) + return; + if (accel_state->dst_pitch & 7) + return; + +#ifdef COMPOSITE + dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; + dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; +#else + dstxoff = 0; + dstyoff = 0; +#endif + + accel_state->ib = RADEONCPGetBuffer(pScrn); + + /* Init */ + start_3d(pScrn, accel_state->ib); + + //cp_set_surface_sync(pScrn, accel_state->ib); + + set_default_state(pScrn, accel_state->ib); + + /* Scissor / viewport */ + ereg (accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); + ereg (accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); + + 
accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->xv_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->xv_ps_offset; + + accel_state->vs_size = 512; + accel_state->ps_size = 512; + + /* Shader */ + + /* flush SQ cache */ + cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, + accel_state->vs_size, accel_state->vs_mc_addr); + + vs_conf.shader_addr = accel_state->vs_mc_addr; + vs_conf.num_gprs = 2; + vs_conf.stack_size = 0; + vs_setup (pScrn, accel_state->ib, &vs_conf); + + /* flush SQ cache */ + cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, + accel_state->ps_size, accel_state->ps_mc_addr); + + ps_conf.shader_addr = accel_state->ps_mc_addr; + ps_conf.num_gprs = 4; + ps_conf.stack_size = 0; + ps_conf.uncached_first_inst = 1; + ps_conf.clamp_consts = 0; + ps_conf.export_mode = 2; + ps_setup (pScrn, accel_state->ib, &ps_conf); + + // PS alu constants + set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); + + /* Texture */ + accel_state->src_mc_addr[0] = pPriv->src_offset; + /* size of the Y plane in bytes: source pitch times source height (was dst pitch * src width) */ + accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; + + /* flush texture cache */ + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, 512, + accel_state->src_mc_addr[0]); + + // Y texture + tex_res.id = 0; + tex_res.w = pPriv->w; + tex_res.h = pPriv->h; + tex_res.pitch = accel_state->src_pitch[0]; + tex_res.depth = 0; + tex_res.dim = SQ_TEX_DIM_2D; + tex_res.base = accel_state->src_mc_addr[0]; + tex_res.mip_base = accel_state->src_mc_addr[0]; + + tex_res.format = FMT_8; + tex_res.dst_sel_x = SQ_SEL_X; //Y + tex_res.dst_sel_y = SQ_SEL_1; + tex_res.dst_sel_z = SQ_SEL_1; + tex_res.dst_sel_w = SQ_SEL_1; + + tex_res.request_size = 1; + tex_res.base_level = 0; + tex_res.last_level = 0; + tex_res.perf_modulation = 0; + tex_res.interlaced = 0; + set_tex_resource (pScrn, 
accel_state->ib, &tex_res); + + // UV texture + uv_offset = accel_state->src_pitch[0] * pPriv->h; + uv_offset = (uv_offset + 255) & ~255; + + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, + accel_state->src_size[0] / 2, + accel_state->src_mc_addr[0] + uv_offset); + + tex_res.id = 1; + tex_res.format = FMT_8_8; + tex_res.w = pPriv->w >> 1; + tex_res.h = pPriv->h >> 1; + tex_res.pitch = accel_state->src_pitch[0] >> 1; + tex_res.dst_sel_x = SQ_SEL_Y; //V + tex_res.dst_sel_y = SQ_SEL_X; //U + tex_res.dst_sel_z = SQ_SEL_1; + tex_res.dst_sel_w = SQ_SEL_1; + tex_res.interlaced = 0; + // XXX tex bases need to be 256B aligned + tex_res.base = accel_state->src_mc_addr[0] + uv_offset; + tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset; + set_tex_resource (pScrn, accel_state->ib, &tex_res); + + // Y sampler + tex_samp.id = 0; + tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_z = SQ_TEX_WRAP; + + // xxx: switch to bicubic + tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; + tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; + + tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; + tex_samp.mip_filter = 0; /* no mipmap */ + set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + + // UV sampler + tex_samp.id = 1; + set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + + /* Render setup */ + ereg (accel_state->ib, CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift)); + ereg (accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); + ereg (accel_state->ib, CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */ + + cb_conf.id = 0; + + accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; + /* byte size of the destination; read by the final CB surface sync below */ + accel_state->dst_size = exaGetPixmapPitch(pPixmap) * pPixmap->drawable.height; + + cb_conf.w = accel_state->dst_pitch; + cb_conf.h = pPixmap->drawable.height; + cb_conf.base = accel_state->dst_mc_addr; + + switch (pPixmap->drawable.bitsPerPixel) { + case 16: + if (pPixmap->drawable.depth == 15) { + cb_conf.format = COLOR_1_5_5_5; + cb_conf.comp_swap = 1; //ARGB + 
} else { + cb_conf.format = COLOR_5_6_5; + cb_conf.comp_swap = 2; //RGB + } + break; + case 32: + cb_conf.format = COLOR_8_8_8_8; + cb_conf.comp_swap = 1; //ARGB + break; + default: + /* unsupported destination depth: hand the unused indirect buffer back before bailing out */ + R600IBDiscard(pScrn, accel_state->ib); + return; + } + + cb_conf.source_format = 1; + cb_conf.blend_clamp = 1; + set_render_target(pScrn, accel_state->ib, &cb_conf); + + ereg (accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); + ereg (accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ + DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ + + /* Interpolator setup */ + // export tex coords from VS + ereg (accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); + ereg (accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); + + /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x + * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ + ereg (accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift))); + ereg (accel_state->ib, SPI_PS_IN_CONTROL_1, 0); + ereg (accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | + (0x03 << DEFAULT_VAL_shift) | + SEL_CENTROID_bit)); + ereg (accel_state->ib, SPI_INTERP_CONTROL_0, 0); + + + accel_state->vb_index = 0; + + while (nBox--) { + int srcX, srcY, srcw, srch; + int dstX, dstY, dstw, dsth; + struct r6xx_copy_vertex *xv_vb = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); + struct r6xx_copy_vertex vertex[3]; + + dstX = pBox->x1 + dstxoff; + dstY = pBox->y1 + dstyoff; + dstw = pBox->x2 - pBox->x1; + dsth = pBox->y2 - pBox->y1; + + srcX = ((pBox->x1 - pPriv->drw_x) * + pPriv->src_w) / pPriv->dst_w; + srcY = ((pBox->y1 - pPriv->drw_y) * + pPriv->src_h) / pPriv->dst_h; + + srcw = (pPriv->src_w * dstw) / pPriv->dst_w; + srch = (pPriv->src_h * dsth) / pPriv->dst_h; + + vertex[0].x = (float)dstX; + vertex[0].y = (float)dstY; + vertex[0].s = 
(float)srcX / pPriv->w; + vertex[0].t = (float)srcY / pPriv->h; + + vertex[1].x = (float)dstX; + vertex[1].y = (float)(dstY + dsth); + vertex[1].s = (float)srcX / pPriv->w; + vertex[1].t = (float)(srcY + srch) / pPriv->h; + + vertex[2].x = (float)(dstX + dstw); + vertex[2].y = (float)(dstY + dsth); + vertex[2].s = (float)(srcX + srcw) / pPriv->w; + vertex[2].t = (float)(srcY + srch) / pPriv->h; + +#if 0 + ErrorF("vertex 0: %f, %f, %f, %f\n", vertex[0].x, vertex[0].y, vertex[0].s, vertex[0].t); + ErrorF("vertex 1: %f, %f, %f, %f\n", vertex[1].x, vertex[1].y, vertex[1].s, vertex[1].t); + ErrorF("vertex 2: %f, %f, %f, %f\n", vertex[2].x, vertex[2].y, vertex[2].s, vertex[2].t); +#endif + + // append to vertex buffer + xv_vb[accel_state->vb_index++] = vertex[0]; + xv_vb[accel_state->vb_index++] = vertex[1]; + xv_vb[accel_state->vb_index++] = vertex[2]; + + pBox++; + } + + if (accel_state->vb_index == 0) { + R600IBDiscard(pScrn, accel_state->ib); + DamageDamageRegion(pPriv->pDraw, &pPriv->clip); + return; + } + + accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + + (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); + accel_state->vb_size = accel_state->vb_index * 16; + + /* flush vertex cache */ + if ((info->ChipFamily == CHIP_FAMILY_RV610) || + (info->ChipFamily == CHIP_FAMILY_RV620) || + (info->ChipFamily == CHIP_FAMILY_RS780) || + (info->ChipFamily == CHIP_FAMILY_RV710)) + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, + accel_state->vb_size, accel_state->vb_mc_addr); + else + cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, + accel_state->vb_size, accel_state->vb_mc_addr); + + /* Vertex buffer setup */ + vtx_res.id = SQ_VTX_RESOURCE_vs; + vtx_res.vtx_size_dw = 16 / 4; + vtx_res.vtx_num_entries = accel_state->vb_size / 4; + vtx_res.mem_req_size = 1; + vtx_res.vb_addr = accel_state->vb_mc_addr; + set_vtx_resource (pScrn, accel_state->ib, &vtx_res); + + draw_conf.prim_type = DI_PT_RECTLIST; + 
draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; + draw_conf.num_instances = 1; + draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; + draw_conf.index_type = DI_INDEX_SIZE_16_BIT; + + ereg (accel_state->ib, VGT_INSTANCE_STEP_RATE_0, 0); /* ? */ + ereg (accel_state->ib, VGT_INSTANCE_STEP_RATE_1, 0); + + ereg (accel_state->ib, VGT_MAX_VTX_INDX, draw_conf.num_indices); + ereg (accel_state->ib, VGT_MIN_VTX_INDX, 0); + ereg (accel_state->ib, VGT_INDX_OFFSET, 0); + + draw_auto(pScrn, accel_state->ib, &draw_conf); + + wait_3d_idle_clean(pScrn, accel_state->ib); + + /* sync destination surface: both flags must be OR'ed (a comma here would drop CB_ACTION_ENA_bit) */ + cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), + accel_state->dst_size, accel_state->dst_mc_addr); + + R600CPFlushIndirect(pScrn, accel_state->ib); + + DamageDamageRegion(pPriv->pDraw, &pPriv->clip); +} diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c new file mode 100644 index 00000000..659d13da --- /dev/null +++ b/src/r6xx_accel.c @@ -0,0 +1,1110 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Alex Deucher <alexander.deucher@amd.com> + * Matthias Hopf <mhopf@suse.de> + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" + +#include <errno.h> + +#include "radeon.h" +#include "r600_shader.h" +#include "radeon_reg.h" +#include "r600_reg.h" +#include "r600_state.h" + +#include "xf86drm.h" +#include "radeon_drm.h" + +/* Emit uint32_t */ +inline void e32(drmBufPtr ib, uint32_t dword) +{ + uint32_t *ib_head = (pointer)(char*)ib->address; + + ib_head[ib->used>>2] = dword; + ib->used += 4; +} + +inline void efloat(drmBufPtr ib, float f) +{ + union { + float f; + uint32_t d; + } a; + a.f=f; + e32(ib, a.d); +} + +inline void pack3(drmBufPtr ib, int cmd, unsigned num) +{ + e32 (ib, RADEON_CP_PACKET3 | (cmd << 8) | (((num-1) & 0x3fff) << 16)); +} + +/* write num registers, start at reg */ +/* If register falls in a special area, special commands are issued */ +inline void pack0 (drmBufPtr ib, uint32_t reg, int num) +{ + if (reg >= SET_CONFIG_REG_offset && reg < SET_CONFIG_REG_end) { + pack3 (ib, IT_SET_CONFIG_REG, num+1); + e32 (ib, (reg-SET_CONFIG_REG_offset) >> 2); + } else if (reg >= SET_CONTEXT_REG_offset && reg < SET_CONTEXT_REG_end) { + pack3 (ib, IT_SET_CONTEXT_REG, num+1); + e32 (ib, (reg-0x28000) >> 2); + } else if (reg >= SET_ALU_CONST_offset && reg < SET_ALU_CONST_end) { + pack3 (ib, IT_SET_ALU_CONST, num+1); + e32 (ib, (reg-SET_ALU_CONST_offset) >> 2); + } else if (reg >= SET_RESOURCE_offset && reg < SET_RESOURCE_end) { + pack3 (ib, IT_SET_RESOURCE, num+1); + e32 (ib, (reg-SET_RESOURCE_offset) >> 2); + } else if (reg >= SET_SAMPLER_offset && reg < SET_SAMPLER_end) { + pack3 (ib, IT_SET_SAMPLER, num+1); + e32 (ib, (reg-SET_SAMPLER_offset) >> 2); + 
} else if (reg >= SET_CTL_CONST_offset && reg < SET_CTL_CONST_end) { + pack3 (ib, IT_SET_CTL_CONST, num+1); + e32 (ib, (reg-SET_CTL_CONST_offset) >> 2); + } else if (reg >= SET_LOOP_CONST_offset && reg < SET_LOOP_CONST_end) { + pack3 (ib, IT_SET_LOOP_CONST, num+1); + e32 (ib, (reg-SET_LOOP_CONST_offset) >> 2); + } else if (reg >= SET_BOOL_CONST_offset && reg < SET_BOOL_CONST_end) { + pack3 (ib, IT_SET_BOOL_CONST, num+1); + e32 (ib, (reg-SET_BOOL_CONST_offset) >> 2); + } else { + e32 (ib, CP_PACKET0 (reg, num-1)); + } +} + +/* write a single register */ +inline void ereg (drmBufPtr ib, uint32_t reg, uint32_t val) +{ + pack0 (ib, reg, 1); + e32 (ib, val); +} + +/* Flush the indirect buffer to the kernel for submission to the card */ +void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + drmBufPtr buffer = ib; + int start = 0; + drm_radeon_indirect_t indirect; + + if (!buffer) return; + + //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n", + // buffer->idx); + + while (buffer->used & 0x3c){ + e32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */ + } + + //ErrorF("buffer bytes: %d\n", buffer->used); + + indirect.idx = buffer->idx; + indirect.start = start; + indirect.end = buffer->used; + indirect.discard = 1; + + drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT, + &indirect, sizeof(drm_radeon_indirect_t)); + +} + +void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) +{ + if (!ib) return; + + ib->used = 0; + R600CPFlushIndirect(pScrn, ib); +} + +void +wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) +{ + + //flush caches, don't generate timestamp + pack3 (ib, IT_EVENT_WRITE, 1); + e32 (ib, CACHE_FLUSH_AND_INV_EVENT); + // wait for 3D idle clean + ereg (ib, WAIT_UNTIL, (WAIT_3D_IDLE_bit | + WAIT_3D_IDLECLEAN_bit)); +} + +void +wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) +{ + + ereg (ib, WAIT_UNTIL, WAIT_3D_IDLE_bit); + +} + +static void +reset_cb(ScrnInfoPtr pScrn, drmBufPtr ib) +{ + 
int i; + + pack0 (ib, CB_COLOR0_INFO, 8); + for (i = 0; i < 8; i++) + e32 (ib, 0); +} + +static void +reset_td_samplers(ScrnInfoPtr pScrn, drmBufPtr ib) +{ + int i; + + wait_3d_idle(pScrn, ib); + + pack0 (ib, TD_PS_SAMPLER0_BORDER_RED, 4*TD_PS_SAMPLER0_BORDER_RED_num); + for (i = 0; i < 4*TD_PS_SAMPLER0_BORDER_RED_num; i++) + e32 (ib, 0); + pack0 (ib, TD_VS_SAMPLER0_BORDER_RED, 4*TD_VS_SAMPLER0_BORDER_RED_num); + for (i = 0; i < 4*TD_VS_SAMPLER0_BORDER_RED_num; i++) + e32 (ib, 0); + + wait_3d_idle(pScrn, ib); +} + +static void +reset_sampler_const (ScrnInfoPtr pScrn, drmBufPtr ib) +{ + int i; + + for (i = 0; i < SQ_TEX_SAMPLER_WORD_all_num; i++) { + pack0 (ib, SQ_TEX_SAMPLER_WORD + i * SQ_TEX_SAMPLER_WORD_offset, 3); + e32 (ib, SQ_TEX_DEPTH_COMPARE_LESSEQUAL << DEPTH_COMPARE_FUNCTION_shift); + e32 (ib, MAX_LOD_mask); + e32 (ib, 0); + } +} + +static void +reset_dx9_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib) +{ + int i; + + const int count = SQ_ALU_CONSTANT_all_num * (SQ_ALU_CONSTANT_offset >> 2); + + pack0 (ib, SQ_ALU_CONSTANT, count); + for (i = 0; i < count; i++) + efloat (ib, 0.0); +} + +static void +reset_bool_loop_const(ScrnInfoPtr pScrn, drmBufPtr ib) +{ + int i; + + for (i = 0; i < SQ_BOOL_CONST_0_num; i++) + ereg (ib, SQ_BOOL_CONST_0 + (i << 2), 0); + + pack0 (ib, SQ_LOOP_CONST, SQ_LOOP_CONST_all_num); + + for (i = 0; i < SQ_LOOP_CONST_all_num; i++) + e32 (ib, 0); + +} + +void +start_3d(ScrnInfoPtr pScrn, drmBufPtr ib) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + + if (info->ChipFamily < CHIP_FAMILY_RV770) { + pack3 (ib, IT_START_3D_CMDBUF, 1); + e32 (ib, 0); + } + + pack3 (ib, IT_CONTEXT_CONTROL, 2); + e32 (ib, 0x80000000); + e32 (ib, 0x80000000); + + wait_3d_idle_clean (pScrn, ib); +} + +/* + * Setup of functional groups + */ + +// asic stack/thread/gpr limits - need to query the drm +static void +sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf) +{ + uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; + uint32_t 
sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; + RADEONInfoPtr info = RADEONPTR(pScrn); + + if ((info->ChipFamily == CHIP_FAMILY_RV610) || + (info->ChipFamily == CHIP_FAMILY_RV620) || + (info->ChipFamily == CHIP_FAMILY_RS780) || + (info->ChipFamily == CHIP_FAMILY_RV710)) + sq_config = 0; // no VC + else + sq_config = VC_ENABLE_bit; + + sq_config |= (DX9_CONSTS_bit | + ALU_INST_PREFER_VECTOR_bit | + (sq_conf->ps_prio << PS_PRIO_shift) | + (sq_conf->vs_prio << VS_PRIO_shift) | + (sq_conf->gs_prio << GS_PRIO_shift) | + (sq_conf->es_prio << ES_PRIO_shift)); + + sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) | + (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) | + (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift)); + sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) | + (sq_conf->num_es_gprs << NUM_ES_GPRS_shift)); + + sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) | + (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) | + (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) | + (sq_conf->num_es_threads << NUM_ES_THREADS_shift)); + + sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) | + (sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift)); + + sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) | + (sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift)); + + pack0 (ib, SQ_CONFIG, 6); + e32 (ib, sq_config); + e32 (ib, sq_gpr_resource_mgmt_1); + e32 (ib, sq_gpr_resource_mgmt_2); + e32 (ib, sq_thread_resource_mgmt); + e32 (ib, sq_stack_resource_mgmt_1); + e32 (ib, sq_stack_resource_mgmt_2); + +} + +void +set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf) +{ + uint32_t cb_color_info; + int pitch, slice, h; + RADEONInfoPtr info = RADEONPTR(pScrn); + + cb_color_info = ((cb_conf->endian << ENDIAN_shift) | + (cb_conf->format << CB_COLOR0_INFO__FORMAT_shift) | + 
(cb_conf->array_mode << CB_COLOR0_INFO__ARRAY_MODE_shift) | + (cb_conf->number_type << NUMBER_TYPE_shift) | + (cb_conf->comp_swap << COMP_SWAP_shift) | + (cb_conf->tile_mode << CB_COLOR0_INFO__TILE_MODE_shift)); + if (cb_conf->read_size) + cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit; + if (cb_conf->blend_clamp) + cb_color_info |= BLEND_CLAMP_bit; + if (cb_conf->clear_color) + cb_color_info |= CLEAR_COLOR_bit; + if (cb_conf->blend_bypass) + cb_color_info |= BLEND_BYPASS_bit; + if (cb_conf->blend_float32) + cb_color_info |= BLEND_FLOAT32_bit; + if (cb_conf->simple_float) + cb_color_info |= SIMPLE_FLOAT_bit; + if (cb_conf->round_mode) + cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit; + if (cb_conf->tile_compact) + cb_color_info |= TILE_COMPACT_bit; + if (cb_conf->source_format) + cb_color_info |= SOURCE_FORMAT_bit; + + pitch = (cb_conf->w / 8) - 1; + h = (cb_conf->h + 7) & ~7; + slice = ((cb_conf->w * h) / 64) - 1; + + ereg (ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); + + // rv6xx workaround + if ((info->ChipFamily > CHIP_FAMILY_R600) && + (info->ChipFamily < CHIP_FAMILY_RV770)) { + pack3 (ib, IT_SURFACE_BASE_UPDATE, 1); + e32 (ib, (2 << cb_conf->id)); + } + + // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib + ereg (ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | + (slice << SLICE_TILE_MAX_shift))); + ereg (ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0 << SLICE_START_shift) | + (0 << SLICE_MAX_shift))); + ereg (ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info); + ereg (ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0 >> 8)); // CMASK per-tile data base/256 + ereg (ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0 >> 8)); // FMASK per-tile data base/256 + ereg (ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0 << CMASK_BLOCK_MAX_shift) | + (0 << FMASK_TILE_MAX_shift))); +} + +void +cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr) +{ + 
uint32_t cp_coher_size; + if (size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((size + 255) >> 8); + + ereg (ib, CP_COHER_CNTL, sync_type); + ereg (ib, CP_COHER_SIZE, cp_coher_size); + ereg (ib, CP_COHER_BASE, (mc_addr >> 8)); + pack3 (ib, IT_WAIT_REG_MEM, 6); + e32 (ib, 0x00000003); // ME, Register, EqualTo + e32 (ib, CP_COHER_STATUS >> 2); + e32 (ib, 0); + e32 (ib, 0); // Ref value + e32 (ib, STATUS_bit); // Ref mask + e32 (ib, 10); // Wait interval +} + +void +fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf) +{ + uint32_t sq_pgm_resources; + + sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | + (fs_conf->stack_size << STACK_SIZE_shift)); + + if (fs_conf->dx10_clamp) + sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; + + ereg (ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8); + ereg (ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources); + ereg (ib, SQ_PGM_CF_OFFSET_FS, 0); +} + +void +vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) +{ + uint32_t sq_pgm_resources; + + sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | + (vs_conf->stack_size << STACK_SIZE_shift)); + + if (vs_conf->dx10_clamp) + sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit; + if (vs_conf->fetch_cache_lines) + sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift); + if (vs_conf->uncached_first_inst) + sq_pgm_resources |= UNCACHED_FIRST_INST_bit; + + ereg (ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8); + ereg (ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources); + ereg (ib, SQ_PGM_CF_OFFSET_VS, 0); +} + +void +ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) +{ + uint32_t sq_pgm_resources; + + sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | + (ps_conf->stack_size << STACK_SIZE_shift)); + + if (ps_conf->dx10_clamp) + sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit; + if (ps_conf->fetch_cache_lines) + sq_pgm_resources |= (ps_conf->fetch_cache_lines << 
FETCH_CACHE_LINES_shift); + if (ps_conf->uncached_first_inst) + sq_pgm_resources |= UNCACHED_FIRST_INST_bit; + if (ps_conf->clamp_consts) + sq_pgm_resources |= CLAMP_CONSTS_bit; + + ereg (ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8); + ereg (ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources); + ereg (ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode); + ereg (ib, SQ_PGM_CF_OFFSET_PS, 0); +} + +void +set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf) +{ + int i; + const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); + + pack0 (ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); + for (i = 0; i < countreg; i++) + efloat (ib, const_buf[i]); +} + +void +set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) +{ + uint32_t sq_vtx_constant_word2; + + sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | + ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) | + (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) | + (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) | + (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift)); + if (res->clamp_x) + sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit; + + if (res->format_comp_all) + sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; + + if (res->srf_mode_all) + sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; + + pack0 (ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); + e32 (ib, res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS + e32 (ib, (res->vtx_num_entries << 2) - 1); // 1: SIZE + e32 (ib, sq_vtx_constant_word2); // 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN + e32 (ib, res->mem_req_size << MEM_REQUEST_SIZE_shift); // 3: MEM_REQUEST_SIZE ?!? 
+ e32 (ib, 0); // 4: n/a + e32 (ib, 0); // 5: n/a + e32 (ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE +} + +void +set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) +{ + uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; + uint32_t sq_tex_resource_word5, sq_tex_resource_word6; + + sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) | + (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift)); + + if (tex_res->w) + sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) | + ((tex_res->w - 1) << TEX_WIDTH_shift)); + + if (tex_res->tile_type) + sq_tex_resource_word0 |= TILE_TYPE_bit; + + sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift); + + if (tex_res->h) + sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift); + if (tex_res->depth) + sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift); + + sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) | + (tex_res->format_comp_y << FORMAT_COMP_Y_shift) | + (tex_res->format_comp_z << FORMAT_COMP_Z_shift) | + (tex_res->format_comp_w << FORMAT_COMP_W_shift) | + (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) | + (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) | + (tex_res->request_size << REQUEST_SIZE_shift) | + (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) | + (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) | + (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) | + (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) | + (tex_res->base_level << BASE_LEVEL_shift)); + + if (tex_res->srf_mode_all) + sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit; + if (tex_res->force_degamma) + sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit; + + sq_tex_resource_word5 = ((tex_res->last_level << 
LAST_LEVEL_shift) | + (tex_res->base_array << BASE_ARRAY_shift) | + (tex_res->last_array << LAST_ARRAY_shift)); + + sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) | + (tex_res->perf_modulation << PERF_MODULATION_shift) | + (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift)); + + if (tex_res->interlaced) + sq_tex_resource_word6 |= INTERLACED_bit; + + pack0 (ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); + e32 (ib, sq_tex_resource_word0); + e32 (ib, sq_tex_resource_word1); + e32 (ib, ((tex_res->base) >> 8)); + e32 (ib, ((tex_res->mip_base) >> 8)); + e32 (ib, sq_tex_resource_word4); + e32 (ib, sq_tex_resource_word5); + e32 (ib, sq_tex_resource_word6); +} + +void +set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) +{ + uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; + + sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | + (s->clamp_y << CLAMP_Y_shift) | + (s->clamp_z << CLAMP_Z_shift) | + (s->xy_mag_filter << XY_MAG_FILTER_shift) | + (s->xy_min_filter << XY_MIN_FILTER_shift) | + (s->z_filter << Z_FILTER_shift) | + (s->mip_filter << MIP_FILTER_shift) | + (s->border_color << BORDER_COLOR_TYPE_shift) | + (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift) | + (s->chroma_key << CHROMA_KEY_shift)); + if (s->point_sampling_clamp) + sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit; + if (s->tex_array_override) + sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit; + if (s->lod_uses_minor_axis) + sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit; + + sq_tex_sampler_word1 = ((s->min_lod << MIN_LOD_shift) | + (s->max_lod << MAX_LOD_shift) | + (s->lod_bias << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift)); + + sq_tex_sampler_word2 = ((s->lod_bias2 << LOD_BIAS_SEC_shift) | + (s->perf_mip << PERF_MIP_shift) | + (s->perf_z << PERF_Z_shift)); + if (s->mc_coord_truncate) + sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit; + if (s->force_degamma) + sq_tex_sampler_word2 |= 
SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit; + if (s->high_precision_filter) + sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit; + if (s->fetch_4) + sq_tex_sampler_word2 |= FETCH_4_bit; + if (s->sample_is_pcf) + sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit; + if (s->type) + sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit; + + pack0 (ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3); + e32 (ib, sq_tex_sampler_word0); + e32 (ib, sq_tex_sampler_word1); + e32 (ib, sq_tex_sampler_word2); +} + +//XXX deal with clip offsets in clip setup +void +set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) +{ + + ereg (ib, PA_SC_SCREEN_SCISSOR_TL, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | + (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); + ereg (ib, PA_SC_SCREEN_SCISSOR_BR, ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) | + (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift))); +} + +void +set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) +{ + + ereg (ib, PA_SC_VPORT_SCISSOR_0_TL + + id * PA_SC_VPORT_SCISSOR_0_TL_offset, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) | + (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) | + WINDOW_OFFSET_DISABLE_bit)); + ereg (ib, PA_SC_VPORT_SCISSOR_0_BR + + id * PA_SC_VPORT_SCISSOR_0_BR_offset, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) | + (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift))); +} + +void +set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) +{ + + ereg (ib, PA_SC_GENERIC_SCISSOR_TL, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | + (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) | + WINDOW_OFFSET_DISABLE_bit)); + ereg (ib, PA_SC_GENERIC_SCISSOR_BR, ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) | + (y2 << PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift))); /* BR register uses its own BR_Y shift, like the other scissor helpers */ +} + +void +set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) +{ + + ereg (ib, PA_SC_WINDOW_SCISSOR_TL, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | 
+ (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | + WINDOW_OFFSET_DISABLE_bit)); + ereg (ib, PA_SC_WINDOW_SCISSOR_BR, ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) | + (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift))); +} + +void +set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) +{ + + ereg (ib, PA_SC_CLIPRECT_0_TL + + id * PA_SC_CLIPRECT_0_TL_offset, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | + (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift))); + ereg (ib, PA_SC_CLIPRECT_0_BR + + id * PA_SC_CLIPRECT_0_BR_offset, ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) | + (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift))); +} + +/* + * Setup of default state + */ + +void +set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) +{ + tex_resource_t tex_res; + shader_config_t fs_conf; + sq_config_t sq_conf; + int i; + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + + memset(&tex_res, 0, sizeof(tex_resource_t)); + memset(&fs_conf, 0, sizeof(shader_config_t)); + +#if 1 + if (accel_state->XInited3D) + return; +#endif + + accel_state->XInited3D = TRUE; + + wait_3d_idle(pScrn, ib); + + // ASIC specific setup, see drm + if (info->ChipFamily < CHIP_FAMILY_RV770) { + ereg (ib, TA_CNTL_AUX, (( 3 << GRADIENT_CREDIT_shift) | + (28 << TD_FIFO_CREDIT_shift))); + ereg (ib, VC_ENHANCE, 0); + ereg (ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0); + ereg (ib, DB_DEBUG, 0x82000000); /* ? 
*/ + ereg (ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | + (16 << DEPTH_FLUSH_shift) | + (0 << FORCE_SUMMARIZE_shift) | + (4 << DEPTH_PENDING_FREE_shift) | + (16 << DEPTH_CACHELINE_FREE_shift) | + 0)); + } else { + ereg (ib, TA_CNTL_AUX, (( 2 << GRADIENT_CREDIT_shift) | + (28 << TD_FIFO_CREDIT_shift))); + ereg (ib, VC_ENHANCE, 0); + ereg (ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit); + ereg (ib, DB_DEBUG, 0); + ereg (ib, DB_WATERMARKS, ((4 << DEPTH_FREE_shift) | + (16 << DEPTH_FLUSH_shift) | + (0 << FORCE_SUMMARIZE_shift) | + (4 << DEPTH_PENDING_FREE_shift) | + (4 << DEPTH_CACHELINE_FREE_shift) | + 0)); + } + + reset_td_samplers(pScrn, ib); + reset_dx9_alu_consts(pScrn, ib); + reset_bool_loop_const (pScrn, ib); + reset_sampler_const (pScrn, ib); + + // SQ + sq_conf.ps_prio = 0; + sq_conf.vs_prio = 1; + sq_conf.gs_prio = 2; + sq_conf.es_prio = 3; + // need to set stack/thread/gpr limits based on the asic + // for now just set them low enough so any card will work + // see r600_cp.c in the drm + switch (info->ChipFamily) { + case CHIP_FAMILY_R600: + sq_conf.num_ps_gprs = 192; + sq_conf.num_vs_gprs = 56; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 0; + sq_conf.num_es_gprs = 0; + sq_conf.num_ps_threads = 136; + sq_conf.num_vs_threads = 48; + sq_conf.num_gs_threads = 4; + sq_conf.num_es_threads = 4; + sq_conf.num_ps_stack_entries = 128; + sq_conf.num_vs_stack_entries = 128; + sq_conf.num_gs_stack_entries = 0; + sq_conf.num_es_stack_entries = 0; + break; + case CHIP_FAMILY_RV630: + case CHIP_FAMILY_RV635: + sq_conf.num_ps_gprs = 84; + sq_conf.num_vs_gprs = 36; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 0; + sq_conf.num_es_gprs = 0; + sq_conf.num_ps_threads = 144; + sq_conf.num_vs_threads = 40; + sq_conf.num_gs_threads = 4; + sq_conf.num_es_threads = 4; + sq_conf.num_ps_stack_entries = 40; + sq_conf.num_vs_stack_entries = 40; + sq_conf.num_gs_stack_entries = 32; + sq_conf.num_es_stack_entries = 16; + break; + case CHIP_FAMILY_RV610: + 
case CHIP_FAMILY_RV620: + case CHIP_FAMILY_RS780: + default: + sq_conf.num_ps_gprs = 84; + sq_conf.num_vs_gprs = 36; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 0; + sq_conf.num_es_gprs = 0; + sq_conf.num_ps_threads = 136; + sq_conf.num_vs_threads = 48; + sq_conf.num_gs_threads = 4; + sq_conf.num_es_threads = 4; + sq_conf.num_ps_stack_entries = 40; + sq_conf.num_vs_stack_entries = 40; + sq_conf.num_gs_stack_entries = 32; + sq_conf.num_es_stack_entries = 16; + break; + case CHIP_FAMILY_RV670: + sq_conf.num_ps_gprs = 144; + sq_conf.num_vs_gprs = 40; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 0; + sq_conf.num_es_gprs = 0; + sq_conf.num_ps_threads = 136; + sq_conf.num_vs_threads = 48; + sq_conf.num_gs_threads = 4; + sq_conf.num_es_threads = 4; + sq_conf.num_ps_stack_entries = 40; + sq_conf.num_vs_stack_entries = 40; + sq_conf.num_gs_stack_entries = 32; + sq_conf.num_es_stack_entries = 16; + break; + case CHIP_FAMILY_RV770: + sq_conf.num_ps_gprs = 192; + sq_conf.num_vs_gprs = 56; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 0; + sq_conf.num_es_gprs = 0; + sq_conf.num_ps_threads = 188; + sq_conf.num_vs_threads = 60; + sq_conf.num_gs_threads = 0; + sq_conf.num_es_threads = 0; + sq_conf.num_ps_stack_entries = 256; + sq_conf.num_vs_stack_entries = 256; + sq_conf.num_gs_stack_entries = 0; + sq_conf.num_es_stack_entries = 0; + break; + case CHIP_FAMILY_RV730: + sq_conf.num_ps_gprs = 84; + sq_conf.num_vs_gprs = 36; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 0; + sq_conf.num_es_gprs = 0; + sq_conf.num_ps_threads = 188; + sq_conf.num_vs_threads = 60; + sq_conf.num_gs_threads = 0; + sq_conf.num_es_threads = 0; + sq_conf.num_ps_stack_entries = 128; + sq_conf.num_vs_stack_entries = 128; + sq_conf.num_gs_stack_entries = 0; + sq_conf.num_es_stack_entries = 0; + break; + case CHIP_FAMILY_RV710: + sq_conf.num_ps_gprs = 192; + sq_conf.num_vs_gprs = 56; + sq_conf.num_temp_gprs = 4; + sq_conf.num_gs_gprs = 0; + sq_conf.num_es_gprs = 0; + 
sq_conf.num_ps_threads = 144; + sq_conf.num_vs_threads = 48; + sq_conf.num_gs_threads = 0; + sq_conf.num_es_threads = 0; + sq_conf.num_ps_stack_entries = 128; + sq_conf.num_vs_stack_entries = 128; + sq_conf.num_gs_stack_entries = 0; + sq_conf.num_es_stack_entries = 0; + break; + } + + sq_setup(pScrn, ib, &sq_conf); + + ereg (ib, SQ_VTX_BASE_VTX_LOC, 0); + ereg (ib, SQ_VTX_START_INST_LOC, 0); + + pack0 (ib, SQ_ESGS_RING_ITEMSIZE, 9); + e32 (ib, 0); // SQ_ESGS_RING_ITEMSIZE + e32 (ib, 0); // SQ_GSVS_RING_ITEMSIZE + e32 (ib, 0); // SQ_ESTMP_RING_ITEMSIZE + e32 (ib, 0); // SQ_GSTMP_RING_ITEMSIZE + e32 (ib, 0); // SQ_VSTMP_RING_ITEMSIZE + e32 (ib, 0); // SQ_PSTMP_RING_ITEMSIZE + e32 (ib, 0); // SQ_FBUF_RING_ITEMSIZE + e32 (ib, 0); // SQ_REDUC_RING_ITEMSIZE + e32 (ib, 0); // SQ_GS_VERT_ITEMSIZE + + // DB + ereg (ib, DB_DEPTH_INFO, 0); + ereg (ib, DB_STENCIL_CLEAR, 0); + ereg (ib, DB_DEPTH_CLEAR, 0); + ereg (ib, DB_STENCILREFMASK, 0); + ereg (ib, DB_STENCILREFMASK_BF, 0); + ereg (ib, DB_DEPTH_CONTROL, 0); + ereg (ib, DB_RENDER_CONTROL, STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); + if (info->ChipFamily < CHIP_FAMILY_RV770) + ereg (ib, DB_RENDER_OVERRIDE, FORCE_SHADER_Z_ORDER_bit); + else + ereg (ib, DB_RENDER_OVERRIDE, 0); + ereg (ib, DB_ALPHA_TO_MASK, ((2 << ALPHA_TO_MASK_OFFSET0_shift) | + (2 << ALPHA_TO_MASK_OFFSET1_shift) | + (2 << ALPHA_TO_MASK_OFFSET2_shift) | + (2 << ALPHA_TO_MASK_OFFSET3_shift))); + + // SX + ereg (ib, SX_ALPHA_TEST_CONTROL, 0); + ereg (ib, SX_ALPHA_REF, 0); + + // CB + reset_cb(pScrn, ib); + + pack0 (ib, CB_BLEND_RED, 4); + e32 (ib, 0x00000000); + e32 (ib, 0x00000000); + e32 (ib, 0x00000000); + e32 (ib, 0x00000000); + + /* CB_COLOR_CONTROL.PER_MRT_BLEND is off */ + // RV6xx+ have per-MRT blend + if (info->ChipFamily > CHIP_FAMILY_R600) { + pack0 (ib, CB_BLEND0_CONTROL, CB_BLEND0_CONTROL_num); + for (i = 0; i < CB_BLEND0_CONTROL_num; i++) + e32 (ib, 0); + } + + ereg (ib, CB_BLEND_CONTROL, 0); + + if (info->ChipFamily < 
CHIP_FAMILY_RV770) { + pack0 (ib, CB_FOG_RED, 3); + e32 (ib, 0x00000000); + e32 (ib, 0x00000000); + e32 (ib, 0x00000000); + } + + ereg (ib, CB_COLOR_CONTROL, 0); + pack0 (ib, CB_CLRCMP_CONTROL, 4); + e32 (ib, 1 << CLRCMP_FCN_SEL_shift); // CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC + e32 (ib, 0); // CB_CLRCMP_SRC + e32 (ib, 0); // CB_CLRCMP_DST + e32 (ib, 0); // CB_CLRCMP_MSK + + + if (info->ChipFamily < CHIP_FAMILY_RV770) { + pack0 (ib, CB_CLEAR_RED, 4); + efloat(ib, 1.0); /* WTF? */ + efloat(ib, 0.0); + efloat(ib, 1.0); + efloat(ib, 1.0); + } + ereg (ib, CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift)); + + // SC + set_generic_scissor(pScrn, ib, 0, 0, 8192, 8192); + set_screen_scissor (pScrn, ib, 0, 0, 8192, 8192); + ereg (ib, PA_SC_WINDOW_OFFSET, ((0 << WINDOW_X_OFFSET_shift) | + (0 << WINDOW_Y_OFFSET_shift))); + set_window_scissor (pScrn, ib, 0, 0, 8192, 8192); + + ereg (ib, PA_SC_CLIPRECT_RULE, CLIP_RULE_mask); + + /* clip boolean is set to always visible -> doesn't matter */ + for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++) + set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192); + + if (info->ChipFamily < CHIP_FAMILY_RV770) + ereg (ib, R7xx_PA_SC_EDGERULE, 0x00000000); + else + ereg (ib, R7xx_PA_SC_EDGERULE, 0xAAAAAAAA); /* ? */ + + for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) { + set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192); + pack0 (ib, PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2); + efloat(ib, 0.0); + efloat(ib, 1.0); + } + + if (info->ChipFamily < CHIP_FAMILY_RV770) + ereg (ib, PA_SC_MODE_CNTL, (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit)); + else + ereg (ib, PA_SC_MODE_CNTL, (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit | + 0x00500000)); /* ? 
*/ + + ereg (ib, PA_SC_LINE_CNTL, 0); + ereg (ib, PA_SC_AA_CONFIG, 0); + ereg (ib, PA_SC_AA_MASK, 0xFFFFFFFF); + + //XXX: double check this + if (info->ChipFamily > CHIP_FAMILY_R600) { + ereg (ib, PA_SC_AA_SAMPLE_LOCS_MCTX, 0); + ereg (ib, PA_SC_AA_SAMPLE_LOCS_8S_WD1_M, 0); + } + + ereg (ib, PA_SC_LINE_STIPPLE, 0); + ereg (ib, PA_SC_MPASS_PS_CNTL, 0); + + // CL + pack0 (ib, PA_CL_VPORT_XSCALE_0, 6); + efloat (ib, 0.0f); // PA_CL_VPORT_XSCALE + efloat (ib, 0.0f); // PA_CL_VPORT_XOFFSET + efloat (ib, 0.0f); // PA_CL_VPORT_YSCALE + efloat (ib, 0.0f); // PA_CL_VPORT_YOFFSET + efloat (ib, 0.0f); // PA_CL_VPORT_ZSCALE + efloat (ib, 0.0f); // PA_CL_VPORT_ZOFFSET + ereg (ib, PA_CL_CLIP_CNTL, (CLIP_DISABLE_bit | DX_CLIP_SPACE_DEF_bit)); + ereg (ib, PA_CL_VTE_CNTL, 0); + ereg (ib, PA_CL_VS_OUT_CNTL, 0); + ereg (ib, PA_CL_NANINF_CNTL, 0); + pack0 (ib, PA_CL_GB_VERT_CLIP_ADJ, 4); + efloat (ib, 1.0); // PA_CL_GB_VERT_CLIP_ADJ + efloat (ib, 1.0); // PA_CL_GB_VERT_DISC_ADJ + efloat (ib, 1.0); // PA_CL_GB_HORZ_CLIP_ADJ + efloat (ib, 1.0); // PA_CL_GB_HORZ_DISC_ADJ + + /* user clipping planes are disabled by default */ + pack0 (ib, PA_CL_UCP_0_X, 24); + for (i = 0; i < 24; i++) + efloat (ib, 0.0); + + // SU + ereg (ib, PA_SU_SC_MODE_CNTL, FACE_bit); + ereg (ib, PA_SU_POINT_SIZE, 0); + ereg (ib, PA_SU_POINT_MINMAX, 0); + ereg (ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0); + ereg (ib, PA_SU_POLY_OFFSET_BACK_SCALE, 0); + ereg (ib, PA_SU_POLY_OFFSET_FRONT_SCALE, 0); + ereg (ib, PA_SU_POLY_OFFSET_BACK_OFFSET, 0); + ereg (ib, PA_SU_POLY_OFFSET_FRONT_OFFSET, 0); + + ereg (ib, PA_SU_LINE_CNTL, (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ + ereg (ib, PA_SU_VTX_CNTL, ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) | + (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */ + ereg (ib, PA_SU_POLY_OFFSET_CLAMP, 0); + + // SPI + if (info->ChipFamily < CHIP_FAMILY_RV770) + ereg (ib, R7xx_SPI_THREAD_GROUPING, 0); + else + ereg (ib, R7xx_SPI_THREAD_GROUPING, (1 << 
PS_GROUPING_shift)); + + ereg (ib, SPI_INTERP_CONTROL_0, ((2 << PNT_SPRITE_OVRD_X_shift) | + (3 << PNT_SPRITE_OVRD_Y_shift) | + (0 << PNT_SPRITE_OVRD_Z_shift) | + (1 << PNT_SPRITE_OVRD_W_shift))); /* s,t,0,1 */ + ereg (ib, SPI_INPUT_Z, 0); + ereg (ib, SPI_FOG_CNTL, 0); + ereg (ib, SPI_FOG_FUNC_SCALE, 0); + ereg (ib, SPI_FOG_FUNC_BIAS, 0); + + pack0 (ib, SPI_VS_OUT_ID_0, SPI_VS_OUT_ID_0_num); + for (i = 0; i < SPI_VS_OUT_ID_0_num; i++) /* identity mapping */ + e32 (ib, 0x03020100 + i*0x04040404); + ereg (ib, SPI_VS_OUT_CONFIG, 0); + + // clear FS + fs_setup(pScrn, ib, &fs_conf); + + // VGT + ereg (ib, VGT_MAX_VTX_INDX, 0); + ereg (ib, VGT_MIN_VTX_INDX, 0); + ereg (ib, VGT_INDX_OFFSET, 0); + ereg (ib, VGT_INSTANCE_STEP_RATE_0, 0); + ereg (ib, VGT_INSTANCE_STEP_RATE_1, 0); + + ereg (ib, VGT_MULTI_PRIM_IB_RESET_INDX, 0); + ereg (ib, VGT_OUTPUT_PATH_CNTL, 0); + ereg (ib, VGT_GS_MODE, 0); + ereg (ib, VGT_HOS_CNTL, 0); + ereg (ib, VGT_HOS_MAX_TESS_LEVEL, 0); + ereg (ib, VGT_HOS_MIN_TESS_LEVEL, 0); + ereg (ib, VGT_HOS_REUSE_DEPTH, 0); + ereg (ib, VGT_GROUP_PRIM_TYPE, 0); + ereg (ib, VGT_GROUP_FIRST_DECR, 0); + ereg (ib, VGT_GROUP_DECR, 0); + ereg (ib, VGT_GROUP_VECT_0_CNTL, 0); + ereg (ib, VGT_GROUP_VECT_1_CNTL, 0); + ereg (ib, VGT_GROUP_VECT_0_FMT_CNTL, 0); + ereg (ib, VGT_GROUP_VECT_1_FMT_CNTL, 0); + ereg (ib, VGT_PRIMITIVEID_EN, 0); + ereg (ib, VGT_MULTI_PRIM_IB_RESET_EN, 0); + ereg (ib, VGT_STRMOUT_EN, 0); + ereg (ib, VGT_REUSE_OFF, 0); + ereg (ib, VGT_VTX_CNT_EN, 0); + ereg (ib, VGT_STRMOUT_BUFFER_EN, 0); + + // clear tex resources - PS + for (i = 0; i < 16; i++) { + tex_res.id = i; + set_tex_resource(pScrn, ib, &tex_res); + } + + // clear tex resources - VS + for (i = 160; i < 164; i++) { + tex_res.id = i; + set_tex_resource(pScrn, ib, &tex_res); + } + + // clear tex resources - FS + for (i = 320; i < 335; i++) { + tex_res.id = i; + set_tex_resource(pScrn, ib, &tex_res); + } + +} + + +/* + * Commands + */ + +void +draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, 
draw_config_t *draw_conf, uint32_t *indices) +{ + uint32_t i, count; + + ereg (ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); + pack3 (ib, IT_INDEX_TYPE, 1); + e32 (ib, draw_conf->index_type); + pack3 (ib, IT_NUM_INSTANCES, 1); + e32 (ib, draw_conf->num_instances); + + // calculate num of packets + count = 2; + if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) + count += (draw_conf->num_indices + 1) / 2; + else + count += draw_conf->num_indices; + + pack3 (ib, IT_DRAW_INDEX_IMMD, count); + e32 (ib, draw_conf->num_indices); + e32 (ib, draw_conf->vgt_draw_initiator); + + if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) { + for (i = 0; i < draw_conf->num_indices; i += 2) { + if ((i + 1) == draw_conf->num_indices) + e32 (ib, indices[i]); + else + e32 (ib, (indices[i] | (indices[i + 1] << 16))); + } + } else { + for (i = 0; i < draw_conf->num_indices; i++) + e32 (ib, indices[i]); + } +} + +void +draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) +{ + + ereg (ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); + pack3 (ib, IT_INDEX_TYPE, 1); + e32 (ib, draw_conf->index_type); + pack3 (ib, IT_NUM_INSTANCES, 1); + e32 (ib, draw_conf->num_instances); + pack3 (ib, IT_DRAW_INDEX_AUTO, 2); + e32 (ib, draw_conf->num_indices); + e32 (ib, draw_conf->vgt_draw_initiator); +} diff --git a/src/radeon.h b/src/radeon.h index 66b23304..629e1ffa 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -354,6 +354,8 @@ typedef enum { #define IS_DCE32_VARIANT ((info->ChipFamily >= CHIP_FAMILY_RV730)) +#define IS_R600_3D (info->ChipFamily >= CHIP_FAMILY_R600) + #define IS_R500_3D ((info->ChipFamily == CHIP_FAMILY_RV515) || \ (info->ChipFamily == CHIP_FAMILY_R520) || \ (info->ChipFamily == CHIP_FAMILY_RV530) || \ @@ -569,6 +571,38 @@ struct radeon_dri { }; #endif +#ifdef XF86DRI +#ifdef USE_EXA +struct r6xx_solid_vertex { + float x; + float y; +}; + +struct r6xx_copy_vertex { + float x; + float y; + float s; + float t; +}; + +struct r6xx_comp_vertex { + float x; + float y; + float 
src_s; + float src_t; +}; + +struct r6xx_comp_mask_vertex { + float x; + float y; + float src_s; + float src_t; + float mask_s; + float mask_t; +}; +#endif +#endif + struct radeon_accel_state { /* common accel data */ int fifo_slots; /* Free slots in the FIFO (64 max) */ @@ -609,6 +643,44 @@ struct radeon_accel_state { Bool src_tile_height; Bool vsync; + + drmBufPtr ib; + int vb_index; + + // shader storage + ExaOffscreenArea *shaders; + uint32_t solid_vs_offset; + uint32_t solid_ps_offset; + uint32_t copy_vs_offset; + uint32_t copy_ps_offset; + uint32_t comp_vs_offset; + uint32_t comp_ps_offset; + uint32_t comp_mask_vs_offset; + uint32_t comp_mask_ps_offset; + uint32_t xv_vs_offset; + uint32_t xv_ps_offset; + + //size/addr stuff + uint32_t src_size[2]; + uint64_t src_mc_addr[2]; + uint32_t src_pitch[2]; + uint32_t dst_size; + uint64_t dst_mc_addr; + uint32_t dst_pitch; + uint32_t vs_size; + uint64_t vs_mc_addr; + uint32_t ps_size; + uint64_t ps_mc_addr; + uint32_t vb_size; + uint64_t vb_mc_addr; + + // UTS/DFS + drmBufPtr scratch; + + // copy + Bool same_surface; + int rop; + uint32_t planemask; #endif #ifdef USE_XAA @@ -1032,6 +1104,7 @@ extern void RADEONDoPrepareCopyMMIO(ScrnInfoPtr pScrn, uint32_t dst_pitch_offset, uint32_t datatype, int rop, Pixel planemask); +extern Bool R600DrawInit(ScreenPtr pScreen); #endif #if defined(XF86DRI) && defined(USE_EXA) @@ -1116,15 +1189,16 @@ do { \ #define RADEONCP_STOP(pScrn, info) \ do { \ int _ret; \ - if (info->cp->CPStarted) { \ + if (info->cp->CPStarted) { \ _ret = RADEONCPStop(pScrn, info); \ if (_ret) { \ xf86DrvMsg(pScrn->scrnIndex, X_ERROR, \ "%s: CP stop %d\n", __FUNCTION__, _ret); \ } \ info->cp->CPStarted = FALSE; \ - } \ - RADEONEngineRestore(pScrn); \ + } \ + if (info->ChipFamily < CHIP_FAMILY_R600) \ + RADEONEngineRestore(pScrn); \ info->cp->CPRuns = FALSE; \ } while (0) @@ -1240,19 +1314,26 @@ do { \ #define RADEON_WAIT_UNTIL_2D_IDLE() \ do { \ - BEGIN_RING(2); \ - OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); 
\ - OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \ - RADEON_WAIT_HOST_IDLECLEAN)); \ - ADVANCE_RING(); \ + if (info->ChipFamily < CHIP_FAMILY_R600) { \ + BEGIN_RING(2); \ + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ + OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \ + RADEON_WAIT_HOST_IDLECLEAN)); \ + ADVANCE_RING(); \ + } \ } while (0) #define RADEON_WAIT_UNTIL_3D_IDLE() \ do { \ BEGIN_RING(2); \ - OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ - OUT_RING((RADEON_WAIT_3D_IDLECLEAN | \ - RADEON_WAIT_HOST_IDLECLEAN)); \ + if (info->ChipFamily >= CHIP_FAMILY_R600) { \ + OUT_RING(CP_PACKET0(R600_WAIT_UNTIL, 0)); \ + OUT_RING((RADEON_WAIT_3D_IDLECLEAN)); \ + } else { \ + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ + OUT_RING((RADEON_WAIT_3D_IDLECLEAN | \ + RADEON_WAIT_HOST_IDLECLEAN)); \ + } \ ADVANCE_RING(); \ } while (0) @@ -1263,17 +1344,25 @@ do { \ "WAIT_UNTIL_IDLE() in %s\n", __FUNCTION__); \ } \ BEGIN_RING(2); \ - OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ - OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \ - RADEON_WAIT_3D_IDLECLEAN | \ - RADEON_WAIT_HOST_IDLECLEAN)); \ + if (info->ChipFamily >= CHIP_FAMILY_R600) { \ + OUT_RING(CP_PACKET0(R600_WAIT_UNTIL, 0)); \ + OUT_RING((RADEON_WAIT_3D_IDLECLEAN)); \ + } else { \ + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); \ + OUT_RING((RADEON_WAIT_2D_IDLECLEAN | \ + RADEON_WAIT_3D_IDLECLEAN | \ + RADEON_WAIT_HOST_IDLECLEAN)); \ + } \ ADVANCE_RING(); \ } while (0) #define RADEON_PURGE_CACHE() \ do { \ BEGIN_RING(2); \ - if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ + if (info->ChipFamily >= CHIP_FAMILY_R600) { \ + OUT_RING(CP_PACKET3(IT_EVENT_WRITE, 0)); \ + OUT_RING(CACHE_FLUSH_AND_INV_EVENT); \ + } else if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \ OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \ } else { \ @@ -1285,15 +1374,17 @@ do { \ #define RADEON_PURGE_ZCACHE() \ do { \ - BEGIN_RING(2); \ - if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ - OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 
0)); \ - OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \ - } else { \ - OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); \ - OUT_RING(R300_ZC_FLUSH_ALL); \ + if (info->ChipFamily < CHIP_FAMILY_R600) { \ + BEGIN_RING(2); \ + if (info->ChipFamily <= CHIP_FAMILY_RV280) { \ + OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \ + OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \ + } else { \ + OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0)); \ + OUT_RING(R300_ZC_FLUSH_ALL); \ + } \ + ADVANCE_RING(); \ } \ - ADVANCE_RING(); \ } while (0) #endif /* XF86DRI */ diff --git a/src/radeon_accel.c b/src/radeon_accel.c index 96570e8c..2b17cd15 100644 --- a/src/radeon_accel.c +++ b/src/radeon_accel.c @@ -78,6 +78,7 @@ /* Driver data structures */ #include "radeon.h" #include "radeon_reg.h" +#include "r600_reg.h" #include "radeon_macros.h" #include "radeon_probe.h" #include "radeon_version.h" @@ -156,9 +157,6 @@ void RADEONEngineFlush(ScrnInfoPtr pScrn) unsigned char *RADEONMMIO = info->MMIO; int i; - if (info->ChipFamily >= CHIP_FAMILY_R600) - return; - if (info->ChipFamily <= CHIP_FAMILY_RV280) { OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH_ALL, @@ -198,8 +196,6 @@ void RADEONEngineReset(ScrnInfoPtr pScrn) uint32_t rbbm_soft_reset; uint32_t host_path_cntl; - if (info->ChipFamily >= CHIP_FAMILY_R600) - return; /* The following RBBM_SOFT_RESET sequence can help un-wedge * an R300 after the command processor got stuck. 
*/ @@ -310,6 +306,35 @@ void RADEONEngineReset(ScrnInfoPtr pScrn) OUTPLL(pScrn, RADEON_MCLK_CNTL, mclk_cntl); } +/* Reset graphics card to known state */ +void R600EngineReset(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + unsigned char *RADEONMMIO = info->MMIO; + uint32_t cp_ptr, cp_me_cntl, cp_rb_cntl; + + cp_ptr = INREG(R600_CP_RB_WPTR); + + cp_me_cntl = INREG(R600_CP_ME_CNTL); + OUTREG(R600_CP_ME_CNTL, 0x10000000); + + OUTREG(R600_GRBM_SOFT_RESET, 0x7fff); + INREG(R600_GRBM_SOFT_RESET); + usleep (50); + OUTREG(R600_GRBM_SOFT_RESET, 0); + INREG(R600_GRBM_SOFT_RESET); + + OUTREG(R600_CP_RB_WPTR_DELAY, 0); + cp_rb_cntl = INREG(R600_CP_RB_CNTL); + OUTREG(R600_CP_RB_CNTL, 0x80000000); + + OUTREG(R600_CP_RB_RPTR_WR, cp_ptr); + OUTREG(R600_CP_RB_WPTR, cp_ptr); + OUTREG(R600_CP_RB_CNTL, cp_rb_cntl); + OUTREG(R600_CP_ME_CNTL, cp_me_cntl); + +} + /* Restore the acceleration hardware to its previous state */ void RADEONEngineRestore(ScrnInfoPtr pScrn) { @@ -611,8 +636,12 @@ drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn) xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "GetBuffer timed out, resetting engine...\n"); - RADEONEngineReset(pScrn); - RADEONEngineRestore(pScrn); + + if (info->ChipFamily < CHIP_FAMILY_R600) { + RADEONEngineReset(pScrn); + RADEONEngineRestore(pScrn); + } else + R600EngineReset(pScrn); /* Always restart the engine when doing CP 2D acceleration */ RADEONCP_RESET(pScrn, info); @@ -627,6 +656,8 @@ void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard) drmBufPtr buffer = info->cp->indirectBuffer; int start = info->cp->indirectStart; drm_radeon_indirect_t indirect; + RING_LOCALS; + RADEONCP_REFRESH(pScrn, info); if (!buffer) return; if (start == buffer->used && !discard) return; @@ -636,6 +667,14 @@ void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard) buffer->idx); } + if (info->ChipFamily >= CHIP_FAMILY_R600) { + while (buffer->used & 0x3c){ + BEGIN_RING(1); + OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */ + 
ADVANCE_RING(); + } + } + indirect.idx = buffer->idx; indirect.start = start; indirect.end = buffer->used; @@ -664,6 +703,19 @@ void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn) drmBufPtr buffer = info->cp->indirectBuffer; int start = info->cp->indirectStart; drm_radeon_indirect_t indirect; + RING_LOCALS; + RADEONCP_REFRESH(pScrn, info); + + + if (info->ChipFamily >= CHIP_FAMILY_R600) { + if (buffer) { + while (buffer->used & 0x3c) { + BEGIN_RING(1); + OUT_RING(CP_PACKET2()); /* fill up to multiple of 16 dwords */ + ADVANCE_RING(); + } + } + } info->cp->indirectBuffer = NULL; info->cp->indirectStart = 0; @@ -926,20 +978,26 @@ Bool RADEONAccelInit(ScreenPtr pScreen) ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); - if (info->ChipFamily >= CHIP_FAMILY_R600) - return FALSE; - #ifdef USE_EXA if (info->useEXA) { # ifdef XF86DRI if (info->directRenderingEnabled) { - if (!RADEONDrawInitCP(pScreen)) - return FALSE; + if (info->ChipFamily >= CHIP_FAMILY_R600) { + if (!R600DrawInit(pScreen)) + return FALSE; + } else { + if (!RADEONDrawInitCP(pScreen)) + return FALSE; + } } else # endif /* XF86DRI */ { - if (!RADEONDrawInitMMIO(pScreen)) + if (info->ChipFamily >= CHIP_FAMILY_R600) return FALSE; + else { + if (!RADEONDrawInitMMIO(pScreen)) + return FALSE; + } } } #endif /* USE_EXA */ @@ -947,6 +1005,9 @@ Bool RADEONAccelInit(ScreenPtr pScreen) if (!info->useEXA) { XAAInfoRecPtr a; + if (info->ChipFamily >= CHIP_FAMILY_R600) + return FALSE; + if (!(a = info->accel_state->accel = XAACreateInfoRec())) { xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n"); return FALSE; diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c index 0a9f9db9..7e00384e 100644 --- a/src/radeon_commonfuncs.c +++ b/src/radeon_commonfuncs.c @@ -733,8 +733,11 @@ void FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn) xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Idle timed out, resetting engine...\n"); - RADEONEngineReset(pScrn); - 
RADEONEngineRestore(pScrn); + if (info->ChipFamily < CHIP_FAMILY_R600) { + RADEONEngineReset(pScrn); + RADEONEngineRestore(pScrn); + } else + R600EngineReset(pScrn); /* Always restart the engine when doing CP 2D acceleration */ RADEONCP_RESET(pScrn, info); @@ -751,7 +754,7 @@ void FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn) #endif if (info->ChipFamily >= CHIP_FAMILY_R600) - return; + return; /* Wait for the engine to go idle */ RADEONWaitForFifoFunction(pScrn, 64); diff --git a/src/radeon_dri.c b/src/radeon_dri.c index 968afe69..4520be55 100644 --- a/src/radeon_dri.c +++ b/src/radeon_dri.c @@ -45,6 +45,7 @@ #include "radeon.h" #include "radeon_video.h" #include "radeon_reg.h" +#include "r600_reg.h" #include "radeon_macros.h" #include "radeon_drm.h" #include "radeon_dri.h" @@ -784,92 +785,96 @@ static Bool RADEONSetAgpMode(RADEONInfoPtr info, ScreenPtr pScreen) unsigned long mode = drmAgpGetMode(info->dri->drmFD); /* Default mode */ unsigned int vendor = drmAgpVendorId(info->dri->drmFD); unsigned int device = drmAgpDeviceId(info->dri->drmFD); - /* ignore agp 3.0 mode bit from the chip as it's buggy on some cards with - pcie-agp rialto bridge chip - use the one from bridge which must match */ - uint32_t agp_status = (INREG(RADEON_AGP_STATUS) | RADEON_AGPv3_MODE) & mode; - Bool is_v3 = (agp_status & RADEON_AGPv3_MODE); - unsigned int defaultMode; - MessageType from; - if (is_v3) { - defaultMode = (agp_status & RADEON_AGPv3_8X_MODE) ? 
8 : 4; - } else { - if (agp_status & RADEON_AGP_4X_MODE) defaultMode = 4; - else if (agp_status & RADEON_AGP_2X_MODE) defaultMode = 2; - else defaultMode = 1; - } - - /* Apply AGPMode Quirks */ - radeon_agpmode_quirk_ptr p = radeon_agpmode_quirk_list; - while (p && p->chipDevice != 0) { - if (vendor == p->hostbridgeVendor && - device == p->hostbridgeDevice && - PCI_DEV_VENDOR_ID(info->PciInfo) == p->chipVendor && - PCI_DEV_DEVICE_ID(info->PciInfo) == p->chipDevice && - PCI_SUB_VENDOR_ID(info->PciInfo) == p->subsysVendor && - PCI_SUB_DEVICE_ID(info->PciInfo) == p->subsysDevice) - { - defaultMode = p->defaultMode; - } - ++p; - } + if (info->ChipFamily < CHIP_FAMILY_R600) { + /* ignore agp 3.0 mode bit from the chip as it's buggy on some cards with + pcie-agp rialto bridge chip - use the one from bridge which must match */ + uint32_t agp_status = (INREG(RADEON_AGP_STATUS) | RADEON_AGPv3_MODE) & mode; + Bool is_v3 = (agp_status & RADEON_AGPv3_MODE); + unsigned int defaultMode; + MessageType from; - from = X_DEFAULT; + if (is_v3) { + defaultMode = (agp_status & RADEON_AGPv3_8X_MODE) ? 8 : 4; + } else { + if (agp_status & RADEON_AGP_4X_MODE) defaultMode = 4; + else if (agp_status & RADEON_AGP_2X_MODE) defaultMode = 2; + else defaultMode = 1; + } - if (xf86GetOptValInteger(info->Options, OPTION_AGP_MODE, &info->dri->agpMode)) { - if ((info->dri->agpMode < (is_v3 ? 4 : 1)) || - (info->dri->agpMode > (is_v3 ? 8 : 4)) || - (info->dri->agpMode & (info->dri->agpMode - 1))) { - xf86DrvMsg(pScreen->myNum, X_ERROR, - "Illegal AGP Mode: %d (valid values: %s), leaving at " - "%dx\n", info->dri->agpMode, is_v3 ? 
"4, 8" : "1, 2, 4", - defaultMode); - info->dri->agpMode = defaultMode; + /* Apply AGPMode Quirks */ + radeon_agpmode_quirk_ptr p = radeon_agpmode_quirk_list; + while (p && p->chipDevice != 0) { + if (vendor == p->hostbridgeVendor && + device == p->hostbridgeDevice && + PCI_DEV_VENDOR_ID(info->PciInfo) == p->chipVendor && + PCI_DEV_DEVICE_ID(info->PciInfo) == p->chipDevice && + PCI_SUB_VENDOR_ID(info->PciInfo) == p->subsysVendor && + PCI_SUB_DEVICE_ID(info->PciInfo) == p->subsysDevice) + { + defaultMode = p->defaultMode; + } + ++p; + } + + from = X_DEFAULT; + + if (xf86GetOptValInteger(info->Options, OPTION_AGP_MODE, &info->dri->agpMode)) { + if ((info->dri->agpMode < (is_v3 ? 4 : 1)) || + (info->dri->agpMode > (is_v3 ? 8 : 4)) || + (info->dri->agpMode & (info->dri->agpMode - 1))) { + xf86DrvMsg(pScreen->myNum, X_ERROR, + "Illegal AGP Mode: %d (valid values: %s), leaving at " + "%dx\n", info->dri->agpMode, is_v3 ? "4, 8" : "1, 2, 4", + defaultMode); + info->dri->agpMode = defaultMode; + } else + from = X_CONFIG; } else - from = X_CONFIG; - } else - info->dri->agpMode = defaultMode; + info->dri->agpMode = defaultMode; - xf86DrvMsg(pScreen->myNum, from, "Using AGP %dx\n", info->dri->agpMode); + xf86DrvMsg(pScreen->myNum, from, "Using AGP %dx\n", info->dri->agpMode); - mode &= ~RADEON_AGP_MODE_MASK; - if (is_v3) { - /* only set one mode bit for AGPv3 */ - switch (info->dri->agpMode) { - case 8: mode |= RADEON_AGPv3_8X_MODE; break; - case 4: default: mode |= RADEON_AGPv3_4X_MODE; - } - /*TODO: need to take care of other bits valid for v3 mode - * currently these bits are not used in all tested cards. 
- */ - } else { - switch (info->dri->agpMode) { - case 4: mode |= RADEON_AGP_4X_MODE; - case 2: mode |= RADEON_AGP_2X_MODE; - case 1: default: mode |= RADEON_AGP_1X_MODE; + mode &= ~RADEON_AGP_MODE_MASK; + if (is_v3) { + /* only set one mode bit for AGPv3 */ + switch (info->dri->agpMode) { + case 8: mode |= RADEON_AGPv3_8X_MODE; break; + case 4: default: mode |= RADEON_AGPv3_4X_MODE; + } + /*TODO: need to take care of other bits valid for v3 mode + * currently these bits are not used in all tested cards. + */ + } else { + switch (info->dri->agpMode) { + case 4: mode |= RADEON_AGP_4X_MODE; + case 2: mode |= RADEON_AGP_2X_MODE; + case 1: default: mode |= RADEON_AGP_1X_MODE; + } } - } - /* AGP Fast Writes. - * TODO: take into account that certain agp modes don't support fast - * writes at all */ - mode &= ~RADEON_AGP_FW_MODE; /* Disable per default */ - if (xf86ReturnOptValBool(info->Options, OPTION_AGP_FW, FALSE)) { - xf86DrvMsg(pScreen->myNum, X_WARNING, - "WARNING: Using the AGPFastWrite option is not recommended.\n"); - xf86Msg(X_NONE, "\tThis option does not provide much of a noticable speed" - " boost, while it\n\twill probably hard lock your machine." - " All bets are off!\n"); - - /* Black list some host/AGP bridges. */ - if ((vendor == PCI_VENDOR_AMD) && (device == PCI_CHIP_AMD761)) - xf86DrvMsg(pScreen->myNum, X_PROBED, "Ignoring AGPFastWrite option " - "for the AMD 761 northbridge.\n"); - else { - xf86DrvMsg(pScreen->myNum, X_CONFIG, "Enabling AGP Fast Writes.\n"); - mode |= RADEON_AGP_FW_MODE; - } - } /* Don't mention this otherwise, so that people don't get funny ideas */ + /* AGP Fast Writes. 
+ * TODO: take into account that certain agp modes don't support fast + * writes at all */ + mode &= ~RADEON_AGP_FW_MODE; /* Disable per default */ + if (xf86ReturnOptValBool(info->Options, OPTION_AGP_FW, FALSE)) { + xf86DrvMsg(pScreen->myNum, X_WARNING, + "WARNING: Using the AGPFastWrite option is not recommended.\n"); + xf86Msg(X_NONE, "\tThis option does not provide much of a noticable speed" + " boost, while it\n\twill probably hard lock your machine." + " All bets are off!\n"); + + /* Black list some host/AGP bridges. */ + if ((vendor == PCI_VENDOR_AMD) && (device == PCI_CHIP_AMD761)) + xf86DrvMsg(pScreen->myNum, X_PROBED, "Ignoring AGPFastWrite option " + "for the AMD 761 northbridge.\n"); + else { + xf86DrvMsg(pScreen->myNum, X_CONFIG, "Enabling AGP Fast Writes.\n"); + mode |= RADEON_AGP_FW_MODE; + } + } /* Don't mention this otherwise, so that people don't get funny ideas */ + } else + info->dri->agpMode = 8; /* doesn't matter at this point */ xf86DrvMsg(pScreen->myNum, X_INFO, "[agp] Mode 0x%08lx [AGP 0x%04x/0x%04x; Card 0x%04x/0x%04x 0x%04x/0x%04x]\n", @@ -904,6 +909,9 @@ static void RADEONSetAgpBase(RADEONInfoPtr info, ScreenPtr pScreen) ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; unsigned char *RADEONMMIO = info->MMIO; + if (info->ChipFamily >= CHIP_FAMILY_R600) + return; + /* drm already does this, so we can probably remove this. * agp_base_2 ? 
*/ @@ -1177,13 +1185,14 @@ static int RADEONDRIKernelInit(RADEONInfoPtr info, ScreenPtr pScreen) drm_radeon_init_t drmInfo; memset(&drmInfo, 0, sizeof(drm_radeon_init_t)); - if ( info->ChipFamily >= CHIP_FAMILY_R300 ) - drmInfo.func = RADEON_INIT_R300_CP; + if ( info->ChipFamily >= CHIP_FAMILY_R600 ) + drmInfo.func = RADEON_INIT_R600_CP; + else if ( info->ChipFamily >= CHIP_FAMILY_R300 ) + drmInfo.func = RADEON_INIT_R300_CP; + else if ( info->ChipFamily >= CHIP_FAMILY_R200 ) + drmInfo.func = RADEON_INIT_R200_CP; else - if ( info->ChipFamily >= CHIP_FAMILY_R200 ) - drmInfo.func = RADEON_INIT_R200_CP; - else - drmInfo.func = RADEON_INIT_CP; + drmInfo.func = RADEON_INIT_CP; drmInfo.sarea_priv_offset = sizeof(XF86DRISAREARec); drmInfo.is_pci = (info->cardType!=CARD_AGP); @@ -1217,7 +1226,8 @@ static int RADEONDRIKernelInit(RADEONInfoPtr info, ScreenPtr pScreen) * registers back to their default values, so we need to restore * those engine register here. */ - RADEONEngineRestore(pScrn); + if (info->ChipFamily < CHIP_FAMILY_R600) + RADEONEngineRestore(pScrn); return TRUE; } @@ -1293,14 +1303,16 @@ static void RADEONDRIIrqInit(RADEONInfoPtr info, ScreenPtr pScreen) "[drm] falling back to irq-free operation\n"); info->dri->irq = 0; } else { - unsigned char *RADEONMMIO = info->MMIO; - info->ModeReg->gen_int_cntl = INREG( RADEON_GEN_INT_CNTL ); - - /* Let the DRM know it can safely disable the vblank interrupts */ - radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0], - FALSE); - radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0], - TRUE); + if (info->ChipFamily < CHIP_FAMILY_R600) { + unsigned char *RADEONMMIO = info->MMIO; + info->ModeReg->gen_int_cntl = INREG( RADEON_GEN_INT_CNTL ); + + /* Let the DRM know it can safely disable the vblank interrupts */ + radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0], + FALSE); + radeon_crtc_modeset_ioctl(XF86_CRTC_CONFIG_PTR(pScrn)->crtc[0], + TRUE); + } } } diff --git a/src/radeon_driver.c 
b/src/radeon_driver.c index b0817b0a..919a9fb0 100644 --- a/src/radeon_driver.c +++ b/src/radeon_driver.c @@ -674,8 +674,8 @@ static void radeon_write_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, uint32_ if (mask & LOC_FB) OUTREG(R700_MC_VM_FB_LOCATION, fb_loc); if (mask & LOC_AGP) { - OUTREG(R600_MC_VM_AGP_BOT, agp_loc); - OUTREG(R600_MC_VM_AGP_TOP, agp_loc_hi); + OUTREG(R700_MC_VM_AGP_BOT, agp_loc); + OUTREG(R700_MC_VM_AGP_TOP, agp_loc_hi); } } else if (info->ChipFamily >= CHIP_FAMILY_R600) { if (mask & LOC_FB) @@ -724,8 +724,8 @@ static void radeon_read_mc_fb_agp_location(ScrnInfoPtr pScrn, int mask, uint32_t if (mask & LOC_FB) *fb_loc = INREG(R700_MC_VM_FB_LOCATION); if (mask & LOC_AGP) { - *agp_loc = INREG(R600_MC_VM_AGP_BOT); - *agp_loc_hi = INREG(R600_MC_VM_AGP_TOP); + *agp_loc = INREG(R700_MC_VM_AGP_BOT); + *agp_loc_hi = INREG(R700_MC_VM_AGP_TOP); } } else if (info->ChipFamily >= CHIP_FAMILY_R600) { if (mask & LOC_FB) @@ -1876,7 +1876,10 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) /* treat PCIE IGP cards as PCI */ if (info->cardType == CARD_PCIE && info->IsIGP) - info->cardType = CARD_PCI; + info->cardType = CARD_PCI; + + if ((info->ChipFamily >= CHIP_FAMILY_R600) && info->IsIGP) + info->cardType = CARD_PCIE; if ((s = xf86GetOptValString(info->Options, OPTION_BUS_TYPE))) { if (strcmp(s, "AGP") == 0) { @@ -1905,6 +1908,7 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) info->Chipset != PCI_CHIP_RN50_5969); #endif +#if 0 if (info->ChipFamily >= CHIP_FAMILY_R600) { info->r600_shadow_fb = TRUE; xf86DrvMsg(pScrn->scrnIndex, X_INFO, @@ -1912,6 +1916,7 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn) if (!xf86LoadSubModule(pScrn, "shadow")) return FALSE; } +#endif return TRUE; } @@ -1989,8 +1994,8 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) if (info->ChipFamily >= CHIP_FAMILY_R600) { xf86DrvMsg(pScrn->scrnIndex, X_DEFAULT, - "No acceleration support available on R600 yet.\n"); - return TRUE; + "Experimental R6xx/R7xx EXA 
support.\n"); + info->useEXA = TRUE; } if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) { @@ -2336,7 +2341,10 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn) xf86DrvMsg(pScrn->scrnIndex, from, "Page Flipping %sabled%s\n", info->dri->allowPageFlip ? "en" : "dis", reason); - info->DMAForXv = TRUE; + if (info->ChipFamily >= CHIP_FAMILY_R600) + info->DMAForXv = FALSE; + else + info->DMAForXv = TRUE; from = xf86GetOptValBool(info->Options, OPTION_XV_DMA, &info->DMAForXv) ? X_CONFIG : X_INFO; xf86DrvMsg(pScrn->scrnIndex, from, @@ -3638,11 +3646,9 @@ Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, RADEONDGAInit(pScreen); /* Init Xv */ - if (info->ChipFamily < CHIP_FAMILY_R600) { - xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, - "Initializing Xv\n"); - RADEONInitVideo(pScreen); - } + xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG, + "Initializing Xv\n"); + RADEONInitVideo(pScreen); if (info->r600_shadow_fb == TRUE) { if (!shadowSetup(pScreen)) { @@ -3952,7 +3958,7 @@ static void RADEONAdjustMemMapRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save) } #ifdef USE_EXA - if (info->accelDFS) + if (info->accelDFS || (info->ChipFamily >= CHIP_FAMILY_R600)) { drm_radeon_getparam_t gp; int gart_base; diff --git a/src/radeon_exa.c b/src/radeon_exa.c index c4bc1bb1..5e236458 100644 --- a/src/radeon_exa.c +++ b/src/radeon_exa.c @@ -35,6 +35,7 @@ #include "radeon.h" #include "radeon_reg.h" +#include "r600_reg.h" #ifdef XF86DRI #include "radeon_drm.h" #endif diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c index ceeee49d..0c84384f 100644 --- a/src/radeon_exa_render.c +++ b/src/radeon_exa_render.c @@ -451,7 +451,7 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, #ifdef ONLY_ONCE -static PixmapPtr +PixmapPtr RADEONGetDrawablePixmap(DrawablePtr pDrawable) { if (pDrawable->type == DRAWABLE_WINDOW) diff --git a/src/radeon_modes.c b/src/radeon_modes.c index e06f8ddf..0a8fa001 100644 --- 
a/src/radeon_modes.c +++ b/src/radeon_modes.c @@ -65,15 +65,19 @@ void RADEONSetPitch (ScrnInfoPtr pScrn) align_large = info->allowColorTiling || IS_AVIVO_VARIANT; /* FIXME: May need to validate line pitch here */ - switch (pScrn->depth / 8) { - case 1: pitch_mask = align_large ? 255 : 127; - break; - case 2: pitch_mask = align_large ? 127 : 31; - break; - case 3: - case 4: pitch_mask = align_large ? 63 : 15; - break; - } + if (info->ChipFamily < CHIP_FAMILY_R600) { + switch (pScrn->depth / 8) { + case 1: pitch_mask = align_large ? 255 : 127; + break; + case 2: pitch_mask = align_large ? 127 : 31; + break; + case 3: + case 4: pitch_mask = align_large ? 63 : 15; + break; + } + } else + pitch_mask = 255; /* r6xx/r7xx need 256B alignment for accel */ + dummy = (pScrn->virtualX + pitch_mask) & ~pitch_mask; pScrn->displayWidth = dummy; info->CurrentLayout.displayWidth = pScrn->displayWidth; diff --git a/src/radeon_reg.h b/src/radeon_reg.h index 7b8840bf..17f8575c 100644 --- a/src/radeon_reg.h +++ b/src/radeon_reg.h @@ -3984,6 +3984,9 @@ #define R600_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198 #define R700_MC_VM_FB_LOCATION 0x2024 +#define R700_MC_VM_AGP_TOP 0x2028 +#define R700_MC_VM_AGP_BOT 0x202c +#define R700_MC_VM_AGP_BASE 0x2030 #define R600_HDP_NONSURFACE_BASE 0x2c04 @@ -5362,4 +5365,27 @@ #define R500_DYN_SCLK_PWMEM_PIPE 0x000d /* PLL */ +/* r6xx/r7xx stuff */ +#define R600_GRBM_SOFT_RESET 0x8020 +# define R600_SOFT_RESET_CP (1 << 0) + +#define R600_WAIT_UNTIL 0x8040 + +#define R600_CP_ME_CNTL 0x86d8 +# define R600_CP_ME_HALT (1 << 28) + +#define R600_CP_RB_BASE 0xc100 +#define R600_CP_RB_CNTL 0xc104 +# define R600_RB_NO_UPDATE (1 << 27) +# define R600_RB_RPTR_WR_ENA (1 << 31) +#define R600_CP_RB_RPTR_WR 0xc108 +#define R600_CP_RB_RPTR_ADDR 0xc10c +#define R600_CP_RB_RPTR_ADDR_HI 0xc110 +#define R600_CP_RB_WPTR 0xc114 +#define R600_CP_RB_WPTR_ADDR 0xc118 +#define R600_CP_RB_WPTR_ADDR_HI 0xc11c + +#define R600_CP_RB_RPTR 0x8700 +#define R600_CP_RB_WPTR_DELAY 0x8704 
+ #endif diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c index 7712344b..16b2c829 100644 --- a/src/radeon_textured_video.c +++ b/src/radeon_textured_video.c @@ -36,6 +36,7 @@ #include "radeon.h" #include "radeon_reg.h" +#include "r600_reg.h" #include "radeon_macros.h" #include "radeon_probe.h" #include "radeon_video.h" @@ -43,12 +44,18 @@ #include <X11/extensions/Xv.h> #include "fourcc.h" +extern void +R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv); + #define IMAGE_MAX_WIDTH 2048 #define IMAGE_MAX_HEIGHT 2048 #define IMAGE_MAX_WIDTH_R500 4096 #define IMAGE_MAX_HEIGHT_R500 4096 +#define IMAGE_MAX_WIDTH_R600 8192 +#define IMAGE_MAX_HEIGHT_R600 8192 + static Bool RADEONTilingEnabled(ScrnInfoPtr pScrn, PixmapPtr pPix) { @@ -146,6 +153,82 @@ static __inline__ uint32_t F_TO_24(float val) #endif /* XF86DRI */ +static void +CopyPlanartoNV12(unsigned char *y_src, unsigned char *u_src, unsigned char *v_src, + unsigned char *dst, + int srcPitch, int srcPitch2, int dstPitch, + int w, int h) +{ + int i, j; + + /* Y */ + if (srcPitch == dstPitch) { + memcpy(dst, y_src, srcPitch * h); + dst += (dstPitch * h); + } else { + for (i = 0; i < h; i++) { + memcpy(dst, y_src, srcPitch); + y_src += srcPitch; + dst += dstPitch; + } + } + + /* tex base need 256B alignment */ + if (h & 1) + dst += dstPitch; + + /* UV */ + for (i = 0; i < (h >> 1); i++) { + unsigned char *u = u_src; + unsigned char *v = v_src; + unsigned char *uv = dst; + + for (j = 0; j < w; j++) { + uv[0] = v[j]; + uv[1] = u[j]; + uv += 2; + } + dst += dstPitch; + u_src += srcPitch2; + v_src += srcPitch2; + } +} + +static void +CopyPackedtoNV12(unsigned char *src, unsigned char *dst, + int srcPitch, int dstPitch, + int w, int h, int id) +{ + int i, j; + int uv_offset = dstPitch * h; + uv_offset = (uv_offset + 255) & ~255; + + // FOURCC_UYVY: U0 Y0 V0 Y1 + // FOURCC_YUY2: Y0 U0 Y1 V0 + for (i = 0; i < h; i++) { + unsigned char *y = dst; + unsigned char *uv = (unsigned char *)dst + 
uv_offset; + + for (j = 0; j < (w / 2); j++) { + if (id == FOURCC_UYVY) { + uv[1] = src[(j * 4) + 0]; + y[0] = src[(j * 4) + 1]; + uv[0] = src[(j * 4) + 2]; + y[1] = src[(j * 4) + 3]; + } else { + y[0] = src[(j * 4) + 0]; + uv[1] = src[(j * 4) + 1]; + y[1] = src[(j * 4) + 2]; + uv[0] = src[(j * 4) + 3]; + } + y += 2; + uv += 2; + } + dst += dstPitch; + src += srcPitch; + } +} + static int RADEONPutImageTextured(ScrnInfoPtr pScrn, short src_x, short src_y, @@ -214,7 +297,10 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, break; } - dstPitch = (dstPitch + 63) & ~63; + if (info->ChipFamily >= CHIP_FAMILY_R600) + dstPitch = (dstPitch + 511) & ~511; + else + dstPitch = (dstPitch + 63) & ~63; if (pPriv->video_memory != NULL && size != pPriv->size) { radeon_legacy_free_memory(pScrn, pPriv->video_memory); @@ -222,16 +308,21 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, } if (pPriv->video_memory == NULL) { - pPriv->video_offset = radeon_legacy_allocate_memory(pScrn, - &pPriv->video_memory, - size * 2, 64); + if (info->ChipFamily >= CHIP_FAMILY_R600) + pPriv->video_offset = radeon_legacy_allocate_memory(pScrn, + &pPriv->video_memory, + size * 2, 512); + else + pPriv->video_offset = radeon_legacy_allocate_memory(pScrn, + &pPriv->video_memory, + size * 2, 64); if (pPriv->video_offset == 0) return BadAlloc; } /* Bicubic filter setup */ pPriv->bicubic_enabled = (pPriv->bicubic_state != BICUBIC_OFF); - if (!(IS_R300_3D || IS_R500_3D)) + if (!(IS_R300_3D || IS_R500_3D || IS_R600_3D)) pPriv->bicubic_enabled = FALSE; if (pPriv->bicubic_enabled && (pPriv->bicubic_state == BICUBIC_AUTO)) { /* @@ -280,7 +371,10 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, npixels = ((((x2 + 0xffff) >> 16) + 1) & ~1) - left; pPriv->src_offset = pPriv->video_offset + info->fbLocation + pScrn->fbOffset; - pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch)); + if (info->ChipFamily >= CHIP_FAMILY_R600) + pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset); + else + 
pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch)); pPriv->src_pitch = dstPitch; pPriv->size = size; pPriv->pDraw = pDraw; @@ -294,29 +388,51 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, switch(id) { case FOURCC_YV12: case FOURCC_I420: - top &= ~1; - nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top; - s2offset = srcPitch * height; - s3offset = (srcPitch2 * (height >> 1)) + s2offset; - top &= ~1; - pPriv->src_addr += left << 1; - tmp = ((top >> 1) * srcPitch2) + (left >> 1); - s2offset += tmp; - s3offset += tmp; - if (id == FOURCC_I420) { - tmp = s2offset; - s2offset = s3offset; - s3offset = tmp; + if (info->ChipFamily >= CHIP_FAMILY_R600) { + s2offset = srcPitch * height; + s3offset = (srcPitch2 * (height >> 1)) + s2offset; + if (id == FOURCC_YV12) + CopyPlanartoNV12(buf, buf + s3offset, buf + s2offset, + pPriv->src_addr, + srcPitch, srcPitch2, pPriv->src_pitch, + width, height); + else + CopyPlanartoNV12(buf, buf + s2offset, buf + s3offset, + pPriv->src_addr, + srcPitch, srcPitch2, pPriv->src_pitch, + width, height); + + } else { + top &= ~1; + nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top; + s2offset = srcPitch * height; + s3offset = (srcPitch2 * (height >> 1)) + s2offset; + top &= ~1; + pPriv->src_addr += left << 1; + tmp = ((top >> 1) * srcPitch2) + (left >> 1); + s2offset += tmp; + s3offset += tmp; + if (id == FOURCC_I420) { + tmp = s2offset; + s2offset = s3offset; + s3offset = tmp; + } + RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left, + buf + s2offset, buf + s3offset, pPriv->src_addr, + srcPitch, srcPitch2, dstPitch, nlines, npixels); } - RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left, - buf + s2offset, buf + s3offset, pPriv->src_addr, - srcPitch, srcPitch2, dstPitch, nlines, npixels); break; case FOURCC_UYVY: case FOURCC_YUY2: default: - nlines = ((y2 + 0xffff) >> 16) - top; - RADEONCopyData(pScrn, buf, pPriv->src_addr, srcPitch, dstPitch, nlines, npixels, 2); + if (info->ChipFamily >= 
CHIP_FAMILY_R600) { + CopyPackedtoNV12(buf, pPriv->src_addr, + 2 * width, pPriv->src_pitch, + width, height, id); + } else { + nlines = ((y2 + 0xffff) >> 16) - top; + RADEONCopyData(pScrn, buf, pPriv->src_addr, srcPitch, dstPitch, nlines, npixels, 2); + } break; } @@ -340,7 +456,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, pPriv->h = height; #ifdef XF86DRI - if (info->directRenderingEnabled) + if (IS_R600_3D) + R600DisplayTexturedVideo(pScrn, pPriv); + else if (info->directRenderingEnabled) RADEONDisplayTexturedVideoCP(pScrn, pPriv); else #endif @@ -370,6 +488,16 @@ static XF86VideoEncodingRec DummyEncodingR500[1] = } }; +static XF86VideoEncodingRec DummyEncodingR600[1] = +{ + { + 0, + "XV_IMAGE", + IMAGE_MAX_WIDTH_R600, IMAGE_MAX_HEIGHT_R600, + {1, 1} + } +}; + #define NUM_FORMATS 3 static XF86VideoFormatRec Formats[NUM_FORMATS] = @@ -471,7 +599,9 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen) adapt->flags = 0; adapt->name = "Radeon Textured Video"; adapt->nEncodings = 1; - if (IS_R500_3D) + if (IS_R600_3D) + adapt->pEncodings = DummyEncodingR600; + else if (IS_R500_3D) adapt->pEncodings = DummyEncodingR500; else adapt->pEncodings = DummyEncoding; @@ -483,7 +613,7 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen) pPortPriv = (RADEONPortPrivPtr)(&adapt->pPortPrivates[num_texture_ports]); - if (IS_R300_3D || IS_R500_3D) { + if (IS_R300_3D || IS_R500_3D || IS_R600_3D) { adapt->pAttributes = Attributes_r300; adapt->nAttributes = NUM_ATTRIBUTES_R300; } else { |