diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2011-04-08 07:17:14 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2011-06-04 09:19:46 +0100 |
commit | bcef98af561939aa48d9236b2dfa2c5626adf4cb (patch) | |
tree | 9d05558947a97595a6fdece968b50eeae45bbfb1 /src/sna/kgem_debug_gen4.c | |
parent | 340cfb7f5271fd1df4c8948e5c9336f5b69a6e6c (diff) |
sna: Introduce a new acceleration model.
The premise is that switching between rings (i.e. the BLT and
RENDER rings) on SandyBridge imposes a large latency overhead whilst
rendering. The cause is that in order to switch rings, we need to split
the batch earlier than is desired and to add serialisation between the
rings. Both of which incur large overhead.
By switching to using a pure 3D blit engine (ok, not so pure as the BLT
engine still has uses for the core drawing model which can not be easily
represented without a combinatorial explosion of shaders) we can take
advantage of additional efficiencies, such as relative relocations, that
have been incorporated into recent hardware advances. However, even
older hardware performs better from avoiding the implicit context
switches and from the batching efficiency of the 3D pipeline...
But this is X, and PolyGlyphBlt still exists and remains in use. So for
the operations that are not worth accelerating in hardware, we introduce a
shadow buffer mechanism throughout and reintroduce pixmap migration.
Doing this efficiently is the cornerstone of ensuring that we do exploit
the increased potential of recent hardware for running old applications and
environments (i.e. so that the latest and greatest chip is actually faster
than gen2!)
For the curious, sna is SandyBridge's New Acceleration. If you are
running older chipsets and welcome the performance increase offered by
this patch, then you may choose to call it Snazzy instead.
Speedups
========
gen3 firefox-fishtank 1203584.56 (1203842.75 0.01%) -> 85561.71 (125146.44 14.87%): 14.07x speedup
gen5 grads-heat-map 3385.42 (3489.73 1.44%) -> 350.29 (350.75 0.18%): 9.66x speedup
gen3 xfce4-terminal-a1 4179.02 (4180.09 0.06%) -> 503.90 (531.88 4.48%): 8.29x speedup
gen4 grads-heat-map 2458.66 (2826.34 4.64%) -> 348.82 (349.20 0.29%): 7.05x speedup
gen3 grads-heat-map 1443.33 (1445.32 0.09%) -> 298.55 (298.76 0.05%): 4.83x speedup
gen3 swfdec-youtube 3836.14 (3894.14 0.95%) -> 889.84 (979.56 5.99%): 4.31x speedup
gen6 grads-heat-map 742.11 (744.44 0.15%) -> 172.51 (172.93 0.20%): 4.30x speedup
gen3 firefox-talos-svg 71740.44 (72370.13 0.59%) -> 21959.29 (21995.09 0.68%): 3.27x speedup
gen5 gvim 8045.51 (8071.47 0.17%) -> 2589.38 (3246.78 10.74%): 3.11x speedup
gen6 poppler 3800.78 (3817.92 0.24%) -> 1227.36 (1230.12 0.30%): 3.10x speedup
gen6 gnome-terminal-vim 9106.84 (9111.56 0.03%) -> 3459.49 (3478.52 0.25%): 2.63x speedup
gen5 midori-zoomed 9564.53 (9586.58 0.17%) -> 3677.73 (3837.02 2.02%): 2.60x speedup
gen5 gnome-terminal-vim 38167.25 (38215.82 0.08%) -> 14901.09 (14902.28 0.01%): 2.56x speedup
gen5 poppler 13575.66 (13605.04 0.16%) -> 5554.27 (5555.84 0.01%): 2.44x speedup
gen5 swfdec-giant-steps 8941.61 (8988.72 0.52%) -> 3851.98 (3871.01 0.93%): 2.32x speedup
gen5 xfce4-terminal-a1 18956.60 (18986.90 0.07%) -> 8362.75 (8365.70 0.01%): 2.27x speedup
gen5 firefox-fishtank 88750.31 (88858.23 0.14%) -> 39164.57 (39835.54 0.80%): 2.27x speedup
gen3 midori-zoomed 2392.13 (2397.82 0.14%) -> 1109.96 (1303.10 30.35%): 2.16x speedup
gen6 gvim 2510.34 (2513.34 0.20%) -> 1200.76 (1204.30 0.22%): 2.09x speedup
gen5 firefox-planet-gnome 40478.16 (40565.68 0.09%) -> 19606.22 (19648.79 0.16%): 2.06x speedup
gen5 gnome-system-monitor 10344.47 (10385.62 0.29%) -> 5136.69 (5256.85 1.15%): 2.01x speedup
gen3 poppler 2595.23 (2603.10 0.17%) -> 1297.56 (1302.42 0.61%): 2.00x speedup
gen6 firefox-talos-gfx 7184.03 (7194.97 0.13%) -> 3806.31 (3811.66 0.06%): 1.89x speedup
gen5 evolution 8739.25 (8766.12 0.27%) -> 4817.54 (5050.96 1.54%): 1.81x speedup
gen3 evolution 1684.06 (1696.88 0.35%) -> 1004.99 (1008.55 0.85%): 1.68x speedup
gen3 gnome-terminal-vim 4285.13 (4287.68 0.04%) -> 2715.97 (3202.17 13.52%): 1.58x speedup
gen5 swfdec-youtube 5843.94 (5951.07 0.91%) -> 3810.86 (3826.04 1.32%): 1.53x speedup
gen4 poppler 7496.72 (7558.83 0.58%) -> 5125.08 (5247.65 1.44%): 1.46x speedup
gen4 gnome-terminal-vim 21126.24 (21292.08 0.85%) -> 14590.25 (15066.33 1.80%): 1.45x speedup
gen5 firefox-talos-svg 99873.69 (100300.95 0.37%) -> 70745.66 (70818.86 0.05%): 1.41x speedup
gen4 firefox-planet-gnome 28205.10 (28304.45 0.27%) -> 19996.11 (20081.44 0.56%): 1.41x speedup
gen5 firefox-talos-gfx 93070.85 (93194.72 0.10%) -> 67687.93 (70374.37 1.30%): 1.37x speedup
gen4 evolution 6696.25 (6854.14 0.85%) -> 4958.62 (5027.73 0.85%): 1.35x speedup
gen3 swfdec-giant-steps 2538.03 (2539.30 0.04%) -> 1895.71 (2050.62 62.43%): 1.34x speedup
gen4 gvim 4356.18 (4422.78 0.70%) -> 3276.31 (3281.69 0.13%): 1.33x speedup
gen6 evolution 1242.13 (1245.44 0.72%) -> 953.76 (954.54 0.07%): 1.30x speedup
gen6 firefox-planet-gnome 4554.23 (4560.69 0.08%) -> 3758.76 (3768.97 0.28%): 1.21x speedup
gen3 firefox-talos-gfx 6264.13 (6284.65 0.30%) -> 5261.56 (5370.87 1.28%): 1.19x speedup
gen4 midori-zoomed 4771.13 (4809.90 0.73%) -> 4037.03 (4118.93 0.85%): 1.18x speedup
gen6 swfdec-giant-steps 1557.06 (1560.13 0.12%) -> 1336.34 (1341.29 0.32%): 1.17x speedup
gen4 firefox-talos-gfx 80767.28 (80986.31 0.17%) -> 69629.08 (69721.71 0.06%): 1.16x speedup
gen6 midori-zoomed 1463.70 (1463.76 0.08%) -> 1331.45 (1336.56 0.22%): 1.10x speedup
Slowdowns
=========
gen6 xfce4-terminal-a1 2030.25 (2036.23 0.25%) -> 2144.60 (2240.31 4.29%): 1.06x slowdown
gen4 swfdec-youtube 3580.00 (3597.23 3.92%) -> 3826.90 (3862.24 0.91%): 1.07x slowdown
gen4 firefox-talos-svg 66112.25 (66256.51 0.11%) -> 71433.40 (71584.31 0.14%): 1.08x slowdown
gen4 gnome-system-monitor 5691.60 (5724.03 0.56%) -> 6707.56 (6747.83 0.33%): 1.18x slowdown
gen3 ocitysmap 3494.05 (3502.44 0.20%) -> 4321.99 (4524.42 2.78%): 1.24x slowdown
gen4 ocitysmap 3628.42 (3641.66 9.37%) -> 5177.16 (5828.74 8.38%): 1.43x slowdown
gen5 ocitysmap 4027.77 (4068.11 0.80%) -> 5748.26 (6282.25 7.38%): 1.43x slowdown
gen6 ocitysmap 1401.61 (1402.24 0.40%) -> 2365.74 (2379.14 4.12%): 1.69x slowdown
[Note the performance regression for ocitysmap comes from the fact that we now
attempt to support rendering to and (more importantly) from large
surfaces. Enabling such operations is the only way to one day be
faster than purely using the CPU; in the meantime we suffer a regression
due to the increased migration and aperture thrashing. The other couple
of regressions will be eliminated with improved span and shader support,
now that the framework for such is in place.]
The performance increase for Cairo completely overlooks the other
critical aspects of the architecture:
World of Padman:
gen3 (800x600): 57.5 -> 96.2
gen4 (800x600): 47.8 -> 74.6
gen6 (1366x768): 100.4 -> 140.3 [F15]
144.3 -> 146.4 [drm-intel-next]
x11perf (gen6);
aa10text: 3.47 -> 14.3 Mglyphs/s [unthrottled!]
copywinwin10: 1.66 -> 1.99 Mops/s
copywinpix10: 2.28 -> 2.98 Mops/s
And we do not have a good measure for how much improvement the reworking
of the fallback paths gives, except that xterm is now over 4x faster...
PS: This depends upon the Xorg patchset "Remove the cacheing of the last
scratch PixmapRec" for correct invalidations of scratch Pixmaps (used by
the dix to implement SHM operations, used by chromium and gtk+ pixbufs).
PPS: ./configure --enable-sna
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/sna/kgem_debug_gen4.c')
-rw-r--r-- | src/sna/kgem_debug_gen4.c | 711 |
1 files changed, 711 insertions, 0 deletions
diff --git a/src/sna/kgem_debug_gen4.c b/src/sna/kgem_debug_gen4.c new file mode 100644 index 00000000..d736cbd9 --- /dev/null +++ b/src/sna/kgem_debug_gen4.c @@ -0,0 +1,711 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/mman.h> +#include <assert.h> + +#include "sna.h" +#include "sna_reg.h" + +#include "gen4_render.h" + +#include "kgem_debug.h" + +static struct state { + struct vertex_buffer { + int handle; + void *base; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb[33]; + struct vertex_elements { + int buffer; + int offset; + bool valid; + uint32_t type; + uint8_t swizzle[4]; + } ve[33]; + int num_ve; + + struct dynamic_state { + struct kgem_bo *current; + void *base, *ptr; + } dynamic_state; +} state; + +static void gen4_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) +{ + uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch); + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == reloc) + break; + assert(i < kgem->nreloc); + reloc = kgem->reloc[i].target_handle; + + if (reloc == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map(kgem, bo, PROT_READ); + } + ptr = (char *)base + kgem->reloc[i].delta; + + i = data[0] >> 27; + if (state.vb[i].current) + munmap(state.vb[i].base, state.vb[i].current->size); + + state.vb[i].current = bo; + state.vb[i].base = base; + state.vb[i].ptr = ptr; + state.vb[i].pitch = data[0] & 0x7ff; +} + +static uint32_t +get_ve_component(uint32_t data, int component) +{ + return (data >> (16 + (3 - component) * 4)) & 0x7; +} + +static void gen4_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) +{ + state.ve[id].buffer = data[0] >> 27; + state.ve[id].valid = !!(data[0] & (1 << 26)); + state.ve[id].type = (data[0] >> 16) & 0x1ff; + state.ve[id].offset = data[0] & 0x7ff; + 
state.ve[id].swizzle[0] = get_ve_component(data[1], 0); + state.ve[id].swizzle[1] = get_ve_component(data[1], 1); + state.ve[id].swizzle[2] = get_ve_component(data[1], 2); + state.ve[id].swizzle[3] = get_ve_component(data[1], 3); +} + +static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max) +{ + int c; + + ErrorF("("); + for (c = 0; c < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%d", v[c]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case GEN4_SURFACEFORMAT_R32_FLOAT: + vertices_float_out(ve, ptr, 1); + break; + case GEN4_SURFACEFORMAT_R32G32_FLOAT: + vertices_float_out(ve, ptr, 2); + break; + case 
GEN4_SURFACEFORMAT_R32G32B32_FLOAT: + vertices_float_out(ve, ptr, 3); + break; + case GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT: + vertices_float_out(ve, ptr, 4); + break; + case GEN4_SURFACEFORMAT_R16_SINT: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN4_SURFACEFORMAT_R16G16_SINT: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN4_SURFACEFORMAT_R16G16B16A16_SINT: + vertices_sint16_out(ve, ptr, 4); + break; + case GEN4_SURFACEFORMAT_R16_SSCALED: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN4_SURFACEFORMAT_R16G16_SSCALED: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED: + vertices_sint16_out(ve, ptr, 4); + break; + } +} + +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + int i = 0; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const struct vertex_buffer *vb = &state.vb[ve->buffer]; + const void *ptr = vb->ptr + v * vb->pitch + ve->offset; + + if (!ve->valid) + continue; + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); +} + +static void primitive_out(struct kgem *kgem, uint32_t *data) +{ + int n; + + assert((data[0] & (1<<15)) == 0); /* XXX index buffers */ + + for (n = 0; n < data[1]; n++) { + int v = data[2] + n; + ErrorF(" [%d:%d] = ", n, v); + indirect_vertex_out(kgem, v); + ErrorF("\n"); + } +} + +static void +state_base_out(uint32_t *data, uint32_t offset, unsigned int index, + char *name) +{ + if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state base address 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state base not updated\n", + name); +} + +static void +state_max_out(uint32_t *data, uint32_t offset, unsigned int index, + char *name) +{ + if (data[index] == 1) + kgem_debug_print(data, offset, index, + "%s state upper bound disabled\n", name); + else if (data[index] & 1) + kgem_debug_print(data, 
offset, index, + "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state upper bound not updated\n", + name); +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip 
bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +#if 0 +struct reloc { + struct kgem_bo *bo; + void *base; +}; + +static void * +get_reloc(struct kgem *kgem, + void *base, const uint32_t *reloc, + struct reloc *r) +{ + uint32_t delta = *reloc; + + memset(r, 0, sizeof(*r)); + + if (base == 0) { + uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch); + struct kgem_bo *bo = NULL; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == handle) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + delta = kgem->reloc[i].delta; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map(kgem, bo, PROT_READ); + r->bo = bo; + r->base = base; + } + } + + return (char *)base + delta; +} + +static void +put_reloc(struct kgem *kgem, struct reloc *r) +{ + if (r->bo != NULL) + munmap(r->base, r->bo->size); +} +#endif + +int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x6000, 3, 3, "URB_FENCE" }, + { 0x6001, 2, 2, "CS_URB_FENCE" }, + { 0x6002, 2, 2, "CONSTANT_BUFFER" }, + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, 
"3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, + { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + { 0x7805, 3, 3, "3DSTATE_URB" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, + { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" }, + { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" }, + }; + uint32_t *data = kgem->batch + offset; + uint32_t op; + unsigned int len; + int i; + char *desc1 = NULL; + + len = (data[0] & 0xff) + 2; + op = (data[0] & 0xffff0000) >> 16; + switch (op) { + case 0x6000: + assert(len == 3); + + kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n", + (data[0] >> 13) & 1 ? "cs " : "", + (data[0] >> 12) & 1 ? "vfe " : "", + (data[0] >> 11) & 1 ? "sf " : "", + (data[0] >> 10) & 1 ? "clip " : "", + (data[0] >> 9) & 1 ? "gs " : "", + (data[0] >> 8) & 1 ? "vs " : ""); + kgem_debug_print(data, offset, 1, + "vs fence: %d, gs_fence: %d, clip_fence: %d\n", + data[1] & 0x3ff, + (data[1] >> 10) & 0x3ff, + (data[1] >> 20) & 0x3ff); + kgem_debug_print(data, offset, 2, + "sf fence: %d, vfe_fence: %d, cs_fence: %d\n", + data[2] & 0x3ff, + (data[2] >> 10) & 0x3ff, + (data[2] >> 20) & 0x7ff); + return len; + + case 0x6001: + kgem_debug_print(data, offset, 0, "CS_URB_STATE\n"); + kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n", + (data[1] >> 4) & 0x1f, + (((data[1] >> 4) & 0x1f) + 1) * 64, + data[1] & 0x7); + return len; + case 0x6002: + kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n", + (data[0] >> 8) & 1 ? 
"valid" : "invalid"); + kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n", + data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64); + return len; + case 0x6101: + i = 0; + kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n"); + assert(len == 6); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "media"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "media"); + + return len; + + case 0x7801: + assert(len == 6); + + kgem_debug_print(data, offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "VS binding table\n"); + kgem_debug_print(data, offset, 2, "GS binding table\n"); + kgem_debug_print(data, offset, 3, "CLIP binding table\n"); + kgem_debug_print(data, offset, 4, "SF binding table\n"); + kgem_debug_print(data, offset, 5, "WM binding table\n"); + + return len; + + case 0x7808: + assert((len - 1) % 4 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + gen4_update_vertex_buffer(kgem, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 27, + data[i] & (1 << 20) ? "random" : "sequential", + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i++, "buffer address\n"); + kgem_debug_print(data, offset, i++, "max index\n"); + kgem_debug_print(data, offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + assert((len + 1) % 2 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + memset(state.ve, 0, sizeof(state.ve)); /* XXX? */ + for (i = 1; i < len;) { + gen4_update_vertex_elements(kgem, (i - 1)/2, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 27, + data[i] & (1 << 26) ? 
"" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + state.num_ve = (len - 1) / 2; /* XXX? */ + return len; + + case 0x780a: + assert(len == 3); + kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n"); + kgem_debug_print(data, offset, 1, "beginning buffer address\n"); + kgem_debug_print(data, offset, 2, "ending buffer address\n"); + return len; + + case 0x7900: + assert(len == 4); + kgem_debug_print(data, offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "top left: %d,%d\n", + data[1] & 0xffff, + (data[1] >> 16) & 0xffff); + kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n", + data[2] & 0xffff, + (data[2] >> 16) & 0xffff); + kgem_debug_print(data, offset, 3, "origin: %d,%d\n", + (int)data[3] & 0xffff, + ((int)data[3] >> 16) & 0xffff); + return len; + + case 0x7905: + assert(len == 7); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); + kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? 
"" : "not ", + (data[1] & (1 << 22)) != 0, + (data[1] & (1 << 21)) != 0); + kgem_debug_print(data, offset, 2, "depth offset\n"); + kgem_debug_print(data, offset, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + kgem_debug_print(data, offset, 4, "volume depth\n"); + kgem_debug_print(data, offset, 5, "\n"); + kgem_debug_print(data, offset, 6, "\n"); + return len; + + case 0x7a00: + assert(len == 4 || len == 5); + switch ((data[1] >> 14) & 0x3) { + case 0: desc1 = "no write"; break; + case 1: desc1 = "qword write"; break; + case 2: desc1 = "PS_DEPTH_COUNT write"; break; + case 3: desc1 = "TIMESTAMP write"; break; + } + kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n"); + kgem_debug_print(data, offset, 1, + "%s, %scs stall, %stlb invalidate, " + "%ssync gfdt, %sdepth stall, %sRC write flush, " + "%sinst flush, %sTC flush\n", + desc1, + data[1] & (1 << 20) ? "" : "no ", + data[1] & (1 << 18) ? "" : "no ", + data[1] & (1 << 17) ? "" : "no ", + data[1] & (1 << 13) ? "" : "no ", + data[1] & (1 << 12) ? "" : "no ", + data[1] & (1 << 11) ? "" : "no ", + data[1] & (1 << 10) ? "" : "no "); + if (len == 5) { + kgem_debug_print(data, offset, 2, "destination address\n"); + kgem_debug_print(data, offset, 3, "immediate dword low\n"); + kgem_debug_print(data, offset, 4, "immediate dword high\n"); + } else { + for (i = 2; i < len; i++) { + kgem_debug_print(data, offset, i, "\n"); + } + } + return len; + + case 0x7b00: + assert(len == 6); + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? 
"random" : "sequential"); + kgem_debug_print(data, offset, 1, "vertex count\n"); + kgem_debug_print(data, offset, 2, "start vertex\n"); + kgem_debug_print(data, offset, 3, "instance count\n"); + kgem_debug_print(data, offset, 4, "start instance\n"); + kgem_debug_print(data, offset, 5, "index bias\n"); + primitive_out(kgem, data); + return len; + } + + /* For the rest, just dump the bytes */ + for (i = 0; i < ARRAY_SIZE(opcodes); i++) + if (op == opcodes[i].opcode) + break; + + assert(i < ARRAY_SIZE(opcodes)); + + len = 1; + kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name); + if (opcodes[i].max_len > 1) { + len = (data[0] & 0xff) + 2; + assert(len >= opcodes[i].min_len && + len <= opcodes[i].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; +} + +static void finish_vertex_buffers(struct kgem *kgem) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(state.vb); i++) + if (state.vb[i].current) + munmap(state.vb[i].base, state.vb[i].current->size); +} + +void kgem_gen4_finish_state(struct kgem *kgem) +{ + finish_vertex_buffers(kgem); + + if (state.dynamic_state.current) + munmap(state.dynamic_state.base, state.dynamic_state.current->size); + + memset(&state, 0, sizeof(state)); +} |