diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2020-08-26 06:03:18 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2020-08-26 06:03:18 +0000 |
commit | af5e8f5366b05c3d4f8521f318c143a5c5dc3ea9 (patch) | |
tree | c5691445908b1beca9facf0e5e3c5d7f35f74228 /lib/mesa/src/gallium/drivers/freedreno/a5xx | |
parent | 27c93456b58343162f7c4ad20ca6bea0c9a91646 (diff) |
Merge Mesa 20.1.6
Diffstat (limited to 'lib/mesa/src/gallium/drivers/freedreno/a5xx')
12 files changed, 106 insertions, 954 deletions
diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.c index fee6ba346..a7efcca61 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.c @@ -100,22 +100,14 @@ fd5_blend_state_create(struct pipe_context *pctx, else rt = &cso->rt[0]; - so->rb_mrt[i].blend_control_rgb = + so->rb_mrt[i].blend_control = A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | - A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)); - - so->rb_mrt[i].blend_control_alpha = + A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) | A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) | A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) | A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)); - so->rb_mrt[i].blend_control_no_alpha_rgb = - A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) | - A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | - A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor))); - - so->rb_mrt[i].control = A5XX_RB_MRT_CONTROL_ROP_CODE(rop) | COND(cso->logicop_enable, A5XX_RB_MRT_CONTROL_ROP_ENABLE) | diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.h index 698549548..10cbbaa90 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.h +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_blend.h @@ -38,12 +38,7 @@ struct fd5_blend_stateobj { struct { uint32_t control; uint32_t buf_info; - /* Blend control bits for color if there is an alpha channel */ - uint32_t blend_control_rgb; - /* Blend control bits for color if there is no alpha channel */ - uint32_t blend_control_no_alpha_rgb; - /* Blend control bits for alpha channel */ - uint32_t blend_control_alpha; + uint32_t blend_control; } rb_mrt[A5XX_MAX_RENDER_TARGETS]; uint32_t rb_blend_cntl; uint32_t sp_blend_cntl; diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c index ce93eee74..579d1cdaf 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_draw.c @@ -152,8 +152,8 @@ fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, /* and now binning pass: */ emit.binning_pass = true; emit.dirty = dirty & ~(FD_DIRTY_BLEND); - emit.vp = NULL; /* we changed key so need to refetch vp */ - emit.fp = NULL; + emit.vs = NULL; /* we changed key so need to refetch vp */ + emit.fs = NULL; draw_impl(ctx, ctx->batch->binning, &emit, index_offset); if (emit.streamout_mask) { @@ -318,13 +318,7 @@ fd5_clear(struct fd_context *ctx, unsigned buffers, break; } - if (util_format_is_pure_uint(pfmt)) { - util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1); - } else if (util_format_is_pure_sint(pfmt)) { - util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1); - } else { - util_pack_color(swapped.f, pfmt, &uc); - } + util_pack_color_union(pfmt, &uc, &swapped); OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0 + i)); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index ce0bba60b..5bd429ef6 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -28,7 +28,7 @@ #include "util/u_string.h" #include "util/u_memory.h" #include "util/u_helpers.h" -#include "util/u_format.h" +#include "util/format/u_format.h" #include "util/u_viewport.h" #include "freedreno_resource.h" @@ -365,7 +365,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, enum a5xx_tile_mode tile_mode = TILE5_LINEAR; if (view->base.texture) - tile_mode = fd_resource(view->base.texture)->tile_mode; + tile_mode = fd_resource(view->base.texture)->layout.tile_mode; OUT_RING(ring, view->texconst0 | A5XX_TEX_CONST_0_TILE_MODE(tile_mode)); @@ -400,13 +400,10 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct ir3_shader_variant *v) { unsigned count = util_last_bit(so->enabled_mask); - const struct ir3_ibo_mapping *m = &v->image_mapping; for (unsigned i = 0; i < count; i++) { - unsigned slot = m->ssbo_to_ibo[i]; - OUT_PKT7(ring, CP_LOAD_STATE4, 5); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) | CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | CP_LOAD_STATE4_0_STATE_BLOCK(sb) | CP_LOAD_STATE4_0_NUM_UNIT(1)); @@ -424,7 +421,7 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16)); OUT_PKT7(ring, CP_LOAD_STATE4, 5); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(slot) | + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(i) | CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | CP_LOAD_STATE4_0_STATE_BLOCK(sb) | CP_LOAD_STATE4_0_NUM_UNIT(1)); @@ -728,17 +725,13 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, bool is_int = util_format_is_pure_integer(format); bool has_alpha = util_format_has_alpha(format); uint32_t control = blend->rb_mrt[i].control; - uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha; if (is_int) { control &= A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; control |= A5XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); } - if (has_alpha) { - blend_control |= blend->rb_mrt[i].blend_control_rgb; - } else { - blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb; + if (!has_alpha) { control &= ~A5XX_RB_MRT_CONTROL_BLEND2; } @@ -746,7 +739,7 @@ fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, control); OUT_PKT4(ring, REG_A5XX_RB_MRT_BLEND_CONTROL(i), 1); - OUT_RING(ring, blend_control); + OUT_RING(ring, blend->rb_mrt[i].blend_control); } OUT_PKT4(ring, REG_A5XX_SP_BLEND_CNTL, 1); diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.h b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.h index 90a6f4840..141fc0534 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.h +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_emit.h @@ -60,7 +60,7 @@ struct fd5_emit { bool no_lrz_write; /* cached to avoid repeated lookups of same variants: */ - const struct ir3_shader_variant *vp, *fp; + const struct ir3_shader_variant *vs, *fs; /* TODO: other shader stages.. */ unsigned streamout_mask; @@ -76,29 +76,29 @@ static inline enum a5xx_color_fmt fd5_emit_format(struct pipe_surface *surf) static inline const struct ir3_shader_variant * fd5_emit_get_vp(struct fd5_emit *emit) { - if (!emit->vp) { - struct ir3_shader *shader = emit->prog->vp; - emit->vp = ir3_shader_variant(shader, emit->key, + if (!emit->vs) { + struct ir3_shader *shader = emit->prog->vs; + emit->vs = ir3_shader_variant(shader, emit->key, emit->binning_pass, emit->debug); } - return emit->vp; + return emit->vs; } static inline const struct ir3_shader_variant * fd5_emit_get_fp(struct fd5_emit *emit) { - if (!emit->fp) { + if (!emit->fs) { if (emit->binning_pass) { /* use dummy stateobj to simplify binning vs non-binning: */ - static const struct ir3_shader_variant binning_fp = {}; - emit->fp = &binning_fp; + static const struct ir3_shader_variant binning_fs = {}; + emit->fs = &binning_fs; } else { - struct ir3_shader *shader = emit->prog->fp; - emit->fp = ir3_shader_variant(shader, emit->key, + struct ir3_shader *shader = emit->prog->fs; + emit->fs = ir3_shader_variant(shader, emit->key, false, emit->debug); } } - return emit->fp; + return emit->fs; } static inline void diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c index 59c13fad2..3686f1633 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_format.c @@ -25,7 +25,7 @@ */ #include "pipe/p_defines.h" -#include "util/u_format.h" +#include "util/format/u_format.h" #include "fd5_format.h" @@ -84,7 +84,7 @@ static struct fd5_format formats[PIPE_FORMAT_COUNT] = { VT(R8_UINT, 8_UINT, R8_UINT, WZYX), VT(R8_SINT, 8_SINT, R8_SINT, WZYX), V_(R8_USCALED, 8_UINT, NONE, WZYX), - V_(R8_SSCALED, 8_UINT, NONE, WZYX), + V_(R8_SSCALED, 8_SINT, NONE, WZYX), _T(A8_UNORM, 8_UNORM, A8_UNORM, WZYX), _T(L8_UNORM, 8_UNORM, R8_UNORM, WZYX), @@ -105,7 +105,7 @@ static struct fd5_format formats[PIPE_FORMAT_COUNT] = { VT(R16_UINT, 16_UINT, R16_UINT, WZYX), VT(R16_SINT, 16_SINT, R16_SINT, WZYX), V_(R16_USCALED, 16_UINT, NONE, WZYX), - V_(R16_SSCALED, 16_UINT, NONE, WZYX), + V_(R16_SSCALED, 16_SINT, NONE, WZYX), VT(R16_FLOAT, 16_FLOAT, R16_FLOAT, WZYX), _T(Z16_UNORM, 16_UNORM, R16_UNORM, WZYX), @@ -149,7 +149,7 @@ static struct fd5_format formats[PIPE_FORMAT_COUNT] = { VT(R32_UINT, 32_UINT, R32_UINT, WZYX), VT(R32_SINT, 32_SINT, R32_SINT, WZYX), V_(R32_USCALED, 32_UINT, NONE, WZYX), - V_(R32_SSCALED, 32_UINT, NONE, WZYX), + V_(R32_SSCALED, 32_SINT, NONE, WZYX), VT(R32_FLOAT, 32_FLOAT, R32_FLOAT,WZYX), V_(R32_FIXED, 32_FIXED, NONE, WZYX), diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c index df9e85a8f..f087e04cf 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c @@ -28,7 +28,7 @@ #include "util/u_string.h" #include "util/u_memory.h" #include "util/u_inlines.h" -#include "util/u_format.h" +#include "util/format/u_format.h" #include "freedreno_draw.h" #include "freedreno_state.h" @@ -44,7 +44,7 @@ static void emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, - struct pipe_surface **bufs, struct fd_gmem_stateobj *gmem) + struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem) { enum a5xx_tile_mode tile_mode; unsigned i; @@ -54,7 +54,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, enum a3xx_color_swap swap = WZYX; bool srgb = false, sint = false, uint = false; struct fd_resource *rsc = NULL; - struct fd_resource_slice *slice = NULL; + struct fdl_slice *slice = NULL; uint32_t stride = 0; uint32_t size = 0; uint32_t base = 0; @@ -89,11 +89,10 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, size = stride * gmem->bin_h; base = gmem->cbuf_base[i]; } else { - stride = slice->pitch * rsc->cpp; + stride = slice->pitch; size = slice->size0; - if (!fd_resource_level_linear(psurf->texture, psurf->u.tex.level)) - tile_mode = rsc->tile_mode; + tile_mode = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level); } } @@ -132,12 +131,12 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, static void emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, - struct fd_gmem_stateobj *gmem) + const struct fd_gmem_stateobj *gmem) { if (zsbuf) { struct fd_resource *rsc = fd_resource(zsbuf->texture); enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format); - uint32_t cpp = rsc->cpp; + uint32_t cpp = rsc->layout.cpp; uint32_t stride = 0; uint32_t size = 0; @@ -145,8 +144,8 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, stride = cpp * gmem->bin_w; size = stride * gmem->bin_h; } else { - struct fd_resource_slice *slice = fd_resource_slice(rsc, 0); - stride = slice->pitch * rsc->cpp; + struct fdl_slice *slice = fd_resource_slice(rsc, 0); + stride = slice->pitch; size = slice->size0; } @@ -192,8 +191,8 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, stride = 1 * gmem->bin_w; size = stride * gmem->bin_h; } else { - struct fd_resource_slice *slice = fd_resource_slice(rsc->stencil, 0); - stride = slice->pitch * rsc->cpp; + struct fdl_slice *slice = fd_resource_slice(rsc->stencil, 0); + stride = slice->pitch; size = slice->size0; } @@ -235,7 +234,7 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, static bool use_hw_binning(struct fd_batch *batch) { - struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + const struct fd_gmem_stateobj *gmem = batch->gmem_state; if ((gmem->maxpw * gmem->maxph) > 32) return false; @@ -263,7 +262,7 @@ update_vsc_pipe(struct fd_batch *batch) { struct fd_context *ctx = batch->ctx; struct fd5_context *fd5_ctx = fd5_context(ctx); - struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + const struct fd_gmem_stateobj *gmem = batch->gmem_state; struct fd_ringbuffer *ring = batch->gmem; int i; @@ -278,7 +277,7 @@ update_vsc_pipe(struct fd_batch *batch) OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16); for (i = 0; i < 16; i++) { - struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; + const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i]; OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) | A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) | A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) | @@ -287,18 +286,16 @@ update_vsc_pipe(struct fd_batch *batch) OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32); for (i = 0; i < 16; i++) { - struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; - if (!pipe->bo) { - pipe->bo = fd_bo_new(ctx->dev, 0x20000, + if (!ctx->vsc_pipe_bo[i]) { + ctx->vsc_pipe_bo[i] = fd_bo_new(ctx->dev, 0x20000, DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i); } - OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */ + OUT_RELOCW(ring, ctx->vsc_pipe_bo[i], 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */ } OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16); for (i = 0; i < 16; i++) { - struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; - OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */ + OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) - 32); /* VSC_PIPE_DATA_LENGTH[i] */ } } @@ -307,7 +304,7 @@ emit_binning_pass(struct fd_batch *batch) { struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->gmem; - struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + const struct fd_gmem_stateobj *gmem = batch->gmem_state; uint32_t x1 = gmem->minx; uint32_t y1 = gmem->miny; @@ -369,7 +366,6 @@ emit_binning_pass(struct fd_batch *batch) static void fd5_emit_tile_init(struct fd_batch *batch) { - struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->gmem; struct pipe_framebuffer_state *pfb = &batch->framebuffer; @@ -397,8 +393,8 @@ fd5_emit_tile_init(struct fd_batch *batch) OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1); OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */ - emit_zs(ring, pfb->zsbuf, &ctx->gmem); - emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, &ctx->gmem); + emit_zs(ring, pfb->zsbuf, batch->gmem_state); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state); if (use_hw_binning(batch)) { emit_binning_pass(batch); @@ -413,9 +409,10 @@ fd5_emit_tile_init(struct fd_batch *batch) /* before mem2gmem */ static void -fd5_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) +fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) { struct fd_context *ctx = batch->ctx; + const struct fd_gmem_stateobj *gmem = batch->gmem_state; struct fd5_context *fd5_ctx = fd5_context(ctx); struct fd_ringbuffer *ring = batch->gmem; @@ -437,7 +434,8 @@ fd5_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) A5XX_RB_RESOLVE_CNTL_2_Y(y2)); if (use_hw_binning(batch)) { - struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p]; + const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p]; + struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p]; OUT_PKT7(ring, CP_WAIT_FOR_ME, 0); @@ -447,7 +445,7 @@ fd5_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) OUT_PKT7(ring, CP_SET_BIN_DATA5, 5); OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) | CP_SET_BIN_DATA5_0_VSC_N(tile->n)); - OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE[p].DATA_ADDRESS */ + OUT_RELOC(ring, pipe_bo, 0, 0, 0); /* VSC_PIPE[p].DATA_ADDRESS */ OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */ (tile->p * 4), 0, 0); } else { @@ -470,7 +468,7 @@ emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base, struct pipe_surface *psurf, enum a5xx_blit_buf buf) { struct fd_ringbuffer *ring = batch->gmem; - struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + const struct fd_gmem_stateobj *gmem = batch->gmem_state; struct fd_resource *rsc = fd_resource(psurf->texture); uint32_t stride, size; @@ -485,22 +483,22 @@ emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base, // possibly we want to flip this around gmem2mem and keep depth // tiled in sysmem (and fixup sampler state to assume tiled).. this // might be required for doing depth/stencil in bypass mode? - struct fd_resource_slice *slice = fd_resource_slice(rsc, 0); + struct fdl_slice *slice = fd_resource_slice(rsc, 0); enum a5xx_color_fmt format = fd5_pipe2color(fd_gmem_restore_format(rsc->base.format)); OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5); OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | - A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->tile_mode) | + A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) | A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX)); - OUT_RING(ring, A5XX_RB_MRT_PITCH(slice->pitch * rsc->cpp)); + OUT_RING(ring, A5XX_RB_MRT_PITCH(slice->pitch)); OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0)); OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* BASE_LO/HI */ buf = BLIT_MRT0; } - stride = gmem->bin_w * rsc->cpp; + stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout); size = stride * gmem->bin_h; OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4); @@ -523,11 +521,10 @@ emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base, } static void -fd5_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) +fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile) { struct fd_ringbuffer *ring = batch->gmem; - struct fd_context *ctx = batch->ctx; - struct fd_gmem_stateobj *gmem = &ctx->gmem; + const struct fd_gmem_stateobj *gmem = batch->gmem_state; struct pipe_framebuffer_state *pfb = &batch->framebuffer; /* @@ -567,10 +564,10 @@ fd5_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) /* before IB to rendering cmds: */ static void -fd5_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) +fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile) { struct fd_ringbuffer *ring = batch->gmem; - struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + const struct fd_gmem_stateobj *gmem = batch->gmem_state; struct pipe_framebuffer_state *pfb = &batch->framebuffer; OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1); @@ -610,7 +607,7 @@ emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base, { struct fd_ringbuffer *ring = batch->gmem; struct fd_resource *rsc = fd_resource(psurf->texture); - struct fd_resource_slice *slice; + struct fdl_slice *slice; bool tiled; uint32_t offset; @@ -632,14 +629,13 @@ emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base, OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */ OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */ - tiled = rsc->tile_mode && - !fd_resource_level_linear(psurf->texture, psurf->u.tex.level); + tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level); OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5); OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */ COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED)); OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */ - OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(slice->pitch * rsc->cpp)); + OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(slice->pitch)); OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0)); OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); @@ -654,10 +650,9 @@ emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base, } static void -fd5_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) +fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile) { - struct fd_context *ctx = batch->ctx; - struct fd_gmem_stateobj *gmem = &ctx->gmem; + const struct fd_gmem_stateobj *gmem = batch->gmem_state; struct pipe_framebuffer_state *pfb = &batch->framebuffer; if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { @@ -780,7 +775,7 @@ fd5_emit_sysmem_fini(struct fd_batch *batch) fd5_emit_lrz_flush(ring); OUT_PKT7(ring, CP_EVENT_WRITE, 4); - OUT_RING(ring, UNK_1D); + OUT_RING(ring, PC_CCU_FLUSH_COLOR_TS); OUT_RELOCW(ring, fd5_ctx->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ OUT_RING(ring, 0x00000000); } diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_perfcntr.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_perfcntr.c deleted file mode 100644 index cf5571d54..000000000 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_perfcntr.c +++ /dev/null @@ -1,766 +0,0 @@ -/* - * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <robclark@freedesktop.org> - */ - -#ifndef FD5_PERFCNTR_H_ -#define FD5_PERFCNTR_H_ - -#include "freedreno_perfcntr.h" -#include "fd5_format.h" - -#define REG(_x) REG_A5XX_ ## _x - -#define COUNTER(_sel, _lo, _hi) { \ - .select_reg = REG(_sel), \ - .counter_reg_lo = REG(_lo), \ - .counter_reg_hi = REG(_hi), \ -} - -#define COUNTER2(_sel, _lo, _hi, _en, _clr) { \ - .select_reg = REG(_sel), \ - .counter_reg_lo = REG(_lo), \ - .counter_reg_hi = REG(_hi), \ - .enable = REG(_en), \ - .clear = REG(_clr), \ -} - -#define COUNTABLE(_selector, _query_type, _result_type) { \ - .name = #_selector, \ - .selector = _selector, \ - .query_type = PIPE_DRIVER_QUERY_TYPE_ ## _query_type, \ - .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_ ## _result_type, \ -} - -#define GROUP(_name, _counters, _countables) { \ - .name = _name, \ - .num_counters = ARRAY_SIZE(_counters), \ - .counters = _counters, \ - .num_countables = ARRAY_SIZE(_countables), \ - .countables = _countables, \ -} - -static const struct fd_perfcntr_counter cp_counters[] = { -//RESERVED: for kernel -// COUNTER(CP_PERFCTR_CP_SEL_0, RBBM_PERFCTR_CP_0_LO, RBBM_PERFCTR_CP_0_HI), - COUNTER(CP_PERFCTR_CP_SEL_1, RBBM_PERFCTR_CP_1_LO, RBBM_PERFCTR_CP_1_HI), - COUNTER(CP_PERFCTR_CP_SEL_2, RBBM_PERFCTR_CP_2_LO, RBBM_PERFCTR_CP_2_HI), - COUNTER(CP_PERFCTR_CP_SEL_3, RBBM_PERFCTR_CP_3_LO, RBBM_PERFCTR_CP_3_HI), - COUNTER(CP_PERFCTR_CP_SEL_4, RBBM_PERFCTR_CP_4_LO, RBBM_PERFCTR_CP_4_HI), - COUNTER(CP_PERFCTR_CP_SEL_5, RBBM_PERFCTR_CP_5_LO, RBBM_PERFCTR_CP_5_HI), - COUNTER(CP_PERFCTR_CP_SEL_6, RBBM_PERFCTR_CP_6_LO, RBBM_PERFCTR_CP_6_HI), - COUNTER(CP_PERFCTR_CP_SEL_7, RBBM_PERFCTR_CP_7_LO, RBBM_PERFCTR_CP_7_HI), -}; - -static const struct fd_perfcntr_countable cp_countables[] = { - COUNTABLE(PERF_CP_ALWAYS_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_CP_BUSY_GFX_CORE_IDLE, UINT64, AVERAGE), - COUNTABLE(PERF_CP_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_CP_PFP_IDLE, UINT64, AVERAGE), - COUNTABLE(PERF_CP_PFP_BUSY_WORKING, UINT64, AVERAGE), - COUNTABLE(PERF_CP_PFP_STALL_CYCLES_ANY, UINT64, AVERAGE), - COUNTABLE(PERF_CP_PFP_STARVE_CYCLES_ANY, UINT64, AVERAGE), - COUNTABLE(PERF_CP_PFP_ICACHE_MISS, UINT64, AVERAGE), - COUNTABLE(PERF_CP_PFP_ICACHE_HIT, UINT64, AVERAGE), - COUNTABLE(PERF_CP_PFP_MATCH_PM4_PKT_PROFILE, UINT64, AVERAGE), - COUNTABLE(PERF_CP_ME_BUSY_WORKING, UINT64, AVERAGE), - COUNTABLE(PERF_CP_ME_IDLE, UINT64, AVERAGE), - COUNTABLE(PERF_CP_ME_STARVE_CYCLES_ANY, UINT64, AVERAGE), - COUNTABLE(PERF_CP_ME_FIFO_EMPTY_PFP_IDLE, UINT64, AVERAGE), - COUNTABLE(PERF_CP_ME_FIFO_EMPTY_PFP_BUSY, UINT64, AVERAGE), - COUNTABLE(PERF_CP_ME_FIFO_FULL_ME_BUSY, UINT64, AVERAGE), - COUNTABLE(PERF_CP_ME_FIFO_FULL_ME_NON_WORKING, UINT64, AVERAGE), - COUNTABLE(PERF_CP_ME_STALL_CYCLES_ANY, UINT64, AVERAGE), - COUNTABLE(PERF_CP_ME_ICACHE_MISS, UINT64, AVERAGE), - COUNTABLE(PERF_CP_ME_ICACHE_HIT, UINT64, AVERAGE), - COUNTABLE(PERF_CP_NUM_PREEMPTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_CP_PREEMPTION_REACTION_DELAY, UINT64, AVERAGE), - COUNTABLE(PERF_CP_PREEMPTION_SWITCH_OUT_TIME, UINT64, AVERAGE), - COUNTABLE(PERF_CP_PREEMPTION_SWITCH_IN_TIME, UINT64, AVERAGE), - COUNTABLE(PERF_CP_DEAD_DRAWS_IN_BIN_RENDER, UINT64, AVERAGE), - COUNTABLE(PERF_CP_PREDICATED_DRAWS_KILLED, UINT64, AVERAGE), - COUNTABLE(PERF_CP_MODE_SWITCH, UINT64, AVERAGE), - COUNTABLE(PERF_CP_ZPASS_DONE, UINT64, AVERAGE), - COUNTABLE(PERF_CP_CONTEXT_DONE, UINT64, AVERAGE), - COUNTABLE(PERF_CP_CACHE_FLUSH, UINT64, AVERAGE), - COUNTABLE(PERF_CP_LONG_PREEMPTIONS, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter ccu_counters[] = { - COUNTER(RB_PERFCTR_CCU_SEL_0, RBBM_PERFCTR_CCU_0_LO, RBBM_PERFCTR_CCU_0_HI), - COUNTER(RB_PERFCTR_CCU_SEL_1, RBBM_PERFCTR_CCU_1_LO, RBBM_PERFCTR_CCU_1_HI), - COUNTER(RB_PERFCTR_CCU_SEL_2, RBBM_PERFCTR_CCU_2_LO, RBBM_PERFCTR_CCU_2_HI), - COUNTER(RB_PERFCTR_CCU_SEL_3, RBBM_PERFCTR_CCU_3_LO, RBBM_PERFCTR_CCU_3_HI), -}; - -static const struct fd_perfcntr_countable ccu_countables[] = { - COUNTABLE(PERF_CCU_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_STARVE_CYCLES_FLAG_RETURN, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_DEPTH_BLOCKS, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_COLOR_BLOCKS, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_DEPTH_BLOCK_HIT, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_COLOR_BLOCK_HIT, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_PARTIAL_BLOCK_READ, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_GMEM_READ, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_GMEM_WRITE, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_DEPTH_READ_FLAG0_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_DEPTH_READ_FLAG1_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_DEPTH_READ_FLAG2_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_DEPTH_READ_FLAG3_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_DEPTH_READ_FLAG4_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_COLOR_READ_FLAG0_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_COLOR_READ_FLAG1_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_COLOR_READ_FLAG2_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_COLOR_READ_FLAG3_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_COLOR_READ_FLAG4_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_2D_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_2D_RD_REQ, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_2D_WR_REQ, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_2D_REORDER_STARVE_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_CCU_2D_PIXELS, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter tse_counters[] = { - COUNTER(GRAS_PERFCTR_TSE_SEL_0, RBBM_PERFCTR_TSE_0_LO, RBBM_PERFCTR_TSE_0_HI), - COUNTER(GRAS_PERFCTR_TSE_SEL_1, RBBM_PERFCTR_TSE_1_LO, RBBM_PERFCTR_TSE_1_HI), - COUNTER(GRAS_PERFCTR_TSE_SEL_2, RBBM_PERFCTR_TSE_2_LO, RBBM_PERFCTR_TSE_2_HI), - COUNTER(GRAS_PERFCTR_TSE_SEL_3, RBBM_PERFCTR_TSE_3_LO, RBBM_PERFCTR_TSE_3_HI), -}; - -static const struct fd_perfcntr_countable tse_countables[] = { - COUNTABLE(PERF_TSE_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_CLIPPING_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_STALL_CYCLES_RAS, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_STALL_CYCLES_LRZ_ZPLANE, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_STARVE_CYCLES_PC, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_INPUT_PRIM, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_INPUT_NULL_PRIM, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_TRIVAL_REJ_PRIM, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_CLIPPED_PRIM, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_ZERO_AREA_PRIM, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_FACENESS_CULLED_PRIM, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_ZERO_PIXEL_PRIM, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_OUTPUT_NULL_PRIM, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_OUTPUT_VISIBLE_PRIM, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_CINVOCATION, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_CPRIMITIVES, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_2D_INPUT_PRIM, UINT64, AVERAGE), - COUNTABLE(PERF_TSE_2D_ALIVE_CLCLES, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter ras_counters[] = { - COUNTER(GRAS_PERFCTR_RAS_SEL_0, RBBM_PERFCTR_RAS_0_LO, RBBM_PERFCTR_RAS_0_HI), - COUNTER(GRAS_PERFCTR_RAS_SEL_1, RBBM_PERFCTR_RAS_1_LO, RBBM_PERFCTR_RAS_1_HI), - COUNTER(GRAS_PERFCTR_RAS_SEL_2, RBBM_PERFCTR_RAS_2_LO, RBBM_PERFCTR_RAS_2_HI), - COUNTER(GRAS_PERFCTR_RAS_SEL_3, RBBM_PERFCTR_RAS_3_LO, RBBM_PERFCTR_RAS_3_HI), -}; - -static const struct fd_perfcntr_countable ras_countables[] = { - COUNTABLE(PERF_RAS_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_RAS_SUPERTILE_ACTIVE_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_RAS_STALL_CYCLES_LRZ, UINT64, AVERAGE), - COUNTABLE(PERF_RAS_STARVE_CYCLES_TSE, UINT64, AVERAGE), - COUNTABLE(PERF_RAS_SUPER_TILES, UINT64, AVERAGE), - COUNTABLE(PERF_RAS_8X4_TILES, UINT64, AVERAGE), - COUNTABLE(PERF_RAS_MASKGEN_ACTIVE, UINT64, AVERAGE), - COUNTABLE(PERF_RAS_FULLY_COVERED_SUPER_TILES, UINT64, AVERAGE), - COUNTABLE(PERF_RAS_FULLY_COVERED_8X4_TILES, UINT64, AVERAGE), - COUNTABLE(PERF_RAS_PRIM_KILLED_INVISILBE, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter lrz_counters[] = { - COUNTER(GRAS_PERFCTR_LRZ_SEL_0, RBBM_PERFCTR_LRZ_0_LO, RBBM_PERFCTR_LRZ_0_HI), - COUNTER(GRAS_PERFCTR_LRZ_SEL_1, RBBM_PERFCTR_LRZ_1_LO, RBBM_PERFCTR_LRZ_1_HI), - COUNTER(GRAS_PERFCTR_LRZ_SEL_2, RBBM_PERFCTR_LRZ_2_LO, RBBM_PERFCTR_LRZ_2_HI), - COUNTER(GRAS_PERFCTR_LRZ_SEL_3, RBBM_PERFCTR_LRZ_3_LO, RBBM_PERFCTR_LRZ_3_HI), -}; - -static const struct fd_perfcntr_countable lrz_countables[] = { - COUNTABLE(PERF_LRZ_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_STARVE_CYCLES_RAS, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_STALL_CYCLES_RB, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_STALL_CYCLES_VSC, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_STALL_CYCLES_VPC, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_STALL_CYCLES_UCHE, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_LRZ_READ, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_LRZ_WRITE, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_READ_LATENCY, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_MERGE_CACHE_UPDATING, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_PRIM_KILLED_BY_MASKGEN, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_PRIM_KILLED_BY_LRZ, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_FULL_8X8_TILES, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_PARTIAL_8X8_TILES, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_TILE_KILLED, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_TOTAL_PIXEL, UINT64, AVERAGE), - COUNTABLE(PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter hlsq_counters[] = { - COUNTER(HLSQ_PERFCTR_HLSQ_SEL_0, RBBM_PERFCTR_HLSQ_0_LO, RBBM_PERFCTR_HLSQ_0_HI), - COUNTER(HLSQ_PERFCTR_HLSQ_SEL_1, RBBM_PERFCTR_HLSQ_1_LO, RBBM_PERFCTR_HLSQ_1_HI), - COUNTER(HLSQ_PERFCTR_HLSQ_SEL_2, RBBM_PERFCTR_HLSQ_2_LO, RBBM_PERFCTR_HLSQ_2_HI), - COUNTER(HLSQ_PERFCTR_HLSQ_SEL_3, RBBM_PERFCTR_HLSQ_3_LO, RBBM_PERFCTR_HLSQ_3_HI), - COUNTER(HLSQ_PERFCTR_HLSQ_SEL_4, RBBM_PERFCTR_HLSQ_4_LO, RBBM_PERFCTR_HLSQ_4_HI), - COUNTER(HLSQ_PERFCTR_HLSQ_SEL_5, RBBM_PERFCTR_HLSQ_5_LO, RBBM_PERFCTR_HLSQ_5_HI), - COUNTER(HLSQ_PERFCTR_HLSQ_SEL_6, RBBM_PERFCTR_HLSQ_6_LO, RBBM_PERFCTR_HLSQ_6_HI), - COUNTER(HLSQ_PERFCTR_HLSQ_SEL_7, RBBM_PERFCTR_HLSQ_7_LO, RBBM_PERFCTR_HLSQ_7_HI), -}; - -static const struct fd_perfcntr_countable hlsq_countables[] = { - COUNTABLE(PERF_HLSQ_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_STALL_CYCLES_UCHE, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_STALL_CYCLES_SP_STATE, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_UCHE_LATENCY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_UCHE_LATENCY_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_FS_STAGE_32_WAVES, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_FS_STAGE_64_WAVES, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_QUADS, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_SP_STATE_COPY_TRANS_FS_STAGE, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_SP_STATE_COPY_TRANS_VS_STAGE, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_TP_STATE_COPY_TRANS_FS_STAGE, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_TP_STATE_COPY_TRANS_VS_STAGE, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_CS_INVOCATIONS, UINT64, AVERAGE), - COUNTABLE(PERF_HLSQ_COMPUTE_DRAWCALLS, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter pc_counters[] = { - COUNTER(PC_PERFCTR_PC_SEL_0, RBBM_PERFCTR_PC_0_LO, RBBM_PERFCTR_PC_0_HI), - COUNTER(PC_PERFCTR_PC_SEL_1, RBBM_PERFCTR_PC_1_LO, RBBM_PERFCTR_PC_1_HI), - COUNTER(PC_PERFCTR_PC_SEL_2, RBBM_PERFCTR_PC_2_LO, RBBM_PERFCTR_PC_2_HI), - COUNTER(PC_PERFCTR_PC_SEL_3, RBBM_PERFCTR_PC_3_LO, RBBM_PERFCTR_PC_3_HI), - COUNTER(PC_PERFCTR_PC_SEL_4, RBBM_PERFCTR_PC_4_LO, RBBM_PERFCTR_PC_4_HI), - COUNTER(PC_PERFCTR_PC_SEL_5, RBBM_PERFCTR_PC_5_LO, RBBM_PERFCTR_PC_5_HI), - COUNTER(PC_PERFCTR_PC_SEL_6, RBBM_PERFCTR_PC_6_LO, RBBM_PERFCTR_PC_6_HI), - COUNTER(PC_PERFCTR_PC_SEL_7, RBBM_PERFCTR_PC_7_LO, RBBM_PERFCTR_PC_7_HI), -}; - -static const struct fd_perfcntr_countable pc_countables[] = { - COUNTABLE(PERF_PC_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_PC_WORKING_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_PC_STALL_CYCLES_VFD, UINT64, AVERAGE), - COUNTABLE(PERF_PC_STALL_CYCLES_TSE, UINT64, AVERAGE), - COUNTABLE(PERF_PC_STALL_CYCLES_VPC, UINT64, AVERAGE), - COUNTABLE(PERF_PC_STALL_CYCLES_UCHE, UINT64, AVERAGE), - COUNTABLE(PERF_PC_STALL_CYCLES_TESS, UINT64, AVERAGE), - COUNTABLE(PERF_PC_STALL_CYCLES_TSE_ONLY, UINT64, AVERAGE), - COUNTABLE(PERF_PC_STALL_CYCLES_VPC_ONLY, UINT64, AVERAGE), - COUNTABLE(PERF_PC_PASS1_TF_STALL_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_INDEX, UINT64, AVERAGE), - COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR, UINT64, AVERAGE), - COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM, UINT64, AVERAGE), - COUNTABLE(PERF_PC_STARVE_CYCLES_FOR_POSITION, UINT64, AVERAGE), - COUNTABLE(PERF_PC_STARVE_CYCLES_DI, UINT64, AVERAGE), - COUNTABLE(PERF_PC_VIS_STREAMS_LOADED, UINT64, AVERAGE), - COUNTABLE(PERF_PC_INSTANCES, UINT64, AVERAGE), - COUNTABLE(PERF_PC_VPC_PRIMITIVES, UINT64, AVERAGE), - COUNTABLE(PERF_PC_DEAD_PRIM, UINT64, AVERAGE), - COUNTABLE(PERF_PC_LIVE_PRIM, UINT64, AVERAGE), - COUNTABLE(PERF_PC_VERTEX_HITS, UINT64, AVERAGE), - COUNTABLE(PERF_PC_IA_VERTICES, UINT64, AVERAGE), - COUNTABLE(PERF_PC_IA_PRIMITIVES, UINT64, AVERAGE), - COUNTABLE(PERF_PC_GS_PRIMITIVES, UINT64, AVERAGE), - COUNTABLE(PERF_PC_HS_INVOCATIONS, UINT64, AVERAGE), - COUNTABLE(PERF_PC_DS_INVOCATIONS, UINT64, AVERAGE), - COUNTABLE(PERF_PC_VS_INVOCATIONS, UINT64, AVERAGE), - COUNTABLE(PERF_PC_GS_INVOCATIONS, UINT64, AVERAGE), - COUNTABLE(PERF_PC_DS_PRIMITIVES, UINT64, AVERAGE), - COUNTABLE(PERF_PC_VPC_POS_DATA_TRANSACTION, UINT64, AVERAGE), - COUNTABLE(PERF_PC_3D_DRAWCALLS, UINT64, AVERAGE), - COUNTABLE(PERF_PC_2D_DRAWCALLS, UINT64, AVERAGE), - COUNTABLE(PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS, UINT64, AVERAGE), - COUNTABLE(PERF_TESS_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_TESS_WORKING_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_TESS_STALL_CYCLES_PC, UINT64, AVERAGE), - COUNTABLE(PERF_TESS_STARVE_CYCLES_PC, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter rb_counters[] = { - COUNTER(RB_PERFCTR_RB_SEL_0, RBBM_PERFCTR_RB_0_LO, RBBM_PERFCTR_RB_0_HI), - COUNTER(RB_PERFCTR_RB_SEL_1, RBBM_PERFCTR_RB_1_LO, RBBM_PERFCTR_RB_1_HI), - COUNTER(RB_PERFCTR_RB_SEL_2, RBBM_PERFCTR_RB_2_LO, RBBM_PERFCTR_RB_2_HI), - COUNTER(RB_PERFCTR_RB_SEL_3, RBBM_PERFCTR_RB_3_LO, RBBM_PERFCTR_RB_3_HI), - COUNTER(RB_PERFCTR_RB_SEL_4, RBBM_PERFCTR_RB_4_LO, RBBM_PERFCTR_RB_4_HI), - COUNTER(RB_PERFCTR_RB_SEL_5, RBBM_PERFCTR_RB_5_LO, RBBM_PERFCTR_RB_5_HI), - COUNTER(RB_PERFCTR_RB_SEL_6, RBBM_PERFCTR_RB_6_LO, RBBM_PERFCTR_RB_6_HI), - COUNTER(RB_PERFCTR_RB_SEL_7, RBBM_PERFCTR_RB_7_LO, RBBM_PERFCTR_RB_7_HI), -}; - -static const struct fd_perfcntr_countable rb_countables[] = { - COUNTABLE(PERF_RB_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_RB_STALL_CYCLES_CCU, UINT64, AVERAGE), - COUNTABLE(PERF_RB_STALL_CYCLES_HLSQ, UINT64, AVERAGE), - COUNTABLE(PERF_RB_STALL_CYCLES_FIFO0_FULL, UINT64, AVERAGE), - COUNTABLE(PERF_RB_STALL_CYCLES_FIFO1_FULL, UINT64, AVERAGE), - COUNTABLE(PERF_RB_STALL_CYCLES_FIFO2_FULL, UINT64, AVERAGE), - COUNTABLE(PERF_RB_STARVE_CYCLES_SP, UINT64, AVERAGE), - COUNTABLE(PERF_RB_STARVE_CYCLES_LRZ_TILE, UINT64, AVERAGE), - COUNTABLE(PERF_RB_STARVE_CYCLES_CCU, UINT64, AVERAGE), - COUNTABLE(PERF_RB_STARVE_CYCLES_Z_PLANE, UINT64, AVERAGE), - COUNTABLE(PERF_RB_STARVE_CYCLES_BARY_PLANE, UINT64, AVERAGE), - COUNTABLE(PERF_RB_Z_WORKLOAD, UINT64, AVERAGE), - COUNTABLE(PERF_RB_HLSQ_ACTIVE, UINT64, AVERAGE), - COUNTABLE(PERF_RB_Z_READ, UINT64, AVERAGE), - COUNTABLE(PERF_RB_Z_WRITE, UINT64, AVERAGE), - COUNTABLE(PERF_RB_C_READ, UINT64, AVERAGE), - COUNTABLE(PERF_RB_C_WRITE, UINT64, AVERAGE), - COUNTABLE(PERF_RB_TOTAL_PASS, UINT64, AVERAGE), - COUNTABLE(PERF_RB_Z_PASS, UINT64, AVERAGE), - COUNTABLE(PERF_RB_Z_FAIL, UINT64, AVERAGE), - COUNTABLE(PERF_RB_S_FAIL, UINT64, AVERAGE), - COUNTABLE(PERF_RB_BLENDED_FXP_COMPONENTS, UINT64, AVERAGE), - COUNTABLE(PERF_RB_BLENDED_FP16_COMPONENTS, UINT64, AVERAGE), - COUNTABLE(RB_RESERVED, UINT64, AVERAGE), - COUNTABLE(PERF_RB_2D_ALIVE_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_RB_2D_STALL_CYCLES_A2D, UINT64, AVERAGE), - COUNTABLE(PERF_RB_2D_STARVE_CYCLES_SRC, UINT64, AVERAGE), - COUNTABLE(PERF_RB_2D_STARVE_CYCLES_SP, UINT64, AVERAGE), - COUNTABLE(PERF_RB_2D_STARVE_CYCLES_DST, UINT64, AVERAGE), - COUNTABLE(PERF_RB_2D_VALID_PIXELS, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter rbbm_counters[] = { -//RESERVED: for kernel -// COUNTER(RBBM_PERFCTR_RBBM_SEL_0, RBBM_PERFCTR_RBBM_0_LO, RBBM_PERFCTR_RBBM_0_HI), - COUNTER(RBBM_PERFCTR_RBBM_SEL_1, RBBM_PERFCTR_RBBM_1_LO, RBBM_PERFCTR_RBBM_1_HI), - COUNTER(RBBM_PERFCTR_RBBM_SEL_2, RBBM_PERFCTR_RBBM_2_LO, RBBM_PERFCTR_RBBM_2_HI), - COUNTER(RBBM_PERFCTR_RBBM_SEL_3, RBBM_PERFCTR_RBBM_3_LO, RBBM_PERFCTR_RBBM_3_HI), -}; - -static const struct fd_perfcntr_countable rbbm_countables[] = { - COUNTABLE(PERF_RBBM_ALWAYS_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_ALWAYS_ON, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_TSE_BUSY, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_RAS_BUSY, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_PC_DCALL_BUSY, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_PC_VSD_BUSY, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_STATUS_MASKED, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_COM_BUSY, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_DCOM_BUSY, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_VBIF_BUSY, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_VSC_BUSY, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_TESS_BUSY, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_UCHE_BUSY, UINT64, AVERAGE), - COUNTABLE(PERF_RBBM_HLSQ_BUSY, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter sp_counters[] = { -//RESERVED: for kernel -// COUNTER(SP_PERFCTR_SP_SEL_0, RBBM_PERFCTR_SP_0_LO, RBBM_PERFCTR_SP_0_HI), - COUNTER(SP_PERFCTR_SP_SEL_1, RBBM_PERFCTR_SP_1_LO, RBBM_PERFCTR_SP_1_HI), - COUNTER(SP_PERFCTR_SP_SEL_2, RBBM_PERFCTR_SP_2_LO, RBBM_PERFCTR_SP_2_HI), - COUNTER(SP_PERFCTR_SP_SEL_3, RBBM_PERFCTR_SP_3_LO, RBBM_PERFCTR_SP_3_HI), - COUNTER(SP_PERFCTR_SP_SEL_4, RBBM_PERFCTR_SP_4_LO, RBBM_PERFCTR_SP_4_HI), - COUNTER(SP_PERFCTR_SP_SEL_5, RBBM_PERFCTR_SP_5_LO, RBBM_PERFCTR_SP_5_HI), - COUNTER(SP_PERFCTR_SP_SEL_6, RBBM_PERFCTR_SP_6_LO, RBBM_PERFCTR_SP_6_HI), - COUNTER(SP_PERFCTR_SP_SEL_7, RBBM_PERFCTR_SP_7_LO, RBBM_PERFCTR_SP_7_HI), - COUNTER(SP_PERFCTR_SP_SEL_8, RBBM_PERFCTR_SP_8_LO, RBBM_PERFCTR_SP_8_HI), - COUNTER(SP_PERFCTR_SP_SEL_9, RBBM_PERFCTR_SP_9_LO, RBBM_PERFCTR_SP_9_HI), - COUNTER(SP_PERFCTR_SP_SEL_10, RBBM_PERFCTR_SP_10_LO, RBBM_PERFCTR_SP_10_HI), - COUNTER(SP_PERFCTR_SP_SEL_11, RBBM_PERFCTR_SP_11_LO, RBBM_PERFCTR_SP_11_HI), -}; - -static const struct fd_perfcntr_countable sp_countables[] = { - COUNTABLE(PERF_SP_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_ALU_WORKING_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_EFU_WORKING_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_STALL_CYCLES_VPC, UINT64, AVERAGE), - COUNTABLE(PERF_SP_STALL_CYCLES_TP, UINT64, AVERAGE), - COUNTABLE(PERF_SP_STALL_CYCLES_UCHE, UINT64, AVERAGE), - COUNTABLE(PERF_SP_STALL_CYCLES_RB, UINT64, AVERAGE), - COUNTABLE(PERF_SP_SCHEDULER_NON_WORKING, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_CONTEXTS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_CONTEXT_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_FS_STAGE_WAVE_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_FS_STAGE_WAVE_SAMPLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_VS_STAGE_WAVE_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_VS_STAGE_WAVE_SAMPLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_FS_STAGE_DURATION_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_VS_STAGE_DURATION_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_CTRL_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_LOAD_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_EMIT_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_NOP_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_WAIT_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_FETCH_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_IDLE_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_END_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_LONG_SYNC_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_SHORT_SYNC_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_WAVE_JOIN_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_LM_LOAD_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_LM_STORE_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_LM_ATOMICS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_GM_LOAD_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_GM_STORE_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_GM_ATOMICS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_VS_STAGE_CFLOW_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_FS_STAGE_TEX_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_FS_STAGE_BARY_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_VS_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_FS_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_ADDR_LOCK_COUNT, UINT64, AVERAGE), - COUNTABLE(PERF_SP_UCHE_READ_TRANS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_UCHE_WRITE_TRANS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_EXPORT_VPC_TRANS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_EXPORT_RB_TRANS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_PIXELS_KILLED, UINT64, AVERAGE), - COUNTABLE(PERF_SP_ICL1_REQUESTS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_ICL1_MISSES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_ICL0_REQUESTS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_ICL0_MISSES, UINT64, AVERAGE), - COUNTABLE(PERF_SP_HS_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_DS_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_GS_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_CS_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_GPR_READ, UINT64, AVERAGE), - COUNTABLE(PERF_SP_GPR_WRITE, UINT64, AVERAGE), - COUNTABLE(PERF_SP_LM_CH0_REQUESTS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_LM_CH1_REQUESTS, UINT64, AVERAGE), - COUNTABLE(PERF_SP_LM_BANK_CONFLICTS, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter tp_counters[] = { - COUNTER(TPL1_PERFCTR_TP_SEL_0, RBBM_PERFCTR_TP_0_LO, RBBM_PERFCTR_TP_0_HI), - COUNTER(TPL1_PERFCTR_TP_SEL_1, RBBM_PERFCTR_TP_1_LO, RBBM_PERFCTR_TP_1_HI), - COUNTER(TPL1_PERFCTR_TP_SEL_2, RBBM_PERFCTR_TP_2_LO, RBBM_PERFCTR_TP_2_HI), - COUNTER(TPL1_PERFCTR_TP_SEL_3, RBBM_PERFCTR_TP_3_LO, RBBM_PERFCTR_TP_3_HI), - COUNTER(TPL1_PERFCTR_TP_SEL_4, RBBM_PERFCTR_TP_4_LO, RBBM_PERFCTR_TP_4_HI), - COUNTER(TPL1_PERFCTR_TP_SEL_5, RBBM_PERFCTR_TP_5_LO, RBBM_PERFCTR_TP_5_HI), - COUNTER(TPL1_PERFCTR_TP_SEL_6, RBBM_PERFCTR_TP_6_LO, RBBM_PERFCTR_TP_6_HI), - COUNTER(TPL1_PERFCTR_TP_SEL_7, RBBM_PERFCTR_TP_7_LO, RBBM_PERFCTR_TP_7_HI), -}; - -static const struct fd_perfcntr_countable tp_countables[] = { - COUNTABLE(PERF_TP_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_TP_STALL_CYCLES_UCHE, UINT64, AVERAGE), - COUNTABLE(PERF_TP_LATENCY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_TP_LATENCY_TRANS, UINT64, AVERAGE), - COUNTABLE(PERF_TP_FLAG_CACHE_REQUEST_SAMPLES, UINT64, AVERAGE), - COUNTABLE(PERF_TP_FLAG_CACHE_REQUEST_LATENCY, UINT64, AVERAGE), - COUNTABLE(PERF_TP_L1_CACHELINE_REQUESTS, UINT64, AVERAGE), - COUNTABLE(PERF_TP_L1_CACHELINE_MISSES, UINT64, AVERAGE), - COUNTABLE(PERF_TP_SP_TP_TRANS, UINT64, AVERAGE), - COUNTABLE(PERF_TP_TP_SP_TRANS, UINT64, AVERAGE), - COUNTABLE(PERF_TP_OUTPUT_PIXELS, UINT64, AVERAGE), - COUNTABLE(PERF_TP_FILTER_WORKLOAD_16BIT, UINT64, AVERAGE), - COUNTABLE(PERF_TP_FILTER_WORKLOAD_32BIT, UINT64, AVERAGE), - COUNTABLE(PERF_TP_QUADS_RECEIVED, UINT64, AVERAGE), - COUNTABLE(PERF_TP_QUADS_OFFSET, UINT64, AVERAGE), - COUNTABLE(PERF_TP_QUADS_SHADOW, UINT64, AVERAGE), - COUNTABLE(PERF_TP_QUADS_ARRAY, UINT64, AVERAGE), - COUNTABLE(PERF_TP_QUADS_GRADIENT, UINT64, AVERAGE), - COUNTABLE(PERF_TP_QUADS_1D, UINT64, AVERAGE), - COUNTABLE(PERF_TP_QUADS_2D, UINT64, AVERAGE), - COUNTABLE(PERF_TP_QUADS_BUFFER, UINT64, AVERAGE), - COUNTABLE(PERF_TP_QUADS_3D, UINT64, AVERAGE), - COUNTABLE(PERF_TP_QUADS_CUBE, UINT64, AVERAGE), - COUNTABLE(PERF_TP_STATE_CACHE_REQUESTS, UINT64, AVERAGE), - COUNTABLE(PERF_TP_STATE_CACHE_MISSES, UINT64, AVERAGE), - COUNTABLE(PERF_TP_DIVERGENT_QUADS_RECEIVED, UINT64, AVERAGE), - COUNTABLE(PERF_TP_BINDLESS_STATE_CACHE_REQUESTS, UINT64, AVERAGE), - COUNTABLE(PERF_TP_BINDLESS_STATE_CACHE_MISSES, UINT64, AVERAGE), - COUNTABLE(PERF_TP_PRT_NON_RESIDENT_EVENTS, UINT64, AVERAGE), - COUNTABLE(PERF_TP_OUTPUT_PIXELS_POINT, UINT64, AVERAGE), - COUNTABLE(PERF_TP_OUTPUT_PIXELS_BILINEAR, UINT64, AVERAGE), - COUNTABLE(PERF_TP_OUTPUT_PIXELS_MIP, UINT64, AVERAGE), - COUNTABLE(PERF_TP_OUTPUT_PIXELS_ANISO, UINT64, AVERAGE), - COUNTABLE(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, UINT64, AVERAGE), - COUNTABLE(PERF_TP_FLAG_CACHE_REQUESTS, UINT64, AVERAGE), - COUNTABLE(PERF_TP_FLAG_CACHE_MISSES, UINT64, AVERAGE), - COUNTABLE(PERF_TP_L1_5_L2_REQUESTS, UINT64, AVERAGE), - COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS, UINT64, AVERAGE), - COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS_POINT, UINT64, AVERAGE), - COUNTABLE(PERF_TP_2D_OUTPUT_PIXELS_BILINEAR, UINT64, AVERAGE), - COUNTABLE(PERF_TP_2D_FILTER_WORKLOAD_16BIT, UINT64, AVERAGE), - COUNTABLE(PERF_TP_2D_FILTER_WORKLOAD_32BIT, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter uche_counters[] = { - COUNTER(UCHE_PERFCTR_UCHE_SEL_0, RBBM_PERFCTR_UCHE_0_LO, RBBM_PERFCTR_UCHE_0_HI), - COUNTER(UCHE_PERFCTR_UCHE_SEL_1, RBBM_PERFCTR_UCHE_1_LO, RBBM_PERFCTR_UCHE_1_HI), - COUNTER(UCHE_PERFCTR_UCHE_SEL_2, RBBM_PERFCTR_UCHE_2_LO, RBBM_PERFCTR_UCHE_2_HI), - COUNTER(UCHE_PERFCTR_UCHE_SEL_3, RBBM_PERFCTR_UCHE_3_LO, RBBM_PERFCTR_UCHE_3_HI), - COUNTER(UCHE_PERFCTR_UCHE_SEL_4, RBBM_PERFCTR_UCHE_4_LO, RBBM_PERFCTR_UCHE_4_HI), - COUNTER(UCHE_PERFCTR_UCHE_SEL_5, RBBM_PERFCTR_UCHE_5_LO, RBBM_PERFCTR_UCHE_5_HI), - COUNTER(UCHE_PERFCTR_UCHE_SEL_6, RBBM_PERFCTR_UCHE_6_LO, RBBM_PERFCTR_UCHE_6_HI), - COUNTER(UCHE_PERFCTR_UCHE_SEL_7, RBBM_PERFCTR_UCHE_7_LO, RBBM_PERFCTR_UCHE_7_HI), -}; - -static const struct fd_perfcntr_countable uche_countables[] = { - COUNTABLE(PERF_UCHE_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_STALL_CYCLES_VBIF, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_VBIF_LATENCY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_VBIF_LATENCY_SAMPLES, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_TP, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_VFD, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_HLSQ, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_LRZ, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_SP, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_READ_REQUESTS_TP, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_READ_REQUESTS_VFD, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_READ_REQUESTS_HLSQ, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_READ_REQUESTS_LRZ, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_READ_REQUESTS_SP, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_WRITE_REQUESTS_LRZ, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_WRITE_REQUESTS_SP, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_WRITE_REQUESTS_VPC, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_WRITE_REQUESTS_VSC, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_EVICTS, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_BANK_REQ0, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_BANK_REQ1, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_BANK_REQ2, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_BANK_REQ3, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_BANK_REQ4, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_BANK_REQ5, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_BANK_REQ6, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_BANK_REQ7, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_CH0, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_VBIF_READ_BEATS_CH1, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_GMEM_READ_BEATS, UINT64, AVERAGE), - COUNTABLE(PERF_UCHE_FLAG_COUNT, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter vfd_counters[] = { - COUNTER(VFD_PERFCTR_VFD_SEL_0, RBBM_PERFCTR_VFD_0_LO, RBBM_PERFCTR_VFD_0_HI), - COUNTER(VFD_PERFCTR_VFD_SEL_1, RBBM_PERFCTR_VFD_1_LO, RBBM_PERFCTR_VFD_1_HI), - COUNTER(VFD_PERFCTR_VFD_SEL_2, RBBM_PERFCTR_VFD_2_LO, RBBM_PERFCTR_VFD_2_HI), - COUNTER(VFD_PERFCTR_VFD_SEL_3, RBBM_PERFCTR_VFD_3_LO, RBBM_PERFCTR_VFD_3_HI), - COUNTER(VFD_PERFCTR_VFD_SEL_4, RBBM_PERFCTR_VFD_4_LO, RBBM_PERFCTR_VFD_4_HI), - COUNTER(VFD_PERFCTR_VFD_SEL_5, RBBM_PERFCTR_VFD_5_LO, RBBM_PERFCTR_VFD_5_HI), - COUNTER(VFD_PERFCTR_VFD_SEL_6, RBBM_PERFCTR_VFD_6_LO, RBBM_PERFCTR_VFD_6_HI), - COUNTER(VFD_PERFCTR_VFD_SEL_7, RBBM_PERFCTR_VFD_7_LO, RBBM_PERFCTR_VFD_7_HI), -}; - -static const struct fd_perfcntr_countable vfd_countables[] = { - COUNTABLE(PERF_VFD_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_STALL_CYCLES_UCHE, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_STALL_CYCLES_VPC_ALLOC, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_STALL_CYCLES_MISS_VB, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_STALL_CYCLES_MISS_Q, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_STALL_CYCLES_SP_INFO, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_STALL_CYCLES_SP_ATTR, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_STALL_CYCLES_VFDP_VB, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_STALL_CYCLES_VFDP_Q, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_DECODER_PACKER_STALL, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_STARVE_CYCLES_UCHE, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_RBUFFER_FULL, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_ATTR_INFO_FIFO_FULL, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_DECODED_ATTRIBUTE_BYTES, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_NUM_ATTRIBUTES, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_INSTRUCTIONS, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_UPPER_SHADER_FIBERS, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_LOWER_SHADER_FIBERS, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_MODE_0_FIBERS, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_MODE_1_FIBERS, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_MODE_2_FIBERS, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_MODE_3_FIBERS, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_MODE_4_FIBERS, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_TOTAL_VERTICES, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_NUM_ATTR_MISS, UINT64, AVERAGE), - COUNTABLE(PERF_VFD_1_BURST_REQ, UINT64, AVERAGE), - COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD, UINT64, AVERAGE), - COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD_INDEX, UINT64, AVERAGE), - COUNTABLE(PERF_VFDP_STALL_CYCLES_VFD_PROG, UINT64, AVERAGE), - COUNTABLE(PERF_VFDP_STARVE_CYCLES_PC, UINT64, AVERAGE), - COUNTABLE(PERF_VFDP_VS_STAGE_32_WAVES, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter vpc_counters[] = { - COUNTER(VPC_PERFCTR_VPC_SEL_0, RBBM_PERFCTR_VPC_0_LO, RBBM_PERFCTR_VPC_0_HI), - COUNTER(VPC_PERFCTR_VPC_SEL_1, RBBM_PERFCTR_VPC_1_LO, RBBM_PERFCTR_VPC_1_HI), - COUNTER(VPC_PERFCTR_VPC_SEL_2, RBBM_PERFCTR_VPC_2_LO, RBBM_PERFCTR_VPC_2_HI), - COUNTER(VPC_PERFCTR_VPC_SEL_3, RBBM_PERFCTR_VPC_3_LO, RBBM_PERFCTR_VPC_3_HI), -}; - -static const struct fd_perfcntr_countable vpc_countables[] = { - COUNTABLE(PERF_VPC_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_WORKING_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_STALL_CYCLES_UCHE, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_STALL_CYCLES_VFD_WACK, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_STALL_CYCLES_PC, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_STALL_CYCLES_SP_LM, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_POS_EXPORT_STALL_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_STARVE_CYCLES_SP, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_STARVE_CYCLES_LRZ, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_PC_PRIMITIVES, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_SP_COMPONENTS, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_SP_LM_PRIMITIVES, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_SP_LM_COMPONENTS, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_SP_LM_DWORDS, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_STREAMOUT_COMPONENTS, UINT64, AVERAGE), - COUNTABLE(PERF_VPC_GRANT_PHASES, UINT64, AVERAGE), -}; - -static const struct fd_perfcntr_counter vsc_counters[] = { - COUNTER(VSC_PERFCTR_VSC_SEL_0, RBBM_PERFCTR_VSC_0_LO, RBBM_PERFCTR_VSC_0_HI), - COUNTER(VSC_PERFCTR_VSC_SEL_1, RBBM_PERFCTR_VSC_1_LO, RBBM_PERFCTR_VSC_1_HI), -}; - -static const struct fd_perfcntr_countable vsc_countables[] = { - COUNTABLE(PERF_VSC_BUSY_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_VSC_WORKING_CYCLES, UINT64, AVERAGE), - COUNTABLE(PERF_VSC_STALL_CYCLES_UCHE, UINT64, AVERAGE), - COUNTABLE(PERF_VSC_EOT_NUM, UINT64, AVERAGE), -}; - -/* VBIF counters probably not too userful for userspace, and they make - * frameretrace take many more passes to collect all the metrics, so - * for now let's hide them. - */ -#if 0 -/* VBIF counters break the pattern a bit, with enable and clear regs: */ -static const struct fd_perfcntr_counter vbif_counters[] = { - COUNTER2(VBIF_PERF_CNT_SEL0, VBIF_PERF_CNT_LOW0, VBIF_PERF_CNT_HIGH0, VBIF_PERF_CNT_EN0, VBIF_PERF_CNT_CLR0), - COUNTER2(VBIF_PERF_CNT_SEL1, VBIF_PERF_CNT_LOW1, VBIF_PERF_CNT_HIGH1, VBIF_PERF_CNT_EN1, VBIF_PERF_CNT_CLR1), - COUNTER2(VBIF_PERF_CNT_SEL2, VBIF_PERF_CNT_LOW2, VBIF_PERF_CNT_HIGH2, VBIF_PERF_CNT_EN2, VBIF_PERF_CNT_CLR2), - COUNTER2(VBIF_PERF_CNT_SEL3, VBIF_PERF_CNT_LOW3, VBIF_PERF_CNT_HIGH3, VBIF_PERF_CNT_EN3, VBIF_PERF_CNT_CLR3), -}; - -static const struct fd_perfcntr_countable vbif_countables[] = { - COUNTABLE(AXI_READ_REQUESTS_ID_0, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_1, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_2, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_3, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_4, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_5, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_6, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_7, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_8, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_9, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_10, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_11, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_12, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_13, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_14, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_ID_15, UINT64, AVERAGE), - COUNTABLE(AXI0_READ_REQUESTS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI1_READ_REQUESTS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI2_READ_REQUESTS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI3_READ_REQUESTS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI_READ_REQUESTS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_0, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_1, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_2, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_3, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_4, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_5, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_6, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_7, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_8, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_9, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_10, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_11, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_12, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_13, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_14, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_ID_15, UINT64, AVERAGE), - COUNTABLE(AXI0_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI1_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI2_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI3_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_REQUESTS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI_TOTAL_REQUESTS, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_0, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_1, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_2, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_3, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_4, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_5, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_6, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_7, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_8, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_9, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_10, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_11, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_12, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_13, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_14, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_ID_15, UINT64, AVERAGE), - COUNTABLE(AXI0_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI1_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI2_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI3_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI_READ_DATA_BEATS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_0, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_1, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_2, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_3, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_4, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_5, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_6, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_7, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_8, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_9, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_10, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_11, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_12, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_13, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_14, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_ID_15, UINT64, AVERAGE), - COUNTABLE(AXI0_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI1_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI2_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI3_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI_WRITE_DATA_BEATS_TOTAL, UINT64, AVERAGE), - COUNTABLE(AXI_DATA_BEATS_TOTAL, UINT64, AVERAGE), -}; -#endif - -const struct fd_perfcntr_group a5xx_perfcntr_groups[] = { - GROUP("CP", cp_counters, cp_countables), - GROUP("CCU", ccu_counters, ccu_countables), - GROUP("TSE", tse_counters, tse_countables), - GROUP("RAS", ras_counters, ras_countables), - GROUP("LRZ", lrz_counters, lrz_countables), - GROUP("HLSQ", hlsq_counters, hlsq_countables), - GROUP("PC", pc_counters, pc_countables), - GROUP("RB", rb_counters, rb_countables), - GROUP("RBBM", rbbm_counters, rbbm_countables), - GROUP("SP", sp_counters, sp_countables), - GROUP("TP", tp_counters, tp_countables), - GROUP("UCHE", uche_counters, uche_countables), - GROUP("VFD", vfd_counters, vfd_countables), - GROUP("VPC", vpc_counters, vpc_countables), - GROUP("VSC", vsc_counters, vsc_countables), -// GROUP("VBIF", vbif_counters, vbif_countables), -}; - -const unsigned a5xx_num_perfcntr_groups = ARRAY_SIZE(a5xx_perfcntr_groups); - -#endif /* FD5_PERFCNTR_H_ */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c index e52ba900e..72f9f9933 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -28,7 +28,7 @@ #include "util/u_string.h" #include "util/u_memory.h" #include "util/u_inlines.h" -#include "util/u_format.h" +#include "util/format/u_format.h" #include "util/bitset.h" #include "freedreno_program.h" @@ -40,43 +40,6 @@ #include "ir3_cache.h" -static struct ir3_shader * -create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, - gl_shader_stage type) -{ - struct fd_context *ctx = fd_context(pctx); - struct ir3_compiler *compiler = ctx->screen->compiler; - return ir3_shader_create(compiler, cso, type, &ctx->debug, pctx->screen); -} - -static void * -fd5_fp_state_create(struct pipe_context *pctx, - const struct pipe_shader_state *cso) -{ - return create_shader_stateobj(pctx, cso, MESA_SHADER_FRAGMENT); -} - -static void -fd5_fp_state_delete(struct pipe_context *pctx, void *hwcso) -{ - struct ir3_shader *so = hwcso; - ir3_shader_destroy(so); -} - -static void * -fd5_vp_state_create(struct pipe_context *pctx, - const struct pipe_shader_state *cso) -{ - return create_shader_stateobj(pctx, cso, MESA_SHADER_VERTEX); -} - -static void -fd5_vp_state_delete(struct pipe_context *pctx, void *hwcso) -{ - struct ir3_shader *so = hwcso; - ir3_shader_destroy(so); -} - void fd5_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) { @@ -357,7 +320,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE); coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD); zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); - vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PIXEL); + vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. @@ -444,27 +407,20 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | 0x6 | /* XXX seems to be always set? */ A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) | - COND(s[VS].v->num_samp > 0, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); + COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; - ir3_link_shaders(&l, s[VS].v, s[FS].v); + ir3_link_shaders(&l, s[VS].v, s[FS].v, true); if ((s[VS].v->shader->stream_output.num_outputs > 0) && !emit->binning_pass) link_stream_out(&l, s[VS].v); - BITSET_DECLARE(varbs, 128) = {0}; - uint32_t *varmask = (uint32_t *)varbs; - - for (i = 0; i < l.cnt; i++) - for (j = 0; j < util_last_bit(l.var[i].compmask); j++) - BITSET_SET(varbs, l.var[i].loc + j); - OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4); - OUT_RING(ring, ~varmask[0]); /* VPC_VAR[0].DISABLE */ - OUT_RING(ring, ~varmask[1]); /* VPC_VAR[1].DISABLE */ - OUT_RING(ring, ~varmask[2]); /* VPC_VAR[2].DISABLE */ - OUT_RING(ring, ~varmask[3]); /* VPC_VAR[3].DISABLE */ + OUT_RING(ring, ~l.varmask[0]); /* VPC_VAR[0].DISABLE */ + OUT_RING(ring, ~l.varmask[1]); /* VPC_VAR[1].DISABLE */ + OUT_RING(ring, ~l.varmask[2]); /* VPC_VAR[2].DISABLE */ + OUT_RING(ring, ~l.varmask[3]); /* VPC_VAR[3].DISABLE */ /* a5xx appends pos/psize to end of the linkage map: */ if (pos_regid != regid(63,0)) @@ -568,7 +524,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) | - COND(s[FS].v->num_samp > 0, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE)); + COND(s[FS].v->need_pixlod, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); OUT_RING(ring, 0x020fffff); /* XXX */ @@ -722,11 +678,6 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, void fd5_prog_init(struct pipe_context *pctx) { - pctx->create_fs_state = fd5_fp_state_create; - pctx->delete_fs_state = fd5_fp_state_delete; - - pctx->create_vs_state = fd5_vp_state_create; - pctx->delete_vs_state = fd5_vp_state_delete; - + ir3_prog_init(pctx); fd_prog_init(pctx); } diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c index b438c7a56..254b49696 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_query.c @@ -134,7 +134,6 @@ occlusion_predicate_result(struct fd_acc_query *aq, void *buf, static const struct fd_acc_sample_provider occlusion_counter = { .query_type = PIPE_QUERY_OCCLUSION_COUNTER, - .active = FD_STAGE_DRAW, .size = sizeof(struct fd5_query_sample), .resume = occlusion_resume, .pause = occlusion_pause, @@ -143,7 +142,6 @@ static const struct fd_acc_sample_provider occlusion_counter = { static const struct fd_acc_sample_provider occlusion_predicate = { .query_type = PIPE_QUERY_OCCLUSION_PREDICATE, - .active = FD_STAGE_DRAW, .size = sizeof(struct fd5_query_sample), .resume = occlusion_resume, .pause = occlusion_pause, @@ -152,7 +150,6 @@ static const struct fd_acc_sample_provider occlusion_predicate = { static const struct fd_acc_sample_provider occlusion_predicate_conservative = { .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE, - .active = FD_STAGE_DRAW, .size = sizeof(struct fd5_query_sample), .resume = occlusion_resume, .pause = occlusion_pause, @@ -169,7 +166,7 @@ timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) struct fd_ringbuffer *ring = batch->draw; OUT_PKT7(ring, CP_EVENT_WRITE, 4); - OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) | + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP); OUT_RELOCW(ring, query_sample(aq, start)); OUT_RING(ring, 0x00000000); @@ -183,7 +180,7 @@ timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) struct fd_ringbuffer *ring = batch->draw; OUT_PKT7(ring, CP_EVENT_WRITE, 4); - OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) | + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP); OUT_RELOCW(ring, query_sample(aq, stop)); OUT_RING(ring, 0x00000000); @@ -229,7 +226,7 @@ timestamp_accumulate_result(struct fd_acc_query *aq, void *buf, static const struct fd_acc_sample_provider time_elapsed = { .query_type = PIPE_QUERY_TIME_ELAPSED, - .active = FD_STAGE_DRAW | FD_STAGE_CLEAR, + .always = true, .size = sizeof(struct fd5_query_sample), .resume = timestamp_resume, .pause = timestamp_pause, @@ -245,7 +242,7 @@ static const struct fd_acc_sample_provider time_elapsed = { static const struct fd_acc_sample_provider timestamp = { .query_type = PIPE_QUERY_TIMESTAMP, - .active = FD_STAGE_ALL, + .always = true, .size = sizeof(struct fd5_query_sample), .resume = timestamp_resume, .pause = timestamp_pause, @@ -366,7 +363,7 @@ perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf, static const struct fd_acc_sample_provider perfcntr = { .query_type = FD_QUERY_FIRST_PERFCNTR, - .active = FD_STAGE_DRAW | FD_STAGE_CLEAR, + .always = true, .resume = perfcntr_resume, .pause = perfcntr_pause, .result = perfcntr_accumulate_result, @@ -433,7 +430,7 @@ fd5_create_batch_query(struct pipe_context *pctx, counters_per_group[entry->gid]++; } - q = fd_acc_create_query2(ctx, 0, &perfcntr); + q = fd_acc_create_query2(ctx, 0, 0, &perfcntr); aq = fd_acc_query(q); /* sample buffer size is based on # of queries: */ diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c index 14f8ab772..12dcb8a6e 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_screen.c @@ -25,7 +25,7 @@ */ #include "pipe/p_screen.h" -#include "util/u_format.h" +#include "util/format/u_format.h" #include "fd5_screen.h" #include "fd5_blitter.h" @@ -76,9 +76,9 @@ fd5_screen_is_format_supported(struct pipe_screen *pscreen, } if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) && + (fd5_pipe2tex(format) != (enum a5xx_tex_fmt)~0) && (target == PIPE_BUFFER || - util_format_get_blocksize(format) != 12) && - (fd5_pipe2tex(format) != (enum a5xx_tex_fmt)~0)) { + util_format_get_blocksize(format) != 12)) { retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE); } @@ -121,9 +121,6 @@ fd5_screen_is_format_supported(struct pipe_screen *pscreen, return retval == usage; } -extern const struct fd_perfcntr_group a5xx_perfcntr_groups[]; -extern const unsigned a5xx_num_perfcntr_groups; - void fd5_screen_init(struct pipe_screen *pscreen) { @@ -137,10 +134,5 @@ fd5_screen_init(struct pipe_screen *pscreen) if (fd_mesa_debug & FD_DBG_TTILE) screen->tile_mode = fd5_tile_mode; - if (fd_mesa_debug & FD_DBG_PERFC) { - screen->perfcntr_groups = a5xx_perfcntr_groups; - screen->num_perfcntr_groups = a5xx_num_perfcntr_groups; - } - fd5_emit_init_screen(pscreen); } diff --git a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_texture.c b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_texture.c index 1ebaa3a45..fdfb2c7d9 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_texture.c +++ b/lib/mesa/src/gallium/drivers/freedreno/a5xx/fd5_texture.c @@ -28,7 +28,7 @@ #include "util/u_string.h" #include "util/u_memory.h" #include "util/u_inlines.h" -#include "util/u_format.h" +#include "util/format/u_format.h" #include "fd5_texture.h" #include "fd5_format.h" @@ -126,11 +126,20 @@ fd5_sampler_state_create(struct pipe_context *pctx, COND(!cso->seamless_cube_map, A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | COND(!cso->normalized_coords, A5XX_TEX_SAMP_1_UNNORM_COORDS); + so->texsamp0 |= A5XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias); + if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { - so->texsamp0 |= A5XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias); so->texsamp1 |= A5XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | A5XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); + } else { + /* If we're not doing mipmap filtering, we still need a slightly > 0 + * LOD clamp so the HW can decide between min and mag filtering of + * level 0. + */ + so->texsamp1 |= + A5XX_TEX_SAMP_1_MIN_LOD(MIN2(cso->min_lod, 0.125)) | + A5XX_TEX_SAMP_1_MAX_LOD(MIN2(cso->max_lod, 0.125)); } if (cso->compare_mode) @@ -198,6 +207,7 @@ fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, { struct fd5_pipe_sampler_view *so = CALLOC_STRUCT(fd5_pipe_sampler_view); struct fd_resource *rsc = fd_resource(prsc); + struct fdl_slice *slice = NULL; enum pipe_format format = cso->format; unsigned lvl, layers = 0; @@ -249,12 +259,13 @@ fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, A5XX_TEX_CONST_1_HEIGHT(1); so->texconst2 = A5XX_TEX_CONST_2_FETCHSIZE(fd5_pipe2fetchsize(format)) | - A5XX_TEX_CONST_2_PITCH(elements * rsc->cpp); + A5XX_TEX_CONST_2_PITCH(elements * rsc->layout.cpp); so->offset = cso->u.buf.offset; } else { unsigned miplevels; lvl = fd_sampler_first_level(cso); + slice = fd_resource_slice(rsc, lvl); miplevels = fd_sampler_last_level(cso) - lvl; layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1; @@ -264,9 +275,7 @@ fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, A5XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); so->texconst2 = A5XX_TEX_CONST_2_FETCHSIZE(fd5_pipe2fetchsize(format)) | - A5XX_TEX_CONST_2_PITCH( - util_format_get_nblocksx( - format, rsc->slices[lvl].pitch) * rsc->cpp); + A5XX_TEX_CONST_2_PITCH(slice->pitch); so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer); } @@ -277,27 +286,27 @@ fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: so->texconst3 = - A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size); + A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size); so->texconst5 = A5XX_TEX_CONST_5_DEPTH(1); break; case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: so->texconst3 = - A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size); + A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size); so->texconst5 = A5XX_TEX_CONST_5_DEPTH(layers); break; case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY: so->texconst3 = - A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size); + A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size); so->texconst5 = A5XX_TEX_CONST_5_DEPTH(layers / 6); break; case PIPE_TEXTURE_3D: so->texconst3 = - A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->slices[lvl].size0); + A5XX_TEX_CONST_3_ARRAY_PITCH(slice->size0); so->texconst5 = A5XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl)); break; |