diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2024-04-02 09:30:07 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2024-04-02 09:30:07 +0000 |
commit | f54e142455cb3c9d1662dae7e096a32a47e5409b (patch) | |
tree | 440ecd46269f0eac25e349e1ed58f246490c5e26 /lib/mesa/src/gallium/drivers/freedreno/ir3 | |
parent | 36d8503c27530f68d655d3ef77a6eaa4dfd8ad65 (diff) |
Import Mesa 23.3.6
Diffstat (limited to 'lib/mesa/src/gallium/drivers/freedreno/ir3')
6 files changed, 88 insertions, 56 deletions
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.c index 0501248dc..351ed52bd 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.c +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.c @@ -97,6 +97,8 @@ ir3_cache_lookup(struct ir3_cache *cache, const struct ir3_cache_key *key, return entry->data; } + MESA_TRACE_FUNC(); + if (key->hs) assert(key->ds); @@ -115,7 +117,7 @@ ir3_cache_lookup(struct ir3_cache *cache, const struct ir3_cache_key *key, shaders[MESA_SHADER_TESS_CTRL] = hs; } - struct ir3_shader_variant *variants[MESA_SHADER_STAGES]; + const struct ir3_shader_variant *variants[MESA_SHADER_STAGES]; struct ir3_shader_key shader_key = key->key; for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES; @@ -144,7 +146,7 @@ ir3_cache_lookup(struct ir3_cache *cache, const struct ir3_cache_key *key, } } - struct ir3_shader_variant *bs; + const struct ir3_shader_variant *bs; if (ir3_has_binning_vs(&key->key)) { /* starting with a6xx, the same const state is used for binning and draw diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.h b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.h index cd75f33d4..e29561f5b 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.h +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.h @@ -61,10 +61,10 @@ struct ir3_program_state { struct ir3_cache_funcs { struct ir3_program_state *(*create_state)( - void *data, struct ir3_shader_variant *bs, /* binning pass vs */ - struct ir3_shader_variant *vs, struct ir3_shader_variant *hs, - struct ir3_shader_variant *ds, struct ir3_shader_variant *gs, - struct ir3_shader_variant *fs, const struct ir3_cache_key *key); + void *data, const struct ir3_shader_variant *bs, /* binning pass vs */ + const struct ir3_shader_variant *vs, const struct ir3_shader_variant *hs, + const struct ir3_shader_variant *ds, const struct ir3_shader_variant *gs, + const struct ir3_shader_variant *fs, const struct ir3_cache_key *key); void (*destroy_state)(void *data, struct ir3_program_state *state); }; diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_const.h b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_const.h index 21a4ab023..922601d7f 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_const.h +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_const.h @@ -111,8 +111,7 @@ ir3_user_consts_size(struct ir3_ubo_analysis_state *state, unsigned *packets, * constant buffer. */ static inline void -ir3_emit_constant_data(struct fd_screen *screen, - const struct ir3_shader_variant *v, +ir3_emit_constant_data(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { const struct ir3_const_state *const_state = ir3_const_state(v); @@ -308,8 +307,7 @@ ir3_emit_image_dims(struct fd_screen *screen, } static inline void -ir3_emit_immediates(struct fd_screen *screen, - const struct ir3_shader_variant *v, +ir3_emit_immediates(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { const struct ir3_const_state *const_state = ir3_const_state(v); @@ -331,30 +329,29 @@ ir3_emit_immediates(struct fd_screen *screen, /* NIR constant data has the same lifetime as immediates, so upload it * now, too. */ - ir3_emit_constant_data(screen, v, ring); + ir3_emit_constant_data(v, ring); } static inline void -ir3_emit_link_map(struct fd_screen *screen, - const struct ir3_shader_variant *producer, - const struct ir3_shader_variant *v, +ir3_emit_link_map(const struct ir3_shader_variant *producer, + const struct ir3_shader_variant *consumer, struct fd_ringbuffer *ring) { - const struct ir3_const_state *const_state = ir3_const_state(v); + const struct ir3_const_state *const_state = ir3_const_state(consumer); uint32_t base = const_state->offsets.primitive_map; - int size = DIV_ROUND_UP(v->input_size, 4); + int size = DIV_ROUND_UP(consumer->input_size, 4); /* truncate size to avoid writing constants that shader * does not use: */ - size = MIN2(size + base, v->constlen) - base; + size = MIN2(size + base, consumer->constlen) - base; /* convert out of vec4: */ base *= 4; size *= 4; if (size > 0) - emit_const_user(ring, v, base, size, producer->output_loc); + emit_const_user(ring, consumer, base, size, producer->output_loc); } /* emit stream-out buffers: */ @@ -423,7 +420,7 @@ emit_common_consts(const struct ir3_shader_variant *v, ir3_emit_user_consts(v, ring, constbuf); ir3_emit_ubos(ctx, v, ring, constbuf); if (shader_dirty) - ir3_emit_immediates(ctx->screen, v, ring); + ir3_emit_immediates(v, ring); } if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) { @@ -454,17 +451,19 @@ ir3_emit_driver_params(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_context *ctx, const struct pipe_draw_info *info, const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias *draw) assert_dt + const struct pipe_draw_start_count_bias *draw, + const uint32_t draw_id) assert_dt { assert(v->need_driver_params); const struct ir3_const_state *const_state = ir3_const_state(v); uint32_t offset = const_state->offsets.driver_param; uint32_t vertex_params[IR3_DP_VS_COUNT] = { - [IR3_DP_DRAWID] = 0, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */ + [IR3_DP_DRAWID] = draw_id, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */ [IR3_DP_VTXID_BASE] = info->index_size ? draw->index_bias : draw->start, [IR3_DP_INSTID_BASE] = info->start_instance, [IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx, + [IR3_DP_IS_INDEXED_DRAW] = info->index_size != 0 ? ~0 : 0, }; if (v->key.ucp_enables) { struct pipe_clip_state *ucp = &ctx->ucp; @@ -573,7 +572,7 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v, /* emit driver params every time: */ if (info && v->need_driver_params) { ring_wfi(ctx->batch, ring); - ir3_emit_driver_params(v, ring, ctx, info, indirect, draw); + ir3_emit_driver_params(v, ring, ctx, info, indirect, draw, 0); } } diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_descriptor.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_descriptor.c index 62f69faf7..dedead104 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_descriptor.c +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_descriptor.c @@ -34,35 +34,15 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr) switch (intr->intrinsic) { case nir_intrinsic_load_ssbo: case nir_intrinsic_store_ssbo: - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: - case nir_intrinsic_ssbo_atomic_fadd: - case nir_intrinsic_ssbo_atomic_fmin: - case nir_intrinsic_ssbo_atomic_fmax: - case nir_intrinsic_ssbo_atomic_fcomp_swap: + case nir_intrinsic_ssbo_atomic: + case nir_intrinsic_ssbo_atomic_swap: case nir_intrinsic_get_ssbo_size: desc_offset = IR3_BINDLESS_SSBO_OFFSET; break; case nir_intrinsic_image_load: case nir_intrinsic_image_store: - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_imin: - case nir_intrinsic_image_atomic_umin: - case nir_intrinsic_image_atomic_imax: - case nir_intrinsic_image_atomic_umax: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_atomic: + case nir_intrinsic_image_atomic_swap: case nir_intrinsic_image_size: case nir_intrinsic_image_samples: desc_offset = IR3_BINDLESS_IMAGE_OFFSET; @@ -81,8 +61,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr) } unsigned set = ir3_shader_descriptor_set(b->shader->info.stage); - nir_ssa_def *src = nir_ssa_for_src(b, intr->src[buffer_src], 1); - src = nir_iadd(b, src, nir_imm_int(b, desc_offset)); + nir_def *src = intr->src[buffer_src].ssa; + src = nir_iadd_imm(b, src, desc_offset); /* An out-of-bounds index into an SSBO/image array can cause a GPU fault * on access to the descriptor (I don't see any hw mechanism to bound the * access). We could just allow the resulting iova fault (it is a read @@ -91,8 +71,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr) * can avoid the dmesg spam and users thinking this is a driver bug: */ src = nir_umod_imm(b, src, IR3_BINDLESS_DESC_COUNT); - nir_ssa_def *bindless = nir_bindless_resource_ir3(b, 32, src, set); - nir_instr_rewrite_src_ssa(&intr->instr, &intr->src[buffer_src], bindless); + nir_def *bindless = nir_bindless_resource_ir3(b, 32, src, set); + nir_src_rewrite(&intr->src[buffer_src], bindless); return true; } diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.c index a6eb5febc..32aee3b07 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -27,7 +27,6 @@ #include "pipe/p_screen.h" #include "pipe/p_state.h" #include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" #include "util/format/u_format.h" #include "util/u_inlines.h" #include "util/u_memory.h" @@ -84,14 +83,15 @@ dump_shader_info(struct ir3_shader_variant *v, util_debug_message( debug, SHADER_INFO, "%s shader: %u inst, %u nops, %u non-nops, %u mov, %u cov, " - "%u dwords, %u last-baryf, %u half, %u full, %u constlen, " + "%u dwords, %u last-baryf, %u last-helper, %u half, %u full, %u constlen, " "%u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7, " "%u stp, %u ldp, %u sstall, %u (ss), %u systall, %u (sy), %d waves, " "%d loops\n", ir3_shader_stage(v), v->info.instrs_count, v->info.nops_count, v->info.instrs_count - v->info.nops_count, v->info.mov_count, v->info.cov_count, v->info.sizedwords, v->info.last_baryf, - v->info.max_half_reg + 1, v->info.max_reg + 1, v->constlen, + v->info.last_helper, v->info.max_half_reg + 1, v->info.max_reg + 1, + v->constlen, v->info.instrs_per_cat[0], v->info.instrs_per_cat[1], v->info.instrs_per_cat[2], v->info.instrs_per_cat[3], v->info.instrs_per_cat[4], v->info.instrs_per_cat[5], @@ -124,6 +124,8 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key, struct ir3_shader_variant *v; bool created = false; + MESA_TRACE_FUNC(); + /* Some shader key values may not be used by a given ir3_shader (for * example, fragment shader saturates in the vertex shader), so clean out * those flags to avoid recompiling. @@ -160,9 +162,13 @@ copy_stream_out(struct ir3_stream_output_info *i, STATIC_ASSERT(ARRAY_SIZE(i->stride) == ARRAY_SIZE(p->stride)); STATIC_ASSERT(ARRAY_SIZE(i->output) == ARRAY_SIZE(p->output)); + i->streams_written = 0; i->num_outputs = p->num_outputs; - for (int n = 0; n < ARRAY_SIZE(i->stride); n++) + for (int n = 0; n < ARRAY_SIZE(i->stride); n++) { i->stride[n] = p->stride[n]; + if (p->stride[n]) + i->streams_written |= BIT(n); + } for (int n = 0; n < ARRAY_SIZE(i->output); n++) { i->output[n].register_index = p->output[n].register_index; @@ -248,6 +254,8 @@ create_initial_variants_async(void *job, void *gdata, int thread_index) struct ir3_shader_state *hwcso = job; struct util_debug_callback debug = {}; + MESA_TRACE_FUNC(); + create_initial_variants(hwcso, &debug); } @@ -259,6 +267,8 @@ create_initial_compute_variants_async(void *job, void *gdata, int thread_index) struct util_debug_callback debug = {}; static struct ir3_shader_key key; /* static is implicitly zeroed */ + MESA_TRACE_FUNC(); + ir3_shader_variant(shader, key, false, &debug); shader->initial_variants_done = true; } @@ -310,13 +320,21 @@ ir3_shader_compute_state_create(struct pipe_context *pctx, if (ctx->screen->gen >= 6) ir3_nir_lower_io_to_bindless(nir); + enum ir3_wavesize_option api_wavesize = IR3_SINGLE_OR_DOUBLE; + enum ir3_wavesize_option real_wavesize = IR3_SINGLE_OR_DOUBLE; + + if (ctx->screen->gen >= 6 && !ctx->screen->info->a6xx.supports_double_threadsize) { + api_wavesize = IR3_SINGLE_ONLY; + real_wavesize = IR3_SINGLE_ONLY; + } + struct ir3_shader *shader = ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){ /* TODO: force to single on a6xx with legacy * ballot extension that uses 64-bit masks */ - .api_wavesize = IR3_SINGLE_OR_DOUBLE, - .real_wavesize = IR3_SINGLE_OR_DOUBLE, + .api_wavesize = api_wavesize, + .real_wavesize = real_wavesize, }, NULL); shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */ shader->cs.req_local_mem = cso->static_shared_mem; @@ -450,6 +468,8 @@ ir3_get_shader(struct ir3_shader_state *hwcso) if (!hwcso) return NULL; + MESA_TRACE_FUNC(); + struct ir3_shader *shader = hwcso->shader; perf_time (1000, "waited for %s:%s:%s variants", _mesa_shader_stage_to_abbrev(shader->type), @@ -502,6 +522,8 @@ ir3_screen_finalize_nir(struct pipe_screen *pscreen, void *nir) { struct fd_screen *screen = fd_screen(pscreen); + MESA_TRACE_FUNC(); + ir3_nir_lower_io_to_temporaries(nir); ir3_finalize_nir(screen->compiler, nir); @@ -561,6 +583,10 @@ ir3_screen_init(struct pipe_screen *pscreen) .bindless_fb_read_slot = IR3_BINDLESS_IMAGE_OFFSET + IR3_BINDLESS_IMAGE_COUNT - 1 - screen->max_rts, }; + + if (screen->gen >= 6) { + options.lower_base_vertex = true; + } screen->compiler = ir3_compiler_create(screen->dev, screen->dev_id, &options); /* TODO do we want to limit things to # of fast cores, or just limit @@ -638,3 +664,25 @@ ir3_update_max_tf_vtx(struct fd_context *ctx, ctx->streamout.max_tf_vtx = maxvtxcnt; } + +void +ir3_get_private_mem(struct fd_context *ctx, const struct ir3_shader_variant *so) +{ + uint32_t fibers_per_sp = ctx->screen->info->fibers_per_sp; + uint32_t num_sp_cores = ctx->screen->info->num_sp_cores; + + uint32_t per_fiber_size = so->pvtmem_size; + if (per_fiber_size > ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size) { + if (ctx->pvtmem[so->pvtmem_per_wave].bo) + fd_bo_del(ctx->pvtmem[so->pvtmem_per_wave].bo); + + uint32_t per_sp_size = ALIGN(per_fiber_size * fibers_per_sp, 1 << 12); + uint32_t total_size = per_sp_size * num_sp_cores; + + ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size = per_fiber_size; + ctx->pvtmem[so->pvtmem_per_wave].per_sp_size = per_sp_size; + ctx->pvtmem[so->pvtmem_per_wave].bo = fd_bo_new( + ctx->screen->dev, total_size, FD_BO_NOMAP, "pvtmem_%s_%d", + so->pvtmem_per_wave ? "per_wave" : "per_fiber", per_fiber_size); + } +} diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.h b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.h index 3d684beba..512ff9a30 100644 --- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.h +++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.h @@ -84,6 +84,9 @@ ir3_point_sprite(const struct ir3_shader_variant *fs, int i, void ir3_update_max_tf_vtx(struct fd_context *ctx, const struct ir3_shader_variant *v) assert_dt; +void ir3_get_private_mem(struct fd_context *ctx, + const struct ir3_shader_variant *so) assert_dt; + ENDC; #endif /* IR3_GALLIUM_H_ */ |