summary | refs | log | tree | commit | diff
path: root/lib/mesa/src/gallium/drivers/freedreno/ir3
diff options
context:
space:
mode:
author Jonathan Gray <jsg@cvs.openbsd.org> 2024-04-02 09:30:07 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org> 2024-04-02 09:30:07 +0000
commit f54e142455cb3c9d1662dae7e096a32a47e5409b (patch)
tree 440ecd46269f0eac25e349e1ed58f246490c5e26 /lib/mesa/src/gallium/drivers/freedreno/ir3
parent 36d8503c27530f68d655d3ef77a6eaa4dfd8ad65 (diff)
Import Mesa 23.3.6
Diffstat (limited to 'lib/mesa/src/gallium/drivers/freedreno/ir3')
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.c | 6
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.h | 8
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_const.h | 31
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_descriptor.c | 36
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.c | 60
-rw-r--r-- lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.h | 3
6 files changed, 88 insertions, 56 deletions
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.c
index 0501248dc..351ed52bd 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.c
@@ -97,6 +97,8 @@ ir3_cache_lookup(struct ir3_cache *cache, const struct ir3_cache_key *key,
return entry->data;
}
+ MESA_TRACE_FUNC();
+
if (key->hs)
assert(key->ds);
@@ -115,7 +117,7 @@ ir3_cache_lookup(struct ir3_cache *cache, const struct ir3_cache_key *key,
shaders[MESA_SHADER_TESS_CTRL] = hs;
}
- struct ir3_shader_variant *variants[MESA_SHADER_STAGES];
+ const struct ir3_shader_variant *variants[MESA_SHADER_STAGES];
struct ir3_shader_key shader_key = key->key;
for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
@@ -144,7 +146,7 @@ ir3_cache_lookup(struct ir3_cache *cache, const struct ir3_cache_key *key,
}
}
- struct ir3_shader_variant *bs;
+ const struct ir3_shader_variant *bs;
if (ir3_has_binning_vs(&key->key)) {
/* starting with a6xx, the same const state is used for binning and draw
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.h b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.h
index cd75f33d4..e29561f5b 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_cache.h
@@ -61,10 +61,10 @@ struct ir3_program_state {
struct ir3_cache_funcs {
struct ir3_program_state *(*create_state)(
- void *data, struct ir3_shader_variant *bs, /* binning pass vs */
- struct ir3_shader_variant *vs, struct ir3_shader_variant *hs,
- struct ir3_shader_variant *ds, struct ir3_shader_variant *gs,
- struct ir3_shader_variant *fs, const struct ir3_cache_key *key);
+ void *data, const struct ir3_shader_variant *bs, /* binning pass vs */
+ const struct ir3_shader_variant *vs, const struct ir3_shader_variant *hs,
+ const struct ir3_shader_variant *ds, const struct ir3_shader_variant *gs,
+ const struct ir3_shader_variant *fs, const struct ir3_cache_key *key);
void (*destroy_state)(void *data, struct ir3_program_state *state);
};
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_const.h b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_const.h
index 21a4ab023..922601d7f 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_const.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_const.h
@@ -111,8 +111,7 @@ ir3_user_consts_size(struct ir3_ubo_analysis_state *state, unsigned *packets,
* constant buffer.
*/
static inline void
-ir3_emit_constant_data(struct fd_screen *screen,
- const struct ir3_shader_variant *v,
+ir3_emit_constant_data(const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
{
const struct ir3_const_state *const_state = ir3_const_state(v);
@@ -308,8 +307,7 @@ ir3_emit_image_dims(struct fd_screen *screen,
}
static inline void
-ir3_emit_immediates(struct fd_screen *screen,
- const struct ir3_shader_variant *v,
+ir3_emit_immediates(const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
{
const struct ir3_const_state *const_state = ir3_const_state(v);
@@ -331,30 +329,29 @@ ir3_emit_immediates(struct fd_screen *screen,
/* NIR constant data has the same lifetime as immediates, so upload it
* now, too.
*/
- ir3_emit_constant_data(screen, v, ring);
+ ir3_emit_constant_data(v, ring);
}
static inline void
-ir3_emit_link_map(struct fd_screen *screen,
- const struct ir3_shader_variant *producer,
- const struct ir3_shader_variant *v,
+ir3_emit_link_map(const struct ir3_shader_variant *producer,
+ const struct ir3_shader_variant *consumer,
struct fd_ringbuffer *ring)
{
- const struct ir3_const_state *const_state = ir3_const_state(v);
+ const struct ir3_const_state *const_state = ir3_const_state(consumer);
uint32_t base = const_state->offsets.primitive_map;
- int size = DIV_ROUND_UP(v->input_size, 4);
+ int size = DIV_ROUND_UP(consumer->input_size, 4);
/* truncate size to avoid writing constants that shader
* does not use:
*/
- size = MIN2(size + base, v->constlen) - base;
+ size = MIN2(size + base, consumer->constlen) - base;
/* convert out of vec4: */
base *= 4;
size *= 4;
if (size > 0)
- emit_const_user(ring, v, base, size, producer->output_loc);
+ emit_const_user(ring, consumer, base, size, producer->output_loc);
}
/* emit stream-out buffers: */
@@ -423,7 +420,7 @@ emit_common_consts(const struct ir3_shader_variant *v,
ir3_emit_user_consts(v, ring, constbuf);
ir3_emit_ubos(ctx, v, ring, constbuf);
if (shader_dirty)
- ir3_emit_immediates(ctx->screen, v, ring);
+ ir3_emit_immediates(v, ring);
}
if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) {
@@ -454,17 +451,19 @@ ir3_emit_driver_params(const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_context *ctx,
const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
- const struct pipe_draw_start_count_bias *draw) assert_dt
+ const struct pipe_draw_start_count_bias *draw,
+ const uint32_t draw_id) assert_dt
{
assert(v->need_driver_params);
const struct ir3_const_state *const_state = ir3_const_state(v);
uint32_t offset = const_state->offsets.driver_param;
uint32_t vertex_params[IR3_DP_VS_COUNT] = {
- [IR3_DP_DRAWID] = 0, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
+ [IR3_DP_DRAWID] = draw_id, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
[IR3_DP_VTXID_BASE] = info->index_size ? draw->index_bias : draw->start,
[IR3_DP_INSTID_BASE] = info->start_instance,
[IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx,
+ [IR3_DP_IS_INDEXED_DRAW] = info->index_size != 0 ? ~0 : 0,
};
if (v->key.ucp_enables) {
struct pipe_clip_state *ucp = &ctx->ucp;
@@ -573,7 +572,7 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v,
/* emit driver params every time: */
if (info && v->need_driver_params) {
ring_wfi(ctx->batch, ring);
- ir3_emit_driver_params(v, ring, ctx, info, indirect, draw);
+ ir3_emit_driver_params(v, ring, ctx, info, indirect, draw, 0);
}
}
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_descriptor.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_descriptor.c
index 62f69faf7..dedead104 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_descriptor.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_descriptor.c
@@ -34,35 +34,15 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_load_ssbo:
case nir_intrinsic_store_ssbo:
- case nir_intrinsic_ssbo_atomic_add:
- case nir_intrinsic_ssbo_atomic_imin:
- case nir_intrinsic_ssbo_atomic_umin:
- case nir_intrinsic_ssbo_atomic_imax:
- case nir_intrinsic_ssbo_atomic_umax:
- case nir_intrinsic_ssbo_atomic_and:
- case nir_intrinsic_ssbo_atomic_or:
- case nir_intrinsic_ssbo_atomic_xor:
- case nir_intrinsic_ssbo_atomic_exchange:
- case nir_intrinsic_ssbo_atomic_comp_swap:
- case nir_intrinsic_ssbo_atomic_fadd:
- case nir_intrinsic_ssbo_atomic_fmin:
- case nir_intrinsic_ssbo_atomic_fmax:
- case nir_intrinsic_ssbo_atomic_fcomp_swap:
+ case nir_intrinsic_ssbo_atomic:
+ case nir_intrinsic_ssbo_atomic_swap:
case nir_intrinsic_get_ssbo_size:
desc_offset = IR3_BINDLESS_SSBO_OFFSET;
break;
case nir_intrinsic_image_load:
case nir_intrinsic_image_store:
- case nir_intrinsic_image_atomic_add:
- case nir_intrinsic_image_atomic_imin:
- case nir_intrinsic_image_atomic_umin:
- case nir_intrinsic_image_atomic_imax:
- case nir_intrinsic_image_atomic_umax:
- case nir_intrinsic_image_atomic_and:
- case nir_intrinsic_image_atomic_or:
- case nir_intrinsic_image_atomic_xor:
- case nir_intrinsic_image_atomic_exchange:
- case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_image_atomic:
+ case nir_intrinsic_image_atomic_swap:
case nir_intrinsic_image_size:
case nir_intrinsic_image_samples:
desc_offset = IR3_BINDLESS_IMAGE_OFFSET;
@@ -81,8 +61,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
}
unsigned set = ir3_shader_descriptor_set(b->shader->info.stage);
- nir_ssa_def *src = nir_ssa_for_src(b, intr->src[buffer_src], 1);
- src = nir_iadd(b, src, nir_imm_int(b, desc_offset));
+ nir_def *src = intr->src[buffer_src].ssa;
+ src = nir_iadd_imm(b, src, desc_offset);
/* An out-of-bounds index into an SSBO/image array can cause a GPU fault
* on access to the descriptor (I don't see any hw mechanism to bound the
* access). We could just allow the resulting iova fault (it is a read
@@ -91,8 +71,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
* can avoid the dmesg spam and users thinking this is a driver bug:
*/
src = nir_umod_imm(b, src, IR3_BINDLESS_DESC_COUNT);
- nir_ssa_def *bindless = nir_bindless_resource_ir3(b, 32, src, set);
- nir_instr_rewrite_src_ssa(&intr->instr, &intr->src[buffer_src], bindless);
+ nir_def *bindless = nir_bindless_resource_ir3(b, 32, src, set);
+ nir_src_rewrite(&intr->src[buffer_src], bindless);
return true;
}
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
index a6eb5febc..32aee3b07 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
@@ -27,7 +27,6 @@
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_parse.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
@@ -84,14 +83,15 @@ dump_shader_info(struct ir3_shader_variant *v,
util_debug_message(
debug, SHADER_INFO,
"%s shader: %u inst, %u nops, %u non-nops, %u mov, %u cov, "
- "%u dwords, %u last-baryf, %u half, %u full, %u constlen, "
+ "%u dwords, %u last-baryf, %u last-helper, %u half, %u full, %u constlen, "
"%u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7, "
"%u stp, %u ldp, %u sstall, %u (ss), %u systall, %u (sy), %d waves, "
"%d loops\n",
ir3_shader_stage(v), v->info.instrs_count, v->info.nops_count,
v->info.instrs_count - v->info.nops_count, v->info.mov_count,
v->info.cov_count, v->info.sizedwords, v->info.last_baryf,
- v->info.max_half_reg + 1, v->info.max_reg + 1, v->constlen,
+ v->info.last_helper, v->info.max_half_reg + 1, v->info.max_reg + 1,
+ v->constlen,
v->info.instrs_per_cat[0], v->info.instrs_per_cat[1],
v->info.instrs_per_cat[2], v->info.instrs_per_cat[3],
v->info.instrs_per_cat[4], v->info.instrs_per_cat[5],
@@ -124,6 +124,8 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key,
struct ir3_shader_variant *v;
bool created = false;
+ MESA_TRACE_FUNC();
+
/* Some shader key values may not be used by a given ir3_shader (for
* example, fragment shader saturates in the vertex shader), so clean out
* those flags to avoid recompiling.
@@ -160,9 +162,13 @@ copy_stream_out(struct ir3_stream_output_info *i,
STATIC_ASSERT(ARRAY_SIZE(i->stride) == ARRAY_SIZE(p->stride));
STATIC_ASSERT(ARRAY_SIZE(i->output) == ARRAY_SIZE(p->output));
+ i->streams_written = 0;
i->num_outputs = p->num_outputs;
- for (int n = 0; n < ARRAY_SIZE(i->stride); n++)
+ for (int n = 0; n < ARRAY_SIZE(i->stride); n++) {
i->stride[n] = p->stride[n];
+ if (p->stride[n])
+ i->streams_written |= BIT(n);
+ }
for (int n = 0; n < ARRAY_SIZE(i->output); n++) {
i->output[n].register_index = p->output[n].register_index;
@@ -248,6 +254,8 @@ create_initial_variants_async(void *job, void *gdata, int thread_index)
struct ir3_shader_state *hwcso = job;
struct util_debug_callback debug = {};
+ MESA_TRACE_FUNC();
+
create_initial_variants(hwcso, &debug);
}
@@ -259,6 +267,8 @@ create_initial_compute_variants_async(void *job, void *gdata, int thread_index)
struct util_debug_callback debug = {};
static struct ir3_shader_key key; /* static is implicitly zeroed */
+ MESA_TRACE_FUNC();
+
ir3_shader_variant(shader, key, false, &debug);
shader->initial_variants_done = true;
}
@@ -310,13 +320,21 @@ ir3_shader_compute_state_create(struct pipe_context *pctx,
if (ctx->screen->gen >= 6)
ir3_nir_lower_io_to_bindless(nir);
+ enum ir3_wavesize_option api_wavesize = IR3_SINGLE_OR_DOUBLE;
+ enum ir3_wavesize_option real_wavesize = IR3_SINGLE_OR_DOUBLE;
+
+ if (ctx->screen->gen >= 6 && !ctx->screen->info->a6xx.supports_double_threadsize) {
+ api_wavesize = IR3_SINGLE_ONLY;
+ real_wavesize = IR3_SINGLE_ONLY;
+ }
+
struct ir3_shader *shader =
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){
/* TODO: force to single on a6xx with legacy
* ballot extension that uses 64-bit masks
*/
- .api_wavesize = IR3_SINGLE_OR_DOUBLE,
- .real_wavesize = IR3_SINGLE_OR_DOUBLE,
+ .api_wavesize = api_wavesize,
+ .real_wavesize = real_wavesize,
}, NULL);
shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */
shader->cs.req_local_mem = cso->static_shared_mem;
@@ -450,6 +468,8 @@ ir3_get_shader(struct ir3_shader_state *hwcso)
if (!hwcso)
return NULL;
+ MESA_TRACE_FUNC();
+
struct ir3_shader *shader = hwcso->shader;
perf_time (1000, "waited for %s:%s:%s variants",
_mesa_shader_stage_to_abbrev(shader->type),
@@ -502,6 +522,8 @@ ir3_screen_finalize_nir(struct pipe_screen *pscreen, void *nir)
{
struct fd_screen *screen = fd_screen(pscreen);
+ MESA_TRACE_FUNC();
+
ir3_nir_lower_io_to_temporaries(nir);
ir3_finalize_nir(screen->compiler, nir);
@@ -561,6 +583,10 @@ ir3_screen_init(struct pipe_screen *pscreen)
.bindless_fb_read_slot = IR3_BINDLESS_IMAGE_OFFSET +
IR3_BINDLESS_IMAGE_COUNT - 1 - screen->max_rts,
};
+
+ if (screen->gen >= 6) {
+ options.lower_base_vertex = true;
+ }
screen->compiler = ir3_compiler_create(screen->dev, screen->dev_id, &options);
/* TODO do we want to limit things to # of fast cores, or just limit
@@ -638,3 +664,25 @@ ir3_update_max_tf_vtx(struct fd_context *ctx,
ctx->streamout.max_tf_vtx = maxvtxcnt;
}
+
+void
+ir3_get_private_mem(struct fd_context *ctx, const struct ir3_shader_variant *so)
+{
+ uint32_t fibers_per_sp = ctx->screen->info->fibers_per_sp;
+ uint32_t num_sp_cores = ctx->screen->info->num_sp_cores;
+
+ uint32_t per_fiber_size = so->pvtmem_size;
+ if (per_fiber_size > ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size) {
+ if (ctx->pvtmem[so->pvtmem_per_wave].bo)
+ fd_bo_del(ctx->pvtmem[so->pvtmem_per_wave].bo);
+
+ uint32_t per_sp_size = ALIGN(per_fiber_size * fibers_per_sp, 1 << 12);
+ uint32_t total_size = per_sp_size * num_sp_cores;
+
+ ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size = per_fiber_size;
+ ctx->pvtmem[so->pvtmem_per_wave].per_sp_size = per_sp_size;
+ ctx->pvtmem[so->pvtmem_per_wave].bo = fd_bo_new(
+ ctx->screen->dev, total_size, FD_BO_NOMAP, "pvtmem_%s_%d",
+ so->pvtmem_per_wave ? "per_wave" : "per_fiber", per_fiber_size);
+ }
+}
diff --git a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.h b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.h
index 3d684beba..512ff9a30 100644
--- a/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.h
+++ b/lib/mesa/src/gallium/drivers/freedreno/ir3/ir3_gallium.h
@@ -84,6 +84,9 @@ ir3_point_sprite(const struct ir3_shader_variant *fs, int i,
void ir3_update_max_tf_vtx(struct fd_context *ctx,
const struct ir3_shader_variant *v) assert_dt;
+void ir3_get_private_mem(struct fd_context *ctx,
+ const struct ir3_shader_variant *so) assert_dt;
+
ENDC;
#endif /* IR3_GALLIUM_H_ */