diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2024-04-02 09:30:07 +0000
---|---|---
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2024-04-02 09:30:07 +0000
commit | f54e142455cb3c9d1662dae7e096a32a47e5409b (patch) |
tree | 440ecd46269f0eac25e349e1ed58f246490c5e26 | /lib/mesa/src/gallium/drivers/zink/zink_compiler.c
parent | 36d8503c27530f68d655d3ef77a6eaa4dfd8ad65 (diff) |
Import Mesa 23.3.6
Diffstat (limited to 'lib/mesa/src/gallium/drivers/zink/zink_compiler.c')
-rw-r--r-- | lib/mesa/src/gallium/drivers/zink/zink_compiler.c | 2780 |
1 file changed, 1684 insertions(+), 1096 deletions(-)
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_compiler.c b/lib/mesa/src/gallium/drivers/zink/zink_compiler.c
index eb4e1e593..e1411bcb8 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_compiler.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_compiler.c
@@ -40,7 +40,6 @@
 #include "nir/tgsi_to_nir.h"
 #include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_from_mesa.h"
 
 #include "util/u_memory.h"
@@ -65,7 +64,7 @@ copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
          copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
       }
    } else {
-      nir_ssa_def *load = nir_load_deref(b, src);
+      nir_def *load = nir_load_deref(b, src);
       nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
    }
 }
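Most of the mechanical churn in this import follows the upstream NIR rename of `nir_ssa_def` to `nir_def` and the removal of the separate `nir_dest` wrapper (`intr->dest.ssa` becomes `intr->def`). A minimal sketch of a pass callback on the new spelling, assuming only the helpers visible in this diff (the callback itself is illustrative, not from this commit):

```c
/* illustrative only: pass-callback skeleton in the Mesa 23.3 spelling */
static bool
rewrite_cb(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_draw_id)
      return false;
   b->cursor = nir_before_instr(instr);
   nir_def *zero = nir_imm_int(b, 0);      /* was: nir_ssa_def */
   nir_def_rewrite_uses(&intr->def, zero); /* was: &intr->dest.ssa */
   nir_instr_remove(instr);
   return true;
}
```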
@@ -102,151 +101,25 @@
    fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);
 }
 
 static bool
-lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
+lower_basevertex_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
 {
-   if (instr->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   if (intr->intrinsic != nir_intrinsic_load_deref)
-      return false;
-   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
-   if (var->data.mode != nir_var_shader_in)
-      return false;
-   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
-      return false;
-
-   /* create second variable for the split */
-   nir_variable *var2 = nir_variable_clone(var, b->shader);
-   /* split new variable into second slot */
-   var2->data.driver_location++;
-   nir_shader_add_variable(b->shader, var2);
-
-   unsigned total_num_components = glsl_get_vector_elements(var->type);
-   /* new variable is the second half of the dvec */
-   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
-   /* clamp original variable to a dvec2 */
-   var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);
-
-   b->cursor = nir_after_instr(instr);
-
-   /* this is the first load instruction for the first half of the dvec3/4 components */
-   nir_ssa_def *load = nir_load_var(b, var);
-   /* this is the second load instruction for the second half of the dvec3/4 components */
-   nir_ssa_def *load2 = nir_load_var(b, var2);
-
-   nir_ssa_def *def[4];
-   /* create a new dvec3/4 comprised of all the loaded components from both variables */
-   def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
-   def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
-   def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
-   if (total_num_components == 4)
-      def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
-   nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
-   /* use the assembled dvec3/4 for all other uses of the load */
-   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
-                                  new_vec->parent_instr);
-
-   /* remove the original instr and its deref chain */
-   nir_instr *parent = intr->src[0].ssa->parent_instr;
-   nir_instr_remove(instr);
-   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
-
-   return true;
-}
-
-/* mesa/gallium always provides UINT versions of 64bit formats:
- * - rewrite loads as 32bit vec loads
- * - cast back to 64bit
- */
-static bool
-lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
-{
-   if (instr->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   if (intr->intrinsic != nir_intrinsic_load_deref)
-      return false;
-   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
-   if (var->data.mode != nir_var_shader_in)
-      return false;
-   if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
-      return false;
-
-   unsigned num_components = glsl_get_vector_elements(var->type);
-   enum glsl_base_type base_type;
-   switch (glsl_get_base_type(var->type)) {
-   case GLSL_TYPE_UINT64:
-      base_type = GLSL_TYPE_UINT;
-      break;
-   case GLSL_TYPE_INT64:
-      base_type = GLSL_TYPE_INT;
-      break;
-   case GLSL_TYPE_DOUBLE:
-      base_type = GLSL_TYPE_FLOAT;
-      break;
-   default:
-      unreachable("unknown 64-bit vertex attribute format!");
-   }
-   var->type = glsl_vector_type(base_type, num_components * 2);
-
-   b->cursor = nir_after_instr(instr);
-
-   nir_ssa_def *load = nir_load_var(b, var);
-   nir_ssa_def *casted[2];
-   for (unsigned i = 0; i < num_components; i++)
-      casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));
-
-   /* remove the original instr and its deref chain */
-   nir_instr *parent = intr->src[0].ssa->parent_instr;
-   nir_instr_remove(instr);
-   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
-
-   return true;
-}
-
-/* "64-bit three- and four-component vectors consume two consecutive locations."
- *  - 14.1.4. Location Assignment
- *
- * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
- * are assigned to consecutive locations, loaded separately, and then assembled back into a
- * composite value that's used in place of the original loaded ssa src
- */
-static bool
-lower_64bit_vertex_attribs(nir_shader *shader)
-{
-   if (shader->info.stage != MESA_SHADER_VERTEX)
-      return false;
-
-   bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
-   progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
-   return progress;
-}
-
-static bool
-lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
-{
-   if (in->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
    if (instr->intrinsic != nir_intrinsic_load_base_vertex)
       return false;
 
    b->cursor = nir_after_instr(&instr->instr);
 
-   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
+   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
    load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
-   nir_intrinsic_set_range(load, 4);
    load->num_components = 1;
-   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
+   nir_def_init(&load->instr, &load->def, 1, 32);
   nir_builder_instr_insert(b, &load->instr);
 
-   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
-                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
-                                          &instr->dest.ssa,
+   nir_def *composite = nir_build_alu(b, nir_op_bcsel,
+                                      nir_build_alu(b, nir_op_ieq, &load->def, nir_imm_int(b, 1), NULL, NULL),
+                                      &instr->def,
                                           nir_imm_int(b, 0), NULL);
 
-   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
+   nir_def_rewrite_uses_after(&instr->def, composite,
                                   composite->parent_instr);
    return true;
 }
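Throughout this file, `nir_load_push_constant` with explicit `.base`/`.range` indices gives way to a zink-specific `nir_load_push_constant_zink` whose only operand is the `ZINK_GFX_PUSHCONST_*` slot. A minimal sketch of the new emission — the wrapper function is invented for illustration, the builder call and enum are as used in the hunks here:

```c
/* sketch: load one 32-bit word from the zink push-constant block */
static nir_def *
load_pushconst_dword(nir_builder *b, unsigned zink_gfx_pushconst_slot)
{
   /* no .base/.range needed anymore; the slot index is the source */
   return nir_load_push_constant_zink(b, 1, 32,
                                      nir_imm_int(b, zink_gfx_pushconst_slot));
}
```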
@@ -260,28 +133,25 @@ lower_basevertex(nir_shader *shader)
    if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
       return false;
 
-   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
+   return nir_shader_intrinsics_pass(shader, lower_basevertex_instr,
+                                     nir_metadata_dominance, NULL);
 }
 
 
 static bool
-lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
+lower_drawid_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
 {
-   if (in->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
    if (instr->intrinsic != nir_intrinsic_load_draw_id)
       return false;
 
    b->cursor = nir_before_instr(&instr->instr);
 
-   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
+   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
    load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
-   nir_intrinsic_set_range(load, 4);
    load->num_components = 1;
-   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
+   nir_def_init(&load->instr, &load->def, 1, 32);
    nir_builder_instr_insert(b, &load->instr);
 
-   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
+   nir_def_rewrite_uses(&instr->def, &load->def);
 
    return true;
 }
@@ -295,7 +165,8 @@ lower_drawid(nir_shader *shader)
    if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
       return false;
 
-   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
+   return nir_shader_intrinsics_pass(shader, lower_drawid_instr,
+                                     nir_metadata_dominance, NULL);
 }
 
 struct lower_gl_point_state {
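Both lowerings above migrate from `nir_shader_instructions_pass` to `nir_shader_intrinsics_pass`, which invokes the callback only on `nir_intrinsic_instr`, so the `instr->type` check and `nir_instr_as_intrinsic()` cast drop out of every converted callback. A minimal sketch under those assumptions (the counting callback is illustrative):

```c
/* sketch: an intrinsics-only walk with the new pass entrypoint */
static bool
count_draw_id(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_draw_id)
      return false;
   (*(unsigned *)data)++;  /* illustrative side effect */
   return false;           /* nothing rewritten: report no progress */
}

static unsigned
draw_id_uses(nir_shader *s)
{
   unsigned count = 0;
   nir_shader_intrinsics_pass(s, count_draw_id, nir_metadata_all, &count);
   return count;
}
```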
@@ -307,7 +178,7 @@ static bool
 lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
 {
    struct lower_gl_point_state *state = data;
-   nir_ssa_def *vp_scale, *pos;
+   nir_def *vp_scale, *pos;
 
    if (instr->type != nir_instr_type_intrinsic)
       return false;
@@ -329,34 +200,34 @@ lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
    b->cursor = nir_before_instr(instr);
 
    // viewport-map endpoints
-   nir_ssa_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
-   vp_scale = nir_load_push_constant(b, 2, 32, vp_const_pos, .base = 1, .range = 2);
+   nir_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
+   vp_scale = nir_load_push_constant_zink(b, 2, 32, vp_const_pos);
 
    // Load point info values
-   nir_ssa_def *point_size = nir_load_var(b, state->gl_point_size);
-   nir_ssa_def *point_pos = nir_load_var(b, state->gl_pos_out);
+   nir_def *point_size = nir_load_var(b, state->gl_point_size);
+   nir_def *point_pos = nir_load_var(b, state->gl_pos_out);
 
    // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
-   nir_ssa_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
+   nir_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
    w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
    // halt_w_delta = w_delta / 2
-   nir_ssa_def *half_w_delta = nir_fmul(b, w_delta, nir_imm_float(b, 0.5));
+   nir_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5);
 
    // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
-   nir_ssa_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
+   nir_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
    h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
    // halt_h_delta = h_delta / 2
-   nir_ssa_def *half_h_delta = nir_fmul(b, h_delta, nir_imm_float(b, 0.5));
+   nir_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5);
 
-   nir_ssa_def *point_dir[4][2] = {
+   nir_def *point_dir[4][2] = {
       { nir_imm_float(b, -1), nir_imm_float(b, -1) },
       { nir_imm_float(b, -1), nir_imm_float(b, 1) },
       { nir_imm_float(b, 1), nir_imm_float(b, -1) },
       { nir_imm_float(b, 1), nir_imm_float(b, 1) }
    };
 
-   nir_ssa_def *point_pos_x = nir_channel(b, point_pos, 0);
-   nir_ssa_def *point_pos_y = nir_channel(b, point_pos, 1);
+   nir_def *point_pos_x = nir_channel(b, point_pos, 0);
+   nir_def *point_pos_y = nir_channel(b, point_pos, 1);
 
    for (size_t i = 0; i < 4; i++) {
       pos = nir_vec4(b,
@@ -381,9 +252,8 @@ static bool
 lower_gl_point_gs(nir_shader *shader)
 {
    struct lower_gl_point_state state;
-   nir_builder b;
 
-   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
+   shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
    shader->info.gs.vertices_out *= 4;
 
    // Gets the gl_Position in and out
@@ -398,10 +268,6 @@ lower_gl_point_gs(nir_shader *shader)
    if (!state.gl_pos_out || !state.gl_point_size)
       return false;
 
-   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
-   nir_builder_init(&b, entry);
-   b.cursor = nir_before_cf_list(&entry->body);
-
    return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
                                        nir_metadata_dominance, &state);
 }
@@ -416,14 +282,14 @@ struct lower_pv_mode_state {
    unsigned prim;
 };
 
-static nir_ssa_def*
+static nir_def*
 lower_pv_mode_gs_ring_index(nir_builder *b,
                             struct lower_pv_mode_state *state,
-                            nir_ssa_def *index)
+                            nir_def *index)
 {
-   nir_ssa_def *ring_offset = nir_load_var(b, state->ring_offset);
-   return nir_imod(b, nir_iadd(b, index, ring_offset),
-                   nir_imm_int(b, state->ring_size));
+   nir_def *ring_offset = nir_load_var(b, state->ring_offset);
+   return nir_imod_imm(b, nir_iadd(b, index, ring_offset),
+                       state->ring_size);
 }
 
 /* Given the final deref of chain of derefs this function will walk up the chain
@@ -441,7 +307,6 @@ replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
    case nir_deref_type_var:
       return new;
    case nir_deref_type_array:
-      assert(old->arr.index.is_ssa);
      return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
    case nir_deref_type_struct:
      return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
@@ -466,9 +331,8 @@ lower_pv_mode_gs_store(nir_builder *b,
       gl_varying_slot location = var->data.location;
       unsigned location_frac = var->data.location_frac;
       assert(state->varyings[location][location_frac]);
-      assert(intrin->src[1].is_ssa);
-      nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
-      nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
+      nir_def *pos_counter = nir_load_var(b, state->pos_counter);
+      nir_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
       nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location][location_frac]);
       nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
       // recreate the chain of deref that lead to the store.
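Several hunks in this region also switch to the `_imm` builder variants (`nir_fmul_imm`, `nir_imod_imm`, `nir_iadd_imm`), which take the constant operand directly instead of materializing it with `nir_imm_*`. Both spellings build the same ALU instruction; a sketch, assuming a builder `b` and operand `x` from a surrounding pass:

```c
/* sketch: two equivalent ways to halve a value */
static nir_def *
halve(nir_builder *b, nir_def *x)
{
   nir_def *old_style = nir_fmul(b, x, nir_imm_float(b, 0.5)); /* pre-23.3 */
   nir_def *new_style = nir_fmul_imm(b, x, 0.5);               /* 23.3 */
   return new_style; /* same instruction as old_style */
}
```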
@@ -484,10 +348,10 @@ static void
 lower_pv_mode_emit_rotated_prim(nir_builder *b,
                                 struct lower_pv_mode_state *state,
-                                nir_ssa_def *current_vertex)
+                                nir_def *current_vertex)
 {
-   nir_ssa_def *two = nir_imm_int(b, 2);
-   nir_ssa_def *three = nir_imm_int(b, 3);
+   nir_def *two = nir_imm_int(b, 2);
+   nir_def *three = nir_imm_int(b, 3);
    bool is_triangle = state->primitive_vert_count == 3;
    /* This shader will always see the last three vertices emitted by the user gs.
     * The following table is used to to rotate primitives within a strip generated
@@ -505,17 +369,17 @@ lower_pv_mode_emit_rotated_prim(nir_builder *b,
     *
     * odd or even primitive within draw
     */
-   nir_ssa_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
+   nir_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
    for (unsigned i = 0; i < state->primitive_vert_count; i++) {
       /* odd or even triangle within strip emitted by user GS
        * this is handled using the table
        */
-      nir_ssa_def *odd_user_prim = nir_imod(b, current_vertex, two);
+      nir_def *odd_user_prim = nir_imod(b, current_vertex, two);
       unsigned offset_even = vert_maps[is_triangle][0][i];
       unsigned offset_odd = vert_maps[is_triangle][1][i];
-      nir_ssa_def *offset_even_value = nir_imm_int(b, offset_even);
-      nir_ssa_def *offset_odd_value = nir_imm_int(b, offset_odd);
-      nir_ssa_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
+      nir_def *offset_even_value = nir_imm_int(b, offset_even);
+      nir_def *offset_odd_value = nir_imm_int(b, offset_odd);
+      nir_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
                                      offset_odd_value, offset_even_value);
      /* Here we account for how triangles are provided to the gs from a strip.
       * For even primitives we rotate by 3, meaning we do nothing.
@@ -538,7 +402,7 @@ lower_pv_mode_emit_rotated_prim(nir_builder *b,
       gl_varying_slot location = var->data.location;
       unsigned location_frac = var->data.location_frac;
       if (state->varyings[location][location_frac]) {
-         nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
+         nir_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
         nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location][location_frac]), index);
         copy_vars(b, nir_build_deref_var(b, var), value);
      }
@@ -555,7 +419,7 @@ lower_pv_mode_gs_emit_vertex(nir_builder *b,
    b->cursor = nir_before_instr(&intrin->instr);
 
    // increment pos_counter
-   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
+   nir_def *pos_counter = nir_load_var(b, state->pos_counter);
    nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);
 
    nir_instr_remove(&intrin->instr);
@@ -569,10 +433,10 @@ lower_pv_mode_gs_end_primitive(nir_builder *b,
 {
    b->cursor = nir_before_instr(&intrin->instr);
 
-   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
+   nir_def *pos_counter = nir_load_var(b, state->pos_counter);
    nir_push_loop(b);
    {
-      nir_ssa_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
+      nir_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
      nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
                             nir_imm_int(b, state->primitive_vert_count)));
      nir_jump(b, nir_jump_break);
@@ -621,14 +485,14 @@ lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
 }
 
 static unsigned int
-lower_pv_mode_vertices_for_prim(enum shader_prim prim)
+lower_pv_mode_vertices_for_prim(enum mesa_prim prim)
 {
    switch (prim) {
-   case SHADER_PRIM_POINTS:
+   case MESA_PRIM_POINTS:
       return 1;
-   case SHADER_PRIM_LINE_STRIP:
+   case MESA_PRIM_LINE_STRIP:
       return 2;
-   case SHADER_PRIM_TRIANGLE_STRIP:
+   case MESA_PRIM_TRIANGLE_STRIP:
       return 3;
    default:
       unreachable("unsupported primitive for gs output");
@@ -643,8 +507,7 @@ lower_pv_mode_gs(nir_shader *shader, unsigned prim)
    memset(state.varyings, 0, sizeof(state.varyings));
 
    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
-   nir_builder_init(&b, entry);
-   b.cursor = nir_before_cf_list(&entry->body);
+   b = nir_builder_at(nir_before_impl(entry));
 
    state.primitive_vert_count =
      lower_pv_mode_vertices_for_prim(shader->info.gs.output_primitive);
@@ -699,12 +562,12 @@ struct lower_line_stipple_state {
    bool line_rectangular;
 };
 
-static nir_ssa_def *
-viewport_map(nir_builder *b, nir_ssa_def *vert,
-             nir_ssa_def *scale)
+static nir_def *
+viewport_map(nir_builder *b, nir_def *vert,
+             nir_def *scale)
 {
-   nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
-   nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, vert, 0x3),
+   nir_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
+   nir_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2),
                                      w_recip);
    return nir_fmul(b, ndc_point, scale);
 }
@@ -725,21 +588,19 @@ lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
    nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
    // viewport-map endpoints
-   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
-                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
-                                                  .base = 1,
-                                                  .range = 2);
-   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
-   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
+   nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
+                                                   nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
+   nir_def *prev = nir_load_var(b, state->prev_pos);
+   nir_def *curr = nir_load_var(b, state->pos_out);
    prev = viewport_map(b, prev, vp_scale);
    curr = viewport_map(b, curr, vp_scale);
 
    // calculate length of line
-   nir_ssa_def *len;
+   nir_def *len;
    if (state->line_rectangular)
       len = nir_fast_distance(b, prev, curr);
    else {
-      nir_ssa_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
+      nir_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
      len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
    }
    // update stipple_counter
@@ -796,8 +657,7 @@ lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
    state.line_rectangular = line_rectangular;
    // initialize pos_counter and stipple_counter
    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
-   nir_builder_init(&b, entry);
-   b.cursor = nir_before_cf_list(&entry->body);
+   b = nir_builder_at(nir_before_impl(entry));
    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
    nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);
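Builder setup in these passes collapses from the two-step `nir_builder_init()` plus cursor assignment into a single `nir_builder_at()` call, with `nir_before_impl()`/`nir_after_impl()` replacing cursors built from `&impl->body`. A sketch of the change:

```c
/* sketch: constructing a builder at the top of the entrypoint */
static nir_builder
builder_at_shader_start(nir_shader *shader)
{
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   /* before 23.3:
    *    nir_builder b;
    *    nir_builder_init(&b, entry);
    *    b.cursor = nir_before_cf_list(&entry->body);
    */
   return nir_builder_at(nir_before_impl(entry));
}
```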
@@ -810,7 +670,7 @@ lower_line_stipple_fs(nir_shader *shader)
 {
    nir_builder b;
    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
-   nir_builder_init(&b, entry);
+   b = nir_builder_at(nir_after_impl(entry));
 
    // create stipple counter
    nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
@@ -831,41 +691,38 @@ lower_line_stipple_fs(nir_shader *shader)
       sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
    }
 
-   b.cursor = nir_after_cf_list(&entry->body);
-
-   nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
-                                                 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
-                                                 .base = 1);
-   nir_ssa_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
+   nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
+                                                  nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
+   nir_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
    pattern = nir_iand_imm(&b, pattern, 0xffff);
 
-   nir_ssa_def *sample_mask_in = nir_load_sample_mask_in(&b);
+   nir_def *sample_mask_in = nir_load_sample_mask_in(&b);
    nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
    nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
    nir_store_var(&b, v, sample_mask_in, 1);
    nir_store_var(&b, sample_mask, sample_mask_in, 1);
    nir_push_loop(&b);
    {
-      nir_ssa_def *value = nir_load_var(&b, v);
-      nir_ssa_def *index = nir_ufind_msb(&b, value);
-      nir_ssa_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
-      nir_ssa_def *new_value = nir_ixor(&b, value, index_mask);
+      nir_def *value = nir_load_var(&b, v);
+      nir_def *index = nir_ufind_msb(&b, value);
+      nir_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
+      nir_def *new_value = nir_ixor(&b, value, index_mask);
       nir_store_var(&b, v, new_value, 1);
      nir_push_if(&b, nir_ieq_imm(&b, value, 0));
      nir_jump(&b, nir_jump_break);
      nir_pop_if(&b, NULL);
 
-      nir_ssa_def *stipple_pos =
+      nir_def *stipple_pos =
         nir_interp_deref_at_sample(&b, 1, 32,
-                                   &nir_build_deref_var(&b, stipple)->dest.ssa, index);
+                                   &nir_build_deref_var(&b, stipple)->def, index);
      stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
                              nir_imm_float(&b, 16.0));
      stipple_pos = nir_f2i32(&b, stipple_pos);
-      nir_ssa_def *bit =
+      nir_def *bit =
         nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
      nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
      {
-         nir_ssa_def *value = nir_load_var(&b, sample_mask);
+         nir_def *value = nir_load_var(&b, sample_mask);
         value = nir_ixor(&b, value, index_mask);
         nir_store_var(&b, sample_mask, value, 1);
      }
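The loop above walks the covered samples with `nir_ufind_msb`, interpolates the stipple coordinate at each sample, and clears the sample's mask bit when the pattern bit is 0. The per-sample test reduces to this scalar computation — a CPU-side sketch with an invented helper name, mirroring the shifts and masks emitted above:

```c
#include <math.h>
#include <stdbool.h>
#include <stdint.h>

/* sketch: does the 16-bit line-stipple pattern cover this position? */
static bool
stipple_covered(uint32_t pushconst_word, float stipple_pos)
{
   uint32_t pattern = pushconst_word & 0xffff;   /* low bits: pattern */
   float factor = (float)(pushconst_word >> 16); /* high bits: repeat factor */
   int bit = (int)fmodf(stipple_pos / factor, 16.0f);
   return ((pattern >> bit) & 1) != 0;
}
```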
@@ -901,7 +758,6 @@ lower_line_smooth_gs_store(nir_builder *b,
    unsigned location_frac = var->data.location_frac;
    if (location != VARYING_SLOT_POS) {
       assert(state->varyings[location]);
-      assert(intrin->src[1].is_ssa);
       nir_store_var(b, state->varyings[location][location_frac],
                     intrin->src[1].ssa,
                     nir_intrinsic_write_mask(intrin));
@@ -921,29 +777,26 @@ lower_line_smooth_gs_emit_vertex(nir_builder *b,
    b->cursor = nir_before_instr(&intrin->instr);
 
    nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
-   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
-                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
-                                                  .base = 1,
-                                                  .range = 2);
-   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
-   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
-   nir_ssa_def *prev_vp = viewport_map(b, prev, vp_scale);
-   nir_ssa_def *curr_vp = viewport_map(b, curr, vp_scale);
-
-   nir_ssa_def *width = nir_load_push_constant(b, 1, 32,
-                                               nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH),
-                                               .base = 1);
-   nir_ssa_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
+   nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
+                                                   nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
+   nir_def *prev = nir_load_var(b, state->prev_pos);
+   nir_def *curr = nir_load_var(b, state->pos_out);
+   nir_def *prev_vp = viewport_map(b, prev, vp_scale);
+   nir_def *curr_vp = viewport_map(b, curr, vp_scale);
+
+   nir_def *width = nir_load_push_constant_zink(b, 1, 32,
+                                                nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH));
+   nir_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
 
    const unsigned yx[2] = { 1, 0 };
-   nir_ssa_def *vec = nir_fsub(b, curr_vp, prev_vp);
-   nir_ssa_def *len = nir_fast_length(b, vec);
-   nir_ssa_def *dir = nir_normalize(b, vec);
-   nir_ssa_def *half_length = nir_fmul_imm(b, len, 0.5);
+   nir_def *vec = nir_fsub(b, curr_vp, prev_vp);
+   nir_def *len = nir_fast_length(b, vec);
+   nir_def *dir = nir_normalize(b, vec);
+   nir_def *half_length = nir_fmul_imm(b, len, 0.5);
    half_length = nir_fadd_imm(b, half_length, 0.5);
 
-   nir_ssa_def *vp_scale_rcp = nir_frcp(b, vp_scale);
-   nir_ssa_def *tangent =
+   nir_def *vp_scale_rcp = nir_frcp(b, vp_scale);
+   nir_def *tangent =
       nir_fmul(b,
                nir_fmul(b,
                         nir_swizzle(b, dir, yx, 2),
@@ -953,7 +806,7 @@ lower_line_smooth_gs_emit_vertex(nir_builder *b,
    tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
 
    dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);
-   nir_ssa_def *line_offets[8] = {
+   nir_def *line_offets[8] = {
       nir_fadd(b, tangent, nir_fneg(b, dir)),
       nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
       tangent,
@@ -963,9 +816,9 @@ lower_line_smooth_gs_emit_vertex(nir_builder *b,
       nir_fadd(b, tangent, dir),
       nir_fadd(b, nir_fneg(b, tangent), dir),
    };
-   nir_ssa_def *line_coord =
+   nir_def *line_coord =
       nir_vec4(b, half_width, half_width, half_length, half_length);
-   nir_ssa_def *line_coords[8] = {
+   nir_def *line_coords[8] = {
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, -1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, -1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
@@ -1125,12 +978,11 @@ lower_line_smooth_gs(nir_shader *shader)
 
    // initialize pos_counter
    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
-   nir_builder_init(&b, entry);
-   b.cursor = nir_before_cf_list(&entry->body);
+   b = nir_builder_at(nir_before_impl(entry));
    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
 
    shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
-   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
+   shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
 
    return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
                                        nir_metadata_dominance, &state);
@@ -1159,11 +1011,9 @@ lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
 
    // initialize stipple_pattern
    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
-   nir_builder_init(&b, entry);
-   b.cursor = nir_before_cf_list(&entry->body);
-   nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
-                                                 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
-                                                 .base = 1);
+   b = nir_builder_at(nir_before_impl(entry));
+   nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
+                                                  nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
    nir_store_var(&b, stipple_pattern, pattern, 1);
 }
@@ -1195,8 +1045,8 @@ lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
       alu_instr->op != nir_op_unpack_64_2x32)
      return false;
    b->cursor = nir_before_instr(&alu_instr->instr);
-   nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
-   nir_ssa_def *dest;
+   nir_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
+   nir_def *dest;
    switch (alu_instr->op) {
    case nir_op_pack_64_2x32:
      dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
@@ -1207,7 +1057,7 @@ lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
    default:
      unreachable("Impossible opcode");
    }
-   nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest);
+   nir_def_rewrite_uses(&alu_instr->def, dest);
    nir_instr_remove(&alu_instr->instr);
    return true;
 }
@@ -1221,16 +1071,15 @@ lower_64bit_pack(nir_shader *shader)
 
 nir_shader *
 zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
-                               const nir_shader *prev_stage,
-                               int last_pv_vert_offset)
+                               const nir_shader *prev_stage)
 {
    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
                                                   options,
                                                   "filled quad gs");
 
    nir_shader *nir = b.shader;
-   nir->info.gs.input_primitive = SHADER_PRIM_LINES_ADJACENCY;
-   nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
+   nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
+   nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
    nir->info.gs.vertices_in = 4;
    nir->info.gs.vertices_out = 6;
    nir->info.gs.invocations = 1;
@@ -1287,13 +1136,11 @@ zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
    int mapping_first[] = {0, 1, 2, 0, 2, 3};
    int mapping_last[] = {0, 1, 3, 1, 2, 3};
-   nir_ssa_def *last_pv_vert_def = nir_load_ubo(&b, 1, 32,
-                                                nir_imm_int(&b, 0), nir_imm_int(&b, last_pv_vert_offset),
-                                                .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
+   nir_def *last_pv_vert_def = nir_load_provoking_last(&b);
    last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
    for (unsigned i = 0; i < 6; ++i) {
       /* swap indices 2 and 3 */
-      nir_ssa_def *idx = nir_bcsel(&b, last_pv_vert_def,
+      nir_def *idx = nir_bcsel(&b, last_pv_vert_def,
                                    nir_imm_int(&b, mapping_last[i]),
                                    nir_imm_int(&b, mapping_first[i]));
      /* Copy inputs to outputs. */
@@ -1315,6 +1162,41 @@ zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
    return nir;
 }
 
+static bool
+lower_system_values_to_inlined_uniforms_instr(nir_builder *b,
+                                              nir_intrinsic_instr *intrin,
+                                              void *data)
+{
+   int inlined_uniform_offset;
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_load_flat_mask:
+      inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
+      break;
+   case nir_intrinsic_load_provoking_last:
+      inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
+      break;
+   default:
+      return false;
+   }
+
+   b->cursor = nir_before_instr(&intrin->instr);
+   nir_def *new_dest_def = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
+                                        nir_imm_int(b, inlined_uniform_offset),
+                                        .align_mul = 4, .align_offset = 0,
+                                        .range_base = 0, .range = ~0);
+   nir_def_rewrite_uses(&intrin->def, new_dest_def);
+   nir_instr_remove(&intrin->instr);
+   return true;
+}
+
+bool
+zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
+{
+   return nir_shader_intrinsics_pass(nir,
+                                     lower_system_values_to_inlined_uniforms_instr,
+                                     nir_metadata_dominance, NULL);
+}
+
 void
 zink_screen_init_compiler(struct zink_screen *screen)
 {
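The new `zink_lower_system_values_to_inlined_uniforms` pass above replaces the old scheme of threading `last_pv_vert_offset` into the generated GS: the quads-emulation shader now emits `nir_load_provoking_last`, and the pass later turns such system values into constant-offset loads from the inlined-uniform words at the start of UBO 0. The offset math, as a sketch (the wrapper name is invented; the `ZINK_INLINE_VAL_*` slot indices and the `nir_load_ubo` parameters mirror the added code):

```c
/* sketch: an inlined uniform is just 32-bit word N of UBO 0 */
static nir_def *
load_inlined_uniform(nir_builder *b, unsigned zink_inline_val_slot)
{
   return nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
                       nir_imm_int(b, zink_inline_val_slot * sizeof(uint32_t)),
                       .align_mul = 4, .align_offset = 0,
                       .range_base = 0, .range = ~0);
}
```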
@@ -1328,6 +1210,9 @@ zink_screen_init_compiler(struct zink_screen *screen)
       .lower_flrp32 = true,
       .lower_fpow = true,
       .lower_fsat = true,
+      .lower_hadd = true,
+      .lower_iadd_sat = true,
+      .lower_fisnormal = true,
       .lower_extract_byte = true,
       .lower_extract_word = true,
       .lower_insert_byte = true,
@@ -1342,21 +1227,20 @@ zink_screen_init_compiler(struct zink_screen *screen)
       .lower_ldexp = true,
       .lower_mul_high = true,
-      .lower_rotate = true,
       .lower_uadd_carry = true,
       .lower_usub_borrow = true,
       .lower_uadd_sat = true,
       .lower_usub_sat = true,
       .lower_vector_cmp = true,
       .lower_int64_options = 0,
-      .lower_doubles_options = 0,
+      .lower_doubles_options = nir_lower_dround_even,
       .lower_uniforms_to_ubo = true,
       .has_fsub = true,
       .has_isub = true,
-      .has_txs = true,
       .lower_mul_2x32_64 = true,
       .support_16bit_alu = true, /* not quite what it sounds like */
       .max_unroll_iterations = 0,
+      .use_interpolated_input_intrinsics = true,
    };
 
    screen->nir_options = default_options;
@@ -1413,10 +1297,10 @@ zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
 }
 
 static bool
-dest_is_64bit(nir_dest *dest, void *state)
+def_is_64bit(nir_def *def, void *state)
 {
    bool *lower = (bool *)state;
-   if (dest && (nir_dest_bit_size(*dest) == 64)) {
+   if (def && (def->bit_size == 64)) {
       *lower = true;
       return false;
    }
@@ -1442,7 +1326,7 @@ filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
     * doesn't have const variants, so do the ugly const_cast here.
     */
    nir_instr *instr = (nir_instr *)const_instr;
-   nir_foreach_dest(instr, dest_is_64bit, &lower);
+   nir_foreach_def(instr, def_is_64bit, &lower);
    if (lower)
       return true;
    nir_foreach_src(instr, src_is_64bit, &lower);
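With `nir_dest` gone, the per-instruction destination walker becomes `nir_foreach_def` over `nir_def *`, as the `filter_64_bit_instr` hunk above shows. A sketch of the renamed callback shape, mirroring `def_is_64bit()`:

```c
/* sketch: flag any 64-bit result while walking an instruction's defs */
static bool
flag_64bit_def(nir_def *def, void *state)
{
   if (def && def->bit_size == 64) {
      *(bool *)state = true;
      return false;   /* stop walking this instruction */
   }
   return true;
}
/* usage: bool lower = false; nir_foreach_def(instr, flag_64bit_def, &lower); */
```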
@@ -1514,25 +1398,25 @@ bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
       return false;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    nir_variable *var = NULL;
-   nir_ssa_def *offset = NULL;
+   nir_def *offset = NULL;
    bool is_load = true;
    b->cursor = nir_before_instr(instr);
 
    switch (intr->intrinsic) {
    case nir_intrinsic_store_ssbo:
-      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
+      var = bo->ssbo[intr->def.bit_size >> 4];
       offset = intr->src[2].ssa;
       is_load = false;
       break;
    case nir_intrinsic_load_ssbo:
-      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
+      var = bo->ssbo[intr->def.bit_size >> 4];
       offset = intr->src[1].ssa;
       break;
    case nir_intrinsic_load_ubo:
       if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
-         var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
+         var = bo->uniforms[intr->def.bit_size >> 4];
       else
-         var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
+         var = bo->ubo[intr->def.bit_size >> 4];
       offset = intr->src[1].ssa;
       break;
    default:
@@ -1550,18 +1434,18 @@ bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
       return false;
 
    unsigned rewrites = 0;
-   nir_ssa_def *result[2];
+   nir_def *result[2];
    for (unsigned i = 0; i < intr->num_components; i++) {
       if (offset_bytes + i >= size) {
         rewrites++;
         if (is_load)
-            result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
+            result[i] = nir_imm_zero(b, 1, intr->def.bit_size);
      }
    }
    assert(rewrites == intr->num_components);
    if (is_load) {
-      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
-      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
+      nir_def *load = nir_vec(b, result, intr->num_components);
+      nir_def_rewrite_uses(&intr->def, load);
    }
    nir_instr_remove(instr);
    return true;
@@ -1575,7 +1459,7 @@ bound_bo_access(nir_shader *shader, struct zink_shader *zs)
 }
 
 static void
-optimize_nir(struct nir_shader *s, struct zink_shader *zs)
+optimize_nir(struct nir_shader *s, struct zink_shader *zs, bool can_shrink)
 {
    bool progress;
    do {
@@ -1604,6 +1488,8 @@ optimize_nir(struct nir_shader *s, struct zink_shader *zs)
       NIR_PASS(progress, s, zink_nir_lower_b2b);
       if (zs)
          NIR_PASS(progress, s, bound_bo_access, zs);
+      if (can_shrink)
+         NIR_PASS(progress, s, nir_opt_shrink_vectors);
    } while (progress);
 
    do {
@@ -1631,7 +1517,7 @@ lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    if (intr->intrinsic != nir_intrinsic_load_deref)
       return false;
-   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
+   nir_variable *var = nir_intrinsic_get_var(intr, 0);
    if (!var->data.fb_fetch_output)
       return false;
    b->cursor = nir_after_instr(instr);
@@ -1648,10 +1534,10 @@ lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
    enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
    fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
    nir_shader_add_variable(b->shader, fbfetch);
-   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
-   nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
-   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
+   nir_def *deref = &nir_build_deref_var(b, fbfetch)->def;
+   nir_def *sample = ms ? nir_load_sample_id(b) : nir_undef(b, 1, 32);
+   nir_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
+   nir_def_rewrite_uses(&intr->def, load);
    return true;
 }
@@ -1694,8 +1580,7 @@ lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
    if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
       return false;
 
-   assert(lod_src.is_ssa);
-   nir_ssa_def *lod = lod_src.ssa;
+   nir_def *lod = lod_src.ssa;
 
    int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
    int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
@@ -1706,17 +1591,17 @@ lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
    levels->dest_type = nir_type_int | lod->bit_size;
    if (offset_idx >= 0) {
       levels->src[0].src_type = nir_tex_src_texture_offset;
-      nir_src_copy(&levels->src[0].src, &txf->src[offset_idx].src, &levels->instr);
+      levels->src[0].src = nir_src_for_ssa(txf->src[offset_idx].src.ssa);
    }
    if (handle_idx >= 0) {
       levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
-      nir_src_copy(&levels->src[!!(offset_idx >= 0)].src, &txf->src[handle_idx].src, &levels->instr);
+      levels->src[!!(offset_idx >= 0)].src = nir_src_for_ssa(txf->src[handle_idx].src.ssa);
    }
-   nir_ssa_dest_init(&levels->instr, &levels->dest,
-                     nir_tex_instr_dest_size(levels), 32, NULL);
+   nir_def_init(&levels->instr, &levels->def,
+                nir_tex_instr_dest_size(levels), 32);
    nir_builder_instr_insert(b, &levels->instr);
 
-   nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->dest.ssa));
+   nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->def));
    nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
    nir_builder_instr_insert(b, &new_txf->instr);
 
@@ -1725,12 +1610,12 @@ lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
    unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
    oob_values[3] = (txf->dest_type & nir_type_float) ?
                    nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
-   nir_ssa_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
+   nir_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
 
    nir_pop_if(b, lod_oob_else);
-   nir_ssa_def *robust_txf = nir_if_phi(b, &new_txf->dest.ssa, oob_val);
+   nir_def *robust_txf = nir_if_phi(b, &new_txf->def, oob_val);
 
-   nir_ssa_def_rewrite_uses(&txf->dest.ssa, robust_txf);
+   nir_def_rewrite_uses(&txf->def, robust_txf);
    nir_instr_remove_v(in);
    return true;
 }
@@ -1759,48 +1644,31 @@ check_psiz(struct nir_shader *s)
 }
 
 static nir_variable *
-find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
-{
-   unsigned found = 0;
-   if (!location_frac && location != VARYING_SLOT_PSIZ) {
-      nir_foreach_shader_out_variable(var, nir) {
-         if (var->data.location == location)
-            found++;
-      }
-   }
-   if (found) {
-      /* multiple variables found for this location: find the biggest one */
-      nir_variable *out = NULL;
-      unsigned slots = 0;
-      nir_foreach_shader_out_variable(var, nir) {
-         if (var->data.location == location) {
-            unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
-            if (count_slots > slots) {
-               slots = count_slots;
-               out = var;
-            }
-         }
-      }
-      return out;
-   } else {
-      /* only one variable found or this is location_frac */
-      nir_foreach_shader_out_variable(var, nir) {
-         if (var->data.location == location &&
-             (var->data.location_frac == location_frac ||
-              (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
-            if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
-               return var;
-         }
+find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz, nir_variable_mode mode)
+{
+   assert((int)location >= 0);
+
+   nir_foreach_variable_with_modes(var, nir, mode) {
+      if (var->data.location == location && (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)) {
+         unsigned num_components = glsl_get_vector_elements(var->type);
+         if (glsl_type_is_64bit(glsl_without_array(var->type)))
+            num_components *= 2;
+         if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0)
+            num_components = glsl_get_aoa_size(var->type);
+         if (var->data.location_frac <= location_frac &&
+             var->data.location_frac + num_components > location_frac)
            return var;
      }
    }
    return NULL;
 }
 
 static bool
-is_inlined(const bool *inlined, const struct pipe_stream_output *output)
+is_inlined(const bool *inlined, const nir_xfb_output_info *output)
 {
-   for (unsigned i = 0; i < output->num_components; i++)
-      if (!inlined[output->start_component + i])
+   unsigned num_components = util_bitcount(output->component_mask);
+   for (unsigned i = 0; i < num_components; i++)
+      if (!inlined[output->component_offset + i])
         return false;
    return true;
 }
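The robustness lowering above brackets the texel fetch in control flow — fetch when `lod < textureQueryLevels()`, otherwise produce a constant (0,0,0,1) — and merges the two values with `nir_if_phi`. A sketch of that select shape, with trivial stand-in values replacing the cloned `txf` and the out-of-bounds constant from the real pass:

```c
/* sketch: values created inside each branch are merged by nir_if_phi */
static nir_def *
guard_fetch(nir_builder *b, nir_def *lod, nir_def *levels)
{
   nir_if *oob_if = nir_push_if(b, nir_ilt(b, lod, levels));
   nir_def *fetched = nir_imm_float(b, 0.0);  /* stand-in for the cloned txf */
   nir_push_else(b, oob_if);
   nir_def *oob_val = nir_imm_float(b, 1.0);  /* stand-in for the oob constant */
   nir_pop_if(b, NULL);
   return nir_if_phi(b, fetched, oob_val);    /* merged SSA value */
}
```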
@@ -1888,87 +1756,98 @@ get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
    return num_components;
 }
 
-static const struct pipe_stream_output *
-find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
+static unsigned
+get_var_slot_count(nir_shader *nir, nir_variable *var)
+{
+   assert(var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out);
+   const struct glsl_type *type = var->type;
+   if (nir_is_arrayed_io(var, nir->info.stage))
+      type = glsl_get_array_element(type);
+   unsigned slot_count = 0;
+   if (var->data.location >= VARYING_SLOT_VAR0)
+      slot_count = glsl_count_vec4_slots(type, false, false);
+   else if (glsl_type_is_array(type))
+      slot_count = DIV_ROUND_UP(glsl_get_aoa_size(type), 4);
+   else
+      slot_count = 1;
+   return slot_count;
+}
+
+
+static const nir_xfb_output_info *
+find_packed_output(const nir_xfb_info *xfb_info, unsigned slot)
 {
-   for (unsigned i = 0; i < so_info->num_outputs; i++) {
-      const struct pipe_stream_output *packed_output = &so_info->output[i];
-      if (reverse_map[packed_output->register_index] == slot)
+   for (unsigned i = 0; i < xfb_info->output_count; i++) {
+      const nir_xfb_output_info *packed_output = &xfb_info->outputs[i];
+      if (packed_output->location == slot)
         return packed_output;
    }
    return NULL;
 }
 
 static void
-update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream_output_info *so_info,
-               uint64_t outputs_written, bool have_psiz)
-{
-   uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
-   unsigned slot = 0;
-   /* semi-copied from iris */
-   while (outputs_written) {
-      int bit = u_bit_scan64(&outputs_written);
-      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
-      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
-         continue;
-      reverse_map[slot++] = bit;
-   }
-
-   bool have_fake_psiz = false;
-   nir_foreach_shader_out_variable(var, nir) {
-      if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
-         have_fake_psiz = true;
-   }
-
+update_so_info(struct zink_shader *zs, nir_shader *nir, uint64_t outputs_written, bool have_psiz)
+{
    bool inlined[VARYING_SLOT_MAX][4] = {0};
    uint64_t packed = 0;
    uint8_t packed_components[VARYING_SLOT_MAX] = {0};
    uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
    uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
    uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
-   nir_variable *psiz = NULL;
-   for (unsigned i = 0; i < so_info->num_outputs; i++) {
-      const struct pipe_stream_output *output = &so_info->output[i];
-      unsigned slot = reverse_map[output->register_index];
+   for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
+      const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
+      unsigned xfb_components = util_bitcount(output->component_mask);
       /* always set stride to be used during draw */
-      zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
+      zs->sinfo.stride[output->buffer] = nir->xfb_info->buffers[output->buffer].stride;
       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
-         nir_variable *var = NULL;
-         unsigned so_slot;
-         while (!var)
-            var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
-         if (var->data.location == VARYING_SLOT_PSIZ)
-            psiz = var;
-         so_slot = slot + 1;
-         slot = reverse_map[output->register_index];
-         if (var->data.explicit_xfb_buffer) {
-            /* handle dvec3 where gallium splits streamout over 2 registers */
-            for (unsigned j = 0; j < output->num_components; j++)
-               inlined[slot][output->start_component + j] = true;
-         }
-         if (is_inlined(inlined[slot], output))
-            continue;
-         bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
-         unsigned num_components = get_slot_components(var, slot, so_slot);
-         /* if this is the entire variable, try to blast it out during the initial declaration
-          * structs must be handled later to ensure accurate analysis
-          */
-         if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
-            var->data.explicit_xfb_buffer = 1;
-            var->data.xfb.buffer = output->output_buffer;
-            var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
-            var->data.offset = output->dst_offset * 4;
-            var->data.stream = output->stream;
-            for (unsigned j = 0; j < output->num_components; j++)
-               inlined[slot][output->start_component + j] = true;
-         } else {
-            /* otherwise store some metadata for later */
-            packed |= BITFIELD64_BIT(slot);
-            packed_components[slot] += output->num_components;
-            packed_streams[slot] |= BITFIELD_BIT(output->stream);
-            packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
-            for (unsigned j = 0; j < output->num_components; j++)
-               packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
+         for (unsigned c = 0; !is_inlined(inlined[output->location], output) && c < xfb_components; c++) {
+            unsigned slot = output->location;
+            if (inlined[slot][output->component_offset + c])
+               continue;
+            nir_variable *var = NULL;
+            while (!var && slot < VARYING_SLOT_TESS_MAX)
+               var = find_var_with_location_frac(nir, slot--, output->component_offset + c, have_psiz, nir_var_shader_out);
+            slot = output->location;
+            unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
+            if (!var || var->data.location > slot || var->data.location + slot_count <= slot) {
+               /* if no variable is found for the xfb output, no output exists */
+               inlined[slot][c + output->component_offset] = true;
+               continue;
+            }
+            if (var->data.explicit_xfb_buffer) {
+               /* handle dvec3 where gallium splits streamout over 2 registers */
+               for (unsigned j = 0; j < xfb_components; j++)
+                  inlined[slot][c + output->component_offset + j] = true;
+            }
+            if (is_inlined(inlined[slot], output))
+               continue;
+            assert(!glsl_type_is_array(var->type) || var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0);
+            assert(!glsl_type_is_struct_or_ifc(var->type));
+            unsigned num_components = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : glsl_get_vector_elements(var->type);
+            if (glsl_type_is_64bit(glsl_without_array(var->type)))
+               num_components *= 2;
+            /* if this is the entire variable, try to blast it out during the initial declaration
+             * structs must be handled later to ensure accurate analysis
+             */
+            if ((num_components == xfb_components ||
+                 num_components < xfb_components ||
+                 (num_components > xfb_components && xfb_components == 4))) {
+               var->data.explicit_xfb_buffer = 1;
+               var->data.xfb.buffer = output->buffer;
+               var->data.xfb.stride = zs->sinfo.stride[output->buffer];
+               var->data.offset = (output->offset + c * sizeof(uint32_t));
+               var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
+               for (unsigned j = 0; j < MIN2(num_components, xfb_components); j++)
+                  inlined[slot][c + output->component_offset + j] = true;
+            } else {
+               /* otherwise store some metadata for later */
+               packed |= BITFIELD64_BIT(slot);
+               packed_components[slot] += xfb_components;
+               packed_streams[slot] |= BITFIELD_BIT(nir->xfb_info->buffer_to_stream[output->buffer]);
+               packed_buffers[slot] |= BITFIELD_BIT(output->buffer);
+               for (unsigned j = 0; j < xfb_components; j++)
+                  packed_offsets[output->location][j + output->component_offset + c] = output->offset + j * sizeof(uint32_t);
+            }
+         }
      }
   }
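`update_so_info` now consumes NIR's own `nir->xfb_info` instead of Gallium's `pipe_stream_output_info`: component counts come from a bitmask rather than a stored count, and offsets and strides are in bytes rather than dwords. A sketch of the accessor pattern used above (the helper is illustrative; the fields are those visible in this diff):

```c
/* sketch: per-output geometry from nir_xfb_output_info; util_bitcount
 * comes from Mesa's util headers */
static unsigned
xfb_num_components(const nir_xfb_output_info *output)
{
   /* e.g. component_mask 0b0110 = two captured components, starting at
    * output->component_offset within varying slot output->location */
   return util_bitcount(output->component_mask);
}
```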
@@ -1977,24 +1856,30 @@ update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream
     * being output with the same stream on the same buffer with increasing offsets, this entire variable
     * can be consolidated into a single output to conserve locations
     */
-   for (unsigned i = 0; i < so_info->num_outputs; i++) {
-      const struct pipe_stream_output *output = &so_info->output[i];
-      unsigned slot = reverse_map[output->register_index];
+   for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
+      const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
+      unsigned slot = output->location;
      if (is_inlined(inlined[slot], output))
         continue;
      if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
         nir_variable *var = NULL;
         while (!var)
-            var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
+            var = find_var_with_location_frac(nir, slot--, output->component_offset, have_psiz, nir_var_shader_out);
+         slot = output->location;
+         unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
+         if (!var || var->data.location > slot || var->data.location + slot_count <= slot)
+            continue;
         /* this is a lowered 64bit variable that can't be exported due to packing */
         if (var->data.is_xfb)
            goto out;
 
-         unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
+         unsigned num_slots = var->data.location >= VARYING_SLOT_CLIP_DIST0 && var->data.location <= VARYING_SLOT_CULL_DIST1 ?
+                              glsl_array_size(var->type) / 4 :
+                              glsl_count_vec4_slots(var->type, false, false);
         /* for each variable, iterate over all the variable's slots and inline the outputs */
         for (unsigned j = 0; j < num_slots; j++) {
            slot = var->data.location + j;
-            const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
+            const nir_xfb_output_info *packed_output = find_packed_output(nir->xfb_info, slot);
            if (!packed_output)
               goto out;
@@ -2010,23 +1895,20 @@ update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream
               goto out;
 
            /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
-            uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
+            uint32_t prev_offset = packed_offsets[packed_output->location][0];
            for (unsigned k = 1; k < num_components; k++) {
               /* if the offsets are not incrementing as expected, skip consolidation */
-               if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
+               if (packed_offsets[packed_output->location][k] != prev_offset + sizeof(uint32_t))
                  goto out;
-               prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
+               prev_offset = packed_offsets[packed_output->location][k + packed_output->component_offset];
            }
         }
         /* this output can be consolidated: blast out all the data inlined */
         var->data.explicit_xfb_buffer = 1;
-         var->data.xfb.buffer = output->output_buffer;
-         var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
-         var->data.offset = output->dst_offset * 4;
-         var->data.stream = output->stream;
-         /* GLSL specifies that interface blocks are split per-buffer in XFB */
-         if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
-            zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
+         var->data.xfb.buffer = output->buffer;
+         var->data.xfb.stride = zs->sinfo.stride[output->buffer];
+         var->data.offset = output->offset;
+         var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
         /* mark all slot components inlined to skip subsequent loop iterations */
         for (unsigned j = 0; j < num_slots; j++) {
            slot = var->data.location + j;
@@ -2037,15 +1919,8 @@ update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream
         continue;
      }
 out:
-      /* these are packed/explicit varyings which can't be exported with normal output */
-      zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
-      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
-      zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
+      unreachable("xfb should be inlined by now!");
    }
-   zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
-   /* ensure this doesn't get output in the shader by unsetting location */
-   if (have_fake_psiz && psiz)
-      update_psiz_location(nir, psiz);
 }
 
 struct decompose_state {
@@ -2069,7 +1944,7 @@ lower_attrib(nir_builder *b, nir_instr *instr, void *data)
       return false;
    unsigned num_components = glsl_get_vector_elements(split[0]->type);
    b->cursor = nir_after_instr(instr);
-   nir_ssa_def *loads[4];
+   nir_def *loads[4];
    for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
       loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
    if (state->needs_w) {
@@ -2077,8 +1952,8 @@ lower_attrib(nir_builder *b, nir_instr *instr, void *data)
       loads[3] = nir_channel(b, loads[0], 3);
       loads[0] = nir_channel(b, loads[0], 0);
    }
-   nir_ssa_def *new_load = nir_vec(b, loads, num_components);
-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
+   nir_def *new_load = nir_vec(b, loads, num_components);
+   nir_def_rewrite_uses(&intr->def, new_load);
    nir_instr_remove_v(instr);
    return true;
 }
@@ -2117,7 +1992,7 @@ decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decompose
    }
    nir_fixup_deref_modes(nir);
    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-   optimize_nir(nir, NULL);
+   optimize_nir(nir, NULL, true);
    return true;
 }
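The hunks below shrink dramatically because NIR consolidated the per-operation SSBO atomics (`ssbo_atomic_add`, `ssbo_atomic_umin`, ...) into just `nir_intrinsic_ssbo_atomic` and `nir_intrinsic_ssbo_atomic_swap`, with the operation carried as an `atomic_op` index. Converting one intrinsic to another now copies a single enum instead of mapping fourteen names; a sketch of the pattern used in `rewrite_atomic_ssbo_instr` (the wrapper function is illustrative):

```c
/* sketch: converting a consolidated SSBO atomic to its deref form */
static nir_intrinsic_instr *
make_deref_atomic(nir_builder *b, nir_intrinsic_instr *intr)
{
   nir_intrinsic_op op = intr->intrinsic == nir_intrinsic_ssbo_atomic
                            ? nir_intrinsic_deref_atomic
                            : nir_intrinsic_deref_atomic_swap;
   nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
   /* the operation (add/umin/xor/...) rides along as an index */
   nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
   return new_instr;
}
```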
@@ -2131,20 +2006,11 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    b->cursor = nir_before_instr(instr);
    switch (intr->intrinsic) {
-   case nir_intrinsic_ssbo_atomic_fadd:
-   case nir_intrinsic_ssbo_atomic_add:
-   case nir_intrinsic_ssbo_atomic_umin:
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_ssbo_atomic_umax:
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_ssbo_atomic_and:
-   case nir_intrinsic_ssbo_atomic_or:
-   case nir_intrinsic_ssbo_atomic_xor:
-   case nir_intrinsic_ssbo_atomic_exchange:
-   case nir_intrinsic_ssbo_atomic_comp_swap: {
+   case nir_intrinsic_ssbo_atomic:
+   case nir_intrinsic_ssbo_atomic_swap: {
       /* convert offset to uintN_t[idx] */
-      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
+      nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, intr->def.bit_size / 8);
+      nir_src_rewrite(&intr->src[1], offset);
       return true;
    }
    case nir_intrinsic_load_ssbo:
@@ -2153,17 +2019,17 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
      bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
                        nir_src_is_const(intr->src[0]) &&
                        nir_src_as_uint(intr->src[0]) == 0 &&
-                        nir_dest_bit_size(intr->dest) == 64 &&
+                        intr->def.bit_size == 64 &&
                        nir_intrinsic_align_offset(intr) % 8 != 0;
-      force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
-      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
+      force_2x32 |= intr->def.bit_size == 64 && !has_int64;
+      nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
+      nir_src_rewrite(&intr->src[1], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
-         assert(intr->dest.ssa.num_components == 1);
+         assert(intr->def.num_components == 1);
         /* rewrite as 2x32 */
-         nir_ssa_def *load[2];
+         nir_def *load[2];
         for (unsigned i = 0; i < 2; i++) {
            if (intr->intrinsic == nir_intrinsic_load_ssbo)
               load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
@@ -2172,28 +2038,29 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
            nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
         }
         /* cast back to 64bit */
-         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
-         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
+         nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
+         nir_def_rewrite_uses(&intr->def, casted);
         nir_instr_remove(instr);
      }
      return true;
    }
+   case nir_intrinsic_load_scratch:
    case nir_intrinsic_load_shared:
      b->cursor = nir_before_instr(instr);
-      bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
-      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
+      bool force_2x32 = intr->def.bit_size == 64 && !has_int64;
+      nir_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
+      nir_src_rewrite(&intr->src[0], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
-         assert(intr->dest.ssa.num_components == 1);
+         assert(intr->def.num_components == 1);
         /* rewrite as 2x32 */
-         nir_ssa_def *load[2];
+         nir_def *load[2];
         for (unsigned i = 0; i < 2; i++)
            load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
         /* cast back to 64bit */
-         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
-         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
+         nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
+         nir_def_rewrite_uses(&intr->def, casted);
         nir_instr_remove(instr);
         return true;
      }
@@ -2201,29 +2068,30 @@
    case nir_intrinsic_store_ssbo: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
   case nir_intrinsic_store_ssbo: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
-      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
+      nir_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
+      nir_src_rewrite(&intr->src[2], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->src[0].ssa->num_components == 1);
-         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
+         nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
         for (unsigned i = 0; i < 2; i++)
            nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
         nir_instr_remove(instr);
      }
      return true;
   }
+   case nir_intrinsic_store_scratch:
   case nir_intrinsic_store_shared: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
-      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
+      nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
+      nir_src_rewrite(&intr->src[1], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
         /* this is always scalarized */
         assert(intr->src[0].ssa->num_components == 1);
-         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
+         nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
         for (unsigned i = 0; i < 2; i++)
            nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
         nir_instr_remove(instr);
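/* Illustrative sketch, not part of the diff: stores go the other way, the
 * 64-bit source is split into two 32-bit halves before being written, the
 * inverse of the pack shown earlier. Standalone C equivalent of
 * nir_unpack_64_2x32_split_x/_y:
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   uint64_t v = 0x00000001deadbeefull;
   uint32_t x = (uint32_t)v;         /* unpack_64_2x32_split_x: low dword */
   uint32_t y = (uint32_t)(v >> 32); /* unpack_64_2x32_split_y: high dword */
   printf("%x %x\n", x, y);          /* prints deadbeef 1 */
   return 0;
}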
@@ -2302,81 +2170,44 @@ rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
 {
    nir_intrinsic_op op;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   switch (intr->intrinsic) {
-   case nir_intrinsic_ssbo_atomic_fadd:
-      op = nir_intrinsic_deref_atomic_fadd;
-      break;
-   case nir_intrinsic_ssbo_atomic_fmin:
-      op = nir_intrinsic_deref_atomic_fmin;
-      break;
-   case nir_intrinsic_ssbo_atomic_fmax:
-      op = nir_intrinsic_deref_atomic_fmax;
-      break;
-   case nir_intrinsic_ssbo_atomic_fcomp_swap:
-      op = nir_intrinsic_deref_atomic_fcomp_swap;
-      break;
-   case nir_intrinsic_ssbo_atomic_add:
-      op = nir_intrinsic_deref_atomic_add;
-      break;
-   case nir_intrinsic_ssbo_atomic_umin:
-      op = nir_intrinsic_deref_atomic_umin;
-      break;
-   case nir_intrinsic_ssbo_atomic_imin:
-      op = nir_intrinsic_deref_atomic_imin;
-      break;
-   case nir_intrinsic_ssbo_atomic_umax:
-      op = nir_intrinsic_deref_atomic_umax;
-      break;
-   case nir_intrinsic_ssbo_atomic_imax:
-      op = nir_intrinsic_deref_atomic_imax;
-      break;
-   case nir_intrinsic_ssbo_atomic_and:
-      op = nir_intrinsic_deref_atomic_and;
-      break;
-   case nir_intrinsic_ssbo_atomic_or:
-      op = nir_intrinsic_deref_atomic_or;
-      break;
-   case nir_intrinsic_ssbo_atomic_xor:
-      op = nir_intrinsic_deref_atomic_xor;
-      break;
-   case nir_intrinsic_ssbo_atomic_exchange:
-      op = nir_intrinsic_deref_atomic_exchange;
-      break;
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-      op = nir_intrinsic_deref_atomic_comp_swap;
-      break;
-   default:
+   if (intr->intrinsic == nir_intrinsic_ssbo_atomic)
+      op = nir_intrinsic_deref_atomic;
+   else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
+      op = nir_intrinsic_deref_atomic_swap;
+   else
      unreachable("unknown intrinsic");
-   }
-   nir_ssa_def *offset = intr->src[1].ssa;
+   nir_def *offset = intr->src[1].ssa;
    nir_src *src = &intr->src[0];
-   nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
+   nir_variable *var = get_bo_var(b->shader, bo, true, src,
+                                  intr->def.bit_size);
    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
-   nir_ssa_def *idx = src->ssa;
+   nir_def *idx = src->ssa;
    if (bo->first_ssbo)
      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
    /* generate new atomic deref ops for every component */
-   nir_ssa_def *result[4];
-   unsigned num_components = nir_dest_num_components(intr->dest);
+   nir_def *result[4];
+   unsigned num_components = intr->def.num_components;
    for (unsigned i = 0; i < num_components; i++) {
      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
      nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
-      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), "");
-      new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
+      nir_def_init(&new_instr->instr, &new_instr->def, 1,
+                   intr->def.bit_size);
+      nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
+      new_instr->src[0] = nir_src_for_ssa(&deref_arr->def);
      /* deref ops have no offset src, so copy the srcs after it */
      for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++)
-         nir_src_copy(&new_instr->src[i - 1], &intr->src[i], &new_instr->instr);
+         new_instr->src[i - 1] = nir_src_for_ssa(intr->src[i].ssa);
      nir_builder_instr_insert(b, &new_instr->instr);
-      result[i] = &new_instr->dest.ssa;
+      result[i] = &new_instr->def;
      offset = nir_iadd_imm(b, offset, 1);
    }
-   nir_ssa_def *load = nir_vec(b, result, num_components);
-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
+   nir_def *load = nir_vec(b, result, num_components);
+   nir_def_rewrite_uses(&intr->def, load);
    nir_instr_remove(instr);
 }
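/* Illustrative sketch, not part of the diff: rewrite_atomic_ssbo_instr splits
 * a vector atomic into one scalar atomic per component on consecutive array
 * elements, then re-vectorizes the results. The same shape in plain C using
 * <stdatomic.h> on a flat array (all names here are illustrative):
 */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned buf[8];

int
main(void)
{
   unsigned add[2] = {10, 20}, result[2];
   unsigned offset = 4;
   /* one atomic per component, the offset advancing by one element each time */
   for (unsigned i = 0; i < 2; i++)
      result[i] = atomic_fetch_add(&buf[offset + i], add[i]);
   printf("%u %u\n", result[0], result[1]); /* previous values: 0 0 */
   return 0;
}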
@@ -2388,26 +2219,14 @@ remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
      return false;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    nir_variable *var = NULL;
-   nir_ssa_def *offset = NULL;
+   nir_def *offset = NULL;
    bool is_load = true;
    b->cursor = nir_before_instr(instr);
    nir_src *src;
    bool ssbo = true;
    switch (intr->intrinsic) {
-   case nir_intrinsic_ssbo_atomic_fadd:
-   case nir_intrinsic_ssbo_atomic_fmin:
-   case nir_intrinsic_ssbo_atomic_fmax:
-   case nir_intrinsic_ssbo_atomic_fcomp_swap:
-   case nir_intrinsic_ssbo_atomic_add:
-   case nir_intrinsic_ssbo_atomic_umin:
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_ssbo_atomic_umax:
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_ssbo_atomic_and:
-   case nir_intrinsic_ssbo_atomic_or:
-   case nir_intrinsic_ssbo_atomic_xor:
-   case nir_intrinsic_ssbo_atomic_exchange:
-   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_ssbo_atomic:
+   case nir_intrinsic_ssbo_atomic_swap:
      rewrite_atomic_ssbo_instr(b, instr, bo);
      return true;
    case nir_intrinsic_store_ssbo:
@@ -2418,12 +2237,12 @@ remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
      break;
    case nir_intrinsic_load_ssbo:
      src = &intr->src[0];
-      var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
+      var = get_bo_var(b->shader, bo, true, src, intr->def.bit_size);
      offset = intr->src[1].ssa;
      break;
    case nir_intrinsic_load_ubo:
      src = &intr->src[0];
-      var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
+      var = get_bo_var(b->shader, bo, false, src, intr->def.bit_size);
      offset = intr->src[1].ssa;
      ssbo = false;
      break;
@@ -2433,28 +2252,31 @@ remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
    assert(var);
    assert(offset);
    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
-   nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
+   nir_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
    if (!ssbo && bo->first_ubo && var->data.driver_location)
      idx = nir_iadd_imm(b, idx, -bo->first_ubo);
    else if (ssbo && bo->first_ssbo)
      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
-   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, nir_i2iN(b, idx, nir_dest_bit_size(deref_var->dest)));
+   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var,
+                                                        nir_i2iN(b, idx, deref_var->def.bit_size));
    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
    assert(intr->num_components <= 2);
    if (is_load) {
-      nir_ssa_def *result[2];
+      nir_def *result[2];
      for (unsigned i = 0; i < intr->num_components; i++) {
-         nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
+         nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
+                                                            nir_i2iN(b, offset, deref_struct->def.bit_size));
         result[i] = nir_load_deref(b, deref_arr);
         if (intr->intrinsic == nir_intrinsic_load_ssbo)
            nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
         offset = nir_iadd_imm(b, offset, 1);
      }
-      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
-      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
+      nir_def *load = nir_vec(b, result, intr->num_components);
+      nir_def_rewrite_uses(&intr->def, load);
    } else {
-      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
-      nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
+      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
+                                                         nir_i2iN(b, offset, deref_struct->def.bit_size));
+      nir_build_store_deref(b, &deref_arr->def, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
    }
    nir_instr_remove(instr);
    return true;
@@ -2468,18 +2290,98 @@ remove_bo_access(nir_shader *shader, struct zink_shader *zs)
 }
 
 static bool
+filter_io_instr(nir_intrinsic_instr *intr, bool *is_load, bool *is_input, bool *is_interp)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_interpolated_input:
+      *is_interp = true;
+      FALLTHROUGH;
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_per_vertex_input:
+      *is_input = true;
+      FALLTHROUGH;
+   case nir_intrinsic_load_output:
+   case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_load_per_primitive_output:
+      *is_load = true;
+      FALLTHROUGH;
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_primitive_output:
+   case nir_intrinsic_store_per_vertex_output:
+      break;
+   default:
+      return false;
+   }
+   return true;
+}
+
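/* Illustrative sketch, not part of the diff: filter_io_instr above leans on
 * deliberate switch fallthrough, the most specific cases set a flag and fall
 * into the more general ones, so an interpolated input ends up with all three
 * flags set. The same cascade in a standalone C program:
 */
#include <stdbool.h>
#include <stdio.h>

enum kind { INTERP_INPUT, PLAIN_INPUT, LOAD_OUTPUT, STORE_OUTPUT, OTHER };

static bool
classify(enum kind k, bool *is_load, bool *is_input, bool *is_interp)
{
   switch (k) {
   case INTERP_INPUT:
      *is_interp = true; /* fall through */
   case PLAIN_INPUT:
      *is_input = true;  /* fall through */
   case LOAD_OUTPUT:
      *is_load = true;   /* fall through */
   case STORE_OUTPUT:
      break;
   default:
      return false;
   }
   return true;
}

int
main(void)
{
   bool l = false, i = false, p = false;
   classify(INTERP_INPUT, &l, &i, &p);
   printf("load=%d input=%d interp=%d\n", l, i, p); /* all 1 */
   return 0;
}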
+static bool
+io_instr_is_arrayed(nir_intrinsic_instr *intr)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_per_vertex_input:
+   case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_load_per_primitive_output:
+   case nir_intrinsic_store_per_primitive_output:
+   case nir_intrinsic_store_per_vertex_output:
+      return true;
+   default:
+      break;
+   }
+   return false;
+}
+
+static bool
 find_var_deref(nir_shader *nir, nir_variable *var)
 {
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_deref)
+               continue;
+            nir_deref_instr *deref = nir_instr_as_deref(instr);
+            if (deref->deref_type == nir_deref_type_var && deref->var == var)
+               return true;
+         }
+      }
+   }
+   return false;
+}
+
+static bool
+find_var_io(nir_shader *nir, nir_variable *var)
+{
    nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;
      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
-            if (instr->type != nir_instr_type_deref)
+            if (instr->type != nir_instr_type_intrinsic)
               continue;
-            nir_deref_instr *deref = nir_instr_as_deref(instr);
-            if (deref->deref_type == nir_deref_type_var && deref->var == var)
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            bool is_load = false;
+            bool is_input = false;
+            bool is_interp = false;
+            if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+               continue;
+            if (var->data.mode == nir_var_shader_in && !is_input)
+               continue;
+            if (var->data.mode == nir_var_shader_out && is_input)
+               continue;
+            unsigned slot_offset = 0;
+            if (var->data.fb_fetch_output && !is_load)
+               continue;
+            if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && !is_input && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
+               continue;
+            nir_src *src_offset = nir_get_io_offset_src(intr);
+            if (src_offset && nir_src_is_const(*src_offset))
+               slot_offset = nir_src_as_uint(*src_offset);
+            unsigned slot_count = get_var_slot_count(nir, var);
+            if (var->data.mode & (nir_var_shader_out | nir_var_shader_in) &&
+                var->data.fb_fetch_output == nir_intrinsic_io_semantics(intr).fb_fetch_output &&
+                var->data.location <= nir_intrinsic_io_semantics(intr).location + slot_offset &&
+                var->data.location + slot_count > nir_intrinsic_io_semantics(intr).location + slot_offset)
               return true;
         }
      }
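/* Illustrative sketch, not part of the diff: the final test in find_var_io is
 * a half-open interval check, the accessed slot (location + slot_offset) must
 * fall inside [var->data.location, var->data.location + slot_count). A
 * standalone C version of that predicate:
 */
#include <stdbool.h>
#include <stdio.h>

static bool
var_covers_slot(unsigned var_location, unsigned slot_count, unsigned slot)
{
   return var_location <= slot && var_location + slot_count > slot;
}

int
main(void)
{
   /* a variable occupying slots 3..6 covers slot 5 but not slot 7 */
   printf("%d %d\n", var_covers_slot(3, 4, 5), var_covers_slot(3, 4, 7));
   return 0;
}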
@@ -2495,12 +2397,11 @@ struct clamp_layer_output_state {
 static void
 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
 {
-   nir_ssa_def *is_layered = nir_load_push_constant(b, 1, 32,
-                                                    nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED),
-                                                    .base = ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, .range = 4);
+   nir_def *is_layered = nir_load_push_constant_zink(b, 1, 32,
+                                                     nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED));
    nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
    nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
-   nir_ssa_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
+   nir_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
                                  nir_load_deref(b, original_deref),
                                  nir_imm_int(b, 0));
    nir_store_deref(b, clamped_deref, layer, 0);
@@ -2537,7 +2438,7 @@ clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
    }
    struct clamp_layer_output_state state = {0};
    state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
-   if (!state.original || !find_var_deref(vs, state.original))
+   if (!state.original || (!find_var_deref(vs, state.original) && !find_var_io(vs, state.original)))
      return false;
    state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
    state.clamped->data.location = VARYING_SLOT_LAYER;
@@ -2566,13 +2467,12 @@ clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
    } else {
      nir_builder b;
      nir_function_impl *impl = nir_shader_get_entrypoint(vs);
-      nir_builder_init(&b, impl);
+      b = nir_builder_at(nir_after_impl(impl));
      assert(impl->end_block->predecessors->entries == 1);
-      b.cursor = nir_after_cf_list(&impl->body);
      clamp_layer_output_emit(&b, &state);
      nir_metadata_preserve(impl, nir_metadata_dominance);
    }
-   optimize_nir(vs, NULL);
+   optimize_nir(vs, NULL, true);
    NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
    return true;
 }
@@ -2584,7 +2484,6 @@ assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reser
    switch (slot) {
    case -1:
    case VARYING_SLOT_POS:
-   case VARYING_SLOT_PNTC:
    case VARYING_SLOT_PSIZ:
    case VARYING_SLOT_LAYER:
    case VARYING_SLOT_PRIMITIVE_ID:
@@ -2635,7 +2534,6 @@ assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reser
    unsigned slot = var->data.location;
    switch (slot) {
    case VARYING_SLOT_POS:
-   case VARYING_SLOT_PNTC:
    case VARYING_SLOT_PSIZ:
    case VARYING_SLOT_LAYER:
    case VARYING_SLOT_PRIMITIVE_ID:
@@ -2688,32 +2586,169 @@ rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
      return false;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   if (intr->intrinsic != nir_intrinsic_load_deref)
+   bool is_load = false;
+   bool is_input = false;
+   bool is_interp = false;
+   if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
      return false;
-   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
-   if (deref_var != var)
+   if (!is_load)
+      return false;
+   unsigned location = nir_intrinsic_io_semantics(intr).location;
+   if (location != var->data.location)
      return false;
    b->cursor = nir_before_instr(instr);
-   nir_ssa_def *zero = nir_imm_zero(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
+   nir_def *zero = nir_imm_zero(b, intr->def.num_components,
+                                intr->def.bit_size);
    if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
-      switch (var->data.location) {
+      switch (location) {
      case VARYING_SLOT_COL0:
      case VARYING_SLOT_COL1:
      case VARYING_SLOT_BFC0:
      case VARYING_SLOT_BFC1:
         /* default color is 0,0,0,1 */
-         if (nir_dest_num_components(intr->dest) == 4)
+         if (intr->def.num_components == 4)
            zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
         break;
      default:
         break;
      }
    }
-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, zero);
+   nir_def_rewrite_uses(&intr->def, zero);
    nir_instr_remove(instr);
    return true;
 }
+
+
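/* Illustrative sketch, not part of the diff: rewrite_read_as_0 replaces reads
 * of unwritten varyings with zero, except that color slots get the GL default
 * of (0, 0, 0, 1). The same substitution on a plain vec4 in standalone C:
 */
#include <stdbool.h>
#include <stdio.h>

static void
default_varying(float v[4], bool is_color)
{
   for (unsigned i = 0; i < 4; i++)
      v[i] = 0.0f;
   if (is_color)
      v[3] = 1.0f; /* default color is 0,0,0,1 */
}

int
main(void)
{
   float col[4];
   default_varying(col, true);
   printf("%g %g %g %g\n", col[0], col[1], col[2], col[3]); /* 0 0 0 1 */
   return 0;
}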
+static bool
+delete_psiz_store_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_primitive_output:
+   case nir_intrinsic_store_per_vertex_output:
+      break;
+   default:
+      return false;
+   }
+   if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PSIZ)
+      return false;
+   if (!data || (nir_src_is_const(intr->src[0]) && fabs(nir_src_as_float(intr->src[0]) - 1.0) < FLT_EPSILON)) {
+      nir_instr_remove(&intr->instr);
+      return true;
+   }
+   return false;
+}
+
+static bool
+delete_psiz_store(nir_shader *nir, bool one)
+{
+   bool progress = nir_shader_intrinsics_pass(nir, delete_psiz_store_instr,
+                                              nir_metadata_dominance, one ? nir : NULL);
+   if (progress)
+      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+   return progress;
+}
+
+struct write_components {
+   unsigned slot;
+   uint32_t component_mask;
+};
+
+static bool
+fill_zero_reads(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   struct write_components *wc = data;
+   bool is_load = false;
+   bool is_input = false;
+   bool is_interp = false;
+   if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+      return false;
+   if (!is_input)
+      return false;
+   nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+   if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
+      return false;
+   unsigned num_components = intr->num_components;
+   unsigned c = nir_intrinsic_component(intr);
+   if (intr->def.bit_size == 64)
+      num_components *= 2;
+   nir_src *src_offset = nir_get_io_offset_src(intr);
+   if (nir_src_is_const(*src_offset)) {
+      unsigned slot_offset = nir_src_as_uint(*src_offset);
+      if (s.location + slot_offset != wc->slot)
+         return false;
+   } else if (s.location > wc->slot || s.location + s.num_slots <= wc->slot) {
+      return false;
+   }
+   uint32_t readmask = BITFIELD_MASK(intr->num_components) << c;
+   if (intr->def.bit_size == 64)
+      readmask |= readmask << (intr->num_components + c);
+   /* handle dvec3/dvec4 */
+   if (num_components + c > 4)
+      readmask >>= 4;
+   if ((wc->component_mask & readmask) == readmask)
+      return false;
+   uint32_t rewrite_mask = readmask & ~wc->component_mask;
+   if (!rewrite_mask)
+      return false;
+   b->cursor = nir_after_instr(&intr->instr);
+   nir_def *zero = nir_imm_zero(b, intr->def.num_components, intr->def.bit_size);
+   if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
+      switch (wc->slot) {
+      case VARYING_SLOT_COL0:
+      case VARYING_SLOT_COL1:
+      case VARYING_SLOT_BFC0:
+      case VARYING_SLOT_BFC1:
+         /* default color is 0,0,0,1 */
+         if (intr->def.num_components == 4)
+            zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
+         break;
+      default:
+         break;
+      }
+   }
+   rewrite_mask >>= c;
+   nir_def *dest = &intr->def;
+   u_foreach_bit(component, rewrite_mask)
+      dest = nir_vector_insert_imm(b, dest, nir_channel(b, zero, component), component);
+   nir_def_rewrite_uses_after(&intr->def, dest, dest->parent_instr);
+   return true;
+}
+
+static bool
+find_max_write_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   struct write_components *wc = data;
+   bool is_load = false;
+   bool is_input = false;
+   bool is_interp = false;
+   if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+      return false;
+   if (is_input || is_load)
+      return false;
+   nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+   if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
+      return false;
+   unsigned location = s.location;
+   unsigned c = nir_intrinsic_component(intr);
+   uint32_t wrmask = nir_intrinsic_write_mask(intr) << c;
+   if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64) {
+      unsigned num_components = intr->num_components * 2;
+      nir_src *src_offset = nir_get_io_offset_src(intr);
+      if (nir_src_is_const(*src_offset)) {
+         if (location + nir_src_as_uint(*src_offset) != wc->slot && num_components + c < 4)
+            return false;
+      }
+      wrmask |= wrmask << intr->num_components;
+      /* handle dvec3/dvec4 */
+      if (num_components + c > 4)
+         wrmask >>= 4;
+   }
+   wc->component_mask |= wrmask;
+   return false;
+}
+
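/* Illustrative sketch, not part of the diff: find_max_write_components
 * accumulates which components of a slot the producer actually writes. The
 * write mask is shifted by the start component, and 64-bit sources occupy two
 * 32-bit components each. A standalone C rendition of that bookkeeping:
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
written_mask(uint32_t wrmask, unsigned start_component, unsigned num_components, int is_64bit)
{
   uint32_t mask = wrmask << start_component;
   if (is_64bit) {
      /* each 64-bit component covers two 32-bit slots */
      mask |= mask << num_components;
      if (num_components * 2 + start_component > 4)
         mask >>= 4; /* dvec3/dvec4 spill into the next slot */
   }
   return mask;
}

int
main(void)
{
   /* writing .xy of a 32-bit vec starting at component 1 -> mask 0b0110 */
   printf("0x%x\n", written_mask(0x3, 1, 2, 0));
   return 0;
}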
 void
 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
 {
@@ -2722,16 +2757,30 @@ zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_sh
    memset(slot_map, -1, sizeof(slot_map));
    bool do_fixup = false;
    nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
-   if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
+   nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
+   if (var) {
+      bool can_remove = false;
+      if (!nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
+         /* maintenance5 guarantees "A default size of 1.0 is used if PointSize is not written" */
+         if (screen->info.have_KHR_maintenance5 && !var->data.explicit_xfb_buffer && delete_psiz_store(producer, true))
+            can_remove = !(producer->info.outputs_written & VARYING_BIT_PSIZ);
+         else if (consumer->info.stage != MESA_SHADER_FRAGMENT)
+            can_remove = !var->data.explicit_location;
+      }
      /* remove injected pointsize from all but the last vertex stage */
-      nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
-      if (var && !var->data.explicit_location && !nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
+      if (can_remove) {
         var->data.mode = nir_var_shader_temp;
         nir_fixup_deref_modes(producer);
+         delete_psiz_store(producer, false);
         NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-         optimize_nir(producer, NULL);
+         optimize_nir(producer, NULL, true);
      }
    }
+   if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
+      producer->info.has_transform_feedback_varyings = false;
+      nir_foreach_shader_out_variable(var, producer)
+         var->data.explicit_xfb_buffer = false;
+   }
    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      /* never assign from tcs -> tes, always invert */
      nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
@@ -2754,11 +2803,21 @@ zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_sh
      if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
         do_fixup |= clamp_layer_output(producer, consumer, &reserved);
    }
+   nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
+   if (producer->info.io_lowered && consumer->info.io_lowered) {
+      u_foreach_bit64(slot, producer->info.outputs_written & BITFIELD64_RANGE(VARYING_SLOT_VAR0, 31)) {
+         struct write_components wc = {slot, 0};
+         nir_shader_intrinsics_pass(producer, find_max_write_components, nir_metadata_all, &wc);
+         assert(wc.component_mask);
+         if (wc.component_mask != BITFIELD_MASK(4))
+            do_fixup |= nir_shader_intrinsics_pass(consumer, fill_zero_reads, nir_metadata_dominance, &wc);
+      }
+   }
    if (!do_fixup)
      return;
    nir_fixup_deref_modes(nir);
    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-   optimize_nir(nir, NULL);
+   optimize_nir(nir, NULL, true);
 }
 
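/* Illustrative sketch, not part of the diff: with KHR_maintenance5 the commit
 * can drop PointSize stores outright when the stored value is a compile-time
 * 1.0, since 1.0 is also the API default. The comparison uses an epsilon
 * rather than ==, as in delete_psiz_store_instr above:
 */
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

static bool
is_default_pointsize(float stored)
{
   return fabsf(stored - 1.0f) < FLT_EPSILON;
}

int
main(void)
{
   printf("%d %d\n", is_default_pointsize(1.0f), is_default_pointsize(2.0f));
   return 0;
}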
 /* all types that hit this function contain something that is 64bit */
@@ -2848,15 +2907,12 @@ deref_is_matrix(nir_deref_instr *deref)
 }
 
 static bool
-lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variable *var,
+lower_64bit_vars_function(nir_shader *shader, nir_function_impl *impl, nir_variable *var,
                           struct hash_table *derefs, struct set *deletes, bool doubles_only)
 {
    bool func_progress = false;
-   if (!function->impl)
-      return false;
-   nir_builder b;
-   nir_builder_init(&b, function->impl);
-   nir_foreach_block(block, function->impl) {
+   nir_builder b = nir_builder_create(impl);
+   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
@@ -2893,12 +2949,12 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
            if (nir_intrinsic_get_var(intr, 0) != var)
               break;
            if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
-                (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
+                (intr->intrinsic == nir_intrinsic_load_deref && intr->def.bit_size != 64))
               break;
            b.cursor = nir_before_instr(instr);
            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            unsigned num_components = intr->num_components * 2;
-            nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
+            nir_def *comp[NIR_MAX_VEC_COMPONENTS];
            /* this is the stored matrix type from the deref */
            struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
            const struct glsl_type *matrix = he ? he->data : NULL;
@@ -2908,7 +2964,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
            if (intr->intrinsic == nir_intrinsic_store_deref) {
               /* first, unpack the src data to 32bit vec2 components */
               for (unsigned i = 0; i < intr->num_components; i++) {
-                  nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
+                  nir_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
                  comp[i * 2] = nir_channel(&b, ssa, 0);
                  comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
               }
@@ -2924,7 +2980,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                  assert(deref->deref_type == nir_deref_type_array);
                  nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
                  /* let optimization clean up consts later */
-                  nir_ssa_def *index = deref->arr.index.ssa;
+                  nir_def *index = deref->arr.index.ssa;
                  /* this might be an indirect array index:
                   * - iterate over matrix columns
                   * - add if blocks for each column
@@ -2949,7 +3005,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                        nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
                        unsigned incr = MIN2(remaining, 4);
                        /* assemble the write component vec */
-                        nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
+                        nir_def *val = nir_vec(&b, &comp[i], incr);
                        /* use the number of components being written as the writemask */
                        if (glsl_get_vector_elements(strct->type) > val->num_components)
                           val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
@@ -2962,7 +3018,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                  _mesa_set_add(deletes, &deref->instr);
               } else if (num_components <= 4) {
                  /* simple store case: just write out the components */
-                  nir_ssa_def *dest = nir_vec(&b, comp, num_components);
+                  nir_def *dest = nir_vec(&b, comp, num_components);
                  nir_store_deref(&b, deref, dest, mask);
               } else {
                  /* writing > 4 components: access the struct and write to the appropriate vec4 members */
@@ -2970,7 +3026,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                     if (!(mask & BITFIELD_MASK(4)))
                        continue;
                     nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
-                     nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
+                     nir_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
                     if (glsl_get_vector_elements(strct->type) > dest->num_components)
                        dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
                     nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
@@ -2978,20 +3034,20 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                  }
               }
            } else {
-               nir_ssa_def *dest = NULL;
+               nir_def *dest = NULL;
               if (matrix) {
                  /* matrix types always come from array (row) derefs */
                  assert(deref->deref_type == nir_deref_type_array);
                  nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
                  /* let optimization clean up consts later */
-                  nir_ssa_def *index = deref->arr.index.ssa;
+                  nir_def *index = deref->arr.index.ssa;
                  /* this might be an indirect array index:
                   * - iterate over matrix columns
                   * - add if blocks for each column
                   * - phi the loads using the array index
                   */
                  unsigned cols = glsl_get_matrix_columns(matrix);
-                  nir_ssa_def *dests[4];
+                  nir_def *dests[4];
                  for (unsigned idx = 0; idx < cols; idx++) {
                     /* don't add an if for the final row: this will be handled in the else */
                     if (idx < cols - 1)
@@ -3010,7 +3066,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                     for (unsigned i = 0; i < num_components; member++) {
                        assert(member < glsl_get_length(var_deref->type));
                        nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
-                        nir_ssa_def *load = nir_load_deref(&b, strct);
+                        nir_def *load = nir_load_deref(&b, strct);
                        unsigned incr = MIN2(remaining, 4);
                        /* repack the loads to 64bit */
                        for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
@@ -3030,7 +3086,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                  _mesa_set_add(deletes, &deref->instr);
               } else if (num_components <= 4) {
                  /* simple load case */
-                  nir_ssa_def *load = nir_load_deref(&b, deref);
+                  nir_def *load = nir_load_deref(&b, deref);
                  /* pack 32bit loads into 64bit: this will automagically get optimized out later */
                  for (unsigned i = 0; i < intr->num_components; i++) {
                     comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
@@ -3040,14 +3096,15 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                  /* writing > 4 components: access the struct and load the appropriate vec4 members */
                  for (unsigned i = 0; i < 2; i++, num_components -= 4) {
                     nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
-                     nir_ssa_def *load = nir_load_deref(&b, strct);
-                     comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
+                     nir_def *load = nir_load_deref(&b, strct);
+                     comp[i * 2] = nir_pack_64_2x32(&b,
+                                                    nir_trim_vector(&b, load, 2));
                     if (num_components > 2)
                        comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
                  }
                  dest = nir_vec(&b, comp, intr->num_components);
               }
-               nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
+               nir_def_rewrite_uses_after(&intr->def, dest, instr);
            }
            _mesa_set_add(deletes, instr);
            break;
@@ -3058,7 +3115,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
      }
    }
    if (func_progress)
-      nir_metadata_preserve(function->impl, nir_metadata_none);
+      nir_metadata_preserve(impl, nir_metadata_none);
    /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
    set_foreach_remove(deletes, he)
      nir_instr_remove((void*)he->key);
@@ -3073,8 +3130,8 @@ lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *
      return false;
    var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
    /* once type is rewritten, rewrite all loads and stores */
-   nir_foreach_function(function, shader)
-      lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
+   nir_foreach_function_impl(impl, shader)
+      lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
    return true;
 }
@@ -3085,14 +3142,12 @@ lower_64bit_vars(nir_shader *shader, bool doubles_only)
    bool progress = false;
    struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
    struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
-   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out)
-      progress |= lower_64bit_vars_loop(shader, var, derefs, deletes, doubles_only);
-   nir_foreach_function(function, shader) {
-      nir_foreach_function_temp_variable(var, function->impl) {
+   nir_foreach_function_impl(impl, shader) {
+      nir_foreach_function_temp_variable(var, impl) {
         if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
            continue;
         var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
-         progress |= lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
+         progress |= lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
      }
    }
    ralloc_free(deletes);
@@ -3100,97 +3155,51 @@ lower_64bit_vars(nir_shader *shader, bool doubles_only)
    if (progress) {
      nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
      nir_lower_phis_to_scalar(shader, false);
-      optimize_nir(shader, NULL);
+      optimize_nir(shader, NULL, true);
    }
    return progress;
 }
 
-static bool
-split_blocks(nir_shader *nir)
-{
-   bool progress = false;
-   bool changed = true;
-   do {
-      progress = false;
-      nir_foreach_shader_out_variable(var, nir) {
-         const struct glsl_type *base_type = glsl_without_array(var->type);
-         nir_variable *members[32]; //can't have more than this without breaking NIR
-         if (!glsl_type_is_struct(base_type))
-            continue;
-         /* TODO: arrays? */
-         if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
-            continue;
-         if (glsl_count_attribute_slots(var->type, false) == 1)
-            continue;
-         unsigned offset = 0;
-         for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
-            members[i] = nir_variable_clone(var, nir);
-            members[i]->type = glsl_get_struct_field(var->type, i);
-            members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
-            members[i]->data.location += offset;
-            offset += glsl_count_attribute_slots(members[i]->type, false);
-            nir_shader_add_variable(nir, members[i]);
-         }
-         nir_foreach_function(function, nir) {
-            bool func_progress = false;
-            if (!function->impl)
-               continue;
-            nir_builder b;
-            nir_builder_init(&b, function->impl);
-            nir_foreach_block(block, function->impl) {
-               nir_foreach_instr_safe(instr, block) {
-                  switch (instr->type) {
-                  case nir_instr_type_deref: {
-                     nir_deref_instr *deref = nir_instr_as_deref(instr);
-                     if (!(deref->modes & nir_var_shader_out))
-                        continue;
-                     if (nir_deref_instr_get_variable(deref) != var)
-                        continue;
-                     if (deref->deref_type != nir_deref_type_struct)
-                        continue;
-                     nir_deref_instr *parent = nir_deref_instr_parent(deref);
-                     if (parent->deref_type != nir_deref_type_var)
-                        continue;
-                     deref->modes = nir_var_shader_temp;
-                     parent->modes = nir_var_shader_temp;
-                     b.cursor = nir_before_instr(instr);
-                     nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
-                     nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
-                     nir_instr_remove(&deref->instr);
-                     func_progress = true;
-                     break;
-                  }
-                  default: break;
-                  }
-               }
-            }
-            if (func_progress)
-               nir_metadata_preserve(function->impl, nir_metadata_none);
-         }
-         var->data.mode = nir_var_shader_temp;
-         changed = true;
-         progress = true;
-      }
-   } while (progress);
-   return changed;
-}
-
 static void
-zink_shader_dump(void *words, size_t size, const char *file)
+zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file)
 {
    FILE *fp = fopen(file, "wb");
    if (fp) {
      fwrite(words, 1, size, fp);
      fclose(fp);
-      fprintf(stderr, "wrote '%s'...\n", file);
+      fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file);
    }
 }
 
+static VkShaderStageFlagBits
+zink_get_next_stage(gl_shader_stage stage)
+{
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
+             VK_SHADER_STAGE_GEOMETRY_BIT |
+             VK_SHADER_STAGE_FRAGMENT_BIT;
+   case MESA_SHADER_TESS_CTRL:
+      return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+   case MESA_SHADER_TESS_EVAL:
+      return VK_SHADER_STAGE_GEOMETRY_BIT |
+             VK_SHADER_STAGE_FRAGMENT_BIT;
+   case MESA_SHADER_GEOMETRY:
+      return VK_SHADER_STAGE_FRAGMENT_BIT;
+   case MESA_SHADER_FRAGMENT:
+   case MESA_SHADER_COMPUTE:
+   case MESA_SHADER_KERNEL:
+      return 0;
+   default:
+      unreachable("invalid shader stage");
+   }
+}
+
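/* Illustrative sketch, not part of the diff: zink_shader_dump is a plain
 * binary blob writer; the SPIR-V words go straight to disk so they can be
 * inspected offline. A standalone equivalent:
 */
#include <stdint.h>
#include <stdio.h>

static void
dump_blob(const void *words, size_t size, const char *file)
{
   FILE *fp = fopen(file, "wb");
   if (fp) {
      fwrite(words, 1, size, fp);
      fclose(fp);
      fprintf(stderr, "wrote '%s'...\n", file);
   }
}

int
main(void)
{
   uint32_t magic = 0x07230203; /* the SPIR-V magic number */
   dump_blob(&magic, sizeof(magic), "dump00.spv");
   return 0;
}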
-VkShaderModule
-zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv)
+struct zink_shader_object
+zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
 {
-   VkShaderModule mod;
    VkShaderModuleCreateInfo smci = {0};
+   VkShaderCreateInfoEXT sci = {0};
    if (!spirv)
      spirv = zs->spirv;
@@ -3199,8 +3208,31 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st
      char buf[256];
      static int i;
      snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
-      zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
+      zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf);
+   }
+
+   sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
+   sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
+   sci.nextStage = zink_get_next_stage(zs->info.stage);
+   sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
+   sci.codeSize = spirv->num_words * sizeof(uint32_t);
+   sci.pCode = spirv->words;
+   sci.pName = "main";
+   VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
+   if (pg) {
+      sci.setLayoutCount = pg->num_dsl;
+      sci.pSetLayouts = pg->dsl;
+   } else {
+      sci.setLayoutCount = zs->info.stage + 1;
+      dsl[zs->info.stage] = zs->precompile.dsl;;
+      sci.pSetLayouts = dsl;
    }
+   VkPushConstantRange pcr;
+   pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
+   pcr.offset = 0;
+   pcr.size = sizeof(struct zink_gfx_push_constant);
+   sci.pushConstantRangeCount = 1;
+   sci.pPushConstantRanges = &pcr;
 
    smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    smci.codeSize = spirv->num_words * sizeof(uint32_t);
@@ -3246,6 +3278,7 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st
         .demote_to_helper_invocation = true,
         .sparse_residency = true,
         .min_lod = true,
+         .workgroup_memory_explicit_layout = true,
      },
      .ubo_addr_format = nir_address_format_32bit_index_offset,
      .ssbo_addr_format = nir_address_format_32bit_index_offset,
@@ -3280,21 +3313,26 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st
    }
 #endif
 
-   VkResult ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod);
+   VkResult ret;
+   struct zink_shader_object obj = {0};
+   if (!can_shobj || !screen->info.have_EXT_shader_object)
+      ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
+   else
+      ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
    bool success = zink_screen_handle_vkresult(screen, ret);
    assert(success);
-   return success ? mod : VK_NULL_HANDLE;
+   return obj;
 }
 
 static void
 prune_io(nir_shader *nir)
 {
    nir_foreach_shader_in_variable_safe(var, nir) {
-      if (!find_var_deref(nir, var))
+      if (!find_var_deref(nir, var) && !find_var_io(nir, var))
         var->data.mode = nir_var_shader_temp;
    }
    nir_foreach_shader_out_variable_safe(var, nir) {
-      if (!find_var_deref(nir, var))
+      if (!find_var_deref(nir, var) && !find_var_io(nir, var))
         var->data.mode = nir_var_shader_temp;
    }
    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
@@ -3309,43 +3347,53 @@ flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
    zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
 }
 
-static nir_ssa_def *
-rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, void *data)
+static nir_def *
+rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
 {
    assert(var);
    const struct glsl_type *type = glsl_without_array(var->type);
    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
    bool is_int = glsl_base_type_is_integer(ret_type);
    unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
-   unsigned dest_size = nir_dest_bit_size(tex->dest);
+   unsigned dest_size = tex->def.bit_size;
    b->cursor = nir_after_instr(&tex->instr);
-   unsigned num_components = nir_dest_num_components(tex->dest);
+   unsigned num_components = tex->def.num_components;
    bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
    if (bit_size == dest_size && !rewrite_depth)
      return NULL;
-   nir_ssa_def *dest = &tex->dest.ssa;
-   if (rewrite_depth && data) {
-      if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
-         flag_shadow_tex(var, data);
-      else
-         mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
-      return NULL;
+   nir_def *dest = &tex->def;
+   if (rewrite_depth && zs) {
+      if (nir_def_components_read(dest) & ~1) {
+         /* this needs recompiles */
+         if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
+            flag_shadow_tex(var, zs);
+         else
+            mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
+         return NULL;
+      }
+      /* If only .x is used in the NIR, then it's effectively not a legacy depth
+       * sample anyway and we don't want to ask for shader recompiles. This is
+       * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or
+       * LUMINANCE, so apps just use the first channel.
+       */
+      tex->def.num_components = 1;
+      tex->is_new_style_shadow = true;
    }
    if (bit_size != dest_size) {
-      tex->dest.ssa.bit_size = bit_size;
+      tex->def.bit_size = bit_size;
      tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
      if (is_int) {
         if (glsl_unsigned_base_type_of(ret_type) == ret_type)
-            dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
+            dest = nir_u2uN(b, &tex->def, dest_size);
         else
-            dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
+            dest = nir_i2iN(b, &tex->def, dest_size);
      } else {
-         dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
+         dest = nir_f2fN(b, &tex->def, dest_size);
      }
      if (rewrite_depth)
         return dest;
-      nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
+      nir_def_rewrite_uses_after(&tex->def, dest, dest->parent_instr);
    } else if (rewrite_depth) {
      return dest;
    }
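/* Illustrative sketch, not part of the diff: the comment above describes the
 * legacy-shadow expansion. SPIR-V returns a scalar depth result, and
 * GL_DEPTH_TEXTURE_MODE decides how that scalar fans out to vec4. A plain C
 * illustration of the RED vs LUMINANCE swizzles:
 */
#include <stdio.h>

enum depth_mode { DEPTH_RED, DEPTH_LUMINANCE };

static void
expand_depth(float d, enum depth_mode mode, float out[4])
{
   if (mode == DEPTH_RED) {
      out[0] = d; out[1] = 0.0f; out[2] = 0.0f; out[3] = 1.0f;
   } else { /* LUMINANCE: depth splatted across rgb */
      out[0] = d; out[1] = d; out[2] = d; out[3] = 1.0f;
   }
}

int
main(void)
{
   float v[4];
   expand_depth(0.5f, DEPTH_LUMINANCE, v);
   printf("%g %g %g %g\n", v[0], v[1], v[2], v[3]); /* 0.5 0.5 0.5 1 */
   return 0;
}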
@@ -3393,31 +3441,31 @@ lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
    const struct glsl_type *type = glsl_without_array(var->type);
    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
    bool is_int = glsl_base_type_is_integer(ret_type);
-   unsigned num_components = nir_dest_num_components(tex->dest);
+   unsigned num_components = tex->def.num_components;
    if (tex->is_shadow)
      tex->is_new_style_shadow = true;
-   nir_ssa_def *dest = rewrite_tex_dest(b, tex, var, NULL);
+   nir_def *dest = rewrite_tex_dest(b, tex, var, NULL);
    assert(dest || !state->shadow_only);
    if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
      return false;
    else if (!dest)
-      dest = &tex->dest.ssa;
+      dest = &tex->def;
    else
-      tex->dest.ssa.num_components = 1;
+      tex->def.num_components = 1;
    if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
      /* these require manual swizzles */
      if (tex->op == nir_texop_tg4) {
         assert(!tex->is_shadow);
-         nir_ssa_def *swizzle;
+         nir_def *swizzle;
         switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
         case PIPE_SWIZZLE_0:
-            swizzle = nir_imm_zero(b, 4, nir_dest_bit_size(tex->dest));
+            swizzle = nir_imm_zero(b, 4, tex->def.bit_size);
            break;
         case PIPE_SWIZZLE_1:
            if (is_int)
-               swizzle = nir_imm_intN_t(b, 4, nir_dest_bit_size(tex->dest));
+               swizzle = nir_imm_intN_t(b, 4, tex->def.bit_size);
            else
-               swizzle = nir_imm_floatN_t(b, 4, nir_dest_bit_size(tex->dest));
+               swizzle = nir_imm_floatN_t(b, 4, tex->def.bit_size);
            break;
         default:
            if (!tex->component)
@@ -3425,101 +3473,373 @@ lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
               tex->component = 0;
            return true;
         }
-         nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
+         nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
         return true;
      }
-      nir_ssa_def *vec[4];
+      nir_def *vec[4];
      for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
         switch (swizzle_key->swizzle[sampler_id].s[i]) {
         case PIPE_SWIZZLE_0:
-            vec[i] = nir_imm_zero(b, 1, nir_dest_bit_size(tex->dest));
+            vec[i] = nir_imm_zero(b, 1, tex->def.bit_size);
            break;
         case PIPE_SWIZZLE_1:
            if (is_int)
-               vec[i] = nir_imm_intN_t(b, 1, nir_dest_bit_size(tex->dest));
+               vec[i] = nir_imm_intN_t(b, 1, tex->def.bit_size);
            else
-               vec[i] = nir_imm_floatN_t(b, 1, nir_dest_bit_size(tex->dest));
+               vec[i] = nir_imm_floatN_t(b, 1, tex->def.bit_size);
            break;
         default:
            vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
            break;
         }
      }
-      nir_ssa_def *swizzle = nir_vec(b, vec, num_components);
-      nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
+      nir_def *swizzle = nir_vec(b, vec, num_components);
+      nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
    } else {
      assert(tex->is_shadow);
-      nir_ssa_def *vec[4] = {dest, dest, dest, dest};
-      nir_ssa_def *splat = nir_vec(b, vec, num_components);
-      nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
+      nir_def *vec[4] = {dest, dest, dest, dest};
+      nir_def *splat = nir_vec(b, vec, num_components);
+      nir_def_rewrite_uses_after(dest, splat, splat->parent_instr);
    }
    return true;
 }
 
+/* Applies in-shader swizzles when necessary for depth/shadow sampling.
+ *
+ * SPIRV only has new-style (scalar result) shadow sampling, so to emulate
+ * !is_new_style_shadow (vec4 result) shadow sampling we lower to a
+ * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR
+ * shader to expand out to vec4. Since this depends on sampler state, it's a
+ * draw-time shader recompile to do so.
+ *
+ * We may also need to apply shader swizzles for
+ * driver_workarounds.needs_zs_shader_swizzle.
+ */
 static bool
 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
 {
+   /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */
    unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
    struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
    return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
 }
 
 static bool
-invert_point_coord_instr(nir_builder *b, nir_instr *instr, void *data)
+invert_point_coord_instr(nir_builder *b, nir_intrinsic_instr *intr,
+                         void *data)
 {
-   if (instr->type != nir_instr_type_intrinsic)
+   if (intr->intrinsic != nir_intrinsic_load_point_coord)
      return false;
-   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   if (intr->intrinsic != nir_intrinsic_load_deref)
+   b->cursor = nir_after_instr(&intr->instr);
+   nir_def *def = nir_vec2(b, nir_channel(b, &intr->def, 0),
+                           nir_fsub_imm(b, 1.0, nir_channel(b, &intr->def, 1)));
+   nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
+   return true;
+}
+
+static bool
+invert_point_coord(nir_shader *nir)
+{
+   if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD))
      return false;
-   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
-   if (deref_var->data.location != VARYING_SLOT_PNTC)
+   return nir_shader_intrinsics_pass(nir, invert_point_coord_instr,
+                                     nir_metadata_dominance, NULL);
+}
+
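/* Illustrative sketch, not part of the diff: the point_coord_yinvert key
 * flips the Y axis of gl_PointCoord (GL's point-sprite origin can be
 * configured as lower-left, while Vulkan's is upper-left), so the pass above
 * rewrites (x, y) to (x, 1 - y). In plain C:
 */
#include <stdio.h>

int
main(void)
{
   float pntc[2] = {0.25f, 0.75f};
   float flipped[2] = {pntc[0], 1.0f - pntc[1]};
   printf("%g %g\n", flipped[0], flipped[1]); /* 0.25 0.25 */
   return 0;
}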
+static bool
+is_residency_code(nir_def *src)
+{
+   nir_instr *parent = src->parent_instr;
+   while (1) {
+      if (parent->type == nir_instr_type_intrinsic) {
+         ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
+         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
+         return false;
+      }
+      if (parent->type == nir_instr_type_tex)
+         return true;
+      assert(parent->type == nir_instr_type_alu);
+      nir_alu_instr *alu = nir_instr_as_alu(parent);
+      parent = alu->src[0].src.ssa->parent_instr;
+   }
+}
+
+static bool
+lower_sparse_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
+{
+   if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
+      b->cursor = nir_before_instr(&instr->instr);
+      nir_def *src0;
+      if (is_residency_code(instr->src[0].ssa))
+         src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
+      else
+         src0 = instr->src[0].ssa;
+      nir_def *src1;
+      if (is_residency_code(instr->src[1].ssa))
+         src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
+      else
+         src1 = instr->src[1].ssa;
+      nir_def *def = nir_iand(b, src0, src1);
+      nir_def_rewrite_uses_after(&instr->def, def, &instr->instr);
+      nir_instr_remove(&instr->instr);
+      return true;
+   }
+   if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
+      return false;
+
+   /* vulkan vec can only be a vec4, but this is (maybe) vec5,
+    * so just rewrite as the first component since ntv is going to use a different
+    * method for storing the residency value anyway
+    */
+   b->cursor = nir_before_instr(&instr->instr);
+   nir_instr *parent = instr->src[0].ssa->parent_instr;
+   if (is_residency_code(instr->src[0].ssa)) {
+      assert(parent->type == nir_instr_type_alu);
+      nir_alu_instr *alu = nir_instr_as_alu(parent);
+      nir_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
+      nir_instr_remove(parent);
+   } else {
+      nir_def *src;
+      if (parent->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
+         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
+         src = intr->src[0].ssa;
+      } else {
+         assert(parent->type == nir_instr_type_alu);
+         nir_alu_instr *alu = nir_instr_as_alu(parent);
+         src = alu->src[0].src.ssa;
+      }
+      if (instr->def.bit_size != 32) {
+         if (instr->def.bit_size == 1)
+            src = nir_ieq_imm(b, src, 1);
+         else
+            src = nir_u2uN(b, src, instr->def.bit_size);
+      }
+      nir_def_rewrite_uses(&instr->def, src);
+      nir_instr_remove(&instr->instr);
+   }
    return true;
 }
 
 static bool
-invert_point_coord(nir_shader *nir)
+lower_sparse(nir_shader *shader)
+{
+   return nir_shader_intrinsics_pass(shader, lower_sparse_instr,
+                                     nir_metadata_dominance, NULL);
+}
+
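/* Illustrative sketch, not part of the diff: lower_sparse_instr normalizes
 * sparse-residency handling, converting raw residency codes to booleans and
 * combining them with a logical AND. The boolean algebra being implemented,
 * in plain C:
 */
#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
   bool texel_a_resident = true;
   bool texel_b_resident = false;
   /* sparse_residency_code_and: both lookups must hit resident memory */
   printf("%d\n", texel_a_resident && texel_b_resident); /* prints 0 */
   return 0;
}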
+static bool
+add_derefs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 {
-   if (!(nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)))
+   bool is_load = false;
+   bool is_input = false;
+   bool is_interp = false;
+   if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
      return false;
-   return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL);
+   unsigned loc = nir_intrinsic_io_semantics(intr).location;
+   nir_src *src_offset = nir_get_io_offset_src(intr);
+   const unsigned slot_offset = src_offset && nir_src_is_const(*src_offset) ? nir_src_as_uint(*src_offset) : 0;
+   unsigned location = loc + slot_offset;
+   unsigned frac = nir_intrinsic_component(intr);
+   unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
+   /* set c aligned/rounded down to dword */
+   unsigned c = frac;
+   if (frac && bit_size < 32)
+      c = frac * bit_size / 32;
+   /* loop over all the variables and rewrite corresponding access */
+   nir_foreach_variable_with_modes(var, b->shader, is_input ? nir_var_shader_in : nir_var_shader_out) {
+      const struct glsl_type *type = var->type;
+      if (nir_is_arrayed_io(var, b->shader->info.stage))
+         type = glsl_get_array_element(type);
+      unsigned slot_count = get_var_slot_count(b->shader, var);
+      /* filter access that isn't specific to this variable */
+      if (var->data.location > location || var->data.location + slot_count <= location)
+         continue;
+      if (var->data.fb_fetch_output != nir_intrinsic_io_semantics(intr).fb_fetch_output)
+         continue;
+      if (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_load && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
+         continue;
+
+      unsigned size = 0;
+      bool is_struct = glsl_type_is_struct(glsl_without_array(type));
+      if (is_struct)
+         size = get_slot_components(var, var->data.location + slot_offset, var->data.location);
+      else if ((var->data.mode == nir_var_shader_out && var->data.location < VARYING_SLOT_VAR0) ||
+               (var->data.mode == nir_var_shader_in && var->data.location < (b->shader->info.stage == MESA_SHADER_VERTEX ? VERT_ATTRIB_GENERIC0 : VARYING_SLOT_VAR0)))
+         size = glsl_type_is_array(type) ? glsl_get_aoa_size(type) : glsl_get_vector_elements(type);
+      else
+         size = glsl_get_vector_elements(glsl_without_array(type));
+      assert(size);
+      if (glsl_type_is_64bit(glsl_without_array(var->type)))
+         size *= 2;
+      if (var->data.location != location && size > 4 && size % 4 && !is_struct) {
+         /* adjust for dvec3-type slot overflow */
+         assert(location > var->data.location);
+         size -= (location - var->data.location) * 4;
+      }
+      assert(size);
+      if (var->data.location_frac + size <= c || var->data.location_frac > c)
+         continue;
+
+      b->cursor = nir_before_instr(&intr->instr);
+      nir_deref_instr *deref = nir_build_deref_var(b, var);
+      if (nir_is_arrayed_io(var, b->shader->info.stage)) {
+         assert(intr->intrinsic != nir_intrinsic_store_output);
+         deref = nir_build_deref_array(b, deref, intr->src[!is_load].ssa);
+      }
+      if (glsl_type_is_array(type)) {
+         /* unroll array derefs */
+         unsigned idx = frac - var->data.location_frac;
+         assert(src_offset);
+         if (var->data.location < VARYING_SLOT_VAR0) {
+            if (src_offset) {
+               /* clip/cull dist and tess levels use different array offset semantics */
+               bool is_clipdist = (b->shader->info.stage != MESA_SHADER_VERTEX || var->data.mode == nir_var_shader_out) &&
+                                  var->data.location >= VARYING_SLOT_CLIP_DIST0 && var->data.location <= VARYING_SLOT_CULL_DIST1;
+               bool is_tess_level = b->shader->info.stage == MESA_SHADER_TESS_CTRL &&
+                                    var->data.location >= VARYING_SLOT_TESS_LEVEL_INNER && var->data.location >= VARYING_SLOT_TESS_LEVEL_OUTER;
+               bool is_builtin_array = is_clipdist || is_tess_level;
+               /* this is explicit for ease of debugging but could be collapsed at some point in the future*/
+               if (nir_src_is_const(*src_offset)) {
+                  unsigned offset = slot_offset;
+                  if (is_builtin_array)
+                     offset *= 4;
+                  deref = nir_build_deref_array_imm(b, deref, offset + idx);
+               } else {
+                  nir_def *offset = src_offset->ssa;
+                  if (is_builtin_array)
+                     nir_imul_imm(b, offset, 4);
+                  deref = nir_build_deref_array(b, deref, idx ? nir_iadd_imm(b, offset, idx) : src_offset->ssa);
+               }
+            } else {
+               deref = nir_build_deref_array_imm(b, deref, idx);
+            }
+            type = glsl_get_array_element(type);
+         } else {
+            /* need to convert possible N*M to [N][M] */
+            nir_def *nm = idx ? nir_iadd_imm(b, src_offset->ssa, idx) : src_offset->ssa;
+            while (glsl_type_is_array(type)) {
+               const struct glsl_type *elem = glsl_get_array_element(type);
+               unsigned type_size = glsl_count_vec4_slots(elem, false, false);
+               nir_def *n = glsl_type_is_array(elem) ? nir_udiv_imm(b, nm, type_size) : nm;
+               if (glsl_type_is_vector_or_scalar(elem) && glsl_type_is_64bit(elem) && glsl_get_vector_elements(elem) > 2)
+                  n = nir_udiv_imm(b, n, 2);
+               deref = nir_build_deref_array(b, deref, n);
+               nm = nir_umod_imm(b, nm, type_size);
+               type = glsl_get_array_element(type);
+            }
+         }
+      } else if (glsl_type_is_struct(type)) {
+         deref = nir_build_deref_struct(b, deref, slot_offset);
+      }
+      if (is_load) {
+         nir_def *load;
+         if (is_interp) {
+            nir_def *interp = intr->src[0].ssa;
+            nir_intrinsic_instr *interp_intr = nir_instr_as_intrinsic(interp->parent_instr);
+            assert(interp_intr);
+            var->data.interpolation = nir_intrinsic_interp_mode(interp_intr);
+            switch (interp_intr->intrinsic) {
+            case nir_intrinsic_load_barycentric_centroid:
+               load = nir_interp_deref_at_centroid(b, intr->num_components, bit_size, &deref->def);
+               break;
+            case nir_intrinsic_load_barycentric_sample:
+               var->data.sample = 1;
+               load = nir_load_deref(b, deref);
+               break;
+            case nir_intrinsic_load_barycentric_pixel:
+               load = nir_load_deref(b, deref);
+               break;
+            case nir_intrinsic_load_barycentric_at_sample:
+               load = nir_interp_deref_at_sample(b, intr->num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
+               break;
+            case nir_intrinsic_load_barycentric_at_offset:
+               load = nir_interp_deref_at_offset(b, intr->num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
+               break;
+            default:
+               unreachable("unhandled interp!");
+            }
+         } else {
+            load = nir_load_deref(b, deref);
+         }
+         /* filter needed components */
+         if (intr->num_components < load->num_components)
+            load = nir_channels(b, load, BITFIELD_MASK(intr->num_components) << (c - var->data.location_frac));
+         nir_def_rewrite_uses(&intr->def, load);
+      } else {
+         nir_def *store = intr->src[0].ssa;
+         assert(!glsl_type_is_array(type));
+         unsigned num_components = glsl_get_vector_elements(type);
+         /* pad/filter components to match deref type */
+         if (intr->num_components < num_components) {
+            nir_def *zero = nir_imm_zero(b, 1, bit_size);
+            nir_def *vec[4] = {zero, zero, zero, zero};
+            u_foreach_bit(i, nir_intrinsic_write_mask(intr))
+               vec[c - var->data.location_frac + i] = nir_channel(b, store, i);
+            store = nir_vec(b, vec, num_components);
+         } if (store->num_components > num_components) {
+            store = nir_channels(b, store, nir_intrinsic_write_mask(intr));
+         }
+         if (store->bit_size != glsl_get_bit_size(type)) {
+            /* this should be some weird bindless io conversion */
+            assert(store->bit_size == 64 && glsl_get_bit_size(type) == 32);
+            assert(num_components != store->num_components);
+            store = nir_unpack_64_2x32(b, store);
+         }
+         nir_store_deref(b, deref, store, BITFIELD_RANGE(c - var->data.location_frac, intr->num_components));
+      }
+      nir_instr_remove(&intr->instr);
+      return true;
+   }
+   unreachable("failed to find variable for explicit io!");
+   return true;
+}
+
+static bool
+add_derefs(nir_shader *nir)
+{
+   return nir_shader_intrinsics_pass(nir, add_derefs_instr,
+                                     nir_metadata_dominance, NULL);
+}
+
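/* Illustrative sketch, not part of the diff: after the deref is built, loads
 * may return more components than the lowered intrinsic asked for, so the
 * pass masks out a contiguous span starting at the access component. A
 * standalone C version of that channel filtering:
 */
#include <stdio.h>

static void
filter_channels(const float *load, unsigned num, unsigned start, float *out)
{
   /* mirrors nir_channels(b, load, BITFIELD_MASK(num) << start) */
   for (unsigned i = 0; i < num; i++)
      out[i] = load[start + i];
}

int
main(void)
{
   float loaded[4] = {1, 2, 3, 4}, out[2];
   filter_channels(loaded, 2, 1, out);
   printf("%g %g\n", out[0], out[1]); /* prints 2 3 */
   return 0;
}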
-static VkShaderModule
-compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir)
+static struct zink_shader_object
+compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
 {
-   VkShaderModule mod = VK_NULL_HANDLE;
    struct zink_shader_info *sinfo = &zs->sinfo;
    prune_io(nir);

    NIR_PASS_V(nir, nir_convert_from_ssa, true);

+   if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))
+      nir_index_ssa_defs(nir_shader_get_entrypoint(nir));
+   if (zink_debug & ZINK_DEBUG_NIR) {
+      fprintf(stderr, "NIR shader:\n---8<---\n");
+      nir_print_shader(nir, stderr);
+      fprintf(stderr, "---8<---\n");
+   }
+
+   struct zink_shader_object obj;
    struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
    if (spirv)
-      mod = zink_shader_spirv_compile(screen, zs, spirv);
+      obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);

    /* TODO: determine if there's any reason to cache spirv output? */
    if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
       zs->spirv = spirv;
    else
-      ralloc_free(spirv);
-   return mod;
+      obj.spirv = spirv;
+   return obj;
 }
-VkShaderModule
-zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
-                    nir_shader *nir, const struct zink_shader_key *key, const void *extra_data)
+struct zink_shader_object
+zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
+                    nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
 {
-   VkShaderModule mod = VK_NULL_HANDLE;
-   struct zink_shader_info *sinfo = &zs->sinfo;
-   bool need_optimize = false;
+   bool need_optimize = true;
    bool inlined_uniforms = false;

+   NIR_PASS_V(nir, add_derefs);
+   NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
    if (key) {
       if (key->inline_uniforms) {
          NIR_PASS_V(nir, nir_inline_uniforms,
@@ -3591,15 +3911,14 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
       case MESA_SHADER_TESS_EVAL:
       case MESA_SHADER_GEOMETRY:
          if (zink_vs_key_base(key)->last_vertex_stage) {
-            if (zs->sinfo.have_xfb)
-               sinfo->last_vertex = true;
-
             if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
                NIR_PASS_V(nir, nir_lower_clip_halfz);
             }
             if (zink_vs_key_base(key)->push_drawid) {
                NIR_PASS_V(nir, lower_drawid);
             }
+         } else {
+            nir->xfb_info = NULL;
          }
          if (zink_vs_key_base(key)->robust_access)
             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
@@ -3639,7 +3958,7 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
             NIR_PASS_V(nir, lower_dual_blend);
          }
          if (zink_fs_key_base(key)->coord_replace_bits)
-            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, false, false);
+            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false);
          if (zink_fs_key_base(key)->point_coord_yinvert)
            NIR_PASS_V(nir, invert_point_coord);
          if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
@@ -3685,13 +4004,13 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
       }
    }
    if (screen->driconf.inline_uniforms) {
-      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
+      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
       NIR_PASS_V(nir, rewrite_bo_access, screen);
       NIR_PASS_V(nir, remove_bo_access, zs);
       need_optimize = true;
    }
    if (inlined_uniforms) {
-      optimize_nir(nir, zs);
+      optimize_nir(nir, zs, true);

       /* This must be done again. */
       NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
@@ -3701,18 +4020,22 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
       if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
          zs->can_inline = false;
    } else if (need_optimize)
-      optimize_nir(nir, zs);
+      optimize_nir(nir, zs, true);
+   NIR_PASS_V(nir, lower_sparse);

-   mod = compile_module(screen, zs, nir);
+   struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
    ralloc_free(nir);
-   return mod;
+   return obj;
 }

-VkShaderModule
+struct zink_shader_object
 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
 {
    nir_shader *nir = zink_shader_deserialize(screen, zs);
-   int set = nir->info.stage == MESA_SHADER_FRAGMENT;
+   /* TODO: maybe compile multiple variants for different set counts for compact mode? */
+   int set = zs->info.stage == MESA_SHADER_FRAGMENT;
+   if (screen->info.have_EXT_shader_object)
+      set = zs->info.stage;
    unsigned offsets[4];
    zink_descriptor_shader_get_binding_offsets(zs, offsets);
    nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
@@ -3736,23 +4059,45 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
       default: break;
       }
    }
-   optimize_nir(nir, zs);
-   VkShaderModule mod = compile_module(screen, zs, nir);
+   NIR_PASS_V(nir, add_derefs);
+   NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
+   if (screen->driconf.inline_uniforms) {
+      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
+      NIR_PASS_V(nir, rewrite_bo_access, screen);
+      NIR_PASS_V(nir, remove_bo_access, zs);
+   }
+   optimize_nir(nir, zs, true);
+   zink_descriptor_shader_init(screen, zs);
+   nir_shader *nir_clone = NULL;
+   if (screen->info.have_EXT_shader_object)
+      nir_clone = nir_shader_clone(nir, nir);
+   struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
+   if (screen->info.have_EXT_shader_object && !zs->info.internal) {
+      /* always try to pre-generate a tcs in case it's needed */
+      if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
+         nir_shader *nir_tcs = NULL;
+         /* use max pcp for compat */
+         zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, nir_clone, 32, &nir_tcs);
+         nir_tcs->info.separate_shader = true;
+         zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
+         ralloc_free(nir_tcs);
+      }
+   }
    ralloc_free(nir);
-   return mod;
+   spirv_shader_delete(obj.spirv);
+   obj.spirv = NULL;
+   return obj;
 }
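One memory-management detail worth noting in zink_shader_compile_separate: nir_shader_clone takes a ralloc memory context as its first argument, and the clone here is parented to the original shader. A sketch of the ownership pattern this sets up, assuming nothing else takes a reference to the clone:

   /* the clone is a ralloc child of 'nir' ... */
   nir_shader *nir_clone = nir_shader_clone(nir, nir);

   /* ... use nir_clone, e.g. feed it to zink_shader_tcs_create ... */

   ralloc_free(nir);   /* one free tears down both shaders */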
 static bool
-lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
+lower_baseinstance_instr(nir_builder *b, nir_intrinsic_instr *intr,
+                         void *data)
 {
-   if (instr->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    if (intr->intrinsic != nir_intrinsic_load_instance_id)
       return false;
-   b->cursor = nir_after_instr(instr);
-   nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
-   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
+   b->cursor = nir_after_instr(&intr->instr);
+   nir_def *def = nir_isub(b, &intr->def, nir_load_base_instance(b));
+   nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
    return true;
 }

@@ -3761,7 +4106,8 @@ lower_baseinstance(nir_shader *shader)
 {
    if (shader->info.stage != MESA_SHADER_VERTEX)
       return false;
-   return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
+   return nir_shader_intrinsics_pass(shader, lower_baseinstance_instr,
+                                     nir_metadata_dominance, NULL);
 }

 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
@@ -3813,7 +4159,7 @@ unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
    }
    nir_fixup_deref_modes(shader);
    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-   optimize_nir(shader, NULL);
+   optimize_nir(shader, NULL, true);

    struct glsl_struct_field field = {0};
    field.name = ralloc_strdup(shader, "base");
@@ -3915,20 +4261,8 @@ analyze_io(struct zink_shader *zs, nir_shader *shader)
          ret = true;
          break;
       }
-      case nir_intrinsic_ssbo_atomic_fadd:
-      case nir_intrinsic_ssbo_atomic_add:
-      case nir_intrinsic_ssbo_atomic_imin:
-      case nir_intrinsic_ssbo_atomic_umin:
-      case nir_intrinsic_ssbo_atomic_imax:
-      case nir_intrinsic_ssbo_atomic_umax:
-      case nir_intrinsic_ssbo_atomic_and:
-      case nir_intrinsic_ssbo_atomic_or:
-      case nir_intrinsic_ssbo_atomic_xor:
-      case nir_intrinsic_ssbo_atomic_exchange:
-      case nir_intrinsic_ssbo_atomic_comp_swap:
-      case nir_intrinsic_ssbo_atomic_fmin:
-      case nir_intrinsic_ssbo_atomic_fmax:
-      case nir_intrinsic_ssbo_atomic_fcomp_swap:
+      case nir_intrinsic_ssbo_atomic:
+      case nir_intrinsic_ssbo_atomic_swap:
       case nir_intrinsic_load_ssbo:
          zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
          break;
@@ -3991,13 +4325,18 @@ lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
       return false;

    nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
-   if (!var)
+   if (!var) {
       var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
+      if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+         bindless->bindless[1] = var;
+      else
+         bindless->bindless[0] = var;
+   }
    b->cursor = nir_before_instr(in);
    nir_deref_instr *deref = nir_build_deref_var(b, var);
    if (glsl_type_is_array(var->type))
       deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
-   nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
+   nir_src_rewrite(&tex->src[idx].src, &deref->def);

    /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
     * match up with it in contrast to normal sampler ops where things are a bit more flexible;
@@ -4011,8 +4350,8 @@ lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
    unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
    unsigned coord_components = nir_src_num_components(tex->src[c].src);
    if (coord_components < needed_components) {
-      nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
-      nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
+      nir_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
+      nir_src_rewrite(&tex->src[c].src, def);
       tex->coord_components = needed_components;
    }
    return true;
@@ -4030,21 +4369,8 @@ lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)

    /* convert bindless intrinsics to deref intrinsics */
    switch (instr->intrinsic) {
-   OP_SWAP(atomic_add)
-   OP_SWAP(atomic_and)
-   OP_SWAP(atomic_comp_swap)
-   OP_SWAP(atomic_dec_wrap)
-   OP_SWAP(atomic_exchange)
-   OP_SWAP(atomic_fadd)
-   OP_SWAP(atomic_fmax)
-   OP_SWAP(atomic_fmin)
-   OP_SWAP(atomic_imax)
-   OP_SWAP(atomic_imin)
-   OP_SWAP(atomic_inc_wrap)
-   OP_SWAP(atomic_or)
-   OP_SWAP(atomic_umax)
-   OP_SWAP(atomic_umin)
-   OP_SWAP(atomic_xor)
+   OP_SWAP(atomic)
+   OP_SWAP(atomic_swap)
    OP_SWAP(format)
    OP_SWAP(load)
    OP_SWAP(order)
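OP_SWAP is defined earlier in this file, outside this excerpt. A plausible shape for it, shown here as an assumption rather than the verbatim macro, is a case label that renames a bindless image intrinsic to its deref-based twin and falls through to the shared rewrite code below:

   /* assumed definition, for illustration only */
   #define OP_SWAP(OP) \
      case nir_intrinsic_bindless_image_##OP: \
         instr->intrinsic = nir_intrinsic_image_deref_##OP; \
         break;

The payoff of NIR's unified-atomics rework is visible in this hunk: fifteen per-operation cases collapse to just atomic and atomic_swap, because the operation now travels in the intrinsic's nir_atomic_op index instead of its name.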
@@ -4064,7 +4390,7 @@ lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
    nir_deref_instr *deref = nir_build_deref_var(b, var);
    if (glsl_type_is_array(var->type))
       deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
-   nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
+   nir_src_rewrite(&instr->src[0], &deref->def);
    return true;
 }

@@ -4075,23 +4401,22 @@ lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
       return false;
    nir_fixup_deref_modes(shader);
    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-   optimize_nir(shader, NULL);
+   optimize_nir(shader, NULL, true);
    return true;
 }

 /* convert shader image/texture io variables to int64 handles for bindless indexing */
 static bool
-lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
+lower_bindless_io_instr(nir_builder *b, nir_intrinsic_instr *instr,
+                        void *data)
 {
-   if (in->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
-   if (instr->intrinsic != nir_intrinsic_load_deref &&
-       instr->intrinsic != nir_intrinsic_store_deref)
+   bool is_load = false;
+   bool is_input = false;
+   bool is_interp = false;
+   if (!filter_io_instr(instr, &is_load, &is_input, &is_interp))
       return false;

-   nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
-   nir_variable *var = nir_deref_instr_get_variable(src_deref);
+   nir_variable *var = find_var_with_location_frac(b->shader, nir_intrinsic_io_semantics(instr).location, nir_intrinsic_component(instr), false, is_input ? nir_var_shader_in : nir_var_shader_out);
    if (var->data.bindless)
       return false;
    if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
@@ -4099,26 +4424,16 @@ lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
    if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
       return false;

-   var->type = glsl_int64_t_type();
+   var->type = glsl_vector_type(GLSL_TYPE_INT, 2);
    var->data.bindless = 1;
-   b->cursor = nir_before_instr(in);
-   nir_deref_instr *deref = nir_build_deref_var(b, var);
-   if (instr->intrinsic == nir_intrinsic_load_deref) {
-      nir_ssa_def *def = nir_load_deref(b, deref);
-      nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
-      nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
-   } else {
-      nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
-   }
-   nir_instr_remove(in);
-   nir_instr_remove(&src_deref->instr);
    return true;
 }

 static bool
 lower_bindless_io(nir_shader *shader)
 {
-   return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
+   return nir_shader_intrinsics_pass(shader, lower_bindless_io_instr,
+                                     nir_metadata_dominance, NULL);
 }
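The retype to glsl_vector_type(GLSL_TYPE_INT, 2) means a 64-bit bindless handle now crosses the shader interface as two 32-bit words rather than a single int64. A sketch of the equivalence using the stock pack/unpack opcodes (hypothetical builder context, not code from this pass):

   nir_def *handle = nir_load_var(b, var);           /* 64-bit handle */
   nir_def *words  = nir_unpack_64_2x32(b, handle);  /* ivec2-sized i/o payload */
   nir_def *again  = nir_pack_64_2x32(b, words);     /* reassembled handle */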
 static uint32_t
@@ -4246,24 +4561,24 @@ convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
          continue;
       if (tex->src[c].src.ssa->num_components == tex->coord_components)
          continue;
-      nir_ssa_def *def;
-      nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
+      nir_def *def;
+      nir_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
       if (tex->src[c].src.ssa->num_components == 1)
          def = nir_vec2(b, tex->src[c].src.ssa, zero);
       else
          def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
-      nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
+      nir_src_rewrite(&tex->src[c].src, def);
    }
    b->cursor = nir_after_instr(instr);
    unsigned needed_components = nir_tex_instr_dest_size(tex);
-   unsigned num_components = tex->dest.ssa.num_components;
+   unsigned num_components = tex->def.num_components;
    if (needed_components > num_components) {
-      tex->dest.ssa.num_components = needed_components;
+      tex->def.num_components = needed_components;
       assert(num_components < 3);
       /* take either xz or just x since this is promoted to 2D from 1D */
       uint32_t mask = num_components == 2 ? (1|4) : 1;
-      nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
-      nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
+      nir_def *dst = nir_channels(b, &tex->def, mask);
+      nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
    }
    return true;
 }
@@ -4290,10 +4605,8 @@ lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
 static void
 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
 {
-   nir_foreach_function(function, shader) {
-      if (!function->impl)
-         continue;
-      nir_foreach_block_safe(block, function->impl) {
+   nir_foreach_function_impl(impl, shader) {
+      nir_foreach_block_safe(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_tex) {
               nir_tex_instr *tex = nir_instr_as_tex(instr);
@@ -4305,24 +4618,14 @@ scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
               if (intr->intrinsic == nir_intrinsic_image_deref_load ||
                   intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
                   intr->intrinsic == nir_intrinsic_image_deref_store ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
+                  intr->intrinsic == nir_intrinsic_image_deref_atomic ||
+                  intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
                   intr->intrinsic == nir_intrinsic_image_deref_size ||
                   intr->intrinsic == nir_intrinsic_image_deref_samples ||
                   intr->intrinsic == nir_intrinsic_image_deref_format ||
                   intr->intrinsic == nir_intrinsic_image_deref_order) {
-                  nir_variable *var =
-                     nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
+                  nir_variable *var = nir_intrinsic_get_var(intr, 0);
                  /* Structs have been lowered already, so get_aoa_size is sufficient. */
                  const unsigned size =
@@ -4337,9 +4640,10 @@ scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
               static bool warned = false;
               if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
                  switch (intr->intrinsic) {
-                  case nir_intrinsic_image_deref_atomic_add: {
+                  case nir_intrinsic_image_deref_atomic: {
                     nir_variable *var = nir_intrinsic_get_var(intr, 0);
-                     if (util_format_is_float(var->data.image.format))
+                     if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd &&
+                         util_format_is_float(var->data.image.format))
                        fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
                     break;
                  }
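Both of these hunks are fallout from NIR's consolidated atomics: the per-operation image intrinsics were replaced by one intrinsic carrying a nir_atomic_op index, so checks now filter on the op as well. A sketch of how a test like the float-atomic warning generalizes under that scheme (the helper name is illustrative, not from this file):

   static bool
   is_float_atomic_op(nir_intrinsic_instr *intr)
   {
      if (intr->intrinsic != nir_intrinsic_image_deref_atomic &&
          intr->intrinsic != nir_intrinsic_image_deref_atomic_swap)
         return false;
      /* the operation is an index now, not part of the intrinsic name */
      switch (nir_intrinsic_atomic_op(intr)) {
      case nir_atomic_op_fadd:
      case nir_atomic_op_fmin:
      case nir_atomic_op_fmax:
      case nir_atomic_op_fcmpxchg:
         return true;
      default:
         return false;
      }
   }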
@@ -4353,90 +4657,6 @@ scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
 }

 static bool
-is_residency_code(nir_ssa_def *src)
-{
-   nir_instr *parent = src->parent_instr;
-   while (1) {
-      if (parent->type == nir_instr_type_intrinsic) {
-         ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
-         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
-         return false;
-      }
-      if (parent->type == nir_instr_type_tex)
-         return true;
-      assert(parent->type == nir_instr_type_alu);
-      nir_alu_instr *alu = nir_instr_as_alu(parent);
-      parent = alu->src[0].src.ssa->parent_instr;
-   }
-}
-
-static bool
-lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
-{
-   if (in->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
-   if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
-      b->cursor = nir_before_instr(&instr->instr);
-      nir_ssa_def *src0;
-      if (is_residency_code(instr->src[0].ssa))
-         src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
-      else
-         src0 = instr->src[0].ssa;
-      nir_ssa_def *src1;
-      if (is_residency_code(instr->src[1].ssa))
-         src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
-      else
-         src1 = instr->src[1].ssa;
-      nir_ssa_def *def = nir_iand(b, src0, src1);
-      nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
-      nir_instr_remove(in);
-      return true;
-   }
-   if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
-      return false;
-
-   /* vulkan vec can only be a vec4, but this is (maybe) vec5,
-    * so just rewrite as the first component since ntv is going to use a different
-    * method for storing the residency value anyway
-    */
-   b->cursor = nir_before_instr(&instr->instr);
-   nir_instr *parent = instr->src[0].ssa->parent_instr;
-   if (is_residency_code(instr->src[0].ssa)) {
-      assert(parent->type == nir_instr_type_alu);
-      nir_alu_instr *alu = nir_instr_as_alu(parent);
-      nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
-      nir_instr_remove(parent);
-   } else {
-      nir_ssa_def *src;
-      if (parent->type == nir_instr_type_intrinsic) {
-         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
-         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
-         src = intr->src[0].ssa;
-      } else {
-         assert(parent->type == nir_instr_type_alu);
-         nir_alu_instr *alu = nir_instr_as_alu(parent);
-         src = alu->src[0].src.ssa;
-      }
-      if (instr->dest.ssa.bit_size != 32) {
-         if (instr->dest.ssa.bit_size == 1)
-            src = nir_ieq_imm(b, src, 1);
-         else
-            src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
-      }
-      nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
-      nir_instr_remove(in);
-   }
-   return true;
-}
-
-static bool
-lower_sparse(nir_shader *shader)
-{
-   return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
-}
-
-static bool
 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
 {
    if (in->type != nir_instr_type_tex)
@@ -4483,11 +4703,11 @@ split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
    default:
       return false;
    }
-   unsigned num_components = nir_dest_num_components(alu->dest.dest);
+   unsigned num_components = alu->def.num_components;
    if (num_components == 1)
       return false;
    b->cursor = nir_before_instr(in);
-   nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
+   nir_def *dests[NIR_MAX_VEC_COMPONENTS];
    for (unsigned i = 0; i < num_components; i++) {
       if (alu->op == nir_op_bitfield_insert)
          dests[i] = nir_bitfield_insert(b,
@@ -4506,8 +4726,8 @@ split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
                                         nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
                                         nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
    }
-   nir_ssa_def *dest = nir_vec(b, dests, num_components);
-   nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
+   nir_def *dest = nir_vec(b, dests, num_components);
+   nir_def_rewrite_uses_after(&alu->def, dest, in);
    nir_instr_remove(in);
    return true;
 }
@@ -4522,8 +4742,8 @@ split_bitfields(nir_shader *shader)
 static void
 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
 {
-   nir_foreach_function(function, nir) {
-      nir_foreach_block(block, function->impl) {
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;
@@ -4547,8 +4767,8 @@ rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
 static void
 type_image(nir_shader *nir, nir_variable *var)
 {
-   nir_foreach_function(function, nir) {
-      nir_foreach_block(block, function->impl) {
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
@@ -4556,17 +4776,8 @@ type_image(nir_shader *nir, nir_variable *var)
            if (intr->intrinsic == nir_intrinsic_image_deref_load ||
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
                intr->intrinsic == nir_intrinsic_image_deref_store ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
+               intr->intrinsic == nir_intrinsic_image_deref_atomic ||
+               intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
                intr->intrinsic == nir_intrinsic_image_deref_samples ||
                intr->intrinsic == nir_intrinsic_image_deref_format ||
                intr->intrinsic == nir_intrinsic_image_deref_order) {
@@ -4590,8 +4801,8 @@ type_image(nir_shader *nir, nir_variable *var)
         }
      }
   }
-   nir_foreach_function(function, nir) {
-      nir_foreach_block(block, function->impl) {
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
@@ -4619,72 +4830,22 @@ type_image(nir_shader *nir, nir_variable *var)
    var->data.mode = nir_var_shader_temp;
 }
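The split_bitfields_instr hunk above shows the standard NIR scalarization idiom. Stripped of the bitfield-specific branches it looks like this (a sketch, not the literal pass body):

   /* rebuild a vector ubfe as one scalar op per channel */
   nir_def *dests[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < num_components; i++)
      dests[i] = nir_ubitfield_extract(b,
                    nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
                    nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
                    nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
   nir_def *vec = nir_vec(b, dests, num_components);
   /* rewrite uses *after* 'in' so the new scalar ops keep their sources */
   nir_def_rewrite_uses_after(&alu->def, vec, in);
   nir_instr_remove(in);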
-static nir_variable *
-find_sampler_var(nir_shader *nir, unsigned texture_index)
-{
-   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
-      unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
-      if ((glsl_type_is_texture(glsl_without_array(var->type)) || glsl_type_is_sampler(glsl_without_array(var->type))) &&
-          (var->data.binding == texture_index || (var->data.binding < texture_index && var->data.binding + size > texture_index)))
-         return var;
-   }
-   return NULL;
-}
-
 static bool
 type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
 {
    bool progress = false;
-   nir_foreach_function(function, nir) {
-      nir_foreach_block(block, function->impl) {
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_tex)
               continue;
            nir_tex_instr *tex = nir_instr_as_tex(instr);
-            switch (tex->op) {
-            case nir_texop_lod:
-            case nir_texop_txs:
-            case nir_texop_query_levels:
-            case nir_texop_texture_samples:
-            case nir_texop_samples_identical:
-               continue;
-            default:
-               break;
-            }
-            *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
-            nir_variable *var = find_sampler_var(nir, tex->texture_index);
+            if (nir_tex_instr_need_sampler(tex))
+               *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
+            nir_variable *var = nir_find_sampler_variable_with_tex_index(nir, tex->texture_index);
            assert(var);
-            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
-               continue;
-            const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
-            unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
-            if (size > 1)
-               img_type = glsl_array_type(img_type, size, 0);
-            var->type = img_type;
-            progress = true;
-         }
-      }
-   }
-   nir_foreach_function(function, nir) {
-      nir_foreach_block(block, function->impl) {
-         nir_foreach_instr(instr, block) {
-            if (instr->type != nir_instr_type_tex)
-               continue;
-            nir_tex_instr *tex = nir_instr_as_tex(instr);
-            switch (tex->op) {
-            case nir_texop_lod:
-            case nir_texop_txs:
-            case nir_texop_query_levels:
-            case nir_texop_texture_samples:
-            case nir_texop_samples_identical:
-               break;
-            default:
-               continue;
-            }
-            *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
-            nir_variable *var = find_sampler_var(nir, tex->texture_index);
-            assert(var);
-            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
+            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID &&
+                nir_tex_instr_is_query(tex))
               continue;
            const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
            unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
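This hunk replaces two hand-rolled texop switches with core NIR helpers: nir_find_sampler_variable_with_tex_index subsumes the deleted find_sampler_var (including the binding-range match for arrays), and the query/sampler distinction becomes two predicate calls. A sketch of the intent, with an illustrative helper name:

   static void
   account_sampler(nir_tex_instr *tex, unsigned *sampler_mask)
   {
      /* txs/query_levels/texture_samples and friends never bind a sampler */
      if (nir_tex_instr_need_sampler(tex))
         *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
   }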
@@ -4728,31 +4889,71 @@ type_images(nir_shader *nir, unsigned *sampler_mask)
 static bool
 fixup_io_locations(nir_shader *nir)
 {
-   nir_variable_mode mode = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
-   /* i/o interface blocks are required to be EXACT matches between stages:
-    * iterate over all locations and set locations incrementally
-    */
-   unsigned slot = 0;
-   for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
-      if (nir_slot_is_sysval_output(i))
-         continue;
-      nir_variable *var = nir_find_variable_with_location(nir, mode, i);
-      if (!var) {
-         /* locations used between stages are not required to be contiguous */
-         if (i >= VARYING_SLOT_VAR0)
-            slot++;
-         continue;
+   nir_variable_mode modes;
+   if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX)
+      modes = nir_var_shader_in | nir_var_shader_out;
+   else
+      modes = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
+   u_foreach_bit(mode, modes) {
+      nir_variable_mode m = BITFIELD_BIT(mode);
+      if ((m == nir_var_shader_in && ((nir->info.inputs_read & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == nir->info.inputs_read)) ||
+          (m == nir_var_shader_out && ((nir->info.outputs_written | nir->info.outputs_read) & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == (nir->info.outputs_written | nir->info.outputs_read))) {
+         /* this is a special heuristic to catch ARB/fixedfunc shaders which have different rules:
+          * - i/o interface blocks don't need to match
+          * - any location can be present or not
+          * - it just has to work
+          *
+          * VAR0 is the only user varying that mesa can produce in this case, so overwrite POS
+          * since it's a builtin and yolo it with all the other legacy crap
+          */
+         nir_foreach_variable_with_modes(var, nir, m) {
+            if (nir_slot_is_sysval_output(var->data.location, MESA_SHADER_NONE))
+               continue;
+            if (var->data.location == VARYING_SLOT_VAR0)
+               var->data.driver_location = 0;
+            else if (var->data.patch)
+               var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
+            else
+               var->data.driver_location = var->data.location;
+         }
+         return true;
+      }
+      /* i/o interface blocks are required to be EXACT matches between stages:
+       * iterate over all locations and set locations incrementally
+       */
+      unsigned slot = 0;
+      for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
+         if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
+            continue;
+         bool found = false;
+         unsigned size = 0;
+         nir_foreach_variable_with_modes(var, nir, m) {
+            if (var->data.location != i)
+               continue;
+            /* only add slots for non-component vars or first-time component vars */
+            if (!var->data.location_frac || !size) {
+               /* ensure variable is given enough slots */
+               if (nir_is_arrayed_io(var, nir->info.stage))
+                  size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
+               else
+                  size += glsl_count_vec4_slots(var->type, false, false);
+            }
+            if (var->data.patch)
+               var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
+            else
+               var->data.driver_location = slot;
+            found = true;
+         }
+         slot += size;
+         if (found) {
+            /* ensure the consumed slots aren't double iterated */
+            i += size - 1;
+         } else {
+            /* locations used between stages are not required to be contiguous */
+            if (i >= VARYING_SLOT_VAR0)
+               slot++;
+         }
       }
-      unsigned size;
-      /* ensure variable is given enough slots */
-      if (nir_is_arrayed_io(var, nir->info.stage))
-         size = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
-      else
-         size = glsl_count_vec4_slots(var->type, false, false);
-      var->data.driver_location = slot;
-      slot += size;
-      /* ensure the consumed slots aren't double iterated */
-      i += size - 1;
    }
    return true;
 }
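The slot accounting is easier to follow on a concrete case. Assume a hypothetical tess-ctrl per-vertex output vec4 foo[][2] at VARYING_SLOT_VAR3 (the outer, per-vertex dimension is what nir_is_arrayed_io peels off):

   /* arrayed io: count slots of the element type, not the per-vertex array */
   const struct glsl_type *elem = glsl_get_array_element(var->type); /* vec4[2] */
   unsigned size = glsl_count_vec4_slots(elem, false, false);        /* == 2 */
   var->data.driver_location = slot;  /* occupies driver slots [slot, slot+2) */
   slot += size;                      /* next variable starts after the array */
   i += size - 1;                     /* skip the consumed VARYING_SLOTs */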
@@ -4769,9 +4970,356 @@ zink_flat_flags(struct nir_shader *shader)
    return flat_flags;
 }

+static nir_variable *
+find_io_var_with_semantics(nir_shader *nir, nir_variable_mode mode, nir_variable_mode realmode, nir_io_semantics s, unsigned location, unsigned c, bool is_load)
+{
+   nir_foreach_variable_with_modes(var, nir, mode) {
+      const struct glsl_type *type = var->type;
+      nir_variable_mode m = var->data.mode;
+      var->data.mode = realmode;
+      if (nir_is_arrayed_io(var, nir->info.stage))
+         type = glsl_get_array_element(type);
+      var->data.mode = m;
+      if (var->data.fb_fetch_output != s.fb_fetch_output)
+         continue;
+      if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && s.dual_source_blend_index != var->data.index)
+         continue;
+      unsigned num_slots = var->data.compact ? DIV_ROUND_UP(glsl_array_size(type), 4) : glsl_count_attribute_slots(type, false);
+      if (var->data.location > location || var->data.location + num_slots <= location)
+         continue;
+      unsigned num_components = glsl_get_vector_elements(glsl_without_array(type));
+      if (glsl_type_contains_64bit(type)) {
+         num_components *= 2;
+         if (location > var->data.location) {
+            unsigned sub_components = (location - var->data.location) * 4;
+            if (sub_components > num_components)
+               continue;
+            num_components -= sub_components;
+         }
+      }
+      if (var->data.location_frac > c || var->data.location_frac + num_components <= c)
+         continue;
+      return var;
+   }
+   return NULL;
+}
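find_io_var_with_semantics resolves a lowered i/o intrinsic back to a variable, and everything it matches on comes straight from the intrinsic's attached semantics. In sketch form, the fields consumed here and in rework_io_vars below:

   nir_io_semantics s = nir_intrinsic_io_semantics(intr);
   /* s.location:  VARYING_SLOT_* / FRAG_RESULT_* / VERT_ATTRIB_* base slot */
   /* s.num_slots: extent of the (possibly indirectly indexed) slot range  */
   unsigned frac = nir_intrinsic_component(intr);  /* first vec4 component */
   nir_src *off = nir_get_io_offset_src(intr);     /* const or SSA slot offset */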
+
+static void
+rework_io_vars(nir_shader *nir, nir_variable_mode mode)
+{
+   assert(mode == nir_var_shader_out || mode == nir_var_shader_in);
+   assert(util_bitcount(mode) == 1);
+   bool found = false;
+   /* store old vars */
+   nir_foreach_variable_with_modes(var, nir, mode) {
+      if (nir->info.stage == MESA_SHADER_TESS_CTRL && mode == nir_var_shader_out)
+         var->data.compact |= var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
+      /* stash vars in this mode for now */
+      var->data.mode = nir_var_mem_shared;
+      found = true;
+   }
+   if (!found) {
+      if (mode == nir_var_shader_out)
+         found = nir->info.outputs_written || nir->info.outputs_read;
+      else
+         found = nir->info.inputs_read;
+      if (!found)
+         return;
+   }
+   /* scan for vars using indirect array access */
+   BITSET_DECLARE(indirect_access, 128);
+   BITSET_ZERO(indirect_access);
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            bool is_load = false;
+            bool is_input = false;
+            bool is_interp = false;
+            if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+               continue;
+            nir_src *src_offset = nir_get_io_offset_src(intr);
+            if (!is_input && !src_offset)
+               continue;
+            if (mode == nir_var_shader_in && !is_input)
+               continue;
+            if (mode == nir_var_shader_out && is_input)
+               continue;
+            nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+            if (!nir_src_is_const(*src_offset))
+               BITSET_SET(indirect_access, s.location);
+         }
+      }
+   }
+   /* loop and create vars */
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            bool is_load = false;
+            bool is_input = false;
+            bool is_interp = false;
+            if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+               continue;
+            if (mode == nir_var_shader_in && !is_input)
+               continue;
+            if (mode == nir_var_shader_out && is_input)
+               continue;
+            nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+            unsigned slot_offset = 0;
+            bool is_indirect = BITSET_TEST(indirect_access, s.location);
+            nir_src *src_offset = nir_get_io_offset_src(intr);
+            if (src_offset && !is_indirect) {
+               assert(nir_src_is_const(*src_offset));
+               slot_offset = nir_src_as_uint(*src_offset);
+            }
+            unsigned location = s.location + slot_offset;
+            unsigned frac = nir_intrinsic_component(intr);
+            unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
+            /* set c aligned/rounded down to dword */
+            unsigned c = nir_slot_is_sysval_output(location, MESA_SHADER_NONE) ? 0 : frac;
+            if (frac && bit_size < 32)
+               c = frac * bit_size / 32;
+            nir_alu_type type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
+            /* ensure dword is filled with like-sized components */
+            unsigned max_components = intr->num_components;
+            if (mode == nir_var_shader_out && nir->info.stage == MESA_SHADER_FRAGMENT) {
+               switch (s.location) {
+               case FRAG_RESULT_DEPTH:
+               case FRAG_RESULT_STENCIL:
+               case FRAG_RESULT_SAMPLE_MASK:
+                  max_components = 1;
+                  break;
+               default:
+                  break;
+               }
+            } else if ((nir->info.stage != MESA_SHADER_VERTEX || mode != nir_var_shader_in) && s.location < VARYING_SLOT_VAR0) {
+               switch (s.location) {
+               case VARYING_SLOT_FOGC:
+                  /* use intr components */
+                  break;
+               case VARYING_SLOT_POS:
+               case VARYING_SLOT_COL0:
+               case VARYING_SLOT_COL1:
+               case VARYING_SLOT_TEX0:
+               case VARYING_SLOT_TEX1:
+               case VARYING_SLOT_TEX2:
+               case VARYING_SLOT_TEX3:
+               case VARYING_SLOT_TEX4:
+               case VARYING_SLOT_TEX5:
+               case VARYING_SLOT_TEX6:
+               case VARYING_SLOT_TEX7:
+               case VARYING_SLOT_BFC0:
+               case VARYING_SLOT_BFC1:
+               case VARYING_SLOT_EDGE:
+               case VARYING_SLOT_CLIP_VERTEX:
+               case VARYING_SLOT_PNTC:
+               case VARYING_SLOT_BOUNDING_BOX0:
+               case VARYING_SLOT_BOUNDING_BOX1:
+                  max_components = 4;
+                  break;
+               case VARYING_SLOT_CLIP_DIST0:
+               case VARYING_SLOT_CLIP_DIST1:
+                  max_components = s.num_slots;
+                  break;
+               case VARYING_SLOT_CULL_DIST0:
+               case VARYING_SLOT_CULL_DIST1:
+                  max_components = s.num_slots;
+                  break;
+               case VARYING_SLOT_TESS_LEVEL_OUTER:
+                  max_components = 4;
+                  break;
+               case VARYING_SLOT_TESS_LEVEL_INNER:
+                  max_components = 2;
+                  break;
+               case VARYING_SLOT_PRIMITIVE_ID:
+               case VARYING_SLOT_LAYER:
+               case VARYING_SLOT_VIEWPORT:
+               case VARYING_SLOT_FACE:
+               case VARYING_SLOT_PSIZ:
+               case VARYING_SLOT_VIEW_INDEX:
+               case VARYING_SLOT_VIEWPORT_MASK:
+                  max_components = 1;
+                  break;
+               default:
+                  unreachable("???");
+               }
+            } else if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
+               if (s.location == VERT_ATTRIB_POINT_SIZE)
+                  max_components = 1;
+               else if (s.location < VERT_ATTRIB_GENERIC0)
+                  max_components = 4;
+               else
+                  max_components = frac + max_components;
+            } else if (bit_size == 16)
+               max_components = align(max_components, 2);
+            else if (bit_size == 8)
+               max_components = align(max_components, 4);
+            if (c + (bit_size == 64 ? max_components * 2 : max_components) > 4)
+               c = 0;
+            const struct glsl_type *vec_type;
+            bool is_compact = false;
+            if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
+               vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(type), max_components);
+            } else {
+               switch (s.location) {
+               case VARYING_SLOT_CLIP_DIST0:
+               case VARYING_SLOT_CLIP_DIST1:
+               case VARYING_SLOT_CULL_DIST0:
+               case VARYING_SLOT_CULL_DIST1:
+               case VARYING_SLOT_TESS_LEVEL_OUTER:
+               case VARYING_SLOT_TESS_LEVEL_INNER:
+                  vec_type = glsl_array_type(glsl_float_type(), max_components, sizeof(uint32_t));
+                  is_compact = true;
+                  break;
+               default:
+                  vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(type), max_components);
+                  break;
+               }
+            }
+            /* reset the mode for nir_is_arrayed_io to work */
+            bool is_arrayed = io_instr_is_arrayed(intr);
+            if (is_indirect) {
+               /* indirect array access requires the full array in a single variable */
+               unsigned slot_count = s.num_slots;
+               if (bit_size == 64 && slot_count > 1)
+                  slot_count /= 2;
+               if (slot_count > 1)
+                  vec_type = glsl_array_type(vec_type, slot_count, glsl_get_explicit_stride(vec_type));
+            }
+            if (is_arrayed)
+               vec_type = glsl_array_type(vec_type, 32 /* MAX_PATCH_VERTICES */, glsl_get_explicit_stride(vec_type));
+            nir_variable *found = find_io_var_with_semantics(nir, mode, mode, s, location, c, is_load);
+            if (found) {
+               if (glsl_get_vector_elements(glsl_without_array(found->type)) < glsl_get_vector_elements(glsl_without_array(vec_type))) {
+                  /* enlarge existing vars if necessary */
+                  found->type = vec_type;
+               }
+               continue;
+            }
+
+            char name[1024];
+            if (c)
+               snprintf(name, sizeof(name), "slot_%u_c%u", location, c);
+            else
+               snprintf(name, sizeof(name), "slot_%u", location);
+            nir_variable *old_var = find_io_var_with_semantics(nir, nir_var_mem_shared, mode, s, location, c, is_load);
+            nir_variable *var = nir_variable_create(nir, mode, vec_type, old_var ? old_var->name : name);
+            var->data.mode = mode;
+            var->type = vec_type;
+            var->data.driver_location = nir_intrinsic_base(intr) + slot_offset;
+            var->data.location_frac = c;
+            var->data.location = location;
+            var->data.patch = location >= VARYING_SLOT_PATCH0 ||
+                              ((nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) &&
+                               (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER));
+            /* set flat by default */
+            if (nir->info.stage == MESA_SHADER_FRAGMENT && mode == nir_var_shader_in)
+               var->data.interpolation = INTERP_MODE_FLAT;
+            var->data.fb_fetch_output = s.fb_fetch_output;
+            var->data.index = s.dual_source_blend_index;
+            var->data.precision = s.medium_precision;
+            var->data.compact = is_compact;
+         }
+      }
+   }
+   nir_foreach_variable_with_modes(var, nir, nir_var_mem_shared)
+      var->data.mode = nir_var_shader_temp;
+   nir_fixup_deref_modes(nir);
+   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
+}
+
+
+static bool
+eliminate_io_wrmasks_instr(const nir_instr *instr, const void *data)
+{
+   const nir_shader *nir = data;
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   switch (intr->intrinsic) {
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_primitive_output:
+   case nir_intrinsic_store_per_vertex_output:
+      break;
+   default:
+      return false;
+   }
+   unsigned src_components = nir_intrinsic_src_components(intr, 0);
+   unsigned wrmask = nir_intrinsic_write_mask(intr);
+   unsigned num_components = util_bitcount(wrmask);
+   if (num_components != src_components)
+      return true;
+   if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64)
+      num_components *= 2;
+   if (nir->xfb_info) {
+      nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+      nir_src *src_offset = nir_get_io_offset_src(intr);
+      if (nir_src_is_const(*src_offset)) {
+         unsigned slot_offset = nir_src_as_uint(*src_offset);
+         for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
+            if (nir->xfb_info->outputs[i].location == s.location + slot_offset) {
+               unsigned xfb_components = util_bitcount(nir->xfb_info->outputs[i].component_mask);
+               if (xfb_components != MIN2(4, num_components))
+                  return true;
+               num_components -= xfb_components;
+               if (!num_components)
+                  break;
+            }
+         }
+      } else {
+         for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
+            if (nir->xfb_info->outputs[i].location >= s.location &&
+                nir->xfb_info->outputs[i].location < s.location + s.num_slots) {
+               unsigned xfb_components = util_bitcount(nir->xfb_info->outputs[i].component_mask);
+               if (xfb_components < MIN2(num_components, 4))
+                  return true;
+               num_components -= xfb_components;
+               if (!num_components)
+                  break;
+            }
+         }
+      }
+   }
+   return false;
+}
+
+static int
+zink_type_size(const struct glsl_type *type, bool bindless)
+{
+   return glsl_count_attribute_slots(type, false);
+}
+
+static nir_mem_access_size_align
+mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
+                         uint8_t bit_size, uint32_t align,
+                         uint32_t align_offset, bool offset_is_const,
+                         const void *cb_data)
+{
+   align = nir_combined_align(align, align_offset);
+
+   assert(util_is_power_of_two_nonzero(align));
+
+   return (nir_mem_access_size_align){
+      .num_components = MIN2(bytes / (bit_size / 8), 4),
+      .bit_size = bit_size,
+      .align = bit_size / 8,
+   };
+}
+
+static uint8_t
+lower_vec816_alu(const nir_instr *instr, const void *cb_data)
+{
+   return 4;
+}
+
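mem_access_size_align_cb clamps every memory access to at most a vec4 of naturally aligned components. A worked example of what it returns for a hypothetical 12-byte, 32-bit, 4-byte-aligned load:

   nir_mem_access_size_align r =
      mem_access_size_align_cb(nir_intrinsic_load_global, /*bytes*/ 12,
                               /*bit_size*/ 32, /*align*/ 4, /*align_offset*/ 0,
                               /*offset_is_const*/ true, NULL);
   /* r.num_components == 3  (12 bytes / 4-byte components, capped at 4) */
   /* r.bit_size       == 32 (the bit size is passed through unchanged)  */
   /* r.align          == 4  (natural alignment of one component)        */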
 struct zink_shader *
-zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
-                   const struct pipe_stream_output_info *so_info)
+zink_shader_create(struct zink_screen *screen, struct nir_shader *nir)
 {
    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
    bool have_psiz = false;
@@ -4780,6 +5328,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
       nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_EDGE);

    ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
+   ret->sinfo.have_workgroup_memory_explicit_layout = screen->info.have_KHR_workgroup_memory_explicit_layout;
    ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];

    util_queue_fence_init(&ret->precompile.fence);
@@ -4789,13 +5338,50 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
    ret->programs = _mesa_pointer_set_create(NULL);
    simple_mtx_init(&ret->lock, mtx_plain);

-   nir_variable_mode indirect_derefs_modes = 0;
-   if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
-       nir->info.stage == MESA_SHADER_TESS_EVAL)
-      indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
+   nir_lower_io_options lower_io_flags = 0;
+   if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
+      lower_io_flags = nir_lower_io_lower_64bit_to_32;
+   else if (!screen->info.feats.features.shaderFloat64)
+      lower_io_flags = nir_lower_io_lower_64bit_float_to_32;
+   bool temp_inputs = nir->info.stage != MESA_SHADER_VERTEX && nir->info.inputs_read & BITFIELD_RANGE(VARYING_SLOT_CLIP_DIST0, 4);
+   bool temp_outputs = nir->info.stage != MESA_SHADER_FRAGMENT && (nir->info.outputs_read | nir->info.outputs_written) & BITFIELD_RANGE(VARYING_SLOT_CLIP_DIST0, 4);
+   if (temp_inputs || temp_outputs) {
+      NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), temp_outputs, temp_inputs);
+      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
+      NIR_PASS_V(nir, nir_split_var_copies);
+      NIR_PASS_V(nir, nir_lower_var_copies);
+   }
+   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out, zink_type_size, lower_io_flags);
+   if (nir->info.stage == MESA_SHADER_VERTEX)
+      lower_io_flags |= nir_lower_io_lower_64bit_to_32;
+   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in, zink_type_size, lower_io_flags);
+   nir->info.io_lowered = true;
+
+   if (nir->info.stage == MESA_SHADER_KERNEL) {
+      nir_lower_mem_access_bit_sizes_options lower_mem_access_options = {
+         .modes = nir_var_all,
+         .may_lower_unaligned_stores_to_atomics = true,
+         .callback = mem_access_size_align_cb,
+         .cb_data = screen,
+      };
+      NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &lower_mem_access_options);
+      NIR_PASS_V(nir, nir_lower_alu_width, lower_vec816_alu, NULL);
+      NIR_PASS_V(nir, nir_lower_alu_vec8_16_srcs);
+   }

-   NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
-              UINT32_MAX);
+   optimize_nir(nir, NULL, true);
+   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
+      if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
+         NIR_PASS_V(nir, lower_bindless_io);
+         break;
+      }
+   }
+   nir_gather_xfb_info_from_intrinsics(nir);
+   NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_shader_out, eliminate_io_wrmasks_instr, nir);
+   /* clean up io to improve direct access */
+   optimize_nir(nir, NULL, true);
+   rework_io_vars(nir, nir_var_shader_in);
+   rework_io_vars(nir, nir_var_shader_out);

    if (nir->info.stage < MESA_SHADER_COMPUTE)
       create_gfx_pushconst(nir);
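nir_lower_io converts all variable access into load_input/store_output-style intrinsics, using zink_type_size above to measure driver_location in vec4-sized attribute slots. The asymmetry in the call shape is deliberate: vertex-stage inputs are always split to 32-bit, while outputs are split only when the device features demand it. A sketch of the pattern:

   /* outputs: flags derived from shaderInt64/shaderFloat64 support */
   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out, zink_type_size, lower_io_flags);
   /* vertex inputs: always force the 64-bit split (gallium hands the
    * driver 64-bit attribs as 32-bit uint pairs, per the comment earlier
    * in this file) */
   if (nir->info.stage == MESA_SHADER_VERTEX)
      lower_io_flags |= nir_lower_io_lower_64bit_to_32;
   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in, zink_type_size, lower_io_flags);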
@@ -4813,9 +5399,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
       NIR_PASS_V(nir, fixup_io_locations);

    NIR_PASS_V(nir, lower_basevertex);
-   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
    NIR_PASS_V(nir, lower_baseinstance);
-   NIR_PASS_V(nir, lower_sparse);
    NIR_PASS_V(nir, split_bitfields);
    NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
@@ -4839,48 +5423,31 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
          subgroup_options.subgroup_size = 1;
          subgroup_options.lower_vote_trivial = true;
       }
+      subgroup_options.lower_inverse_ballot = true;
       NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
    }

-   if (so_info && so_info->num_outputs)
-      NIR_PASS_V(nir, split_blocks);
-
-   optimize_nir(nir, NULL);
+   optimize_nir(nir, NULL, true);
    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
    NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
                                           nir_lower_demote_if_to_cf |
                                           nir_lower_terminate_if_to_cf));
-   NIR_PASS_V(nir, nir_lower_fragcolor,
-         nir->info.fs.color_is_dual_source ? 1 : 8);
-   NIR_PASS_V(nir, lower_64bit_vertex_attribs);
+
    bool needs_size = analyze_io(ret, nir);
    NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
    /* run in compile if there could be inlined uniforms */
    if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
-      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
+      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
       NIR_PASS_V(nir, rewrite_bo_access, screen);
       NIR_PASS_V(nir, remove_bo_access, ret);
    }

-   if (zink_debug & ZINK_DEBUG_NIR) {
-      fprintf(stderr, "NIR shader:\n---8<---\n");
-      nir_print_shader(nir, stderr);
-      fprintf(stderr, "---8<---\n");
-   }
-
    struct zink_bindless_info bindless = {0};
    bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
-   bool has_bindless_io = false;
-   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
+   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out)
       var->data.is_xfb = false;
-      if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
-         has_bindless_io = true;
-      }
-   }
-   if (has_bindless_io)
-      NIR_PASS_V(nir, lower_bindless_io);

-   optimize_nir(nir, NULL);
+   optimize_nir(nir, NULL, true);
    prune_io(nir);

    scan_nir(screen, nir, ret);
@@ -4935,7 +5502,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
       } else if (var->data.mode == nir_var_mem_ssbo) {
          ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
          var->data.descriptor_set = screen->desc_set_id[ztype];
-         var->data.binding = zink_binding(nir->info.stage,
+         var->data.binding = zink_binding(clamp_stage(&nir->info),
                                           VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                           var->data.driver_location,
                                           screen->compact_descriptors);
@@ -4992,8 +5559,8 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
    if (!nir->info.internal)
       nir_foreach_shader_out_variable(var, nir)
         var->data.explicit_xfb_buffer = 0;
-   if (so_info && so_info->num_outputs)
-      update_so_info(ret, nir, so_info, nir->info.outputs_written, have_psiz);
+   if (nir->xfb_info && nir->xfb_info->output_count && nir->info.outputs_written)
+      update_so_info(ret, nir, nir->info.outputs_written, have_psiz);
    else if (have_psiz) {
       bool have_fake_psiz = false;
       nir_variable *psiz = NULL;
@@ -5005,9 +5572,11 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
            psiz = var;
        }
     }
-      if (have_fake_psiz && psiz) {
+      /* maintenance5 allows injected psiz deletion */
+      if (have_fake_psiz && (psiz || screen->info.have_KHR_maintenance5)) {
         psiz->data.mode = nir_var_shader_temp;
         nir_fixup_deref_modes(nir);
+         delete_psiz_store(nir, true);
         NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
      }
   }
@@ -5040,8 +5609,9 @@ zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
    if (!screen->info.feats.features.shaderImageGatherExtended)
       tex_opts.lower_tg4_offsets = true;
    NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
-   optimize_nir(nir, NULL);
-   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+   optimize_nir(nir, NULL, false);
+   if (nir->info.stage == MESA_SHADER_VERTEX)
+      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
    if (screen->driconf.inline_uniforms)
       nir_find_inlinable_uniforms(nir);

@@ -5051,7 +5621,29 @@ zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
 void
 zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
 {
+   _mesa_set_destroy(shader->programs, NULL);
+   util_queue_fence_wait(&shader->precompile.fence);
+   util_queue_fence_destroy(&shader->precompile.fence);
+   zink_descriptor_shader_deinit(screen, shader);
+   if (screen->info.have_EXT_shader_object) {
+      VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
+   } else {
+      if (shader->precompile.obj.mod)
+         VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
+      if (shader->precompile.gpl)
+         VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
+   }
+   blob_finish(&shader->blob);
+   ralloc_free(shader->spirv);
+   free(shader->precompile.bindings);
+   ralloc_free(shader);
+}
+
+void
+zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
+{
    assert(shader->info.stage != MESA_SHADER_COMPUTE);
+   util_queue_fence_wait(&shader->precompile.fence);
    set_foreach(shader->programs, entry) {
       struct zink_gfx_program *prog = (void*)entry->key;
       gl_shader_stage stage = shader->info.stage;
@@ -5115,7 +5707,7 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)

    if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
        shader->non_fs.generated_tcs) {
       /* automatically destroy generated tcs shaders when tes is destroyed */
-      zink_shader_free(screen, shader->non_fs.generated_tcs);
+      zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
       shader->non_fs.generated_tcs = NULL;
    }
    for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
@@ -5123,33 +5715,22 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
         if (shader->info.stage != MESA_SHADER_FRAGMENT &&
             shader->non_fs.generated_gs[i][j]) {
            /* automatically destroy generated gs shaders when owner is destroyed */
-            zink_shader_free(screen, shader->non_fs.generated_gs[i][j]);
+            zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
            shader->non_fs.generated_gs[i][j] = NULL;
         }
      }
   }
-   _mesa_set_destroy(shader->programs, NULL);
-   util_queue_fence_wait(&shader->precompile.fence);
-   util_queue_fence_destroy(&shader->precompile.fence);
-   zink_descriptor_shader_deinit(screen, shader);
-   if (shader->precompile.mod)
-      VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.mod, NULL);
-   if (shader->precompile.gpl)
-      VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
-   blob_finish(&shader->blob);
-   ralloc_free(shader->spirv);
-   free(shader->precompile.bindings);
-   ralloc_free(shader);
+   zink_shader_free(screen, shader);
 }
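With this split, zink_shader_free releases only the object itself, while zink_gfx_shader_free first unlinks the shader from dependent programs and its generated tcs/gs variants before delegating. A sketch of the calling convention this implies for a hypothetical caller:

   if (shader->info.stage == MESA_SHADER_COMPUTE)
      zink_shader_free(screen, shader);     /* no program/variant bookkeeping */
   else
      zink_gfx_shader_free(screen, shader); /* walks programs, generated tcs/gs */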
-VkShaderModule
-zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
+struct zink_shader_object
+zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
 {
    assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
    /* shortcut all the nir passes since we just have to change this one word */
    zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
-   return zink_shader_spirv_compile(screen, zs, NULL);
+   return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
 }

 /* creating a passthrough tcs shader that's roughly:
@@ -5174,7 +5755,7 @@ void main()
 */
 struct zink_shader *
-zink_shader_tcs_create(struct zink_screen *screen, nir_shader *vs, unsigned vertices_per_patch, nir_shader **nir_ret)
+zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
 {
    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
    util_queue_fence_init(&ret->precompile.fence);
@@ -5187,20 +5768,22 @@ zink_shader_tcs_create(struct zink_screen *screen, nir_shader *vs, unsigned vert
    fn->is_entrypoint = true;
    nir_function_impl *impl = nir_function_impl_create(fn);

-   nir_builder b;
-   nir_builder_init(&b, impl);
-   b.cursor = nir_before_block(nir_start_block(impl));
+   nir_builder b = nir_builder_at(nir_before_impl(impl));

-   nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
+   nir_def *invocation_id = nir_load_invocation_id(&b);

-   nir_foreach_shader_out_variable(var, vs) {
-      const struct glsl_type *type = var->type;
+   nir_foreach_shader_in_variable(var, tes) {
+      if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+         continue;
       const struct glsl_type *in_type = var->type;
       const struct glsl_type *out_type = var->type;
       char buf[1024];
       snprintf(buf, sizeof(buf), "%s_out", var->name);
-      in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
-      out_type = glsl_array_type(type, vertices_per_patch, 0);
+      if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
+         const struct glsl_type *type = var->type;
+         in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
+         out_type = glsl_array_type(type, vertices_per_patch, 0);
+      }

       nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
       nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
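The three-line builder setup was replaced here by the newer one-shot initializer. The equivalent idiom in isolation (a sketch of the pattern, not additional pass code):

   /* position a fresh builder at the top of the entrypoint impl */
   nir_builder b = nir_builder_at(nir_before_impl(impl));
   nir_def *invocation_id = nir_load_invocation_id(&b);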
@@ -5228,12 +5811,10 @@ zink_shader_tcs_create(struct zink_screen *screen, nir_shader *vs, unsigned vert

    create_gfx_pushconst(nir);

-   nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32,
-                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL),
-                                                    .base = 1, .range = 8);
-   nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32,
-                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL),
-                                                    .base = 2, .range = 16);
+   nir_def *load_inner = nir_load_push_constant_zink(&b, 2, 32,
+                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL));
+   nir_def *load_outer = nir_load_push_constant_zink(&b, 4, 32,
+                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL));

    for (unsigned i = 0; i < 2; i++) {
       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
@@ -5247,8 +5828,7 @@ zink_shader_tcs_create(struct zink_screen *screen, nir_shader *vs, unsigned vert
    nir->info.tess.tcs_vertices_out = vertices_per_patch;
    nir_validate_shader(nir, "created");

-   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
-   optimize_nir(nir, NULL);
+   optimize_nir(nir, NULL, true);
    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
    NIR_PASS_V(nir, nir_convert_from_ssa, true);
@@ -5295,3 +5875,11 @@ zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
 #endif
    nir_serialize(blob, nir, strip);
 }
+
+void
+zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
+{
+   nir_shader *nir = zink_shader_deserialize(screen, zs);
+   nir_print_shader(nir, fp);
+   ralloc_free(nir);
+}
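The new zink_print_shader helper deserializes the shader's stored NIR blob, prints it, and frees the temporary copy, so it can be called at any point after creation. A hypothetical debug call site:

   /* dump a shader's stored NIR to stderr while debugging */
   zink_print_shader(screen, zs, stderr);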