author | Jonathan Gray <jsg@cvs.openbsd.org> | 2021-07-22 10:17:30 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2021-07-22 10:17:30 +0000 |
commit | ca11beabae33eb59fb981b8adf50b1d47a2a98f0 (patch) | |
tree | 3e4691a396e6e54cd54224a190663d5cf976625b /lib/mesa/src/gallium/drivers/zink/zink_compiler.c | |
parent | 27c8a50e8bbde7d28b1fc46d715a4c469e24f2c4 (diff) | |
Import Mesa 21.1.5
Diffstat (limited to 'lib/mesa/src/gallium/drivers/zink/zink_compiler.c')
-rw-r--r-- | lib/mesa/src/gallium/drivers/zink/zink_compiler.c | 1088 |
1 file changed, 939 insertions, 149 deletions
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_compiler.c b/lib/mesa/src/gallium/drivers/zink/zink_compiler.c index 74c7ab4fa..cff1f6d67 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_compiler.c +++ b/lib/mesa/src/gallium/drivers/zink/zink_compiler.c @@ -21,7 +21,9 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "zink_context.h" #include "zink_compiler.h" +#include "zink_program.h" #include "zink_screen.h" #include "nir_to_spirv/nir_to_spirv.h" @@ -36,84 +38,41 @@ #include "util/u_memory.h" -static bool -lower_instr(nir_intrinsic_instr *instr, nir_builder *b) +static void +create_vs_pushconst(nir_shader *nir) { - b->cursor = nir_before_instr(&instr->instr); - - if (instr->intrinsic == nir_intrinsic_load_ubo) { - nir_ssa_def *old_idx = nir_ssa_for_src(b, instr->src[0], 1); - nir_ssa_def *new_idx = nir_iadd(b, old_idx, nir_imm_int(b, 1)); - nir_instr_rewrite_src(&instr->instr, &instr->src[0], - nir_src_for_ssa(new_idx)); - return true; - } - - if (instr->intrinsic == nir_intrinsic_load_uniform) { - nir_ssa_def *ubo_idx = nir_imm_int(b, 0); - nir_ssa_def *ubo_offset = - nir_iadd(b, nir_imm_int(b, nir_intrinsic_base(instr)), - nir_ssa_for_src(b, instr->src[0], 1)); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo); - load->num_components = instr->num_components; - load->src[0] = nir_src_for_ssa(ubo_idx); - load->src[1] = nir_src_for_ssa(ubo_offset); - nir_ssa_dest_init(&load->instr, &load->dest, - load->num_components, instr->dest.ssa.bit_size, - instr->dest.ssa.name); - nir_builder_instr_insert(b, &load->instr); - nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa)); - - nir_instr_remove(&instr->instr); - return true; - } + nir_variable *vs_pushconst; + /* create compatible layout for the ntv push constant loader */ + struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2); + fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0); + fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed"); + fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed); + fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0); + fields[1].name = ralloc_asprintf(nir, "draw_id"); + fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id); + vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const, + glsl_struct_type(fields, 2, "struct", false), "vs_pushconst"); + vs_pushconst->data.location = INT_MAX; //doesn't really matter +} - return false; +static void +create_cs_pushconst(nir_shader *nir) +{ + nir_variable *cs_pushconst; + /* create compatible layout for the ntv push constant loader */ + struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field)); + fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0); + fields[0].name = ralloc_asprintf(nir, "work_dim"); + fields[0].offset = 0; + cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const, + glsl_struct_type(fields, 1, "struct", false), "cs_pushconst"); + cs_pushconst->data.location = INT_MAX; //doesn't really matter } static bool -lower_uniforms_to_ubo(nir_shader *shader) +reads_work_dim(nir_shader *shader) { - bool progress = false; - - nir_foreach_function(function, shader) { - if (function->impl) { - nir_builder builder; - nir_builder_init(&builder, function->impl); - nir_foreach_block(block, function->impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type == nir_instr_type_intrinsic) - progress |= lower_instr(nir_instr_as_intrinsic(instr), - 
&builder); - } - } - - nir_metadata_preserve(function->impl, nir_metadata_block_index | - nir_metadata_dominance); - } - } - - if (progress) { - assert(shader->num_uniforms > 0); - const struct glsl_type *type = glsl_array_type(glsl_vec4_type(), - shader->num_uniforms, 0); - nir_variable *ubo = nir_variable_create(shader, nir_var_mem_ubo, type, - "uniform_0"); - ubo->data.binding = 0; - - struct glsl_struct_field field = { - .type = type, - .name = "data", - .location = -1, - }; - ubo->interface_type = - glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, - false, "__ubo0_interface"); - } - - return progress; + return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM); } static bool @@ -123,15 +82,54 @@ lower_discard_if_instr(nir_intrinsic_instr *instr, nir_builder *b) b->cursor = nir_before_instr(&instr->instr); nir_if *if_stmt = nir_push_if(b, nir_ssa_for_src(b, instr->src[0], 1)); - nir_intrinsic_instr *discard = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard); - nir_builder_instr_insert(b, &discard->instr); + nir_discard(b); nir_pop_if(b, if_stmt); nir_instr_remove(&instr->instr); return true; } - assert(instr->intrinsic != nir_intrinsic_discard || - nir_block_last_instr(instr->instr.block) == &instr->instr); + /* a shader like this (shaders@glsl-fs-discard-04): + + uniform int j, k; + + void main() + { + for (int i = 0; i < j; i++) { + if (i > k) + continue; + discard; + } + gl_FragColor = vec4(0.0, 1.0, 0.0, 0.0); + } + + + + will generate nir like: + + loop { + //snip + if ssa_11 { + block block_5: + / preds: block_4 / + vec1 32 ssa_17 = iadd ssa_50, ssa_31 + / succs: block_7 / + } else { + block block_6: + / preds: block_4 / + intrinsic discard () () <-- not last instruction + vec1 32 ssa_23 = iadd ssa_50, ssa_31 <-- dead code loop itr increment + / succs: block_7 / + } + //snip + } + + which means that we can't assert like this: + + assert(instr->intrinsic != nir_intrinsic_discard || + nir_block_last_instr(instr->instr.block) == &instr->instr); + + + and it's unnecessary anyway since post-vtn optimizing will dce the instructions following the discard + */ return false; } @@ -161,21 +159,258 @@ lower_discard_if(nir_shader *shader) return progress; } -static const struct nir_shader_compiler_options nir_options = { - .lower_all_io_to_temps = true, - .lower_ffma = true, - .lower_flrp32 = true, - .lower_fpow = true, - .lower_fsat = true, -}; +static bool +lower_work_dim_instr(nir_builder *b, nir_instr *in, void *data) +{ + if (in->type != nir_instr_type_intrinsic) + return false; + nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); + if (instr->intrinsic != nir_intrinsic_load_work_dim) + return false; + + if (instr->intrinsic == nir_intrinsic_load_work_dim) { + b->cursor = nir_after_instr(&instr->instr); + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant); + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); + nir_intrinsic_set_range(load, 3 * sizeof(uint32_t)); + load->num_components = 1; + nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim"); + nir_builder_instr_insert(b, &load->instr); + + nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa); + } + + return true; +} + +static bool +lower_work_dim(nir_shader *shader) +{ + if (shader->info.stage != MESA_SHADER_KERNEL) + return false; + + if (!reads_work_dim(shader)) + return false; + + return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL); +} + +static bool 
+lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_deref) + return false; + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (deref->deref_type != nir_deref_type_var) + return false; + nir_variable *var = nir_deref_instr_get_variable(deref); + if (var->data.mode != nir_var_shader_in) + return false; + if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3) + return false; + + /* create second variable for the split */ + nir_variable *var2 = nir_variable_clone(var, b->shader); + /* split new variable into second slot */ + var2->data.driver_location++; + nir_shader_add_variable(b->shader, var2); + + unsigned total_num_components = glsl_get_vector_elements(var->type); + /* new variable is the second half of the dvec */ + var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2); + /* clamp original variable to a dvec2 */ + deref->type = var->type = glsl_vector_type(glsl_get_base_type(var->type), 2); + + /* create deref instr for new variable */ + b->cursor = nir_after_instr(instr); + nir_deref_instr *deref2 = nir_build_deref_var(b, var2); + + nir_foreach_use_safe(use_src, &deref->dest.ssa) { + nir_instr *use_instr = use_src->parent_instr; + assert(use_instr->type == nir_instr_type_intrinsic && + nir_instr_as_intrinsic(use_instr)->intrinsic == nir_intrinsic_load_deref); + + /* this is a load instruction for the deref, and we need to split it into two instructions that we can + * then zip back into a single ssa def */ + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_instr); + /* clamp the first load to 2 64bit components */ + intr->num_components = intr->dest.ssa.num_components = 2; + b->cursor = nir_after_instr(use_instr); + /* this is the second load instruction for the second half of the dvec3/4 components */ + nir_intrinsic_instr *intr2 = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref); + intr2->src[0] = nir_src_for_ssa(&deref2->dest.ssa); + intr2->num_components = total_num_components - 2; + nir_ssa_dest_init(&intr2->instr, &intr2->dest, intr2->num_components, 64, NULL); + nir_builder_instr_insert(b, &intr2->instr); + + nir_ssa_def *def[4]; + /* create a new dvec3/4 comprised of all the loaded components from both variables */ + def[0] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 0)); + def[1] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 1)); + def[2] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 0)); + if (total_num_components == 4) + def[3] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 1)); + nir_ssa_def *new_vec = nir_vec(b, def, total_num_components); + /* use the assembled dvec3/4 for all other uses of the load */ + nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec, + new_vec->parent_instr); + } + + return true; +} + +/* "64-bit three- and four-component vectors consume two consecutive locations." + * - 14.1.4. 
Location Assignment + * + * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which + * are assigned to consecutive locations, loaded separately, and then assembled back into a + * composite value that's used in place of the original loaded ssa src + */ +static bool +lower_64bit_vertex_attribs(nir_shader *shader) +{ + if (shader->info.stage != MESA_SHADER_VERTEX) + return false; + + return nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL); +} + +static bool +lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data) +{ + if (in->type != nir_instr_type_intrinsic) + return false; + nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); + if (instr->intrinsic != nir_intrinsic_load_base_vertex) + return false; + + b->cursor = nir_after_instr(&instr->instr); + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant); + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); + nir_intrinsic_set_range(load, 4); + load->num_components = 1; + nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed"); + nir_builder_instr_insert(b, &load->instr); + + nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel, + nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL), + &instr->dest.ssa, + nir_imm_int(b, 0), + NULL); + + nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite, + composite->parent_instr); + return true; +} + +static bool +lower_basevertex(nir_shader *shader) +{ + if (shader->info.stage != MESA_SHADER_VERTEX) + return false; + + if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX)) + return false; + + return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL); +} + + +static bool +lower_drawid_instr(nir_builder *b, nir_instr *in, void *data) +{ + if (in->type != nir_instr_type_intrinsic) + return false; + nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in); + if (instr->intrinsic != nir_intrinsic_load_draw_id) + return false; + + b->cursor = nir_before_instr(&instr->instr); + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant); + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1)); + nir_intrinsic_set_range(load, 4); + load->num_components = 1; + nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id"); + nir_builder_instr_insert(b, &load->instr); + + nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa); + + return true; +} + +static bool +lower_drawid(nir_shader *shader) +{ + if (shader->info.stage != MESA_SHADER_VERTEX) + return false; + + if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID)) + return false; + + return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL); +} + +static bool +lower_dual_blend(nir_shader *shader) +{ + bool progress = false; + nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1); + if (var) { + var->data.location = FRAG_RESULT_DATA0; + var->data.index = 1; + progress = true; + } + nir_shader_preserve_all_metadata(shader); + return progress; +} + +void +zink_screen_init_compiler(struct zink_screen *screen) +{ + static const struct nir_shader_compiler_options + default_options = { + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, + .lower_scmp = true, + .lower_fdph = true, + .lower_flrp32 = true, + .lower_fpow = true, + .lower_fsat = true, + 
.lower_extract_byte = true, + .lower_extract_word = true, + .lower_mul_high = true, + .lower_rotate = true, + .lower_uadd_carry = true, + .lower_pack_64_2x32_split = true, + .lower_unpack_64_2x32_split = true, + .lower_vector_cmp = true, + .lower_int64_options = 0, + .lower_doubles_options = ~nir_lower_fp64_full_software, + .has_fsub = true, + .has_isub = true, + .lower_mul_2x32_64 = true, + }; + + screen->nir_options = default_options; + + if (!screen->info.feats.features.shaderInt64) + screen->nir_options.lower_int64_options = ~0; + + if (!screen->info.feats.features.shaderFloat64) { + screen->nir_options.lower_doubles_options = ~0; + screen->nir_options.lower_flrp64 = true; + screen->nir_options.lower_ffma64 = true; + } +} const void * -zink_get_compiler_options(struct pipe_screen *screen, +zink_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir, enum pipe_shader_type shader) { assert(ir == PIPE_SHADER_IR_NIR); - return &nir_options; + return &zink_screen(pscreen)->nir_options; } struct nir_shader * @@ -187,7 +422,7 @@ zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens) fprintf(stderr, "---8<---\n\n"); } - return tgsi_to_nir(tokens, screen); + return tgsi_to_nir(tokens, screen, false); } static void @@ -206,87 +441,196 @@ optimize_nir(struct nir_shader *s) NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); + NIR_PASS(progress, s, zink_nir_lower_b2b); } while (progress); } -static uint32_t -zink_binding(enum pipe_shader_type stage, VkDescriptorType type, int index) +/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */ +static bool +check_psiz(struct nir_shader *s) { - if (stage == PIPE_SHADER_COMPUTE) { - unreachable("not supported"); - } else { - uint32_t stage_offset = (uint32_t)stage * (PIPE_MAX_CONSTANT_BUFFERS + - PIPE_MAX_SHADER_SAMPLER_VIEWS); + nir_foreach_shader_out_variable(var, s) { + if (var->data.location == VARYING_SLOT_PSIZ) { + /* genuine PSIZ outputs will have this set */ + return !!var->data.explicit_location; + } + } + return false; +} - switch (type) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - assert(index < PIPE_MAX_CONSTANT_BUFFERS); - return stage_offset + index; +static void +update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info, + uint64_t outputs_written, bool have_psiz) +{ + uint8_t reverse_map[64] = {}; + unsigned slot = 0; + /* semi-copied from iris */ + while (outputs_written) { + int bit = u_bit_scan64(&outputs_written); + /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */ + if (bit == VARYING_SLOT_PSIZ && !have_psiz) + continue; + reverse_map[slot++] = bit; + } - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS); - return stage_offset + PIPE_MAX_CONSTANT_BUFFERS + index; + nir_foreach_shader_out_variable(var, zs->nir) + var->data.explicit_xfb_buffer = 0; + + bool inlined[64] = {0}; + for (unsigned i = 0; i < so_info->num_outputs; i++) { + const struct pipe_stream_output *output = &so_info->output[i]; + unsigned slot = reverse_map[output->register_index]; + /* always set stride to be used during draw */ + zs->streamout.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer]; + if ((zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) && + !output->start_component) { + nir_variable *var = NULL; + while (!var) + var = 
nir_find_variable_with_location(zs->nir, nir_var_shader_out, slot--); + slot++; + if (inlined[slot]) + continue; + assert(var && var->data.location == slot); + /* if this is the entire variable, try to blast it out during the initial declaration */ + if (glsl_get_components(var->type) == output->num_components) { + var->data.explicit_xfb_buffer = 1; + var->data.xfb.buffer = output->output_buffer; + var->data.xfb.stride = so_info->stride[output->output_buffer] * 4; + var->data.offset = output->dst_offset * 4; + var->data.stream = output->stream; + inlined[slot] = true; + continue; + } + } + zs->streamout.so_info.output[zs->streamout.so_info.num_outputs] = *output; + /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ + zs->streamout.so_info_slots[zs->streamout.so_info.num_outputs++] = reverse_map[output->register_index]; + } + zs->streamout.have_xfb = !!zs->streamout.so_info.num_outputs; +} + +static void +assign_io_locations(nir_shader *nir, unsigned char *shader_slot_map, + unsigned char *shader_slots_reserved) +{ + unsigned reserved = shader_slots_reserved ? *shader_slots_reserved : 0; + nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) { + if ((nir->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in) || + (nir->info.stage == MESA_SHADER_FRAGMENT && var->data.mode == nir_var_shader_out)) + continue; + + unsigned slot = var->data.location; + switch (var->data.location) { + case VARYING_SLOT_POS: + case VARYING_SLOT_PNTC: + case VARYING_SLOT_PSIZ: + case VARYING_SLOT_LAYER: + case VARYING_SLOT_PRIMITIVE_ID: + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CULL_DIST0: + case VARYING_SLOT_VIEWPORT: + case VARYING_SLOT_FACE: + case VARYING_SLOT_TESS_LEVEL_OUTER: + case VARYING_SLOT_TESS_LEVEL_INNER: + /* use a sentinel value to avoid counting later */ + var->data.driver_location = UINT_MAX; + break; default: - unreachable("unexpected type"); + if (var->data.patch) { + assert(var->data.location >= VARYING_SLOT_PATCH0); + slot = var->data.location - VARYING_SLOT_PATCH0; + } else if (var->data.location >= VARYING_SLOT_VAR0 && + ((var->data.mode == nir_var_shader_out && + nir->info.stage == MESA_SHADER_TESS_CTRL) || + (var->data.mode != nir_var_shader_out && + nir->info.stage == MESA_SHADER_TESS_EVAL))) { + slot = var->data.location - VARYING_SLOT_VAR0; + } else { + if (shader_slot_map[var->data.location] == 0xff) { + assert(reserved < MAX_VARYING); + shader_slot_map[var->data.location] = reserved; + if (nir->info.stage == MESA_SHADER_TESS_CTRL && var->data.location >= VARYING_SLOT_VAR0) + reserved += (glsl_count_vec4_slots(var->type, false, false) / 32 /*MAX_PATCH_VERTICES*/); + else + reserved += glsl_count_vec4_slots(var->type, false, false); + } + slot = shader_slot_map[var->data.location]; + assert(slot < MAX_VARYING); + } + var->data.driver_location = slot; } } + + if (shader_slots_reserved) + *shader_slots_reserved = reserved; } -struct zink_shader * -zink_compile_nir(struct zink_screen *screen, struct nir_shader *nir) +VkShaderModule +zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct zink_shader_key *key, + unsigned char *shader_slot_map, unsigned char *shader_slots_reserved) { - struct zink_shader *ret = CALLOC_STRUCT(zink_shader); + VkShaderModule mod = VK_NULL_HANDLE; + void *streamout = NULL; + nir_shader *nir = nir_shader_clone(NULL, zs->nir); - NIR_PASS_V(nir, lower_uniforms_to_ubo); - NIR_PASS_V(nir, nir_lower_clip_halfz); - NIR_PASS_V(nir, nir_lower_regs_to_ssa); - 
optimize_nir(nir); - NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp); - NIR_PASS_V(nir, lower_discard_if); - NIR_PASS_V(nir, nir_convert_from_ssa, true); + if (key) { + if (key->inline_uniforms) { + NIR_PASS_V(nir, nir_inline_uniforms, + nir->info.num_inlinable_uniforms, + key->base.inlined_uniform_values, + nir->info.inlinable_uniform_dw_offsets); - if (zink_debug & ZINK_DEBUG_NIR) { - fprintf(stderr, "NIR shader:\n---8<---\n"); - nir_print_shader(nir, stderr); - fprintf(stderr, "---8<---\n"); + optimize_nir(nir); + + /* This must be done again. */ + NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | + nir_var_shader_out); + } } - enum pipe_shader_type stage = pipe_shader_type_from_mesa(nir->info.stage); + /* TODO: use a separate mem ctx here for ralloc */ + if (zs->nir->info.stage < MESA_SHADER_FRAGMENT) { + if (zink_vs_key(key)->last_vertex_stage) { + if (zs->streamout.have_xfb) + streamout = &zs->streamout; - ret->num_bindings = 0; - nir_foreach_variable(var, &nir->uniforms) { - if (var->data.mode == nir_var_mem_ubo) { - ret->bindings[ret->num_bindings].index = var->data.binding; - var->data.binding = zink_binding(stage, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, var->data.binding); - ret->bindings[ret->num_bindings].binding = var->data.binding; - ret->bindings[ret->num_bindings].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - ret->num_bindings++; - } else { - assert(var->data.mode == nir_var_uniform); - if (glsl_type_is_array(var->type) && - glsl_type_is_sampler(glsl_get_array_element(var->type))) { - for (int i = 0; i < glsl_get_length(var->type); ++i) { - ret->bindings[ret->num_bindings].index = var->data.driver_location + i; - var->data.binding = zink_binding(stage, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, var->data.driver_location + i); - ret->bindings[ret->num_bindings].binding = var->data.binding; - ret->bindings[ret->num_bindings].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - ret->num_bindings++; - } - } else if (glsl_type_is_sampler(var->type)) { - ret->bindings[ret->num_bindings].index = var->data.driver_location; - var->data.binding = zink_binding(stage, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, var->data.driver_location); - ret->bindings[ret->num_bindings].binding = var->data.binding; - ret->bindings[ret->num_bindings].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - ret->num_bindings++; + if (!zink_vs_key(key)->clip_halfz) { + NIR_PASS_V(nir, nir_lower_clip_halfz); + } + if (zink_vs_key(key)->push_drawid) { + NIR_PASS_V(nir, lower_drawid); } } + } else if (zs->nir->info.stage == MESA_SHADER_FRAGMENT) { + if (!zink_fs_key(key)->samples && + nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) { + /* VK will always use gl_SampleMask[] values even if sample count is 0, + * so we need to skip this write here to mimic GL's behavior of ignoring it + */ + nir_foreach_shader_out_variable(var, nir) { + if (var->data.location == FRAG_RESULT_SAMPLE_MASK) + var->data.mode = nir_var_shader_temp; + } + nir_fixup_deref_modes(nir); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); + optimize_nir(nir); + } + if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) { + NIR_PASS_V(nir, lower_dual_blend); + } + if (zink_fs_key(key)->coord_replace_bits) { + NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits, + false, zink_fs_key(key)->coord_replace_yinvert); + } } + NIR_PASS_V(nir, nir_convert_from_ssa, true); - ret->info = nir->info; + 
assign_io_locations(nir, shader_slot_map, shader_slots_reserved); - struct spirv_shader *spirv = nir_to_spirv(nir); - assert(spirv); + struct spirv_shader *spirv = nir_to_spirv(nir, streamout); + if (!spirv) + goto done; if (zink_debug & ZINK_DEBUG_SPIRV) { char buf[256]; @@ -305,15 +649,461 @@ zink_compile_nir(struct zink_screen *screen, struct nir_shader *nir) smci.codeSize = spirv->num_words * sizeof(uint32_t); smci.pCode = spirv->words; - if (vkCreateShaderModule(screen->dev, &smci, NULL, &ret->shader_module) != VK_SUCCESS) - return NULL; + if (vkCreateShaderModule(screen->dev, &smci, NULL, &mod) != VK_SUCCESS) + mod = VK_NULL_HANDLE; + +done: + ralloc_free(nir); + + /* TODO: determine if there's any reason to cache spirv output? */ + ralloc_free(spirv); + return mod; +} + +static bool +lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_instance_id) + return false; + b->cursor = nir_after_instr(instr); + nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b)); + nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr); + return true; +} + +static bool +lower_baseinstance(nir_shader *shader) +{ + if (shader->info.stage != MESA_SHADER_VERTEX) + return false; + return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL); +} + +bool nir_lower_dynamic_bo_access(nir_shader *shader); + +/* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access + * so instead we delete all those broken variables and just make new ones + */ +static bool +unbreak_bos(nir_shader *shader) +{ + uint32_t ssbo_used = 0; + uint32_t ubo_used = 0; + uint64_t max_ssbo_size = 0; + uint64_t max_ubo_size = 0; + bool ssbo_sizes[PIPE_MAX_SHADER_BUFFERS] = {false}; + + if (!shader->info.num_ssbos && !shader->info.num_ubos && !shader->num_uniforms) + return false; + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_store_ssbo: + ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[1])); + break; + + case nir_intrinsic_get_ssbo_size: { + uint32_t slot = nir_src_as_uint(intrin->src[0]); + ssbo_used |= BITFIELD_BIT(slot); + ssbo_sizes[slot] = true; + break; + } + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_ssbo_atomic_fmin: + case nir_intrinsic_ssbo_atomic_fmax: + case nir_intrinsic_ssbo_atomic_fcomp_swap: + case nir_intrinsic_load_ssbo: + ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0])); + break; + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ubo_vec4: + ubo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0])); + break; + default: + break; + } + } + } + + nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) { + const struct glsl_type *type = glsl_without_array(var->type); + if 
(type_is_counter(type)) + continue; + unsigned size = glsl_count_attribute_slots(type, false); + if (var->data.mode == nir_var_mem_ubo) + max_ubo_size = MAX2(max_ubo_size, size); + else + max_ssbo_size = MAX2(max_ssbo_size, size); + var->data.mode = nir_var_shader_temp; + } + nir_fixup_deref_modes(shader); + NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL); + optimize_nir(shader); + + if (!ssbo_used && !ubo_used) + return false; + + struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2); + fields[0].name = ralloc_strdup(shader, "base"); + fields[1].name = ralloc_strdup(shader, "unsized"); + if (ubo_used) { + const struct glsl_type *ubo_type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4); + fields[0].type = ubo_type; + u_foreach_bit(slot, ubo_used) { + char buf[64]; + snprintf(buf, sizeof(buf), "ubo_slot_%u", slot); + nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, glsl_struct_type(fields, 1, "struct", false), buf); + var->interface_type = var->type; + var->data.driver_location = slot; + } + } + if (ssbo_used) { + const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4); + const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4); + fields[0].type = ssbo_type; + u_foreach_bit(slot, ssbo_used) { + char buf[64]; + snprintf(buf, sizeof(buf), "ssbo_slot_%u", slot); + if (ssbo_sizes[slot]) + fields[1].type = unsized; + else + fields[1].type = NULL; + nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo, + glsl_struct_type(fields, 1 + !!ssbo_sizes[slot], "struct", false), buf); + var->interface_type = var->type; + var->data.driver_location = slot; + } + } + return true; +} + +struct zink_shader * +zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, + const struct pipe_stream_output_info *so_info) +{ + struct zink_shader *ret = CALLOC_STRUCT(zink_shader); + bool have_psiz = false; + + ret->shader_id = p_atomic_inc_return(&screen->shader_id); + ret->programs = _mesa_pointer_set_create(NULL); + + if (nir->info.stage == MESA_SHADER_VERTEX) + create_vs_pushconst(nir); + else if (nir->info.stage == MESA_SHADER_TESS_CTRL || + nir->info.stage == MESA_SHADER_TESS_EVAL) { + NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT_MAX); + NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); + } else if (nir->info.stage == MESA_SHADER_KERNEL) + create_cs_pushconst(nir); + + if (nir->info.stage < MESA_SHADER_FRAGMENT) + have_psiz = check_psiz(nir); + NIR_PASS_V(nir, lower_basevertex); + NIR_PASS_V(nir, lower_work_dim); + NIR_PASS_V(nir, nir_lower_regs_to_ssa); + NIR_PASS_V(nir, lower_baseinstance); + optimize_nir(nir); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); + NIR_PASS_V(nir, lower_discard_if); + NIR_PASS_V(nir, nir_lower_fragcolor); + NIR_PASS_V(nir, lower_64bit_vertex_attribs); + NIR_PASS_V(nir, unbreak_bos); + + if (zink_debug & ZINK_DEBUG_NIR) { + fprintf(stderr, "NIR shader:\n---8<---\n"); + nir_print_shader(nir, stderr); + fprintf(stderr, "---8<---\n"); + } + + foreach_list_typed_reverse(nir_variable, var, node, &nir->variables) { + if (_nir_shader_variable_has_mode(var, nir_var_uniform | + nir_var_mem_ubo | + nir_var_mem_ssbo)) { + enum zink_descriptor_type ztype; + const struct glsl_type *type = glsl_without_array(var->type); + if (var->data.mode == nir_var_mem_ubo) { + ztype = ZINK_DESCRIPTOR_TYPE_UBO; + var->data.descriptor_set = ztype; + var->data.binding 
= zink_binding(nir->info.stage, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + var->data.driver_location); + VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + int binding = var->data.binding; + + ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; + ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding; + ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype; + ret->bindings[ztype][ret->num_bindings[ztype]].size = 1; + ret->ubos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index); + ret->num_bindings[ztype]++; + } else if (var->data.mode == nir_var_mem_ssbo) { + ztype = ZINK_DESCRIPTOR_TYPE_SSBO; + var->data.descriptor_set = ztype; + var->data.binding = zink_binding(nir->info.stage, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + var->data.driver_location); + ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; + ret->ssbos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index); + ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding; + ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + ret->bindings[ztype][ret->num_bindings[ztype]].size = 1; + ret->num_bindings[ztype]++; + } else { + assert(var->data.mode == nir_var_uniform); + if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) { + VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type); + ztype = zink_desc_type_from_vktype(vktype); + var->data.descriptor_set = ztype; + var->data.driver_location = var->data.binding; + var->data.binding = zink_binding(nir->info.stage, + vktype, + var->data.driver_location); + ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; + ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding; + ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype; + if (glsl_type_is_array(var->type)) + ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type); + else + ret->bindings[ztype][ret->num_bindings[ztype]].size = 1; + ret->num_bindings[ztype]++; + } + } + } + } + + ret->nir = nir; + if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings) + update_so_info(ret, so_info, nir->info.outputs_written, have_psiz); return ret; } void -zink_shader_free(struct zink_screen *screen, struct zink_shader *shader) +zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr, bool optimize) { - vkDestroyShaderModule(screen->dev, shader->shader_module, NULL); + struct zink_screen *screen = zink_screen(pscreen); + nir_shader *nir = nirptr; + + if (!screen->info.feats.features.shaderImageGatherExtended) { + nir_lower_tex_options tex_opts = {}; + tex_opts.lower_tg4_offsets = true; + NIR_PASS_V(nir, nir_lower_tex, &tex_opts); + } + NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false); + if (nir->info.stage == MESA_SHADER_GEOMETRY) + NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream); + optimize_nir(nir); + if (nir->info.num_ubos || nir->info.num_ssbos) + NIR_PASS_V(nir, nir_lower_dynamic_bo_access); + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + if (screen->driconf.inline_uniforms) + nir_find_inlinable_uniforms(nir); +} + +void +zink_shader_free(struct zink_context *ctx, struct zink_shader *shader) +{ + struct zink_screen *screen = zink_screen(ctx->base.screen); + set_foreach(shader->programs, 
entry) { + if (shader->nir->info.stage == MESA_SHADER_COMPUTE) { + struct zink_compute_program *comp = (void*)entry->key; + _mesa_hash_table_remove_key(ctx->compute_program_cache, &comp->shader->shader_id); + comp->shader = NULL; + bool in_use = comp == ctx->curr_compute; + if (in_use) + ctx->compute_stage = NULL; + if (zink_compute_program_reference(screen, &comp, NULL) && in_use) + ctx->curr_compute = NULL; + } else { + struct zink_gfx_program *prog = (void*)entry->key; + enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage); + assert(pstage < ZINK_SHADER_COUNT); + bool in_use = prog == ctx->curr_program; + if (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated) + _mesa_hash_table_remove_key(ctx->program_cache, prog->shaders); + prog->shaders[pstage] = NULL; + if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated) + /* automatically destroy generated tcs shaders when tes is destroyed */ + zink_shader_free(ctx, shader->generated); + if (in_use) { + ctx->gfx_pipeline_state.modules[pstage] = VK_NULL_HANDLE; + ctx->gfx_stages[pstage] = NULL; + } + if (zink_gfx_program_reference(screen, &prog, NULL) && in_use) + ctx->curr_program = NULL; + } + } + _mesa_set_destroy(shader->programs, NULL); + ralloc_free(shader->nir); FREE(shader); } + + +/* creating a passthrough tcs shader that's roughly: + +#version 150 +#extension GL_ARB_tessellation_shader : require + +in vec4 some_var[gl_MaxPatchVertices]; +out vec4 some_var_out; + +layout(push_constant) uniform tcsPushConstants { + layout(offset = 0) float TessLevelInner[2]; + layout(offset = 8) float TessLevelOuter[4]; +} u_tcsPushConstants; +layout(vertices = $vertices_per_patch) out; +void main() +{ + gl_TessLevelInner = u_tcsPushConstants.TessLevelInner; + gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter; + some_var_out = some_var[gl_InvocationID]; +} + +*/ +struct zink_shader * +zink_shader_tcs_create(struct zink_context *ctx, struct zink_shader *vs) +{ + unsigned vertices_per_patch = ctx->gfx_pipeline_state.vertices_per_patch; + struct zink_shader *ret = CALLOC_STRUCT(zink_shader); + ret->shader_id = 0; //special value for internal shaders + ret->programs = _mesa_pointer_set_create(NULL); + + nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &zink_screen(ctx->base.screen)->nir_options, NULL); + nir_function *fn = nir_function_create(nir, "main"); + fn->is_entrypoint = true; + nir_function_impl *impl = nir_function_impl_create(fn); + + nir_builder b; + nir_builder_init(&b, impl); + b.cursor = nir_before_block(nir_start_block(impl)); + + nir_ssa_def *invocation_id = nir_load_invocation_id(&b); + + nir_foreach_shader_out_variable(var, vs->nir) { + const struct glsl_type *type = var->type; + const struct glsl_type *in_type = var->type; + const struct glsl_type *out_type = var->type; + char buf[1024]; + snprintf(buf, sizeof(buf), "%s_out", var->name); + in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0); + out_type = glsl_array_type(type, vertices_per_patch, 0); + + nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name); + nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf); + out->data.location = in->data.location = var->data.location; + out->data.location_frac = in->data.location_frac = var->data.location_frac; + + /* gl_in[] receives values from equivalent built-in output + variables written by the vertex shader (section 2.14.7). 
Each array + element of gl_in[] is a structure holding values for a specific vertex of + the input patch. The length of gl_in[] is equal to the + implementation-dependent maximum patch size (gl_MaxPatchVertices). + - ARB_tessellation_shader + */ + for (unsigned i = 0; i < vertices_per_patch; i++) { + /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */ + nir_if *start_block = nir_push_if(&b, nir_ieq(&b, invocation_id, nir_imm_int(&b, i))); + nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id); + nir_ssa_def *load = nir_load_deref(&b, in_array_var); + nir_deref_instr *out_array_var = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, out), i); + nir_store_deref(&b, out_array_var, load, 0xff); + nir_pop_if(&b, start_block); + } + } + nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner"); + gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER; + gl_TessLevelInner->data.patch = 1; + nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter"); + gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER; + gl_TessLevelOuter->data.patch = 1; + + /* hacks so we can size these right for now */ + struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3); + /* just use a single blob for padding here because it's easier */ + fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0); + fields[0].name = ralloc_asprintf(nir, "padding"); + fields[0].offset = 0; + fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0); + fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner"); + fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level); + fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0); + fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter"); + fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level); + nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const, + glsl_struct_type(fields, 3, "struct", false), "pushconst"); + pushconst->data.location = VARYING_SLOT_VAR0; + + nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8); + nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16); + + for (unsigned i = 0; i < 2; i++) { + nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i); + nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff); + } + for (unsigned i = 0; i < 4; i++) { + nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i); + nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff); + } + + nir->info.tess.tcs_vertices_out = vertices_per_patch; + nir_validate_shader(nir, "created"); + + NIR_PASS_V(nir, nir_lower_regs_to_ssa); + optimize_nir(nir); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); + NIR_PASS_V(nir, lower_discard_if); + NIR_PASS_V(nir, nir_convert_from_ssa, true); + + ret->nir = nir; + ret->is_generated = true; + return ret; +} + +uint32_t +zink_binding(gl_shader_stage stage, VkDescriptorType type, int index) +{ + if (stage == MESA_SHADER_NONE) { + unreachable("not 
supported"); + } else { + switch (type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + assert(index < PIPE_MAX_CONSTANT_BUFFERS); + return (stage * PIPE_MAX_CONSTANT_BUFFERS) + index; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + assert(index < PIPE_MAX_SAMPLERS); + return (stage * PIPE_MAX_SAMPLERS) + index; + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + assert(index < PIPE_MAX_SHADER_BUFFERS); + return (stage * PIPE_MAX_SHADER_BUFFERS) + index; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + assert(index < PIPE_MAX_SHADER_IMAGES); + return (stage * PIPE_MAX_SHADER_IMAGES) + index; + + default: + unreachable("unexpected type"); + } + } +} |