diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2024-04-02 09:30:07 +0000
---|---|---
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2024-04-02 09:30:07 +0000
commit | f54e142455cb3c9d1662dae7e096a32a47e5409b (patch) |
tree | 440ecd46269f0eac25e349e1ed58f246490c5e26 | /lib/mesa/src/gallium/drivers/zink/zink_compiler.c
parent | 36d8503c27530f68d655d3ef77a6eaa4dfd8ad65 (diff) |
Import Mesa 23.3.6
Diffstat (limited to 'lib/mesa/src/gallium/drivers/zink/zink_compiler.c')
-rw-r--r-- | lib/mesa/src/gallium/drivers/zink/zink_compiler.c | 2780 |
1 file changed, 1684 insertions(+), 1096 deletions(-)
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_compiler.c b/lib/mesa/src/gallium/drivers/zink/zink_compiler.c
index eb4e1e593..e1411bcb8 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_compiler.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_compiler.c
@@ -40,7 +40,6 @@
 #include "nir/tgsi_to_nir.h"
 #include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_from_mesa.h"
 
 #include "util/u_memory.h"
@@ -65,7 +64,7 @@ copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
          copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
       }
    } else {
-      nir_ssa_def *load = nir_load_deref(b, src);
+      nir_def *load = nir_load_deref(b, src);
       nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
    }
 }
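Most of the mechanical churn in this import follows the upstream NIR rename of `nir_ssa_def` to `nir_def` and the removal of the separate `nir_dest` wrapper (`intr->dest.ssa` becomes `intr->def`). A minimal sketch of a pass callback on the new spelling, assuming only the helpers visible in this diff (the callback itself is illustrative, not from this commit):

```c
/* illustrative only: pass-callback skeleton in the Mesa 23.3 spelling */
static bool
rewrite_cb(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_draw_id)
      return false;
   b->cursor = nir_before_instr(instr);
   nir_def *zero = nir_imm_int(b, 0);      /* was: nir_ssa_def */
   nir_def_rewrite_uses(&intr->def, zero); /* was: &intr->dest.ssa */
   nir_instr_remove(instr);
   return true;
}
```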
@@ -102,151 +101,25 @@
    fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);
 }
 
 static bool
-lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
+lower_basevertex_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
 {
-   if (instr->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   if (intr->intrinsic != nir_intrinsic_load_deref)
-      return false;
-   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
-   if (var->data.mode != nir_var_shader_in)
-      return false;
-   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
-      return false;
-
-   /* create second variable for the split */
-   nir_variable *var2 = nir_variable_clone(var, b->shader);
-   /* split new variable into second slot */
-   var2->data.driver_location++;
-   nir_shader_add_variable(b->shader, var2);
-
-   unsigned total_num_components = glsl_get_vector_elements(var->type);
-   /* new variable is the second half of the dvec */
-   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
-   /* clamp original variable to a dvec2 */
-   var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);
-
-   b->cursor = nir_after_instr(instr);
-
-   /* this is the first load instruction for the first half of the dvec3/4 components */
-   nir_ssa_def *load = nir_load_var(b, var);
-   /* this is the second load instruction for the second half of the dvec3/4 components */
-   nir_ssa_def *load2 = nir_load_var(b, var2);
-
-   nir_ssa_def *def[4];
-   /* create a new dvec3/4 comprised of all the loaded components from both variables */
-   def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
-   def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
-   def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
-   if (total_num_components == 4)
-      def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
-   nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
-   /* use the assembled dvec3/4 for all other uses of the load */
-   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
-                                  new_vec->parent_instr);
-
-   /* remove the original instr and its deref chain */
-   nir_instr *parent = intr->src[0].ssa->parent_instr;
-   nir_instr_remove(instr);
-   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
-
-   return true;
-}
-
-/* mesa/gallium always provides UINT versions of 64bit formats:
- * - rewrite loads as 32bit vec loads
- * - cast back to 64bit
- */
-static bool
-lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
-{
-   if (instr->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   if (intr->intrinsic != nir_intrinsic_load_deref)
-      return false;
-   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
-   if (var->data.mode != nir_var_shader_in)
-      return false;
-   if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
-      return false;
-
-   unsigned num_components = glsl_get_vector_elements(var->type);
-   enum glsl_base_type base_type;
-   switch (glsl_get_base_type(var->type)) {
-   case GLSL_TYPE_UINT64:
-      base_type = GLSL_TYPE_UINT;
-      break;
-   case GLSL_TYPE_INT64:
-      base_type = GLSL_TYPE_INT;
-      break;
-   case GLSL_TYPE_DOUBLE:
-      base_type = GLSL_TYPE_FLOAT;
-      break;
-   default:
-      unreachable("unknown 64-bit vertex attribute format!");
-   }
-   var->type = glsl_vector_type(base_type, num_components * 2);
-
-   b->cursor = nir_after_instr(instr);
-
-   nir_ssa_def *load = nir_load_var(b, var);
-   nir_ssa_def *casted[2];
-   for (unsigned i = 0; i < num_components; i++)
-      casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));
-
-   /* remove the original instr and its deref chain */
-   nir_instr *parent = intr->src[0].ssa->parent_instr;
-   nir_instr_remove(instr);
-   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));
-
-   return true;
-}
-
-/* "64-bit three- and four-component vectors consume two consecutive locations."
- *  - 14.1.4. Location Assignment
- *
- * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
- * are assigned to consecutive locations, loaded separately, and then assembled back into a
- * composite value that's used in place of the original loaded ssa src
- */
-static bool
-lower_64bit_vertex_attribs(nir_shader *shader)
-{
-   if (shader->info.stage != MESA_SHADER_VERTEX)
-      return false;
-
-   bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
-   progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
-   return progress;
-}
-
-static bool
-lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
-{
-   if (in->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
    if (instr->intrinsic != nir_intrinsic_load_base_vertex)
       return false;
 
    b->cursor = nir_after_instr(&instr->instr);
 
-   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
+   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
    load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
-   nir_intrinsic_set_range(load, 4);
    load->num_components = 1;
-   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
+   nir_def_init(&load->instr, &load->def, 1, 32);
   nir_builder_instr_insert(b, &load->instr);
 
-   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
-                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
-                                          &instr->dest.ssa,
+   nir_def *composite = nir_build_alu(b, nir_op_bcsel,
+                                      nir_build_alu(b, nir_op_ieq, &load->def, nir_imm_int(b, 1), NULL, NULL),
+                                      &instr->def,
                                           nir_imm_int(b, 0), NULL);
 
-   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
+   nir_def_rewrite_uses_after(&instr->def, composite,
                                   composite->parent_instr);
    return true;
 }
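Throughout this file, `nir_load_push_constant` with explicit `.base`/`.range` indices gives way to a zink-specific `nir_load_push_constant_zink` whose only operand is the `ZINK_GFX_PUSHCONST_*` slot. A minimal sketch of the new emission — the wrapper function is invented for illustration, the builder call and enum are as used in the hunks here:

```c
/* sketch: load one 32-bit word from the zink push-constant block */
static nir_def *
load_pushconst_dword(nir_builder *b, unsigned zink_gfx_pushconst_slot)
{
   /* no .base/.range needed anymore; the slot index is the source */
   return nir_load_push_constant_zink(b, 1, 32,
                                      nir_imm_int(b, zink_gfx_pushconst_slot));
}
```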
@@ -260,28 +133,25 @@ lower_basevertex(nir_shader *shader)
    if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
       return false;
 
-   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
+   return nir_shader_intrinsics_pass(shader, lower_basevertex_instr,
+                                     nir_metadata_dominance, NULL);
 }
 
 
 static bool
-lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
+lower_drawid_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
 {
-   if (in->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
    if (instr->intrinsic != nir_intrinsic_load_draw_id)
       return false;
 
    b->cursor = nir_before_instr(&instr->instr);
 
-   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
+   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
    load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
-   nir_intrinsic_set_range(load, 4);
    load->num_components = 1;
-   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
+   nir_def_init(&load->instr, &load->def, 1, 32);
    nir_builder_instr_insert(b, &load->instr);
 
-   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
+   nir_def_rewrite_uses(&instr->def, &load->def);
 
    return true;
 }
@@ -295,7 +165,8 @@ lower_drawid(nir_shader *shader)
    if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
       return false;
 
-   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
+   return nir_shader_intrinsics_pass(shader, lower_drawid_instr,
+                                     nir_metadata_dominance, NULL);
 }
 
 struct lower_gl_point_state {
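Both lowerings above migrate from `nir_shader_instructions_pass` to `nir_shader_intrinsics_pass`, which invokes the callback only on `nir_intrinsic_instr`, so the `instr->type` check and `nir_instr_as_intrinsic()` cast drop out of every converted callback. A minimal sketch under those assumptions (the counting callback is illustrative):

```c
/* sketch: an intrinsics-only walk with the new pass entrypoint */
static bool
count_draw_id(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_draw_id)
      return false;
   (*(unsigned *)data)++;  /* illustrative side effect */
   return false;           /* nothing rewritten: report no progress */
}

static unsigned
draw_id_uses(nir_shader *s)
{
   unsigned count = 0;
   nir_shader_intrinsics_pass(s, count_draw_id, nir_metadata_all, &count);
   return count;
}
```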
@@ -307,7 +178,7 @@ static bool
 lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
 {
    struct lower_gl_point_state *state = data;
-   nir_ssa_def *vp_scale, *pos;
+   nir_def *vp_scale, *pos;
 
    if (instr->type != nir_instr_type_intrinsic)
       return false;
@@ -329,34 +200,34 @@ lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
    b->cursor = nir_before_instr(instr);
 
    // viewport-map endpoints
-   nir_ssa_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
-   vp_scale = nir_load_push_constant(b, 2, 32, vp_const_pos, .base = 1, .range = 2);
+   nir_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
+   vp_scale = nir_load_push_constant_zink(b, 2, 32, vp_const_pos);
 
    // Load point info values
-   nir_ssa_def *point_size = nir_load_var(b, state->gl_point_size);
-   nir_ssa_def *point_pos = nir_load_var(b, state->gl_pos_out);
+   nir_def *point_size = nir_load_var(b, state->gl_point_size);
+   nir_def *point_pos = nir_load_var(b, state->gl_pos_out);
 
    // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
-   nir_ssa_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
+   nir_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
    w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
    // halt_w_delta = w_delta / 2
-   nir_ssa_def *half_w_delta = nir_fmul(b, w_delta, nir_imm_float(b, 0.5));
+   nir_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5);
 
    // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
-   nir_ssa_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
+   nir_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
    h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
    // halt_h_delta = h_delta / 2
-   nir_ssa_def *half_h_delta = nir_fmul(b, h_delta, nir_imm_float(b, 0.5));
+   nir_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5);
 
-   nir_ssa_def *point_dir[4][2] = {
+   nir_def *point_dir[4][2] = {
       { nir_imm_float(b, -1), nir_imm_float(b, -1) },
       { nir_imm_float(b, -1), nir_imm_float(b, 1) },
       { nir_imm_float(b, 1), nir_imm_float(b, -1) },
       { nir_imm_float(b, 1), nir_imm_float(b, 1) }
    };
 
-   nir_ssa_def *point_pos_x = nir_channel(b, point_pos, 0);
-   nir_ssa_def *point_pos_y = nir_channel(b, point_pos, 1);
+   nir_def *point_pos_x = nir_channel(b, point_pos, 0);
+   nir_def *point_pos_y = nir_channel(b, point_pos, 1);
 
    for (size_t i = 0; i < 4; i++) {
       pos = nir_vec4(b,
@@ -381,9 +252,8 @@ static bool
 lower_gl_point_gs(nir_shader *shader)
 {
    struct lower_gl_point_state state;
-   nir_builder b;
 
-   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
+   shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
    shader->info.gs.vertices_out *= 4;
 
    // Gets the gl_Position in and out
@@ -398,10 +268,6 @@ lower_gl_point_gs(nir_shader *shader)
    if (!state.gl_pos_out || !state.gl_point_size)
       return false;
 
-   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
-   nir_builder_init(&b, entry);
-   b.cursor = nir_before_cf_list(&entry->body);
-
    return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
                                        nir_metadata_dominance, &state);
 }
@@ -416,14 +282,14 @@ struct lower_pv_mode_state {
    unsigned prim;
 };
 
-static nir_ssa_def*
+static nir_def*
 lower_pv_mode_gs_ring_index(nir_builder *b,
                             struct lower_pv_mode_state *state,
-                            nir_ssa_def *index)
+                            nir_def *index)
 {
-   nir_ssa_def *ring_offset = nir_load_var(b, state->ring_offset);
-   return nir_imod(b, nir_iadd(b, index, ring_offset),
-                   nir_imm_int(b, state->ring_size));
+   nir_def *ring_offset = nir_load_var(b, state->ring_offset);
+   return nir_imod_imm(b, nir_iadd(b, index, ring_offset),
+                       state->ring_size);
 }
 
 /* Given the final deref of chain of derefs this function will walk up the chain
@@ -441,7 +307,6 @@ replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
    case nir_deref_type_var:
       return new;
    case nir_deref_type_array:
-      assert(old->arr.index.is_ssa);
      return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
    case nir_deref_type_struct:
      return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
@@ -466,9 +331,8 @@ lower_pv_mode_gs_store(nir_builder *b,
       gl_varying_slot location = var->data.location;
       unsigned location_frac = var->data.location_frac;
       assert(state->varyings[location][location_frac]);
-      assert(intrin->src[1].is_ssa);
-      nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
-      nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
+      nir_def *pos_counter = nir_load_var(b, state->pos_counter);
+      nir_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
       nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location][location_frac]);
       nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
       // recreate the chain of deref that lead to the store.
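Several hunks in this region also switch to the `_imm` builder variants (`nir_fmul_imm`, `nir_imod_imm`, `nir_iadd_imm`), which take the constant operand directly instead of materializing it with `nir_imm_*`. Both spellings build the same ALU instruction; a sketch, assuming a builder `b` and operand `x` from a surrounding pass:

```c
/* sketch: two equivalent ways to halve a value */
static nir_def *
halve(nir_builder *b, nir_def *x)
{
   nir_def *old_style = nir_fmul(b, x, nir_imm_float(b, 0.5)); /* pre-23.3 */
   nir_def *new_style = nir_fmul_imm(b, x, 0.5);               /* 23.3 */
   return new_style; /* same instruction as old_style */
}
```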
@@ -484,10 +348,10 @@ static void
 lower_pv_mode_emit_rotated_prim(nir_builder *b,
                                 struct lower_pv_mode_state *state,
-                                nir_ssa_def *current_vertex)
+                                nir_def *current_vertex)
 {
-   nir_ssa_def *two = nir_imm_int(b, 2);
-   nir_ssa_def *three = nir_imm_int(b, 3);
+   nir_def *two = nir_imm_int(b, 2);
+   nir_def *three = nir_imm_int(b, 3);
    bool is_triangle = state->primitive_vert_count == 3;
    /* This shader will always see the last three vertices emitted by the user gs.
     * The following table is used to to rotate primitives within a strip generated
@@ -505,17 +369,17 @@ lower_pv_mode_emit_rotated_prim(nir_builder *b,
     *
     * odd or even primitive within draw
     */
-   nir_ssa_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
+   nir_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
    for (unsigned i = 0; i < state->primitive_vert_count; i++) {
       /* odd or even triangle within strip emitted by user GS
        * this is handled using the table
        */
-      nir_ssa_def *odd_user_prim = nir_imod(b, current_vertex, two);
+      nir_def *odd_user_prim = nir_imod(b, current_vertex, two);
       unsigned offset_even = vert_maps[is_triangle][0][i];
       unsigned offset_odd = vert_maps[is_triangle][1][i];
-      nir_ssa_def *offset_even_value = nir_imm_int(b, offset_even);
-      nir_ssa_def *offset_odd_value = nir_imm_int(b, offset_odd);
-      nir_ssa_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
+      nir_def *offset_even_value = nir_imm_int(b, offset_even);
+      nir_def *offset_odd_value = nir_imm_int(b, offset_odd);
+      nir_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
                                      offset_odd_value, offset_even_value);
      /* Here we account for how triangles are provided to the gs from a strip.
       * For even primitives we rotate by 3, meaning we do nothing.
@@ -538,7 +402,7 @@ lower_pv_mode_emit_rotated_prim(nir_builder *b,
       gl_varying_slot location = var->data.location;
       unsigned location_frac = var->data.location_frac;
       if (state->varyings[location][location_frac]) {
-         nir_ssa_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
+         nir_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
         nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location][location_frac]), index);
         copy_vars(b, nir_build_deref_var(b, var), value);
      }
@@ -555,7 +419,7 @@ lower_pv_mode_gs_emit_vertex(nir_builder *b,
    b->cursor = nir_before_instr(&intrin->instr);
 
    // increment pos_counter
-   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
+   nir_def *pos_counter = nir_load_var(b, state->pos_counter);
    nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);
 
    nir_instr_remove(&intrin->instr);
@@ -569,10 +433,10 @@ lower_pv_mode_gs_end_primitive(nir_builder *b,
 {
    b->cursor = nir_before_instr(&intrin->instr);
 
-   nir_ssa_def *pos_counter = nir_load_var(b, state->pos_counter);
+   nir_def *pos_counter = nir_load_var(b, state->pos_counter);
    nir_push_loop(b);
    {
-      nir_ssa_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
+      nir_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
      nir_push_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
                             nir_imm_int(b, state->primitive_vert_count)));
      nir_jump(b, nir_jump_break);
@@ -621,14 +485,14 @@ lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
 }
 
 static unsigned int
-lower_pv_mode_vertices_for_prim(enum shader_prim prim)
+lower_pv_mode_vertices_for_prim(enum mesa_prim prim)
 {
    switch (prim) {
-   case SHADER_PRIM_POINTS:
+   case MESA_PRIM_POINTS:
       return 1;
-   case SHADER_PRIM_LINE_STRIP:
+   case MESA_PRIM_LINE_STRIP:
       return 2;
-   case SHADER_PRIM_TRIANGLE_STRIP:
+   case MESA_PRIM_TRIANGLE_STRIP:
       return 3;
    default:
       unreachable("unsupported primitive for gs output");
@@ -643,8 +507,7 @@ lower_pv_mode_gs(nir_shader *shader, unsigned prim)
    memset(state.varyings, 0, sizeof(state.varyings));
 
    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
-   nir_builder_init(&b, entry);
-   b.cursor = nir_before_cf_list(&entry->body);
+   b = nir_builder_at(nir_before_impl(entry));
 
    state.primitive_vert_count =
      lower_pv_mode_vertices_for_prim(shader->info.gs.output_primitive);
@@ -699,12 +562,12 @@ struct lower_line_stipple_state {
    bool line_rectangular;
 };
 
-static nir_ssa_def *
-viewport_map(nir_builder *b, nir_ssa_def *vert,
-             nir_ssa_def *scale)
+static nir_def *
+viewport_map(nir_builder *b, nir_def *vert,
+             nir_def *scale)
 {
-   nir_ssa_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
-   nir_ssa_def *ndc_point = nir_fmul(b, nir_channels(b, vert, 0x3),
+   nir_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
+   nir_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2),
                                      w_recip);
    return nir_fmul(b, ndc_point, scale);
 }
@@ -725,21 +588,19 @@ lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
    nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
    // viewport-map endpoints
-   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
-                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
-                                                  .base = 1,
-                                                  .range = 2);
-   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
-   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
+   nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
+                                                   nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
+   nir_def *prev = nir_load_var(b, state->prev_pos);
+   nir_def *curr = nir_load_var(b, state->pos_out);
    prev = viewport_map(b, prev, vp_scale);
    curr = viewport_map(b, curr, vp_scale);
 
    // calculate length of line
-   nir_ssa_def *len;
+   nir_def *len;
    if (state->line_rectangular)
       len = nir_fast_distance(b, prev, curr);
    else {
-      nir_ssa_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
+      nir_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
      len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
    }
    // update stipple_counter
@@ -796,8 +657,7 @@ lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
    state.line_rectangular = line_rectangular;
    // initialize pos_counter and stipple_counter
    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
-   nir_builder_init(&b, entry);
-   b.cursor = nir_before_cf_list(&entry->body);
+   b = nir_builder_at(nir_before_impl(entry));
    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
    nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);
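Builder setup in these passes collapses from the two-step `nir_builder_init()` plus cursor assignment into a single `nir_builder_at()` call, with `nir_before_impl()`/`nir_after_impl()` replacing cursors built from `&impl->body`. A sketch of the change:

```c
/* sketch: constructing a builder at the top of the entrypoint */
static nir_builder
builder_at_shader_start(nir_shader *shader)
{
   nir_function_impl *entry = nir_shader_get_entrypoint(shader);
   /* before 23.3:
    *    nir_builder b;
    *    nir_builder_init(&b, entry);
    *    b.cursor = nir_before_cf_list(&entry->body);
    */
   return nir_builder_at(nir_before_impl(entry));
}
```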
@@ -810,7 +670,7 @@ lower_line_stipple_fs(nir_shader *shader)
 {
    nir_builder b;
    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
-   nir_builder_init(&b, entry);
+   b = nir_builder_at(nir_after_impl(entry));
 
    // create stipple counter
    nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
@@ -831,41 +691,38 @@ lower_line_stipple_fs(nir_shader *shader)
       sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
    }
 
-   b.cursor = nir_after_cf_list(&entry->body);
-
-   nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
-                                                 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
-                                                 .base = 1);
-   nir_ssa_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
+   nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
+                                                  nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
+   nir_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
    pattern = nir_iand_imm(&b, pattern, 0xffff);
 
-   nir_ssa_def *sample_mask_in = nir_load_sample_mask_in(&b);
+   nir_def *sample_mask_in = nir_load_sample_mask_in(&b);
    nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
    nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
    nir_store_var(&b, v, sample_mask_in, 1);
    nir_store_var(&b, sample_mask, sample_mask_in, 1);
    nir_push_loop(&b);
    {
-      nir_ssa_def *value = nir_load_var(&b, v);
-      nir_ssa_def *index = nir_ufind_msb(&b, value);
-      nir_ssa_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
-      nir_ssa_def *new_value = nir_ixor(&b, value, index_mask);
+      nir_def *value = nir_load_var(&b, v);
+      nir_def *index = nir_ufind_msb(&b, value);
+      nir_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
+      nir_def *new_value = nir_ixor(&b, value, index_mask);
       nir_store_var(&b, v, new_value, 1);
      nir_push_if(&b, nir_ieq_imm(&b, value, 0));
      nir_jump(&b, nir_jump_break);
      nir_pop_if(&b, NULL);
 
-      nir_ssa_def *stipple_pos =
+      nir_def *stipple_pos =
         nir_interp_deref_at_sample(&b, 1, 32,
-                                   &nir_build_deref_var(&b, stipple)->dest.ssa, index);
+                                   &nir_build_deref_var(&b, stipple)->def, index);
      stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
                              nir_imm_float(&b, 16.0));
      stipple_pos = nir_f2i32(&b, stipple_pos);
-      nir_ssa_def *bit =
+      nir_def *bit =
         nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
      nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
      {
-         nir_ssa_def *value = nir_load_var(&b, sample_mask);
+         nir_def *value = nir_load_var(&b, sample_mask);
         value = nir_ixor(&b, value, index_mask);
         nir_store_var(&b, sample_mask, value, 1);
      }
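The loop above walks the covered samples with `nir_ufind_msb`, interpolates the stipple coordinate at each sample, and clears the sample's mask bit when the pattern bit is 0. The per-sample test reduces to this scalar computation — a CPU-side sketch with an invented helper name, mirroring the shifts and masks emitted above:

```c
#include <math.h>
#include <stdbool.h>
#include <stdint.h>

/* sketch: does the 16-bit line-stipple pattern cover this position? */
static bool
stipple_covered(uint32_t pushconst_word, float stipple_pos)
{
   uint32_t pattern = pushconst_word & 0xffff;   /* low bits: pattern */
   float factor = (float)(pushconst_word >> 16); /* high bits: repeat factor */
   int bit = (int)fmodf(stipple_pos / factor, 16.0f);
   return ((pattern >> bit) & 1) != 0;
}
```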
@@ -901,7 +758,6 @@ lower_line_smooth_gs_store(nir_builder *b,
    unsigned location_frac = var->data.location_frac;
    if (location != VARYING_SLOT_POS) {
       assert(state->varyings[location]);
-      assert(intrin->src[1].is_ssa);
       nir_store_var(b, state->varyings[location][location_frac],
                     intrin->src[1].ssa,
                     nir_intrinsic_write_mask(intrin));
@@ -921,29 +777,26 @@ lower_line_smooth_gs_emit_vertex(nir_builder *b,
    b->cursor = nir_before_instr(&intrin->instr);
 
    nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
-   nir_ssa_def *vp_scale = nir_load_push_constant(b, 2, 32,
-                                                  nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE),
-                                                  .base = 1,
-                                                  .range = 2);
-   nir_ssa_def *prev = nir_load_var(b, state->prev_pos);
-   nir_ssa_def *curr = nir_load_var(b, state->pos_out);
-   nir_ssa_def *prev_vp = viewport_map(b, prev, vp_scale);
-   nir_ssa_def *curr_vp = viewport_map(b, curr, vp_scale);
-
-   nir_ssa_def *width = nir_load_push_constant(b, 1, 32,
-                                               nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH),
-                                               .base = 1);
-   nir_ssa_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
+   nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
+                                                   nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
+   nir_def *prev = nir_load_var(b, state->prev_pos);
+   nir_def *curr = nir_load_var(b, state->pos_out);
+   nir_def *prev_vp = viewport_map(b, prev, vp_scale);
+   nir_def *curr_vp = viewport_map(b, curr, vp_scale);
+
+   nir_def *width = nir_load_push_constant_zink(b, 1, 32,
+                                                nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH));
+   nir_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
 
    const unsigned yx[2] = { 1, 0 };
-   nir_ssa_def *vec = nir_fsub(b, curr_vp, prev_vp);
-   nir_ssa_def *len = nir_fast_length(b, vec);
-   nir_ssa_def *dir = nir_normalize(b, vec);
-   nir_ssa_def *half_length = nir_fmul_imm(b, len, 0.5);
+   nir_def *vec = nir_fsub(b, curr_vp, prev_vp);
+   nir_def *len = nir_fast_length(b, vec);
+   nir_def *dir = nir_normalize(b, vec);
+   nir_def *half_length = nir_fmul_imm(b, len, 0.5);
    half_length = nir_fadd_imm(b, half_length, 0.5);
 
-   nir_ssa_def *vp_scale_rcp = nir_frcp(b, vp_scale);
-   nir_ssa_def *tangent =
+   nir_def *vp_scale_rcp = nir_frcp(b, vp_scale);
+   nir_def *tangent =
       nir_fmul(b,
                nir_fmul(b,
                         nir_swizzle(b, dir, yx, 2),
@@ -953,7 +806,7 @@ lower_line_smooth_gs_emit_vertex(nir_builder *b,
    tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
 
    dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);
-   nir_ssa_def *line_offets[8] = {
+   nir_def *line_offets[8] = {
       nir_fadd(b, tangent, nir_fneg(b, dir)),
       nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
       tangent,
@@ -963,9 +816,9 @@ lower_line_smooth_gs_emit_vertex(nir_builder *b,
       nir_fadd(b, tangent, dir),
       nir_fadd(b, nir_fneg(b, tangent), dir),
    };
-   nir_ssa_def *line_coord =
+   nir_def *line_coord =
       nir_vec4(b, half_width, half_width, half_length, half_length);
-   nir_ssa_def *line_coords[8] = {
+   nir_def *line_coords[8] = {
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, -1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, -1, 1)),
      nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
@@ -1125,12 +978,11 @@ lower_line_smooth_gs(nir_shader *shader)
 
    // initialize pos_counter
    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
-   nir_builder_init(&b, entry);
-   b.cursor = nir_before_cf_list(&entry->body);
+   b = nir_builder_at(nir_before_impl(entry));
    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
 
    shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
-   shader->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
+   shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
 
    return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
                                        nir_metadata_dominance, &state);
@@ -1159,11 +1011,9 @@ lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
 
    // initialize stipple_pattern
    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
-   nir_builder_init(&b, entry);
-   b.cursor = nir_before_cf_list(&entry->body);
-   nir_ssa_def *pattern = nir_load_push_constant(&b, 1, 32,
-                                                 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN),
-                                                 .base = 1);
+   b = nir_builder_at(nir_before_impl(entry));
+   nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
+                                                  nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
    nir_store_var(&b, stipple_pattern, pattern, 1);
 }
@@ -1195,8 +1045,8 @@ lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
       alu_instr->op != nir_op_unpack_64_2x32)
      return false;
    b->cursor = nir_before_instr(&alu_instr->instr);
-   nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
-   nir_ssa_def *dest;
+   nir_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
+   nir_def *dest;
    switch (alu_instr->op) {
    case nir_op_pack_64_2x32:
      dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
@@ -1207,7 +1057,7 @@ lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
    default:
      unreachable("Impossible opcode");
    }
-   nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest);
+   nir_def_rewrite_uses(&alu_instr->def, dest);
    nir_instr_remove(&alu_instr->instr);
    return true;
 }
@@ -1221,16 +1071,15 @@ lower_64bit_pack(nir_shader *shader)
 
 nir_shader *
 zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
-                               const nir_shader *prev_stage,
-                               int last_pv_vert_offset)
+                               const nir_shader *prev_stage)
 {
    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
                                                   options,
                                                   "filled quad gs");
 
    nir_shader *nir = b.shader;
-   nir->info.gs.input_primitive = SHADER_PRIM_LINES_ADJACENCY;
-   nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
+   nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
+   nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
    nir->info.gs.vertices_in = 4;
    nir->info.gs.vertices_out = 6;
    nir->info.gs.invocations = 1;
@@ -1287,13 +1136,11 @@ zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
    int mapping_first[] = {0, 1, 2, 0, 2, 3};
    int mapping_last[] = {0, 1, 3, 1, 2, 3};
-   nir_ssa_def *last_pv_vert_def = nir_load_ubo(&b, 1, 32,
-                                                nir_imm_int(&b, 0), nir_imm_int(&b, last_pv_vert_offset),
-                                                .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0);
+   nir_def *last_pv_vert_def = nir_load_provoking_last(&b);
    last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
    for (unsigned i = 0; i < 6; ++i) {
       /* swap indices 2 and 3 */
-      nir_ssa_def *idx = nir_bcsel(&b, last_pv_vert_def,
+      nir_def *idx = nir_bcsel(&b, last_pv_vert_def,
                                    nir_imm_int(&b, mapping_last[i]),
                                    nir_imm_int(&b, mapping_first[i]));
      /* Copy inputs to outputs. */
@@ -1315,6 +1162,41 @@ zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
    return nir;
 }
 
+static bool
+lower_system_values_to_inlined_uniforms_instr(nir_builder *b,
+                                              nir_intrinsic_instr *intrin,
+                                              void *data)
+{
+   int inlined_uniform_offset;
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_load_flat_mask:
+      inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
+      break;
+   case nir_intrinsic_load_provoking_last:
+      inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
+      break;
+   default:
+      return false;
+   }
+
+   b->cursor = nir_before_instr(&intrin->instr);
+   nir_def *new_dest_def = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
+                                        nir_imm_int(b, inlined_uniform_offset),
+                                        .align_mul = 4, .align_offset = 0,
+                                        .range_base = 0, .range = ~0);
+   nir_def_rewrite_uses(&intrin->def, new_dest_def);
+   nir_instr_remove(&intrin->instr);
+   return true;
+}
+
+bool
+zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
+{
+   return nir_shader_intrinsics_pass(nir,
+                                     lower_system_values_to_inlined_uniforms_instr,
+                                     nir_metadata_dominance, NULL);
+}
+
 void
 zink_screen_init_compiler(struct zink_screen *screen)
 {
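The new `zink_lower_system_values_to_inlined_uniforms` pass above replaces the old scheme of threading `last_pv_vert_offset` into the generated GS: the quads-emulation shader now emits `nir_load_provoking_last`, and the pass later turns such system values into constant-offset loads from the inlined-uniform words at the start of UBO 0. The offset math, as a sketch (the wrapper name is invented; the `ZINK_INLINE_VAL_*` slot indices and the `nir_load_ubo` parameters mirror the added code):

```c
/* sketch: an inlined uniform is just 32-bit word N of UBO 0 */
static nir_def *
load_inlined_uniform(nir_builder *b, unsigned zink_inline_val_slot)
{
   return nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
                       nir_imm_int(b, zink_inline_val_slot * sizeof(uint32_t)),
                       .align_mul = 4, .align_offset = 0,
                       .range_base = 0, .range = ~0);
}
```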
@@ -1328,6 +1210,9 @@ zink_screen_init_compiler(struct zink_screen *screen)
       .lower_flrp32 = true,
       .lower_fpow = true,
       .lower_fsat = true,
+      .lower_hadd = true,
+      .lower_iadd_sat = true,
+      .lower_fisnormal = true,
       .lower_extract_byte = true,
       .lower_extract_word = true,
       .lower_insert_byte = true,
@@ -1342,21 +1227,20 @@ zink_screen_init_compiler(struct zink_screen *screen)
       .lower_ldexp = true,
       .lower_mul_high = true,
-      .lower_rotate = true,
       .lower_uadd_carry = true,
       .lower_usub_borrow = true,
       .lower_uadd_sat = true,
       .lower_usub_sat = true,
       .lower_vector_cmp = true,
       .lower_int64_options = 0,
-      .lower_doubles_options = 0,
+      .lower_doubles_options = nir_lower_dround_even,
       .lower_uniforms_to_ubo = true,
       .has_fsub = true,
       .has_isub = true,
-      .has_txs = true,
       .lower_mul_2x32_64 = true,
       .support_16bit_alu = true, /* not quite what it sounds like */
       .max_unroll_iterations = 0,
+      .use_interpolated_input_intrinsics = true,
    };
 
    screen->nir_options = default_options;
@@ -1413,10 +1297,10 @@ zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
 }
 
 static bool
-dest_is_64bit(nir_dest *dest, void *state)
+def_is_64bit(nir_def *def, void *state)
 {
    bool *lower = (bool *)state;
-   if (dest && (nir_dest_bit_size(*dest) == 64)) {
+   if (def && (def->bit_size == 64)) {
       *lower = true;
       return false;
    }
@@ -1442,7 +1326,7 @@ filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
     * doesn't have const variants, so do the ugly const_cast here.
     */
    nir_instr *instr = (nir_instr *)const_instr;
-   nir_foreach_dest(instr, dest_is_64bit, &lower);
+   nir_foreach_def(instr, def_is_64bit, &lower);
    if (lower)
       return true;
    nir_foreach_src(instr, src_is_64bit, &lower);
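With `nir_dest` gone, the per-instruction destination walker becomes `nir_foreach_def` over `nir_def *`, as the `filter_64_bit_instr` hunk above shows. A sketch of the renamed callback shape, mirroring `def_is_64bit()`:

```c
/* sketch: flag any 64-bit result while walking an instruction's defs */
static bool
flag_64bit_def(nir_def *def, void *state)
{
   if (def && def->bit_size == 64) {
      *(bool *)state = true;
      return false;   /* stop walking this instruction */
   }
   return true;
}
/* usage: bool lower = false; nir_foreach_def(instr, flag_64bit_def, &lower); */
```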
@@ -1514,25 +1398,25 @@ bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
       return false;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    nir_variable *var = NULL;
-   nir_ssa_def *offset = NULL;
+   nir_def *offset = NULL;
    bool is_load = true;
    b->cursor = nir_before_instr(instr);
 
    switch (intr->intrinsic) {
    case nir_intrinsic_store_ssbo:
-      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
+      var = bo->ssbo[intr->def.bit_size >> 4];
       offset = intr->src[2].ssa;
       is_load = false;
       break;
    case nir_intrinsic_load_ssbo:
-      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
+      var = bo->ssbo[intr->def.bit_size >> 4];
       offset = intr->src[1].ssa;
       break;
    case nir_intrinsic_load_ubo:
       if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
-         var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
+         var = bo->uniforms[intr->def.bit_size >> 4];
       else
-         var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
+         var = bo->ubo[intr->def.bit_size >> 4];
       offset = intr->src[1].ssa;
       break;
    default:
@@ -1550,18 +1434,18 @@ bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
       return false;
 
    unsigned rewrites = 0;
-   nir_ssa_def *result[2];
+   nir_def *result[2];
    for (unsigned i = 0; i < intr->num_components; i++) {
       if (offset_bytes + i >= size) {
         rewrites++;
         if (is_load)
-            result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
+            result[i] = nir_imm_zero(b, 1, intr->def.bit_size);
      }
    }
    assert(rewrites == intr->num_components);
    if (is_load) {
-      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
-      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
+      nir_def *load = nir_vec(b, result, intr->num_components);
+      nir_def_rewrite_uses(&intr->def, load);
    }
    nir_instr_remove(instr);
    return true;
@@ -1575,7 +1459,7 @@ bound_bo_access(nir_shader *shader, struct zink_shader *zs)
 }
 
 static void
-optimize_nir(struct nir_shader *s, struct zink_shader *zs)
+optimize_nir(struct nir_shader *s, struct zink_shader *zs, bool can_shrink)
 {
    bool progress;
    do {
@@ -1604,6 +1488,8 @@ optimize_nir(struct nir_shader *s, struct zink_shader *zs)
       NIR_PASS(progress, s, zink_nir_lower_b2b);
       if (zs)
          NIR_PASS(progress, s, bound_bo_access, zs);
+      if (can_shrink)
+         NIR_PASS(progress, s, nir_opt_shrink_vectors);
    } while (progress);
 
    do {
@@ -1631,7 +1517,7 @@ lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    if (intr->intrinsic != nir_intrinsic_load_deref)
       return false;
-   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
+   nir_variable *var = nir_intrinsic_get_var(intr, 0);
    if (!var->data.fb_fetch_output)
       return false;
    b->cursor = nir_after_instr(instr);
@@ -1648,10 +1534,10 @@ lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
    enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
    fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
    nir_shader_add_variable(b->shader, fbfetch);
-   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
-   nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
-   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
+   nir_def *deref = &nir_build_deref_var(b, fbfetch)->def;
+   nir_def *sample = ms ? nir_load_sample_id(b) : nir_undef(b, 1, 32);
+   nir_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
+   nir_def_rewrite_uses(&intr->def, load);
    return true;
 }
@@ -1694,8 +1580,7 @@ lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
    if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
       return false;
 
-   assert(lod_src.is_ssa);
-   nir_ssa_def *lod = lod_src.ssa;
+   nir_def *lod = lod_src.ssa;
 
    int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
    int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
@@ -1706,17 +1591,17 @@ lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
    levels->dest_type = nir_type_int | lod->bit_size;
    if (offset_idx >= 0) {
       levels->src[0].src_type = nir_tex_src_texture_offset;
-      nir_src_copy(&levels->src[0].src, &txf->src[offset_idx].src, &levels->instr);
+      levels->src[0].src = nir_src_for_ssa(txf->src[offset_idx].src.ssa);
    }
    if (handle_idx >= 0) {
       levels->src[!!(offset_idx >= 0)].src_type = nir_tex_src_texture_handle;
-      nir_src_copy(&levels->src[!!(offset_idx >= 0)].src, &txf->src[handle_idx].src, &levels->instr);
+      levels->src[!!(offset_idx >= 0)].src = nir_src_for_ssa(txf->src[handle_idx].src.ssa);
    }
-   nir_ssa_dest_init(&levels->instr, &levels->dest,
-                     nir_tex_instr_dest_size(levels), 32, NULL);
+   nir_def_init(&levels->instr, &levels->def,
+                nir_tex_instr_dest_size(levels), 32);
    nir_builder_instr_insert(b, &levels->instr);
 
-   nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->dest.ssa));
+   nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->def));
    nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
    nir_builder_instr_insert(b, &new_txf->instr);
 
@@ -1725,12 +1610,12 @@ lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
    unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
    oob_values[3] = (txf->dest_type & nir_type_float) ?
                    nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
-   nir_ssa_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
+   nir_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
 
    nir_pop_if(b, lod_oob_else);
-   nir_ssa_def *robust_txf = nir_if_phi(b, &new_txf->dest.ssa, oob_val);
+   nir_def *robust_txf = nir_if_phi(b, &new_txf->def, oob_val);
 
-   nir_ssa_def_rewrite_uses(&txf->dest.ssa, robust_txf);
+   nir_def_rewrite_uses(&txf->def, robust_txf);
    nir_instr_remove_v(in);
    return true;
 }
@@ -1759,48 +1644,31 @@ check_psiz(struct nir_shader *s)
 }
 
 static nir_variable *
-find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
-{
-   unsigned found = 0;
-   if (!location_frac && location != VARYING_SLOT_PSIZ) {
-      nir_foreach_shader_out_variable(var, nir) {
-         if (var->data.location == location)
-            found++;
-      }
-   }
-   if (found) {
-      /* multiple variables found for this location: find the biggest one */
-      nir_variable *out = NULL;
-      unsigned slots = 0;
-      nir_foreach_shader_out_variable(var, nir) {
-         if (var->data.location == location) {
-            unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
-            if (count_slots > slots) {
-               slots = count_slots;
-               out = var;
-            }
-         }
-      }
-      return out;
-   } else {
-      /* only one variable found or this is location_frac */
-      nir_foreach_shader_out_variable(var, nir) {
-         if (var->data.location == location &&
-             (var->data.location_frac == location_frac ||
-              (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
-            if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
-               return var;
-         }
+find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz, nir_variable_mode mode)
+{
+   assert((int)location >= 0);
+
+   nir_foreach_variable_with_modes(var, nir, mode) {
+      if (var->data.location == location && (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)) {
+         unsigned num_components = glsl_get_vector_elements(var->type);
+         if (glsl_type_is_64bit(glsl_without_array(var->type)))
+            num_components *= 2;
+         if (var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0)
+            num_components = glsl_get_aoa_size(var->type);
+         if (var->data.location_frac <= location_frac &&
+             var->data.location_frac + num_components > location_frac)
            return var;
      }
    }
    return NULL;
 }
 
 static bool
-is_inlined(const bool *inlined, const struct pipe_stream_output *output)
+is_inlined(const bool *inlined, const nir_xfb_output_info *output)
 {
-   for (unsigned i = 0; i < output->num_components; i++)
-      if (!inlined[output->start_component + i])
+   unsigned num_components = util_bitcount(output->component_mask);
+   for (unsigned i = 0; i < num_components; i++)
+      if (!inlined[output->component_offset + i])
         return false;
    return true;
 }
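The robustness lowering above brackets the texel fetch in control flow — fetch when `lod < textureQueryLevels()`, otherwise produce a constant (0,0,0,1) — and merges the two values with `nir_if_phi`. A sketch of that select shape, with trivial stand-in values replacing the cloned `txf` and the out-of-bounds constant from the real pass:

```c
/* sketch: values created inside each branch are merged by nir_if_phi */
static nir_def *
guard_fetch(nir_builder *b, nir_def *lod, nir_def *levels)
{
   nir_if *oob_if = nir_push_if(b, nir_ilt(b, lod, levels));
   nir_def *fetched = nir_imm_float(b, 0.0);  /* stand-in for the cloned txf */
   nir_push_else(b, oob_if);
   nir_def *oob_val = nir_imm_float(b, 1.0);  /* stand-in for the oob constant */
   nir_pop_if(b, NULL);
   return nir_if_phi(b, fetched, oob_val);    /* merged SSA value */
}
```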
@@ -1888,87 +1756,98 @@ get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
    return num_components;
 }
 
-static const struct pipe_stream_output *
-find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
+static unsigned
+get_var_slot_count(nir_shader *nir, nir_variable *var)
+{
+   assert(var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out);
+   const struct glsl_type *type = var->type;
+   if (nir_is_arrayed_io(var, nir->info.stage))
+      type = glsl_get_array_element(type);
+   unsigned slot_count = 0;
+   if (var->data.location >= VARYING_SLOT_VAR0)
+      slot_count = glsl_count_vec4_slots(type, false, false);
+   else if (glsl_type_is_array(type))
+      slot_count = DIV_ROUND_UP(glsl_get_aoa_size(type), 4);
+   else
+      slot_count = 1;
+   return slot_count;
+}
+
+
+static const nir_xfb_output_info *
+find_packed_output(const nir_xfb_info *xfb_info, unsigned slot)
 {
-   for (unsigned i = 0; i < so_info->num_outputs; i++) {
-      const struct pipe_stream_output *packed_output = &so_info->output[i];
-      if (reverse_map[packed_output->register_index] == slot)
+   for (unsigned i = 0; i < xfb_info->output_count; i++) {
+      const nir_xfb_output_info *packed_output = &xfb_info->outputs[i];
+      if (packed_output->location == slot)
         return packed_output;
    }
    return NULL;
 }
 
 static void
-update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream_output_info *so_info,
-               uint64_t outputs_written, bool have_psiz)
-{
-   uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
-   unsigned slot = 0;
-   /* semi-copied from iris */
-   while (outputs_written) {
-      int bit = u_bit_scan64(&outputs_written);
-      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
-      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
-         continue;
-      reverse_map[slot++] = bit;
-   }
-
-   bool have_fake_psiz = false;
-   nir_foreach_shader_out_variable(var, nir) {
-      if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
-         have_fake_psiz = true;
-   }
-
+update_so_info(struct zink_shader *zs, nir_shader *nir, uint64_t outputs_written, bool have_psiz)
+{
    bool inlined[VARYING_SLOT_MAX][4] = {0};
    uint64_t packed = 0;
    uint8_t packed_components[VARYING_SLOT_MAX] = {0};
    uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
    uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
    uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
-   nir_variable *psiz = NULL;
-   for (unsigned i = 0; i < so_info->num_outputs; i++) {
-      const struct pipe_stream_output *output = &so_info->output[i];
-      unsigned slot = reverse_map[output->register_index];
+   for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
+      const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
+      unsigned xfb_components = util_bitcount(output->component_mask);
       /* always set stride to be used during draw */
-      zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
+      zs->sinfo.stride[output->buffer] = nir->xfb_info->buffers[output->buffer].stride;
       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
-         nir_variable *var = NULL;
-         unsigned so_slot;
-         while (!var)
-            var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
-         if (var->data.location == VARYING_SLOT_PSIZ)
-            psiz = var;
-         so_slot = slot + 1;
-         slot = reverse_map[output->register_index];
-         if (var->data.explicit_xfb_buffer) {
-            /* handle dvec3 where gallium splits streamout over 2 registers */
-            for (unsigned j = 0; j < output->num_components; j++)
-               inlined[slot][output->start_component + j] = true;
-         }
-         if (is_inlined(inlined[slot], output))
-            continue;
-         bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
-         unsigned num_components = get_slot_components(var, slot, so_slot);
-         /* if this is the entire variable, try to blast it out during the initial declaration
-          * structs must be handled later to ensure accurate analysis
-          */
-         if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
-            var->data.explicit_xfb_buffer = 1;
-            var->data.xfb.buffer = output->output_buffer;
-            var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
-            var->data.offset = output->dst_offset * 4;
-            var->data.stream = output->stream;
-            for (unsigned j = 0; j < output->num_components; j++)
-               inlined[slot][output->start_component + j] = true;
-         } else {
-            /* otherwise store some metadata for later */
-            packed |= BITFIELD64_BIT(slot);
-            packed_components[slot] += output->num_components;
-            packed_streams[slot] |= BITFIELD_BIT(output->stream);
-            packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
-            for (unsigned j = 0; j < output->num_components; j++)
-               packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
+         for (unsigned c = 0; !is_inlined(inlined[output->location], output) && c < xfb_components; c++) {
+            unsigned slot = output->location;
+            if (inlined[slot][output->component_offset + c])
+               continue;
+            nir_variable *var = NULL;
+            while (!var && slot < VARYING_SLOT_TESS_MAX)
+               var = find_var_with_location_frac(nir, slot--, output->component_offset + c, have_psiz, nir_var_shader_out);
+            slot = output->location;
+            unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
+            if (!var || var->data.location > slot || var->data.location + slot_count <= slot) {
+               /* if no variable is found for the xfb output, no output exists */
+               inlined[slot][c + output->component_offset] = true;
+               continue;
+            }
+            if (var->data.explicit_xfb_buffer) {
+               /* handle dvec3 where gallium splits streamout over 2 registers */
+               for (unsigned j = 0; j < xfb_components; j++)
+                  inlined[slot][c + output->component_offset + j] = true;
+            }
+            if (is_inlined(inlined[slot], output))
+               continue;
+            assert(!glsl_type_is_array(var->type) || var->data.location == VARYING_SLOT_CLIP_DIST0 || var->data.location == VARYING_SLOT_CULL_DIST0);
+            assert(!glsl_type_is_struct_or_ifc(var->type));
+            unsigned num_components = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : glsl_get_vector_elements(var->type);
+            if (glsl_type_is_64bit(glsl_without_array(var->type)))
+               num_components *= 2;
+            /* if this is the entire variable, try to blast it out during the initial declaration
+             * structs must be handled later to ensure accurate analysis
+             */
+            if ((num_components == xfb_components ||
+                 num_components < xfb_components ||
+                 (num_components > xfb_components && xfb_components == 4))) {
+               var->data.explicit_xfb_buffer = 1;
+               var->data.xfb.buffer = output->buffer;
+               var->data.xfb.stride = zs->sinfo.stride[output->buffer];
+               var->data.offset = (output->offset + c * sizeof(uint32_t));
+               var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
+               for (unsigned j = 0; j < MIN2(num_components, xfb_components); j++)
+                  inlined[slot][c + output->component_offset + j] = true;
+            } else {
+               /* otherwise store some metadata for later */
+               packed |= BITFIELD64_BIT(slot);
+               packed_components[slot] += xfb_components;
+               packed_streams[slot] |= BITFIELD_BIT(nir->xfb_info->buffer_to_stream[output->buffer]);
+               packed_buffers[slot] |= BITFIELD_BIT(output->buffer);
+               for (unsigned j = 0; j < xfb_components; j++)
+                  packed_offsets[output->location][j + output->component_offset + c] = output->offset + j * sizeof(uint32_t);
+            }
+         }
      }
   }
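`update_so_info` now consumes NIR's own `nir->xfb_info` instead of Gallium's `pipe_stream_output_info`: component counts come from a bitmask rather than a stored count, and offsets and strides are in bytes rather than dwords. A sketch of the accessor pattern used above (the helper is illustrative; the fields are those visible in this diff):

```c
/* sketch: per-output geometry from nir_xfb_output_info; util_bitcount
 * comes from Mesa's util headers */
static unsigned
xfb_num_components(const nir_xfb_output_info *output)
{
   /* e.g. component_mask 0b0110 = two captured components, starting at
    * output->component_offset within varying slot output->location */
   return util_bitcount(output->component_mask);
}
```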
@@ -1977,24 +1856,30 @@ update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream
     * being output with the same stream on the same buffer with increasing offsets, this entire variable
     * can be consolidated into a single output to conserve locations
     */
-   for (unsigned i = 0; i < so_info->num_outputs; i++) {
-      const struct pipe_stream_output *output = &so_info->output[i];
-      unsigned slot = reverse_map[output->register_index];
+   for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
+      const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
+      unsigned slot = output->location;
      if (is_inlined(inlined[slot], output))
         continue;
      if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
         nir_variable *var = NULL;
         while (!var)
-            var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz);
+            var = find_var_with_location_frac(nir, slot--, output->component_offset, have_psiz, nir_var_shader_out);
+         slot = output->location;
+         unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
+         if (!var || var->data.location > slot || var->data.location + slot_count <= slot)
+            continue;
         /* this is a lowered 64bit variable that can't be exported due to packing */
         if (var->data.is_xfb)
            goto out;
 
-         unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
+         unsigned num_slots = var->data.location >= VARYING_SLOT_CLIP_DIST0 && var->data.location <= VARYING_SLOT_CULL_DIST1 ?
+                              glsl_array_size(var->type) / 4 :
+                              glsl_count_vec4_slots(var->type, false, false);
         /* for each variable, iterate over all the variable's slots and inline the outputs */
         for (unsigned j = 0; j < num_slots; j++) {
            slot = var->data.location + j;
-            const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
+            const nir_xfb_output_info *packed_output = find_packed_output(nir->xfb_info, slot);
            if (!packed_output)
               goto out;
@@ -2010,23 +1895,20 @@ update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream
               goto out;
 
            /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
-            uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
+            uint32_t prev_offset = packed_offsets[packed_output->location][0];
            for (unsigned k = 1; k < num_components; k++) {
               /* if the offsets are not incrementing as expected, skip consolidation */
-               if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
+               if (packed_offsets[packed_output->location][k] != prev_offset + sizeof(uint32_t))
                  goto out;
-               prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
+               prev_offset = packed_offsets[packed_output->location][k + packed_output->component_offset];
            }
         }
         /* this output can be consolidated: blast out all the data inlined */
         var->data.explicit_xfb_buffer = 1;
-         var->data.xfb.buffer = output->output_buffer;
-         var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
-         var->data.offset = output->dst_offset * 4;
-         var->data.stream = output->stream;
-         /* GLSL specifies that interface blocks are split per-buffer in XFB */
-         if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
-            zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
+         var->data.xfb.buffer = output->buffer;
+         var->data.xfb.stride = zs->sinfo.stride[output->buffer];
+         var->data.offset = output->offset;
+         var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
         /* mark all slot components inlined to skip subsequent loop iterations */
         for (unsigned j = 0; j < num_slots; j++) {
            slot = var->data.location + j;
@@ -2037,15 +1919,8 @@ update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream
         continue;
      }
 out:
-      /* these are packed/explicit varyings which can't be exported with normal output */
-      zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
-      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
-      zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
+      unreachable("xfb should be inlined by now!");
    }
-   zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
-   /* ensure this doesn't get output in the shader by unsetting location */
-   if (have_fake_psiz && psiz)
-      update_psiz_location(nir, psiz);
 }
 
 struct decompose_state {
@@ -2069,7 +1944,7 @@ lower_attrib(nir_builder *b, nir_instr *instr, void *data)
       return false;
    unsigned num_components = glsl_get_vector_elements(split[0]->type);
    b->cursor = nir_after_instr(instr);
-   nir_ssa_def *loads[4];
+   nir_def *loads[4];
    for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
       loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
    if (state->needs_w) {
@@ -2077,8 +1952,8 @@ lower_attrib(nir_builder *b, nir_instr *instr, void *data)
       loads[3] = nir_channel(b, loads[0], 3);
       loads[0] = nir_channel(b, loads[0], 0);
    }
-   nir_ssa_def *new_load = nir_vec(b, loads, num_components);
-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
+   nir_def *new_load = nir_vec(b, loads, num_components);
+   nir_def_rewrite_uses(&intr->def, new_load);
    nir_instr_remove_v(instr);
    return true;
 }
@@ -2117,7 +1992,7 @@ decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decompose
    }
    nir_fixup_deref_modes(nir);
    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-   optimize_nir(nir, NULL);
+   optimize_nir(nir, NULL, true);
    return true;
 }
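The hunks below shrink dramatically because NIR consolidated the per-operation SSBO atomics (`ssbo_atomic_add`, `ssbo_atomic_umin`, ...) into just `nir_intrinsic_ssbo_atomic` and `nir_intrinsic_ssbo_atomic_swap`, with the operation carried as an `atomic_op` index. Converting one intrinsic to another now copies a single enum instead of mapping fourteen names; a sketch of the pattern used in `rewrite_atomic_ssbo_instr` (the wrapper function is illustrative):

```c
/* sketch: converting a consolidated SSBO atomic to its deref form */
static nir_intrinsic_instr *
make_deref_atomic(nir_builder *b, nir_intrinsic_instr *intr)
{
   nir_intrinsic_op op = intr->intrinsic == nir_intrinsic_ssbo_atomic
                            ? nir_intrinsic_deref_atomic
                            : nir_intrinsic_deref_atomic_swap;
   nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
   /* the operation (add/umin/xor/...) rides along as an index */
   nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
   return new_instr;
}
```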
@@ -2131,20 +2006,11 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    b->cursor = nir_before_instr(instr);
    switch (intr->intrinsic) {
-   case nir_intrinsic_ssbo_atomic_fadd:
-   case nir_intrinsic_ssbo_atomic_add:
-   case nir_intrinsic_ssbo_atomic_umin:
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_ssbo_atomic_umax:
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_ssbo_atomic_and:
-   case nir_intrinsic_ssbo_atomic_or:
-   case nir_intrinsic_ssbo_atomic_xor:
-   case nir_intrinsic_ssbo_atomic_exchange:
-   case nir_intrinsic_ssbo_atomic_comp_swap: {
+   case nir_intrinsic_ssbo_atomic:
+   case nir_intrinsic_ssbo_atomic_swap: {
       /* convert offset to uintN_t[idx] */
-      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, nir_dest_bit_size(intr->dest) / 8);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
+      nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, intr->def.bit_size / 8);
+      nir_src_rewrite(&intr->src[1], offset);
       return true;
    }
    case nir_intrinsic_load_ssbo:
@@ -2153,17 +2019,17 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
      bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
                        nir_src_is_const(intr->src[0]) &&
                        nir_src_as_uint(intr->src[0]) == 0 &&
-                        nir_dest_bit_size(intr->dest) == 64 &&
+                        intr->def.bit_size == 64 &&
                        nir_intrinsic_align_offset(intr) % 8 != 0;
-      force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
-      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
+      force_2x32 |= intr->def.bit_size == 64 && !has_int64;
+      nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
+      nir_src_rewrite(&intr->src[1], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
-         assert(intr->dest.ssa.num_components == 1);
+         assert(intr->def.num_components == 1);
         /* rewrite as 2x32 */
-         nir_ssa_def *load[2];
+         nir_def *load[2];
         for (unsigned i = 0; i < 2; i++) {
            if (intr->intrinsic == nir_intrinsic_load_ssbo)
               load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
@@ -2172,28 +2038,29 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
            nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
         }
         /* cast back to 64bit */
-         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
-         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
+         nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
+         nir_def_rewrite_uses(&intr->def, casted);
         nir_instr_remove(instr);
      }
      return true;
    }
+   case nir_intrinsic_load_scratch:
    case nir_intrinsic_load_shared:
      b->cursor = nir_before_instr(instr);
-      bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
-      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
+      bool force_2x32 = intr->def.bit_size == 64 && !has_int64;
+      nir_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
+      nir_src_rewrite(&intr->src[0], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
-         assert(intr->dest.ssa.num_components == 1);
+         assert(intr->def.num_components == 1);
         /* rewrite as 2x32 */
-         nir_ssa_def *load[2];
+         nir_def *load[2];
         for (unsigned i = 0; i < 2; i++)
            load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
         /* cast back to 64bit */
-         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
-         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
+         nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
+         nir_def_rewrite_uses(&intr->def, casted);
         nir_instr_remove(instr);
         return true;
      }
@@ -2201,29 +2068,30 @@
    case nir_intrinsic_store_ssbo: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
   case nir_intrinsic_store_ssbo: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
-      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
+      nir_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
+      nir_src_rewrite(&intr->src[2], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->src[0].ssa->num_components == 1);
-         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
+         nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
         for (unsigned i = 0; i < 2; i++)
            nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
         nir_instr_remove(instr);
      }
      return true;
   }
+   case nir_intrinsic_store_scratch:
   case nir_intrinsic_store_shared: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
-      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
-      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
+      nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
+      nir_src_rewrite(&intr->src[1], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
         /* this is always scalarized */
         assert(intr->src[0].ssa->num_components == 1);
-         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
+         nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
         for (unsigned i = 0; i < 2; i++)
            nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
         nir_instr_remove(instr);
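/* Illustrative sketch, not part of the diff: stores go the other way, the
 * 64-bit source is split into two 32-bit halves before being written, the
 * inverse of the pack shown earlier. Standalone C equivalent of
 * nir_unpack_64_2x32_split_x/_y:
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   uint64_t v = 0x00000001deadbeefull;
   uint32_t x = (uint32_t)v;         /* unpack_64_2x32_split_x: low dword */
   uint32_t y = (uint32_t)(v >> 32); /* unpack_64_2x32_split_y: high dword */
   printf("%x %x\n", x, y);          /* prints deadbeef 1 */
   return 0;
}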
@@ -2302,81 +2170,44 @@ rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
 {
    nir_intrinsic_op op;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   switch (intr->intrinsic) {
-   case nir_intrinsic_ssbo_atomic_fadd:
-      op = nir_intrinsic_deref_atomic_fadd;
-      break;
-   case nir_intrinsic_ssbo_atomic_fmin:
-      op = nir_intrinsic_deref_atomic_fmin;
-      break;
-   case nir_intrinsic_ssbo_atomic_fmax:
-      op = nir_intrinsic_deref_atomic_fmax;
-      break;
-   case nir_intrinsic_ssbo_atomic_fcomp_swap:
-      op = nir_intrinsic_deref_atomic_fcomp_swap;
-      break;
-   case nir_intrinsic_ssbo_atomic_add:
-      op = nir_intrinsic_deref_atomic_add;
-      break;
-   case nir_intrinsic_ssbo_atomic_umin:
-      op = nir_intrinsic_deref_atomic_umin;
-      break;
-   case nir_intrinsic_ssbo_atomic_imin:
-      op = nir_intrinsic_deref_atomic_imin;
-      break;
-   case nir_intrinsic_ssbo_atomic_umax:
-      op = nir_intrinsic_deref_atomic_umax;
-      break;
-   case nir_intrinsic_ssbo_atomic_imax:
-      op = nir_intrinsic_deref_atomic_imax;
-      break;
-   case nir_intrinsic_ssbo_atomic_and:
-      op = nir_intrinsic_deref_atomic_and;
-      break;
-   case nir_intrinsic_ssbo_atomic_or:
-      op = nir_intrinsic_deref_atomic_or;
-      break;
-   case nir_intrinsic_ssbo_atomic_xor:
-      op = nir_intrinsic_deref_atomic_xor;
-      break;
-   case nir_intrinsic_ssbo_atomic_exchange:
-      op = nir_intrinsic_deref_atomic_exchange;
-      break;
-   case nir_intrinsic_ssbo_atomic_comp_swap:
-      op = nir_intrinsic_deref_atomic_comp_swap;
-      break;
-   default:
+   if (intr->intrinsic == nir_intrinsic_ssbo_atomic)
+      op = nir_intrinsic_deref_atomic;
+   else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
+      op = nir_intrinsic_deref_atomic_swap;
+   else
      unreachable("unknown intrinsic");
-   }
-   nir_ssa_def *offset = intr->src[1].ssa;
+   nir_def *offset = intr->src[1].ssa;
    nir_src *src = &intr->src[0];
-   nir_variable *var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
+   nir_variable *var = get_bo_var(b->shader, bo, true, src,
+                                  intr->def.bit_size);
    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
-   nir_ssa_def *idx = src->ssa;
+   nir_def *idx = src->ssa;
    if (bo->first_ssbo)
      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
    /* generate new atomic deref ops for every component */
-   nir_ssa_def *result[4];
-   unsigned num_components = nir_dest_num_components(intr->dest);
+   nir_def *result[4];
+   unsigned num_components = intr->def.num_components;
    for (unsigned i = 0; i < num_components; i++) {
      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
      nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
-      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, nir_dest_bit_size(intr->dest), "");
-      new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
+      nir_def_init(&new_instr->instr, &new_instr->def, 1,
+                   intr->def.bit_size);
+      nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
+      new_instr->src[0] = nir_src_for_ssa(&deref_arr->def);
      /* deref ops have no offset src, so copy the srcs after it */
      for (unsigned i = 2; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++)
-         nir_src_copy(&new_instr->src[i - 1], &intr->src[i], &new_instr->instr);
+         new_instr->src[i - 1] = nir_src_for_ssa(intr->src[i].ssa);
      nir_builder_instr_insert(b, &new_instr->instr);
-      result[i] = &new_instr->dest.ssa;
+      result[i] = &new_instr->def;
      offset = nir_iadd_imm(b, offset, 1);
    }
-   nir_ssa_def *load = nir_vec(b, result, num_components);
-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
+   nir_def *load = nir_vec(b, result, num_components);
+   nir_def_rewrite_uses(&intr->def, load);
    nir_instr_remove(instr);
 }
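/* Illustrative sketch, not part of the diff: rewrite_atomic_ssbo_instr splits
 * a vector atomic into one scalar atomic per component on consecutive array
 * elements, then re-vectorizes the results. The same shape in plain C using
 * <stdatomic.h> on a flat array (all names here are illustrative):
 */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned buf[8];

int
main(void)
{
   unsigned add[2] = {10, 20}, result[2];
   unsigned offset = 4;
   /* one atomic per component, the offset advancing by one element each time */
   for (unsigned i = 0; i < 2; i++)
      result[i] = atomic_fetch_add(&buf[offset + i], add[i]);
   printf("%u %u\n", result[0], result[1]); /* previous values: 0 0 */
   return 0;
}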
@@ -2388,26 +2219,14 @@ remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
      return false;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    nir_variable *var = NULL;
-   nir_ssa_def *offset = NULL;
+   nir_def *offset = NULL;
    bool is_load = true;
    b->cursor = nir_before_instr(instr);
    nir_src *src;
    bool ssbo = true;
    switch (intr->intrinsic) {
-   case nir_intrinsic_ssbo_atomic_fadd:
-   case nir_intrinsic_ssbo_atomic_fmin:
-   case nir_intrinsic_ssbo_atomic_fmax:
-   case nir_intrinsic_ssbo_atomic_fcomp_swap:
-   case nir_intrinsic_ssbo_atomic_add:
-   case nir_intrinsic_ssbo_atomic_umin:
-   case nir_intrinsic_ssbo_atomic_imin:
-   case nir_intrinsic_ssbo_atomic_umax:
-   case nir_intrinsic_ssbo_atomic_imax:
-   case nir_intrinsic_ssbo_atomic_and:
-   case nir_intrinsic_ssbo_atomic_or:
-   case nir_intrinsic_ssbo_atomic_xor:
-   case nir_intrinsic_ssbo_atomic_exchange:
-   case nir_intrinsic_ssbo_atomic_comp_swap:
+   case nir_intrinsic_ssbo_atomic:
+   case nir_intrinsic_ssbo_atomic_swap:
      rewrite_atomic_ssbo_instr(b, instr, bo);
      return true;
    case nir_intrinsic_store_ssbo:
@@ -2418,12 +2237,12 @@ remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
      break;
    case nir_intrinsic_load_ssbo:
      src = &intr->src[0];
-      var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
+      var = get_bo_var(b->shader, bo, true, src, intr->def.bit_size);
      offset = intr->src[1].ssa;
      break;
    case nir_intrinsic_load_ubo:
      src = &intr->src[0];
-      var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
+      var = get_bo_var(b->shader, bo, false, src, intr->def.bit_size);
      offset = intr->src[1].ssa;
      ssbo = false;
      break;
@@ -2433,28 +2252,31 @@ remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
    assert(var);
    assert(offset);
    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
-   nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
+   nir_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
    if (!ssbo && bo->first_ubo && var->data.driver_location)
      idx = nir_iadd_imm(b, idx, -bo->first_ubo);
    else if (ssbo && bo->first_ssbo)
      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
-   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, nir_i2iN(b, idx, nir_dest_bit_size(deref_var->dest)));
+   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var,
+                                                        nir_i2iN(b, idx, deref_var->def.bit_size));
    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
    assert(intr->num_components <= 2);
    if (is_load) {
-      nir_ssa_def *result[2];
+      nir_def *result[2];
      for (unsigned i = 0; i < intr->num_components; i++) {
-         nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
+         nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
+                                                            nir_i2iN(b, offset, deref_struct->def.bit_size));
         result[i] = nir_load_deref(b, deref_arr);
         if (intr->intrinsic == nir_intrinsic_load_ssbo)
            nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
         offset = nir_iadd_imm(b, offset, 1);
      }
-      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
-      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
+      nir_def *load = nir_vec(b, result, intr->num_components);
+      nir_def_rewrite_uses(&intr->def, load);
    } else {
-      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, nir_i2iN(b, offset, nir_dest_bit_size(deref_struct->dest)));
-      nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
+      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
+                                                         nir_i2iN(b, offset, deref_struct->def.bit_size));
+      nir_build_store_deref(b, &deref_arr->def, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
    }
    nir_instr_remove(instr);
    return true;
@@ -2468,18 +2290,98 @@ remove_bo_access(nir_shader *shader, struct zink_shader *zs)
 }
 
 static bool
+filter_io_instr(nir_intrinsic_instr *intr, bool *is_load, bool *is_input, bool *is_interp)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_interpolated_input:
+      *is_interp = true;
+      FALLTHROUGH;
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_per_vertex_input:
+      *is_input = true;
+      FALLTHROUGH;
+   case nir_intrinsic_load_output:
+   case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_load_per_primitive_output:
+      *is_load = true;
+      FALLTHROUGH;
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_primitive_output:
+   case nir_intrinsic_store_per_vertex_output:
+      break;
+   default:
+      return false;
+   }
+   return true;
+}
+
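/* Illustrative sketch, not part of the diff: filter_io_instr above leans on
 * deliberate switch fallthrough, the most specific cases set a flag and fall
 * into the more general ones, so an interpolated input ends up with all three
 * flags set. The same cascade in a standalone C program:
 */
#include <stdbool.h>
#include <stdio.h>

enum kind { INTERP_INPUT, PLAIN_INPUT, LOAD_OUTPUT, STORE_OUTPUT, OTHER };

static bool
classify(enum kind k, bool *is_load, bool *is_input, bool *is_interp)
{
   switch (k) {
   case INTERP_INPUT:
      *is_interp = true; /* fall through */
   case PLAIN_INPUT:
      *is_input = true;  /* fall through */
   case LOAD_OUTPUT:
      *is_load = true;   /* fall through */
   case STORE_OUTPUT:
      break;
   default:
      return false;
   }
   return true;
}

int
main(void)
{
   bool l = false, i = false, p = false;
   classify(INTERP_INPUT, &l, &i, &p);
   printf("load=%d input=%d interp=%d\n", l, i, p); /* all 1 */
   return 0;
}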
+static bool
+io_instr_is_arrayed(nir_intrinsic_instr *intr)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_per_vertex_input:
+   case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_load_per_primitive_output:
+   case nir_intrinsic_store_per_primitive_output:
+   case nir_intrinsic_store_per_vertex_output:
+      return true;
+   default:
+      break;
+   }
+   return false;
+}
+
+static bool
 find_var_deref(nir_shader *nir, nir_variable *var)
 {
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_deref)
+               continue;
+            nir_deref_instr *deref = nir_instr_as_deref(instr);
+            if (deref->deref_type == nir_deref_type_var && deref->var == var)
+               return true;
+         }
+      }
+   }
+   return false;
+}
+
+static bool
+find_var_io(nir_shader *nir, nir_variable *var)
+{
    nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;
      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
-            if (instr->type != nir_instr_type_deref)
+            if (instr->type != nir_instr_type_intrinsic)
               continue;
-            nir_deref_instr *deref = nir_instr_as_deref(instr);
-            if (deref->deref_type == nir_deref_type_var && deref->var == var)
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            bool is_load = false;
+            bool is_input = false;
+            bool is_interp = false;
+            if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+               continue;
+            if (var->data.mode == nir_var_shader_in && !is_input)
+               continue;
+            if (var->data.mode == nir_var_shader_out && is_input)
+               continue;
+            unsigned slot_offset = 0;
+            if (var->data.fb_fetch_output && !is_load)
+               continue;
+            if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && !is_input && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
+               continue;
+            nir_src *src_offset = nir_get_io_offset_src(intr);
+            if (src_offset && nir_src_is_const(*src_offset))
+               slot_offset = nir_src_as_uint(*src_offset);
+            unsigned slot_count = get_var_slot_count(nir, var);
+            if (var->data.mode & (nir_var_shader_out | nir_var_shader_in) &&
+                var->data.fb_fetch_output == nir_intrinsic_io_semantics(intr).fb_fetch_output &&
+                var->data.location <= nir_intrinsic_io_semantics(intr).location + slot_offset &&
+                var->data.location + slot_count > nir_intrinsic_io_semantics(intr).location + slot_offset)
               return true;
         }
      }
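/* Illustrative sketch, not part of the diff: the final test in find_var_io is
 * a half-open interval check, the accessed slot (location + slot_offset) must
 * fall inside [var->data.location, var->data.location + slot_count). A
 * standalone C version of that predicate:
 */
#include <stdbool.h>
#include <stdio.h>

static bool
var_covers_slot(unsigned var_location, unsigned slot_count, unsigned slot)
{
   return var_location <= slot && var_location + slot_count > slot;
}

int
main(void)
{
   /* a variable occupying slots 3..6 covers slot 5 but not slot 7 */
   printf("%d %d\n", var_covers_slot(3, 4, 5), var_covers_slot(3, 4, 7));
   return 0;
}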
@@ -2495,12 +2397,11 @@ struct clamp_layer_output_state {
 static void
 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
 {
-   nir_ssa_def *is_layered = nir_load_push_constant(b, 1, 32,
-                                                    nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED),
-                                                    .base = ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, .range = 4);
+   nir_def *is_layered = nir_load_push_constant_zink(b, 1, 32,
+                                                     nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED));
    nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
    nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
-   nir_ssa_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
+   nir_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
                                  nir_load_deref(b, original_deref),
                                  nir_imm_int(b, 0));
    nir_store_deref(b, clamped_deref, layer, 0);
@@ -2537,7 +2438,7 @@ clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
    }
    struct clamp_layer_output_state state = {0};
    state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
-   if (!state.original || !find_var_deref(vs, state.original))
+   if (!state.original || (!find_var_deref(vs, state.original) && !find_var_io(vs, state.original)))
      return false;
    state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
    state.clamped->data.location = VARYING_SLOT_LAYER;
@@ -2566,13 +2467,12 @@ clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
    } else {
      nir_builder b;
      nir_function_impl *impl = nir_shader_get_entrypoint(vs);
-      nir_builder_init(&b, impl);
+      b = nir_builder_at(nir_after_impl(impl));
      assert(impl->end_block->predecessors->entries == 1);
-      b.cursor = nir_after_cf_list(&impl->body);
      clamp_layer_output_emit(&b, &state);
      nir_metadata_preserve(impl, nir_metadata_dominance);
    }
-   optimize_nir(vs, NULL);
+   optimize_nir(vs, NULL, true);
    NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
    return true;
 }
@@ -2584,7 +2484,6 @@ assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reser
    switch (slot) {
    case -1:
    case VARYING_SLOT_POS:
-   case VARYING_SLOT_PNTC:
    case VARYING_SLOT_PSIZ:
    case VARYING_SLOT_LAYER:
    case VARYING_SLOT_PRIMITIVE_ID:
@@ -2635,7 +2534,6 @@ assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reser
    unsigned slot = var->data.location;
    switch (slot) {
    case VARYING_SLOT_POS:
-   case VARYING_SLOT_PNTC:
    case VARYING_SLOT_PSIZ:
    case VARYING_SLOT_LAYER:
    case VARYING_SLOT_PRIMITIVE_ID:
@@ -2688,32 +2586,169 @@ rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
      return false;
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   if (intr->intrinsic != nir_intrinsic_load_deref)
+   bool is_load = false;
+   bool is_input = false;
+   bool is_interp = false;
+   if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
      return false;
-   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
-   if (deref_var != var)
+   if (!is_load)
+      return false;
+   unsigned location = nir_intrinsic_io_semantics(intr).location;
+   if (location != var->data.location)
      return false;
    b->cursor = nir_before_instr(instr);
-   nir_ssa_def *zero = nir_imm_zero(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
+   nir_def *zero = nir_imm_zero(b, intr->def.num_components,
+                                intr->def.bit_size);
    if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
-      switch (var->data.location) {
+      switch (location) {
      case VARYING_SLOT_COL0:
      case VARYING_SLOT_COL1:
      case VARYING_SLOT_BFC0:
      case VARYING_SLOT_BFC1:
         /* default color is 0,0,0,1 */
-         if (nir_dest_num_components(intr->dest) == 4)
+         if (intr->def.num_components == 4)
            zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
         break;
      default:
         break;
      }
    }
-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, zero);
+   nir_def_rewrite_uses(&intr->def, zero);
    nir_instr_remove(instr);
    return true;
 }
+
+
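/* Illustrative sketch, not part of the diff: rewrite_read_as_0 replaces reads
 * of unwritten varyings with zero, except that color slots get the GL default
 * of (0, 0, 0, 1). The same substitution on a plain vec4 in standalone C:
 */
#include <stdbool.h>
#include <stdio.h>

static void
default_varying(float v[4], bool is_color)
{
   for (unsigned i = 0; i < 4; i++)
      v[i] = 0.0f;
   if (is_color)
      v[3] = 1.0f; /* default color is 0,0,0,1 */
}

int
main(void)
{
   float col[4];
   default_varying(col, true);
   printf("%g %g %g %g\n", col[0], col[1], col[2], col[3]); /* 0 0 0 1 */
   return 0;
}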
+static bool
+delete_psiz_store_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_primitive_output:
+   case nir_intrinsic_store_per_vertex_output:
+      break;
+   default:
+      return false;
+   }
+   if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PSIZ)
+      return false;
+   if (!data || (nir_src_is_const(intr->src[0]) && fabs(nir_src_as_float(intr->src[0]) - 1.0) < FLT_EPSILON)) {
+      nir_instr_remove(&intr->instr);
+      return true;
+   }
+   return false;
+}
+
+static bool
+delete_psiz_store(nir_shader *nir, bool one)
+{
+   bool progress = nir_shader_intrinsics_pass(nir, delete_psiz_store_instr,
+                                              nir_metadata_dominance, one ? nir : NULL);
+   if (progress)
+      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+   return progress;
+}
+
+struct write_components {
+   unsigned slot;
+   uint32_t component_mask;
+};
+
+static bool
+fill_zero_reads(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   struct write_components *wc = data;
+   bool is_load = false;
+   bool is_input = false;
+   bool is_interp = false;
+   if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+      return false;
+   if (!is_input)
+      return false;
+   nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+   if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
+      return false;
+   unsigned num_components = intr->num_components;
+   unsigned c = nir_intrinsic_component(intr);
+   if (intr->def.bit_size == 64)
+      num_components *= 2;
+   nir_src *src_offset = nir_get_io_offset_src(intr);
+   if (nir_src_is_const(*src_offset)) {
+      unsigned slot_offset = nir_src_as_uint(*src_offset);
+      if (s.location + slot_offset != wc->slot)
+         return false;
+   } else if (s.location > wc->slot || s.location + s.num_slots <= wc->slot) {
+      return false;
+   }
+   uint32_t readmask = BITFIELD_MASK(intr->num_components) << c;
+   if (intr->def.bit_size == 64)
+      readmask |= readmask << (intr->num_components + c);
+   /* handle dvec3/dvec4 */
+   if (num_components + c > 4)
+      readmask >>= 4;
+   if ((wc->component_mask & readmask) == readmask)
+      return false;
+   uint32_t rewrite_mask = readmask & ~wc->component_mask;
+   if (!rewrite_mask)
+      return false;
+   b->cursor = nir_after_instr(&intr->instr);
+   nir_def *zero = nir_imm_zero(b, intr->def.num_components, intr->def.bit_size);
+   if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
+      switch (wc->slot) {
+      case VARYING_SLOT_COL0:
+      case VARYING_SLOT_COL1:
+      case VARYING_SLOT_BFC0:
+      case VARYING_SLOT_BFC1:
+         /* default color is 0,0,0,1 */
+         if (intr->def.num_components == 4)
+            zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
+         break;
+      default:
+         break;
+      }
+   }
+   rewrite_mask >>= c;
+   nir_def *dest = &intr->def;
+   u_foreach_bit(component, rewrite_mask)
+      dest = nir_vector_insert_imm(b, dest, nir_channel(b, zero, component), component);
+   nir_def_rewrite_uses_after(&intr->def, dest, dest->parent_instr);
+   return true;
+}
+
+static bool
+find_max_write_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   struct write_components *wc = data;
+   bool is_load = false;
+   bool is_input = false;
+   bool is_interp = false;
+   if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+      return false;
+   if (is_input || is_load)
+      return false;
+   nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+   if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
+      return false;
+   unsigned location = s.location;
+   unsigned c = nir_intrinsic_component(intr);
+   uint32_t wrmask = nir_intrinsic_write_mask(intr) << c;
+   if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64) {
+      unsigned num_components = intr->num_components * 2;
+      nir_src *src_offset = nir_get_io_offset_src(intr);
+      if (nir_src_is_const(*src_offset)) {
+         if (location + nir_src_as_uint(*src_offset) != wc->slot && num_components + c < 4)
+            return false;
+      }
+      wrmask |= wrmask << intr->num_components;
+      /* handle dvec3/dvec4 */
+      if (num_components + c > 4)
+         wrmask >>= 4;
+   }
+   wc->component_mask |= wrmask;
+   return false;
+}
+
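/* Illustrative sketch, not part of the diff: find_max_write_components
 * accumulates which components of a slot the producer actually writes. The
 * write mask is shifted by the start component, and 64-bit sources occupy two
 * 32-bit components each. A standalone C rendition of that bookkeeping:
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
written_mask(uint32_t wrmask, unsigned start_component, unsigned num_components, int is_64bit)
{
   uint32_t mask = wrmask << start_component;
   if (is_64bit) {
      /* each 64-bit component covers two 32-bit slots */
      mask |= mask << num_components;
      if (num_components * 2 + start_component > 4)
         mask >>= 4; /* dvec3/dvec4 spill into the next slot */
   }
   return mask;
}

int
main(void)
{
   /* writing .xy of a 32-bit vec starting at component 1 -> mask 0b0110 */
   printf("0x%x\n", written_mask(0x3, 1, 2, 0));
   return 0;
}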
 void
 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
 {
@@ -2722,16 +2757,30 @@ zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_sh
    memset(slot_map, -1, sizeof(slot_map));
    bool do_fixup = false;
    nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
-   if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
+   nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
+   if (var) {
+      bool can_remove = false;
+      if (!nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
+         /* maintenance5 guarantees "A default size of 1.0 is used if PointSize is not written" */
+         if (screen->info.have_KHR_maintenance5 && !var->data.explicit_xfb_buffer && delete_psiz_store(producer, true))
+            can_remove = !(producer->info.outputs_written & VARYING_BIT_PSIZ);
+         else if (consumer->info.stage != MESA_SHADER_FRAGMENT)
+            can_remove = !var->data.explicit_location;
+      }
      /* remove injected pointsize from all but the last vertex stage */
-      nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
-      if (var && !var->data.explicit_location && !nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
+      if (can_remove) {
         var->data.mode = nir_var_shader_temp;
         nir_fixup_deref_modes(producer);
+         delete_psiz_store(producer, false);
         NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-         optimize_nir(producer, NULL);
+         optimize_nir(producer, NULL, true);
      }
    }
+   if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
+      producer->info.has_transform_feedback_varyings = false;
+      nir_foreach_shader_out_variable(var, producer)
+         var->data.explicit_xfb_buffer = false;
+   }
    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      /* never assign from tcs -> tes, always invert */
      nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
@@ -2754,11 +2803,21 @@ zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_sh
      if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_workarounds.needs_sanitised_layer)
         do_fixup |= clamp_layer_output(producer, consumer, &reserved);
    }
+   nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
+   if (producer->info.io_lowered && consumer->info.io_lowered) {
+      u_foreach_bit64(slot, producer->info.outputs_written & BITFIELD64_RANGE(VARYING_SLOT_VAR0, 31)) {
+         struct write_components wc = {slot, 0};
+         nir_shader_intrinsics_pass(producer, find_max_write_components, nir_metadata_all, &wc);
+         assert(wc.component_mask);
+         if (wc.component_mask != BITFIELD_MASK(4))
+            do_fixup |= nir_shader_intrinsics_pass(consumer, fill_zero_reads, nir_metadata_dominance, &wc);
+      }
+   }
    if (!do_fixup)
      return;
    nir_fixup_deref_modes(nir);
    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-   optimize_nir(nir, NULL);
+   optimize_nir(nir, NULL, true);
 }
 
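/* Illustrative sketch, not part of the diff: with KHR_maintenance5 the commit
 * can drop PointSize stores outright when the stored value is a compile-time
 * 1.0, since 1.0 is also the API default. The comparison uses an epsilon
 * rather than ==, as in delete_psiz_store_instr above:
 */
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

static bool
is_default_pointsize(float stored)
{
   return fabsf(stored - 1.0f) < FLT_EPSILON;
}

int
main(void)
{
   printf("%d %d\n", is_default_pointsize(1.0f), is_default_pointsize(2.0f));
   return 0;
}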
 /* all types that hit this function contain something that is 64bit */
@@ -2848,15 +2907,12 @@ deref_is_matrix(nir_deref_instr *deref)
 }
 
 static bool
-lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variable *var,
+lower_64bit_vars_function(nir_shader *shader, nir_function_impl *impl, nir_variable *var,
                           struct hash_table *derefs, struct set *deletes, bool doubles_only)
 {
    bool func_progress = false;
-   if (!function->impl)
-      return false;
-   nir_builder b;
-   nir_builder_init(&b, function->impl);
-   nir_foreach_block(block, function->impl) {
+   nir_builder b = nir_builder_create(impl);
+   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
@@ -2893,12 +2949,12 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
            if (nir_intrinsic_get_var(intr, 0) != var)
               break;
            if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
-                (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
+                (intr->intrinsic == nir_intrinsic_load_deref && intr->def.bit_size != 64))
               break;
            b.cursor = nir_before_instr(instr);
            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            unsigned num_components = intr->num_components * 2;
-            nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
+            nir_def *comp[NIR_MAX_VEC_COMPONENTS];
            /* this is the stored matrix type from the deref */
            struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
            const struct glsl_type *matrix = he ? he->data : NULL;
@@ -2908,7 +2964,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
            if (intr->intrinsic == nir_intrinsic_store_deref) {
               /* first, unpack the src data to 32bit vec2 components */
               for (unsigned i = 0; i < intr->num_components; i++) {
-                  nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
+                  nir_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
                  comp[i * 2] = nir_channel(&b, ssa, 0);
                  comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
               }
@@ -2924,7 +2980,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                  assert(deref->deref_type == nir_deref_type_array);
                  nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
                  /* let optimization clean up consts later */
-                  nir_ssa_def *index = deref->arr.index.ssa;
+                  nir_def *index = deref->arr.index.ssa;
                  /* this might be an indirect array index:
                   * - iterate over matrix columns
                   * - add if blocks for each column
@@ -2949,7 +3005,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                        nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
                        unsigned incr = MIN2(remaining, 4);
                        /* assemble the write component vec */
-                        nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
+                        nir_def *val = nir_vec(&b, &comp[i], incr);
                        /* use the number of components being written as the writemask */
                        if (glsl_get_vector_elements(strct->type) > val->num_components)
                           val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
@@ -2962,7 +3018,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                  _mesa_set_add(deletes, &deref->instr);
               } else if (num_components <= 4) {
                  /* simple store case: just write out the components */
-                  nir_ssa_def *dest = nir_vec(&b, comp, num_components);
+                  nir_def *dest = nir_vec(&b, comp, num_components);
                  nir_store_deref(&b, deref, dest, mask);
               } else {
                  /* writing > 4 components: access the struct and write to the appropriate vec4 members */
@@ -2970,7 +3026,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                     if (!(mask & BITFIELD_MASK(4)))
                        continue;
                     nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
-                     nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
+                     nir_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
                     if (glsl_get_vector_elements(strct->type) > dest->num_components)
                        dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
                     nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
@@ -2978,20 +3034,20 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                  }
               }
            } else {
-               nir_ssa_def *dest = NULL;
+               nir_def *dest = NULL;
               if (matrix) {
                  /* matrix types always come from array (row) derefs */
                  assert(deref->deref_type == nir_deref_type_array);
                  nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
                  /* let optimization clean up consts later */
-                  nir_ssa_def *index = deref->arr.index.ssa;
+                  nir_def *index = deref->arr.index.ssa;
                  /* this might be an indirect array index:
                   * - iterate over matrix columns
                   * - add if blocks for each column
                   * - phi the loads using the array index
                   */
                  unsigned cols = glsl_get_matrix_columns(matrix);
-                  nir_ssa_def *dests[4];
+                  nir_def *dests[4];
                  for (unsigned idx = 0; idx < cols; idx++) {
                     /* don't add an if for the final row: this will be handled in the else */
                     if (idx < cols - 1)
@@ -3010,7 +3066,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                     for (unsigned i = 0; i < num_components; member++) {
                        assert(member < glsl_get_length(var_deref->type));
                        nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
-                        nir_ssa_def *load = nir_load_deref(&b, strct);
+                        nir_def *load = nir_load_deref(&b, strct);
                        unsigned incr = MIN2(remaining, 4);
                        /* repack the loads to 64bit */
                        for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
@@ -3030,7 +3086,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                  _mesa_set_add(deletes, &deref->instr);
               } else if (num_components <= 4) {
                  /* simple load case */
-                  nir_ssa_def *load = nir_load_deref(&b, deref);
+                  nir_def *load = nir_load_deref(&b, deref);
                  /* pack 32bit loads into 64bit: this will automagically get optimized out later */
                  for (unsigned i = 0; i < intr->num_components; i++) {
                     comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
@@ -3040,14 +3096,15 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
                  /* writing > 4 components: access the struct and load the appropriate vec4 members */
                  for (unsigned i = 0; i < 2; i++, num_components -= 4) {
                     nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
-                     nir_ssa_def *load = nir_load_deref(&b, strct);
-                     comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
+                     nir_def *load = nir_load_deref(&b, strct);
+                     comp[i * 2] = nir_pack_64_2x32(&b,
+                                                    nir_trim_vector(&b, load, 2));
                     if (num_components > 2)
                        comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
                  }
                  dest = nir_vec(&b, comp, intr->num_components);
               }
-               nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
+               nir_def_rewrite_uses_after(&intr->def, dest, instr);
            }
            _mesa_set_add(deletes, instr);
            break;
@@ -3058,7 +3115,7 @@ lower_64bit_vars_function(nir_shader *shader, nir_function *function, nir_variab
      }
    }
    if (func_progress)
-      nir_metadata_preserve(function->impl, nir_metadata_none);
+      nir_metadata_preserve(impl, nir_metadata_none);
    /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
    set_foreach_remove(deletes, he)
      nir_instr_remove((void*)he->key);
@@ -3073,8 +3130,8 @@ lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *
      return false;
    var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
    /* once type is rewritten, rewrite all loads and stores */
-   nir_foreach_function(function, shader)
-      lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
+   nir_foreach_function_impl(impl, shader)
+      lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
    return true;
 }
@@ -3085,14 +3142,12 @@ lower_64bit_vars(nir_shader *shader, bool doubles_only)
    bool progress = false;
    struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
    struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
-   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out)
-      progress |= lower_64bit_vars_loop(shader, var, derefs, deletes, doubles_only);
-   nir_foreach_function(function, shader) {
-      nir_foreach_function_temp_variable(var, function->impl) {
+   nir_foreach_function_impl(impl, shader) {
+      nir_foreach_function_temp_variable(var, impl) {
         if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
            continue;
         var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
-         progress |= lower_64bit_vars_function(shader, function, var, derefs, deletes, doubles_only);
+         progress |= lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
      }
    }
    ralloc_free(deletes);
@@ -3100,97 +3155,51 @@ lower_64bit_vars(nir_shader *shader, bool doubles_only)
    if (progress) {
      nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
      nir_lower_phis_to_scalar(shader, false);
-      optimize_nir(shader, NULL);
+      optimize_nir(shader, NULL, true);
    }
    return progress;
 }
 
-static bool
-split_blocks(nir_shader *nir)
-{
-   bool progress = false;
-   bool changed = true;
-   do {
-      progress = false;
-      nir_foreach_shader_out_variable(var, nir) {
-         const struct glsl_type *base_type = glsl_without_array(var->type);
-         nir_variable *members[32]; //can't have more than this without breaking NIR
-         if (!glsl_type_is_struct(base_type))
-            continue;
-         /* TODO: arrays? */
-         if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
-            continue;
-         if (glsl_count_attribute_slots(var->type, false) == 1)
-            continue;
-         unsigned offset = 0;
-         for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
-            members[i] = nir_variable_clone(var, nir);
-            members[i]->type = glsl_get_struct_field(var->type, i);
-            members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
-            members[i]->data.location += offset;
-            offset += glsl_count_attribute_slots(members[i]->type, false);
-            nir_shader_add_variable(nir, members[i]);
-         }
-         nir_foreach_function(function, nir) {
-            bool func_progress = false;
-            if (!function->impl)
-               continue;
-            nir_builder b;
-            nir_builder_init(&b, function->impl);
-            nir_foreach_block(block, function->impl) {
-               nir_foreach_instr_safe(instr, block) {
-                  switch (instr->type) {
-                  case nir_instr_type_deref: {
-                     nir_deref_instr *deref = nir_instr_as_deref(instr);
-                     if (!(deref->modes & nir_var_shader_out))
-                        continue;
-                     if (nir_deref_instr_get_variable(deref) != var)
-                        continue;
-                     if (deref->deref_type != nir_deref_type_struct)
-                        continue;
-                     nir_deref_instr *parent = nir_deref_instr_parent(deref);
-                     if (parent->deref_type != nir_deref_type_var)
-                        continue;
-                     deref->modes = nir_var_shader_temp;
-                     parent->modes = nir_var_shader_temp;
-                     b.cursor = nir_before_instr(instr);
-                     nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
-                     nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
-                     nir_instr_remove(&deref->instr);
-                     func_progress = true;
-                     break;
-                  }
-                  default: break;
-                  }
-               }
-            }
-            if (func_progress)
-               nir_metadata_preserve(function->impl, nir_metadata_none);
-         }
-         var->data.mode = nir_var_shader_temp;
-         changed = true;
-         progress = true;
-      }
-   } while (progress);
-   return changed;
-}
-
 static void
-zink_shader_dump(void *words, size_t size, const char *file)
+zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file)
 {
    FILE *fp = fopen(file, "wb");
    if (fp) {
      fwrite(words, 1, size, fp);
      fclose(fp);
-      fprintf(stderr, "wrote '%s'...\n", file);
+      fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file);
    }
 }
 
+static VkShaderStageFlagBits
+zink_get_next_stage(gl_shader_stage stage)
+{
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
+             VK_SHADER_STAGE_GEOMETRY_BIT |
+             VK_SHADER_STAGE_FRAGMENT_BIT;
+   case MESA_SHADER_TESS_CTRL:
+      return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+   case MESA_SHADER_TESS_EVAL:
+      return VK_SHADER_STAGE_GEOMETRY_BIT |
+             VK_SHADER_STAGE_FRAGMENT_BIT;
+   case MESA_SHADER_GEOMETRY:
+      return VK_SHADER_STAGE_FRAGMENT_BIT;
+   case MESA_SHADER_FRAGMENT:
+   case MESA_SHADER_COMPUTE:
+   case MESA_SHADER_KERNEL:
+      return 0;
+   default:
+      unreachable("invalid shader stage");
+   }
+}
+
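/* Illustrative sketch, not part of the diff: zink_shader_dump is a plain
 * binary blob writer; the SPIR-V words go straight to disk so they can be
 * inspected offline. A standalone equivalent:
 */
#include <stdint.h>
#include <stdio.h>

static void
dump_blob(const void *words, size_t size, const char *file)
{
   FILE *fp = fopen(file, "wb");
   if (fp) {
      fwrite(words, 1, size, fp);
      fclose(fp);
      fprintf(stderr, "wrote '%s'...\n", file);
   }
}

int
main(void)
{
   uint32_t magic = 0x07230203; /* the SPIR-V magic number */
   dump_blob(&magic, sizeof(magic), "dump00.spv");
   return 0;
}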
-VkShaderModule
-zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv)
+struct zink_shader_object
+zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
 {
-   VkShaderModule mod;
    VkShaderModuleCreateInfo smci = {0};
+   VkShaderCreateInfoEXT sci = {0};
    if (!spirv)
      spirv = zs->spirv;
@@ -3199,8 +3208,31 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st
      char buf[256];
      static int i;
      snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
-      zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
+      zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf);
+   }
+
+   sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
+   sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
+   sci.nextStage = zink_get_next_stage(zs->info.stage);
+   sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
+   sci.codeSize = spirv->num_words * sizeof(uint32_t);
+   sci.pCode = spirv->words;
+   sci.pName = "main";
+   VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
+   if (pg) {
+      sci.setLayoutCount = pg->num_dsl;
+      sci.pSetLayouts = pg->dsl;
+   } else {
+      sci.setLayoutCount = zs->info.stage + 1;
+      dsl[zs->info.stage] = zs->precompile.dsl;;
+      sci.pSetLayouts = dsl;
    }
+   VkPushConstantRange pcr;
+   pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
+   pcr.offset = 0;
+   pcr.size = sizeof(struct zink_gfx_push_constant);
+   sci.pushConstantRangeCount = 1;
+   sci.pPushConstantRanges = &pcr;
 
    smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    smci.codeSize = spirv->num_words * sizeof(uint32_t);
@@ -3246,6 +3278,7 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st
         .demote_to_helper_invocation = true,
         .sparse_residency = true,
         .min_lod = true,
+         .workgroup_memory_explicit_layout = true,
      },
      .ubo_addr_format = nir_address_format_32bit_index_offset,
      .ssbo_addr_format = nir_address_format_32bit_index_offset,
@@ -3280,21 +3313,26 @@ zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, st
    }
 #endif
 
-   VkResult ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod);
+   VkResult ret;
+   struct zink_shader_object obj = {0};
+   if (!can_shobj || !screen->info.have_EXT_shader_object)
+      ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
+   else
+      ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
    bool success = zink_screen_handle_vkresult(screen, ret);
    assert(success);
-   return success ? mod : VK_NULL_HANDLE;
+   return obj;
 }
 
 static void
 prune_io(nir_shader *nir)
 {
    nir_foreach_shader_in_variable_safe(var, nir) {
-      if (!find_var_deref(nir, var))
+      if (!find_var_deref(nir, var) && !find_var_io(nir, var))
         var->data.mode = nir_var_shader_temp;
    }
    nir_foreach_shader_out_variable_safe(var, nir) {
-      if (!find_var_deref(nir, var))
+      if (!find_var_deref(nir, var) && !find_var_io(nir, var))
         var->data.mode = nir_var_shader_temp;
    }
    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
@@ -3309,43 +3347,53 @@ flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
    zs->fs.legacy_shadow_mask |= BITFIELD_BIT(sampler_id);
 }
 
-static nir_ssa_def *
-rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, void *data)
+static nir_def *
+rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
 {
    assert(var);
    const struct glsl_type *type = glsl_without_array(var->type);
    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
    bool is_int = glsl_base_type_is_integer(ret_type);
    unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
-   unsigned dest_size = nir_dest_bit_size(tex->dest);
+   unsigned dest_size = tex->def.bit_size;
    b->cursor = nir_after_instr(&tex->instr);
-   unsigned num_components = nir_dest_num_components(tex->dest);
+   unsigned num_components = tex->def.num_components;
    bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
    if (bit_size == dest_size && !rewrite_depth)
      return NULL;
-   nir_ssa_def *dest = &tex->dest.ssa;
-   if (rewrite_depth && data) {
-      if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
-         flag_shadow_tex(var, data);
-      else
-         mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
-      return NULL;
+   nir_def *dest = &tex->def;
+   if (rewrite_depth && zs) {
+      if (nir_def_components_read(dest) & ~1) {
+         /* this needs recompiles */
+         if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
+            flag_shadow_tex(var, zs);
+         else
+            mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
+         return NULL;
+      }
+      /* If only .x is used in the NIR, then it's effectively not a legacy depth
+       * sample anyway and we don't want to ask for shader recompiles. This is
+       * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or
+       * LUMINANCE, so apps just use the first channel.
+       */
+      tex->def.num_components = 1;
+      tex->is_new_style_shadow = true;
    }
    if (bit_size != dest_size) {
-      tex->dest.ssa.bit_size = bit_size;
+      tex->def.bit_size = bit_size;
      tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
      if (is_int) {
         if (glsl_unsigned_base_type_of(ret_type) == ret_type)
-            dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
+            dest = nir_u2uN(b, &tex->def, dest_size);
         else
-            dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
+            dest = nir_i2iN(b, &tex->def, dest_size);
      } else {
-         dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
+         dest = nir_f2fN(b, &tex->def, dest_size);
      }
      if (rewrite_depth)
         return dest;
-      nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
+      nir_def_rewrite_uses_after(&tex->def, dest, dest->parent_instr);
    } else if (rewrite_depth) {
      return dest;
    }
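/* Illustrative sketch, not part of the diff: the comment above describes the
 * legacy-shadow expansion. SPIR-V returns a scalar depth result, and
 * GL_DEPTH_TEXTURE_MODE decides how that scalar fans out to vec4. A plain C
 * illustration of the RED vs LUMINANCE swizzles:
 */
#include <stdio.h>

enum depth_mode { DEPTH_RED, DEPTH_LUMINANCE };

static void
expand_depth(float d, enum depth_mode mode, float out[4])
{
   if (mode == DEPTH_RED) {
      out[0] = d; out[1] = 0.0f; out[2] = 0.0f; out[3] = 1.0f;
   } else { /* LUMINANCE: depth splatted across rgb */
      out[0] = d; out[1] = d; out[2] = d; out[3] = 1.0f;
   }
}

int
main(void)
{
   float v[4];
   expand_depth(0.5f, DEPTH_LUMINANCE, v);
   printf("%g %g %g %g\n", v[0], v[1], v[2], v[3]); /* 0.5 0.5 0.5 1 */
   return 0;
}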
@@ -3393,31 +3441,31 @@ lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
    const struct glsl_type *type = glsl_without_array(var->type);
    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
    bool is_int = glsl_base_type_is_integer(ret_type);
-   unsigned num_components = nir_dest_num_components(tex->dest);
+   unsigned num_components = tex->def.num_components;
    if (tex->is_shadow)
      tex->is_new_style_shadow = true;
-   nir_ssa_def *dest = rewrite_tex_dest(b, tex, var, NULL);
+   nir_def *dest = rewrite_tex_dest(b, tex, var, NULL);
    assert(dest || !state->shadow_only);
    if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
      return false;
    else if (!dest)
-      dest = &tex->dest.ssa;
+      dest = &tex->def;
    else
-      tex->dest.ssa.num_components = 1;
+      tex->def.num_components = 1;
    if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
      /* these require manual swizzles */
      if (tex->op == nir_texop_tg4) {
         assert(!tex->is_shadow);
-         nir_ssa_def *swizzle;
+         nir_def *swizzle;
         switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
         case PIPE_SWIZZLE_0:
-            swizzle = nir_imm_zero(b, 4, nir_dest_bit_size(tex->dest));
+            swizzle = nir_imm_zero(b, 4, tex->def.bit_size);
            break;
         case PIPE_SWIZZLE_1:
            if (is_int)
-               swizzle = nir_imm_intN_t(b, 4, nir_dest_bit_size(tex->dest));
+               swizzle = nir_imm_intN_t(b, 4, tex->def.bit_size);
            else
-               swizzle = nir_imm_floatN_t(b, 4, nir_dest_bit_size(tex->dest));
+               swizzle = nir_imm_floatN_t(b, 4, tex->def.bit_size);
            break;
         default:
            if (!tex->component)
@@ -3425,101 +3473,373 @@ lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
               tex->component = 0;
            return true;
         }
-         nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
+         nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
         return true;
      }
-      nir_ssa_def *vec[4];
+      nir_def *vec[4];
      for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
         switch (swizzle_key->swizzle[sampler_id].s[i]) {
         case PIPE_SWIZZLE_0:
-            vec[i] = nir_imm_zero(b, 1, nir_dest_bit_size(tex->dest));
+            vec[i] = nir_imm_zero(b, 1, tex->def.bit_size);
            break;
         case PIPE_SWIZZLE_1:
            if (is_int)
-               vec[i] = nir_imm_intN_t(b, 1, nir_dest_bit_size(tex->dest));
+               vec[i] = nir_imm_intN_t(b, 1, tex->def.bit_size);
            else
-               vec[i] = nir_imm_floatN_t(b, 1, nir_dest_bit_size(tex->dest));
+               vec[i] = nir_imm_floatN_t(b, 1, tex->def.bit_size);
            break;
         default:
            vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
            break;
         }
      }
-      nir_ssa_def *swizzle = nir_vec(b, vec, num_components);
-      nir_ssa_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
+      nir_def *swizzle = nir_vec(b, vec, num_components);
+      nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
    } else {
      assert(tex->is_shadow);
-      nir_ssa_def *vec[4] = {dest, dest, dest, dest};
-      nir_ssa_def *splat = nir_vec(b, vec, num_components);
-      nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
+      nir_def *vec[4] = {dest, dest, dest, dest};
+      nir_def *splat = nir_vec(b, vec, num_components);
+      nir_def_rewrite_uses_after(dest, splat, splat->parent_instr);
    }
    return true;
 }
 
+/* Applies in-shader swizzles when necessary for depth/shadow sampling.
+ *
+ * SPIRV only has new-style (scalar result) shadow sampling, so to emulate
+ * !is_new_style_shadow (vec4 result) shadow sampling we lower to a
+ * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR
+ * shader to expand out to vec4. Since this depends on sampler state, it's a
+ * draw-time shader recompile to do so.
+ *
+ * We may also need to apply shader swizzles for
+ * driver_workarounds.needs_zs_shader_swizzle.
+ */
 static bool
 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
 {
+   /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */
    unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
    struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
    return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr, nir_metadata_dominance | nir_metadata_block_index, (void*)&state);
 }
 
 static bool
-invert_point_coord_instr(nir_builder *b, nir_instr *instr, void *data)
+invert_point_coord_instr(nir_builder *b, nir_intrinsic_instr *intr,
+                         void *data)
 {
-   if (instr->type != nir_instr_type_intrinsic)
+   if (intr->intrinsic != nir_intrinsic_load_point_coord)
      return false;
-   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   if (intr->intrinsic != nir_intrinsic_load_deref)
+   b->cursor = nir_after_instr(&intr->instr);
+   nir_def *def = nir_vec2(b, nir_channel(b, &intr->def, 0),
+                           nir_fsub_imm(b, 1.0, nir_channel(b, &intr->def, 1)));
+   nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
+   return true;
+}
+
+static bool
+invert_point_coord(nir_shader *nir)
+{
+   if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD))
      return false;
-   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
-   if (deref_var->data.location != VARYING_SLOT_PNTC)
+   return nir_shader_intrinsics_pass(nir, invert_point_coord_instr,
+                                     nir_metadata_dominance, NULL);
+}
+
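/* Illustrative sketch, not part of the diff: the point_coord_yinvert key
 * flips the Y axis of gl_PointCoord (GL's point-sprite origin can be
 * configured as lower-left, while Vulkan's is upper-left), so the pass above
 * rewrites (x, y) to (x, 1 - y). In plain C:
 */
#include <stdio.h>

int
main(void)
{
   float pntc[2] = {0.25f, 0.75f};
   float flipped[2] = {pntc[0], 1.0f - pntc[1]};
   printf("%g %g\n", flipped[0], flipped[1]); /* 0.25 0.25 */
   return 0;
}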
+static bool
+is_residency_code(nir_def *src)
+{
+   nir_instr *parent = src->parent_instr;
+   while (1) {
+      if (parent->type == nir_instr_type_intrinsic) {
+         ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
+         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
+         return false;
+      }
+      if (parent->type == nir_instr_type_tex)
+         return true;
+      assert(parent->type == nir_instr_type_alu);
+      nir_alu_instr *alu = nir_instr_as_alu(parent);
+      parent = alu->src[0].src.ssa->parent_instr;
+   }
+}
+
+static bool
+lower_sparse_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
+{
+   if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
+      b->cursor = nir_before_instr(&instr->instr);
+      nir_def *src0;
+      if (is_residency_code(instr->src[0].ssa))
+         src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
+      else
+         src0 = instr->src[0].ssa;
+      nir_def *src1;
+      if (is_residency_code(instr->src[1].ssa))
+         src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
+      else
+         src1 = instr->src[1].ssa;
+      nir_def *def = nir_iand(b, src0, src1);
+      nir_def_rewrite_uses_after(&instr->def, def, &instr->instr);
+      nir_instr_remove(&instr->instr);
+      return true;
+   }
+   if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
+      return false;
+
+   /* vulkan vec can only be a vec4, but this is (maybe) vec5,
+    * so just rewrite as the first component since ntv is going to use a different
+    * method for storing the residency value anyway
+    */
+   b->cursor = nir_before_instr(&instr->instr);
+   nir_instr *parent = instr->src[0].ssa->parent_instr;
+   if (is_residency_code(instr->src[0].ssa)) {
+      assert(parent->type == nir_instr_type_alu);
+      nir_alu_instr *alu = nir_instr_as_alu(parent);
+      nir_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
+      nir_instr_remove(parent);
+   } else {
+      nir_def *src;
+      if (parent->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
+         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
+         src = intr->src[0].ssa;
+      } else {
+         assert(parent->type == nir_instr_type_alu);
+         nir_alu_instr *alu = nir_instr_as_alu(parent);
+         src = alu->src[0].src.ssa;
+      }
+      if (instr->def.bit_size != 32) {
+         if (instr->def.bit_size == 1)
+            src = nir_ieq_imm(b, src, 1);
+         else
+            src = nir_u2uN(b, src, instr->def.bit_size);
+      }
+      nir_def_rewrite_uses(&instr->def, src);
+      nir_instr_remove(&instr->instr);
+   }
    return true;
 }
 
 static bool
-invert_point_coord(nir_shader *nir)
+lower_sparse(nir_shader *shader)
+{
+   return nir_shader_intrinsics_pass(shader, lower_sparse_instr,
+                                     nir_metadata_dominance, NULL);
+}
+
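/* Illustrative sketch, not part of the diff: lower_sparse_instr normalizes
 * sparse-residency handling, converting raw residency codes to booleans and
 * combining them with a logical AND. The boolean algebra being implemented,
 * in plain C:
 */
#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
   bool texel_a_resident = true;
   bool texel_b_resident = false;
   /* sparse_residency_code_and: both lookups must hit resident memory */
   printf("%d\n", texel_a_resident && texel_b_resident); /* prints 0 */
   return 0;
}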
+static bool
+add_derefs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 {
-   if (!(nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)))
+   bool is_load = false;
+   bool is_input = false;
+   bool is_interp = false;
+   if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
      return false;
-   return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL);
+   unsigned loc = nir_intrinsic_io_semantics(intr).location;
+   nir_src *src_offset = nir_get_io_offset_src(intr);
+   const unsigned slot_offset = src_offset && nir_src_is_const(*src_offset) ? nir_src_as_uint(*src_offset) : 0;
+   unsigned location = loc + slot_offset;
+   unsigned frac = nir_intrinsic_component(intr);
+   unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
+   /* set c aligned/rounded down to dword */
+   unsigned c = frac;
+   if (frac && bit_size < 32)
+      c = frac * bit_size / 32;
+   /* loop over all the variables and rewrite corresponding access */
+   nir_foreach_variable_with_modes(var, b->shader, is_input ? nir_var_shader_in : nir_var_shader_out) {
+      const struct glsl_type *type = var->type;
+      if (nir_is_arrayed_io(var, b->shader->info.stage))
+         type = glsl_get_array_element(type);
+      unsigned slot_count = get_var_slot_count(b->shader, var);
+      /* filter access that isn't specific to this variable */
+      if (var->data.location > location || var->data.location + slot_count <= location)
+         continue;
+      if (var->data.fb_fetch_output != nir_intrinsic_io_semantics(intr).fb_fetch_output)
+         continue;
+      if (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_load && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
+         continue;
+
+      unsigned size = 0;
+      bool is_struct = glsl_type_is_struct(glsl_without_array(type));
+      if (is_struct)
+         size = get_slot_components(var, var->data.location + slot_offset, var->data.location);
+      else if ((var->data.mode == nir_var_shader_out && var->data.location < VARYING_SLOT_VAR0) ||
+               (var->data.mode == nir_var_shader_in && var->data.location < (b->shader->info.stage == MESA_SHADER_VERTEX ? VERT_ATTRIB_GENERIC0 : VARYING_SLOT_VAR0)))
+         size = glsl_type_is_array(type) ? glsl_get_aoa_size(type) : glsl_get_vector_elements(type);
+      else
+         size = glsl_get_vector_elements(glsl_without_array(type));
+      assert(size);
+      if (glsl_type_is_64bit(glsl_without_array(var->type)))
+         size *= 2;
+      if (var->data.location != location && size > 4 && size % 4 && !is_struct) {
+         /* adjust for dvec3-type slot overflow */
+         assert(location > var->data.location);
+         size -= (location - var->data.location) * 4;
+      }
+      assert(size);
+      if (var->data.location_frac + size <= c || var->data.location_frac > c)
+         continue;
+
+      b->cursor = nir_before_instr(&intr->instr);
+      nir_deref_instr *deref = nir_build_deref_var(b, var);
+      if (nir_is_arrayed_io(var, b->shader->info.stage)) {
+         assert(intr->intrinsic != nir_intrinsic_store_output);
+         deref = nir_build_deref_array(b, deref, intr->src[!is_load].ssa);
+      }
+      if (glsl_type_is_array(type)) {
+         /* unroll array derefs */
+         unsigned idx = frac - var->data.location_frac;
+         assert(src_offset);
+         if (var->data.location < VARYING_SLOT_VAR0) {
+            if (src_offset) {
+               /* clip/cull dist and tess levels use different array offset semantics */
+               bool is_clipdist = (b->shader->info.stage != MESA_SHADER_VERTEX || var->data.mode == nir_var_shader_out) &&
+                                  var->data.location >= VARYING_SLOT_CLIP_DIST0 && var->data.location <= VARYING_SLOT_CULL_DIST1;
+               bool is_tess_level = b->shader->info.stage == MESA_SHADER_TESS_CTRL &&
+                                    var->data.location >= VARYING_SLOT_TESS_LEVEL_INNER && var->data.location >= VARYING_SLOT_TESS_LEVEL_OUTER;
+               bool is_builtin_array = is_clipdist || is_tess_level;
+               /* this is explicit for ease of debugging but could be collapsed at some point in the future*/
+               if (nir_src_is_const(*src_offset)) {
+                  unsigned offset = slot_offset;
+                  if (is_builtin_array)
+                     offset *= 4;
+                  deref = nir_build_deref_array_imm(b, deref, offset + idx);
+               } else {
+                  nir_def *offset = src_offset->ssa;
+                  if (is_builtin_array)
+                     nir_imul_imm(b, offset, 4);
+                  deref = nir_build_deref_array(b, deref, idx ? nir_iadd_imm(b, offset, idx) : src_offset->ssa);
+               }
+            } else {
+               deref = nir_build_deref_array_imm(b, deref, idx);
+            }
+            type = glsl_get_array_element(type);
+         } else {
+            /* need to convert possible N*M to [N][M] */
+            nir_def *nm = idx ? nir_iadd_imm(b, src_offset->ssa, idx) : src_offset->ssa;
+            while (glsl_type_is_array(type)) {
+               const struct glsl_type *elem = glsl_get_array_element(type);
+               unsigned type_size = glsl_count_vec4_slots(elem, false, false);
+               nir_def *n = glsl_type_is_array(elem) ? nir_udiv_imm(b, nm, type_size) : nm;
+               if (glsl_type_is_vector_or_scalar(elem) && glsl_type_is_64bit(elem) && glsl_get_vector_elements(elem) > 2)
+                  n = nir_udiv_imm(b, n, 2);
+               deref = nir_build_deref_array(b, deref, n);
+               nm = nir_umod_imm(b, nm, type_size);
+               type = glsl_get_array_element(type);
+            }
+         }
+      } else if (glsl_type_is_struct(type)) {
+         deref = nir_build_deref_struct(b, deref, slot_offset);
+      }
+      if (is_load) {
+         nir_def *load;
+         if (is_interp) {
+            nir_def *interp = intr->src[0].ssa;
+            nir_intrinsic_instr *interp_intr = nir_instr_as_intrinsic(interp->parent_instr);
+            assert(interp_intr);
+            var->data.interpolation = nir_intrinsic_interp_mode(interp_intr);
+            switch (interp_intr->intrinsic) {
+            case nir_intrinsic_load_barycentric_centroid:
+               load = nir_interp_deref_at_centroid(b, intr->num_components, bit_size, &deref->def);
+               break;
+            case nir_intrinsic_load_barycentric_sample:
+               var->data.sample = 1;
+               load = nir_load_deref(b, deref);
+               break;
+            case nir_intrinsic_load_barycentric_pixel:
+               load = nir_load_deref(b, deref);
+               break;
+            case nir_intrinsic_load_barycentric_at_sample:
+               load = nir_interp_deref_at_sample(b, intr->num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
+               break;
+            case nir_intrinsic_load_barycentric_at_offset:
+               load = nir_interp_deref_at_offset(b, intr->num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
+               break;
+            default:
+               unreachable("unhandled interp!");
+            }
+         } else {
+            load = nir_load_deref(b, deref);
+         }
+         /* filter needed components */
+         if (intr->num_components < load->num_components)
+            load = nir_channels(b, load, BITFIELD_MASK(intr->num_components) << (c - var->data.location_frac));
+         nir_def_rewrite_uses(&intr->def, load);
+      } else {
+         nir_def *store = intr->src[0].ssa;
+         assert(!glsl_type_is_array(type));
+         unsigned num_components = glsl_get_vector_elements(type);
+         /* pad/filter components to match deref type */
+         if (intr->num_components < num_components) {
+            nir_def *zero = nir_imm_zero(b, 1, bit_size);
+            nir_def *vec[4] = {zero, zero, zero, zero};
+            u_foreach_bit(i, nir_intrinsic_write_mask(intr))
+               vec[c - var->data.location_frac + i] = nir_channel(b, store, i);
+            store = nir_vec(b, vec, num_components);
+         } if (store->num_components > num_components) {
+            store = nir_channels(b, store, nir_intrinsic_write_mask(intr));
+         }
+         if (store->bit_size != glsl_get_bit_size(type)) {
+            /* this should be some weird bindless io conversion */
+            assert(store->bit_size == 64 && glsl_get_bit_size(type) == 32);
+            assert(num_components != store->num_components);
+            store = nir_unpack_64_2x32(b, store);
+         }
+         nir_store_deref(b, deref, store, BITFIELD_RANGE(c - var->data.location_frac, intr->num_components));
+      }
+      nir_instr_remove(&intr->instr);
+      return true;
+   }
+   unreachable("failed to find variable for explicit io!");
+   return true;
+}
+
+static bool
+add_derefs(nir_shader *nir)
+{
+   return nir_shader_intrinsics_pass(nir, add_derefs_instr,
+                                     nir_metadata_dominance, NULL);
+}
+
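/* Illustrative sketch, not part of the diff: after the deref is built, loads
 * may return more components than the lowered intrinsic asked for, so the
 * pass masks out a contiguous span starting at the access component. A
 * standalone C version of that channel filtering:
 */
#include <stdio.h>

static void
filter_channels(const float *load, unsigned num, unsigned start, float *out)
{
   /* mirrors nir_channels(b, load, BITFIELD_MASK(num) << start) */
   for (unsigned i = 0; i < num; i++)
      out[i] = load[start + i];
}

int
main(void)
{
   float loaded[4] = {1, 2, 3, 4}, out[2];
   filter_channels(loaded, 2, 1, out);
   printf("%g %g\n", out[0], out[1]); /* prints 2 3 */
   return 0;
}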
-static VkShaderModule
-compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir)
+static struct zink_shader_object
+compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
 {
-   VkShaderModule mod = VK_NULL_HANDLE;
    struct zink_shader_info *sinfo = &zs->sinfo;
    prune_io(nir);

    NIR_PASS_V(nir, nir_convert_from_ssa, true);

+   if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))
+      nir_index_ssa_defs(nir_shader_get_entrypoint(nir));
+   if (zink_debug & ZINK_DEBUG_NIR) {
+      fprintf(stderr, "NIR shader:\n---8<---\n");
+      nir_print_shader(nir, stderr);
+      fprintf(stderr, "---8<---\n");
+   }
+
+   struct zink_shader_object obj;
    struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
    if (spirv)
-      mod = zink_shader_spirv_compile(screen, zs, spirv);
+      obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);

    /* TODO: determine if there's any reason to cache spirv output? */
    if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
       zs->spirv = spirv;
    else
-      ralloc_free(spirv);
-   return mod;
+      obj.spirv = spirv;
+   return obj;
 }
-VkShaderModule
-zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
-                    nir_shader *nir, const struct zink_shader_key *key, const void *extra_data)
+struct zink_shader_object
+zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
+                    nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
 {
-   VkShaderModule mod = VK_NULL_HANDLE;
-   struct zink_shader_info *sinfo = &zs->sinfo;
-   bool need_optimize = false;
+   bool need_optimize = true;
    bool inlined_uniforms = false;

+   NIR_PASS_V(nir, add_derefs);
+   NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
    if (key) {
       if (key->inline_uniforms) {
          NIR_PASS_V(nir, nir_inline_uniforms,
@@ -3591,15 +3911,14 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
       case MESA_SHADER_TESS_EVAL:
       case MESA_SHADER_GEOMETRY:
          if (zink_vs_key_base(key)->last_vertex_stage) {
-            if (zs->sinfo.have_xfb)
-               sinfo->last_vertex = true;
-
             if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
                NIR_PASS_V(nir, nir_lower_clip_halfz);
             }
             if (zink_vs_key_base(key)->push_drawid) {
                NIR_PASS_V(nir, lower_drawid);
             }
+         } else {
+            nir->xfb_info = NULL;
          }
          if (zink_vs_key_base(key)->robust_access)
             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
@@ -3639,7 +3958,7 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
             NIR_PASS_V(nir, lower_dual_blend);
          }
          if (zink_fs_key_base(key)->coord_replace_bits)
-            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, false, false);
+            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false);
          if (zink_fs_key_base(key)->point_coord_yinvert)
            NIR_PASS_V(nir, invert_point_coord);
          if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
@@ -3685,13 +4004,13 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
       }
    }
    if (screen->driconf.inline_uniforms) {
-      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
+      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
       NIR_PASS_V(nir, rewrite_bo_access, screen);
       NIR_PASS_V(nir, remove_bo_access, zs);
       need_optimize = true;
    }
    if (inlined_uniforms) {
-      optimize_nir(nir, zs);
+      optimize_nir(nir, zs, true);

       /* This must be done again. */
       NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
@@ -3701,18 +4020,22 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs,
       if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
          zs->can_inline = false;
    } else if (need_optimize)
-      optimize_nir(nir, zs);
+      optimize_nir(nir, zs, true);
+   NIR_PASS_V(nir, lower_sparse);

-   mod = compile_module(screen, zs, nir);
+   struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
    ralloc_free(nir);
-   return mod;
+   return obj;
 }

-VkShaderModule
+struct zink_shader_object
 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
 {
    nir_shader *nir = zink_shader_deserialize(screen, zs);
-   int set = nir->info.stage == MESA_SHADER_FRAGMENT;
+   /* TODO: maybe compile multiple variants for different set counts for compact mode? */
+   int set = zs->info.stage == MESA_SHADER_FRAGMENT;
+   if (screen->info.have_EXT_shader_object)
+      set = zs->info.stage;
    unsigned offsets[4];
    zink_descriptor_shader_get_binding_offsets(zs, offsets);
    nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
@@ -3736,23 +4059,45 @@ zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
       default: break;
       }
    }
-   optimize_nir(nir, zs);
-   VkShaderModule mod = compile_module(screen, zs, nir);
+   NIR_PASS_V(nir, add_derefs);
+   NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
+   if (screen->driconf.inline_uniforms) {
+      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
+      NIR_PASS_V(nir, rewrite_bo_access, screen);
+      NIR_PASS_V(nir, remove_bo_access, zs);
+   }
+   optimize_nir(nir, zs, true);
+   zink_descriptor_shader_init(screen, zs);
+   nir_shader *nir_clone = NULL;
+   if (screen->info.have_EXT_shader_object)
+      nir_clone = nir_shader_clone(nir, nir);
+   struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
+   if (screen->info.have_EXT_shader_object && !zs->info.internal) {
+      /* always try to pre-generate a tcs in case it's needed */
+      if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
+         nir_shader *nir_tcs = NULL;
+         /* use max pcp for compat */
+         zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, nir_clone, 32, &nir_tcs);
+         nir_tcs->info.separate_shader = true;
+         zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
+         ralloc_free(nir_tcs);
+      }
+   }
    ralloc_free(nir);
-   return mod;
+   spirv_shader_delete(obj.spirv);
+   obj.spirv = NULL;
+   return obj;
 }
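One memory-management detail worth noting in zink_shader_compile_separate: nir_shader_clone takes a ralloc memory context as its first argument, and the clone here is parented to the original shader. A sketch of the ownership pattern this sets up, assuming nothing else takes a reference to the clone:

   /* the clone is a ralloc child of 'nir' ... */
   nir_shader *nir_clone = nir_shader_clone(nir, nir);

   /* ... use nir_clone, e.g. feed it to zink_shader_tcs_create ... */

   ralloc_free(nir);   /* one free tears down both shaders */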
 static bool
-lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
+lower_baseinstance_instr(nir_builder *b, nir_intrinsic_instr *intr,
+                         void *data)
 {
-   if (instr->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    if (intr->intrinsic != nir_intrinsic_load_instance_id)
       return false;
-   b->cursor = nir_after_instr(instr);
-   nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
-   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
+   b->cursor = nir_after_instr(&intr->instr);
+   nir_def *def = nir_isub(b, &intr->def, nir_load_base_instance(b));
+   nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
    return true;
 }

@@ -3761,7 +4106,8 @@ lower_baseinstance(nir_shader *shader)
 {
    if (shader->info.stage != MESA_SHADER_VERTEX)
       return false;
-   return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
+   return nir_shader_intrinsics_pass(shader, lower_baseinstance_instr,
+                                     nir_metadata_dominance, NULL);
 }

 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
@@ -3813,7 +4159,7 @@ unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
    }
    nir_fixup_deref_modes(shader);
    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-   optimize_nir(shader, NULL);
+   optimize_nir(shader, NULL, true);

    struct glsl_struct_field field = {0};
    field.name = ralloc_strdup(shader, "base");
@@ -3915,20 +4261,8 @@ analyze_io(struct zink_shader *zs, nir_shader *shader)
          ret = true;
          break;
       }
-      case nir_intrinsic_ssbo_atomic_fadd:
-      case nir_intrinsic_ssbo_atomic_add:
-      case nir_intrinsic_ssbo_atomic_imin:
-      case nir_intrinsic_ssbo_atomic_umin:
-      case nir_intrinsic_ssbo_atomic_imax:
-      case nir_intrinsic_ssbo_atomic_umax:
-      case nir_intrinsic_ssbo_atomic_and:
-      case nir_intrinsic_ssbo_atomic_or:
-      case nir_intrinsic_ssbo_atomic_xor:
-      case nir_intrinsic_ssbo_atomic_exchange:
-      case nir_intrinsic_ssbo_atomic_comp_swap:
-      case nir_intrinsic_ssbo_atomic_fmin:
-      case nir_intrinsic_ssbo_atomic_fmax:
-      case nir_intrinsic_ssbo_atomic_fcomp_swap:
+      case nir_intrinsic_ssbo_atomic:
+      case nir_intrinsic_ssbo_atomic_swap:
       case nir_intrinsic_load_ssbo:
          zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
          break;
@@ -3991,13 +4325,18 @@ lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
       return false;

    nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
-   if (!var)
+   if (!var) {
       var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
+      if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+         bindless->bindless[1] = var;
+      else
+         bindless->bindless[0] = var;
+   }
    b->cursor = nir_before_instr(in);
    nir_deref_instr *deref = nir_build_deref_var(b, var);
    if (glsl_type_is_array(var->type))
       deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
-   nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
+   nir_src_rewrite(&tex->src[idx].src, &deref->def);

    /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
     * match up with it in contrast to normal sampler ops where things are a bit more flexible;
@@ -4011,8 +4350,8 @@ lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
    unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
    unsigned coord_components = nir_src_num_components(tex->src[c].src);
    if (coord_components < needed_components) {
-      nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
-      nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
+      nir_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
+      nir_src_rewrite(&tex->src[c].src, def);
       tex->coord_components = needed_components;
    }
    return true;
@@ -4030,21 +4369,8 @@ lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)

    /* convert bindless intrinsics to deref intrinsics */
    switch (instr->intrinsic) {
-   OP_SWAP(atomic_add)
-   OP_SWAP(atomic_and)
-   OP_SWAP(atomic_comp_swap)
-   OP_SWAP(atomic_dec_wrap)
-   OP_SWAP(atomic_exchange)
-   OP_SWAP(atomic_fadd)
-   OP_SWAP(atomic_fmax)
-   OP_SWAP(atomic_fmin)
-   OP_SWAP(atomic_imax)
-   OP_SWAP(atomic_imin)
-   OP_SWAP(atomic_inc_wrap)
-   OP_SWAP(atomic_or)
-   OP_SWAP(atomic_umax)
-   OP_SWAP(atomic_umin)
-   OP_SWAP(atomic_xor)
+   OP_SWAP(atomic)
+   OP_SWAP(atomic_swap)
    OP_SWAP(format)
    OP_SWAP(load)
    OP_SWAP(order)
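OP_SWAP is defined earlier in this file, outside this excerpt. A plausible shape for it, shown here as an assumption rather than the verbatim macro, is a case label that renames a bindless image intrinsic to its deref-based twin and falls through to the shared rewrite code below:

   /* assumed definition, for illustration only */
   #define OP_SWAP(OP) \
      case nir_intrinsic_bindless_image_##OP: \
         instr->intrinsic = nir_intrinsic_image_deref_##OP; \
         break;

The payoff of NIR's unified-atomics rework is visible in this hunk: fifteen per-operation cases collapse to just atomic and atomic_swap, because the operation now travels in the intrinsic's nir_atomic_op index instead of its name.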
@@ -4064,7 +4390,7 @@ lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
    nir_deref_instr *deref = nir_build_deref_var(b, var);
    if (glsl_type_is_array(var->type))
       deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
-   nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
+   nir_src_rewrite(&instr->src[0], &deref->def);
    return true;
 }

@@ -4075,23 +4401,22 @@ lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
       return false;
    nir_fixup_deref_modes(shader);
    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
-   optimize_nir(shader, NULL);
+   optimize_nir(shader, NULL, true);
    return true;
 }

 /* convert shader image/texture io variables to int64 handles for bindless indexing */
 static bool
-lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
+lower_bindless_io_instr(nir_builder *b, nir_intrinsic_instr *instr,
+                        void *data)
 {
-   if (in->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
-   if (instr->intrinsic != nir_intrinsic_load_deref &&
-       instr->intrinsic != nir_intrinsic_store_deref)
+   bool is_load = false;
+   bool is_input = false;
+   bool is_interp = false;
+   if (!filter_io_instr(instr, &is_load, &is_input, &is_interp))
       return false;

-   nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
-   nir_variable *var = nir_deref_instr_get_variable(src_deref);
+   nir_variable *var = find_var_with_location_frac(b->shader, nir_intrinsic_io_semantics(instr).location, nir_intrinsic_component(instr), false, is_input ? nir_var_shader_in : nir_var_shader_out);
    if (var->data.bindless)
       return false;
    if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
@@ -4099,26 +4424,16 @@ lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
    if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
       return false;

-   var->type = glsl_int64_t_type();
+   var->type = glsl_vector_type(GLSL_TYPE_INT, 2);
    var->data.bindless = 1;
-   b->cursor = nir_before_instr(in);
-   nir_deref_instr *deref = nir_build_deref_var(b, var);
-   if (instr->intrinsic == nir_intrinsic_load_deref) {
-      nir_ssa_def *def = nir_load_deref(b, deref);
-      nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
-      nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
-   } else {
-      nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
-   }
-   nir_instr_remove(in);
-   nir_instr_remove(&src_deref->instr);
    return true;
 }

 static bool
 lower_bindless_io(nir_shader *shader)
 {
-   return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
+   return nir_shader_intrinsics_pass(shader, lower_bindless_io_instr,
+                                     nir_metadata_dominance, NULL);
 }
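The retype to glsl_vector_type(GLSL_TYPE_INT, 2) means a 64-bit bindless handle now crosses the shader interface as two 32-bit words rather than a single int64. A sketch of the equivalence using the stock pack/unpack opcodes (hypothetical builder context, not code from this pass):

   nir_def *handle = nir_load_var(b, var);           /* 64-bit handle */
   nir_def *words  = nir_unpack_64_2x32(b, handle);  /* ivec2-sized i/o payload */
   nir_def *again  = nir_pack_64_2x32(b, words);     /* reassembled handle */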
 static uint32_t
@@ -4246,24 +4561,24 @@ convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
          continue;
       if (tex->src[c].src.ssa->num_components == tex->coord_components)
          continue;
-      nir_ssa_def *def;
-      nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
+      nir_def *def;
+      nir_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
       if (tex->src[c].src.ssa->num_components == 1)
          def = nir_vec2(b, tex->src[c].src.ssa, zero);
       else
          def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
-      nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
+      nir_src_rewrite(&tex->src[c].src, def);
    }
    b->cursor = nir_after_instr(instr);
    unsigned needed_components = nir_tex_instr_dest_size(tex);
-   unsigned num_components = tex->dest.ssa.num_components;
+   unsigned num_components = tex->def.num_components;
    if (needed_components > num_components) {
-      tex->dest.ssa.num_components = needed_components;
+      tex->def.num_components = needed_components;
       assert(num_components < 3);
       /* take either xz or just x since this is promoted to 2D from 1D */
       uint32_t mask = num_components == 2 ? (1|4) : 1;
-      nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
-      nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
+      nir_def *dst = nir_channels(b, &tex->def, mask);
+      nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
    }
    return true;
 }
@@ -4290,10 +4605,8 @@ lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
 static void
 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
 {
-   nir_foreach_function(function, shader) {
-      if (!function->impl)
-         continue;
-      nir_foreach_block_safe(block, function->impl) {
+   nir_foreach_function_impl(impl, shader) {
+      nir_foreach_block_safe(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_tex) {
               nir_tex_instr *tex = nir_instr_as_tex(instr);
@@ -4305,24 +4618,14 @@ scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
               if (intr->intrinsic == nir_intrinsic_image_deref_load ||
                   intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
                   intr->intrinsic == nir_intrinsic_image_deref_store ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
-                  intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
+                  intr->intrinsic == nir_intrinsic_image_deref_atomic ||
+                  intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
                   intr->intrinsic == nir_intrinsic_image_deref_size ||
                   intr->intrinsic == nir_intrinsic_image_deref_samples ||
                   intr->intrinsic == nir_intrinsic_image_deref_format ||
                   intr->intrinsic == nir_intrinsic_image_deref_order) {
-                  nir_variable *var =
-                     nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
+                  nir_variable *var = nir_intrinsic_get_var(intr, 0);
                  /* Structs have been lowered already, so get_aoa_size is sufficient. */
                  const unsigned size =
@@ -4337,9 +4640,10 @@ scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
               static bool warned = false;
               if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
                  switch (intr->intrinsic) {
-                  case nir_intrinsic_image_deref_atomic_add: {
+                  case nir_intrinsic_image_deref_atomic: {
                     nir_variable *var = nir_intrinsic_get_var(intr, 0);
-                     if (util_format_is_float(var->data.image.format))
+                     if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd &&
+                         util_format_is_float(var->data.image.format))
                        fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
                     break;
                  }
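Both of these hunks are fallout from NIR's consolidated atomics: the per-operation image intrinsics were replaced by one intrinsic carrying a nir_atomic_op index, so checks now filter on the op as well. A sketch of how a test like the float-atomic warning generalizes under that scheme (the helper name is illustrative, not from this file):

   static bool
   is_float_atomic_op(nir_intrinsic_instr *intr)
   {
      if (intr->intrinsic != nir_intrinsic_image_deref_atomic &&
          intr->intrinsic != nir_intrinsic_image_deref_atomic_swap)
         return false;
      /* the operation is an index now, not part of the intrinsic name */
      switch (nir_intrinsic_atomic_op(intr)) {
      case nir_atomic_op_fadd:
      case nir_atomic_op_fmin:
      case nir_atomic_op_fmax:
      case nir_atomic_op_fcmpxchg:
         return true;
      default:
         return false;
      }
   }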
@@ -4353,90 +4657,6 @@ scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
 }

 static bool
-is_residency_code(nir_ssa_def *src)
-{
-   nir_instr *parent = src->parent_instr;
-   while (1) {
-      if (parent->type == nir_instr_type_intrinsic) {
-         ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
-         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
-         return false;
-      }
-      if (parent->type == nir_instr_type_tex)
-         return true;
-      assert(parent->type == nir_instr_type_alu);
-      nir_alu_instr *alu = nir_instr_as_alu(parent);
-      parent = alu->src[0].src.ssa->parent_instr;
-   }
-}
-
-static bool
-lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
-{
-   if (in->type != nir_instr_type_intrinsic)
-      return false;
-   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
-   if (instr->intrinsic == nir_intrinsic_sparse_residency_code_and) {
-      b->cursor = nir_before_instr(&instr->instr);
-      nir_ssa_def *src0;
-      if (is_residency_code(instr->src[0].ssa))
-         src0 = nir_is_sparse_texels_resident(b, 1, instr->src[0].ssa);
-      else
-         src0 = instr->src[0].ssa;
-      nir_ssa_def *src1;
-      if (is_residency_code(instr->src[1].ssa))
-         src1 = nir_is_sparse_texels_resident(b, 1, instr->src[1].ssa);
-      else
-         src1 = instr->src[1].ssa;
-      nir_ssa_def *def = nir_iand(b, src0, src1);
-      nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, def, in);
-      nir_instr_remove(in);
-      return true;
-   }
-   if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
-      return false;
-
-   /* vulkan vec can only be a vec4, but this is (maybe) vec5,
-    * so just rewrite as the first component since ntv is going to use a different
-    * method for storing the residency value anyway
-    */
-   b->cursor = nir_before_instr(&instr->instr);
-   nir_instr *parent = instr->src[0].ssa->parent_instr;
-   if (is_residency_code(instr->src[0].ssa)) {
-      assert(parent->type == nir_instr_type_alu);
-      nir_alu_instr *alu = nir_instr_as_alu(parent);
-      nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
-      nir_instr_remove(parent);
-   } else {
-      nir_ssa_def *src;
-      if (parent->type == nir_instr_type_intrinsic) {
-         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
-         assert(intr->intrinsic == nir_intrinsic_is_sparse_texels_resident);
-         src = intr->src[0].ssa;
-      } else {
-         assert(parent->type == nir_instr_type_alu);
-         nir_alu_instr *alu = nir_instr_as_alu(parent);
-         src = alu->src[0].src.ssa;
-      }
-      if (instr->dest.ssa.bit_size != 32) {
-         if (instr->dest.ssa.bit_size == 1)
-            src = nir_ieq_imm(b, src, 1);
-         else
-            src = nir_u2uN(b, src, instr->dest.ssa.bit_size);
-      }
-      nir_ssa_def_rewrite_uses(&instr->dest.ssa, src);
-      nir_instr_remove(in);
-   }
-   return true;
-}
-
-static bool
-lower_sparse(nir_shader *shader)
-{
-   return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
-}
-
-static bool
 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
 {
    if (in->type != nir_instr_type_tex)
@@ -4483,11 +4703,11 @@ split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
    default:
       return false;
    }
-   unsigned num_components = nir_dest_num_components(alu->dest.dest);
+   unsigned num_components = alu->def.num_components;
    if (num_components == 1)
       return false;
    b->cursor = nir_before_instr(in);
-   nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
+   nir_def *dests[NIR_MAX_VEC_COMPONENTS];
    for (unsigned i = 0; i < num_components; i++) {
       if (alu->op == nir_op_bitfield_insert)
          dests[i] = nir_bitfield_insert(b,
@@ -4506,8 +4726,8 @@ split_bitfields_instr(nir_builder *b, nir_instr *in, void *data)
                                         nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
                                         nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
    }
-   nir_ssa_def *dest = nir_vec(b, dests, num_components);
-   nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa, dest, in);
+   nir_def *dest = nir_vec(b, dests, num_components);
+   nir_def_rewrite_uses_after(&alu->def, dest, in);
    nir_instr_remove(in);
    return true;
 }
@@ -4522,8 +4742,8 @@ split_bitfields(nir_shader *shader)
 static void
 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
 {
-   nir_foreach_function(function, nir) {
-      nir_foreach_block(block, function->impl) {
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;
@@ -4547,8 +4767,8 @@ rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
 static void
 type_image(nir_shader *nir, nir_variable *var)
 {
-   nir_foreach_function(function, nir) {
-      nir_foreach_block(block, function->impl) {
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
@@ -4556,17 +4776,8 @@ type_image(nir_shader *nir, nir_variable *var)
            if (intr->intrinsic == nir_intrinsic_image_deref_load ||
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
                intr->intrinsic == nir_intrinsic_image_deref_store ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
-               intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
+               intr->intrinsic == nir_intrinsic_image_deref_atomic ||
+               intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
                intr->intrinsic == nir_intrinsic_image_deref_samples ||
                intr->intrinsic == nir_intrinsic_image_deref_format ||
                intr->intrinsic == nir_intrinsic_image_deref_order) {
@@ -4590,8 +4801,8 @@ type_image(nir_shader *nir, nir_variable *var)
         }
      }
   }
-   nir_foreach_function(function, nir) {
-      nir_foreach_block(block, function->impl) {
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
@@ -4619,72 +4830,22 @@ type_image(nir_shader *nir, nir_variable *var)
    var->data.mode = nir_var_shader_temp;
 }
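The split_bitfields_instr hunk above shows the standard NIR scalarization idiom. Stripped of the bitfield-specific branches it looks like this (a sketch, not the literal pass body):

   /* rebuild a vector ubfe as one scalar op per channel */
   nir_def *dests[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < num_components; i++)
      dests[i] = nir_ubitfield_extract(b,
                    nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
                    nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
                    nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
   nir_def *vec = nir_vec(b, dests, num_components);
   /* rewrite uses *after* 'in' so the new scalar ops keep their sources */
   nir_def_rewrite_uses_after(&alu->def, vec, in);
   nir_instr_remove(in);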
-static nir_variable *
-find_sampler_var(nir_shader *nir, unsigned texture_index)
-{
-   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
-      unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
-      if ((glsl_type_is_texture(glsl_without_array(var->type)) || glsl_type_is_sampler(glsl_without_array(var->type))) &&
-          (var->data.binding == texture_index || (var->data.binding < texture_index && var->data.binding + size > texture_index)))
-         return var;
-   }
-   return NULL;
-}
-
 static bool
 type_sampler_vars(nir_shader *nir, unsigned *sampler_mask)
 {
    bool progress = false;
-   nir_foreach_function(function, nir) {
-      nir_foreach_block(block, function->impl) {
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_tex)
               continue;
            nir_tex_instr *tex = nir_instr_as_tex(instr);
-            switch (tex->op) {
-            case nir_texop_lod:
-            case nir_texop_txs:
-            case nir_texop_query_levels:
-            case nir_texop_texture_samples:
-            case nir_texop_samples_identical:
-               continue;
-            default:
-               break;
-            }
-            *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
-            nir_variable *var = find_sampler_var(nir, tex->texture_index);
+            if (nir_tex_instr_need_sampler(tex))
+               *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
+            nir_variable *var = nir_find_sampler_variable_with_tex_index(nir, tex->texture_index);
            assert(var);
-            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
-               continue;
-            const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
-            unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
-            if (size > 1)
-               img_type = glsl_array_type(img_type, size, 0);
-            var->type = img_type;
-            progress = true;
-         }
-      }
-   }
-   nir_foreach_function(function, nir) {
-      nir_foreach_block(block, function->impl) {
-         nir_foreach_instr(instr, block) {
-            if (instr->type != nir_instr_type_tex)
-               continue;
-            nir_tex_instr *tex = nir_instr_as_tex(instr);
-            switch (tex->op) {
-            case nir_texop_lod:
-            case nir_texop_txs:
-            case nir_texop_query_levels:
-            case nir_texop_texture_samples:
-            case nir_texop_samples_identical:
-               break;
-            default:
-               continue;
-            }
-            *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
-            nir_variable *var = find_sampler_var(nir, tex->texture_index);
-            assert(var);
-            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID)
+            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID &&
+                nir_tex_instr_is_query(tex))
               continue;
            const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
            unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
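This hunk replaces two hand-rolled texop switches with core NIR helpers: nir_find_sampler_variable_with_tex_index subsumes the deleted find_sampler_var (including the binding-range match for arrays), and the query/sampler distinction becomes two predicate calls. A sketch of the intent, with an illustrative helper name:

   static void
   account_sampler(nir_tex_instr *tex, unsigned *sampler_mask)
   {
      /* txs/query_levels/texture_samples and friends never bind a sampler */
      if (nir_tex_instr_need_sampler(tex))
         *sampler_mask |= BITFIELD_BIT(tex->sampler_index);
   }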
@@ -4728,31 +4889,71 @@ type_images(nir_shader *nir, unsigned *sampler_mask)
 static bool
 fixup_io_locations(nir_shader *nir)
 {
-   nir_variable_mode mode = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
-   /* i/o interface blocks are required to be EXACT matches between stages:
-    * iterate over all locations and set locations incrementally
-    */
-   unsigned slot = 0;
-   for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
-      if (nir_slot_is_sysval_output(i))
-         continue;
-      nir_variable *var = nir_find_variable_with_location(nir, mode, i);
-      if (!var) {
-         /* locations used between stages are not required to be contiguous */
-         if (i >= VARYING_SLOT_VAR0)
-            slot++;
-         continue;
+   nir_variable_mode modes;
+   if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX)
+      modes = nir_var_shader_in | nir_var_shader_out;
+   else
+      modes = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
+   u_foreach_bit(mode, modes) {
+      nir_variable_mode m = BITFIELD_BIT(mode);
+      if ((m == nir_var_shader_in && ((nir->info.inputs_read & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == nir->info.inputs_read)) ||
+          (m == nir_var_shader_out && ((nir->info.outputs_written | nir->info.outputs_read) & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == (nir->info.outputs_written | nir->info.outputs_read))) {
+         /* this is a special heuristic to catch ARB/fixedfunc shaders which have different rules:
+          * - i/o interface blocks don't need to match
+          * - any location can be present or not
+          * - it just has to work
+          *
+          * VAR0 is the only user varying that mesa can produce in this case, so overwrite POS
+          * since it's a builtin and yolo it with all the other legacy crap
+          */
+         nir_foreach_variable_with_modes(var, nir, m) {
+            if (nir_slot_is_sysval_output(var->data.location, MESA_SHADER_NONE))
+               continue;
+            if (var->data.location == VARYING_SLOT_VAR0)
+               var->data.driver_location = 0;
+            else if (var->data.patch)
+               var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
+            else
+               var->data.driver_location = var->data.location;
+         }
+         return true;
+      }
+      /* i/o interface blocks are required to be EXACT matches between stages:
+       * iterate over all locations and set locations incrementally
+       */
+      unsigned slot = 0;
+      for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) {
+         if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
+            continue;
+         bool found = false;
+         unsigned size = 0;
+         nir_foreach_variable_with_modes(var, nir, m) {
+            if (var->data.location != i)
+               continue;
+            /* only add slots for non-component vars or first-time component vars */
+            if (!var->data.location_frac || !size) {
+               /* ensure variable is given enough slots */
+               if (nir_is_arrayed_io(var, nir->info.stage))
+                  size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
+               else
+                  size += glsl_count_vec4_slots(var->type, false, false);
+            }
+            if (var->data.patch)
+               var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
+            else
+               var->data.driver_location = slot;
+            found = true;
+         }
+         slot += size;
+         if (found) {
+            /* ensure the consumed slots aren't double iterated */
+            i += size - 1;
+         } else {
+            /* locations used between stages are not required to be contiguous */
+            if (i >= VARYING_SLOT_VAR0)
+               slot++;
+         }
       }
-      unsigned size;
-      /* ensure variable is given enough slots */
-      if (nir_is_arrayed_io(var, nir->info.stage))
-         size = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
-      else
-         size = glsl_count_vec4_slots(var->type, false, false);
-      var->data.driver_location = slot;
-      slot += size;
-      /* ensure the consumed slots aren't double iterated */
-      i += size - 1;
    }
    return true;
 }
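The slot accounting is easier to follow on a concrete case. Assume a hypothetical tess-ctrl per-vertex output vec4 foo[][2] at VARYING_SLOT_VAR3 (the outer, per-vertex dimension is what nir_is_arrayed_io peels off):

   /* arrayed io: count slots of the element type, not the per-vertex array */
   const struct glsl_type *elem = glsl_get_array_element(var->type); /* vec4[2] */
   unsigned size = glsl_count_vec4_slots(elem, false, false);        /* == 2 */
   var->data.driver_location = slot;  /* occupies driver slots [slot, slot+2) */
   slot += size;                      /* next variable starts after the array */
   i += size - 1;                     /* skip the consumed VARYING_SLOTs */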
@@ -4769,9 +4970,356 @@ zink_flat_flags(struct nir_shader *shader)
    return flat_flags;
 }

+static nir_variable *
+find_io_var_with_semantics(nir_shader *nir, nir_variable_mode mode, nir_variable_mode realmode, nir_io_semantics s, unsigned location, unsigned c, bool is_load)
+{
+   nir_foreach_variable_with_modes(var, nir, mode) {
+      const struct glsl_type *type = var->type;
+      nir_variable_mode m = var->data.mode;
+      var->data.mode = realmode;
+      if (nir_is_arrayed_io(var, nir->info.stage))
+         type = glsl_get_array_element(type);
+      var->data.mode = m;
+      if (var->data.fb_fetch_output != s.fb_fetch_output)
+         continue;
+      if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && s.dual_source_blend_index != var->data.index)
+         continue;
+      unsigned num_slots = var->data.compact ? DIV_ROUND_UP(glsl_array_size(type), 4) : glsl_count_attribute_slots(type, false);
+      if (var->data.location > location || var->data.location + num_slots <= location)
+         continue;
+      unsigned num_components = glsl_get_vector_elements(glsl_without_array(type));
+      if (glsl_type_contains_64bit(type)) {
+         num_components *= 2;
+         if (location > var->data.location) {
+            unsigned sub_components = (location - var->data.location) * 4;
+            if (sub_components > num_components)
+               continue;
+            num_components -= sub_components;
+         }
+      }
+      if (var->data.location_frac > c || var->data.location_frac + num_components <= c)
+         continue;
+      return var;
+   }
+   return NULL;
+}
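find_io_var_with_semantics resolves a lowered i/o intrinsic back to a variable, and everything it matches on comes straight from the intrinsic's attached semantics. In sketch form, the fields consumed here and in rework_io_vars below:

   nir_io_semantics s = nir_intrinsic_io_semantics(intr);
   /* s.location:  VARYING_SLOT_* / FRAG_RESULT_* / VERT_ATTRIB_* base slot */
   /* s.num_slots: extent of the (possibly indirectly indexed) slot range  */
   unsigned frac = nir_intrinsic_component(intr);  /* first vec4 component */
   nir_src *off = nir_get_io_offset_src(intr);     /* const or SSA slot offset */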
+
+static void
+rework_io_vars(nir_shader *nir, nir_variable_mode mode)
+{
+   assert(mode == nir_var_shader_out || mode == nir_var_shader_in);
+   assert(util_bitcount(mode) == 1);
+   bool found = false;
+   /* store old vars */
+   nir_foreach_variable_with_modes(var, nir, mode) {
+      if (nir->info.stage == MESA_SHADER_TESS_CTRL && mode == nir_var_shader_out)
+         var->data.compact |= var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
+      /* stash vars in this mode for now */
+      var->data.mode = nir_var_mem_shared;
+      found = true;
+   }
+   if (!found) {
+      if (mode == nir_var_shader_out)
+         found = nir->info.outputs_written || nir->info.outputs_read;
+      else
+         found = nir->info.inputs_read;
+      if (!found)
+         return;
+   }
+   /* scan for vars using indirect array access */
+   BITSET_DECLARE(indirect_access, 128);
+   BITSET_ZERO(indirect_access);
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            bool is_load = false;
+            bool is_input = false;
+            bool is_interp = false;
+            if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+               continue;
+            nir_src *src_offset = nir_get_io_offset_src(intr);
+            if (!is_input && !src_offset)
+               continue;
+            if (mode == nir_var_shader_in && !is_input)
+               continue;
+            if (mode == nir_var_shader_out && is_input)
+               continue;
+            nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+            if (!nir_src_is_const(*src_offset))
+               BITSET_SET(indirect_access, s.location);
+         }
+      }
+   }
+   /* loop and create vars */
+   nir_foreach_function_impl(impl, nir) {
+      nir_foreach_block(block, impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            bool is_load = false;
+            bool is_input = false;
+            bool is_interp = false;
+            if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
+               continue;
+            if (mode == nir_var_shader_in && !is_input)
+               continue;
+            if (mode == nir_var_shader_out && is_input)
+               continue;
+            nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+            unsigned slot_offset = 0;
+            bool is_indirect = BITSET_TEST(indirect_access, s.location);
+            nir_src *src_offset = nir_get_io_offset_src(intr);
+            if (src_offset && !is_indirect) {
+               assert(nir_src_is_const(*src_offset));
+               slot_offset = nir_src_as_uint(*src_offset);
+            }
+            unsigned location = s.location + slot_offset;
+            unsigned frac = nir_intrinsic_component(intr);
+            unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
+            /* set c aligned/rounded down to dword */
+            unsigned c = nir_slot_is_sysval_output(location, MESA_SHADER_NONE) ? 0 : frac;
+            if (frac && bit_size < 32)
+               c = frac * bit_size / 32;
+            nir_alu_type type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
+            /* ensure dword is filled with like-sized components */
+            unsigned max_components = intr->num_components;
+            if (mode == nir_var_shader_out && nir->info.stage == MESA_SHADER_FRAGMENT) {
+               switch (s.location) {
+               case FRAG_RESULT_DEPTH:
+               case FRAG_RESULT_STENCIL:
+               case FRAG_RESULT_SAMPLE_MASK:
+                  max_components = 1;
+                  break;
+               default:
+                  break;
+               }
+            } else if ((nir->info.stage != MESA_SHADER_VERTEX || mode != nir_var_shader_in) && s.location < VARYING_SLOT_VAR0) {
+               switch (s.location) {
+               case VARYING_SLOT_FOGC:
+                  /* use intr components */
+                  break;
+               case VARYING_SLOT_POS:
+               case VARYING_SLOT_COL0:
+               case VARYING_SLOT_COL1:
+               case VARYING_SLOT_TEX0:
+               case VARYING_SLOT_TEX1:
+               case VARYING_SLOT_TEX2:
+               case VARYING_SLOT_TEX3:
+               case VARYING_SLOT_TEX4:
+               case VARYING_SLOT_TEX5:
+               case VARYING_SLOT_TEX6:
+               case VARYING_SLOT_TEX7:
+               case VARYING_SLOT_BFC0:
+               case VARYING_SLOT_BFC1:
+               case VARYING_SLOT_EDGE:
+               case VARYING_SLOT_CLIP_VERTEX:
+               case VARYING_SLOT_PNTC:
+               case VARYING_SLOT_BOUNDING_BOX0:
+               case VARYING_SLOT_BOUNDING_BOX1:
+                  max_components = 4;
+                  break;
+               case VARYING_SLOT_CLIP_DIST0:
+               case VARYING_SLOT_CLIP_DIST1:
+                  max_components = s.num_slots;
+                  break;
+               case VARYING_SLOT_CULL_DIST0:
+               case VARYING_SLOT_CULL_DIST1:
+                  max_components = s.num_slots;
+                  break;
+               case VARYING_SLOT_TESS_LEVEL_OUTER:
+                  max_components = 4;
+                  break;
+               case VARYING_SLOT_TESS_LEVEL_INNER:
+                  max_components = 2;
+                  break;
+               case VARYING_SLOT_PRIMITIVE_ID:
+               case VARYING_SLOT_LAYER:
+               case VARYING_SLOT_VIEWPORT:
+               case VARYING_SLOT_FACE:
+               case VARYING_SLOT_PSIZ:
+               case VARYING_SLOT_VIEW_INDEX:
+               case VARYING_SLOT_VIEWPORT_MASK:
+                  max_components = 1;
+                  break;
+               default:
+                  unreachable("???");
+               }
+            } else if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
+               if (s.location == VERT_ATTRIB_POINT_SIZE)
+                  max_components = 1;
+               else if (s.location < VERT_ATTRIB_GENERIC0)
+                  max_components = 4;
+               else
+                  max_components = frac + max_components;
+            } else if (bit_size == 16)
+               max_components = align(max_components, 2);
+            else if (bit_size == 8)
+               max_components = align(max_components, 4);
+            if (c + (bit_size == 64 ? max_components * 2 : max_components) > 4)
+               c = 0;
+            const struct glsl_type *vec_type;
+            bool is_compact = false;
+            if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
+               vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(type), max_components);
+            } else {
+               switch (s.location) {
+               case VARYING_SLOT_CLIP_DIST0:
+               case VARYING_SLOT_CLIP_DIST1:
+               case VARYING_SLOT_CULL_DIST0:
+               case VARYING_SLOT_CULL_DIST1:
+               case VARYING_SLOT_TESS_LEVEL_OUTER:
+               case VARYING_SLOT_TESS_LEVEL_INNER:
+                  vec_type = glsl_array_type(glsl_float_type(), max_components, sizeof(uint32_t));
+                  is_compact = true;
+                  break;
+               default:
+                  vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(type), max_components);
+                  break;
+               }
+            }
+            /* reset the mode for nir_is_arrayed_io to work */
+            bool is_arrayed = io_instr_is_arrayed(intr);
+            if (is_indirect) {
+               /* indirect array access requires the full array in a single variable */
+               unsigned slot_count = s.num_slots;
+               if (bit_size == 64 && slot_count > 1)
+                  slot_count /= 2;
+               if (slot_count > 1)
+                  vec_type = glsl_array_type(vec_type, slot_count, glsl_get_explicit_stride(vec_type));
+            }
+            if (is_arrayed)
+               vec_type = glsl_array_type(vec_type, 32 /* MAX_PATCH_VERTICES */, glsl_get_explicit_stride(vec_type));
+            nir_variable *found = find_io_var_with_semantics(nir, mode, mode, s, location, c, is_load);
+            if (found) {
+               if (glsl_get_vector_elements(glsl_without_array(found->type)) < glsl_get_vector_elements(glsl_without_array(vec_type))) {
+                  /* enlarge existing vars if necessary */
+                  found->type = vec_type;
+               }
+               continue;
+            }
+
+            char name[1024];
+            if (c)
+               snprintf(name, sizeof(name), "slot_%u_c%u", location, c);
+            else
+               snprintf(name, sizeof(name), "slot_%u", location);
+            nir_variable *old_var = find_io_var_with_semantics(nir, nir_var_mem_shared, mode, s, location, c, is_load);
+            nir_variable *var = nir_variable_create(nir, mode, vec_type, old_var ? old_var->name : name);
+            var->data.mode = mode;
+            var->type = vec_type;
+            var->data.driver_location = nir_intrinsic_base(intr) + slot_offset;
+            var->data.location_frac = c;
+            var->data.location = location;
+            var->data.patch = location >= VARYING_SLOT_PATCH0 ||
+                              ((nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) &&
+                               (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER));
+            /* set flat by default */
+            if (nir->info.stage == MESA_SHADER_FRAGMENT && mode == nir_var_shader_in)
+               var->data.interpolation = INTERP_MODE_FLAT;
+            var->data.fb_fetch_output = s.fb_fetch_output;
+            var->data.index = s.dual_source_blend_index;
+            var->data.precision = s.medium_precision;
+            var->data.compact = is_compact;
+         }
+      }
+   }
+   nir_foreach_variable_with_modes(var, nir, nir_var_mem_shared)
+      var->data.mode = nir_var_shader_temp;
+   nir_fixup_deref_modes(nir);
+   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
+}
+
+
+static bool
+eliminate_io_wrmasks_instr(const nir_instr *instr, const void *data)
+{
+   const nir_shader *nir = data;
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   switch (intr->intrinsic) {
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_primitive_output:
+   case nir_intrinsic_store_per_vertex_output:
+      break;
+   default:
+      return false;
+   }
+   unsigned src_components = nir_intrinsic_src_components(intr, 0);
+   unsigned wrmask = nir_intrinsic_write_mask(intr);
+   unsigned num_components = util_bitcount(wrmask);
+   if (num_components != src_components)
+      return true;
+   if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64)
+      num_components *= 2;
+   if (nir->xfb_info) {
+      nir_io_semantics s = nir_intrinsic_io_semantics(intr);
+      nir_src *src_offset = nir_get_io_offset_src(intr);
+      if (nir_src_is_const(*src_offset)) {
+         unsigned slot_offset = nir_src_as_uint(*src_offset);
+         for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
+            if (nir->xfb_info->outputs[i].location == s.location + slot_offset) {
+               unsigned xfb_components = util_bitcount(nir->xfb_info->outputs[i].component_mask);
+               if (xfb_components != MIN2(4, num_components))
+                  return true;
+               num_components -= xfb_components;
+               if (!num_components)
+                  break;
+            }
+         }
+      } else {
+         for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
+            if (nir->xfb_info->outputs[i].location >= s.location &&
+                nir->xfb_info->outputs[i].location < s.location + s.num_slots) {
+               unsigned xfb_components = util_bitcount(nir->xfb_info->outputs[i].component_mask);
+               if (xfb_components < MIN2(num_components, 4))
+                  return true;
+               num_components -= xfb_components;
+               if (!num_components)
+                  break;
+            }
+         }
+      }
+   }
+   return false;
+}
+
+static int
+zink_type_size(const struct glsl_type *type, bool bindless)
+{
+   return glsl_count_attribute_slots(type, false);
+}
+
+static nir_mem_access_size_align
+mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
+                         uint8_t bit_size, uint32_t align,
+                         uint32_t align_offset, bool offset_is_const,
+                         const void *cb_data)
+{
+   align = nir_combined_align(align, align_offset);
+
+   assert(util_is_power_of_two_nonzero(align));
+
+   return (nir_mem_access_size_align){
+      .num_components = MIN2(bytes / (bit_size / 8), 4),
+      .bit_size = bit_size,
+      .align = bit_size / 8,
+   };
+}
+
+static uint8_t
+lower_vec816_alu(const nir_instr *instr, const void *cb_data)
+{
+   return 4;
+}
+
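mem_access_size_align_cb clamps every memory access to at most a vec4 of naturally aligned components. A worked example of what it returns for a hypothetical 12-byte, 32-bit, 4-byte-aligned load:

   nir_mem_access_size_align r =
      mem_access_size_align_cb(nir_intrinsic_load_global, /*bytes*/ 12,
                               /*bit_size*/ 32, /*align*/ 4, /*align_offset*/ 0,
                               /*offset_is_const*/ true, NULL);
   /* r.num_components == 3  (12 bytes / 4-byte components, capped at 4) */
   /* r.bit_size       == 32 (the bit size is passed through unchanged)  */
   /* r.align          == 4  (natural alignment of one component)        */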
 struct zink_shader *
-zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
-                   const struct pipe_stream_output_info *so_info)
+zink_shader_create(struct zink_screen *screen, struct nir_shader *nir)
 {
    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
    bool have_psiz = false;
@@ -4780,6 +5328,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
       nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_EDGE);

    ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
+   ret->sinfo.have_workgroup_memory_explicit_layout = screen->info.have_KHR_workgroup_memory_explicit_layout;
    ret->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];

    util_queue_fence_init(&ret->precompile.fence);
@@ -4789,13 +5338,50 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
    ret->programs = _mesa_pointer_set_create(NULL);
    simple_mtx_init(&ret->lock, mtx_plain);

-   nir_variable_mode indirect_derefs_modes = 0;
-   if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
-       nir->info.stage == MESA_SHADER_TESS_EVAL)
-      indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
+   nir_lower_io_options lower_io_flags = 0;
+   if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
+      lower_io_flags = nir_lower_io_lower_64bit_to_32;
+   else if (!screen->info.feats.features.shaderFloat64)
+      lower_io_flags = nir_lower_io_lower_64bit_float_to_32;
+   bool temp_inputs = nir->info.stage != MESA_SHADER_VERTEX && nir->info.inputs_read & BITFIELD_RANGE(VARYING_SLOT_CLIP_DIST0, 4);
+   bool temp_outputs = nir->info.stage != MESA_SHADER_FRAGMENT && (nir->info.outputs_read | nir->info.outputs_written) & BITFIELD_RANGE(VARYING_SLOT_CLIP_DIST0, 4);
+   if (temp_inputs || temp_outputs) {
+      NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), temp_outputs, temp_inputs);
+      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
+      NIR_PASS_V(nir, nir_split_var_copies);
+      NIR_PASS_V(nir, nir_lower_var_copies);
+   }
+   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out, zink_type_size, lower_io_flags);
+   if (nir->info.stage == MESA_SHADER_VERTEX)
+      lower_io_flags |= nir_lower_io_lower_64bit_to_32;
+   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in, zink_type_size, lower_io_flags);
+   nir->info.io_lowered = true;
+
+   if (nir->info.stage == MESA_SHADER_KERNEL) {
+      nir_lower_mem_access_bit_sizes_options lower_mem_access_options = {
+         .modes = nir_var_all,
+         .may_lower_unaligned_stores_to_atomics = true,
+         .callback = mem_access_size_align_cb,
+         .cb_data = screen,
+      };
+      NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &lower_mem_access_options);
+      NIR_PASS_V(nir, nir_lower_alu_width, lower_vec816_alu, NULL);
+      NIR_PASS_V(nir, nir_lower_alu_vec8_16_srcs);
+   }

-   NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
-              UINT32_MAX);
+   optimize_nir(nir, NULL, true);
+   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
+      if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
+         NIR_PASS_V(nir, lower_bindless_io);
+         break;
+      }
+   }
+   nir_gather_xfb_info_from_intrinsics(nir);
+   NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_shader_out, eliminate_io_wrmasks_instr, nir);
+   /* clean up io to improve direct access */
+   optimize_nir(nir, NULL, true);
+   rework_io_vars(nir, nir_var_shader_in);
+   rework_io_vars(nir, nir_var_shader_out);

    if (nir->info.stage < MESA_SHADER_COMPUTE)
       create_gfx_pushconst(nir);
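nir_lower_io converts all variable access into load_input/store_output-style intrinsics, using zink_type_size above to measure driver_location in vec4-sized attribute slots. The asymmetry in the call shape is deliberate: vertex-stage inputs are always split to 32-bit, while outputs are split only when the device features demand it. A sketch of the pattern:

   /* outputs: flags derived from shaderInt64/shaderFloat64 support */
   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out, zink_type_size, lower_io_flags);
   /* vertex inputs: always force the 64-bit split (gallium hands the
    * driver 64-bit attribs as 32-bit uint pairs, per the comment earlier
    * in this file) */
   if (nir->info.stage == MESA_SHADER_VERTEX)
      lower_io_flags |= nir_lower_io_lower_64bit_to_32;
   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in, zink_type_size, lower_io_flags);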
@@ -4813,9 +5399,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
       NIR_PASS_V(nir, fixup_io_locations);

    NIR_PASS_V(nir, lower_basevertex);
-   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
    NIR_PASS_V(nir, lower_baseinstance);
-   NIR_PASS_V(nir, lower_sparse);
    NIR_PASS_V(nir, split_bitfields);
    NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
@@ -4839,48 +5423,31 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
          subgroup_options.subgroup_size = 1;
          subgroup_options.lower_vote_trivial = true;
       }
+      subgroup_options.lower_inverse_ballot = true;
       NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
    }

-   if (so_info && so_info->num_outputs)
-      NIR_PASS_V(nir, split_blocks);
-
-   optimize_nir(nir, NULL);
+   optimize_nir(nir, NULL, true);
    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
    NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
                                           nir_lower_demote_if_to_cf |
                                           nir_lower_terminate_if_to_cf));
-   NIR_PASS_V(nir, nir_lower_fragcolor,
-         nir->info.fs.color_is_dual_source ? 1 : 8);
-   NIR_PASS_V(nir, lower_64bit_vertex_attribs);
+
    bool needs_size = analyze_io(ret, nir);
    NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
    /* run in compile if there could be inlined uniforms */
    if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
-      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
+      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
       NIR_PASS_V(nir, rewrite_bo_access, screen);
       NIR_PASS_V(nir, remove_bo_access, ret);
    }

-   if (zink_debug & ZINK_DEBUG_NIR) {
-      fprintf(stderr, "NIR shader:\n---8<---\n");
-      nir_print_shader(nir, stderr);
-      fprintf(stderr, "---8<---\n");
-   }
-
    struct zink_bindless_info bindless = {0};
    bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
-   bool has_bindless_io = false;
-   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
+   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out)
       var->data.is_xfb = false;
-      if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
-         has_bindless_io = true;
-      }
-   }
-   if (has_bindless_io)
-      NIR_PASS_V(nir, lower_bindless_io);

-   optimize_nir(nir, NULL);
+   optimize_nir(nir, NULL, true);
    prune_io(nir);

    scan_nir(screen, nir, ret);
@@ -4935,7 +5502,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
       } else if (var->data.mode == nir_var_mem_ssbo) {
          ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
          var->data.descriptor_set = screen->desc_set_id[ztype];
-         var->data.binding = zink_binding(nir->info.stage,
+         var->data.binding = zink_binding(clamp_stage(&nir->info),
                                           VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                           var->data.driver_location,
                                           screen->compact_descriptors);
@@ -4992,8 +5559,8 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
    if (!nir->info.internal)
       nir_foreach_shader_out_variable(var, nir)
         var->data.explicit_xfb_buffer = 0;
-   if (so_info && so_info->num_outputs)
-      update_so_info(ret, nir, so_info, nir->info.outputs_written, have_psiz);
+   if (nir->xfb_info && nir->xfb_info->output_count && nir->info.outputs_written)
+      update_so_info(ret, nir, nir->info.outputs_written, have_psiz);
    else if (have_psiz) {
       bool have_fake_psiz = false;
       nir_variable *psiz = NULL;
@@ -5005,9 +5572,11 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
            psiz = var;
        }
     }
-      if (have_fake_psiz && psiz) {
+      /* maintenance5 allows injected psiz deletion */
+      if (have_fake_psiz && (psiz || screen->info.have_KHR_maintenance5)) {
         psiz->data.mode = nir_var_shader_temp;
         nir_fixup_deref_modes(nir);
+         delete_psiz_store(nir, true);
         NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
      }
   }
@@ -5040,8 +5609,9 @@ zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
    if (!screen->info.feats.features.shaderImageGatherExtended)
       tex_opts.lower_tg4_offsets = true;
    NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
-   optimize_nir(nir, NULL);
-   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+   optimize_nir(nir, NULL, false);
+   if (nir->info.stage == MESA_SHADER_VERTEX)
+      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
    if (screen->driconf.inline_uniforms)
       nir_find_inlinable_uniforms(nir);

@@ -5051,7 +5621,29 @@ zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
 void
 zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
 {
+   _mesa_set_destroy(shader->programs, NULL);
+   util_queue_fence_wait(&shader->precompile.fence);
+   util_queue_fence_destroy(&shader->precompile.fence);
+   zink_descriptor_shader_deinit(screen, shader);
+   if (screen->info.have_EXT_shader_object) {
+      VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
+   } else {
+      if (shader->precompile.obj.mod)
+         VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
+      if (shader->precompile.gpl)
+         VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
+   }
+   blob_finish(&shader->blob);
+   ralloc_free(shader->spirv);
+   free(shader->precompile.bindings);
+   ralloc_free(shader);
+}
+
+void
+zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
+{
    assert(shader->info.stage != MESA_SHADER_COMPUTE);
+   util_queue_fence_wait(&shader->precompile.fence);
    set_foreach(shader->programs, entry) {
       struct zink_gfx_program *prog = (void*)entry->key;
       gl_shader_stage stage = shader->info.stage;
@@ -5115,7 +5707,7 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)

    if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
        shader->non_fs.generated_tcs) {
       /* automatically destroy generated tcs shaders when tes is destroyed */
-      zink_shader_free(screen, shader->non_fs.generated_tcs);
+      zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
       shader->non_fs.generated_tcs = NULL;
    }
    for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
@@ -5123,33 +5715,22 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
         if (shader->info.stage != MESA_SHADER_FRAGMENT &&
             shader->non_fs.generated_gs[i][j]) {
            /* automatically destroy generated gs shaders when owner is destroyed */
-            zink_shader_free(screen, shader->non_fs.generated_gs[i][j]);
+            zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
            shader->non_fs.generated_gs[i][j] = NULL;
         }
      }
   }
-   _mesa_set_destroy(shader->programs, NULL);
-   util_queue_fence_wait(&shader->precompile.fence);
-   util_queue_fence_destroy(&shader->precompile.fence);
-   zink_descriptor_shader_deinit(screen, shader);
-   if (shader->precompile.mod)
-      VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.mod, NULL);
-   if (shader->precompile.gpl)
-      VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
-   blob_finish(&shader->blob);
-   ralloc_free(shader->spirv);
-   free(shader->precompile.bindings);
-   ralloc_free(shader);
+   zink_shader_free(screen, shader);
 }
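With this split, zink_shader_free releases only the object itself, while zink_gfx_shader_free first unlinks the shader from dependent programs and its generated tcs/gs variants before delegating. A sketch of the calling convention this implies for a hypothetical caller:

   if (shader->info.stage == MESA_SHADER_COMPUTE)
      zink_shader_free(screen, shader);     /* no program/variant bookkeeping */
   else
      zink_gfx_shader_free(screen, shader); /* walks programs, generated tcs/gs */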
-VkShaderModule
-zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
+struct zink_shader_object
+zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
 {
    assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
    /* shortcut all the nir passes since we just have to change this one word */
    zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
-   return zink_shader_spirv_compile(screen, zs, NULL);
+   return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
 }

 /* creating a passthrough tcs shader that's roughly:
@@ -5174,7 +5755,7 @@ void main()
 */
 struct zink_shader *
-zink_shader_tcs_create(struct zink_screen *screen, nir_shader *vs, unsigned vertices_per_patch, nir_shader **nir_ret)
+zink_shader_tcs_create(struct zink_screen *screen, nir_shader *tes, unsigned vertices_per_patch, nir_shader **nir_ret)
 {
    struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
    util_queue_fence_init(&ret->precompile.fence);
@@ -5187,20 +5768,22 @@ zink_shader_tcs_create(struct zink_screen *screen, nir_shader *vs, unsigned vert
    fn->is_entrypoint = true;
    nir_function_impl *impl = nir_function_impl_create(fn);

-   nir_builder b;
-   nir_builder_init(&b, impl);
-   b.cursor = nir_before_block(nir_start_block(impl));
+   nir_builder b = nir_builder_at(nir_before_impl(impl));

-   nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
+   nir_def *invocation_id = nir_load_invocation_id(&b);

-   nir_foreach_shader_out_variable(var, vs) {
-      const struct glsl_type *type = var->type;
+   nir_foreach_shader_in_variable(var, tes) {
+      if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+         continue;
       const struct glsl_type *in_type = var->type;
       const struct glsl_type *out_type = var->type;
       char buf[1024];
       snprintf(buf, sizeof(buf), "%s_out", var->name);
-      in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
-      out_type = glsl_array_type(type, vertices_per_patch, 0);
+      if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
+         const struct glsl_type *type = var->type;
+         in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
+         out_type = glsl_array_type(type, vertices_per_patch, 0);
+      }

       nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
       nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
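The three-line builder setup was replaced here by the newer one-shot initializer. The equivalent idiom in isolation (a sketch of the pattern, not additional pass code):

   /* position a fresh builder at the top of the entrypoint impl */
   nir_builder b = nir_builder_at(nir_before_impl(impl));
   nir_def *invocation_id = nir_load_invocation_id(&b);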
@@ -5228,12 +5811,10 @@ zink_shader_tcs_create(struct zink_screen *screen, nir_shader *vs, unsigned vert

    create_gfx_pushconst(nir);

-   nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32,
-                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL),
-                                                    .base = 1, .range = 8);
-   nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32,
-                                                    nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL),
-                                                    .base = 2, .range = 16);
+   nir_def *load_inner = nir_load_push_constant_zink(&b, 2, 32,
+                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL));
+   nir_def *load_outer = nir_load_push_constant_zink(&b, 4, 32,
+                                                     nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL));

    for (unsigned i = 0; i < 2; i++) {
       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
@@ -5247,8 +5828,7 @@ zink_shader_tcs_create(struct zink_screen *screen, nir_shader *vs, unsigned vert
    nir->info.tess.tcs_vertices_out = vertices_per_patch;
    nir_validate_shader(nir, "created");

-   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
-   optimize_nir(nir, NULL);
+   optimize_nir(nir, NULL, true);
    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
    NIR_PASS_V(nir, nir_convert_from_ssa, true);
@@ -5295,3 +5875,11 @@ zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
 #endif
    nir_serialize(blob, nir, strip);
 }
+
+void
+zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
+{
+   nir_shader *nir = zink_shader_deserialize(screen, zs);
+   nir_print_shader(nir, fp);
+   ralloc_free(nir);
+}
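The new zink_print_shader helper deserializes the shader's stored NIR blob, prints it, and frees the temporary copy, so it can be called at any point after creation. A hypothetical debug call site:

   /* dump a shader's stored NIR to stderr while debugging */
   zink_print_shader(screen, zs, stderr);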