author     Jonathan Gray <jsg@cvs.openbsd.org>   2023-11-02 04:53:47 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>   2023-11-02 04:53:47 +0000
commit     b44518130b33cadb5c1d619e9e936ae0e0dbf7cb (patch)
tree       6069eb03c39fbc79808a7d94f857118cce75cbe3 /lib/mesa/src/gallium/auxiliary
parent     32aeb3c41fedbbd7b11aacfec48e8f699d16bff0 (diff)
Merge Mesa 23.1.9
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary')
89 files changed, 1583 insertions, 2144 deletions
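The largest functional change in this merge is in cso_context: the context now embeds a small cso_context_base that holds the pipe_context pointer plus a cached draw_vbo entry point, selected once at context creation (and updated when u_vbuf is bound or unbound), so the inline cso_draw_vbo becomes a single indirect call instead of re-checking the vbuf state on every draw. A minimal sketch of that dispatch pattern, using hypothetical stand-in types rather than the real Mesa structures:

/* Sketch only: stand-in types, not the actual Mesa declarations. */
struct pipe_context_sketch;            /* stand-in for struct pipe_context */
struct draw_args_sketch;               /* stand-in for the draw parameters  */

typedef void (*draw_vbo_fn)(struct pipe_context_sketch *pipe,
                            const struct draw_args_sketch *args);

struct cso_base_sketch {
   struct pipe_context_sketch *pipe;
   draw_vbo_fn draw_vbo;               /* either the u_vbuf path or the driver path */
};

static inline void
cso_draw_vbo_sketch(struct cso_base_sketch *base,
                    const struct draw_args_sketch *args)
{
   /* The target was chosen once when the context was set up, so the
    * per-draw cost is just this indirect call. */
   base->draw_vbo(base->pipe, args);
}
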
diff --git a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.c b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.c index efce6f673..d41fa27fa 100644 --- a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.c @@ -50,6 +50,7 @@ #include "cso_cache/cso_hash.h" #include "cso_context.h" #include "driver_trace/tr_dump.h" +#include "util/u_threaded_context.h" /** * Per-shader sampler information. @@ -63,7 +64,7 @@ struct sampler_info struct cso_context { - struct pipe_context *pipe; + struct cso_context_base base; struct u_vbuf *vbuf; struct u_vbuf *vbuf_current; @@ -124,13 +125,6 @@ struct cso_context { }; -struct pipe_context * -cso_get_pipe_context(struct cso_context *cso) -{ - return cso->pipe; -} - - static inline boolean delete_cso(struct cso_context *ctx, void *state, enum cso_cache_type type) @@ -163,7 +157,7 @@ delete_cso(struct cso_context *ctx, assert(0); } - cso_delete_state(ctx->pipe, state, type); + cso_delete_state(ctx->base.pipe, state, type); return true; } @@ -253,18 +247,34 @@ cso_init_vbuf(struct cso_context *cso, unsigned flags) bool uses_user_vertex_buffers = !(flags & CSO_NO_USER_VERTEX_BUFFERS); bool needs64b = !(flags & CSO_NO_64B_VERTEX_BUFFERS); - u_vbuf_get_caps(cso->pipe->screen, &caps, needs64b); + u_vbuf_get_caps(cso->base.pipe->screen, &caps, needs64b); /* Enable u_vbuf if needed. */ if (caps.fallback_always || (uses_user_vertex_buffers && caps.fallback_only_for_user_vbuffers)) { - cso->vbuf = u_vbuf_create(cso->pipe, &caps); + assert(!cso->base.pipe->vbuf); + cso->vbuf = u_vbuf_create(cso->base.pipe, &caps); + cso->base.pipe->vbuf = cso->vbuf; cso->always_use_vbuf = caps.fallback_always; - cso->vbuf_current = caps.fallback_always ? cso->vbuf : NULL; + cso->vbuf_current = cso->base.pipe->vbuf = + caps.fallback_always ? cso->vbuf : NULL; } } +static void +cso_draw_vbo_default(struct pipe_context *pipe, + const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + if (pipe->vbuf) + u_vbuf_draw_vbo(pipe, info, drawid_offset, indirect, draws, num_draws); + else + pipe->draw_vbo(pipe, info, drawid_offset, indirect, draws, num_draws); +} struct cso_context * cso_create_context(struct pipe_context *pipe, unsigned flags) @@ -276,12 +286,27 @@ cso_create_context(struct pipe_context *pipe, unsigned flags) cso_cache_init(&ctx->cache, pipe); cso_cache_set_sanitize_callback(&ctx->cache, sanitize_hash, ctx); - ctx->pipe = pipe; + ctx->base.pipe = pipe; ctx->sample_mask = ~0; if (!(flags & CSO_NO_VBUF)) cso_init_vbuf(ctx, flags); + /* Only drivers using u_threaded_context benefit from the direct call. + * This is because drivers can change draw_vbo, but u_threaded_context + * never changes it. 
+ */ + if (pipe->draw_vbo == tc_draw_vbo) { + if (ctx->vbuf_current) + ctx->base.draw_vbo = u_vbuf_draw_vbo; + else + ctx->base.draw_vbo = pipe->draw_vbo; + } else if (ctx->always_use_vbuf) { + ctx->base.draw_vbo = u_vbuf_draw_vbo; + } else { + ctx->base.draw_vbo = cso_draw_vbo_default; + } + /* Enable for testing: */ if (0) cso_set_maximum_cache_size(&ctx->cache, 4); @@ -330,15 +355,15 @@ cso_unbind_context(struct cso_context *ctx) bool dumping = trace_dumping_enabled_locked(); if (dumping) trace_dumping_stop_locked(); - if (ctx->pipe) { - ctx->pipe->bind_blend_state(ctx->pipe, NULL); - ctx->pipe->bind_rasterizer_state(ctx->pipe, NULL); + if (ctx->base.pipe) { + ctx->base.pipe->bind_blend_state(ctx->base.pipe, NULL); + ctx->base.pipe->bind_rasterizer_state(ctx->base.pipe, NULL); { static struct pipe_sampler_view *views[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { NULL }; static struct pipe_shader_buffer ssbos[PIPE_MAX_SHADER_BUFFERS] = { 0 }; static void *zeros[PIPE_MAX_SAMPLERS] = { NULL }; - struct pipe_screen *scr = ctx->pipe->screen; + struct pipe_screen *scr = ctx->base.pipe->screen; enum pipe_shader_type sh; for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) { switch (sh) { @@ -375,44 +400,47 @@ cso_unbind_context(struct cso_context *ctx) assert(maxcb <= PIPE_MAX_CONSTANT_BUFFERS); assert(maximg <= PIPE_MAX_SHADER_IMAGES); if (maxsam > 0) { - ctx->pipe->bind_sampler_states(ctx->pipe, sh, 0, maxsam, zeros); + ctx->base.pipe->bind_sampler_states(ctx->base.pipe, sh, 0, maxsam, zeros); } if (maxview > 0) { - ctx->pipe->set_sampler_views(ctx->pipe, sh, 0, maxview, 0, false, views); + ctx->base.pipe->set_sampler_views(ctx->base.pipe, sh, 0, maxview, 0, false, views); } if (maxssbo > 0) { - ctx->pipe->set_shader_buffers(ctx->pipe, sh, 0, maxssbo, ssbos, 0); + ctx->base.pipe->set_shader_buffers(ctx->base.pipe, sh, 0, maxssbo, ssbos, 0); } if (maximg > 0) { - ctx->pipe->set_shader_images(ctx->pipe, sh, 0, 0, maximg, NULL); + ctx->base.pipe->set_shader_images(ctx->base.pipe, sh, 0, 0, maximg, NULL); } for (int i = 0; i < maxcb; i++) { - ctx->pipe->set_constant_buffer(ctx->pipe, sh, i, false, NULL); + ctx->base.pipe->set_constant_buffer(ctx->base.pipe, sh, i, false, NULL); } } } - ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, NULL); + ctx->base.pipe->bind_depth_stencil_alpha_state(ctx->base.pipe, NULL); struct pipe_stencil_ref sr = {0}; - ctx->pipe->set_stencil_ref(ctx->pipe, sr); - ctx->pipe->bind_fs_state(ctx->pipe, NULL); - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, false, NULL); - ctx->pipe->bind_vs_state(ctx->pipe, NULL); - ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, false, NULL); + ctx->base.pipe->set_stencil_ref(ctx->base.pipe, sr); + ctx->base.pipe->bind_fs_state(ctx->base.pipe, NULL); + ctx->base.pipe->set_constant_buffer(ctx->base.pipe, PIPE_SHADER_FRAGMENT, 0, false, NULL); + ctx->base.pipe->bind_vs_state(ctx->base.pipe, NULL); + ctx->base.pipe->set_constant_buffer(ctx->base.pipe, PIPE_SHADER_VERTEX, 0, false, NULL); if (ctx->has_geometry_shader) { - ctx->pipe->bind_gs_state(ctx->pipe, NULL); + ctx->base.pipe->bind_gs_state(ctx->base.pipe, NULL); } if (ctx->has_tessellation) { - ctx->pipe->bind_tcs_state(ctx->pipe, NULL); - ctx->pipe->bind_tes_state(ctx->pipe, NULL); + ctx->base.pipe->bind_tcs_state(ctx->base.pipe, NULL); + ctx->base.pipe->bind_tes_state(ctx->base.pipe, NULL); } if (ctx->has_compute_shader) { - ctx->pipe->bind_compute_state(ctx->pipe, NULL); + ctx->base.pipe->bind_compute_state(ctx->base.pipe, NULL); } - 
ctx->pipe->bind_vertex_elements_state(ctx->pipe, NULL); + ctx->base.pipe->bind_vertex_elements_state(ctx->base.pipe, NULL); if (ctx->has_streamout) - ctx->pipe->set_stream_output_targets(ctx->pipe, 0, NULL, NULL); + ctx->base.pipe->set_stream_output_targets(ctx->base.pipe, 0, NULL, NULL); + + struct pipe_framebuffer_state fb = {0}; + ctx->base.pipe->set_framebuffer_state(ctx->base.pipe, &fb); } util_unreference_framebuffer_state(&ctx->fb); @@ -432,9 +460,9 @@ cso_unbind_context(struct cso_context *ctx) * If the cso context is reused (with the same pipe context), * need to really make sure the context state doesn't get out of sync. */ - ctx->pipe->set_sample_mask(ctx->pipe, ctx->sample_mask); - if (ctx->pipe->set_min_samples) - ctx->pipe->set_min_samples(ctx->pipe, ctx->min_samples); + ctx->base.pipe->set_sample_mask(ctx->base.pipe, ctx->sample_mask); + if (ctx->base.pipe->set_min_samples) + ctx->base.pipe->set_min_samples(ctx->base.pipe, ctx->min_samples); if (dumping) trace_dumping_start_locked(); } @@ -451,6 +479,8 @@ cso_destroy_context(struct cso_context *ctx) if (ctx->vbuf) u_vbuf_destroy(ctx->vbuf); + + ctx->base.pipe->vbuf = NULL; FREE(ctx); } @@ -499,7 +529,7 @@ cso_set_blend(struct cso_context *ctx, memset(&cso->state, 0, sizeof cso->state); memcpy(&cso->state, templ, key_size); - cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); + cso->data = ctx->base.pipe->create_blend_state(ctx->base.pipe, &cso->state); iter = cso_insert_state(&ctx->cache, hash_key, CSO_BLEND, cso); if (cso_hash_iter_is_null(iter)) { @@ -514,7 +544,7 @@ cso_set_blend(struct cso_context *ctx, if (ctx->blend != handle) { ctx->blend = handle; - ctx->pipe->bind_blend_state(ctx->pipe, handle); + ctx->base.pipe->bind_blend_state(ctx->base.pipe, handle); } return PIPE_OK; } @@ -533,7 +563,7 @@ cso_restore_blend(struct cso_context *ctx) { if (ctx->blend != ctx->blend_saved) { ctx->blend = ctx->blend_saved; - ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend_saved); + ctx->base.pipe->bind_blend_state(ctx->base.pipe, ctx->blend_saved); } ctx->blend_saved = NULL; } @@ -558,7 +588,7 @@ cso_set_depth_stencil_alpha(struct cso_context *ctx, return PIPE_ERROR_OUT_OF_MEMORY; memcpy(&cso->state, templ, sizeof(*templ)); - cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, + cso->data = ctx->base.pipe->create_depth_stencil_alpha_state(ctx->base.pipe, &cso->state); iter = cso_insert_state(&ctx->cache, hash_key, @@ -576,7 +606,7 @@ cso_set_depth_stencil_alpha(struct cso_context *ctx, if (ctx->depth_stencil != handle) { ctx->depth_stencil = handle; - ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle); + ctx->base.pipe->bind_depth_stencil_alpha_state(ctx->base.pipe, handle); } return PIPE_OK; } @@ -595,7 +625,7 @@ cso_restore_depth_stencil_alpha(struct cso_context *ctx) { if (ctx->depth_stencil != ctx->depth_stencil_saved) { ctx->depth_stencil = ctx->depth_stencil_saved; - ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, + ctx->base.pipe->bind_depth_stencil_alpha_state(ctx->base.pipe, ctx->depth_stencil_saved); } ctx->depth_stencil_saved = NULL; @@ -625,7 +655,7 @@ cso_set_rasterizer(struct cso_context *ctx, return PIPE_ERROR_OUT_OF_MEMORY; memcpy(&cso->state, templ, sizeof(*templ)); - cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); + cso->data = ctx->base.pipe->create_rasterizer_state(ctx->base.pipe, &cso->state); iter = cso_insert_state(&ctx->cache, hash_key, CSO_RASTERIZER, cso); if (cso_hash_iter_is_null(iter)) { @@ -643,7 +673,7 @@ cso_set_rasterizer(struct 
cso_context *ctx, ctx->flatshade_first = templ->flatshade_first; if (ctx->vbuf) u_vbuf_set_flatshade_first(ctx->vbuf, ctx->flatshade_first); - ctx->pipe->bind_rasterizer_state(ctx->pipe, handle); + ctx->base.pipe->bind_rasterizer_state(ctx->base.pipe, handle); } return PIPE_OK; } @@ -666,7 +696,7 @@ cso_restore_rasterizer(struct cso_context *ctx) ctx->flatshade_first = ctx->flatshade_first_saved; if (ctx->vbuf) u_vbuf_set_flatshade_first(ctx->vbuf, ctx->flatshade_first); - ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rasterizer_saved); + ctx->base.pipe->bind_rasterizer_state(ctx->base.pipe, ctx->rasterizer_saved); } ctx->rasterizer_saved = NULL; } @@ -677,7 +707,7 @@ cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle) { if (ctx->fragment_shader != handle) { ctx->fragment_shader = handle; - ctx->pipe->bind_fs_state(ctx->pipe, handle); + ctx->base.pipe->bind_fs_state(ctx->base.pipe, handle); } } @@ -694,7 +724,7 @@ static void cso_restore_fragment_shader(struct cso_context *ctx) { if (ctx->fragment_shader_saved != ctx->fragment_shader) { - ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved); + ctx->base.pipe->bind_fs_state(ctx->base.pipe, ctx->fragment_shader_saved); ctx->fragment_shader = ctx->fragment_shader_saved; } ctx->fragment_shader_saved = NULL; @@ -706,7 +736,7 @@ cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle) { if (ctx->vertex_shader != handle) { ctx->vertex_shader = handle; - ctx->pipe->bind_vs_state(ctx->pipe, handle); + ctx->base.pipe->bind_vs_state(ctx->base.pipe, handle); } } @@ -723,7 +753,7 @@ static void cso_restore_vertex_shader(struct cso_context *ctx) { if (ctx->vertex_shader_saved != ctx->vertex_shader) { - ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved); + ctx->base.pipe->bind_vs_state(ctx->base.pipe, ctx->vertex_shader_saved); ctx->vertex_shader = ctx->vertex_shader_saved; } ctx->vertex_shader_saved = NULL; @@ -736,7 +766,7 @@ cso_set_framebuffer(struct cso_context *ctx, { if (memcmp(&ctx->fb, fb, sizeof(*fb)) != 0) { util_copy_framebuffer_state(&ctx->fb, fb); - ctx->pipe->set_framebuffer_state(ctx->pipe, fb); + ctx->base.pipe->set_framebuffer_state(ctx->base.pipe, fb); } } @@ -753,7 +783,7 @@ cso_restore_framebuffer(struct cso_context *ctx) { if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) { util_copy_framebuffer_state(&ctx->fb, &ctx->fb_saved); - ctx->pipe->set_framebuffer_state(ctx->pipe, &ctx->fb); + ctx->base.pipe->set_framebuffer_state(ctx->base.pipe, &ctx->fb); util_unreference_framebuffer_state(&ctx->fb_saved); } } @@ -765,7 +795,7 @@ cso_set_viewport(struct cso_context *ctx, { if (memcmp(&ctx->vp, vp, sizeof(*vp))) { ctx->vp = *vp; - ctx->pipe->set_viewport_states(ctx->pipe, 0, 1, vp); + ctx->base.pipe->set_viewport_states(ctx->base.pipe, 0, 1, vp); } } @@ -805,7 +835,7 @@ cso_restore_viewport(struct cso_context *ctx) { if (memcmp(&ctx->vp, &ctx->vp_saved, sizeof(ctx->vp))) { ctx->vp = ctx->vp_saved; - ctx->pipe->set_viewport_states(ctx->pipe, 0, 1, &ctx->vp); + ctx->base.pipe->set_viewport_states(ctx->base.pipe, 0, 1, &ctx->vp); } } @@ -815,7 +845,7 @@ cso_set_sample_mask(struct cso_context *ctx, unsigned sample_mask) { if (ctx->sample_mask != sample_mask) { ctx->sample_mask = sample_mask; - ctx->pipe->set_sample_mask(ctx->pipe, sample_mask); + ctx->base.pipe->set_sample_mask(ctx->base.pipe, sample_mask); } } @@ -837,9 +867,9 @@ cso_restore_sample_mask(struct cso_context *ctx) void cso_set_min_samples(struct cso_context *ctx, unsigned min_samples) { - if (ctx->min_samples != 
min_samples && ctx->pipe->set_min_samples) { + if (ctx->min_samples != min_samples && ctx->base.pipe->set_min_samples) { ctx->min_samples = min_samples; - ctx->pipe->set_min_samples(ctx->pipe, min_samples); + ctx->base.pipe->set_min_samples(ctx->base.pipe, min_samples); } } @@ -864,7 +894,7 @@ cso_set_stencil_ref(struct cso_context *ctx, { if (memcmp(&ctx->stencil_ref, &sr, sizeof(ctx->stencil_ref))) { ctx->stencil_ref = sr; - ctx->pipe->set_stencil_ref(ctx->pipe, sr); + ctx->base.pipe->set_stencil_ref(ctx->base.pipe, sr); } } @@ -882,7 +912,7 @@ cso_restore_stencil_ref(struct cso_context *ctx) if (memcmp(&ctx->stencil_ref, &ctx->stencil_ref_saved, sizeof(ctx->stencil_ref))) { ctx->stencil_ref = ctx->stencil_ref_saved; - ctx->pipe->set_stencil_ref(ctx->pipe, ctx->stencil_ref); + ctx->base.pipe->set_stencil_ref(ctx->base.pipe, ctx->stencil_ref); } } @@ -893,7 +923,7 @@ cso_set_render_condition(struct cso_context *ctx, boolean condition, enum pipe_render_cond_flag mode) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; if (ctx->render_condition != query || ctx->render_condition_mode != mode || @@ -931,7 +961,7 @@ cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle) if (ctx->has_geometry_shader && ctx->geometry_shader != handle) { ctx->geometry_shader = handle; - ctx->pipe->bind_gs_state(ctx->pipe, handle); + ctx->base.pipe->bind_gs_state(ctx->base.pipe, handle); } } @@ -956,7 +986,7 @@ cso_restore_geometry_shader(struct cso_context *ctx) } if (ctx->geometry_shader_saved != ctx->geometry_shader) { - ctx->pipe->bind_gs_state(ctx->pipe, ctx->geometry_shader_saved); + ctx->base.pipe->bind_gs_state(ctx->base.pipe, ctx->geometry_shader_saved); ctx->geometry_shader = ctx->geometry_shader_saved; } ctx->geometry_shader_saved = NULL; @@ -970,7 +1000,7 @@ cso_set_tessctrl_shader_handle(struct cso_context *ctx, void *handle) if (ctx->has_tessellation && ctx->tessctrl_shader != handle) { ctx->tessctrl_shader = handle; - ctx->pipe->bind_tcs_state(ctx->pipe, handle); + ctx->base.pipe->bind_tcs_state(ctx->base.pipe, handle); } } @@ -995,7 +1025,7 @@ cso_restore_tessctrl_shader(struct cso_context *ctx) } if (ctx->tessctrl_shader_saved != ctx->tessctrl_shader) { - ctx->pipe->bind_tcs_state(ctx->pipe, ctx->tessctrl_shader_saved); + ctx->base.pipe->bind_tcs_state(ctx->base.pipe, ctx->tessctrl_shader_saved); ctx->tessctrl_shader = ctx->tessctrl_shader_saved; } ctx->tessctrl_shader_saved = NULL; @@ -1009,7 +1039,7 @@ cso_set_tesseval_shader_handle(struct cso_context *ctx, void *handle) if (ctx->has_tessellation && ctx->tesseval_shader != handle) { ctx->tesseval_shader = handle; - ctx->pipe->bind_tes_state(ctx->pipe, handle); + ctx->base.pipe->bind_tes_state(ctx->base.pipe, handle); } } @@ -1034,7 +1064,7 @@ cso_restore_tesseval_shader(struct cso_context *ctx) } if (ctx->tesseval_shader_saved != ctx->tesseval_shader) { - ctx->pipe->bind_tes_state(ctx->pipe, ctx->tesseval_shader_saved); + ctx->base.pipe->bind_tes_state(ctx->base.pipe, ctx->tesseval_shader_saved); ctx->tesseval_shader = ctx->tesseval_shader_saved; } ctx->tesseval_shader_saved = NULL; @@ -1048,7 +1078,7 @@ cso_set_compute_shader_handle(struct cso_context *ctx, void *handle) if (ctx->has_compute_shader && ctx->compute_shader != handle) { ctx->compute_shader = handle; - ctx->pipe->bind_compute_state(ctx->pipe, handle); + ctx->base.pipe->bind_compute_state(ctx->base.pipe, handle); } } @@ -1073,7 +1103,7 @@ cso_restore_compute_shader(struct cso_context *ctx) } if (ctx->compute_shader_saved != 
ctx->compute_shader) { - ctx->pipe->bind_compute_state(ctx->pipe, ctx->compute_shader_saved); + ctx->base.pipe->bind_compute_state(ctx->base.pipe, ctx->compute_shader_saved); ctx->compute_shader = ctx->compute_shader_saved; } ctx->compute_shader_saved = NULL; @@ -1143,7 +1173,7 @@ cso_set_vertex_elements_direct(struct cso_context *ctx, struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS]; util_lower_uint64_vertex_elements(&new_elems, &new_count, tmp); - cso->data = ctx->pipe->create_vertex_elements_state(ctx->pipe, new_count, + cso->data = ctx->base.pipe->create_vertex_elements_state(ctx->base.pipe, new_count, new_elems); iter = cso_insert_state(&ctx->cache, hash_key, CSO_VELEMENTS, cso); @@ -1159,7 +1189,7 @@ cso_set_vertex_elements_direct(struct cso_context *ctx, if (ctx->velements != handle) { ctx->velements = handle; - ctx->pipe->bind_vertex_elements_state(ctx->pipe, handle); + ctx->base.pipe->bind_vertex_elements_state(ctx->base.pipe, handle); } } @@ -1207,7 +1237,7 @@ cso_restore_vertex_elements(struct cso_context *ctx) if (ctx->velements != ctx->velements_saved) { ctx->velements = ctx->velements_saved; - ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->velements_saved); + ctx->base.pipe->bind_vertex_elements_state(ctx->base.pipe, ctx->velements_saved); } ctx->velements_saved = NULL; } @@ -1232,7 +1262,7 @@ cso_set_vertex_buffers(struct cso_context *ctx, return; } - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; pipe->set_vertex_buffers(pipe, start_slot, count, unbind_trailing_count, take_ownership, buffers); } @@ -1260,7 +1290,7 @@ cso_set_vertex_buffers_and_elements(struct cso_context *ctx, const struct pipe_vertex_buffer *vbuffers) { struct u_vbuf *vbuf = ctx->vbuf; - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; if (vbuf && (ctx->always_use_vbuf || uses_user_vertex_buffers)) { if (!ctx->vbuf_current) { @@ -1271,7 +1301,9 @@ cso_set_vertex_buffers_and_elements(struct cso_context *ctx, /* Unset this to make sure the CSO is re-bound on the next use. */ ctx->velements = NULL; - ctx->vbuf_current = vbuf; + ctx->vbuf_current = pipe->vbuf = vbuf; + if (pipe->draw_vbo == tc_draw_vbo) + ctx->base.draw_vbo = u_vbuf_draw_vbo; unbind_trailing_vb_count = 0; } @@ -1292,7 +1324,9 @@ cso_set_vertex_buffers_and_elements(struct cso_context *ctx, /* Unset this to make sure the CSO is re-bound on the next use. 
*/ u_vbuf_unset_vertex_elements(vbuf); - ctx->vbuf_current = NULL; + ctx->vbuf_current = pipe->vbuf = NULL; + if (pipe->draw_vbo == tc_draw_vbo) + ctx->base.draw_vbo = pipe->draw_vbo; unbind_trailing_vb_count = 0; } @@ -1322,7 +1356,7 @@ set_sampler(struct cso_context *ctx, enum pipe_shader_type shader_stage, return false; memcpy(&cso->state, templ, sizeof(*templ)); - cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); + cso->data = ctx->base.pipe->create_sampler_state(ctx->base.pipe, &cso->state); cso->hash_key = hash_key; iter = cso_insert_state(&ctx->cache, hash_key, CSO_SAMPLER, cso); @@ -1381,7 +1415,7 @@ cso_single_sampler_done(struct cso_context *ctx, if (ctx->max_sampler_seen == -1) return; - ctx->pipe->bind_sampler_states(ctx->pipe, shader_stage, 0, + ctx->base.pipe->bind_sampler_states(ctx->base.pipe, shader_stage, 0, ctx->max_sampler_seen + 1, info->samplers); ctx->max_sampler_seen = -1; @@ -1497,7 +1531,7 @@ cso_set_stream_outputs(struct cso_context *ctx, struct pipe_stream_output_target **targets, const unsigned *offsets) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; uint i; if (!ctx->has_streamout) { @@ -1544,7 +1578,7 @@ cso_save_stream_outputs(struct cso_context *ctx) static void cso_restore_stream_outputs(struct cso_context *ctx) { - struct pipe_context *pipe = ctx->pipe; + struct pipe_context *pipe = ctx->base.pipe; uint i; unsigned offset[PIPE_MAX_SO_BUFFERS]; @@ -1624,7 +1658,7 @@ cso_save_state(struct cso_context *cso, unsigned state_mask) if (state_mask & CSO_BIT_VIEWPORT) cso_save_viewport(cso); if (state_mask & CSO_BIT_PAUSE_QUERIES) - cso->pipe->set_active_query_state(cso->pipe, false); + cso->base.pipe->set_active_query_state(cso->base.pipe, false); } @@ -1653,15 +1687,15 @@ cso_restore_state(struct cso_context *cso, unsigned unbind) if (state_mask & CSO_BIT_VERTEX_SHADER) cso_restore_vertex_shader(cso); if (unbind & CSO_UNBIND_FS_SAMPLERVIEWS) - cso->pipe->set_sampler_views(cso->pipe, PIPE_SHADER_FRAGMENT, 0, 0, + cso->base.pipe->set_sampler_views(cso->base.pipe, PIPE_SHADER_FRAGMENT, 0, 0, cso->max_fs_samplerviews, false, NULL); if (unbind & CSO_UNBIND_FS_SAMPLERVIEW0) - cso->pipe->set_sampler_views(cso->pipe, PIPE_SHADER_FRAGMENT, 0, 0, + cso->base.pipe->set_sampler_views(cso->base.pipe, PIPE_SHADER_FRAGMENT, 0, 0, 1, false, NULL); if (state_mask & CSO_BIT_FRAGMENT_SAMPLERS) cso_restore_fragment_samplers(cso); if (unbind & CSO_UNBIND_FS_IMAGE0) - cso->pipe->set_shader_images(cso->pipe, PIPE_SHADER_FRAGMENT, 0, 0, 1, NULL); + cso->base.pipe->set_shader_images(cso->base.pipe, PIPE_SHADER_FRAGMENT, 0, 0, 1, NULL); if (state_mask & CSO_BIT_FRAMEBUFFER) cso_restore_framebuffer(cso); if (state_mask & CSO_BIT_BLEND) @@ -1677,17 +1711,17 @@ cso_restore_state(struct cso_context *cso, unsigned unbind) if (state_mask & CSO_BIT_VIEWPORT) cso_restore_viewport(cso); if (unbind & CSO_UNBIND_VS_CONSTANTS) - cso->pipe->set_constant_buffer(cso->pipe, PIPE_SHADER_VERTEX, 0, false, NULL); + cso->base.pipe->set_constant_buffer(cso->base.pipe, PIPE_SHADER_VERTEX, 0, false, NULL); if (unbind & CSO_UNBIND_FS_CONSTANTS) - cso->pipe->set_constant_buffer(cso->pipe, PIPE_SHADER_FRAGMENT, 0, false, NULL); + cso->base.pipe->set_constant_buffer(cso->base.pipe, PIPE_SHADER_FRAGMENT, 0, false, NULL); if (state_mask & CSO_BIT_VERTEX_ELEMENTS) cso_restore_vertex_elements(cso); if (unbind & CSO_UNBIND_VERTEX_BUFFER0) - cso->pipe->set_vertex_buffers(cso->pipe, 0, 0, 1, false, NULL); + cso->base.pipe->set_vertex_buffers(cso->base.pipe, 0, 
0, 1, false, NULL); if (state_mask & CSO_BIT_STREAM_OUTPUTS) cso_restore_stream_outputs(cso); if (state_mask & CSO_BIT_PAUSE_QUERIES) - cso->pipe->set_active_query_state(cso->pipe, true); + cso->base.pipe->set_active_query_state(cso->base.pipe, true); cso->saved_state = 0; } @@ -1736,53 +1770,6 @@ cso_restore_compute_state(struct cso_context *cso) /* drawing */ void -cso_draw_vbo(struct cso_context *cso, - const struct pipe_draw_info *info, - unsigned drawid_offset, - const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias draw) -{ - struct u_vbuf *vbuf = cso->vbuf_current; - - /* We can't have both indirect drawing and SO-vertex-count drawing */ - assert(!indirect || - indirect->buffer == NULL || - indirect->count_from_stream_output == NULL); - - /* We can't have SO-vertex-count drawing with an index buffer */ - assert(info->index_size == 0 || - !indirect || - indirect->count_from_stream_output == NULL); - - if (vbuf) { - u_vbuf_draw_vbo(vbuf, info, drawid_offset, indirect, &draw, 1); - } else { - struct pipe_context *pipe = cso->pipe; - pipe->draw_vbo(pipe, info, drawid_offset, indirect, &draw, 1); - } -} - -/* info->draw_id can be changed by the callee if increment_draw_id is true. */ -void -cso_multi_draw(struct cso_context *cso, - struct pipe_draw_info *info, - unsigned drawid_offset, - const struct pipe_draw_start_count_bias *draws, - unsigned num_draws) -{ - struct u_vbuf *vbuf = cso->vbuf_current; - - if (vbuf) { - u_vbuf_draw_vbo(vbuf, info, drawid_offset, NULL, draws, num_draws); - } else { - struct pipe_context *pipe = cso->pipe; - - pipe->draw_vbo(pipe, info, drawid_offset, NULL, draws, num_draws); - } -} - - -void cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count) { struct pipe_draw_info info; @@ -1799,7 +1786,7 @@ cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count) draw.count = count; draw.index_bias = 0; - cso_draw_vbo(cso, &info, 0, NULL, draw); + cso_draw_vbo(cso, &info, 0, NULL, &draw, 1); } @@ -1824,5 +1811,5 @@ cso_draw_arrays_instanced(struct cso_context *cso, uint mode, draw.count = count; draw.index_bias = 0; - cso_draw_vbo(cso, &info, 0, NULL, draw); + cso_draw_vbo(cso, &info, 0, NULL, &draw, 1); } diff --git a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.h b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.h index 4b9ec2098..85f98644d 100644 --- a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.h @@ -42,6 +42,18 @@ extern "C" { struct cso_context; struct u_vbuf; +struct cso_context_base { + struct pipe_context *pipe; + + /* This is equal to either pipe_context::draw_vbo or u_vbuf_draw_vbo. 
*/ + void (*draw_vbo)(struct pipe_context *pipe, + const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws); +}; + #define CSO_NO_USER_VERTEX_BUFFERS (1 << 0) #define CSO_NO_64B_VERTEX_BUFFERS (1 << 1) #define CSO_NO_VBUF (1 << 2) @@ -55,9 +67,6 @@ cso_unbind_context(struct cso_context *ctx); void cso_destroy_context(struct cso_context *cso); -struct pipe_context * -cso_get_pipe_context(struct cso_context *cso); - enum pipe_error cso_set_blend(struct cso_context *cso, const struct pipe_blend_state *blend); @@ -210,21 +219,6 @@ cso_set_vertex_buffers_and_elements(struct cso_context *ctx, const struct pipe_vertex_buffer *vbuffers); void -cso_draw_vbo(struct cso_context *cso, - const struct pipe_draw_info *info, - unsigned drawid_offset, - const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias draw); - -/* info->draw_id can be changed by the callee if increment_draw_id is true. */ -void -cso_multi_draw(struct cso_context *cso, - struct pipe_draw_info *info, - unsigned drawid_offset, - const struct pipe_draw_start_count_bias *draws, - unsigned num_draws); - -void cso_draw_arrays_instanced(struct cso_context *cso, uint mode, uint start, uint count, uint start_instance, uint instance_count); @@ -232,6 +226,43 @@ cso_draw_arrays_instanced(struct cso_context *cso, uint mode, void cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count); +/* Inline functions. */ + +static inline struct pipe_context * +cso_get_pipe_context(struct cso_context *cso) +{ + struct cso_context_base *cso_base = (struct cso_context_base *)cso; + + return cso_base->pipe; +} + +static ALWAYS_INLINE void +cso_draw_vbo(struct cso_context *cso, + struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + /* We can't have both indirect drawing and SO-vertex-count drawing */ + assert(!indirect || + indirect->buffer == NULL || + indirect->count_from_stream_output == NULL); + + /* We can't have SO-vertex-count drawing with an index buffer */ + assert(info->index_size == 0 || + !indirect || + indirect->count_from_stream_output == NULL); + + /* Indirect only uses indirect->draw_count, not num_draws. 
*/ + assert(!indirect || num_draws == 1); + + struct cso_context_base *cso_base = (struct cso_context_base *)cso; + + cso_base->draw_vbo(cso_base->pipe, info, drawid_offset, indirect, draws, + num_draws); +} + #ifdef __cplusplus } #endif diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_context.h b/lib/mesa/src/gallium/auxiliary/draw/draw_context.h index 3986d6469..44acf0116 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_context.h +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_context.h @@ -39,6 +39,7 @@ #include "pipe/p_state.h" +#include "nir.h" struct pipe_context; struct draw_context; @@ -130,7 +131,8 @@ boolean draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); boolean -draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); +draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe, + nir_alu_type bool_type); boolean draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe); diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c index 50c157bc3..8b3a15227 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c @@ -784,7 +784,7 @@ draw_llvm_create(struct draw_context *draw, LLVMContextRef context) if (!llvm->context) { llvm->context = LLVMContextCreate(); -#if LLVM_VERSION_MAJOR >= 15 +#if LLVM_VERSION_MAJOR == 15 LLVMContextSetOpaquePointers(llvm->context, false); #endif diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 7dac40785..d2bc475b2 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -107,6 +107,7 @@ struct aa_transform_context { uint64_t tempsUsed; /**< bitmask */ int colorOutput; /**< which output is the primary color */ int maxInput, maxGeneric; /**< max input index found */ + int numImm; /**< number of immediate regsters */ int colorTemp, aaTemp; /**< temp registers */ }; @@ -147,6 +148,18 @@ aa_transform_decl(struct tgsi_transform_context *ctx, ctx->emit_declaration(ctx, decl); } +/** + * TGSI immediate declaration transform callback. + */ +static void +aa_immediate(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *imm) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *)ctx; + + ctx->emit_immediate(ctx, imm); + aactx->numImm++; +} /** * Find the lowest zero bit, or -1 if bitfield is all ones. 
@@ -182,6 +195,9 @@ aa_transform_prolog(struct tgsi_transform_context *ctx) /* declare new temp regs */ tgsi_transform_temp_decl(ctx, aactx->aaTemp); tgsi_transform_temp_decl(ctx, aactx->colorTemp); + + /* declare new immediate reg */ + tgsi_transform_immediate_decl(ctx, 2.0, -1.0, 0.0, 0.25); } @@ -215,6 +231,26 @@ aa_transform_epilog(struct tgsi_transform_context *ctx) inst.Src[1].Register.Negate = true; ctx->emit_instruction(ctx, &inst); + /* linelength * 2 - 1 */ + tgsi_transform_op3_swz_inst(ctx, TGSI_OPCODE_MAD, + TGSI_FILE_TEMPORARY, aactx->aaTemp, + TGSI_WRITEMASK_Y, + TGSI_FILE_INPUT, aactx->maxInput + 1, + TGSI_SWIZZLE_W, false, + TGSI_FILE_IMMEDIATE, aactx->numImm, + TGSI_SWIZZLE_X, + TGSI_FILE_IMMEDIATE, aactx->numImm, + TGSI_SWIZZLE_Y); + + /* MIN height alpha */ + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN, + TGSI_FILE_TEMPORARY, aactx->aaTemp, + TGSI_WRITEMASK_Z, + TGSI_FILE_TEMPORARY, aactx->aaTemp, + TGSI_SWIZZLE_Z, + TGSI_FILE_TEMPORARY, aactx->aaTemp, + TGSI_SWIZZLE_Y, false); + /* MUL width / height alpha */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, aactx->aaTemp, @@ -292,6 +328,7 @@ generate_aaline_fs(struct aaline_stage *aaline) transform.base.epilog = aa_transform_epilog; transform.base.transform_instruction = aa_transform_inst; transform.base.transform_declaration = aa_transform_decl; + transform.base.transform_immediate = aa_immediate; aaline_fs.tokens = tgsi_transform_shader(orig_fs->tokens, newLen, &transform.base); if (!aaline_fs.tokens) @@ -324,7 +361,7 @@ generate_aaline_fs_nir(struct aaline_stage *aaline) if (!aaline_fs.ir.nir) return FALSE; - nir_lower_aaline_fs(aaline_fs.ir.nir, &aaline->fs->generic_attrib); + nir_lower_aaline_fs(aaline_fs.ir.nir, &aaline->fs->generic_attrib, NULL, NULL); aaline->fs->aaline_fs = aaline->driver_create_fs_state(pipe, &aaline_fs); if (aaline->fs->aaline_fs == NULL) return FALSE; @@ -383,36 +420,13 @@ aaline_line(struct draw_stage *stage, struct prim_header *header) float *pos, *tex; float dx = header->v[1]->data[posPos][0] - header->v[0]->data[posPos][0]; float dy = header->v[1]->data[posPos][1] - header->v[0]->data[posPos][1]; - float a = atan2f(dy, dx); - float c_a = cosf(a), s_a = sinf(a); - float half_length; + float length = sqrtf(dx * dx + dy * dy); + float c_a = dx / length, s_a = dy / length; + float half_length = 0.5 * length; float t_l, t_w; uint i; - half_length = 0.5f * sqrtf(dx * dx + dy * dy); - - if (half_length < 0.5f) { - /* - * The logic we use for "normal" sized segments is incorrect - * for very short segments (basically because we only have - * one value to interpolate, not a distance to each endpoint). - * Therefore, we calculate half_length differently, so that for - * original line length (near) 0, we get alpha 0 - otherwise - * max alpha would still be 0.5. This also prevents us from - * artifacts due to degenerated lines (the endpoints being - * identical, which would still receive anywhere from alpha - * 0-0.5 otherwise) (at least the pstipple stage may generate - * such lines due to float inaccuracies if line length is very - * close to a integer). - * Might not be fully accurate neither (because the "strength" of - * the line is going to be determined by how close to the pixel - * center those 1 or 2 fragments are) but it's probably the best - * we can do. 
- */ - half_length = 2.0f * half_length; - } else { - half_length = half_length + 0.5f; - } + half_length = half_length + 0.5f; t_w = half_width; t_l = 0.5f; diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 56fff8788..b1b66f653 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -99,6 +99,9 @@ struct aapoint_stage /** vertex attrib slot containing position */ uint pos_slot; + /** Type of Boolean variables on this hardware. */ + nir_alu_type bool_type; + /** Currently bound fragment shader */ struct aapoint_fragment_shader *fs; @@ -418,7 +421,7 @@ generate_aapoint_fs_nir(struct aapoint_stage *aapoint) if (!aapoint_fs.ir.nir) return FALSE; - nir_lower_aapoint_fs(aapoint_fs.ir.nir, &aapoint->fs->generic_attrib); + nir_lower_aapoint_fs(aapoint_fs.ir.nir, &aapoint->fs->generic_attrib, aapoint->bool_type); aapoint->fs->aapoint_fs = aapoint->driver_create_fs_state(pipe, &aapoint_fs); if (aapoint->fs->aapoint_fs == NULL) goto fail; @@ -689,7 +692,7 @@ draw_aapoint_prepare_outputs(struct draw_context *draw, static struct aapoint_stage * -draw_aapoint_stage(struct draw_context *draw) +draw_aapoint_stage(struct draw_context *draw, nir_alu_type bool_type) { struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); if (!aapoint) @@ -704,6 +707,7 @@ draw_aapoint_stage(struct draw_context *draw) aapoint->stage.flush = aapoint_flush; aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; aapoint->stage.destroy = aapoint_destroy; + aapoint->bool_type = bool_type; if (!draw_alloc_temp_verts(&aapoint->stage, 4)) goto fail; @@ -793,7 +797,8 @@ aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) */ boolean draw_install_aapoint_stage(struct draw_context *draw, - struct pipe_context *pipe) + struct pipe_context *pipe, + nir_alu_type bool_type) { struct aapoint_stage *aapoint; @@ -802,7 +807,7 @@ draw_install_aapoint_stage(struct draw_context *draw, /* * Create / install AA point drawing / prim stage */ - aapoint = draw_aapoint_stage(draw); + aapoint = draw_aapoint_stage(draw, bool_type); if (!aapoint) return FALSE; diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_private.h b/lib/mesa/src/gallium/auxiliary/draw/draw_private.h index 6b3de1d40..1780070fa 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_private.h +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_private.h @@ -184,6 +184,7 @@ struct draw_context enum pipe_prim_type prim; unsigned opt; /**< bitmask of PT_x flags */ unsigned eltSize; /* saved eltSize for flushing */ + unsigned viewid; /* saved viewid for flushing */ ubyte vertices_per_patch; boolean rebind_parameters; @@ -588,21 +589,4 @@ draw_clamp_viewport_idx(int idx) return ((PIPE_MAX_VIEWPORTS > idx && idx >= 0) ? idx : 0); } - -/** - * Adds two unsigned integers and if the addition - * overflows then it returns the value from - * the overflow_value variable. 
- */ -static inline unsigned -draw_overflow_uadd(unsigned a, unsigned b, - unsigned overflow_value) -{ - unsigned res = a + b; - if (res < a) { - res = overflow_value; - } - return res; -} - #endif /* DRAW_PRIVATE_H */ diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c index 10908e2d2..3d5d5f088 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c @@ -103,9 +103,9 @@ draw_pt_arrays(struct draw_context *draw, */ draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); frontend = NULL; - } else if (draw->pt.eltSize != draw->pt.user.eltSize) { - /* Flush draw state if eltSize changed. - * This could be improved so only the frontend is flushed since it + } else if (draw->pt.eltSize != draw->pt.user.eltSize || draw->pt.viewid != draw->pt.user.viewid) { + /* Flush draw state if eltSize or viewid changed. + * eltSize changes could be improved so only the frontend is flushed since it * converts all indices to ushorts and the fetch part of the middle * always prepares both linear and indexed. */ @@ -121,6 +121,7 @@ draw_pt_arrays(struct draw_context *draw, draw->pt.frontend = frontend; draw->pt.eltSize = draw->pt.user.eltSize; + draw->pt.viewid = draw->pt.user.viewid; draw->pt.prim = prim; draw->pt.opt = opt; } @@ -360,28 +361,26 @@ prim_restart_loop(struct draw_context *draw, struct pipe_draw_start_count_bias cur = *draw_info; cur.count = 0; - /* The largest index within a loop using the i variable as the index. - * Used for overflow detection */ - const unsigned MAX_LOOP_IDX = 0xffffffff; - for (unsigned j = 0; j < draw_info->count; j++) { - unsigned restart_idx = 0; - unsigned i = draw_overflow_uadd(draw_info->start, j, MAX_LOOP_IDX); - switch (draw->pt.user.eltSize) { - case 1: - restart_idx = ((const uint8_t*)elements)[i]; - break; - case 2: - restart_idx = ((const uint16_t*)elements)[i]; - break; - case 4: - restart_idx = ((const uint32_t*)elements)[i]; - break; - default: - assert(0 && "bad eltSize in draw_arrays()"); + unsigned index = 0; + unsigned i = util_clamped_uadd(draw_info->start, j); + if (i < elt_max) { + switch (draw->pt.user.eltSize) { + case 1: + index = ((const uint8_t*)elements)[i]; + break; + case 2: + index = ((const uint16_t*)elements)[i]; + break; + case 4: + index = ((const uint32_t*)elements)[i]; + break; + default: + assert(0 && "bad eltSize in draw_arrays()"); + } } - if (i < elt_max && restart_idx == info->restart_index) { + if (index == info->restart_index) { if (cur.count > 0) { /* draw elts up to prev pos */ draw_pt_arrays(draw, info->mode, info->index_bias_varies, &cur, 1); diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 2df7c53a8..edaa78af3 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -120,7 +120,7 @@ llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme) } } - variant = draw_gs_llvm_create_variant(llvm, gs->info.num_outputs, key); + variant = draw_gs_llvm_create_variant(llvm, draw_total_gs_outputs(draw), key); if (variant) { list_add(&variant->list_item_local.list, &shader->variants.list); @@ -406,7 +406,7 @@ llvm_middle_end_prepare(struct draw_pt_middle_end *middle, static unsigned get_num_consts_robust(struct draw_context *draw, unsigned *sizes, unsigned idx) { - unsigned const_bytes = sizes[idx]; + uint64_t const_bytes = 
sizes[idx]; if (const_bytes < sizeof(float)) return 0; @@ -829,7 +829,7 @@ llvm_middle_end_linear_run(struct draw_pt_middle_end *middle, fetch_info.elts = NULL; prim_info.linear = TRUE; - prim_info.start = 0; + prim_info.start = start; prim_info.count = count; prim_info.elts = NULL; prim_info.prim = prim_type(fpme->input_prim, prim_flags); diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit.c index 0455f40df..86548b817 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit.c @@ -23,6 +23,7 @@ * DEALINGS IN THE SOFTWARE. */ +#include "util/macros.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -33,9 +34,6 @@ #define SEGMENT_SIZE 1024 #define MAP_SIZE 256 -/* The largest possible index within an index buffer */ -#define MAX_ELT_IDX 0xffffffff - struct vsplit_frontend { struct draw_pt_front_end base; struct draw_context *draw; @@ -116,7 +114,7 @@ vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch) static inline unsigned vsplit_get_base_idx(unsigned start, unsigned fetch) { - return draw_overflow_uadd(start, fetch, MAX_ELT_IDX); + return util_clamped_uadd(start, fetch); } diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h index ee4fd56e2..53fa51091 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h @@ -23,9 +23,6 @@ * DEALINGS IN THE SOFTWARE. */ -#define CONCAT2(name, elt_type) name ## elt_type -#define CONCAT(name, elt_type) CONCAT2(name, elt_type) - #ifdef ELT_TYPE /** @@ -33,8 +30,8 @@ * (rebased) index buffer as the draw elements. */ static boolean -CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit, - unsigned istart, unsigned icount) +CONCAT2(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit, + unsigned istart, unsigned icount) { struct draw_context *draw = vsplit->draw; const ELT_TYPE *ib = (const ELT_TYPE *) draw->pt.user.elts; @@ -127,11 +124,11 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit, * appended. 
*/ static inline void -CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit, - unsigned flags, - unsigned istart, unsigned icount, - boolean spoken, unsigned ispoken, - boolean close, unsigned iclose) +CONCAT2(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit, + unsigned flags, + unsigned istart, unsigned icount, + boolean spoken, unsigned ispoken, + boolean close, unsigned iclose) { struct draw_context *draw = vsplit->draw; const ELT_TYPE *ib = (const ELT_TYPE *) draw->pt.user.elts; @@ -168,41 +165,41 @@ CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit, static void -CONCAT(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit, - unsigned flags, - unsigned istart, - unsigned icount) +CONCAT2(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit, + unsigned flags, + unsigned istart, + unsigned icount) { - CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit, - flags, istart, icount, FALSE, 0, FALSE, 0); + CONCAT2(vsplit_segment_cache_, ELT_TYPE)(vsplit, + flags, istart, icount, FALSE, 0, FALSE, 0); } static void -CONCAT(vsplit_segment_loop_, ELT_TYPE)(struct vsplit_frontend *vsplit, - unsigned flags, - unsigned istart, - unsigned icount, - unsigned i0) +CONCAT2(vsplit_segment_loop_, ELT_TYPE)(struct vsplit_frontend *vsplit, + unsigned flags, + unsigned istart, + unsigned icount, + unsigned i0) { const boolean close_loop = ((flags) == DRAW_SPLIT_BEFORE); - CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit, - flags, istart, icount, FALSE, 0, close_loop, i0); + CONCAT2(vsplit_segment_cache_, ELT_TYPE)(vsplit, + flags, istart, icount, FALSE, 0, close_loop, i0); } static void -CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit, - unsigned flags, - unsigned istart, - unsigned icount, - unsigned i0) +CONCAT2(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit, + unsigned flags, + unsigned istart, + unsigned icount, + unsigned i0) { const boolean use_spoken = (((flags) & DRAW_SPLIT_BEFORE) != 0); - CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit, - flags, istart, icount, use_spoken, i0, FALSE, 0); + CONCAT2(vsplit_segment_cache_, ELT_TYPE)(vsplit, + flags, istart, icount, use_spoken, i0, FALSE, 0); } @@ -214,7 +211,7 @@ CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit, const unsigned max_count_fan = vsplit->segment_size; #define PRIMITIVE(istart, icount) \ - CONCAT(vsplit_primitive_, ELT_TYPE)(vsplit, istart, icount) + CONCAT2(vsplit_primitive_, ELT_TYPE)(vsplit, istart, icount) #else /* ELT_TYPE */ @@ -294,18 +291,15 @@ vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags, unsigned count #define SEGMENT_SIMPLE(flags, istart, icount) \ - CONCAT(vsplit_segment_simple_, ELT_TYPE)(vsplit, flags, istart, icount) + CONCAT2(vsplit_segment_simple_, ELT_TYPE)(vsplit, flags, istart, icount) #define SEGMENT_LOOP(flags, istart, icount, i0) \ - CONCAT(vsplit_segment_loop_, ELT_TYPE)(vsplit, flags, istart, icount, i0) + CONCAT2(vsplit_segment_loop_, ELT_TYPE)(vsplit, flags, istart, icount, i0) #define SEGMENT_FAN(flags, istart, icount, i0) \ - CONCAT(vsplit_segment_fan_, ELT_TYPE)(vsplit, flags, istart, icount, i0) + CONCAT2(vsplit_segment_fan_, ELT_TYPE)(vsplit, flags, istart, icount, i0) #include "draw_split_tmp.h" -#undef CONCAT2 -#undef CONCAT - #undef ELT_TYPE #undef ADD_CACHE diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_tess.c b/lib/mesa/src/gallium/auxiliary/draw/draw_tess.c index c0430b02a..4240b48e5 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_tess.c 
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_tess.c @@ -189,6 +189,7 @@ int draw_tess_ctrl_shader_run(struct draw_tess_ctrl_shader *shader, shader->draw->statistics.hs_invocations += num_patches; } #ifdef DRAW_LLVM_AVAILABLE + unsigned first_patch = input_prim->start / shader->draw->pt.vertices_per_patch; for (unsigned i = 0; i < num_patches; i++) { uint32_t vert_start = output_verts->count; @@ -196,7 +197,7 @@ int draw_tess_ctrl_shader_run(struct draw_tess_ctrl_shader *shader, llvm_fetch_tcs_input(shader, input_prim, i, shader->draw->pt.vertices_per_patch); - llvm_tcs_run(shader, i); + llvm_tcs_run(shader, first_patch + i); uint32_t old_verts = util_align_npot(vert_start, 16); uint32_t new_verts = util_align_npot(output_verts->count, 16); diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c index dde7f06b2..511c57834 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -65,7 +65,7 @@ #include "lp_bld_arit.h" #include "lp_bld_flow.h" -#if defined(PIPE_ARCH_SSE) +#if DETECT_ARCH_SSE #include <xmmintrin.h> #endif @@ -137,7 +137,7 @@ lp_build_min_simple(struct lp_build_context *bld, else if (type.floating && util_get_cpu_caps()->has_altivec) { if (nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { debug_printf("%s: altivec doesn't support nan return nan behavior\n", - __FUNCTION__); + __func__); } if (type.width == 32 && type.length == 4) { intrinsic = "llvm.ppc.altivec.vminfp"; @@ -291,7 +291,7 @@ lp_build_max_simple(struct lp_build_context *bld, else if (type.floating && util_get_cpu_caps()->has_altivec) { if (nan_behavior == GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN) { debug_printf("%s: altivec doesn't support nan return nan behavior\n", - __FUNCTION__); + __func__); } if (type.width == 32 || type.length == 4) { intrinsic = "llvm.ppc.altivec.vmaxfp"; @@ -2800,7 +2800,7 @@ lp_build_fast_rsqrt(struct lp_build_context *bld, return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a); } else { - debug_printf("%s: emulating fast rsqrt with rcp/sqrt\n", __FUNCTION__); + debug_printf("%s: emulating fast rsqrt with rcp/sqrt\n", __func__); } return lp_build_rcp(bld, lp_build_sqrt(bld, a)); } @@ -3073,7 +3073,7 @@ lp_build_pow(struct lp_build_context *bld, if (gallivm_debug & GALLIVM_DEBUG_PERF && LLVMIsConstant(x) && LLVMIsConstant(y)) { debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); + __func__); } LLVMValueRef cmp = lp_build_cmp(bld, PIPE_FUNC_EQUAL, x, lp_build_const_vec(bld->gallivm, bld->type, 0.0f)); @@ -3157,7 +3157,7 @@ lp_build_polynomial(struct lp_build_context *bld, if (gallivm_debug & GALLIVM_DEBUG_PERF && LLVMIsConstant(x)) { debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); + __func__); } /* @@ -3253,7 +3253,7 @@ lp_build_exp2(struct lp_build_context *bld, if (gallivm_debug & GALLIVM_DEBUG_PERF && LLVMIsConstant(x)) { debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); + __func__); } assert(type.floating && type.width == 32); @@ -3444,7 +3444,7 @@ lp_build_log2_approx(struct lp_build_context *bld, if (gallivm_debug & GALLIVM_DEBUG_PERF && LLVMIsConstant(x)) { debug_printf("%s: inefficient/imprecise constant arithmetic\n", - __FUNCTION__); + __func__); } assert(type.floating && type.width == 32); diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format.h index 
74a625dce..c4505b2fc 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -37,7 +37,7 @@ #include "gallivm/lp_bld.h" #include "gallivm/lp_bld_init.h" -#include "pipe/p_format.h" +#include "util/format/u_formats.h" struct util_format_description; struct lp_type; diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 86cca0bb0..49aa549f8 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -656,7 +656,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) { debug_printf("%s: unpacking %s with floating point\n", - __FUNCTION__, format_desc->short_name); + __func__, format_desc->short_name); } conv_type = lp_float32_vec4_type(); @@ -813,7 +813,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n", - __FUNCTION__, format_desc->short_name); + __func__, format_desc->short_name); } /* @@ -919,7 +919,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n", - __FUNCTION__, format_desc->short_name); + __func__, format_desc->short_name); } /* diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 784b8dc66..a22bf3e4e 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -832,7 +832,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: AoS fetch fallback for %s\n", - __FUNCTION__, format_desc->short_name); + __func__, format_desc->short_name); } tmp_type = type; diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c index 3b346f37d..066cb54a6 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c @@ -83,7 +83,7 @@ uyvy_to_yuv_soa(struct gallivm_state *gallivm, * v = (uyvy >> 8) & 0xff */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64 /* * Avoid shift with per-element count. * No support on x86, gets translated to roughly 5 instructions @@ -167,7 +167,7 @@ yuyv_to_yuv_soa(struct gallivm_state *gallivm, * v = (yuyv) & 0xff */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64 /* * Avoid shift with per-element count. 
* No support on x86, gets translated to roughly 5 instructions diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.c index 2f2506803..f55f2f314 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.c @@ -598,3 +598,62 @@ lp_build_gather_values(struct gallivm_state * gallivm, } return vec; } + +LLVMValueRef +lp_build_masked_gather(struct gallivm_state *gallivm, + unsigned length, + unsigned bit_size, + LLVMTypeRef vec_type, + LLVMValueRef offset_ptr, + LLVMValueRef exec_mask) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef args[4]; + char intrin_name[64]; + +#if LLVM_VERSION_MAJOR >= 16 + snprintf(intrin_name, 64, "llvm.masked.gather.v%ui%u.v%up0", + length, bit_size, length); +#else + snprintf(intrin_name, 64, "llvm.masked.gather.v%ui%u.v%up0i%u", + length, bit_size, length, bit_size); +#endif + + args[0] = offset_ptr; + args[1] = lp_build_const_int32(gallivm, bit_size / 8); + args[2] = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, + LLVMConstNull(LLVMTypeOf(exec_mask)), ""); + args[3] = LLVMConstNull(vec_type); + return lp_build_intrinsic(builder, intrin_name, vec_type, + args, 4, 0); + +} + +void +lp_build_masked_scatter(struct gallivm_state *gallivm, + unsigned length, + unsigned bit_size, + LLVMValueRef offset_ptr, + LLVMValueRef value_vec, + LLVMValueRef exec_mask) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef args[4]; + char intrin_name[64]; + +#if LLVM_VERSION_MAJOR >= 16 + snprintf(intrin_name, 64, "llvm.masked.scatter.v%ui%u.v%up0", + length, bit_size, length); +#else + snprintf(intrin_name, 64, "llvm.masked.scatter.v%ui%u.v%up0i%u", + length, bit_size, length, bit_size); +#endif + + args[0] = value_vec; + args[1] = offset_ptr; + args[2] = lp_build_const_int32(gallivm, bit_size / 8); + args[3] = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, + LLVMConstNull(LLVMTypeOf(exec_mask)), ""); + lp_build_intrinsic(builder, intrin_name, LLVMVoidTypeInContext(gallivm->context), + args, 4, 0); +} diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.h index 7930864e6..5fabed956 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_gather.h @@ -66,4 +66,20 @@ lp_build_gather_values(struct gallivm_state * gallivm, LLVMValueRef * values, unsigned value_count); +LLVMValueRef +lp_build_masked_gather(struct gallivm_state *gallivm, + unsigned length, + unsigned bit_size, + LLVMTypeRef vec_type, + LLVMValueRef offset_ptr, + LLVMValueRef exec_mask); + +void +lp_build_masked_scatter(struct gallivm_state *gallivm, + unsigned length, + unsigned bit_size, + LLVMValueRef offset_ptr, + LLVMValueRef value_vec, + LLVMValueRef exec_mask); + #endif /* LP_BLD_GATHER_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_init.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_init.c index 584ea7386..24d082398 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_config.h" +#include "util/detect.h" #include "pipe/p_compiler.h" #include "util/macros.h" #include "util/u_cpu_detect.h" @@ -50,7 +50,7 @@ #if GALLIVM_USE_NEW_PASS == 1 #include <llvm-c/Transforms/PassBuilder.h> #elif GALLIVM_HAVE_CORO == 1 -#if LLVM_VERSION_MAJOR 
<= 8 && (defined(PIPE_ARCH_AARCH64) || defined (PIPE_ARCH_ARM) || defined(PIPE_ARCH_S390) || defined(PIPE_ARCH_MIPS64)) +#if LLVM_VERSION_MAJOR <= 8 && (DETECT_ARCH_AARCH64 || DETECT_ARCH_ARM || DETECT_ARCH_S390 || DETECT_ARCH_MIPS64) #include <llvm-c/Transforms/IPO.h> #endif #include <llvm-c/Transforms/Coroutines.h> @@ -67,7 +67,6 @@ static const struct debug_named_value lp_bld_perf_flags[] = { DEBUG_NAMED_VALUE_END }; -#ifdef DEBUG unsigned gallivm_debug = 0; static const struct debug_named_value lp_bld_debug_flags[] = { @@ -76,12 +75,14 @@ static const struct debug_named_value lp_bld_debug_flags[] = { { "asm", GALLIVM_DEBUG_ASM, NULL }, { "perf", GALLIVM_DEBUG_PERF, NULL }, { "gc", GALLIVM_DEBUG_GC, NULL }, +/* Don't allow setting DUMP_BC for release builds, since writing the files may be an issue with setuid. */ +#ifdef DEBUG { "dumpbc", GALLIVM_DEBUG_DUMP_BC, NULL }, +#endif DEBUG_NAMED_VALUE_END }; DEBUG_GET_ONCE_FLAGS_OPTION(gallivm_debug, "GALLIVM_DEBUG", lp_bld_debug_flags, 0) -#endif static boolean gallivm_initialized = FALSE; @@ -140,7 +141,7 @@ create_pass_manager(struct gallivm_state *gallivm) } #if GALLIVM_HAVE_CORO == 1 -#if LLVM_VERSION_MAJOR <= 8 && (defined(PIPE_ARCH_AARCH64) || defined (PIPE_ARCH_ARM) || defined(PIPE_ARCH_S390) || defined(PIPE_ARCH_MIPS64)) +#if LLVM_VERSION_MAJOR <= 8 && (DETECT_ARCH_AARCH64 || DETECT_ARCH_ARM || DETECT_ARCH_S390 || DETECT_ARCH_MIPS64) LLVMAddArgumentPromotionPass(gallivm->cgpassmgr); LLVMAddFunctionAttrsPass(gallivm->cgpassmgr); #endif @@ -355,7 +356,7 @@ init_gallivm_state(struct gallivm_state *gallivm, const char *name, if (!gallivm->module) goto fail; -#if defined(PIPE_ARCH_X86) +#if DETECT_ARCH_X86 lp_set_module_stack_alignment_override(gallivm->module, 4); #endif @@ -419,10 +420,23 @@ fail: return FALSE; } +unsigned +lp_build_init_native_width(void) +{ + // Default to 256 until we're confident llvmpipe with 512 is as correct and not slower than 256 + lp_native_vector_width = MIN2(util_get_cpu_caps()->max_vector_bits, 256); + assert(lp_native_vector_width); + + lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH", lp_native_vector_width); + assert(lp_native_vector_width); + + return lp_native_vector_width; +} boolean lp_build_init(void) { + lp_build_init_native_width(); if (gallivm_initialized) return TRUE; @@ -433,21 +447,13 @@ lp_build_init(void) */ LLVMLinkInMCJIT(); -#ifdef DEBUG gallivm_debug = debug_get_option_gallivm_debug(); -#endif gallivm_perf = debug_get_flags_option("GALLIVM_PERF", lp_bld_perf_flags, 0 ); lp_set_target_options(); - // Default to 256 until we're confident llvmpipe with 512 is as correct and not slower than 256 - lp_native_vector_width = MIN2(util_get_cpu_caps()->max_vector_bits, 256); - - lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH", - lp_native_vector_width); - -#ifdef PIPE_ARCH_PPC_64 +#if DETECT_ARCH_PPC_64 /* Set the NJ bit in VSCR to 0 so denormalized values are handled as * specified by IEEE standard (PowerISA 2.06 - Section 6.3). 
This guarantees * that some rounding and half-float to float handling does not round @@ -624,7 +630,7 @@ gallivm_compile_module(struct gallivm_state *gallivm) /* Disable frame pointer omission on debug/profile builds */ /* XXX: And workaround http://llvm.org/PR21435 */ -#if defined(DEBUG) || defined(PROFILE) || defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if defined(DEBUG) || defined(PROFILE) || DETECT_ARCH_X86 || DETECT_ARCH_X86_64 LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim", "true"); LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim-non-leaf", "true"); #endif diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.c index e52ce3713..542a4ba0d 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.c @@ -138,10 +138,6 @@ static const char *attr_to_str(enum lp_func_attr attr) case LP_FUNC_ATTR_INREG: return "inreg"; case LP_FUNC_ATTR_NOALIAS: return "noalias"; case LP_FUNC_ATTR_NOUNWIND: return "nounwind"; - case LP_FUNC_ATTR_READNONE: return "readnone"; - case LP_FUNC_ATTR_READONLY: return "readonly"; - case LP_FUNC_ATTR_WRITEONLY: return "writeonly"; - case LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: return "inaccessiblememonly"; case LP_FUNC_ATTR_CONVERGENT: return "convergent"; case LP_FUNC_ATTR_PRESPLITCORO: return "presplitcoroutine"; default: @@ -182,7 +178,6 @@ lp_add_func_attributes(LLVMValueRef function, unsigned attrib_mask) * Set it for all intrinsics. */ attrib_mask |= LP_FUNC_ATTR_NOUNWIND; - attrib_mask &= ~LP_FUNC_ATTR_LEGACY; while (attrib_mask) { enum lp_func_attr attr = 1u << u_bit_scan(&attrib_mask); @@ -200,7 +195,6 @@ lp_build_intrinsic(LLVMBuilderRef builder, { LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); LLVMValueRef function, call; - bool set_callsite_attrs = !(attr_mask & LP_FUNC_ATTR_LEGACY); LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS]; @@ -229,17 +223,13 @@ lp_build_intrinsic(LLVMBuilderRef builder, abort(); } - if (!set_callsite_attrs) - lp_add_func_attributes(function, attr_mask); - if (gallivm_debug & GALLIVM_DEBUG_IR) { lp_debug_dump_value(function); } } call = LLVMBuildCall2(builder, function_type, function, args, num_args, ""); - if (set_callsite_attrs) - lp_add_func_attributes(call, attr_mask); + lp_add_func_attributes(call, attr_mask); return call; } @@ -335,7 +325,7 @@ lp_build_intrinsic_binary_anylength(struct gallivm_state *gallivm, * so crash and burn. */ debug_printf("%s: should handle arbitrary vector size\n", - __FUNCTION__); + __func__); assert(0); return NULL; } diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.h index a73f64c0d..47a81cba2 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -53,18 +53,8 @@ enum lp_func_attr { LP_FUNC_ATTR_INREG = (1 << 2), LP_FUNC_ATTR_NOALIAS = (1 << 3), LP_FUNC_ATTR_NOUNWIND = (1 << 4), - LP_FUNC_ATTR_READNONE = (1 << 5), - LP_FUNC_ATTR_READONLY = (1 << 6), - LP_FUNC_ATTR_WRITEONLY = (1 << 7), - LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8), - LP_FUNC_ATTR_CONVERGENT = (1 << 9), - LP_FUNC_ATTR_PRESPLITCORO = (1 << 10), - - /* Legacy intrinsic that needs attributes on function declarations - * and they must match the internal LLVM definition exactly, otherwise - * intrinsic selection fails. 
- */ - LP_FUNC_ATTR_LEGACY = (1u << 31), + LP_FUNC_ATTR_CONVERGENT = (1 << 5), + LP_FUNC_ATTR_PRESPLITCORO = (1 << 6), }; void diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h index b0b854ad9..30e5c74fc 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -150,8 +150,6 @@ gallivm_get_shader_param(enum pipe_shader_cap param) case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; case PIPE_SHADER_CAP_DROUND_SUPPORTED: - case PIPE_SHADER_CAP_DFRACEXP_DLDEXP_SUPPORTED: - case PIPE_SHADER_CAP_LDEXP_SUPPORTED: case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: return 0; diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 711db17d9..3bf4534da 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -189,7 +189,7 @@ lp_build_compare(struct gallivm_state *gallivm, assert(func > PIPE_FUNC_NEVER); assert(func < PIPE_FUNC_ALWAYS); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64 /* * There are no unsigned integer comparison instructions in SSE. */ @@ -203,7 +203,7 @@ lp_build_compare(struct gallivm_state *gallivm, func == PIPE_FUNC_GEQUAL) && (gallivm_debug & GALLIVM_DEBUG_PERF)) { debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", - __FUNCTION__, type.length, type.width); + __func__, type.length, type.width); } #endif diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 2279e5acb..5e7a30a6c 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -56,11 +56,9 @@ #include <llvm-c/ExecutionEngine.h> #include <llvm/Target/TargetOptions.h> #include <llvm/ExecutionEngine/ExecutionEngine.h> -#include <llvm/ADT/Triple.h> #include <llvm/Analysis/TargetLibraryInfo.h> #include <llvm/ExecutionEngine/SectionMemoryManager.h> #include <llvm/Support/CommandLine.h> -#include <llvm/Support/Host.h> #include <llvm/Support/PrettyStackTrace.h> #include <llvm/ExecutionEngine/ObjectCache.h> #include <llvm/Support/TargetSelect.h> @@ -68,6 +66,14 @@ #include <llvm/Support/MemoryBuffer.h> #endif +#if LLVM_VERSION_MAJOR >= 17 +#include <llvm/TargetParser/Host.h> +#include <llvm/TargetParser/Triple.h> +#else +#include <llvm/Support/Host.h> +#include <llvm/ADT/Triple.h> +#endif + #if LLVM_VERSION_MAJOR < 11 #include <llvm/IR/CallSite.h> #endif @@ -86,8 +92,8 @@ #endif #include "c11/threads.h" -#include "os/os_thread.h" -#include "pipe/p_config.h" +#include "util/u_thread.h" +#include "util/detect.h" #include "util/u_debug.h" #include "util/u_cpu_detect.h" @@ -353,7 +359,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, * friends for configuring code generation options, like stack alignment. */ TargetOptions options; -#if defined(PIPE_ARCH_X86) && LLVM_VERSION_MAJOR < 13 +#if DETECT_ARCH_X86 && LLVM_VERSION_MAJOR < 13 options.StackAlignmentOverride = 4; #endif @@ -362,7 +368,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, .setTargetOptions(options) .setOptLevel((CodeGenOpt::Level)OptLevel); -#ifdef _WIN32 +#if DETECT_OS_WINDOWS /* * MCJIT works on Windows, but currently only through ELF object format. 
* @@ -370,16 +376,20 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, * different strings for MinGW/MSVC, so better play it safe and be * explicit. */ -# ifdef _WIN64 +# if DETECT_ARCH_X86_64 LLVMSetTarget(M, "x86_64-pc-win32-elf"); -# else +# elif DETECT_ARCH_X86 LLVMSetTarget(M, "i686-pc-win32-elf"); +# elif DETECT_ARCH_AARCH64 + LLVMSetTarget(M, "aarch64-pc-win32-elf"); +# else +# error Unsupported architecture for MCJIT on Windows. # endif #endif llvm::SmallVector<std::string, 16> MAttrs; -#if defined(PIPE_ARCH_ARM) +#if DETECT_ARCH_ARM /* llvm-3.3+ implements sys::getHostCPUFeatures for Arm, * which allows us to enable/disable code generation based * on the results of cpuid on these architectures. @@ -392,7 +402,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, ++f) { MAttrs.push_back(((*f).second ? "+" : "-") + (*f).first().str()); } -#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#elif DETECT_ARCH_X86 || DETECT_ARCH_X86_64 /* * Because we can override cpu caps with environment variables, * so we do not use llvm::sys::getHostCPUFeatures to detect cpu features @@ -424,7 +434,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, MAttrs.push_back(util_get_cpu_caps()->has_avx512dq ? "+avx512dq" : "-avx512dq"); MAttrs.push_back(util_get_cpu_caps()->has_avx512vl ? "+avx512vl" : "-avx512vl"); #endif -#if defined(PIPE_ARCH_ARM) +#if DETECT_ARCH_ARM if (!util_get_cpu_caps()->has_neon) { MAttrs.push_back("-neon"); MAttrs.push_back("-crypto"); @@ -432,7 +442,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, } #endif -#if defined(PIPE_ARCH_PPC) +#if DETECT_ARCH_PPC MAttrs.push_back(util_get_cpu_caps()->has_altivec ? "+altivec" : "-altivec"); /* * Bug 25503 is fixed, by the same fix that fixed @@ -449,7 +459,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, } #endif -#if defined(PIPE_ARCH_MIPS64) +#if DETECT_ARCH_MIPS64 MAttrs.push_back(util_get_cpu_caps()->has_msa ? "+msa" : "-msa"); /* MSA requires a 64-bit FPU register file */ MAttrs.push_back("+fp64"); @@ -481,7 +491,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, * can't handle. Not entirely sure if we really need to do anything yet. */ -#ifdef PIPE_ARCH_PPC_64 +#if DETECT_ARCH_PPC_64 /* * Large programs, e.g. gnome-shell and firefox, may tax the addressability * of the Medium code model once dynamically generated JIT-compiled shader @@ -508,7 +518,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, #endif #endif -#if defined(PIPE_ARCH_MIPS64) +#if DETECT_ARCH_MIPS64 /* * ls3a4000 CPU and ls2k1000 SoC is a mips64r5 compatible with MSA SIMD * instruction set implemented, while ls3a3000 is mips64r2 compatible diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 3c2a44419..1b1c7b0c0 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -56,6 +56,7 @@ */ #define BRILINEAR_FACTOR 2 + /** * Does the given texture wrap mode allow sampling the texture border color? * XXX maybe move this into gallium util code. 
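/*
 * Editor's note, a minimal sketch that is not part of the merged diff: the
 * lp_bld_init.c, lp_bld_logic.c and lp_bld_misc.cpp hunks above replace the
 * old defined(PIPE_ARCH_*) / _WIN32 checks with the DETECT_ARCH_* and
 * DETECT_OS_* macros from util/detect.h.  The assumption illustrated below
 * is that those macros are always defined, to either 0 or 1, so they can be
 * combined in plain #if expressions without defined().
 */
#include "util/detect.h"

#if DETECT_ARCH_X86 || DETECT_ARCH_X86_64
/* x86-only code path, e.g. the 32-bit stack-alignment override seen above */
#endif

#if DETECT_OS_WINDOWS && DETECT_ARCH_AARCH64
/* combination newly handled above: MCJIT on Windows/ARM64 gets an ELF triple */
#endif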
@@ -106,7 +107,7 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state, const struct pipe_resource *texture = view->texture; state->format = view->format; - state->res_format = view->texture->format; + state->res_format = texture->format; state->swizzle_r = view->swizzle_r; state->swizzle_g = view->swizzle_g; state->swizzle_b = view->swizzle_b; @@ -116,7 +117,12 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state, assert(state->swizzle_b < PIPE_SWIZZLE_NONE); assert(state->swizzle_a < PIPE_SWIZZLE_NONE); - state->target = view->target; + /* check if it is a tex2d created from buf */ + if (view->is_tex2d_from_buf) + state->target = PIPE_TEXTURE_2D; + else + state->target = view->target; + state->pot_width = util_is_power_of_two_or_zero(texture->width0); state->pot_height = util_is_power_of_two_or_zero(texture->height0); state->pot_depth = util_is_power_of_two_or_zero(texture->depth0); @@ -128,6 +134,7 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state, */ } + /** * Initialize lp_sampler_static_texture_state object with the gallium * texture/sampler_view state (this contains the parts which are @@ -145,7 +152,7 @@ lp_sampler_static_texture_state_image(struct lp_static_texture_state *state, const struct pipe_resource *resource = view->resource; state->format = view->format; - state->res_format = view->resource->format; + state->res_format = resource->format; state->swizzle_r = PIPE_SWIZZLE_X; state->swizzle_g = PIPE_SWIZZLE_Y; state->swizzle_b = PIPE_SWIZZLE_Z; @@ -155,7 +162,7 @@ lp_sampler_static_texture_state_image(struct lp_static_texture_state *state, assert(state->swizzle_b < PIPE_SWIZZLE_NONE); assert(state->swizzle_a < PIPE_SWIZZLE_NONE); - state->target = view->resource->target; + state->target = resource->target; state->pot_width = util_is_power_of_two_or_zero(resource->width0); state->pot_height = util_is_power_of_two_or_zero(resource->height0); state->pot_depth = util_is_power_of_two_or_zero(resource->depth0); @@ -167,6 +174,7 @@ lp_sampler_static_texture_state_image(struct lp_static_texture_state *state, */ } + /** * Initialize lp_sampler_static_sampler_state object with the gallium sampler * state (this contains the parts which are considered static). 
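/*
 * Editor's sketch, not part of the diff: lp_sampler_static_texture_state()
 * above precomputes pot_width/pot_height/pot_depth because power-of-two
 * sizes let the wrap code further down (lp_build_sample_wrap_linear/_nearest)
 * replace a modulo with a bit mask.  Scalar equivalent of that fast path,
 * assuming the usual (v & (v - 1)) == 0 test that
 * util_is_power_of_two_or_zero() performs:
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool is_pot_or_zero(uint32_t v)
{
   return (v & (v - 1)) == 0;
}

static uint32_t wrap_repeat(int32_t coord, uint32_t size)
{
   assert(size > 0);
   if (is_pot_or_zero(size))
      return (uint32_t)coord & (size - 1);            /* cheap AND, as in the LLVM IR */
   int32_t m = coord % (int32_t)size;                 /* general case: floored modulo */
   return (uint32_t)(m < 0 ? m + (int32_t)size : m);
}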
@@ -241,6 +249,7 @@ lp_sampler_static_sampler_state(struct lp_static_sampler_state *state, state->normalized_coords = !sampler->unnormalized_coords; } + /* build aniso pmin value */ static LLVMValueRef lp_build_pmin(struct lp_build_sample_context *bld, @@ -260,10 +269,9 @@ lp_build_pmin(struct lp_build_sample_context *bld, LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0); LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t); LLVMValueRef int_size, float_size; - unsigned length = coord_bld->type.length; - unsigned num_quads = length / 4; - boolean pmin_per_quad = pmin_bld->type.length != length; - unsigned i; + const unsigned length = coord_bld->type.length; + const unsigned num_quads = length / 4; + const boolean pmin_per_quad = pmin_bld->type.length != length; int_size = lp_build_minify(int_size_bld, bld->int_size, first_level, TRUE); float_size = lp_build_int_to_float(float_size_bld, int_size); @@ -280,7 +288,7 @@ lp_build_pmin(struct lp_build_sample_context *bld, }; LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4]; - for (i = 0; i < num_quads; i++) { + for (unsigned i = 0; i < num_quads; i++) { shuffles[i*4+0] = shuffles[i*4+1] = index0; shuffles[i*4+2] = shuffles[i*4+3] = index1; } @@ -326,6 +334,7 @@ lp_build_pmin(struct lp_build_sample_context *bld, return pmin2; } + /** * Generate code to compute coordinate gradient (rho). * \param derivs partial derivatives of (s, t, r, q) with respect to X and Y @@ -354,34 +363,35 @@ lp_build_rho(struct lp_build_sample_context *bld, LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0); LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0); LLVMValueRef rho_vec; - LLVMValueRef int_size, float_size; LLVMValueRef rho; unsigned length = coord_bld->type.length; unsigned num_quads = length / 4; boolean rho_per_quad = rho_bld->type.length != length; boolean no_rho_opt = bld->no_rho_approx && (dims > 1); - unsigned i; LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); LLVMValueRef rho_xvec, rho_yvec; - /* Note that all simplified calculations will only work for isotropic filtering */ + /* Note that all simplified calculations will only work for isotropic + * filtering + */ /* * rho calcs are always per quad except for explicit derivs (excluding * the messy cube maps for now) when requested. 
*/ - int_size = lp_build_minify(int_size_bld, bld->int_size, first_level, TRUE); - float_size = lp_build_int_to_float(float_size_bld, int_size); + LLVMValueRef int_size = + lp_build_minify(int_size_bld, bld->int_size, first_level, TRUE); + LLVMValueRef float_size = lp_build_int_to_float(float_size_bld, int_size); if (derivs) { LLVMValueRef ddmax[3] = { NULL }, ddx[3] = { NULL }, ddy[3] = { NULL }; - for (i = 0; i < dims; i++) { - LLVMValueRef floatdim; + for (unsigned i = 0; i < dims; i++) { LLVMValueRef indexi = lp_build_const_int32(gallivm, i); - floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type, - coord_bld->type, float_size, indexi); + LLVMValueRef floatdim = + lp_build_extract_broadcast(gallivm, bld->float_size_in_type, + coord_bld->type, float_size, indexi); /* * note that for rho_per_quad case could reduce math (at some shuffle @@ -392,11 +402,9 @@ lp_build_rho(struct lp_build_sample_context *bld, ddy[i] = lp_build_mul(coord_bld, floatdim, derivs->ddy[i]); ddx[i] = lp_build_mul(coord_bld, ddx[i], ddx[i]); ddy[i] = lp_build_mul(coord_bld, ddy[i], ddy[i]); - } - else { - LLVMValueRef tmpx, tmpy; - tmpx = lp_build_abs(coord_bld, derivs->ddx[i]); - tmpy = lp_build_abs(coord_bld, derivs->ddy[i]); + } else { + LLVMValueRef tmpx = lp_build_abs(coord_bld, derivs->ddx[i]); + LLVMValueRef tmpy = lp_build_abs(coord_bld, derivs->ddy[i]); ddmax[i] = lp_build_max(coord_bld, tmpx, tmpy); ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]); } @@ -410,8 +418,7 @@ lp_build_rho(struct lp_build_sample_context *bld, } rho = lp_build_max(coord_bld, rho_xvec, rho_yvec); /* skipping sqrt hence returning rho squared */ - } - else { + } else { rho = ddmax[0]; if (dims > 1) { rho = lp_build_max(coord_bld, rho, ddmax[1]); @@ -421,7 +428,8 @@ lp_build_rho(struct lp_build_sample_context *bld, } } - LLVMValueRef rho_is_inf = lp_build_is_inf_or_nan(gallivm, coord_bld->type, rho); + LLVMValueRef rho_is_inf = lp_build_is_inf_or_nan(gallivm, + coord_bld->type, rho); rho = lp_build_select(coord_bld, rho_is_inf, coord_bld->zero, rho); if (rho_per_quad) { @@ -431,8 +439,7 @@ lp_build_rho(struct lp_build_sample_context *bld, rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, rho_bld->type, rho, 0); } - } - else { + } else { /* * This looks all a bit complex, but it's not that bad * (the shuffle code makes it look worse than it is). 
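/*
 * Editor's sketch, not part of the diff: scalar version of what
 * lp_build_rho() and lp_build_lod_selector() above compute per quad.  rho is
 * the larger screen-space scale factor of the texel footprint; on the
 * no_rho_approx path the code keeps rho squared and halves the log2 instead
 * of taking a square root, since log2(sqrt(x)) == 0.5 * log2(x).  The names
 * below are illustrative only.
 */
#include <math.h>

static float lod_from_derivatives(float dudx, float dvdx,
                                  float dudy, float dvdy,
                                  float width, float height)
{
   /* scale the derivatives into texel space */
   float dsdx = dudx * width,  dtdx = dvdx * height;
   float dsdy = dudy * width,  dtdy = dvdy * height;

   /* rho^2 = max of the squared lengths of the two footprint axes */
   float rho2 = fmaxf(dsdx * dsdx + dtdx * dtdx,
                      dsdy * dsdy + dtdy * dtdy);

   /* lod = log2(rho) = 0.5 * log2(rho^2), so the sqrt never has to be taken */
   return 0.5f * log2f(rho2);
}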
@@ -453,8 +460,7 @@ lp_build_rho(struct lp_build_sample_context *bld, if (dims < 2) { ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s); - } - else if (dims >= 2) { + } else if (dims >= 2) { ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t); if (dims > 2) { ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r); @@ -470,14 +476,16 @@ lp_build_rho(struct lp_build_sample_context *bld, 2, 3, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE }; - LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4]; + LLVMValueRef ddx_ddys, ddx_ddyt, floatdim; + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; - for (i = 0; i < num_quads; i++) { + for (unsigned i = 0; i < num_quads; i++) { shuffles[i*4+0] = shuffles[i*4+1] = index0; shuffles[i*4+2] = shuffles[i*4+3] = index1; } floatdim = LLVMBuildShuffleVector(builder, float_size, float_size, - LLVMConstVector(shuffles, length), ""); + LLVMConstVector(shuffles, length), + ""); ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], floatdim); ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]); ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01); @@ -504,26 +512,22 @@ lp_build_rho(struct lp_build_sample_context *bld, if (rho_per_quad) { rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, rho_bld->type, rho, 0); - } - else { + } else { rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4); } /* skipping sqrt hence returning rho squared */ - } - else { + } else { ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]); if (dims > 2) { ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]); - } - else { + } else { ddx_ddy[1] = NULL; /* silence compiler warning */ } if (dims < 2) { rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle0); rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2); - } - else if (dims == 2) { + } else if (dims == 2) { static const unsigned char swizzle02[] = { 0, 2, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE @@ -534,12 +538,11 @@ lp_build_rho(struct lp_build_sample_context *bld, }; rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle02); rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle13); - } - else { + } else { LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH]; LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH]; assert(dims == 3); - for (i = 0; i < num_quads; i++) { + for (unsigned i = 0; i < num_quads; i++) { shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i); shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2); shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i); @@ -562,20 +565,19 @@ lp_build_rho(struct lp_build_sample_context *bld, if (dims > 1) { /* could use some broadcast_vector helper for this? 
*/ LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4]; - for (i = 0; i < num_quads; i++) { + for (unsigned i = 0; i < num_quads; i++) { src[i] = float_size; } - float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads); - } - else { + float_size = lp_build_concat(bld->gallivm, src, + float_size_bld->type, num_quads); + } else { float_size = lp_build_broadcast_scalar(coord_bld, float_size); } rho_vec = lp_build_mul(coord_bld, rho_vec, float_size); if (dims <= 1) { rho = rho_vec; - } - else { + } else { if (dims >= 2) { LLVMValueRef rho_s, rho_t, rho_r; @@ -593,12 +595,10 @@ lp_build_rho(struct lp_build_sample_context *bld, if (rho_per_quad) { rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type, rho_bld->type, rho, 0); - } - else { + } else { rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4); } - } - else { + } else { if (dims <= 1) { rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, ""); } @@ -606,8 +606,7 @@ lp_build_rho(struct lp_build_sample_context *bld, if (dims <= 1) { rho = rho_vec; - } - else { + } else { if (dims >= 2) { LLVMValueRef rho_s, rho_t, rho_r; @@ -717,9 +716,6 @@ lp_build_brilinear_rho(struct lp_build_context *bld, LLVMValueRef *out_lod_ipart, LLVMValueRef *out_lod_fpart) { - LLVMValueRef lod_ipart; - LLVMValueRef lod_fpart; - const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor); const double post_offset = 1 - 2*factor; @@ -736,14 +732,15 @@ lp_build_brilinear_rho(struct lp_build_context *bld, lp_build_const_vec(bld->gallivm, bld->type, pre_factor)); /* ipart = ifloor(log2(rho)) */ - lod_ipart = lp_build_extract_exponent(bld, rho, 0); + LLVMValueRef lod_ipart = lp_build_extract_exponent(bld, rho, 0); /* fpart = rho / 2**ipart */ - lod_fpart = lp_build_extract_mantissa(bld, rho); + LLVMValueRef lod_fpart = lp_build_extract_mantissa(bld, rho); - lod_fpart = lp_build_mad(bld, lod_fpart, - lp_build_const_vec(bld->gallivm, bld->type, factor), - lp_build_const_vec(bld->gallivm, bld->type, post_offset)); + lod_fpart = + lp_build_mad(bld, lod_fpart, + lp_build_const_vec(bld->gallivm, bld->type, factor), + lp_build_const_vec(bld->gallivm, bld->type, post_offset)); /* * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since: @@ -768,7 +765,6 @@ lp_build_ilog2_sqrt(struct lp_build_context *bld, LLVMValueRef x) { LLVMBuilderRef builder = bld->gallivm->builder; - LLVMValueRef ipart; struct lp_type i_type = lp_int_type(bld->type); LLVMValueRef one = lp_build_const_int_vec(bld->gallivm, i_type, 1); @@ -777,7 +773,7 @@ lp_build_ilog2_sqrt(struct lp_build_context *bld, assert(lp_check_value(bld->type, x)); /* ipart = log2(x) + 0.5 = 0.5*(log2(x^2) + 1.0) */ - ipart = lp_build_extract_exponent(bld, x, 1); + LLVMValueRef ipart = lp_build_extract_exponent(bld, x, 1); ipart = LLVMBuildAShr(builder, ipart, one, ""); return ipart; @@ -824,21 +820,21 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, *out_lod_fpart = lodf_bld->zero; /* - * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification: - * "Implementations may either unconditionally assume c = 0 for the minification - * vs. magnification switch-over point, or may choose to make c depend on the - * combination of minification and magnification modes as follows: if the - * magnification filter is given by LINEAR and the minification filter is given - * by NEAREST_MIPMAP_NEAREST or NEAREST_MIPMAP_LINEAR, then c = 0.5. This is - * done to ensure that a minified texture does not appear "sharper" than a - * magnified texture. Otherwise c = 0." 
- * And 3.9.11 Texture Minification: - * "If lod is less than or equal to the constant c (see section 3.9.12) the - * texture is said to be magnified; if it is greater, the texture is minified." - * So, using 0 as switchover point always, and using magnification for lod == 0. - * Note that the always c = 0 behavior is new (first appearing in GL 3.1 spec), - * old GL versions required 0.5 for the modes listed above. - * I have no clue about the (undocumented) wishes of d3d9/d3d10 here! + * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture + * Magnification: "Implementations may either unconditionally assume c = 0 + * for the minification vs. magnification switch-over point, or may choose + * to make c depend on the combination of minification and magnification + * modes as follows: if the magnification filter is given by LINEAR and the + * minification filter is given by NEAREST_MIPMAP_NEAREST or + * NEAREST_MIPMAP_LINEAR, then c = 0.5. This is done to ensure that a + * minified texture does not appear "sharper" than a magnified + * texture. Otherwise c = 0." And 3.9.11 Texture Minification: "If lod is + * less than or equal to the constant c (see section 3.9.12) the texture is + * said to be magnified; if it is greater, the texture is minified." So, + * using 0 as switchover point always, and using magnification for lod == + * 0. Note that the always c = 0 behavior is new (first appearing in GL + * 3.1 spec), old GL versions required 0.5 for the modes listed above. I + * have no clue about the (undocumented) wishes of d3d9/d3d10 here! */ if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) { @@ -850,16 +846,14 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, bld->context_ptr, sampler_unit); lod = lp_build_broadcast_scalar(lodf_bld, min_lod); - } - else { + } else { if (explicit_lod) { if (bld->num_lods != bld->coord_type.length) lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type, lodf_bld->type, explicit_lod, 0); else lod = explicit_lod; - } - else { + } else { LLVMValueRef rho; boolean rho_squared = bld->no_rho_approx && (bld->dims > 1); @@ -867,8 +861,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, !explicit_lod) { rho = lp_build_pmin(bld, first_level, s, t, max_aniso); rho_squared = true; - } else + } else { rho = lp_build_rho(bld, first_level, s, t, r, derivs); + } /* * Compute lod = log2(rho) @@ -893,8 +888,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, */ if (rho_squared) { *out_lod_ipart = lp_build_ilog2_sqrt(lodf_bld, rho); - } - else { + } else { *out_lod_ipart = lp_build_ilog2(lodf_bld, rho); } *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER, @@ -920,8 +914,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, if (0) { lod = lp_build_log2(lodf_bld, rho); - } - else { + } else { /* get more accurate results if we just sqaure rho always */ if (!rho_squared) rho = lp_build_mul(lodf_bld, rho, rho); @@ -930,13 +923,16 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, /* log2(x^2) == 0.5*log2(x) */ lod = lp_build_mul(lodf_bld, lod, - lp_build_const_vec(bld->gallivm, lodf_bld->type, 0.5F)); + lp_build_const_vec(bld->gallivm, + lodf_bld->type, 0.5F)); /* add shader lod bias */ if (lod_bias) { if (bld->num_lods != bld->coord_type.length) - lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type, - lodf_bld->type, lod_bias, 0); + lod_bias = lp_build_pack_aos_scalars(bld->gallivm, + bld->coord_bld.type, + lodf_bld->type, + lod_bias, 0); lod = 
LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias"); } } @@ -988,14 +984,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, if (!bld->no_brilinear) { lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR, out_lod_ipart, out_lod_fpart); - } - else { + } else { lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart); } lp_build_name(*out_lod_fpart, "lod_fpart"); - } - else { + } else { *out_lod_ipart = lp_build_iround(lodf_bld, lod); } @@ -1022,9 +1016,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, LLVMValueRef *out_of_bounds) { struct lp_build_context *leveli_bld = &bld->leveli_bld; - LLVMValueRef level; - - level = lp_build_add(leveli_bld, lod_ipart, first_level); + LLVMValueRef level = lp_build_add(leveli_bld, lod_ipart, first_level); if (out_of_bounds) { LLVMValueRef out, out1; @@ -1033,21 +1025,19 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, out = lp_build_or(leveli_bld, out, out1); if (bld->num_mips == bld->coord_bld.type.length) { *out_of_bounds = out; - } - else if (bld->num_mips == 1) { + } else if (bld->num_mips == 1) { *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out); - } - else { + } else { assert(bld->num_mips == bld->coord_bld.type.length / 4); - *out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, - leveli_bld->type, - bld->int_coord_bld.type, - out); + *out_of_bounds = + lp_build_unpack_broadcast_aos_scalars(bld->gallivm, + leveli_bld->type, + bld->int_coord_bld.type, + out); } level = lp_build_andnot(&bld->int_coord_bld, level, *out_of_bounds); *level_out = level; - } - else { + } else { /* clamp level to legal range of levels */ *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level); @@ -1059,7 +1049,8 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld, * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s) * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod * part accordingly. - * Later, we'll sample from those two mipmap levels and interpolate between them. + * Later, we'll sample from those two mipmap levels and interpolate between + * them. */ void lp_build_linear_mip_levels(struct lp_build_sample_context *bld, @@ -1083,9 +1074,9 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one); /* - * Clamp both *level0_out and *level1_out to [first_level, last_level], with - * the minimum number of comparisons, and zeroing lod_fpart in the extreme - * ends in the process. + * Clamp both *level0_out and *level1_out to [first_level, last_level], + * with the minimum number of comparisons, and zeroing lod_fpart in the + * extreme ends in the process. */ /* *level0_out < first_level */ @@ -1121,18 +1112,25 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, lp_build_name(*lod_fpart_inout, "texture%u_mipweight", texture_unit); } + /** * A helper function that factorizes this common pattern. 
*/ static LLVMValueRef load_mip(struct gallivm_state *gallivm, - LLVMTypeRef ptr_type, LLVMValueRef offsets, LLVMValueRef index1) { + LLVMTypeRef ptr_type, + LLVMValueRef offsets, + LLVMValueRef index1) +{ LLVMValueRef zero = lp_build_const_int32(gallivm, 0); LLVMValueRef indexes[2] = {zero, index1}; - LLVMValueRef ptr = LLVMBuildGEP2(gallivm->builder, ptr_type, offsets, indexes, ARRAY_SIZE(indexes), ""); - return LLVMBuildLoad2(gallivm->builder, LLVMInt32TypeInContext(gallivm->context), ptr, ""); + LLVMValueRef ptr = LLVMBuildGEP2(gallivm->builder, ptr_type, offsets, + indexes, ARRAY_SIZE(indexes), ""); + return LLVMBuildLoad2(gallivm->builder, + LLVMInt32TypeInContext(gallivm->context), ptr, ""); } + /** * Return pointer to a single mipmap level. * \param level integer mipmap level @@ -1141,14 +1139,17 @@ LLVMValueRef lp_build_get_mipmap_level(struct lp_build_sample_context *bld, LLVMValueRef level) { - LLVMValueRef mip_offset = load_mip(bld->gallivm, bld->mip_offsets_type, bld->mip_offsets, level); + LLVMValueRef mip_offset = load_mip(bld->gallivm, bld->mip_offsets_type, + bld->mip_offsets, level); LLVMBuilderRef builder = bld->gallivm->builder; - LLVMValueRef data_ptr = LLVMBuildGEP2(builder, - LLVMInt8TypeInContext(bld->gallivm->context), - bld->base_ptr, &mip_offset, 1, ""); + LLVMValueRef data_ptr = + LLVMBuildGEP2(builder, + LLVMInt8TypeInContext(bld->gallivm->context), + bld->base_ptr, &mip_offset, 1, ""); return data_ptr; } + /** * Return (per-pixel) offsets to mip levels. * \param level integer mipmap level @@ -1163,29 +1164,32 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld, if (bld->num_mips == 1) { offset1 = load_mip(bld->gallivm, bld->mip_offsets_type, bld->mip_offsets, level); offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1); - } - else if (bld->num_mips == bld->coord_bld.type.length / 4) { - unsigned i; - + } else if (bld->num_mips == bld->coord_bld.type.length / 4) { offsets = bld->int_coord_bld.undef; - for (i = 0; i < bld->num_mips; i++) { + for (unsigned i = 0; i < bld->num_mips; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); - offset1 = load_mip(bld->gallivm, bld->mip_offsets_type, bld->mip_offsets, LLVMBuildExtractElement(builder, level, indexi, "")); + offset1 = load_mip(bld->gallivm, bld->mip_offsets_type, + bld->mip_offsets, + LLVMBuildExtractElement(builder, level, + indexi, "")); LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i); - offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, ""); + offsets = LLVMBuildInsertElement(builder, offsets, offset1, + indexo, ""); } - offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0, 4); - } - else { - unsigned i; - + offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, + offsets, 0, 4); + } else { assert (bld->num_mips == bld->coord_bld.type.length); offsets = bld->int_coord_bld.undef; - for (i = 0; i < bld->num_mips; i++) { + for (unsigned i = 0; i < bld->num_mips; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); - offset1 = load_mip(bld->gallivm, bld->mip_offsets_type, bld->mip_offsets, LLVMBuildExtractElement(builder, level, indexi, "")); - offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexi, ""); + offset1 = load_mip(bld->gallivm, bld->mip_offsets_type, + bld->mip_offsets, + LLVMBuildExtractElement(builder, level, + indexi, "")); + offsets = LLVMBuildInsertElement(builder, offsets, offset1, + indexi, ""); } } return offsets; @@ -1210,16 +1214,14 @@ lp_build_minify(struct 
lp_build_context *bld, if (level == bld->zero) { /* if we're using mipmap level zero, no minification is needed */ return base_size; - } - else { + } else { LLVMValueRef size; assert(bld->type.sign); if (lod_scalar || (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) { size = LLVMBuildLShr(builder, base_size, level, "minify"); size = lp_build_max(bld, size, bld->one); - } - else { + } else { /* * emulate shift with float mul, since intel "forgot" shifts with * per-element shift count until avx2, which results in terrible @@ -1259,6 +1261,7 @@ lp_build_minify(struct lp_build_context *bld, } } + /* * Scale image dimensions with block sizes. * @@ -1278,14 +1281,18 @@ lp_build_scale_view_dims(struct lp_build_context *bld, LLVMValueRef size, LLVMValueRef view_blocksize) { LLVMBuilderRef builder = bld->gallivm->builder; - LLVMValueRef ret; - - ret = LLVMBuildAdd(builder, size, LLVMBuildSub(builder, tex_blocksize, lp_build_const_int_vec(bld->gallivm, bld->type, 1), ""), ""); + LLVMValueRef ret = + LLVMBuildAdd(builder, size, + LLVMBuildSub(builder, tex_blocksize, + lp_build_const_int_vec(bld->gallivm, + bld->type, 1), ""), + ""); ret = LLVMBuildLShr(builder, ret, tex_blocksize_log2, ""); ret = LLVMBuildMul(builder, ret, view_blocksize, ""); return ret; } + /* * Scale a single image dimension. * @@ -1296,18 +1303,22 @@ LLVMValueRef lp_build_scale_view_dim(struct gallivm_state *gallivm, LLVMValueRef size, unsigned tex_blocksize, unsigned view_blocksize) { - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef ret; - if (tex_blocksize == view_blocksize) return size; - ret = LLVMBuildAdd(builder, size, lp_build_const_int32(gallivm, tex_blocksize - 1), ""); - ret = LLVMBuildLShr(builder, ret, lp_build_const_int32(gallivm, util_logbase2(tex_blocksize)), ""); - ret = LLVMBuildMul(builder, ret, lp_build_const_int32(gallivm, view_blocksize), ""); + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef ret = + LLVMBuildAdd(builder, size, + lp_build_const_int32(gallivm, tex_blocksize - 1), ""); + ret = LLVMBuildLShr(builder, ret, + lp_build_const_int32(gallivm, + util_logbase2(tex_blocksize)), ""); + ret = LLVMBuildMul(builder, ret, + lp_build_const_int32(gallivm, view_blocksize), ""); return ret; } + /** * Dereference stride_array[mipmap_level] array to get a stride. * Return stride as a vector. 
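/*
 * Editor's sketch, not part of the diff: scalar equivalents of
 * lp_build_minify() and lp_build_scale_view_dim() above.  A mip level is the
 * base size shifted right by the level but clamped to 1, and a view whose
 * format has a different block size than the underlying texture (for example
 * an uncompressed view of a compressed resource) is rescaled in whole blocks.
 * Shift and divide are interchangeable here because tex_blocksize is a power
 * of two, which is what the LLVM version exploits.
 */
static unsigned minify(unsigned base_size, unsigned level)
{
   unsigned size = base_size >> level;
   return size ? size : 1;                 /* never drop below one texel */
}

static unsigned scale_view_dim(unsigned size,
                               unsigned tex_blocksize,   /* power of two */
                               unsigned view_blocksize)
{
   if (tex_blocksize == view_blocksize)
      return size;
   unsigned blocks = (size + tex_blocksize - 1) / tex_blocksize;
   return blocks * view_blocksize;
}
/* e.g. scale_view_dim(18, 4, 1) == 5: 18 texels cover 5 compressed blocks */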
@@ -1319,33 +1330,34 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, { LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef stride, stride1; + if (bld->num_mips == 1) { stride1 = load_mip(bld->gallivm, stride_type, stride_array, level); stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1); - } - else if (bld->num_mips == bld->coord_bld.type.length / 4) { + } else if (bld->num_mips == bld->coord_bld.type.length / 4) { LLVMValueRef stride1; - unsigned i; stride = bld->int_coord_bld.undef; - for (i = 0; i < bld->num_mips; i++) { + for (unsigned i = 0; i < bld->num_mips; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); - stride1 = load_mip(bld->gallivm, stride_type, stride_array, LLVMBuildExtractElement(builder, level, indexi, "")); + stride1 = load_mip(bld->gallivm, stride_type, stride_array, + LLVMBuildExtractElement(builder, level, + indexi, "")); LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i); stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, ""); } stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4); - } - else { + } else { LLVMValueRef stride1; - unsigned i; assert (bld->num_mips == bld->coord_bld.type.length); stride = bld->int_coord_bld.undef; - for (i = 0; i < bld->coord_bld.type.length; i++) { + for (unsigned i = 0; i < bld->coord_bld.type.length; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); - stride1 = load_mip(bld->gallivm, stride_type, stride_array, LLVMBuildExtractElement(builder, level, indexi, "")); + stride1 = load_mip(bld->gallivm, stride_type, stride_array, + LLVMBuildExtractElement(builder, level, + indexi, "")); stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, ""); } } @@ -1373,19 +1385,18 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, */ if (bld->num_mips == 1) { ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel); - *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec, TRUE); + *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, + ilevel_vec, TRUE); *out_size = lp_build_scale_view_dims(&bld->int_size_bld, *out_size, bld->int_tex_blocksize, bld->int_tex_blocksize_log2, bld->int_view_blocksize); - } - else { + } else { LLVMValueRef int_size_vec; LLVMValueRef int_tex_blocksize_vec, int_tex_blocksize_log2_vec; LLVMValueRef int_view_blocksize_vec; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; - unsigned num_quads = bld->coord_bld.type.length / 4; - unsigned i; + const unsigned num_quads = bld->coord_bld.type.length / 4; if (bld->num_mips == num_quads) { /* @@ -1408,14 +1419,13 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, assert(bld->int_size_in_bld.type.length == 1); int_size_vec = lp_build_broadcast_scalar(&bld4, bld->int_size); - int_tex_blocksize_vec = lp_build_broadcast_scalar(&bld4, - bld->int_tex_blocksize); - int_tex_blocksize_log2_vec = lp_build_broadcast_scalar(&bld4, - bld->int_tex_blocksize_log2); - int_view_blocksize_vec = lp_build_broadcast_scalar(&bld4, - bld->int_view_blocksize); - } - else { + int_tex_blocksize_vec = + lp_build_broadcast_scalar(&bld4, bld->int_tex_blocksize); + int_tex_blocksize_log2_vec = + lp_build_broadcast_scalar(&bld4, bld->int_tex_blocksize_log2); + int_view_blocksize_vec = + lp_build_broadcast_scalar(&bld4, bld->int_view_blocksize); + } else { assert(bld->int_size_in_bld.type.length == 4); int_size_vec = bld->int_size; int_tex_blocksize_vec = bld->int_tex_blocksize; @@ -1423,7 +1433,7 @@ 
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, int_view_blocksize_vec = bld->int_view_blocksize; } - for (i = 0; i < num_quads; i++) { + for (unsigned i = 0; i < num_quads; i++) { LLVMValueRef ileveli; LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); @@ -1439,51 +1449,59 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, int_view_blocksize_vec); } /* - * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1, - * [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise. + * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for + * dims > 1, [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise. */ *out_size = lp_build_concat(bld->gallivm, tmp, bld4.type, num_quads); - } - else { - /* FIXME: this is terrible and results in _huge_ vector - * (for the dims > 1 case). - * Should refactor this (together with extract_image_sizes) and do - * something more useful. Could for instance if we have width,height - * with 4-wide vector pack all elements into a 8xi16 vector - * (on which we can still do useful math) instead of using a 16xi32 - * vector. - * For dims == 1 this will create [w0, w1, w2, w3, ...] vector. - * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector. - */ + } else { + /* FIXME: this is terrible and results in _huge_ vector + * (for the dims > 1 case). + * Should refactor this (together with extract_image_sizes) and do + * something more useful. Could for instance if we have width,height + * with 4-wide vector pack all elements into a 8xi16 vector + * (on which we can still do useful math) instead of using a 16xi32 + * vector. + * For dims == 1 this will create [w0, w1, w2, w3, ...] vector. + * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] + * vector. + */ assert(bld->num_mips == bld->coord_bld.type.length); if (bld->dims == 1) { assert(bld->int_size_in_bld.type.length == 1); int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, bld->int_size); - int_tex_blocksize_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, - bld->int_tex_blocksize); - int_tex_blocksize_log2_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, - bld->int_tex_blocksize_log2); - int_view_blocksize_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, - bld->int_view_blocksize); - *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel, FALSE); - *out_size = lp_build_scale_view_dims(&bld->int_coord_bld, *out_size, + int_tex_blocksize_vec = + lp_build_broadcast_scalar(&bld->int_coord_bld, + bld->int_tex_blocksize); + int_tex_blocksize_log2_vec = + lp_build_broadcast_scalar(&bld->int_coord_bld, + bld->int_tex_blocksize_log2); + int_view_blocksize_vec = + lp_build_broadcast_scalar(&bld->int_coord_bld, + bld->int_view_blocksize); + *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, + ilevel, FALSE); + *out_size = lp_build_scale_view_dims(&bld->int_coord_bld, + *out_size, int_tex_blocksize_vec, int_tex_blocksize_log2_vec, int_view_blocksize_vec); - } - else { + } else { LLVMValueRef ilevel1; - for (i = 0; i < bld->num_mips; i++) { + for (unsigned i = 0; i < bld->num_mips; i++) { LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i); - ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type, - bld->int_size_in_bld.type, ilevel, indexi); + ilevel1 = lp_build_extract_broadcast(bld->gallivm, + bld->int_coord_type, + bld->int_size_in_bld.type, + ilevel, indexi); tmp[i] = bld->int_size; - tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1, TRUE); - tmp[i] = 
lp_build_scale_view_dims(&bld->int_size_in_bld, tmp[i], + tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], + ilevel1, TRUE); + tmp[i] = lp_build_scale_view_dims(&bld->int_size_in_bld, + tmp[i], bld->int_tex_blocksize, bld->int_tex_blocksize_log2, bld->int_view_blocksize); @@ -1552,14 +1570,12 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld, LLVMConstInt(i32t, 2, 0)); } } - } - else { + } else { unsigned num_quads = bld->coord_bld.type.length / 4; if (dims == 1) { *out_width = size; - } - else if (bld->num_mips == num_quads) { + } else if (bld->num_mips == num_quads) { *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4); if (dims >= 2) { *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4); @@ -1567,8 +1583,7 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld, *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4); } } - } - else { + } else { assert(bld->num_mips == bld->coord_type.length); *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type, coord_type, size, 0); @@ -1620,6 +1635,7 @@ lp_build_unnormalized_coords(struct lp_build_sample_context *bld, } } + /** * Generate new coords and faces for cubemap texels falling off the face. * @@ -1785,8 +1801,7 @@ lp_build_select3(struct lp_build_context *sel_bld, LLVMValueRef val1, LLVMValueRef val2) { - LLVMValueRef tmp; - tmp = lp_build_select(sel_bld, sel0, val0, val1); + LLVMValueRef tmp = lp_build_select(sel_bld, sel0, val0, val1); return lp_build_select(sel_bld, sel1, val2, tmp); } @@ -1874,8 +1889,8 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz; LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy; /* - * s = 1/2 * ( sc / ma + 1) - * t = 1/2 * ( tc / ma + 1) + * s = 1/2 * (sc / ma + 1) + * t = 1/2 * (tc / ma + 1) * * s' = 1/2 * (sc' * ma - sc * ma') / ma^2 * t' = 1/2 * (tc' * ma - tc * ma') / ma^2 @@ -1901,8 +1916,7 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, ddy[0] = lp_build_ddy(coord_bld, s); ddy[1] = lp_build_ddy(coord_bld, t); ddy[2] = lp_build_ddy(coord_bld, r); - } - else { + } else { ddx[0] = derivs_in->ddx[0]; ddx[1] = derivs_in->ddx[1]; ddx[2] = derivs_in->ddx[2]; @@ -2095,8 +2109,7 @@ lp_build_sample_partial_offset(struct lp_build_context *bld, if (block_length == 1) { subcoord = bld->zero; - } - else { + } else { /* * Pixel blocks have power of two dimensions. LLVM should convert the * rem/div to bit arithmetic. 
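/*
 * Editor's sketch, not part of the diff: what lp_build_sample_partial_offset()
 * above computes for one coordinate, in scalar form.  The coordinate is split
 * into a block index (scaled by the per-block stride) and a residual
 * coordinate inside the block; with power-of-two block dimensions the
 * division and remainder reduce to a shift and a mask, which is the strength
 * reduction the comment above expects LLVM to perform.
 */
static void sample_partial_offset(unsigned coord,
                                  unsigned block_length,  /* power of two */
                                  unsigned stride,        /* bytes per block step */
                                  unsigned *out_offset,
                                  unsigned *out_subcoord)
{
   unsigned block = coord / block_length;   /* lowered to a right shift */
   *out_subcoord  = coord % block_length;   /* lowered to an AND mask */
   *out_offset    = block * stride;
}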
@@ -2165,8 +2178,7 @@ lp_build_sample_offset(struct lp_build_context *bld, y, y_stride, &y_offset, out_j); offset = lp_build_add(bld, offset, y_offset); - } - else { + } else { *out_j = bld->zero; } @@ -2183,6 +2195,7 @@ lp_build_sample_offset(struct lp_build_context *bld, *out_offset = offset; } + static LLVMValueRef lp_build_sample_min(struct lp_build_context *bld, LLVMValueRef x, @@ -2200,6 +2213,7 @@ lp_build_sample_min(struct lp_build_context *bld, return lp_build_select(bld, mask, min, v0); } + static LLVMValueRef lp_build_sample_max(struct lp_build_context *bld, LLVMValueRef x, @@ -2217,6 +2231,7 @@ lp_build_sample_max(struct lp_build_context *bld, return lp_build_select(bld, mask, max, v0); } + static LLVMValueRef lp_build_sample_min_2d(struct lp_build_context *bld, LLVMValueRef x, @@ -2231,6 +2246,7 @@ lp_build_sample_min_2d(struct lp_build_context *bld, return lp_build_sample_min(bld, y, v0, v1); } + static LLVMValueRef lp_build_sample_max_2d(struct lp_build_context *bld, LLVMValueRef x, @@ -2245,6 +2261,7 @@ lp_build_sample_max_2d(struct lp_build_context *bld, return lp_build_sample_max(bld, y, v0, v1); } + static LLVMValueRef lp_build_sample_min_3d(struct lp_build_context *bld, LLVMValueRef x, @@ -2260,6 +2277,7 @@ lp_build_sample_min_3d(struct lp_build_context *bld, return lp_build_sample_min(bld, z, v0, v1); } + static LLVMValueRef lp_build_sample_max_3d(struct lp_build_context *bld, LLVMValueRef x, @@ -2275,6 +2293,7 @@ lp_build_sample_max_3d(struct lp_build_context *bld, return lp_build_sample_max(bld, z, v0, v1); } + void lp_build_reduce_filter(struct lp_build_context *bld, enum pipe_tex_reduction_mode mode, @@ -2303,6 +2322,7 @@ lp_build_reduce_filter(struct lp_build_context *bld, } } + void lp_build_reduce_filter_2d(struct lp_build_context *bld, enum pipe_tex_reduction_mode mode, @@ -2316,24 +2336,27 @@ lp_build_reduce_filter_2d(struct lp_build_context *bld, LLVMValueRef *v11, LLVMValueRef *out) { - unsigned chan; switch (mode) { case PIPE_TEX_REDUCTION_MIN: - for (chan = 0; chan < num_chan; chan++) - out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]); + for (unsigned chan = 0; chan < num_chan; chan++) + out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan], + v10[chan], v11[chan]); break; case PIPE_TEX_REDUCTION_MAX: - for (chan = 0; chan < num_chan; chan++) - out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]); + for (unsigned chan = 0; chan < num_chan; chan++) + out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan], + v10[chan], v11[chan]); break; case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE: default: - for (chan = 0; chan < num_chan; chan++) - out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan], flags); + for (unsigned chan = 0; chan < num_chan; chan++) + out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan], + v10[chan], v11[chan], flags); break; } } + void lp_build_reduce_filter_3d(struct lp_build_context *bld, enum pipe_tex_reduction_mode mode, @@ -2352,23 +2375,22 @@ lp_build_reduce_filter_3d(struct lp_build_context *bld, LLVMValueRef *v111, LLVMValueRef *out) { - unsigned chan; switch (mode) { case PIPE_TEX_REDUCTION_MIN: - for (chan = 0; chan < num_chan; chan++) + for (unsigned chan = 0; chan < num_chan; chan++) out[chan] = lp_build_sample_min_3d(bld, x, y, z, v000[chan], v001[chan], v010[chan], v011[chan], v100[chan], v101[chan], v110[chan], v111[chan]); break; case PIPE_TEX_REDUCTION_MAX: - for (chan = 0; chan < num_chan; 
chan++) + for (unsigned chan = 0; chan < num_chan; chan++) out[chan] = lp_build_sample_max_3d(bld, x, y, z, v000[chan], v001[chan], v010[chan], v011[chan], v100[chan], v101[chan], v110[chan], v111[chan]); break; case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE: default: - for (chan = 0; chan < num_chan; chan++) + for (unsigned chan = 0; chan < num_chan; chan++) out[chan] = lp_build_lerp_3d(bld, x, y, z, v000[chan], v001[chan], v010[chan], v011[chan], v100[chan], v101[chan], v110[chan], v111[chan], @@ -2377,6 +2399,7 @@ lp_build_reduce_filter_3d(struct lp_build_context *bld, } } + /* * generated from * const float alpha = 2; @@ -2515,6 +2538,7 @@ static const float aniso_filter_table[1024] = { 0.137200, 0.136932, 0.136665, 0.136398, 0.136131, 0.135865, 0.135600, 0.135335, }; + const float * lp_build_sample_aniso_filter_table(void) { diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 19150f995..140568b99 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -116,8 +116,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, if (use_border) { use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); - } - else { + } else { use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); } } @@ -132,8 +131,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, if (use_border) { use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); - } - else { + } else { use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); } } @@ -174,8 +172,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, * Ex: * if (use_border) { * texel = border_color; - * } - * else { + * } else { * texel = sample_texture(coord); * } * As it is now, we always sample the texture, then selectively replace @@ -185,7 +182,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, if (use_border) { /* select texel color or border color depending on use_border. */ const struct util_format_description *format_desc = bld->format_desc; - int chan; struct lp_type border_type = bld->texel_type; border_type.length = 4; /* @@ -193,7 +189,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, * get optimized away eventually by sampler_view swizzle anyway but it's * easier too. */ - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { unsigned chan_s; /* reverse-map channel... 
*/ if (util_format_has_stencil(format_desc)) { @@ -201,8 +197,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, chan_s = 0; else break; - } - else { + } else { for (chan_s = 0; chan_s < 4; chan_s++) { if (chan_s == format_desc->swizzle[chan]) { break; @@ -335,7 +330,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); LLVMValueRef coord0, coord1, weight; - switch(wrap_mode) { + switch (wrap_mode) { case PIPE_TEX_WRAP_REPEAT: if (is_pot) { /* mul by size and subtract 0.5 */ @@ -351,8 +346,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /* repeat wrap */ coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, ""); - } - else { + } else { LLVMValueRef mask; if (offset) { offset = lp_build_int_to_float(coord_bld, offset); @@ -696,8 +690,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); LLVMValueRef icoord; - - switch(wrap_mode) { + + switch (wrap_mode) { case PIPE_TEX_WRAP_REPEAT: if (is_pot) { coord = lp_build_mul(coord_bld, coord, length_f); @@ -706,8 +700,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, icoord = lp_build_add(int_coord_bld, icoord, offset); } icoord = LLVMBuildAnd(builder, icoord, length_minus_one, ""); - } - else { + } else { if (offset) { offset = lp_build_int_to_float(coord_bld, offset); offset = lp_build_div(coord_bld, offset, length_f); @@ -845,10 +838,10 @@ lp_build_sample_comparefunc(struct lp_build_sample_context *bld, * are ordered except NOT_EQUAL which is unordered. 
*/ if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) { - res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func, + res = lp_build_cmp_ordered(texel_bld, + bld->static_sampler_state->compare_func, p, texel); - } - else { + } else { res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func, p, texel); } @@ -923,8 +916,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { /* add cube layer to face */ z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]); - } - else { + } else { z = coords[2]; } lp_build_name(z, "tex.z.layer"); @@ -1038,7 +1030,6 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL; LLVMValueRef xs[4], ys[4], zs[4]; LLVMValueRef neighbors[2][2][4]; - int chan, texel_index; boolean seamless_cube_filter, accurate_cube_corners; unsigned chan_swiz = bld->static_texture_state->swizzle_r; @@ -1049,7 +1040,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, case 2: chan_swiz = bld->static_texture_state->swizzle_b; break; case 3: chan_swiz = bld->static_texture_state->swizzle_a; break; default: - break; + break; } } @@ -1122,15 +1113,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* add cube layer to face */ z00 = z01 = z10 = z11 = z1 = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]); - } - else { + } else { z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */ } lp_build_name(z00, "tex.z0.layer"); lp_build_name(z1, "tex.z1.layer"); } - } - else { + } else { struct lp_build_if_state edge_if; LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2]; LLVMValueRef coord0, coord1, have_edge, have_corner; @@ -1186,7 +1175,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* needed for accurate corner filtering branch later, rely on 0 init */ have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner"); - for (texel_index = 0; texel_index < 4; texel_index++) { + for (unsigned texel_index = 0; texel_index < 4; texel_index++) { xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs"); ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys"); zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs"); @@ -1292,8 +1281,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMBuildStore(builder, cube_layer, zs[1]); LLVMBuildStore(builder, cube_layer, zs[2]); LLVMBuildStore(builder, cube_layer, zs[3]); - } - else { + } else { LLVMBuildStore(builder, face, zs[0]); LLVMBuildStore(builder, face, zs[1]); LLVMBuildStore(builder, face, zs[2]); @@ -1369,8 +1357,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, neighbors[0][0], neighbors[0][1], colors_out); - } - else { + } else { LLVMValueRef cmpval0, cmpval1; cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); @@ -1379,8 +1366,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, cmpval0, cmpval1); colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; } - } - else { + } else { /* 2D/3D texture */ struct lp_build_if_state corner_if; LLVMValueRef colors0[4], colorss[4] = { 0 }; @@ -1470,7 +1456,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { - for 
(chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]); tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]); @@ -1480,8 +1466,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]); colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); } - } - else { + } else { LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); @@ -1512,8 +1497,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); colors0[1] = colors0[2] = colors0[3] = colors0[0]; } - } - else { + } else { /* * We don't have any weights to adjust, so instead calculate * the fourth texel as simply the average of the other 3. @@ -1619,8 +1603,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, colors0[3] = lp_build_swizzle_soa_channel(texel_bld, neighbors[0][0], chan_swiz); - } - else { + } else { /* Bilinear interpolate the four samples from the 2D image / 3D slice */ lp_build_reduce_filter_2d(texel_bld, bld->static_sampler_state->reduction_mode, @@ -1634,8 +1617,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, neighbors[1][1], colors0); } - } - else { + } else { LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); @@ -1652,8 +1634,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, texel_bld->one, texel_bld->zero); colors0[3] = lp_build_select(texel_bld, cmpval00, texel_bld->one, texel_bld->zero); - } - else { + } else { colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart, cmpval00, cmpval01, cmpval10, cmpval11); colors0[1] = colors0[2] = colors0[3] = colors0[0]; @@ -1726,8 +1707,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, colors0, colors1, colors_out); - } - else { + } else { LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); @@ -1742,10 +1722,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, 0); colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; } - } - else { + } else { /* 2D tex */ - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { colors_out[chan] = colors0[chan]; } } @@ -1761,11 +1740,11 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, * gather on a channel which will always return 0 or 1 in any case... 
*/ if (chan_swiz == PIPE_SWIZZLE_1) { - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { colors_out[chan] = texel_bld->one; } } else if (chan_swiz == PIPE_SWIZZLE_0) { - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { colors_out[chan] = texel_bld->zero; } } @@ -1803,7 +1782,6 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef mipoff0 = NULL; LLVMValueRef mipoff1 = NULL; LLVMValueRef colors0[4], colors1[4]; - unsigned chan; /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, @@ -1811,19 +1789,18 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, &row_stride0_vec, &img_stride0_vec); if (bld->num_mips == 1) { data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); - } - else { + } else { /* This path should work for num_lods 1 too but slightly less efficient */ data_ptr0 = bld->base_ptr; mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); } + if (img_filter == PIPE_TEX_FILTER_NEAREST) { lp_build_sample_image_nearest(bld, size0, row_stride0_vec, img_stride0_vec, data_ptr0, mipoff0, coords, offsets, colors0); - } - else { + } else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); lp_build_sample_image_linear(bld, is_gather, size0, NULL, row_stride0_vec, img_stride0_vec, @@ -1832,7 +1809,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, } /* Store the first level's colors in the output variables */ - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { LLVMBuildStore(builder, colors0[chan], colors_out[chan]); } @@ -1845,8 +1822,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT, lod_fpart, bld->lodf_bld.zero, "need_lerp"); - } - else { + } else { /* * We'll do mip filtering if any of the quads (or individual * pixel in case of per-pixel lod) need it. 
@@ -1875,8 +1851,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, &row_stride1_vec, &img_stride1_vec); if (bld->num_mips == 1) { data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); - } - else { + } else { data_ptr1 = bld->base_ptr; mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); } @@ -1885,8 +1860,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, row_stride1_vec, img_stride1_vec, data_ptr1, mipoff1, coords, offsets, colors1); - } - else { + } else { lp_build_sample_image_linear(bld, FALSE, size1, NULL, row_stride1_vec, img_stride1_vec, data_ptr1, mipoff1, coords, offsets, @@ -1901,7 +1875,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, bld->texel_bld.type, lod_fpart); - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, colors0[chan], colors1[chan], 0); @@ -1946,7 +1920,6 @@ lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, LLVMValueRef mipoff0 = NULL; LLVMValueRef mipoff1 = NULL; LLVMValueRef colors0[4], colors1[4]; - unsigned chan; /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, @@ -1954,8 +1927,7 @@ lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, &row_stride0_vec, &img_stride0_vec); if (bld->num_mips == 1) { data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); - } - else { + } else { /* This path should work for num_lods 1 too but slightly less efficient */ data_ptr0 = bld->base_ptr; mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); @@ -1967,7 +1939,7 @@ lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, colors0); /* Store the first level's colors in the output variables */ - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { LLVMBuildStore(builder, colors0[chan], colors_out[chan]); } @@ -2000,8 +1972,7 @@ lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, &row_stride1_vec, &img_stride1_vec); if (bld->num_mips == 1) { data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); - } - else { + } else { data_ptr1 = bld->base_ptr; mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); } @@ -2019,7 +1990,7 @@ lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, bld->texel_bld.type, lod_fpart); - for (chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 4; chan++) { colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, colors0[chan], colors1[chan], 0); @@ -2057,8 +2028,7 @@ lp_build_layer_coord(struct lp_build_sample_context *bld, out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers); *out_of_bounds = lp_build_or(int_coord_bld, out, out1); return layer; - } - else { + } else { LLVMValueRef maxlayer; LLVMValueRef s = is_cube_array ? 
lp_build_const_int32(bld->gallivm, 6) : bld->int_bld.one; @@ -2088,8 +2058,10 @@ lp_build_sample_ms_offset(struct lp_build_context *int_coord_bld, *offset = lp_build_add(int_coord_bld, *offset, sample_offset); } + #define WEIGHT_LUT_SIZE 1024 + static void lp_build_sample_aniso(struct lp_build_sample_context *bld, unsigned img_filter, @@ -2122,8 +2094,7 @@ lp_build_sample_aniso(struct lp_build_sample_context *bld, &row_stride0_vec, &img_stride0_vec); if (bld->num_mips == 1) { data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); - } - else { + } else { /* This path should work for num_lods 1 too but slightly less efficient */ data_ptr0 = bld->base_ptr; mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); @@ -2169,14 +2140,15 @@ lp_build_sample_aniso(struct lp_build_sample_context *bld, scaling = lp_build_rcp(&bld->levelf_bld, scaling); if (bld->num_lods != length) { - if (bld->levelf_bld.type.length == 1) + if (bld->levelf_bld.type.length == 1) { scaling = lp_build_broadcast_scalar(coord_bld, scaling); - else + } else { scaling = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, bld->levelf_bld.type, coord_bld->type, scaling); + } } ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, scaling); @@ -2564,7 +2536,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld, struct lp_derivatives cube_derivs; /* - printf("%s mip %d min %d mag %d\n", __FUNCTION__, + printf("%s mip %d min %d mag %d\n", __func__, mip_filter, min_filter, mag_filter); */ @@ -2582,11 +2554,10 @@ lp_build_sample_common(struct lp_build_sample_context *bld, * calculate / transform derivatives. */ if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) { - boolean need_derivs; - need_derivs = ((min_filter != mag_filter || - mip_filter != PIPE_TEX_MIPFILTER_NONE) && - !bld->static_sampler_state->min_max_lod_equal && - !explicit_lod); + boolean need_derivs = ((min_filter != mag_filter || + mip_filter != PIPE_TEX_MIPFILTER_NONE) && + !bld->static_sampler_state->min_max_lod_equal && + !explicit_lod); lp_build_cube_lookup(bld, coords, derivs, &cube_derivs, need_derivs); if (need_derivs) derivs = &cube_derivs; @@ -2599,8 +2570,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld, coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL); /* because of seamless filtering can't add it to face (coords[2]) here. */ } - } - else if ((target == PIPE_TEXTURE_1D_ARRAY || + } else if ((target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D_ARRAY) && !is_lodq) { coords[2] = lp_build_iround(&bld->coord_bld, coords[2]); coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL); @@ -2620,11 +2590,11 @@ lp_build_sample_common(struct lp_build_sample_context *bld, * too or do some other tricks to make it work). */ const struct util_format_description *format_desc = bld->format_desc; - unsigned chan_type; /* not entirely sure we couldn't end up with non-valid swizzle here */ - chan_type = format_desc->swizzle[0] <= PIPE_SWIZZLE_W ? - format_desc->channel[format_desc->swizzle[0]].type : - UTIL_FORMAT_TYPE_FLOAT; + const enum util_format_type chan_type = + format_desc->swizzle[0] <= PIPE_SWIZZLE_W + ? 
format_desc->channel[format_desc->swizzle[0]].type + : UTIL_FORMAT_TYPE_FLOAT; if (chan_type != UTIL_FORMAT_TYPE_FLOAT) { coords[4] = lp_build_clamp(&bld->coord_bld, coords[4], bld->coord_bld.zero, bld->coord_bld.one); @@ -2647,7 +2617,8 @@ lp_build_sample_common(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - LLVMValueRef first_level_vec = lp_build_broadcast_scalar(&bld->int_size_in_bld, first_level); + LLVMValueRef first_level_vec = + lp_build_broadcast_scalar(&bld->int_size_in_bld, first_level); lp_build_lod_selector(bld, is_lodq, sampler_index, first_level_vec, coords[0], coords[1], coords[2], @@ -2780,8 +2751,7 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld, /* d/s needs special handling because both present means just sampling depth */ if (util_format_is_depth_and_stencil(format_desc->format)) { chan = format_desc->swizzle[0]; - } - else { + } else { chan = util_format_get_first_non_void_channel(format_desc->format); } if (chan >= 0 && chan <= PIPE_SWIZZLE_W) { @@ -2792,8 +2762,7 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld, if (chan_norm) { min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F); max_clamp = vec4_bld.one; - } - else if (chan_pure) { + } else if (chan_pure) { /* * Border color was stored as int, hence need min/max clamp * only if chan has less than 32 bits.. @@ -2807,24 +2776,23 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld, } } /* TODO: no idea about non-pure, non-normalized! */ - } - else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) { + } else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) { if (chan_norm) { min_clamp = vec4_bld.zero; max_clamp = vec4_bld.one; - } - /* - * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24 - * we use Z32_FLOAT_S8X24 to imply sampling depth component - * and ignoring stencil, which will blow up here if we try to - * do a uint clamp in a float texel build... - * And even if we had that format, mesa st also thinks using z24s8 - * means depth sampling ignoring stencil. - */ - else if (chan_pure) { + } else if (chan_pure) { /* - * Border color was stored as uint, hence never need min - * clamp, and only need max clamp if chan has less than 32 bits. + * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24 + * we use Z32_FLOAT_S8X24 to imply sampling depth component and + * ignoring stencil, which will blow up here if we try to do a + * uint clamp in a float texel build... And even if we had + * that format, mesa st also thinks using z24s8 means depth + * sampling ignoring stencil. + */ + + /* + * Border color was stored as uint, hence never need min clamp, + * and only need max clamp if chan has less than 32 bits. */ unsigned chan_size = format_desc->channel[chan].size; if (chan_size < 32) { @@ -2833,8 +2801,7 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld, } /* TODO: no idea about non-pure, non-normalized! */ } - } - else if (chan_type == UTIL_FORMAT_TYPE_FIXED) { + } else if (chan_type == UTIL_FORMAT_TYPE_FIXED) { /* TODO: I have no idea what clamp this would need if any! 
*/ } } @@ -2862,17 +2829,15 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld, default: break; } - } - else { + } else { /* cannot figure this out from format description */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { /* s3tc formats are always unorm */ min_clamp = vec4_bld.zero; max_clamp = vec4_bld.one; - } - else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC || - format_desc->layout == UTIL_FORMAT_LAYOUT_ETC || - format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { + } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC || + format_desc->layout == UTIL_FORMAT_LAYOUT_ETC || + format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { switch (format_desc->format) { case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_RGTC2_UNORM: @@ -2901,12 +2866,11 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld, assert(0); break; } - } - /* - * all others from subsampled/other group, though we don't care - * about yuv (and should not have any from zs here) - */ - else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){ + } else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){ + /* + * all others from subsampled/other group, though we don't care + * about yuv (and should not have any from zs here) + */ switch (format_desc->format) { case PIPE_FORMAT_R8G8_B8G8_UNORM: case PIPE_FORMAT_G8R8_G8B8_UNORM: @@ -3016,8 +2980,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, coords, offsets, ilevel0, ilevel1, lod_fpart, texels); - } - else { + } else { /* * Could also get rid of the if-logic and always use mipmap_both, both * for the single lod and multi-lod case if nothing really uses this. @@ -3050,8 +3013,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, texels); } lp_build_endif(&if_ctx); - } - else { + } else { LLVMValueRef need_linear, linear_mask; unsigned mip_filter_for_nearest; struct lp_build_if_state if_ctx; @@ -3059,8 +3021,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, if (min_filter == PIPE_TEX_FILTER_LINEAR) { linear_mask = lod_positive; mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE; - } - else { + } else { linear_mask = lp_build_not(&bld->lodi_bld, lod_positive); mip_filter_for_nearest = mip_filter; } @@ -3152,8 +3113,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld, if (bld->num_mips != int_coord_bld->type.length) { ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type, perquadi_bld->type, explicit_lod, 0); - } - else { + } else { ilevel = explicit_lod; } LLVMValueRef last_level = bld->dynamic_state->last_level(bld->gallivm, @@ -3165,13 +3125,11 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld, first_level, last_level, ilevel, &ilevel, out_of_bound_ret_zero ? 
&out_of_bounds : NULL); - } - else { + } else { assert(bld->num_mips == 1); if (bld->static_texture_state->target != PIPE_BUFFER) { ilevel = first_level; - } - else { + } else { ilevel = lp_build_const_int32(bld->gallivm, 0); } } @@ -3186,8 +3144,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld, if (out_of_bound_ret_zero) { z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1); out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); - } - else { + } else { z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL); } } @@ -3311,8 +3268,7 @@ lp_build_texel_type(struct lp_type texel_type, } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length); } - } - else if (util_format_has_stencil(format_desc) && + } else if (util_format_has_stencil(format_desc) && !util_format_has_depth(format_desc)) { /* for stencil only formats, sample stencil (uint) */ texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length); @@ -3389,17 +3345,14 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, lod_bias = lod; assert(lod); assert(derivs == NULL); - } - else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) { + } else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) { explicit_lod = lod; assert(lod); assert(derivs == NULL); - } - else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { + } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { assert(derivs); assert(lod == NULL); - } - else { + } else { assert(derivs == NULL); assert(lod == NULL); } @@ -3537,15 +3490,13 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, */ bld.num_mips = type.length; bld.num_lods = type.length; - } - else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT || + } else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT || (explicit_lod || lod_bias || derivs)) { if ((!op_is_tex && target != PIPE_BUFFER) || (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { bld.num_mips = type.length; bld.num_lods = type.length; - } - else if (op_is_tex && min_img_filter != mag_img_filter) { + } else if (op_is_tex && min_img_filter != mag_img_filter) { bld.num_mips = 1; bld.num_lods = type.length; } @@ -3555,8 +3506,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { bld.num_mips = num_quads; bld.num_lods = num_quads; - } - else if (op_is_tex && min_img_filter != mag_img_filter) { + } else if (op_is_tex && min_img_filter != mag_img_filter) { bld.num_mips = 1; bld.num_lods = num_quads; } @@ -3644,8 +3594,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, bld.int_tex_blocksize = LLVMConstInt(i32t, res_bw, 0); bld.int_tex_blocksize_log2 = LLVMConstInt(i32t, util_logbase2(res_bw), 0); bld.int_view_blocksize = LLVMConstInt(i32t, bw, 0); - } - else { + } else { bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef, tex_width, LLVMConstInt(i32t, 0, 0), ""); @@ -3769,7 +3718,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, if ((gallivm_debug & GALLIVM_DEBUG_PERF) && !use_aos && util_format_fits_8unorm(bld.format_desc)) { debug_printf("%s: using floating point linear filtering for %s\n", - __FUNCTION__, bld.format_desc->short_name); + __func__, bld.format_desc->short_name); debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d" " wraps %d wrapt %d wrapr %d\n", derived_sampler_state.min_img_filter, @@ -3825,8 +3774,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, ilevel0, 
ilevel1, texel_out); } - } - else { + } else { struct lp_build_sample_context bld4; struct lp_type type4 = type; LLVMValueRef texelout4[4]; @@ -3887,8 +3835,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { bld4.num_mips = type4.length; bld4.num_lods = type4.length; - } - else if (op_is_tex && min_img_filter != mag_img_filter) { + } else if (op_is_tex && min_img_filter != mag_img_filter) { bld4.num_mips = 1; bld4.num_lods = type4.length; } @@ -3963,9 +3910,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, lod_positive4, lod_fpart4, ilevel04, ilevel14, texelout4); - } - - else { + } else { /* this path is currently unreachable and hence might break easily... */ LLVMValueRef newcoords4[5]; newcoords4[0] = s4; @@ -4068,15 +4013,14 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm, struct lp_derivatives *deriv_ptr = NULL; unsigned num_param = 0; unsigned num_coords, num_derivs, num_offsets, layer; - enum lp_sampler_lod_control lod_control; - enum lp_sampler_op_type op_type; boolean need_cache = FALSE; - lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> - LP_SAMPLER_LOD_CONTROL_SHIFT; + const enum lp_sampler_lod_control lod_control = + (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) + >> LP_SAMPLER_LOD_CONTROL_SHIFT; - op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> - LP_SAMPLER_OP_TYPE_SHIFT; + const enum lp_sampler_op_type op_type = + (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT; get_target_info(static_texture_state->target, &num_coords, &num_derivs, &num_offsets, &layer); @@ -4124,8 +4068,7 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm, if (lod_control == LP_SAMPLER_LOD_BIAS || lod_control == LP_SAMPLER_LOD_EXPLICIT) { lod = LLVMGetParam(function, num_param++); - } - else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { + } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { for (unsigned i = 0; i < num_derivs; i++) { derivs.ddx[i] = LLVMGetParam(function, num_param++); derivs.ddy[i] = LLVMGetParam(function, num_param++); @@ -4269,8 +4212,7 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, if (lod_control == LP_SAMPLER_LOD_BIAS || lod_control == LP_SAMPLER_LOD_EXPLICIT) { arg_types[num_param++] = LLVMTypeOf(params->lod); - } - else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { + } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { for (unsigned i = 0; i < num_derivs; i++) { arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]); arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]); @@ -4339,8 +4281,7 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, if (lod_control == LP_SAMPLER_LOD_BIAS || lod_control == LP_SAMPLER_LOD_EXPLICIT) { args[num_args++] = params->lod; - } - else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { + } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { for (unsigned i = 0; i < num_derivs; i++) { args[num_args++] = derivs->ddx[i]; args[num_args++] = derivs->ddy[i]; @@ -4353,7 +4294,6 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, LLVMBasicBlockRef bb = LLVMGetInsertBlock(builder); LLVMValueRef inst = LLVMGetLastInstruction(bb); LLVMSetInstructionCallConv(inst, LLVMFastCallConv); - } @@ -4416,8 +4356,7 @@ lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state, params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, ""); } - } - else { + } else { lp_build_sample_soa_code(gallivm, static_texture_state, static_sampler_state, @@ -4644,8 +4583,7 @@ 
lp_build_size_query_soa(struct gallivm_state *gallivm, out = lp_build_or(&leveli_bld, out, out1); if (num_lods == 1) { out = lp_build_broadcast_scalar(&bld_int_vec4, out); - } - else { + } else { /* TODO */ assert(0); } @@ -4678,8 +4616,7 @@ lp_build_size_query_soa(struct gallivm_state *gallivm, LLVMValueRef num_levels; if (static_state->level_zero_only) { num_levels = bld_int_scalar.one; - } - else { + } else { LLVMValueRef last_level; last_level = dynamic_state->last_level(gallivm, context_type, @@ -4694,6 +4631,14 @@ lp_build_size_query_soa(struct gallivm_state *gallivm, lp_build_vec_type(gallivm, params->int_type), num_levels); } + + if (target == PIPE_BUFFER) { + struct lp_build_context bld_int; + lp_build_context_init(&bld_int, gallivm, params->int_type); + + params->sizes_out[0] = lp_build_min(&bld_int, params->sizes_out[0], + lp_build_const_int_vec(gallivm, params->int_type, LP_MAX_TEXEL_BUFFER_ELEMENTS)); + } } @@ -4932,12 +4877,13 @@ lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state, outdata[chan] = lp_build_select(&texel_bld, out_of_bounds, texel_bld.zero, outdata[chan]); } - if (format_desc->swizzle[3] == PIPE_SWIZZLE_1) + if (format_desc->swizzle[3] == PIPE_SWIZZLE_1) { outdata[3] = lp_build_select(&texel_bld, out_of_bounds, texel_bld.one, outdata[3]); - else + } else { outdata[3] = lp_build_select(&texel_bld, out_of_bounds, texel_bld.zero, outdata[3]); + } } else if (params->img_op == LP_IMG_STORE) { lp_build_store_rgba_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index edf9cf89a..d859e6ef9 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -37,7 +37,7 @@ * @author Jose Fonseca <jfonseca@vmware.com> */ -#include "pipe/p_config.h" +#include "util/detect.h" #include "pipe/p_shader_tokens.h" #include "util/u_debug.h" #include "util/u_math.h" diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index d98d20e11..916386d31 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -36,7 +36,7 @@ * Brian Paul, and others. 
*/ -#include "pipe/p_config.h" +#include "util/detect.h" #include "pipe/p_shader_tokens.h" #include "util/u_debug.h" #include "util/u_math.h" diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_context.c b/lib/mesa/src/gallium/auxiliary/hud/hud_context.c index 6b6e15653..f5b11c425 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_context.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_context.c @@ -68,12 +68,18 @@ #include "tgsi/tgsi_text.h" #include "tgsi/tgsi_dump.h" -/* Control the visibility of all HUD contexts */ -static boolean huds_visible = TRUE; -static int hud_scale = 1; +#define HUD_DEFAULT_VISIBILITY TRUE +#define HUD_DEFAULT_SCALE 1 +#define HUD_DEFAULT_ROTATION 0 +#define HUD_DEFAULT_OPACITY 66 +/* Control the visibility of all HUD contexts */ +static boolean huds_visible = HUD_DEFAULT_VISIBILITY; +static int hud_scale = HUD_DEFAULT_SCALE; +static int hud_rotate = HUD_DEFAULT_ROTATION; +static float hud_opacity = HUD_DEFAULT_OPACITY / 100.0f; -#ifdef PIPE_OS_UNIX +#if DETECT_OS_UNIX static void signal_visible_handler(int sig, siginfo_t *siginfo, void *context) { @@ -219,6 +225,24 @@ hud_draw_string(struct hud_context *hud, unsigned x, unsigned y, hud->text.num_vertices += num/4; } +static const char * +get_float_modifier(double d) +{ + /* Round to 3 decimal places so as not to print trailing zeros. */ + if (d*1000 != (int)(d*1000)) + d = round(d * 1000) / 1000; + + /* Show at least 4 digits with at most 3 decimal places, but not zeros. */ + if (d >= 1000 || d == (int)d) + return "%.0f"; + else if (d >= 100 || d*10 == (int)(d*10)) + return "%.1f"; + else if (d >= 10 || d*100 == (int)(d*100)) + return "%.2f"; + else + return "%.3f"; +} + static void number_to_human_readable(double num, enum pipe_driver_query_type type, char *out) @@ -295,20 +319,9 @@ number_to_human_readable(double num, enum pipe_driver_query_type type, d /= divisor; unit++; } - - /* Round to 3 decimal places so as not to print trailing zeros. */ - if (d*1000 != (int)(d*1000)) - d = round(d * 1000) / 1000; - - /* Show at least 4 digits with at most 3 decimal places, but not zeros. 
*/ - if (d >= 1000 || d == (int)d) - sprintf(out, "%.0f%s", d, units[unit]); - else if (d >= 100 || d*10 == (int)(d*10)) - sprintf(out, "%.1f%s", d, units[unit]); - else if (d >= 10 || d*100 == (int)(d*100)) - sprintf(out, "%.2f%s", d, units[unit]); - else - sprintf(out, "%.3f%s", d, units[unit]); + int n = sprintf(out, get_float_modifier(d), d); + if (n > 0) + sprintf(&out[n], "%s", units[unit]); } static void @@ -486,8 +499,21 @@ hud_draw_results(struct hud_context *hud, struct pipe_resource *tex) hud->fb_width = tex->width0; hud->fb_height = tex->height0; - hud->constants.two_div_fb_width = 2.0f / hud->fb_width; - hud->constants.two_div_fb_height = 2.0f / hud->fb_height; + float th = hud_rotate * (M_PI / 180.0f); + hud->constants.rotate[0] = cos(th); + hud->constants.rotate[1] = -sin(th); + hud->constants.rotate[2] = sin(th); + hud->constants.rotate[3] = cos(th); + + /* invert the aspect ratio when we rotate the hud */ + if (hud_rotate % 180 == 90) { + hud->constants.two_div_fb_height = 2.0f / hud->fb_width; + hud->constants.two_div_fb_width = 2.0f / hud->fb_height; + } else { + assert(hud_rotate % 180 == 0); + hud->constants.two_div_fb_width = 2.0f / hud->fb_width; + hud->constants.two_div_fb_height = 2.0f / hud->fb_height; + } cso_save_state(cso, (CSO_BIT_FRAMEBUFFER | CSO_BIT_SAMPLE_MASK | @@ -530,6 +556,7 @@ hud_draw_results(struct hud_context *hud, struct pipe_resource *tex) fb.zsbuf = NULL; fb.width = hud->fb_width; fb.height = hud->fb_height; + fb.resolve = NULL; viewport.scale[0] = 0.5f * hud->fb_width; viewport.scale[1] = 0.5f * hud->fb_height; @@ -568,7 +595,7 @@ hud_draw_results(struct hud_context *hud, struct pipe_resource *tex) hud->constants.color[0] = 0; hud->constants.color[1] = 0; hud->constants.color[2] = 0; - hud->constants.color[3] = 0.666f; + hud->constants.color[3] = hud_opacity; hud->constants.translate[0] = 0; hud->constants.translate[1] = 0; hud->constants.scale[0] = hud_scale; @@ -627,10 +654,10 @@ done: /* restore states not restored by cso */ if (hud->st) { - hud->st->invalidate_state(hud->st, - ST_INVALIDATE_FS_SAMPLER_VIEWS | - ST_INVALIDATE_VS_CONSTBUF0 | - ST_INVALIDATE_VERTEX_BUFFERS); + hud->st_invalidate_state(hud->st, + ST_INVALIDATE_FS_SAMPLER_VIEWS | + ST_INVALIDATE_VS_CONSTBUF0 | + ST_INVALIDATE_VERTEX_BUFFERS); } pipe_surface_reference(&surf, NULL); @@ -971,8 +998,12 @@ hud_graph_add_value(struct hud_graph *gr, double value) value = value > gr->pane->ceiling ? 
gr->pane->ceiling : value; if (gr->fd) { + if (gr->fd == stdout) { + fprintf(gr->fd, "%s: ", gr->name); + } if (fabs(value - lround(value)) > FLT_EPSILON) { - fprintf(gr->fd, "%f\n", value); + fprintf(gr->fd, get_float_modifier(value), value); + fprintf(gr->fd, "\n"); } else { fprintf(gr->fd, "%" PRIu64 "\n", (uint64_t) lround(value)); @@ -1042,11 +1073,9 @@ static void strcat_without_spaces(char *dst, const char *src) * is a HUD variable such as "fps", or "cpu" */ static void -hud_graph_set_dump_file(struct hud_graph *gr) +hud_graph_set_dump_file(struct hud_graph *gr, const char *hud_dump_dir, bool to_stdout) { - const char *hud_dump_dir = getenv("GALLIUM_HUD_DUMP_DIR"); - - if (hud_dump_dir && access(hud_dump_dir, W_OK) == 0) { + if (hud_dump_dir) { char *dump_file = malloc(strlen(hud_dump_dir) + sizeof(PATH_SEP) + sizeof(gr->name)); if (dump_file) { @@ -1054,12 +1083,15 @@ hud_graph_set_dump_file(struct hud_graph *gr) strcat(dump_file, PATH_SEP); strcat_without_spaces(dump_file, gr->name); gr->fd = fopen(dump_file, "w+"); - if (gr->fd) { - /* flush output after each line is written */ - setvbuf(gr->fd, NULL, _IOLBF, 0); - } free(dump_file); } + } else if (to_stdout) { + gr->fd = stdout; + } + + if (gr->fd) { + /* flush output after each line is written */ + setvbuf(gr->fd, NULL, _IOLBF, 0); } } @@ -1185,7 +1217,7 @@ has_pipeline_stats_query(struct pipe_screen *screen) static void hud_parse_env_var(struct hud_context *hud, struct pipe_screen *screen, - const char *env) + const char *env, unsigned period_ms) { unsigned num, i; char name_a[256], s[256]; @@ -1193,12 +1225,13 @@ hud_parse_env_var(struct hud_context *hud, struct pipe_screen *screen, struct hud_pane *pane = NULL; unsigned x = 10, y = 10, y_simple = 10; unsigned width = 251, height = 100; - unsigned period = 500 * 1000; /* default period (1/2 second) */ + unsigned period = period_ms * 1000; uint64_t ceiling = UINT64_MAX; unsigned column_width = 251; boolean dyn_ceiling = false; boolean reset_colors = false; boolean sort_items = false; + boolean to_stdout = false; const char *period_env; if (strncmp(env, "simple,", 7) == 0) { @@ -1359,6 +1392,9 @@ hud_parse_env_var(struct hud_context *hud, struct pipe_screen *screen, PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE, 0); } + else if (strcmp(name, "stdout") == 0) { + to_stdout = true; + } else { boolean processed = FALSE; @@ -1509,11 +1545,14 @@ hud_parse_env_var(struct hud_context *hud, struct pipe_screen *screen, } } - LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) { - struct hud_graph *gr; + const char *hud_dump_dir = getenv("GALLIUM_HUD_DUMP_DIR"); + if ((hud_dump_dir && access(hud_dump_dir, W_OK) == 0) || to_stdout) { + LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) { + struct hud_graph *gr; - LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) { - hud_graph_set_dump_file(gr); + LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) { + hud_graph_set_dump_file(gr, hud_dump_dir, to_stdout); + } } } } @@ -1570,6 +1609,7 @@ print_help(struct pipe_screen *screen) puts(" Example: GALLIUM_HUD=\".w256.h64.x1600.y520.d.c1000fps+cpu,.datom-count\""); puts(""); puts(" Available names:"); + puts(" stdout (prints the counters value to stdout)"); puts(" fps"); puts(" frametime"); puts(" cpu"); @@ -1660,7 +1700,8 @@ hud_unset_draw_context(struct hud_context *hud) static bool hud_set_draw_context(struct hud_context *hud, struct cso_context *cso, - struct st_context_iface *st) + struct st_context *st, + hud_st_invalidate_state_func st_invalidate_state) { struct pipe_context *pipe = 
cso_get_pipe_context(cso); @@ -1668,6 +1709,7 @@ hud_set_draw_context(struct hud_context *hud, struct cso_context *cso, hud->pipe = pipe; hud->cso = cso; hud->st = st; + hud->st_invalidate_state = st_invalidate_state; struct pipe_sampler_view view_templ; u_sampler_view_default_template( @@ -1721,15 +1763,20 @@ hud_set_draw_context(struct hud_context *hud, struct cso_context *cso, "DCL OUT[2], GENERIC[0]\n" /* texcoord */ /* [0] = color, * [1] = (2/fb_width, 2/fb_height, xoffset, yoffset) - * [2] = (xscale, yscale, 0, 0) */ - "DCL CONST[0][0..2]\n" - "DCL TEMP[0]\n" + * [2] = (xscale, yscale, 0, 0) + * [3] = rotation_matrix */ + "DCL CONST[0][0..3]\n" + "DCL TEMP[0..2]\n" "IMM[0] FLT32 { -1, 0, 0, 1 }\n" /* v = in * (xscale, yscale) + (xoffset, yoffset) */ "MAD TEMP[0].xy, IN[0], CONST[0][2].xyyy, CONST[0][1].zwww\n" - /* pos = v * (2 / fb_width, 2 / fb_height) - (1, 1) */ - "MAD OUT[0].xy, TEMP[0], CONST[0][1].xyyy, IMM[0].xxxx\n" + /* v = v * (2 / fb_width, 2 / fb_height) - (1, 1) */ + "MAD TEMP[1].xy, TEMP[0], CONST[0][1].xyyy, IMM[0].xxxx\n" + + /* pos = rotation_matrix * v */ + "MUL TEMP[2].xyzw, TEMP[1].xyxy, CONST[0][3].xyzw\n" + "ADD OUT[0].xy, TEMP[2].xzzz, TEMP[2].ywww\n" "MOV OUT[0].zw, IMM[0]\n" "MOV OUT[1], CONST[0][0]\n" @@ -1758,16 +1805,21 @@ hud_set_draw_context(struct hud_context *hud, struct cso_context *cso, "DCL OUT[1], GENERIC[0]\n" /* texcoord */ /* [0] = color, * [1] = (2/fb_width, 2/fb_height, xoffset, yoffset) - * [2] = (xscale, yscale, 0, 0) */ - "DCL CONST[0][0..2]\n" - "DCL TEMP[0]\n" + * [2] = (xscale, yscale, 0, 0) + * [3] = rotation_matrix */ + "DCL CONST[0][0..3]\n" + "DCL TEMP[0..2]\n" "IMM[0] FLT32 { -1, 0, 0, 1 }\n" "IMM[1] FLT32 { 0.0078125, 0.00390625, 1, 1 }\n" // 1.0 / 128, 1.0 / 256, 1, 1 /* v = in * (xscale, yscale) + (xoffset, yoffset) */ "MAD TEMP[0].xy, IN[0], CONST[0][2].xyyy, CONST[0][1].zwww\n" /* pos = v * (2 / fb_width, 2 / fb_height) - (1, 1) */ - "MAD OUT[0].xy, TEMP[0], CONST[0][1].xyyy, IMM[0].xxxx\n" + "MAD TEMP[1].xy, TEMP[0], CONST[0][1].xyyy, IMM[0].xxxx\n" + + /* pos = rotation_matrix * v */ + "MUL TEMP[2].xyzw, TEMP[1].xyxy, CONST[0][3].xyzw\n" + "ADD OUT[0].xy, TEMP[2].xzzz, TEMP[2].ywww\n" "MOV OUT[0].zw, IMM[0]\n" "MUL OUT[1], IN[1], IMM[1]\n" @@ -1831,8 +1883,9 @@ hud_set_record_context(struct hud_context *hud, struct pipe_context *pipe) * record queries in one context and draw them in another. 
*/ struct hud_context * -hud_create(struct cso_context *cso, struct st_context_iface *st, - struct hud_context *share) +hud_create(struct cso_context *cso, struct hud_context *share, + struct st_context *st, + hud_st_invalidate_state_func st_invalidate_state) { const char *share_env = debug_get_option("GALLIUM_HUD_SHARE", NULL); unsigned record_ctx = 0, draw_ctx = 0; @@ -1856,7 +1909,7 @@ hud_create(struct cso_context *cso, struct st_context_iface *st, if (context_id == draw_ctx) { assert(!share->pipe); - hud_set_draw_context(share, cso, st); + hud_set_draw_context(share, cso, st, st_invalidate_state); } return share; @@ -1865,16 +1918,36 @@ hud_create(struct cso_context *cso, struct st_context_iface *st, struct pipe_screen *screen = cso_get_pipe_context(cso)->screen; struct hud_context *hud; unsigned i; - const char *env = debug_get_option("GALLIUM_HUD", NULL); -#ifdef PIPE_OS_UNIX + unsigned default_period_ms = 500;/* default period (1/2 second) */ + const char *show_fps = getenv("LIBGL_SHOW_FPS"); + bool emulate_libgl_show_fps = false; + if (show_fps) { + default_period_ms = atoi(show_fps) * 1000; + if (default_period_ms) + emulate_libgl_show_fps = true; + else + default_period_ms = 500; + } + const char *env = debug_get_option("GALLIUM_HUD", + emulate_libgl_show_fps ? "stdout,fps" : NULL); +#if DETECT_OS_UNIX unsigned signo = debug_get_num_option("GALLIUM_HUD_TOGGLE_SIGNAL", 0); static boolean sig_handled = FALSE; struct sigaction action; memset(&action, 0, sizeof(action)); #endif - huds_visible = debug_get_bool_option("GALLIUM_HUD_VISIBLE", TRUE); - hud_scale = debug_get_num_option("GALLIUM_HUD_SCALE", 1); + huds_visible = debug_get_bool_option("GALLIUM_HUD_VISIBLE", !emulate_libgl_show_fps); + hud_opacity = debug_get_num_option("GALLIUM_HUD_OPACITY", HUD_DEFAULT_OPACITY) / 100.0f; + hud_scale = debug_get_num_option("GALLIUM_HUD_SCALE", HUD_DEFAULT_SCALE); + hud_rotate = debug_get_num_option("GALLIUM_HUD_ROTATION", HUD_DEFAULT_ROTATION) % 360; + if (hud_rotate < 0) { + hud_rotate += 360; + } + if (hud_rotate % 90 != 0) { + fprintf(stderr, "gallium_hud: rotation must be a multiple of 90. 
Falling back to 0.\n"); + hud_rotate = 0; + } if (!env || !*env) return NULL; @@ -1953,7 +2026,7 @@ hud_create(struct cso_context *cso, struct st_context_iface *st, list_inithead(&hud->pane_list); /* setup sig handler once for all hud contexts */ -#ifdef PIPE_OS_UNIX +#if DETECT_OS_UNIX if (!sig_handled && signo != 0) { action.sa_sigaction = &signal_visible_handler; action.sa_flags = SA_SIGINFO; @@ -1971,9 +2044,9 @@ hud_create(struct cso_context *cso, struct st_context_iface *st, if (record_ctx == 0) hud_set_record_context(hud, cso_get_pipe_context(cso)); if (draw_ctx == 0) - hud_set_draw_context(hud, cso, st); + hud_set_draw_context(hud, cso, st, st_invalidate_state); - hud_parse_env_var(hud, screen, env); + hud_parse_env_var(hud, screen, env, default_period_ms); return hud; } diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_context.h b/lib/mesa/src/gallium/auxiliary/hud/hud_context.h index ed5dd5dbf..ad495970d 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_context.h +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_context.h @@ -33,11 +33,15 @@ struct cso_context; struct pipe_context; struct pipe_resource; struct util_queue_monitoring; -struct st_context_iface; +struct st_context; + +typedef void (*hud_st_invalidate_state_func)(struct st_context *st, + unsigned flags); struct hud_context * -hud_create(struct cso_context *cso, struct st_context_iface *st, - struct hud_context *share); +hud_create(struct cso_context *cso, struct hud_context *share, + struct st_context *st, + hud_st_invalidate_state_func st_invalidate_state); void hud_destroy(struct hud_context *hud, struct cso_context *cso); diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_cpu.c b/lib/mesa/src/gallium/auxiliary/hud/hud_cpu.c index 820e7d710..a4313b2c7 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_cpu.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_cpu.c @@ -30,18 +30,18 @@ #include "hud/hud_private.h" #include "util/os_time.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "util/u_queue.h" #include <stdio.h> #include <inttypes.h> -#ifdef PIPE_OS_WINDOWS +#if DETECT_OS_WINDOWS #include <windows.h> #endif -#if defined(PIPE_OS_BSD) +#if DETECT_OS_BSD #include <sys/types.h> #include <sys/sysctl.h> -#if defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD) +#if DETECT_OS_NETBSD || DETECT_OS_OPENBSD #include <sys/sched.h> #else #include <sys/resource.h> @@ -49,7 +49,7 @@ #endif -#ifdef PIPE_OS_WINDOWS +#if DETECT_OS_WINDOWS static inline uint64_t filetime_to_scalar(FILETIME ft) @@ -95,12 +95,12 @@ get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time) return TRUE; } -#elif defined(PIPE_OS_BSD) +#elif DETECT_OS_BSD static boolean get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time) { -#if defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD) +#if DETECT_OS_NETBSD || DETECT_OS_OPENBSD uint64_t cp_time[CPUSTATES]; #else long cp_time[CPUSTATES]; @@ -110,12 +110,12 @@ get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time) if (cpu_index == ALL_CPUS) { len = sizeof(cp_time); -#if defined(PIPE_OS_NETBSD) +#if DETECT_OS_NETBSD int mib[] = { CTL_KERN, KERN_CP_TIME }; if (sysctl(mib, ARRAY_SIZE(mib), cp_time, &len, NULL, 0) == -1) return FALSE; -#elif defined(PIPE_OS_OPENBSD) +#elif DETECT_OS_OPENBSD int mib[] = { CTL_KERN, KERN_CPTIME }; long sum_cp_time[CPUSTATES]; @@ -130,13 +130,13 @@ get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time) return FALSE; #endif } else { -#if defined(PIPE_OS_NETBSD) 
+#if DETECT_OS_NETBSD int mib[] = { CTL_KERN, KERN_CP_TIME, cpu_index }; len = sizeof(cp_time); if (sysctl(mib, ARRAY_SIZE(mib), cp_time, &len, NULL, 0) == -1) return FALSE; -#elif defined(PIPE_OS_OPENBSD) +#elif DETECT_OS_OPENBSD int mib[] = { CTL_KERN, KERN_CPTIME2, cpu_index }; len = sizeof(cp_time); diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_cpufreq.c b/lib/mesa/src/gallium/auxiliary/hud/hud_cpufreq.c index 9c7b90f73..bf1be1e9b 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_cpufreq.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_cpufreq.c @@ -36,7 +36,7 @@ #include "hud/hud_private.h" #include "util/list.h" #include "util/os_time.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include <stdio.h> #include <unistd.h> diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_diskstat.c b/lib/mesa/src/gallium/auxiliary/hud/hud_diskstat.c index b5d9710ef..55bb9023a 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_diskstat.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_diskstat.c @@ -35,7 +35,7 @@ #include "hud/hud_private.h" #include "util/list.h" #include "util/os_time.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "util/u_string.h" #include <stdio.h> diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_nic.c b/lib/mesa/src/gallium/auxiliary/hud/hud_nic.c index b10247e44..b54af2add 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_nic.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_nic.c @@ -35,7 +35,7 @@ #include "hud/hud_private.h" #include "util/list.h" #include "util/os_time.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "util/u_string.h" #include <stdio.h> diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_private.h b/lib/mesa/src/gallium/auxiliary/hud/hud_private.h index 3604760c7..0c3fbbba4 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_private.h +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_private.h @@ -32,6 +32,7 @@ #include "pipe/p_state.h" #include "util/list.h" #include "hud/font.h" +#include "hud/hud_context.h" #include "cso_cache/cso_context.h" enum hud_counter { @@ -51,7 +52,10 @@ struct hud_context { /* Context where the HUD is drawn: */ struct pipe_context *pipe; struct cso_context *cso; - struct st_context_iface *st; + + /* For notifying st_context to rebind states that we clobbered. 
*/ + struct st_context *st; + hud_st_invalidate_state_func st_invalidate_state; struct hud_batch_query_context *batch_query; struct list_head pane_list; @@ -79,6 +83,7 @@ struct hud_context { float translate[2]; float scale[2]; float padding[2]; + float rotate[4]; } constants; struct pipe_constant_buffer constbuf; diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_sensors_temp.c b/lib/mesa/src/gallium/auxiliary/hud/hud_sensors_temp.c index f99752f38..96876d3a7 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_sensors_temp.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_sensors_temp.c @@ -32,7 +32,7 @@ #include "hud/hud_private.h" #include "util/list.h" #include "util/os_time.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "util/u_string.h" #include <stdio.h> diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_indices.c b/lib/mesa/src/gallium/auxiliary/indices/u_indices.c index 53dbb760d..e43072351 100644 --- a/lib/mesa/src/gallium/auxiliary/indices/u_indices.c +++ b/lib/mesa/src/gallium/auxiliary/indices/u_indices.c @@ -57,6 +57,10 @@ u_index_prim_type_convert(unsigned hw_mask, enum pipe_prim_type prim, bool pv_ma case PIPE_PRIM_TRIANGLE_FAN: case PIPE_PRIM_QUADS: case PIPE_PRIM_QUAD_STRIP: + if ((hw_mask & (1<<PIPE_PRIM_QUADS)) && pv_matches) + return PIPE_PRIM_QUADS; + else + return PIPE_PRIM_TRIANGLES; case PIPE_PRIM_POLYGON: return PIPE_PRIM_TRIANGLES; case PIPE_PRIM_LINES_ADJACENCY: @@ -140,8 +144,9 @@ u_index_translator(unsigned hw_mask, return U_TRANSLATE_MEMCPY; } - *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim_restart][prim]; *out_prim = u_index_prim_type_convert(hw_mask, prim, in_pv == out_pv); + *out_translate = (*out_prim == PIPE_PRIM_QUADS ? translate_quads : translate) + [in_idx][out_idx][in_pv][out_pv][prim_restart][prim]; *out_nr = u_index_count_converted_indices(hw_mask, in_pv == out_pv, prim, nr); return ret; @@ -170,9 +175,9 @@ u_index_count_converted_indices(unsigned hw_mask, bool pv_matches, enum pipe_pri case PIPE_PRIM_TRIANGLE_FAN: return (nr - 2) * 3; case PIPE_PRIM_QUADS: - return (nr / 4) * 6; + return ((hw_mask & (1<<PIPE_PRIM_QUADS)) && pv_matches) ? nr : (nr / 4) * 6; case PIPE_PRIM_QUAD_STRIP: - return (nr - 2) * 3; + return ((hw_mask & (1<<PIPE_PRIM_QUADS)) && pv_matches) ? (nr - 2) * 2 : (nr - 2) * 3; case PIPE_PRIM_POLYGON: return (nr - 2) * 3; case PIPE_PRIM_LINES_ADJACENCY: @@ -237,9 +242,11 @@ u_index_generator(unsigned hw_mask, if ((hw_mask & (1<<prim)) && (in_pv == out_pv)) { - *out_generate = generate[out_idx][in_pv][out_pv][PIPE_PRIM_POINTS]; + *out_generate = (*out_prim == PIPE_PRIM_QUADS ? generate_quads : generate) + [out_idx][in_pv][out_pv][PIPE_PRIM_POINTS]; return U_GENERATE_LINEAR; } - *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_generate = (*out_prim == PIPE_PRIM_QUADS ? generate_quads : generate) + [out_idx][in_pv][out_pv][prim]; return prim == PIPE_PRIM_LINE_LOOP ? 
U_GENERATE_ONE_OFF : U_GENERATE_REUSABLE; } diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_indices_gen.py b/lib/mesa/src/gallium/auxiliary/indices/u_indices_gen.py index de3bf9570..03640699c 100644 --- a/lib/mesa/src/gallium/auxiliary/indices/u_indices_gen.py +++ b/lib/mesa/src/gallium/auxiliary/indices/u_indices_gen.py @@ -51,6 +51,8 @@ PRIMS=('points', 'trisadj', 'tristripadj') +OUT_TRIS, OUT_QUADS = 'tris', 'quads' + LONGPRIMS=('PIPE_PRIM_POINTS', 'PIPE_PRIM_LINES', 'PIPE_PRIM_LINE_STRIP', @@ -91,6 +93,9 @@ def prolog(f: 'T.TextIO') -> None: static u_translate_func translate[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PR_COUNT][PRIM_COUNT]; static u_generate_func generate[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT]; +static u_translate_func translate_quads[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PR_COUNT][PRIM_COUNT]; +static u_generate_func generate_quads[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT]; + ''') @@ -121,13 +126,21 @@ def do_tri(f: 'T.TextIO', intype, outtype, ptr, v0, v1, v2, inpv, outpv ): else: shape(f, intype, outtype, ptr, v2, v0, v1 ) -def do_quad(f: 'T.TextIO', intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv ): - if inpv == LAST: - do_tri(f, intype, outtype, ptr+'+0', v0, v1, v3, inpv, outpv ); - do_tri(f, intype, outtype, ptr+'+3', v1, v2, v3, inpv, outpv ); +def do_quad(f: 'T.TextIO', intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv, out_prim ): + if out_prim == OUT_TRIS: + if inpv == LAST: + do_tri(f, intype, outtype, ptr+'+0', v0, v1, v3, inpv, outpv ); + do_tri(f, intype, outtype, ptr+'+3', v1, v2, v3, inpv, outpv ); + else: + do_tri(f, intype, outtype, ptr+'+0', v0, v1, v2, inpv, outpv ); + do_tri(f, intype, outtype, ptr+'+3', v0, v2, v3, inpv, outpv ); else: - do_tri(f, intype, outtype, ptr+'+0', v0, v1, v2, inpv, outpv ); - do_tri(f, intype, outtype, ptr+'+3', v0, v2, v3, inpv, outpv ); + if inpv == outpv: + shape(f, intype, outtype, ptr, v0, v1, v2, v3) + elif inpv == FIRST: + shape(f, intype, outtype, ptr, v1, v2, v3, v0) + else: + shape(f, intype, outtype, ptr, v3, v0, v1, v2) def do_lineadj(f: 'T.TextIO', intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv ): if inpv == outpv: @@ -141,14 +154,14 @@ def do_triadj(f: 'T.TextIO', intype, outtype, ptr, v0, v1, v2, v3, v4, v5, inpv, else: shape(f, intype, outtype, ptr, v4, v5, v0, v1, v2, v3 ) -def name(intype, outtype, inpv, outpv, pr, prim): +def name(intype, outtype, inpv, outpv, pr, prim, out_prim): if intype == GENERATE: - return 'generate_' + prim + '_' + outtype + '_' + inpv + '2' + outpv + return 'generate_' + prim + '_' + outtype + '_' + inpv + '2' + outpv + '_' + str(out_prim) else: - return 'translate_' + prim + '_' + intype + '2' + outtype + '_' + inpv + '2' + outpv + '_' + pr + return 'translate_' + prim + '_' + intype + '2' + outtype + '_' + inpv + '2' + outpv + '_' + pr + '_' + str(out_prim) -def preamble(f: 'T.TextIO', intype, outtype, inpv, outpv, pr, prim): - f.write('static void ' + name( intype, outtype, inpv, outpv, pr, prim ) + '(\n') +def preamble(f: 'T.TextIO', intype, outtype, inpv, outpv, pr, prim, out_prim): + f.write('static void ' + name( intype, outtype, inpv, outpv, pr, prim, out_prim ) + '(\n') if intype != GENERATE: f.write(' const void * restrict _in,\n') f.write(' unsigned start,\n') @@ -186,28 +199,28 @@ def prim_restart(f: 'T.TextIO', in_verts, out_verts, out_prims, close_func = Non f.write(' }\n') def points(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='points') + preamble(f, intype, outtype, inpv, outpv, pr, 
out_prim=OUT_TRIS, prim='points') f.write(' for (i = start, j = 0; j < out_nr; j++, i++) {\n') do_point(f, intype, outtype, 'out+j', 'i' ); f.write(' }\n') postamble(f) def lines(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='lines') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='lines') f.write(' for (i = start, j = 0; j < out_nr; j+=2, i+=2) {\n') do_line(f, intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv ); f.write(' }\n') postamble(f) def linestrip(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='linestrip') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='linestrip') f.write(' for (i = start, j = 0; j < out_nr; j+=2, i++) {\n') do_line(f, intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv ); f.write(' }\n') postamble(f) def lineloop(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='lineloop') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='lineloop') f.write(' unsigned end = start;\n') f.write(' for (i = start, j = 0; j < out_nr - 2; j+=2, i++) {\n') if pr == PRENABLE: @@ -226,7 +239,7 @@ def lineloop(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): postamble(f) def tris(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='tris') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='tris') f.write(' for (i = start, j = 0; j < out_nr; j+=3, i+=3) {\n') do_tri(f, intype, outtype, 'out+j', 'i', 'i+1', 'i+2', inpv, outpv ); f.write(' }\n') @@ -234,7 +247,7 @@ def tris(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): def tristrip(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='tristrip') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='tristrip') f.write(' for (i = start, j = 0; j < out_nr; j+=3, i++) {\n') if inpv == FIRST: do_tri(f, intype, outtype, 'out+j', 'i', 'i+1+(i&1)', 'i+2-(i&1)', inpv, outpv ); @@ -245,7 +258,7 @@ def tristrip(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): def trifan(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='trifan') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='trifan') f.write(' for (i = start, j = 0; j < out_nr; j+=3, i++) {\n') if pr == PRENABLE: @@ -264,7 +277,7 @@ def trifan(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): def polygon(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='polygon') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='polygon') f.write(' for (i = start, j = 0; j < out_nr; j+=3, i++) {\n') if pr == PRENABLE: def close_func(index): @@ -279,33 +292,43 @@ def polygon(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): postamble(f) -def quads(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='quads') - f.write(' for (i = start, j = 0; j < out_nr; j+=6, i+=4) {\n') - if pr == PRENABLE: +def quads(f: 'T.TextIO', intype, outtype, inpv, outpv, pr, out_prim): + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=out_prim, prim='quads') + if out_prim == OUT_TRIS: + f.write(' for (i = start, j = 0; j < out_nr; j+=6, i+=4) {\n') + else: + f.write(' for (i = start, j = 0; j < out_nr; j+=4, i+=4) {\n') + if pr == PRENABLE and out_prim == 
OUT_TRIS: prim_restart(f, 4, 3, 2) + elif pr == PRENABLE: + prim_restart(f, 4, 4, 1) - do_quad(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv ); + do_quad(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv, out_prim ); f.write(' }\n') postamble(f) -def quadstrip(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='quadstrip') - f.write(' for (i = start, j = 0; j < out_nr; j+=6, i+=2) {\n') - if pr == PRENABLE: +def quadstrip(f: 'T.TextIO', intype, outtype, inpv, outpv, pr, out_prim): + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=out_prim, prim='quadstrip') + if out_prim == OUT_TRIS: + f.write(' for (i = start, j = 0; j < out_nr; j+=6, i+=2) {\n') + else: + f.write(' for (i = start, j = 0; j < out_nr; j+=4, i+=2) {\n') + if pr == PRENABLE and out_prim == OUT_TRIS: prim_restart(f, 4, 3, 2) + elif pr == PRENABLE: + prim_restart(f, 4, 4, 1) if inpv == LAST: - do_quad(f, intype, outtype, 'out+j', 'i+2', 'i+0', 'i+1', 'i+3', inpv, outpv ); + do_quad(f, intype, outtype, 'out+j', 'i+2', 'i+0', 'i+1', 'i+3', inpv, outpv, out_prim ); else: - do_quad(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+3', 'i+2', inpv, outpv ); + do_quad(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+3', 'i+2', inpv, outpv, out_prim ); f.write(' }\n') postamble(f) def linesadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='linesadj') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='linesadj') f.write(' for (i = start, j = 0; j < out_nr; j+=4, i+=4) {\n') do_lineadj(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv ) f.write(' }\n') @@ -313,7 +336,7 @@ def linesadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): def linestripadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='linestripadj') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='linestripadj') f.write(' for (i = start, j = 0; j < out_nr; j+=4, i++) {\n') do_lineadj(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv ) f.write(' }\n') @@ -321,7 +344,7 @@ def linestripadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): def trisadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='trisadj') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='trisadj') f.write(' for (i = start, j = 0; j < out_nr; j+=6, i+=6) {\n') do_triadj(f, intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', 'i+4', 'i+5', inpv, outpv ) @@ -330,7 +353,7 @@ def trisadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): def tristripadj(f: 'T.TextIO', intype, outtype, inpv, outpv, pr): - preamble(f, intype, outtype, inpv, outpv, pr, prim='tristripadj') + preamble(f, intype, outtype, inpv, outpv, pr, out_prim=OUT_TRIS, prim='tristripadj') f.write(' for (i = start, j = 0; j < out_nr; i+=2, j+=6) {\n') f.write(' if (i % 4 == 0) {\n') f.write(' /* even triangle */\n') @@ -357,31 +380,44 @@ def emit_funcs(f: 'T.TextIO') -> None: tris(f, intype, outtype, inpv, outpv, pr) tristrip(f, intype, outtype, inpv, outpv, pr) trifan(f, intype, outtype, inpv, outpv, pr) - quads(f, intype, outtype, inpv, outpv, pr) - quadstrip(f, intype, outtype, inpv, outpv, pr) + quads(f, intype, outtype, inpv, outpv, pr, OUT_TRIS) + quadstrip(f, intype, outtype, inpv, outpv, pr, OUT_TRIS) polygon(f, intype, outtype, inpv, outpv, pr) linesadj(f, intype, outtype, 
inpv, outpv, pr) linestripadj(f, intype, outtype, inpv, outpv, pr) trisadj(f, intype, outtype, inpv, outpv, pr) tristripadj(f, intype, outtype, inpv, outpv, pr) -def init(f: 'T.TextIO', intype, outtype, inpv, outpv, pr, prim): + for intype, outtype, inpv, outpv, pr in itertools.product( + INTYPES, OUTTYPES, [FIRST, LAST], [FIRST, LAST], [PRDISABLE, PRENABLE]): + if pr == PRENABLE and intype == GENERATE: + continue + quads(f, intype, outtype, inpv, outpv, pr, OUT_QUADS) + quadstrip(f, intype, outtype, inpv, outpv, pr, OUT_QUADS) + +def init(f: 'T.TextIO', intype, outtype, inpv, outpv, pr, prim, out_prim=OUT_TRIS): + generate_name = 'generate' + translate_name = 'translate' + if out_prim == OUT_QUADS: + generate_name = 'generate_quads' + translate_name = 'translate_quads' + if intype == GENERATE: - f.write('generate[' + + f.write(f'{generate_name}[' + outtype_idx[outtype] + '][' + pv_idx[inpv] + '][' + pv_idx[outpv] + '][' + longprim[prim] + - '] = ' + name( intype, outtype, inpv, outpv, pr, prim ) + ';\n') + '] = ' + name( intype, outtype, inpv, outpv, pr, prim, out_prim ) + ';\n') else: - f.write('translate[' + + f.write(f'{translate_name}[' + intype_idx[intype] + '][' + outtype_idx[outtype] + '][' + pv_idx[inpv] + '][' + pv_idx[outpv] + '][' + pr_idx[pr] + '][' + longprim[prim] + - '] = ' + name( intype, outtype, inpv, outpv, pr, prim ) + ';\n') + '] = ' + name( intype, outtype, inpv, outpv, pr, prim, out_prim ) + ';\n') def emit_all_inits(f: 'T.TextIO'): @@ -389,6 +425,10 @@ def emit_all_inits(f: 'T.TextIO'): INTYPES, OUTTYPES, PVS, PVS, PRS, PRIMS): init(f,intype, outtype, inpv, outpv, pr, prim) + for intype, outtype, inpv, outpv, pr, prim in itertools.product( + INTYPES, OUTTYPES, PVS, PVS, PRS, ['quads', 'quadstrip']): + init(f,intype, outtype, inpv, outpv, pr, prim, OUT_QUADS) + def emit_init(f: 'T.TextIO'): f.write('void u_index_init( void )\n') f.write('{\n') diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.c b/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.c index dcbc9052d..526e7b644 100644 --- a/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.c +++ b/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.c @@ -130,9 +130,15 @@ primconvert_init_draw(struct primconvert_context *pc, return false; util_draw_init_info(new_info); - new_info->index_bounds_valid = info->index_bounds_valid; - new_info->min_index = info->min_index; - new_info->max_index = info->max_index; + + /* Because we've changed the index buffer, the original min_index/max_index + * for the draw are no longer valid. That's ok, but we need to tell drivers + * so they don't optimize incorrectly. + */ + new_info->index_bounds_valid = false; + new_info->min_index = 0; + new_info->max_index = ~0; + new_info->start_instance = info->start_instance; new_info->instance_count = info->instance_count; new_info->primitive_restart = info->primitive_restart; diff --git a/lib/mesa/src/gallium/auxiliary/nir/nir_helpers.c b/lib/mesa/src/gallium/auxiliary/nir/nir_helpers.c deleted file mode 100644 index 1b5b7968e..000000000 --- a/lib/mesa/src/gallium/auxiliary/nir/nir_helpers.c +++ /dev/null @@ -1,56 +0,0 @@ -/************************************************************************** - * - * Copyright 2021 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **************************************************************************/ - -#include "nir_helpers.h" -#include "nir_xfb_info.h" - -void -nir_gather_stream_output_info(nir_shader *nir, - struct pipe_stream_output_info *so) -{ - int slot_to_register[NUM_TOTAL_VARYING_SLOTS]; - nir_xfb_info *info = nir_gather_xfb_info_from_intrinsics(nir, slot_to_register); - - memset(so, 0, sizeof(*so)); - - if (!info) - return; - - so->num_outputs = info->output_count; - - for (unsigned i = 0; i < info->output_count; i++) { - so->output[i].start_component = info->outputs[i].component_offset; - so->output[i].num_components = util_bitcount(info->outputs[i].component_mask); - so->output[i].output_buffer = info->outputs[i].buffer; - so->output[i].dst_offset = info->outputs[i].offset / 4; - so->output[i].stream = info->buffer_to_stream[info->outputs[i].buffer]; - so->output[i].register_index = slot_to_register[info->outputs[i].location]; - } - - for (unsigned i = 0; i < MAX_XFB_BUFFERS; i++) - so->stride[i] = info->buffers[i].stride; - - free(info); -} diff --git a/lib/mesa/src/gallium/auxiliary/nir/nir_helpers.h b/lib/mesa/src/gallium/auxiliary/nir/nir_helpers.h deleted file mode 100644 index 08761b045..000000000 --- a/lib/mesa/src/gallium/auxiliary/nir/nir_helpers.h +++ /dev/null @@ -1,36 +0,0 @@ -/************************************************************************** - * - * Copyright 2021 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **************************************************************************/ - -#ifndef GALLIUM_NIR_HELPERS -#define GALLIUM_NIR_HELPERS - -#include "nir.h" -#include "pipe/p_state.h" - -void -nir_gather_stream_output_info(nir_shader *nir, - struct pipe_stream_output_info *so); - -#endif diff --git a/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c b/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c index ad9ed85ec..6b046a85f 100644 --- a/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -957,7 +957,7 @@ ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, unsigned dest_bitsize, { nir_ssa_def *def = nir_build_alu_src_arr(b, op, src); if (def->bit_size == 1) - def = nir_ineg(b, nir_b2i(b, def, dest_bitsize)); + def = nir_ineg(b, nir_b2iN(b, def, dest_bitsize)); assert(def->bit_size == dest_bitsize); if (dest_bitsize == 64) { if (def->num_components > 2) { diff --git a/lib/mesa/src/gallium/auxiliary/os/os_mman.h b/lib/mesa/src/gallium/auxiliary/os/os_mman.h deleted file mode 100644 index 1d07ce654..000000000 --- a/lib/mesa/src/gallium/auxiliary/os/os_mman.h +++ /dev/null @@ -1,84 +0,0 @@ -/************************************************************************** - * - * Copyright 2011 LunarG, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * OS independent memory mapping (with large file support). 
- * - * @author Chia-I Wu <olvaffe@gmail.com> - */ - -#ifndef _OS_MMAN_H_ -#define _OS_MMAN_H_ - - -#include "pipe/p_config.h" -#include "pipe/p_compiler.h" - -#if defined(PIPE_OS_UNIX) -# include <sys/mman.h> -#else -# error Unsupported OS -#endif - -#ifdef __cplusplus -extern "C" { -#endif - - -#if defined(PIPE_OS_ANDROID) && !defined(__LP64__) -/* 32-bit needs mmap64 for 64-bit offsets */ -# define os_mmap(addr, length, prot, flags, fd, offset) \ - mmap64(addr, length, prot, flags, fd, offset) - -# define os_munmap(addr, length) \ - munmap(addr, length) - -#else -/* assume large file support exists */ -# define os_mmap(addr, length, prot, flags, fd, offset) \ - mmap(addr, length, prot, flags, fd, offset) - -static inline int os_munmap(void *addr, size_t length) -{ - /* Copied from configure code generated by AC_SYS_LARGEFILE */ -#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + \ - (((off_t) 1 << 31) << 31)) - STATIC_ASSERT(LARGE_OFF_T % 2147483629 == 721 && - LARGE_OFF_T % 2147483647 == 1); -#undef LARGE_OFF_T - - return munmap(addr, length); -} -#endif - - -#ifdef __cplusplus -} -#endif - -#endif /* _OS_MMAN_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/os/os_process.c b/lib/mesa/src/gallium/auxiliary/os/os_process.c deleted file mode 100644 index b00ff2b0d..000000000 --- a/lib/mesa/src/gallium/auxiliary/os/os_process.c +++ /dev/null @@ -1,146 +0,0 @@ -/************************************************************************** - * - * Copyright 2013 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "pipe/p_config.h" -#include "os/os_process.h" -#include "util/u_memory.h" -#include "util/u_process.h" - -#if defined(PIPE_OS_WINDOWS) -# include <windows.h> -#elif defined(PIPE_OS_HAIKU) -# include <kernel/OS.h> -# include <kernel/image.h> -#endif - -#if defined(PIPE_OS_LINUX) -# include <fcntl.h> -#endif - - -/** - * Return the name of the current process. - * \param procname returns the process name - * \param size size of the procname buffer - * \return TRUE or FALSE for success, failure - */ -boolean -os_get_process_name(char *procname, size_t size) -{ - const char *name; - - /* First, check if the GALLIUM_PROCESS_NAME env var is set to - * override the normal process name query. 
- */ - name = os_get_option("GALLIUM_PROCESS_NAME"); - - if (!name) { - /* do normal query */ - -#if defined(PIPE_OS_WINDOWS) - char szProcessPath[MAX_PATH]; - char *lpProcessName; - char *lpProcessExt; - - GetModuleFileNameA(NULL, szProcessPath, ARRAY_SIZE(szProcessPath)); - - lpProcessName = strrchr(szProcessPath, '\\'); - lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath; - - lpProcessExt = strrchr(lpProcessName, '.'); - if (lpProcessExt) { - *lpProcessExt = '\0'; - } - - name = lpProcessName; - -#elif defined(PIPE_OS_HAIKU) - image_info info; - get_image_info(B_CURRENT_TEAM, &info); - name = info.name; -#else - name = util_get_process_name(); -#endif - } - - assert(size > 0); - assert(procname); - - if (name && procname && size > 0) { - strncpy(procname, name, size); - procname[size - 1] = '\0'; - return TRUE; - } - else { - return FALSE; - } -} - - -/** - * Return the command line for the calling process. This is basically - * the argv[] array with the arguments separated by spaces. - * \param cmdline returns the command line string - * \param size size of the cmdline buffer - * \return TRUE or FALSE for success, failure - */ -boolean -os_get_command_line(char *cmdline, size_t size) -{ -#if defined(PIPE_OS_WINDOWS) - const char *args = GetCommandLineA(); - if (args) { - strncpy(cmdline, args, size); - // make sure we terminate the string - cmdline[size - 1] = 0; - return TRUE; - } -#elif defined(PIPE_OS_LINUX) - int f = open("/proc/self/cmdline", O_RDONLY); - if (f != -1) { - const int n = read(f, cmdline, size - 1); - int i; - assert(n < size); - // The arguments are separated by '\0' chars. Convert them to spaces. - for (i = 0; i < n; i++) { - if (cmdline[i] == 0) { - cmdline[i] = ' '; - } - } - // terminate the string - cmdline[n] = 0; - close(f); - return TRUE; - } -#endif - - /* XXX to-do: implement this function for other operating systems */ - - cmdline[0] = 0; - return FALSE; -} diff --git a/lib/mesa/src/gallium/auxiliary/os/os_thread.h b/lib/mesa/src/gallium/auxiliary/os/os_thread.h deleted file mode 100644 index 7ca65a21d..000000000 --- a/lib/mesa/src/gallium/auxiliary/os/os_thread.h +++ /dev/null @@ -1,158 +0,0 @@ -/************************************************************************** - * - * Copyright 1999-2006 Brian Paul - * Copyright 2008 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -/** - * @file - * - * Thread, mutex, condition variable, barrier, semaphore and - * thread-specific data functions. - */ - - -#ifndef OS_THREAD_H_ -#define OS_THREAD_H_ - - -#include "pipe/p_compiler.h" -#include "util/u_debug.h" /* for assert */ -#include "util/u_thread.h" - - -#define pipe_mutex_assert_locked(mutex) \ - __pipe_mutex_assert_locked(&(mutex)) - -static inline void -__pipe_mutex_assert_locked(mtx_t *mutex) -{ -#ifdef DEBUG - /* NOTE: this would not work for recursive mutexes, but - * mtx_t doesn't support those - */ - int ret = mtx_trylock(mutex); - assert(ret == thrd_busy); - if (ret == thrd_success) - mtx_unlock(mutex); -#else - (void)mutex; -#endif -} - - -/* - * Semaphores - */ - -typedef struct -{ - mtx_t mutex; - cnd_t cond; - int counter; -} pipe_semaphore; - - -static inline void -pipe_semaphore_init(pipe_semaphore *sema, int init_val) -{ - (void) mtx_init(&sema->mutex, mtx_plain); - cnd_init(&sema->cond); - sema->counter = init_val; -} - -static inline void -pipe_semaphore_destroy(pipe_semaphore *sema) -{ - mtx_destroy(&sema->mutex); - cnd_destroy(&sema->cond); -} - -/** Signal/increment semaphore counter */ -static inline void -pipe_semaphore_signal(pipe_semaphore *sema) -{ - mtx_lock(&sema->mutex); - sema->counter++; - cnd_signal(&sema->cond); - mtx_unlock(&sema->mutex); -} - -/** Wait for semaphore counter to be greater than zero */ -static inline void -pipe_semaphore_wait(pipe_semaphore *sema) -{ - mtx_lock(&sema->mutex); - while (sema->counter <= 0) { - cnd_wait(&sema->cond, &sema->mutex); - } - sema->counter--; - mtx_unlock(&sema->mutex); -} - - - -/* - * Thread-specific data. - */ - -typedef struct { - tss_t key; - int initMagic; -} pipe_tsd; - - -#define PIPE_TSD_INIT_MAGIC 0xff8adc98 - - -static inline void -pipe_tsd_init(pipe_tsd *tsd) -{ - if (tss_create(&tsd->key, NULL/*free*/) != 0) { - exit(-1); - } - tsd->initMagic = PIPE_TSD_INIT_MAGIC; -} - -static inline void * -pipe_tsd_get(pipe_tsd *tsd) -{ - if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { - pipe_tsd_init(tsd); - } - return tss_get(tsd->key); -} - -static inline void -pipe_tsd_set(pipe_tsd *tsd, void *value) -{ - if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { - pipe_tsd_init(tsd); - } - if (tss_set(tsd->key, value) != 0) { - exit(-1); - } -} - -#endif /* OS_THREAD_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader.c b/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader.c index 1c58eaefd..5b69599ee 100644 --- a/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader.c +++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader.c @@ -97,8 +97,12 @@ merge_driconf(const driOptionDescription *driver_driconf, unsigned driver_count, return NULL; } - memcpy(merged, gallium_driconf, sizeof(*merged) * gallium_count); - memcpy(&merged[gallium_count], driver_driconf, sizeof(*merged) * driver_count); + if (gallium_count) + memcpy(merged, gallium_driconf, sizeof(*merged) * gallium_count); + if (driver_count) { + memcpy(&merged[gallium_count], driver_driconf, + sizeof(*merged) * driver_count); + } *merged_count = driver_count + gallium_count; return merged; diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c index 12bc79305..e11837f2c 100644 --- a/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c +++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c @@ -69,13 +69,13 @@ static const struct 
sw_driver_descriptor driver_descriptors = { #ifdef HAVE_DRI { .name = "dri", - .create_winsys = dri_create_sw_winsys, + .create_winsys_dri = dri_create_sw_winsys, }, #endif #ifdef HAVE_DRISW_KMS { .name = "kms_dri", - .create_winsys = kms_dri_create_winsys, + .create_winsys_kms_dri = kms_dri_create_winsys, }, #endif #ifndef __ANDROID__ @@ -85,7 +85,7 @@ static const struct sw_driver_descriptor driver_descriptors = { }, { .name = "wrapped", - .create_winsys = wrapper_sw_winsys_wrap_pipe_screen, + .create_winsys_wrapped = wrapper_sw_winsys_wrap_pipe_screen, }, #endif { 0 }, @@ -99,12 +99,12 @@ static const struct sw_driver_descriptor kopper_driver_descriptors = { .winsys = { { .name = "dri", - .create_winsys = dri_create_sw_winsys, + .create_winsys_dri = dri_create_sw_winsys, }, #ifdef HAVE_DRISW_KMS { .name = "kms_dri", - .create_winsys = kms_dri_create_winsys, + .create_winsys_kms_dri = kms_dri_create_winsys, }, #endif #ifndef __ANDROID__ @@ -114,7 +114,7 @@ static const struct sw_driver_descriptor kopper_driver_descriptors = { }, { .name = "wrapped", - .create_winsys = wrapper_sw_winsys_wrap_pipe_screen, + .create_winsys_wrapped = wrapper_sw_winsys_wrap_pipe_screen, }, #endif { 0 }, @@ -216,7 +216,7 @@ pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, const struct drisw_lo for (i = 0; sdev->dd->winsys[i].name; i++) { if (strcmp(sdev->dd->winsys[i].name, "dri") == 0) { - sdev->ws = sdev->dd->winsys[i].create_winsys(drisw_lf); + sdev->ws = sdev->dd->winsys[i].create_winsys_dri(drisw_lf); break; } } @@ -246,7 +246,7 @@ pipe_loader_vk_probe_dri(struct pipe_loader_device **devs, const struct drisw_lo for (i = 0; sdev->dd->winsys[i].name; i++) { if (strcmp(sdev->dd->winsys[i].name, "dri") == 0) { - sdev->ws = sdev->dd->winsys[i].create_winsys(drisw_lf); + sdev->ws = sdev->dd->winsys[i].create_winsys_dri(drisw_lf); break; } } @@ -282,7 +282,7 @@ pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd) for (i = 0; sdev->dd->winsys[i].name; i++) { if (strcmp(sdev->dd->winsys[i].name, "kms_dri") == 0) { - sdev->ws = sdev->dd->winsys[i].create_winsys(sdev->fd); + sdev->ws = sdev->dd->winsys[i].create_winsys_kms_dri(sdev->fd); break; } } @@ -360,7 +360,7 @@ pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev, for (i = 0; sdev->dd->winsys[i].name; i++) { if (strcmp(sdev->dd->winsys[i].name, "wrapped") == 0) { - sdev->ws = sdev->dd->winsys[i].create_winsys(screen); + sdev->ws = sdev->dd->winsys[i].create_winsys_wrapped(screen); break; } } @@ -382,6 +382,7 @@ pipe_loader_sw_release(struct pipe_loader_device **dev) UNUSED struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(*dev); + sdev->ws->destroy(sdev->ws); #ifndef GALLIUM_STATIC_TARGETS if (sdev->lib) util_dl_close(sdev->lib); diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 41e5c8386..76608b371 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -34,9 +34,9 @@ */ -#include "pipe/p_config.h" +#include "util/detect.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if DETECT_OS_LINUX || DETECT_OS_BSD || DETECT_OS_SOLARIS #include <unistd.h> #include <sched.h> #endif @@ -45,7 +45,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_defines.h" #include "util/u_debug.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "util/list.h" @@ -979,7 +979,7 @@ 
fenced_bufmgr_destroy(struct pb_manager *mgr) /* Wait on outstanding fences. */ while (fenced_mgr->num_fenced) { mtx_unlock(&fenced_mgr->mutex); -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if DETECT_OS_LINUX || DETECT_OS_BSD || DETECT_OS_SOLARIS sched_yield(); #endif mtx_lock(&fenced_mgr->mutex); diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 9a10def98..f2fb620dd 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -35,7 +35,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/list.h" @@ -307,7 +307,7 @@ pb_debug_buffer_validate(struct pb_buffer *_buf, mtx_lock(&buf->mutex); if(buf->map_count) { - debug_printf("%s: attempting to validate a mapped buffer\n", __FUNCTION__); + debug_printf("%s: attempting to validate a mapped buffer\n", __func__); debug_printf("last map backtrace is\n"); debug_backtrace_dump(buf->map_backtrace, PB_DEBUG_MAP_BACKTRACE); } @@ -390,7 +390,7 @@ pb_debug_manager_create_buffer(struct pb_manager *_mgr, FREE(buf); #if 0 mtx_lock(&mgr->mutex); - debug_printf("%s: failed to create buffer\n", __FUNCTION__); + debug_printf("%s: failed to create buffer\n", __func__); if(!list_is_empty(&mgr->list)) pb_debug_manager_dump_locked(mgr); mtx_unlock(&mgr->mutex); @@ -445,7 +445,7 @@ pb_debug_manager_destroy(struct pb_manager *_mgr) mtx_lock(&mgr->mutex); if(!list_is_empty(&mgr->list)) { - debug_printf("%s: unfreed buffers\n", __FUNCTION__); + debug_printf("%s: unfreed buffers\n", __func__); pb_debug_manager_dump_locked(mgr); } mtx_unlock(&mgr->mutex); diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 5cc63b93d..397e42eed 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -35,7 +35,7 @@ #include "pipe/p_defines.h" #include "util/u_debug.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "util/list.h" #include "util/u_mm.h" diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index d1928dcaf..f078ff0b1 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -38,7 +38,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/list.h" diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_cache.h b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_cache.h index cda0f9984..c5d62c7f9 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_cache.h +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_cache.h @@ -32,7 +32,7 @@ #include "pb_buffer.h" #include "util/simple_mtx.h" #include "util/list.h" -#include "os/os_thread.h" +#include "util/u_thread.h" /** * Statically inserted into the driver-specific buffer structure. 
diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.h b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.h index e8e8f7687..4fa5fd8d7 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.h +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.h @@ -47,7 +47,7 @@ #include "pb_buffer.h" #include "util/simple_mtx.h" #include "util/list.h" -#include "os/os_thread.h" +#include "util/u_thread.h" struct pb_slab; struct pb_slabs; diff --git a/lib/mesa/src/gallium/auxiliary/renderonly/renderonly.c b/lib/mesa/src/gallium/auxiliary/renderonly/renderonly.c index d8628a36b..157570b80 100644 --- a/lib/mesa/src/gallium/auxiliary/renderonly/renderonly.c +++ b/lib/mesa/src/gallium/auxiliary/renderonly/renderonly.c @@ -66,7 +66,7 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc, struct renderonly *ro, struct winsys_handle *out_handle) { - struct renderonly_scanout *scanout; + struct renderonly_scanout *scanout = NULL; int err; struct drm_mode_create_dumb create_dumb = { .width = rsc->width0, @@ -114,7 +114,13 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc, return scanout; free_dumb: - destroy_dumb.handle = scanout->handle; + /* If an error occured, make sure we reset the scanout object before + * leaving. + */ + if (scanout) + memset(scanout, 0, sizeof(*scanout)); + + destroy_dumb.handle = create_dumb.handle; drmIoctl(ro->kms_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb); return NULL; diff --git a/lib/mesa/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/lib/mesa/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 8d5195ac0..2aa545919 100644 --- a/lib/mesa/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/lib/mesa/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -33,7 +33,7 @@ #include "pipe/p_compiler.h" #include "util/u_debug.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include "util/u_memory.h" #include "rtasm_execmem.h" @@ -42,11 +42,11 @@ #define MAP_ANONYMOUS MAP_ANON #endif -#if defined(PIPE_OS_WINDOWS) +#if DETECT_OS_WINDOWS #include <windows.h> #endif -#if defined(PIPE_OS_UNIX) +#if DETECT_OS_UNIX /* @@ -137,7 +137,7 @@ rtasm_exec_free(void *addr) } -#elif defined(PIPE_OS_WINDOWS) +#elif DETECT_OS_WINDOWS /* diff --git a/lib/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h b/lib/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h index 7dff0b66c..323832ecc 100644 --- a/lib/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h +++ b/lib/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h @@ -264,14 +264,21 @@ pipe_msm_create_screen(int fd, const struct pipe_screen_config *config) { struct pipe_screen *screen; - screen = fd_drm_screen_create(fd, NULL, config); + screen = fd_drm_screen_create_renderonly(fd, NULL, config); return screen ? 
debug_screen_wrap(screen) : NULL; } -DRM_DRIVER_DESCRIPTOR(msm, NULL, 0) + +const driOptionDescription msm_driconf[] = { +#ifdef GALLIUM_FREEDRENO + #include "freedreno/driinfo_freedreno.h" +#endif +}; +DRM_DRIVER_DESCRIPTOR(msm, msm_driconf, ARRAY_SIZE(msm_driconf)) +DRM_DRIVER_DESCRIPTOR_ALIAS(msm, kgsl, msm_driconf, ARRAY_SIZE(msm_driconf)) #else DRM_DRIVER_DESCRIPTOR_STUB(msm) +DRM_DRIVER_DESCRIPTOR_STUB(kgsl) #endif -DRM_DRIVER_DESCRIPTOR_ALIAS(msm, kgsl, NULL, 0) #if defined(GALLIUM_VIRGL) || (defined(GALLIUM_FREEDRENO) && !defined(PIPE_LOADER_DYNAMIC)) #include "virgl/drm/virgl_drm_public.h" @@ -285,7 +292,7 @@ pipe_virtio_gpu_create_screen(int fd, const struct pipe_screen_config *config) /* Try native guest driver(s) first, and then fallback to virgl: */ #ifdef GALLIUM_FREEDRENO if (!screen) - screen = fd_drm_screen_create(fd, NULL, config); + screen = fd_drm_screen_create_renderonly(fd, NULL, config); #endif #ifdef GALLIUM_VIRGL if (!screen) @@ -295,9 +302,7 @@ pipe_virtio_gpu_create_screen(int fd, const struct pipe_screen_config *config) } const driOptionDescription virgl_driconf[] = { -#ifdef GALLIUM_VIRGL #include "virgl/virgl_driinfo.h.in" -#endif }; DRM_DRIVER_DESCRIPTOR(virtio_gpu, virgl_driconf, ARRAY_SIZE(virgl_driconf)) diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c index 73d1eb26a..5ec2605aa 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -26,7 +26,7 @@ **************************************************************************/ #include "util/u_debug.h" -#include "pipe/p_format.h" +#include "util/format/u_formats.h" #include "pipe/p_shader_tokens.h" #include "tgsi_build.h" #include "tgsi_parse.h" diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c index 5e440353e..abafc6dc0 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -73,18 +73,27 @@ #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 +static_assert(alignof(union tgsi_exec_channel) == 16, ""); +static_assert(alignof(struct tgsi_exec_vector) == 16, ""); +static_assert(alignof(struct tgsi_exec_machine) == 16, ""); + union tgsi_double_channel { + alignas(16) double d[TGSI_QUAD_SIZE]; unsigned u[TGSI_QUAD_SIZE][2]; uint64_t u64[TGSI_QUAD_SIZE]; int64_t i64[TGSI_QUAD_SIZE]; -} ALIGN16; +}; -struct ALIGN16 tgsi_double_vector { +struct tgsi_double_vector { + alignas(16) union tgsi_double_channel xy; union tgsi_double_channel zw; }; +static_assert(alignof(union tgsi_double_channel) == 16, ""); +static_assert(alignof(struct tgsi_double_vector) == 16, ""); + static void micro_abs(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -399,17 +408,6 @@ micro_dldexp(union tgsi_double_channel *dst, } static void -micro_dfracexp(union tgsi_double_channel *dst, - union tgsi_exec_channel *dst_exp, - const union tgsi_double_channel *src) -{ - dst->d[0] = frexp(src->d[0], &dst_exp->i[0]); - dst->d[1] = frexp(src->d[1], &dst_exp->i[1]); - dst->d[2] = frexp(src->d[2], &dst_exp->i[2]); - dst->d[3] = frexp(src->d[3], &dst_exp->i[3]); -} - -static void micro_exp2(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { @@ -3559,26 +3557,6 @@ exec_dldexp(struct tgsi_exec_machine *mach, } static void -exec_dfracexp(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_double_channel src; - union tgsi_double_channel 
dst; - union tgsi_exec_channel dst_exp; - - fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); - micro_dfracexp(&dst, &dst_exp, &src); - if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) - store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); - if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) - store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); - for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[1].Register.WriteMask & (1 << chan)) - store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan); - } -} - -static void exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, micro_dop_sop op) @@ -5795,10 +5773,6 @@ exec_instruction( exec_dldexp(mach, inst); break; - case TGSI_OPCODE_DFRACEXP: - exec_dfracexp(mach, inst); - break; - case TGSI_OPCODE_I2D: exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_FLOAT); break; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h index 80acf6359..6e13618fb 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -73,17 +73,18 @@ extern "C" { */ union tgsi_exec_channel { + alignas(16) float f[TGSI_QUAD_SIZE]; int i[TGSI_QUAD_SIZE]; unsigned u[TGSI_QUAD_SIZE]; -} ALIGN16; +}; /** * A vector[RGBA] of channels[4 pixels] */ -struct ALIGN16 tgsi_exec_vector +struct tgsi_exec_vector { - union tgsi_exec_channel xyzw[TGSI_NUM_CHANNELS]; + alignas(16) union tgsi_exec_channel xyzw[TGSI_NUM_CHANNELS]; }; /** @@ -286,10 +287,11 @@ typedef void (* apply_sample_offset_func)( /** * Run-time virtual machine state for executing TGSI shader. 
*/ -struct ALIGN16 tgsi_exec_machine +struct tgsi_exec_machine { /* Total = program temporaries + internal temporaries */ + alignas(16) struct tgsi_exec_vector Temps[TGSI_EXEC_NUM_TEMPS]; unsigned ImmsReserved; @@ -480,8 +482,6 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param) return 1 << PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; - case PIPE_SHADER_CAP_DFRACEXP_DLDEXP_SUPPORTED: - case PIPE_SHADER_CAP_LDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 1; case PIPE_SHADER_CAP_DROUND_SUPPORTED: diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c index 477876d7e..8b926baf7 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -201,7 +201,6 @@ tgsi_opcode_infer_type(enum tgsi_opcode opcode) case TGSI_OPCODE_DSQRT: case TGSI_OPCODE_DMAD: case TGSI_OPCODE_DLDEXP: - case TGSI_OPCODE_DFRACEXP: case TGSI_OPCODE_DFRAC: case TGSI_OPCODE_DRSQ: case TGSI_OPCODE_DTRUNC: @@ -335,8 +334,5 @@ tgsi_opcode_infer_src_type(enum tgsi_opcode opcode, uint src_idx) enum tgsi_opcode_type tgsi_opcode_infer_dst_type(enum tgsi_opcode opcode, uint dst_idx) { - if (dst_idx == 1 && opcode == TGSI_OPCODE_DFRACEXP) - return TGSI_TYPE_SIGNED; - return tgsi_opcode_infer_type(opcode); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c index 7802f1049..29e337278 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -30,6 +30,7 @@ #include "util/u_prim.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" +#include "util/strtod.h" #include "tgsi_text.h" #include "tgsi_build.h" #include "tgsi_info.h" @@ -231,52 +232,9 @@ static boolean parse_identifier( const char **pcur, char *ret, size_t len ) static boolean parse_float( const char **pcur, float *val ) { const char *cur = *pcur; - boolean integral_part = FALSE; - boolean fractional_part = FALSE; - - if (*cur == '0' && *(cur + 1) == 'x') { - union fi fi; - fi.ui = strtoul(cur, NULL, 16); - *val = fi.f; - cur += 10; - goto out; - } - - *val = (float) atof( cur ); - if (*cur == '-' || *cur == '+') - cur++; - if (is_digit( cur )) { - cur++; - integral_part = TRUE; - while (is_digit( cur )) - cur++; - } - if (*cur == '.') { - cur++; - if (is_digit( cur )) { - cur++; - fractional_part = TRUE; - while (is_digit( cur )) - cur++; - } - } - if (!integral_part && !fractional_part) + *val = _mesa_strtof(cur, (char**)pcur); + if (*pcur == cur) return FALSE; - if (uprcase( *cur ) == 'E') { - cur++; - if (*cur == '-' || *cur == '+') - cur++; - if (is_digit( cur )) { - cur++; - while (is_digit( cur )) - cur++; - } - else - return FALSE; - } - -out: - *pcur = cur; return TRUE; } @@ -288,7 +246,7 @@ static boolean parse_double( const char **pcur, uint32_t *val0, uint32_t *val1) uint32_t uval[2]; } v; - v.dval = strtod(cur, (char**)pcur); + v.dval = _mesa_strtod(cur, (char**)pcur); if (*pcur == cur) return FALSE; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 398d59b95..b4cfae0e6 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -35,13 +35,13 @@ #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_sanity.h" +#include "util/glheader.h" #include "util/u_debug.h" #include "util/u_inlines.h" #include "util/u_memory.h" #include 
"util/u_math.h" #include "util/u_prim.h" #include "util/u_bitmask.h" -#include "GL/gl.h" #include "compiler/shader_info.h" union tgsi_any_token { @@ -2131,7 +2131,7 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) if (ureg->domain[0].tokens == error_tokens || ureg->domain[1].tokens == error_tokens) { - debug_printf("%s: error in generated shader\n", __FUNCTION__); + debug_printf("%s: error in generated shader\n", __func__); assert(0); return NULL; } @@ -2139,7 +2139,7 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg ) tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token; if (0) { - debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__, + debug_printf("%s: emitted shader %d tokens:\n", __func__, ureg->domain[DOMAIN_DECL].count); tgsi_dump( tokens, 0 ); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 59041e94d..5c4efbe8c 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -29,7 +29,7 @@ #define TGSI_UREG_H #include "pipe/p_defines.h" -#include "pipe/p_format.h" +#include "util/format/u_formats.h" #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" #include "util/u_debug.h" diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c index a101cce24..d879d2e26 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -146,7 +146,6 @@ tgsi_util_get_src_usage_mask(enum tgsi_opcode opcode, case TGSI_OPCODE_DP2: case TGSI_OPCODE_PK2H: case TGSI_OPCODE_PK2US: - case TGSI_OPCODE_DFRACEXP: case TGSI_OPCODE_F2D: case TGSI_OPCODE_I2D: case TGSI_OPCODE_U2D: diff --git a/lib/mesa/src/gallium/auxiliary/translate/translate_generic.c b/lib/mesa/src/gallium/auxiliary/translate/translate_generic.c index 20e2de341..c4213fb5d 100644 --- a/lib/mesa/src/gallium/auxiliary/translate/translate_generic.c +++ b/lib/mesa/src/gallium/auxiliary/translate/translate_generic.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 VMware, Inc. + * Copyright 2007-2023 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -584,12 +584,13 @@ get_emit_func(enum pipe_format format) } } -static ALWAYS_INLINE void PIPE_CDECL +static ALWAYS_INLINE void UTIL_CDECL generic_run_one(struct translate_generic *tg, unsigned elt, unsigned start_instance, unsigned instance_id, - void *vert) + void *vert, + unsigned index_size) { unsigned nr_attrs = tg->nr_attrib; unsigned attr; @@ -613,8 +614,10 @@ generic_run_one(struct translate_generic *tg, } else { index = elt; - /* clamp to avoid going out of bounds */ - index = MIN2(index, tg->attrib[attr].max_index); + if (index_size > 0) { + /* clamp to avoid going out of bounds */ + index = MIN2(index, tg->attrib[attr].max_index); + } } src = tg->attrib[attr].input_ptr + @@ -651,7 +654,7 @@ generic_run_one(struct translate_generic *tg, /** * Fetch vertex attributes for 'count' vertices. 
*/ -static void PIPE_CDECL +static void UTIL_CDECL generic_run_elts(struct translate *translate, const unsigned *elts, unsigned count, @@ -664,12 +667,12 @@ generic_run_elts(struct translate *translate, unsigned i; for (i = 0; i < count; i++) { - generic_run_one(tg, *elts++, start_instance, instance_id, vert); + generic_run_one(tg, *elts++, start_instance, instance_id, vert, 4); vert += tg->translate.key.output_stride; } } -static void PIPE_CDECL +static void UTIL_CDECL generic_run_elts16(struct translate *translate, const uint16_t *elts, unsigned count, @@ -682,12 +685,12 @@ generic_run_elts16(struct translate *translate, unsigned i; for (i = 0; i < count; i++) { - generic_run_one(tg, *elts++, start_instance, instance_id, vert); + generic_run_one(tg, *elts++, start_instance, instance_id, vert, 2); vert += tg->translate.key.output_stride; } } -static void PIPE_CDECL +static void UTIL_CDECL generic_run_elts8(struct translate *translate, const uint8_t *elts, unsigned count, @@ -700,12 +703,12 @@ generic_run_elts8(struct translate *translate, unsigned i; for (i = 0; i < count; i++) { - generic_run_one(tg, *elts++, start_instance, instance_id, vert); + generic_run_one(tg, *elts++, start_instance, instance_id, vert, 1); vert += tg->translate.key.output_stride; } } -static void PIPE_CDECL +static void UTIL_CDECL generic_run(struct translate *translate, unsigned start, unsigned count, @@ -718,7 +721,7 @@ generic_run(struct translate *translate, unsigned i; for (i = 0; i < count; i++) { - generic_run_one(tg, start + i, start_instance, instance_id, vert); + generic_run_one(tg, start + i, start_instance, instance_id, vert, 0); vert += tg->translate.key.output_stride; } } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_blitter.c b/lib/mesa/src/gallium/auxiliary/util/u_blitter.c index 4c0c960de..5552e04c7 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_blitter.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_blitter.c @@ -2412,6 +2412,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter, fb_state.nr_cbufs = 1; fb_state.cbufs[0] = dstsurf; fb_state.zsbuf = NULL; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); if (pipe->set_min_samples) @@ -2497,6 +2498,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, fb_state.nr_cbufs = 0; fb_state.cbufs[0] = NULL; fb_state.zsbuf = dstsurf; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); if (pipe->set_min_samples) @@ -2568,6 +2570,7 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter, fb_state.nr_cbufs = 0; } fb_state.zsbuf = zsurf; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, sample_mask); if (pipe->set_min_samples) @@ -2706,6 +2709,7 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter, fb_state.cbufs[0] = srcsurf; fb_state.cbufs[1] = dstsurf; fb_state.zsbuf = NULL; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); blitter_set_common_draw_rect_state(ctx, false, @@ -2755,6 +2759,7 @@ void util_blitter_custom_color(struct blitter_context *blitter, fb_state.nr_cbufs = 1; fb_state.cbufs[0] = dstsurf; fb_state.zsbuf = NULL; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); if (pipe->set_min_samples) @@ -2818,6 +2823,7 @@ void util_blitter_custom_shader(struct blitter_context *blitter, fb_state.height = dstsurf->height; 
fb_state.nr_cbufs = 1; fb_state.cbufs[0] = dstsurf; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); if (pipe->set_min_samples) @@ -2914,6 +2920,7 @@ util_blitter_stencil_fallback(struct blitter_context *blitter, fb_state.width = dstbox->x + dstbox->width; fb_state.height = dstbox->y + dstbox->height; fb_state.zsbuf = dst_view; + fb_state.resolve = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); if (pipe->set_min_samples) diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c b/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c index 55e6d7aa9..5aa8e4faf 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c @@ -48,7 +48,7 @@ #include "util/list.h" #include "util/u_inlines.h" #include "util/u_string.h" -#include "os/os_thread.h" +#include "util/u_thread.h" #include <stdio.h> /* Future improvement: Use realloc instead? */ @@ -337,7 +337,7 @@ out_no_item: "for this command batch.\n"); } -static enum pipe_error +static int debug_flush_might_flush_cb(UNUSED void *key, void *value, void *data) { struct debug_flush_item *item = @@ -360,7 +360,7 @@ debug_flush_might_flush_cb(UNUSED void *key, void *value, void *data) } mtx_unlock(&fbuf->mutex); - return PIPE_OK; + return 0; } /** @@ -378,7 +378,7 @@ debug_flush_might_flush(struct debug_flush_ctx *fctx) "Might flush"); } -static enum pipe_error +static int debug_flush_flush_cb(UNUSED void *key, void *value, UNUSED void *data) { struct debug_flush_item *item = @@ -386,7 +386,7 @@ debug_flush_flush_cb(UNUSED void *key, void *value, UNUSED void *data) debug_flush_item_destroy(item); - return PIPE_OK; + return 0; } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_dirty_flags.h b/lib/mesa/src/gallium/auxiliary/util/u_dirty_flags.h deleted file mode 100644 index 40539f0b0..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_dirty_flags.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef U_DIRTY_FLAGS_H -#define U_DIRTY_FLAGS_H - -/* Here's a convenient list of dirty flags to use in a driver. Either - * include it directly or use it as a starting point for your own - * list. 
- */ -#define U_NEW_VIEWPORT 0x1 -#define U_NEW_RASTERIZER 0x2 -#define U_NEW_FS 0x4 -#define U_NEW_FS_CONSTANTS 0x8 -#define U_NEW_FS_SAMPLER_VIEW 0x10 -#define U_NEW_FS_SAMPLER_STATES 0x20 -#define U_NEW_VS 0x40 -#define U_NEW_VS_CONSTANTS 0x80 -#define U_NEW_VS_SAMPLER_VIEW 0x100 -#define U_NEW_VS_SAMPLER_STATES 0x200 -#define U_NEW_BLEND 0x400 -#define U_NEW_CLIP 0x800 -#define U_NEW_SCISSOR 0x1000 -#define U_NEW_POLYGON_STIPPLE 0x2000 -#define U_NEW_FRAMEBUFFER 0x4000 -#define U_NEW_VERTEX_ELEMENTS 0x8000 -#define U_NEW_VERTEX_BUFFER 0x10000 -#define U_NEW_QUERY 0x20000 -#define U_NEW_DEPTH_STENCIL 0x40000 -#define U_NEW_GS 0x80000 -#define U_NEW_GS_CONSTANTS 0x100000 -#define U_NEW_GS_SAMPLER_VIEW 0x200000 -#define U_NEW_GS_SAMPLER_STATES 0x400000 - -#endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_dirty_surfaces.h b/lib/mesa/src/gallium/auxiliary/util/u_dirty_surfaces.h deleted file mode 100644 index ccde8a8c1..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_dirty_surfaces.h +++ /dev/null @@ -1,119 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 Luca Barbieri - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef U_DIRTY_SURFACES_H_ -#define U_DIRTY_SURFACES_H_ - -#include "pipe/p_state.h" - -#include "util/list.h" -#include "util/u_math.h" - -struct pipe_context; - -typedef void (*util_dirty_surface_flush_t) (struct pipe_context *, struct pipe_surface *); - -struct util_dirty_surfaces -{ - struct list_head dirty_list; -}; - -struct util_dirty_surface -{ - struct pipe_surface base; - struct list_head dirty_list; -}; - -static inline void -util_dirty_surfaces_init(struct util_dirty_surfaces *ds) -{ - LIST_INITHEAD(&ds->dirty_list); -} - -static inline void -util_dirty_surfaces_use_for_sampling(struct pipe_context *pipe, struct util_dirty_surfaces *dss, util_dirty_surface_flush_t flush) -{ - struct list_head *p, *next; - for(p = dss->dirty_list.next; p != &dss->dirty_list; p = next) - { - struct util_dirty_surface *ds = LIST_ENTRY(struct util_dirty_surface, p, dirty_list); - next = p->next; - - flush(pipe, &ds->base); - } -} - -static inline void -util_dirty_surfaces_use_levels_for_sampling(struct pipe_context *pipe, struct util_dirty_surfaces *dss, unsigned first, unsigned last, util_dirty_surface_flush_t flush) -{ - struct list_head *p, *next; - if(first > last) - return; - for(p = dss->dirty_list.next; p != &dss->dirty_list; p = next) - { - struct util_dirty_surface *ds = LIST_ENTRY(struct util_dirty_surface, p, dirty_list); - next = p->next; - - if(ds->base.u.tex.level >= first && ds->base.u.tex.level <= last) - flush(pipe, &ds->base); - } -} - -static inline void -util_dirty_surfaces_use_for_sampling_with(struct pipe_context *pipe, struct util_dirty_surfaces *dss, struct pipe_sampler_view *psv, struct pipe_sampler_state *pss, util_dirty_surface_flush_t flush) -{ - if(!LIST_IS_EMPTY(&dss->dirty_list)) - util_dirty_surfaces_use_levels_for_sampling(pipe, dss, (unsigned)pss->min_lod + psv->u.tex.first_level, - MIN2((unsigned)ceilf(pss->max_lod) + psv->u.tex.first_level, psv->u.tex.last_level), flush); -} - -static inline void -util_dirty_surface_init(struct util_dirty_surface *ds) -{ - LIST_INITHEAD(&ds->dirty_list); -} - -static inline boolean -util_dirty_surface_is_dirty(struct util_dirty_surface *ds) -{ - return !LIST_IS_EMPTY(&ds->dirty_list); -} - -static inline void -util_dirty_surface_set_dirty(struct util_dirty_surfaces *dss, struct util_dirty_surface *ds) -{ - if(LIST_IS_EMPTY(&ds->dirty_list)) - LIST_ADDTAIL(&ds->dirty_list, &dss->dirty_list); -} - -static inline void -util_dirty_surface_set_clean(struct util_dirty_surfaces *dss, struct util_dirty_surface *ds) -{ - if(!LIST_IS_EMPTY(&ds->dirty_list)) - LIST_DELINIT(&ds->dirty_list); -} - -#endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_draw.c b/lib/mesa/src/gallium/auxiliary/util/u_draw.c index ed1e294a5..aac16c0f4 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_draw.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_draw.c @@ -115,7 +115,7 @@ util_draw_max_index( * indices/instances and simply start clamping against buffer * size. 
*/ debug_printf("%s: too many instances for vertex buffer\n", - __FUNCTION__); + __func__); return 0; } } @@ -147,7 +147,7 @@ util_draw_indirect_read(struct pipe_context *pipe, indirect->indirect_draw_count_offset, 4, PIPE_MAP_READ, &dc_transfer); if (!dc_transfer) { - debug_printf("%s: failed to map indirect draw count buffer\n", __FUNCTION__); + debug_printf("%s: failed to map indirect draw count buffer\n", __func__); return NULL; } draw_count = dc_param[0]; @@ -169,7 +169,7 @@ util_draw_indirect_read(struct pipe_context *pipe, PIPE_MAP_READ, &transfer); if (!transfer) { - debug_printf("%s: failed to map indirect buffer\n", __FUNCTION__); + debug_printf("%s: failed to map indirect buffer\n", __func__); free(draws); return NULL; } @@ -215,7 +215,7 @@ util_draw_indirect(struct pipe_context *pipe, indirect->indirect_draw_count_offset, 4, PIPE_MAP_READ, &dc_transfer); if (!dc_transfer) { - debug_printf("%s: failed to map indirect draw count buffer\n", __FUNCTION__); + debug_printf("%s: failed to map indirect draw count buffer\n", __func__); return; } if (dc_param[0] < draw_count) @@ -223,6 +223,9 @@ util_draw_indirect(struct pipe_context *pipe, pipe_buffer_unmap(pipe, dc_transfer); } + if (!draw_count) + return; + if (indirect->stride) num_params = MIN2(indirect->stride / 4, num_params); params = (uint32_t *) @@ -233,7 +236,7 @@ util_draw_indirect(struct pipe_context *pipe, PIPE_MAP_READ, &transfer); if (!transfer) { - debug_printf("%s: failed to map indirect buffer\n", __FUNCTION__); + debug_printf("%s: failed to map indirect buffer\n", __func__); return; } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_helpers.c b/lib/mesa/src/gallium/auxiliary/util/u_helpers.c index e0e91aab4..2b5350015 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_helpers.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_helpers.c @@ -32,6 +32,7 @@ #include "util/u_upload_mgr.h" #include "util/u_thread.h" #include "util/os_time.h" +#include "util/perf/cpu_trace.h" #include <inttypes.h> /** @@ -396,6 +397,8 @@ util_throttle_memory_usage(struct pipe_context *pipe, if (!t->max_mem_usage) return; + MESA_TRACE_FUNC(); + struct pipe_screen *screen = pipe->screen; struct pipe_fence_handle **fence = NULL; unsigned ring_size = ARRAY_SIZE(t->ring); @@ -460,6 +463,21 @@ util_throttle_memory_usage(struct pipe_context *pipe, t->ring[t->flush_index].mem_usage += memory_size; } +void +util_sw_query_memory_info(struct pipe_screen *pscreen, + struct pipe_memory_info *info) +{ + /* Provide query_memory_info from CPU reported memory */ + uint64_t size; + + if (!os_get_available_system_memory(&size)) + return; + info->avail_staging_memory = size / 1024; + if (!os_get_total_physical_memory(&size)) + return; + info->total_staging_memory = size / 1024; +} + bool util_lower_clearsize_to_dword(const void *clearValue, int *clearValueSize, uint32_t *clamped) { diff --git a/lib/mesa/src/gallium/auxiliary/util/u_helpers.h b/lib/mesa/src/gallium/auxiliary/util/u_helpers.h index 299c67980..2d12d6f17 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_helpers.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_helpers.h @@ -117,6 +117,8 @@ void util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage); void util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t); void util_throttle_memory_usage(struct pipe_context *pipe, struct util_throttle *t, uint64_t memory_size); +void util_sw_query_memory_info(struct pipe_screen *pscreen, + struct pipe_memory_info *info); bool util_lower_clearsize_to_dword(const void *clearValue, int 
*clearValueSize, uint32_t *clamped); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_inlines.h b/lib/mesa/src/gallium/auxiliary/util/u_inlines.h index 1e6ec06c9..f42f368e2 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_inlines.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_inlines.h @@ -891,13 +891,17 @@ util_writes_stencil(const struct pipe_stencil_state *s) } static inline bool -util_writes_depth_stencil(const struct pipe_depth_stencil_alpha_state *zsa) +util_writes_depth(const struct pipe_depth_stencil_alpha_state *zsa) { - if (zsa->depth_enabled && zsa->depth_writemask && - (zsa->depth_func != PIPE_FUNC_NEVER)) - return true; + return zsa->depth_enabled && zsa->depth_writemask && + (zsa->depth_func != PIPE_FUNC_NEVER); +} - return util_writes_stencil(&zsa->stencil[0]) || +static inline bool +util_writes_depth_stencil(const struct pipe_depth_stencil_alpha_state *zsa) +{ + return util_writes_depth(zsa) || + util_writes_stencil(&zsa->stencil[0]) || util_writes_stencil(&zsa->stencil[1]); } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_linear.c b/lib/mesa/src/gallium/auxiliary/util/u_linear.c deleted file mode 100644 index f1aef2167..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_linear.c +++ /dev/null @@ -1,101 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Functions for converting tiled data to linear and vice versa. 
- */ - - -#include "util/u_debug.h" -#include "u_linear.h" - -void -pipe_linear_to_tile(size_t src_stride, const void *src_ptr, - struct pipe_tile_info *t, void *dst_ptr) -{ - int x, y, z; - char *ptr; - size_t bytes = t->cols * t->block.size; - char *dst_ptr2 = (char *) dst_ptr; - - assert(pipe_linear_check_tile(t)); - - /* lets write lineary to the tiled buffer */ - for (y = 0; y < t->tiles_y; y++) { - for (x = 0; x < t->tiles_x; x++) { - /* this inner loop could be replace with SSE magic */ - ptr = (char*)src_ptr + src_stride * t->rows * y + bytes * x; - for (z = 0; z < t->rows; z++) { - memcpy(dst_ptr2, ptr, bytes); - dst_ptr2 += bytes; - ptr += src_stride; - } - } - } -} - -void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, - size_t dst_stride, void *dst_ptr) -{ - int x, y, z; - char *ptr; - size_t bytes = t->cols * t->block.size; - const char *src_ptr2 = (const char *) src_ptr; - - /* lets read lineary from the tiled buffer */ - for (y = 0; y < t->tiles_y; y++) { - for (x = 0; x < t->tiles_x; x++) { - /* this inner loop could be replace with SSE magic */ - ptr = (char*)dst_ptr + dst_stride * t->rows * y + bytes * x; - for (z = 0; z < t->rows; z++) { - memcpy(ptr, src_ptr2, bytes); - src_ptr2 += bytes; - ptr += dst_stride; - } - } - } -} - -void -pipe_linear_fill_info(struct pipe_tile_info *t, - const struct u_linear_format_block *block, - unsigned tile_width, unsigned tile_height, - unsigned tiles_x, unsigned tiles_y) -{ - t->block = *block; - - t->tile.width = tile_width; - t->tile.height = tile_height; - t->cols = t->tile.width / t->block.width; - t->rows = t->tile.height / t->block.height; - t->tile.size = t->cols * t->rows * t->block.size; - - t->tiles_x = tiles_x; - t->tiles_y = tiles_y; - t->stride = t->cols * t->tiles_x * t->block.size; - t->size = t->tiles_x * t->tiles_y * t->tile.size; -} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_linear.h b/lib/mesa/src/gallium/auxiliary/util/u_linear.h deleted file mode 100644 index 87e52a344..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_linear.h +++ /dev/null @@ -1,106 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Functions for converting tiled data to linear and vice versa. 
- */ - - -#ifndef U_LINEAR_H -#define U_LINEAR_H - -#include "pipe/p_compiler.h" -#include "pipe/p_format.h" - -struct u_linear_format_block -{ - /** Block size in bytes */ - unsigned size; - - /** Block width in pixels */ - unsigned width; - - /** Block height in pixels */ - unsigned height; -}; - - -struct pipe_tile_info -{ - unsigned size; - unsigned stride; - - /* The number of tiles */ - unsigned tiles_x; - unsigned tiles_y; - - /* size of each tile expressed in blocks */ - unsigned cols; - unsigned rows; - - /* Describe the tile in pixels */ - struct u_linear_format_block tile; - - /* Describe each block within the tile */ - struct u_linear_format_block block; -}; - -void pipe_linear_to_tile(size_t src_stride, const void *src_ptr, - struct pipe_tile_info *t, void *dst_ptr); - -void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, - size_t dst_stride, void *dst_ptr); - -/** - * Convenience function to fillout a pipe_tile_info struct. - * @t info to fill out. - * @block block info about pixel layout - * @tile_width the width of the tile in pixels - * @tile_height the height of the tile in pixels - * @tiles_x number of tiles in x axis - * @tiles_y number of tiles in y axis - */ -void pipe_linear_fill_info(struct pipe_tile_info *t, - const struct u_linear_format_block *block, - unsigned tile_width, unsigned tile_height, - unsigned tiles_x, unsigned tiles_y); - -static inline boolean pipe_linear_check_tile(const struct pipe_tile_info *t) -{ - if (t->tile.size != t->block.size * t->cols * t->rows) - return FALSE; - - if (t->stride != t->block.size * t->cols * t->tiles_x) - return FALSE; - - if (t->size < t->stride * t->rows * t->tiles_y) - return FALSE; - - return TRUE; -} - -#endif /* U_LINEAR_H */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.c b/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.c index 85ce9f174..82fca6788 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.c @@ -90,84 +90,6 @@ util_translate_prim_restart_data(unsigned index_size, } } -/** - * Translate an index buffer for primitive restart. - * Create a new index buffer which is a copy of the original index buffer - * except that instances of 'restart_index' are converted to 0xffff or - * 0xffffffff. - * Also, index buffers using 1-byte indexes are converted to 2-byte indexes. 
- */ -enum pipe_error -util_translate_prim_restart_ib(struct pipe_context *context, - const struct pipe_draw_info *info, - const struct pipe_draw_indirect_info *indirect_info, - const struct pipe_draw_start_count_bias *draw, - struct pipe_resource **dst_buffer) -{ - struct pipe_screen *screen = context->screen; - struct pipe_transfer *src_transfer = NULL, *dst_transfer = NULL; - void *src_map = NULL, *dst_map = NULL; - const unsigned src_index_size = info->index_size; - unsigned dst_index_size; - DrawElementsIndirectCommand indirect; - unsigned count = draw->count; - unsigned start = draw->start; - - /* 1-byte indexes are converted to 2-byte indexes, 4-byte stays 4-byte */ - dst_index_size = MAX2(2, info->index_size); - assert(dst_index_size == 2 || dst_index_size == 4); - - if (indirect_info && indirect_info->buffer) { - indirect = read_indirect_elements(context, indirect_info); - count = indirect.count; - start = indirect.firstIndex; - } - - /* Create new index buffer */ - *dst_buffer = pipe_buffer_create(screen, PIPE_BIND_INDEX_BUFFER, - PIPE_USAGE_STREAM, - count * dst_index_size); - if (!*dst_buffer) - goto error; - - /* Map new / dest index buffer */ - dst_map = pipe_buffer_map(context, *dst_buffer, - PIPE_MAP_WRITE, &dst_transfer); - if (!dst_map) - goto error; - - if (info->has_user_indices) - src_map = (unsigned char*)info->index.user + start * src_index_size; - else - /* Map original / src index buffer */ - src_map = pipe_buffer_map_range(context, info->index.resource, - start * src_index_size, - count * src_index_size, - PIPE_MAP_READ, - &src_transfer); - if (!src_map) - goto error; - - util_translate_prim_restart_data(src_index_size, src_map, dst_map, - count, info->restart_index); - - if (src_transfer) - pipe_buffer_unmap(context, src_transfer); - pipe_buffer_unmap(context, dst_transfer); - - return PIPE_OK; - -error: - if (src_transfer) - pipe_buffer_unmap(context, src_transfer); - if (dst_transfer) - pipe_buffer_unmap(context, dst_transfer); - if (*dst_buffer) - pipe_resource_reference(dst_buffer, NULL); - return PIPE_ERROR_OUT_OF_MEMORY; -} - - /** Helper structs for util_draw_vbo_without_prim_restart() */ struct range_info { diff --git a/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.h b/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.h index eb06b8e77..45038d468 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.h @@ -46,13 +46,6 @@ util_translate_prim_restart_data(unsigned index_size, void *src_map, void *dst_map, unsigned count, unsigned restart_index); -enum pipe_error -util_translate_prim_restart_ib(struct pipe_context *context, - const struct pipe_draw_info *info, - const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias *draw, - struct pipe_resource **dst_buffer); - struct pipe_draw_start_count_bias * util_prim_restart_convert_to_direct(const void *index_map, const struct pipe_draw_info *info, diff --git a/lib/mesa/src/gallium/auxiliary/util/u_range.h b/lib/mesa/src/gallium/auxiliary/util/u_range.h index 90dc80bbc..1ade98381 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_range.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_range.h @@ -34,7 +34,7 @@ #ifndef U_RANGE_H #define U_RANGE_H -#include "os/os_thread.h" +#include "util/u_thread.h" #include "pipe/p_state.h" #include "pipe/p_screen.h" #include "util/u_atomic.h" diff --git a/lib/mesa/src/gallium/auxiliary/util/u_surface.c b/lib/mesa/src/gallium/auxiliary/util/u_surface.c index af406e826..cd51fd34b 
100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_surface.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_surface.c @@ -783,9 +783,11 @@ util_can_blit_via_copy_region(const struct pipe_blit_info *blit, } else { /* do loose format compatibility checking */ - if (blit->src.resource->format != blit->src.format || - blit->dst.resource->format != blit->dst.format || - !util_is_format_compatible(src_desc, dst_desc)) { + if ((blit->src.format != blit->dst.format || + src_desc != dst_desc) && + (blit->src.resource->format != blit->src.format || + blit->dst.resource->format != blit->dst.format || + !util_is_format_compatible(src_desc, dst_desc))) { return FALSE; } } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_tests.c b/lib/mesa/src/gallium/auxiliary/util/u_tests.c index aab3ca52c..bec15df2a 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_tests.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_tests.c @@ -516,7 +516,7 @@ disabled_fragment_shader(struct pipe_context *ctx) util_report_result(qresult.u64 == 2); } -#if defined(PIPE_OS_LINUX) && defined(HAVE_LIBDRM) +#if DETECT_OS_LINUX && defined(HAVE_LIBDRM) #include <libsync.h> #else #define sync_merge(str, fd1, fd2) (-1) @@ -594,7 +594,7 @@ test_sync_file_fences(struct pipe_context *ctx) pass = pass && screen->fence_finish(screen, NULL, final_fence, 0); /* Cleanup. */ -#ifndef PIPE_OS_WINDOWS +#if !DETECT_OS_WINDOWS if (buf_fd >= 0) close(buf_fd); if (tex_fd >= 0) diff --git a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c index 6b3929d89..828b8847b 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c @@ -116,29 +116,70 @@ tc_clear_driver_thread(struct threaded_context *tc) #endif } +struct tc_batch_rp_info { + /* this is what drivers can see */ + struct tc_renderpass_info info; + /* determines whether the info can be "safely" read by drivers or if it may still be in use */ + struct util_queue_fence ready; + /* when a batch is full, the rp info rollsover onto 'next' */ + struct tc_batch_rp_info *next; + /* when rp info has rolled over onto this struct, 'prev' is used to update pointers for realloc */ + struct tc_batch_rp_info *prev; +}; + +static struct tc_batch_rp_info * +tc_batch_rp_info(struct tc_renderpass_info *info) +{ + return (struct tc_batch_rp_info *)info; +} + +static void +tc_sanitize_renderpass_info(struct threaded_context *tc) +{ + tc->renderpass_info_recording->cbuf_invalidate = 0; + tc->renderpass_info_recording->zsbuf_invalidate = false; + tc->renderpass_info_recording->cbuf_load |= (~tc->renderpass_info_recording->cbuf_clear) & BITFIELD_MASK(PIPE_MAX_COLOR_BUFS); + if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] && !tc_renderpass_info_is_zsbuf_used(tc->renderpass_info_recording)) + /* this should be a "safe" way to indicate to the driver that both loads and stores are required; + * driver can always detect invalidation + */ + tc->renderpass_info_recording->zsbuf_clear_partial = true; + if (tc->num_queries_active) + tc->renderpass_info_recording->has_query_ends = true; +} + /* ensure the batch's array of renderpass data is large enough for the current index */ static void -tc_batch_renderpass_infos_resize(struct tc_batch *batch) +tc_batch_renderpass_infos_resize(struct threaded_context *tc, struct tc_batch *batch) { unsigned size = batch->renderpass_infos.capacity; - unsigned cur_num = batch->renderpass_info_idx; + unsigned cur_num = MAX2(batch->renderpass_info_idx, 0); - if (size / 
sizeof(struct tc_renderpass_info) > cur_num) + if (size / sizeof(struct tc_batch_rp_info) > cur_num) return; - if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_renderpass_info, cur_num + 10)) + struct tc_batch_rp_info *infos = batch->renderpass_infos.data; + unsigned old_idx = batch->renderpass_info_idx - 1; + bool redo = tc->renderpass_info_recording && + tc->renderpass_info_recording == &infos[old_idx].info; + if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_batch_rp_info, cur_num + 10)) mesa_loge("tc: memory alloc fail!"); if (size != batch->renderpass_infos.capacity) { /* zero new allocation region */ uint8_t *data = batch->renderpass_infos.data; memset(data + size, 0, batch->renderpass_infos.capacity - size); - unsigned start = size / sizeof(struct tc_renderpass_info); + unsigned start = size / sizeof(struct tc_batch_rp_info); unsigned count = (batch->renderpass_infos.capacity - size) / - sizeof(struct tc_renderpass_info); - struct tc_renderpass_info *infos = batch->renderpass_infos.data; + sizeof(struct tc_batch_rp_info); + infos = batch->renderpass_infos.data; + if (infos->prev) + infos->prev->next = infos; for (unsigned i = 0; i < count; i++) util_queue_fence_init(&infos[start + i].ready); + /* re-set current recording info on resize */ + if (redo) + tc->renderpass_info_recording = &infos[old_idx].info; } } @@ -147,43 +188,75 @@ static void tc_signal_renderpass_info_ready(struct threaded_context *tc) { if (tc->renderpass_info_recording && - !util_queue_fence_is_signalled(&tc->renderpass_info_recording->ready)) - util_queue_fence_signal(&tc->renderpass_info_recording->ready); + !util_queue_fence_is_signalled(&tc_batch_rp_info(tc->renderpass_info_recording)->ready)) + util_queue_fence_signal(&tc_batch_rp_info(tc->renderpass_info_recording)->ready); } /* increment the current renderpass info struct for recording * 'full_copy' is used for preserving data across non-blocking tc batch flushes */ static void -tc_batch_increment_renderpass_info(struct threaded_context *tc, bool full_copy) +tc_batch_increment_renderpass_info(struct threaded_context *tc, unsigned batch_idx, bool full_copy) { - struct tc_batch *batch = &tc->batch_slots[tc->next]; - struct tc_renderpass_info *tc_info = batch->renderpass_infos.data; + struct tc_batch *batch = &tc->batch_slots[batch_idx]; + struct tc_batch_rp_info *tc_info = batch->renderpass_infos.data; - /* signal existing info since it will not be used anymore */ - tc_signal_renderpass_info_ready(tc); + if (tc_info[0].next || batch->num_total_slots) { + /* deadlock condition detected: all batches are in flight, renderpass hasn't ended + * (probably a cts case) + */ + struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info_recording); + if (!util_queue_fence_is_signalled(&info->ready)) { + /* this batch is actively executing and the driver is waiting on the recording fence to signal */ + /* force all buffer usage to avoid data loss */ + info->info.cbuf_load = ~(BITFIELD_MASK(8) & info->info.cbuf_clear); + info->info.zsbuf_clear_partial = true; + info->info.has_query_ends = tc->num_queries_active > 0; + /* ensure threaded_context_get_renderpass_info() won't deadlock */ + info->next = NULL; + util_queue_fence_signal(&info->ready); + } + /* always wait on the batch to finish since this will otherwise overwrite thread data */ + util_queue_fence_wait(&batch->fence); + } /* increment rp info and initialize it */ batch->renderpass_info_idx++; - tc_batch_renderpass_infos_resize(batch); + tc_batch_renderpass_infos_resize(tc, 
batch); tc_info = batch->renderpass_infos.data; if (full_copy) { + /* this should only be called when changing batches */ + assert(batch->renderpass_info_idx == 0); /* copy the previous data in its entirety: this is still the same renderpass */ - if (tc->renderpass_info_recording) - tc_info[batch->renderpass_info_idx].data = tc->renderpass_info_recording->data; - else - tc_info[batch->renderpass_info_idx].data = 0; + if (tc->renderpass_info_recording) { + tc_info[batch->renderpass_info_idx].info.data = tc->renderpass_info_recording->data; + tc_batch_rp_info(tc->renderpass_info_recording)->next = &tc_info[batch->renderpass_info_idx]; + tc_info[batch->renderpass_info_idx].prev = tc_batch_rp_info(tc->renderpass_info_recording); + /* guard against deadlock scenario */ + assert(&tc_batch_rp_info(tc->renderpass_info_recording)->next->info != tc->renderpass_info_recording); + } else { + tc_info[batch->renderpass_info_idx].info.data = 0; + tc_info[batch->renderpass_info_idx].prev = NULL; + } } else { /* selectively copy: only the CSO metadata is copied, and a new framebuffer state will be added later */ - tc_info[batch->renderpass_info_idx].data = 0; - if (tc->renderpass_info_recording) - tc_info[batch->renderpass_info_idx].data16[2] = tc->renderpass_info_recording->data16[2]; + tc_info[batch->renderpass_info_idx].info.data = 0; + if (tc->renderpass_info_recording) { + tc_info[batch->renderpass_info_idx].info.data16[2] = tc->renderpass_info_recording->data16[2]; + tc_batch_rp_info(tc->renderpass_info_recording)->next = NULL; + tc_info[batch->renderpass_info_idx].prev = NULL; + } } + assert(!full_copy || !tc->renderpass_info_recording || tc_batch_rp_info(tc->renderpass_info_recording)->next); + /* signal existing info since it will not be used anymore */ + tc_signal_renderpass_info_ready(tc); util_queue_fence_reset(&tc_info[batch->renderpass_info_idx].ready); - assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx]); + /* guard against deadlock scenario */ + assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx].info); /* this is now the current recording renderpass info */ - tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx]; + tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx].info; + batch->max_renderpass_info_idx = batch->renderpass_info_idx; } static ALWAYS_INLINE struct tc_renderpass_info * @@ -207,10 +280,12 @@ tc_parse_draw(struct threaded_context *tc) info->cbuf_invalidate = 0; info->zsbuf_invalidate = false; info->has_draw = true; + info->has_query_ends |= tc->query_ended; } tc->in_renderpass = true; tc->seen_fb_state = true; + tc->query_ended = false; } static void * @@ -291,6 +366,13 @@ tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs) #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \ offsetof(struct pipe_draw_info, min_index) +ALWAYS_INLINE static struct tc_renderpass_info * +incr_rp_info(struct tc_renderpass_info *tc_info) +{ + struct tc_batch_rp_info *info = tc_batch_rp_info(tc_info); + return &info[1].info; +} + ALWAYS_INLINE static void batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, bool parsing) { @@ -314,7 +396,7 @@ batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, if (parsing) { if (call->call_id == TC_CALL_flush) { /* always increment renderpass info for non-deferred flushes */ - batch->tc->renderpass_info++; + batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info); /* if a flush happens, 
renderpass info is always incremented after */ first = false; } else if (call->call_id == TC_CALL_set_framebuffer_state) { @@ -322,7 +404,7 @@ batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, * so don't increment on the first set_framebuffer_state call */ if (!first) - batch->tc->renderpass_info++; + batch->tc->renderpass_info = incr_rp_info(batch->tc->renderpass_info); first = false; } else if (call->call_id >= TC_CALL_draw_single && call->call_id <= TC_CALL_draw_vstate_multi) { @@ -350,10 +432,18 @@ tc_batch_execute(void *job, UNUSED void *gdata, int thread_index) /* setup renderpass info */ batch->tc->renderpass_info = batch->renderpass_infos.data; - if (batch->tc->options.parse_renderpass_info) + if (batch->tc->options.parse_renderpass_info) { batch_execute(batch, pipe, last, true); - else + + struct tc_batch_rp_info *info = batch->renderpass_infos.data; + for (unsigned i = 0; i < batch->max_renderpass_info_idx + 1; i++) { + if (info[i].next) + info[i].next->prev = NULL; + info[i].next = NULL; + } + } else { batch_execute(batch, pipe, last, false); + } /* Add the fence to the list of fences for the driver to signal at the next * flush, which we use for tracking which buffers are referenced by @@ -383,6 +473,7 @@ tc_batch_execute(void *job, UNUSED void *gdata, int thread_index) batch->num_total_slots = 0; batch->last_mergeable_call = NULL; batch->first_set_fb = false; + batch->max_renderpass_info_idx = 0; } static void @@ -406,6 +497,7 @@ static void tc_batch_flush(struct threaded_context *tc, bool full_copy) { struct tc_batch *next = &tc->batch_slots[tc->next]; + unsigned next_id = (tc->next + 1) % TC_MAX_BATCHES; tc_assert(next->num_total_slots != 0); tc_batch_check(next); @@ -420,19 +512,20 @@ tc_batch_flush(struct threaded_context *tc, bool full_copy) /* reset renderpass info index for subsequent use */ next->renderpass_info_idx = -1; - util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute, - NULL, 0); - tc->last = tc->next; - tc->next = (tc->next + 1) % TC_MAX_BATCHES; - tc_begin_next_buffer_list(tc); - /* always increment renderpass info on batch flush; * renderpass info can only be accessed by its owner batch during execution */ if (tc->renderpass_info_recording) { - tc->batch_slots[tc->next].first_set_fb = full_copy; - tc_batch_increment_renderpass_info(tc, full_copy); + tc->batch_slots[next_id].first_set_fb = full_copy; + tc_batch_increment_renderpass_info(tc, next_id, full_copy); } + + util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute, + NULL, 0); + tc->last = tc->next; + tc->next = next_id; + tc_begin_next_buffer_list(tc); + } /* This is the function that adds variable-sized calls into the current @@ -553,6 +646,18 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char tc_debug_check(tc); + if (tc->options.parse_renderpass_info && tc->in_renderpass && !tc->flushing) { + /* corner case: if tc syncs for any reason but a driver flush during a renderpass, + * then the current renderpass info MUST be signaled to avoid deadlocking the driver + * + * this is not a "complete" signal operation, however, as it's unknown what calls may + * come after this one, which means that framebuffer attachment data is unreliable + * + * to avoid erroneously passing bad state to the driver (e.g., allowing zsbuf elimination), + * force all attachments active and assume the app was going to get bad perf here anyway + */ + tc_sanitize_renderpass_info(tc); + } tc_signal_renderpass_info_ready(tc); /* Only wait 
for queued calls... */ @@ -590,12 +695,18 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char if (tc->options.parse_renderpass_info) { int renderpass_info_idx = next->renderpass_info_idx; if (renderpass_info_idx > 0) { + /* don't reset if fb state is unflushed */ + bool fb_no_draw = tc->seen_fb_state && !tc->renderpass_info_recording->has_draw; + uint32_t fb_info = tc->renderpass_info_recording->data32[0]; next->renderpass_info_idx = -1; - tc_batch_increment_renderpass_info(tc, false); + tc_batch_increment_renderpass_info(tc, tc->next, false); + if (fb_no_draw) + tc->renderpass_info_recording->data32[0] = fb_info; } else if (tc->renderpass_info_recording->has_draw) { tc->renderpass_info_recording->data32[0] = 0; } tc->seen_fb_state = false; + tc->query_ended = false; } MESA_TRACE_END(); @@ -632,40 +743,10 @@ threaded_context_flush(struct pipe_context *_pipe, } } -/* Must be called before TC binds, maps, invalidates, or adds a buffer to a buffer list. */ -static void tc_touch_buffer(struct threaded_context *tc, struct threaded_resource *buf) -{ - const struct threaded_context *first_user = buf->first_user; - - /* Fast path exit to avoid additional branches */ - if (likely(first_user == tc)) - return; - - if (!first_user) - first_user = p_atomic_cmpxchg_ptr(&buf->first_user, NULL, tc); - - /* The NULL check might seem unnecessary here but it's actually critical: - * p_atomic_cmpxchg will return NULL if it succeeds, meaning that NULL is - * equivalent to "we're the first user" here. (It's equally important not - * to ignore the result of the cmpxchg above, since it might fail.) - * Without the NULL check, we'd set the flag unconditionally, which is bad. - */ - if (first_user && first_user != tc && !buf->used_by_multiple_contexts) - buf->used_by_multiple_contexts = true; -} - -static bool tc_is_buffer_shared(struct threaded_resource *buf) -{ - return buf->is_shared || buf->used_by_multiple_contexts; -} - static void tc_add_to_buffer_list(struct threaded_context *tc, struct tc_buffer_list *next, struct pipe_resource *buf) { - struct threaded_resource *tbuf = threaded_resource(buf); - tc_touch_buffer(tc, tbuf); - - uint32_t id = tbuf->buffer_id_unique; + uint32_t id = threaded_resource(buf)->buffer_id_unique; BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK); } @@ -673,10 +754,7 @@ tc_add_to_buffer_list(struct threaded_context *tc, struct tc_buffer_list *next, static void tc_bind_buffer(struct threaded_context *tc, uint32_t *binding, struct tc_buffer_list *next, struct pipe_resource *buf) { - struct threaded_resource *tbuf = threaded_resource(buf); - tc_touch_buffer(tc, tbuf); - - uint32_t id = tbuf->buffer_id_unique; + uint32_t id = threaded_resource(buf)->buffer_id_unique; *binding = id; BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK); } @@ -934,8 +1012,6 @@ threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage) { struct threaded_resource *tres = threaded_resource(res); - tres->first_user = NULL; - tres->used_by_multiple_contexts = false; tres->latest = &tres->b; tres->cpu_storage = NULL; util_range_init(&tres->valid_buffer_range); @@ -1084,6 +1160,7 @@ static bool tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query) { struct threaded_context *tc = threaded_context(_pipe); + tc->num_queries_active++; tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query; return true; /* we don't care about the return value for this call */ @@ -1115,11 +1192,13 @@ tc_end_query(struct pipe_context *_pipe, struct pipe_query 
*query) struct threaded_query *tq = threaded_query(query); struct tc_end_query_call *call = tc_add_call(tc, TC_CALL_end_query, tc_end_query_call); + tc->num_queries_active--; call->tc = tc; call->query = query; tq->flushed = false; + tc->query_ended = true; return true; /* we don't care about the return value for this call */ } @@ -1363,6 +1442,7 @@ tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call, uint64_t *l for (unsigned i = 0; i < nr_cbufs; i++) tc_drop_surface_reference(p->cbufs[i]); tc_drop_surface_reference(p->zsbuf); + tc_drop_resource_reference(p->resolve); return call_size(tc_framebuffer); } @@ -1383,6 +1463,13 @@ tc_set_framebuffer_state(struct pipe_context *_pipe, if (tc->options.parse_renderpass_info) { + /* ensure this is treated as the first fb set if no fb activity has occurred */ + if (!tc->renderpass_info_recording->has_draw && + !tc->renderpass_info_recording->cbuf_clear && + !tc->renderpass_info_recording->cbuf_load && + !tc->renderpass_info_recording->zsbuf_load && + !tc->renderpass_info_recording->zsbuf_clear_partial) + tc->batch_slots[tc->next].first_set_fb = false; /* store existing zsbuf data for possible persistence */ uint8_t zsbuf = tc->renderpass_info_recording->has_draw ? 0 : @@ -1400,9 +1487,10 @@ tc_set_framebuffer_state(struct pipe_context *_pipe, sizeof(void*) * (PIPE_MAX_COLOR_BUFS - nr_cbufs)); tc->fb_resources[PIPE_MAX_COLOR_BUFS] = fb->zsbuf ? fb->zsbuf->texture : NULL; + tc->fb_resolve = fb->resolve; if (tc->seen_fb_state) { /* this is the end of a renderpass, so increment the renderpass info */ - tc_batch_increment_renderpass_info(tc, false); + tc_batch_increment_renderpass_info(tc, tc->next, false); /* if zsbuf hasn't changed (i.e., possibly just adding a color buffer): * keep zsbuf usage data */ @@ -1425,6 +1513,8 @@ tc_set_framebuffer_state(struct pipe_context *_pipe, tc->in_renderpass = false; p->state.zsbuf = NULL; pipe_surface_reference(&p->state.zsbuf, fb->zsbuf); + p->state.resolve = NULL; + pipe_resource_reference(&p->state.resolve, fb->resolve); } struct tc_tess_state { @@ -2323,9 +2413,7 @@ tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call, uint64_t * return call_size(tc_replace_buffer_storage); } -/* Return true if the buffer has been invalidated or is idle. - * Note that callers must've called tc_touch_buffer before calling - * this function. */ +/* Return true if the buffer has been invalidated or is idle. */ static bool tc_invalidate_buffer(struct threaded_context *tc, struct threaded_resource *tbuf) @@ -2346,7 +2434,7 @@ tc_invalidate_buffer(struct threaded_context *tc, struct pipe_resource *new_buf; /* Shared, pinned, and sparse buffers can't be reallocated. */ - if (tc_is_buffer_shared(tbuf) || + if (tbuf->is_shared || tbuf->is_user_ptr || tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) return false; @@ -2391,8 +2479,6 @@ tc_invalidate_buffer(struct threaded_context *tc, return true; } -/* Note that callers must've called tc_touch_buffer first before - * calling tc_improve_map_buffer_flags. */ static unsigned tc_improve_map_buffer_flags(struct threaded_context *tc, struct threaded_resource *tres, unsigned usage, @@ -2507,14 +2593,6 @@ tc_buffer_map(struct pipe_context *_pipe, if (usage & PIPE_MAP_THREAD_SAFE) tc_buffer_disable_cpu_storage(resource); - tc_touch_buffer(tc, tres); - - /* CPU storage relies on buffer invalidation never failing. With shared buffers, - * invalidation might not always be possible, so CPU storage can't be used. 
- */ - if (tc_is_buffer_shared(tres)) - tc_buffer_disable_cpu_storage(resource); - usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width); /* If the CPU storage is enabled, return it directly. */ @@ -2817,10 +2895,7 @@ tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) assert(tres->cpu_storage); if (tres->cpu_storage) { - /* Invalidations shouldn't fail as long as CPU storage is allowed. */ - ASSERTED bool invalidated = tc_invalidate_buffer(tc, tres); - assert(invalidated); - + tc_invalidate_buffer(tc, tres); tc_buffer_subdata(&tc->base, &tres->b, PIPE_MAP_UNSYNCHRONIZED | TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE, @@ -2948,8 +3023,6 @@ tc_buffer_subdata(struct pipe_context *_pipe, if (!size) return; - tc_touch_buffer(tc, tres); - usage |= PIPE_MAP_WRITE; /* PIPE_MAP_DIRECTLY supresses implicit DISCARD_RANGE. */ @@ -3084,11 +3157,68 @@ tc_texture_subdata(struct pipe_context *_pipe, } else { struct pipe_context *pipe = tc->pipe; - tc_sync(tc); - tc_set_driver_thread(tc); - pipe->texture_subdata(pipe, resource, level, usage, box, data, - stride, layer_stride); - tc_clear_driver_thread(tc); + if (resource->usage != PIPE_USAGE_STAGING && + tc->options.parse_renderpass_info && tc->in_renderpass) { + enum pipe_format format = resource->format; + if (usage & PIPE_MAP_DEPTH_ONLY) + format = util_format_get_depth_only(format); + else if (usage & PIPE_MAP_STENCIL_ONLY) + format = PIPE_FORMAT_S8_UINT; + unsigned fmt_stride = util_format_get_stride(format, box->width); + unsigned fmt_layer_stride = util_format_get_2d_size(format, stride, box->height); + + struct pipe_resource *pres = pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STREAM, layer_stride * box->depth); + pipe->buffer_subdata(pipe, pres, PIPE_MAP_WRITE | TC_TRANSFER_MAP_THREADED_UNSYNC, 0, layer_stride * box->depth, data); + struct pipe_box src_box = *box; + src_box.x = src_box.y = src_box.z = 0; + + if (fmt_stride == stride && fmt_layer_stride == layer_stride) { + /* if stride matches, single copy is fine*/ + tc->base.resource_copy_region(&tc->base, resource, level, box->x, box->y, box->z, pres, 0, &src_box); + } else { + /* if stride doesn't match, inline util_copy_box on the GPU and assume the driver will optimize */ + src_box.depth = 1; + for (unsigned z = 0; z < box->depth; ++z, src_box.x = z * layer_stride) { + unsigned dst_x = box->x, dst_y = box->y, width = box->width, height = box->height, dst_z = box->z + z; + int blocksize = util_format_get_blocksize(format); + int blockwidth = util_format_get_blockwidth(format); + int blockheight = util_format_get_blockheight(format); + + assert(blocksize > 0); + assert(blockwidth > 0); + assert(blockheight > 0); + + dst_x /= blockwidth; + dst_y /= blockheight; + width = DIV_ROUND_UP(width, blockwidth); + height = DIV_ROUND_UP(height, blockheight); + + width *= blocksize; + + if (width == fmt_stride && width == (unsigned)stride) { + ASSERTED uint64_t size = (uint64_t)height * width; + + assert(size <= SIZE_MAX); + assert(dst_x + src_box.width < u_minify(pres->width0, level)); + assert(dst_y + src_box.height < u_minify(pres->height0, level)); + assert(pres->target != PIPE_TEXTURE_3D || z + src_box.depth < u_minify(pres->depth0, level)); + tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box); + } else { + src_box.height = 1; + for (unsigned i = 0; i < height; i++, dst_y++, src_box.x += stride) + tc->base.resource_copy_region(&tc->base, resource, level, dst_x, dst_y, dst_z, pres, 0, &src_box); + } + } + } + + 
pipe_resource_reference(&pres, NULL); + } else { + tc_sync(tc); + tc_set_driver_thread(tc); + pipe->texture_subdata(pipe, resource, level, usage, box, data, + stride, layer_stride); + tc_clear_driver_thread(tc); + } } } @@ -3117,7 +3247,6 @@ tc_get_sample_position(struct pipe_context *_pipe, struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; - tc_sync(tc); pipe->get_sample_position(pipe, sample_count, sample_index, out_value); } @@ -3404,8 +3533,10 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, struct pipe_context *pipe = tc->pipe; struct pipe_screen *screen = pipe->screen; bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC); + bool deferred = (flags & PIPE_FLUSH_DEFERRED) > 0; - tc->in_renderpass = false; + if (!deferred || !fence) + tc->in_renderpass = false; if (async && tc->options.create_fence) { if (fence) { @@ -3427,7 +3558,7 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, } struct tc_flush_call *p; - if (flags & PIPE_FLUSH_DEFERRED) { + if (deferred) { /* these have identical fields */ p = (struct tc_flush_call *)tc_add_call(tc, TC_CALL_flush_deferred, tc_flush_deferred_call); } else { @@ -3437,7 +3568,7 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, p->fence = fence ? *fence : NULL; p->flags = flags | TC_FLUSH_ASYNC; - if (!(flags & PIPE_FLUSH_DEFERRED)) { + if (!deferred) { /* non-deferred async flushes indicate completion of existing renderpass info */ tc_signal_renderpass_info_ready(tc); tc_batch_flush(tc, false); @@ -3448,17 +3579,20 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, } out_of_memory: + tc->flushing = true; /* renderpass info is signaled during sync */ tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" : flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal"); - if (!(flags & PIPE_FLUSH_DEFERRED)) { + if (!deferred) { tc_flush_queries(tc); tc->seen_fb_state = false; + tc->query_ended = false; } tc_set_driver_thread(tc); pipe->flush(pipe, fence, flags); tc_clear_driver_thread(tc); + tc->flushing = false; } struct tc_draw_single { @@ -3670,7 +3804,8 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, struct threaded_context *tc = threaded_context(_pipe); unsigned index_size = info->index_size; bool has_user_indices = info->has_user_indices; - tc_parse_draw(tc); + if (tc->options.parse_renderpass_info) + tc_parse_draw(tc); if (unlikely(indirect)) { assert(!has_user_indices); @@ -3990,7 +4125,8 @@ tc_draw_vertex_state(struct pipe_context *_pipe, unsigned num_draws) { struct threaded_context *tc = threaded_context(_pipe); - tc_parse_draw(tc); + if (tc->options.parse_renderpass_info) + tc_parse_draw(tc); if (num_draws == 1) { /* Single draw. 
*/ @@ -4171,6 +4307,11 @@ tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info) tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource); tc_set_resource_reference(&blit->info.src.resource, info->src.resource); memcpy(&blit->info, info, sizeof(*info)); + if (tc->options.parse_renderpass_info) { + tc->renderpass_info_recording->has_resolve = info->src.resource->nr_samples > 1 && + info->dst.resource->nr_samples <= 1 && + tc->fb_resolve == info->dst.resource; + } } struct tc_generate_mipmap { @@ -4275,10 +4416,7 @@ tc_invalidate_resource(struct pipe_context *_pipe, struct threaded_context *tc = threaded_context(_pipe); if (resource->target == PIPE_BUFFER) { - /* This can fail, in which case we simply ignore the invalidation request. */ - struct threaded_resource *tbuf = threaded_resource(resource); - tc_touch_buffer(tc, tbuf); - tc_invalidate_buffer(tc, tbuf); + tc_invalidate_buffer(tc, threaded_resource(resource)); return; } @@ -4340,8 +4478,13 @@ tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor if (info) { /* full clears use a different load operation, but are only valid if draws haven't occurred yet */ info->cbuf_clear |= (buffers >> 2) & ~info->cbuf_load; - if (buffers & PIPE_CLEAR_DEPTHSTENCIL && !info->zsbuf_load && !info->zsbuf_clear_partial) - info->zsbuf_clear = true; + if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { + if (!info->zsbuf_load && !info->zsbuf_clear_partial) + info->zsbuf_clear = true; + else if (!info->zsbuf_clear) + /* this is a clear that occurred after a draw: flag as partial to ensure it isn't ignored */ + info->zsbuf_clear_partial = true; + } } } p->scissor_state_set = !!scissor_state; @@ -4812,8 +4955,11 @@ threaded_context_create(struct pipe_context *pipe, return NULL; } - if (options) + if (options) { + /* this is unimplementable */ + assert(!(options->parse_renderpass_info && options->driver_calls_flush_notify)); tc->options = *options; + } pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options); @@ -4858,7 +5004,7 @@ threaded_context_create(struct pipe_context *pipe, tc->batch_slots[i].renderpass_info_idx = -1; if (tc->options.parse_renderpass_info) { util_dynarray_init(&tc->batch_slots[i].renderpass_infos, NULL); - tc_batch_renderpass_infos_resize(&tc->batch_slots[i]); + tc_batch_renderpass_infos_resize(tc, &tc->batch_slots[i]); } } for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) @@ -5022,7 +5168,7 @@ threaded_context_create(struct pipe_context *pipe, tc_begin_next_buffer_list(tc); if (tc->options.parse_renderpass_info) - tc_batch_increment_renderpass_info(tc, false); + tc_batch_increment_renderpass_info(tc, tc->next, false); return &tc->base; fail: @@ -5042,9 +5188,14 @@ threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned d } const struct tc_renderpass_info * -threaded_context_get_renderpass_info(struct threaded_context *tc, bool wait) -{ - if (tc->renderpass_info && wait) - util_queue_fence_wait(&tc->renderpass_info->ready); - return tc->renderpass_info; -}
\ No newline at end of file +threaded_context_get_renderpass_info(struct threaded_context *tc) +{ + assert(tc->renderpass_info && tc->options.parse_renderpass_info); + struct tc_batch_rp_info *info = tc_batch_rp_info(tc->renderpass_info); + while (1) { + util_queue_fence_wait(&info->ready); + if (!info->next) + return &info->info; + info = info->next; + } +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h index e87b0061e..dbc5d6962 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h @@ -78,6 +78,7 @@ * - transfer_map (only unsychronized buffer mappings) * - get_query_result (when threaded_query::flushed == true) * - create_stream_output_target + * - get_sample_position * * * Transfer_map rules for buffer mappings @@ -316,17 +317,6 @@ typedef bool (*tc_is_resource_busy)(struct pipe_screen *screen, struct threaded_resource { struct pipe_resource b; - /* Pointer to the TC that first used this threaded_resource (buffer). This is used to - * allow TCs to determine whether they have been given a buffer that was created by a - * different TC, in which case all TCs have to disable busyness tracking and buffer - * replacement for that particular buffer. - * DO NOT DEREFERENCE. The only operation allowed on this pointer is equality-checking - * since it might be dangling if a buffer has been shared and its first_user has - * already been destroyed. The pointer is const void to discourage such disallowed usage. - * This is NULL if no TC has used this buffer yet. - */ - const void *first_user; - /* Since buffer invalidations are queued, we can't use the base resource * for unsychronized mappings. This points to the latest version of * the buffer after the latest invalidation. It's only used for unsychro- @@ -354,12 +344,6 @@ struct threaded_resource { */ struct util_range valid_buffer_range; - /* True if multiple threaded contexts have accessed this buffer. - * Disables non-multicontext-safe optimizations in TC. - * We can't just re-use is_shared for that purpose as that would confuse drivers. - */ - bool used_by_multiple_contexts; - /* Drivers are required to update this for shared resources and user * pointers. 
*/ bool is_shared; @@ -444,7 +428,11 @@ struct tc_renderpass_info { bool zsbuf_invalidate : 1; /* whether a draw occurs */ bool has_draw : 1; - uint8_t pad : 3; + /* whether a framebuffer resolve occurs on cbuf[0] */ + bool has_resolve : 1; + /* whether queries are ended during this renderpass */ + bool has_query_ends : 1; + uint8_t pad : 1; /* 32 bits offset */ /* bitmask of color buffers using fbfetch */ uint8_t cbuf_fbfetch; @@ -467,8 +455,6 @@ struct tc_renderpass_info { /* zsbuf fb info is in data8[3] */ uint8_t data8[8]; }; - /* determines whether the info can be "safely" read by drivers or if it may still be in use */ - struct util_queue_fence ready; }; static inline bool @@ -482,6 +468,23 @@ tc_renderpass_info_is_zsbuf_used(const struct tc_renderpass_info *info) info->zsbuf_fbfetch; } +/* if a driver ends a renderpass early for some reason, + * this function can be called to reset any stored renderpass info + * to a "safe" state that will avoid data loss on framebuffer attachments + * + * note: ending a renderpass early if invalidate hints are applied will + * result in data loss + */ +static inline void +tc_renderpass_info_reset(struct tc_renderpass_info *info) +{ + info->data32[0] = 0; + info->cbuf_load = BITFIELD_MASK(8); + info->zsbuf_clear_partial = true; + info->has_draw = true; + info->has_query_ends = true; +} + struct tc_batch { struct threaded_context *tc; #if !defined(NDEBUG) && TC_DEBUG >= 1 @@ -490,7 +493,8 @@ struct tc_batch { uint16_t num_total_slots; uint16_t buffer_list_index; /* the index of the current renderpass info for recording */ - int renderpass_info_idx; + int16_t renderpass_info_idx; + uint16_t max_renderpass_info_idx; /* The last mergeable call that was added to this batch (i.e. * buffer subdata). This might be out-of-date or NULL. @@ -559,6 +563,7 @@ struct threaded_context { bool use_forced_staging_uploads; bool add_all_gfx_bindings_to_buffer_list; bool add_all_compute_bindings_to_buffer_list; + uint8_t num_queries_active; /* Estimation of how much vram/gtt bytes are mmap'd in * the current tc_batch. 
@@ -585,6 +590,10 @@ struct threaded_context { bool seen_fb_state; /* whether a renderpass is currently active */ bool in_renderpass; + /* whether a query has ended more recently than a draw */ + bool query_ended; + /* whether pipe_context::flush has been called */ + bool flushing; bool seen_streamout_buffers; bool seen_shader_buffers[PIPE_SHADER_TYPES]; @@ -619,8 +628,9 @@ struct threaded_context { struct tc_batch batch_slots[TC_MAX_BATCHES]; struct tc_buffer_list buffer_lists[TC_MAX_BUFFER_LISTS]; - /* the curent framebuffer attachments; [PIPE_MAX_COLOR_BUFS] is the zsbuf */ + /* the current framebuffer attachments; [PIPE_MAX_COLOR_BUFS] is the zsbuf */ struct pipe_resource *fb_resources[PIPE_MAX_COLOR_BUFS + 1]; + struct pipe_resource *fb_resolve; /* accessed by main thread; preserves info across batches */ struct tc_renderpass_info *renderpass_info_recording; /* accessed by driver thread */ @@ -634,17 +644,18 @@ struct pipe_context *threaded_context_unwrap_sync(struct pipe_context *pipe); void tc_driver_internal_flush_notify(struct threaded_context *tc); /** function for getting the current renderpass info: - * - renderpass info is always valid - * - set 'wait=true' when calling during normal execution - * - set 'wait=true' when calling from flush + * - renderpass info is always non-null * * Rules: - * 1) this must be called with 'wait=true' after the driver receives a pipe_context::set_framebuffer_state callback - * 2) this should be called with 'wait=false' when the driver receives a blocking pipe_context::flush call - * 3) this must not be used during any internal driver operations (e.g., u_blitter) + * - threaded context must have been created with parse_renderpass_info=true + * - must be called after the driver receives a pipe_context::set_framebuffer_state callback + * - must be called after the driver receives a non-deferrable pipe_context::flush callback + * - renderpass info must not be used during any internal driver operations (e.g., u_blitter) + * - must not be called before the driver receives its first pipe_context::set_framebuffer_state callback + * - renderpass info is invalidated only for non-deferrable flushes and new framebuffer states */ const struct tc_renderpass_info * -threaded_context_get_renderpass_info(struct threaded_context *tc, bool wait); +threaded_context_get_renderpass_info(struct threaded_context *tc); struct pipe_context * threaded_context_create(struct pipe_context *pipe, diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c index 202fbed0a..7b8f95560 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c @@ -1450,17 +1450,17 @@ u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info, draw.index_bias = indirect_data[offset + 3]; info->start_instance = indirect_data[offset + 4]; - u_vbuf_draw_vbo(mgr, info, drawid_offset, NULL, &draw, 1); + u_vbuf_draw_vbo(mgr->pipe, info, drawid_offset, NULL, &draw, 1); } } -void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, +void u_vbuf_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, unsigned drawid_offset, const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_start_count_bias *draws, unsigned num_draws) { - struct pipe_context *pipe = mgr->pipe; + struct u_vbuf *mgr = pipe->vbuf; int start_vertex; unsigned min_index; unsigned num_vertices; @@ -1512,6 +1512,9 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct 
pipe_draw_info *info, if (indirect && indirect->buffer) { unsigned draw_count = 0; + /* num_draws can only be 1 with indirect draws. */ + assert(num_draws == 1); + /* Get the number of draws. */ if (indirect->indirect_draw_count) { pipe_buffer_read(pipe, indirect->indirect_draw_count, @@ -1547,6 +1550,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data, indirect->stride, draw_count); free(data); + /* We're done (as num_draws is 1), so return early. */ return; } @@ -1563,6 +1567,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data, indirect->stride, draw_count); free(data); + /* We're done (as num_draws is 1), so return early. */ return; } @@ -1724,6 +1729,8 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, } if (unroll_indices) { + if (!new_info.has_user_indices && info->take_index_buffer_ownership) + pipe_drop_resource_references(new_info.index.resource, 1); new_info.index_size = 0; new_draw.index_bias = 0; new_info.index_bounds_valid = true; diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h index 2d6ca434d..bb3568fb3 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h @@ -35,7 +35,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_format.h" +#include "util/format/u_formats.h" struct cso_context; struct cso_velems_state; @@ -85,7 +85,8 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, unsigned unbind_num_trailing_slots, bool take_ownership, const struct pipe_vertex_buffer *bufs); -void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, +void u_vbuf_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info, unsigned drawid_offset, const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_start_count_bias *draws, diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.c b/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.c index b545b9c29..92e7462c1 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.c +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.c @@ -363,6 +363,13 @@ set_yuv_layer(struct vl_compositor_state *s, struct vl_compositor *c, s->layers[layer].cs = (y) ? c->cs_yuv.bob.y : c->cs_yuv.bob.uv; break; + case VL_COMPOSITOR_NONE: + if (c->pipe_cs_composit_supported) { + s->layers[layer].cs = (y) ? c->cs_yuv.progressive.y : c->cs_yuv.progressive.uv; + break; + } + FALLTHROUGH; + default: if (c->pipe_gfx_supported) s->layers[layer].fs = (y) ? 
c->fs_yuv.weave.y : c->fs_yuv.weave.uv; diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.h b/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.h index 32ce82f73..be82e156c 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.h +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_compositor.h @@ -149,6 +149,10 @@ struct vl_compositor void *y; void *uv; } bob; + struct { + void *y; + void *uv; + } progressive; } cs_yuv; struct { diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_winsys.h b/lib/mesa/src/gallium/auxiliary/vl/vl_winsys.h index 919f86c34..a3945b67e 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_winsys.h +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_winsys.h @@ -39,7 +39,7 @@ #include <windows.h> #endif #include "pipe/p_defines.h" -#include "pipe/p_format.h" +#include "util/format/u_formats.h" struct pipe_screen; struct pipe_surface; diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_winsys_dri3.c b/lib/mesa/src/gallium/auxiliary/vl/vl_winsys_dri3.c index baa12fa95..073630e55 100644 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_winsys_dri3.c +++ b/lib/mesa/src/gallium/auxiliary/vl/vl_winsys_dri3.c @@ -133,13 +133,21 @@ dri3_handle_stamps(struct vl_dri3_screen *scrn, uint64_t ust, uint64_t msc) scrn->last_msc = msc; } -static void +/* XXX this belongs in presentproto */ +#ifndef PresentWindowDestroyed +#define PresentWindowDestroyed (1 << 0) +#endif +static bool dri3_handle_present_event(struct vl_dri3_screen *scrn, xcb_present_generic_event_t *ge) { switch (ge->evtype) { case XCB_PRESENT_CONFIGURE_NOTIFY: { xcb_present_configure_notify_event_t *ce = (void *) ge; + if (ce->pixmap_flags & PresentWindowDestroyed) { + free(ge); + return false; + } scrn->width = ce->width; scrn->height = ce->height; break; @@ -171,6 +179,7 @@ dri3_handle_present_event(struct vl_dri3_screen *scrn, } } free(ge); + return true; } static void @@ -179,8 +188,10 @@ dri3_flush_present_events(struct vl_dri3_screen *scrn) if (scrn->special_event) { xcb_generic_event_t *ev; while ((ev = xcb_poll_for_special_event( - scrn->conn, scrn->special_event)) != NULL) - dri3_handle_present_event(scrn, (xcb_present_generic_event_t *)ev); + scrn->conn, scrn->special_event)) != NULL) { + if (!dri3_handle_present_event(scrn, (xcb_present_generic_event_t *)ev)) + break; + } } } @@ -192,8 +203,7 @@ dri3_wait_present_events(struct vl_dri3_screen *scrn) ev = xcb_wait_for_special_event(scrn->conn, scrn->special_event); if (!ev) return false; - dri3_handle_present_event(scrn, (xcb_present_generic_event_t *)ev); - return true; + return dri3_handle_present_event(scrn, (xcb_present_generic_event_t *)ev); } return false; } @@ -811,7 +821,7 @@ vl_dri3_screen_create(Display *display, int screen) fcntl(fd, F_SETFD, FD_CLOEXEC); free(open_reply); - fd = loader_get_user_preferred_fd(fd, &scrn->is_different_gpu); + scrn->is_different_gpu = loader_get_user_preferred_fd(&fd, NULL); geom_cookie = xcb_get_geometry(scrn->conn, RootWindow(display, screen)); geom_reply = xcb_get_geometry_reply(scrn->conn, geom_cookie, NULL);