author | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-02-24 01:57:18 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-02-24 01:57:18 +0000 |
commit | b24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (patch) | |
tree | 658ca4e6b41655f49463c85edbaeda48979c394c /lib/mesa/src/gallium/drivers/asahi | |
parent | 57768bbb154c2879d34ec20e401b19472e77aaf7 (diff) | |
Import Mesa 21.3.7
Diffstat (limited to 'lib/mesa/src/gallium/drivers/asahi')
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/agx_blit.c | 182 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/agx_pipe.c | 1142 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/agx_public.h | 38 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/agx_state.c | 1658 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/agx_state.h | 308 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/agx_uniforms.c | 109 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/magic.c | 211 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/magic.h | 45 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/meson.build | 41 |
9 files changed, 3734 insertions, 0 deletions
diff --git a/lib/mesa/src/gallium/drivers/asahi/agx_blit.c b/lib/mesa/src/gallium/drivers/asahi/agx_blit.c new file mode 100644 index 000000000..213761ade --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/agx_blit.c @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2021 Alyssa Rosenzweig + * Copyright (C) 2020-2021 Collabora, Ltd. + * Copyright (C) 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "agx_state.h" +#include "compiler/nir/nir_builder.h" +#include "asahi/compiler/agx_compile.h" +#include "gallium/auxiliary/util/u_blitter.h" + +static void +agx_build_reload_shader(struct agx_device *dev) +{ + nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, + &agx_nir_options, "agx_reload"); + b.shader->info.internal = true; + + nir_variable *out = nir_variable_create(b.shader, nir_var_shader_out, + glsl_vector_type(GLSL_TYPE_FLOAT, 4), "output"); + out->data.location = FRAG_RESULT_DATA0; + + nir_ssa_def *fragcoord = nir_load_frag_coord(&b); + nir_ssa_def *coord = nir_channels(&b, fragcoord, 0x3); + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + tex->dest_type = nir_type_float32; + tex->sampler_dim = GLSL_SAMPLER_DIM_RECT; + tex->op = nir_texop_tex; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(coord); + tex->coord_components = 2; + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); + nir_builder_instr_insert(&b, &tex->instr); + nir_store_var(&b, out, &tex->dest.ssa, 0xFF); + + unsigned offset = 0; + unsigned bo_size = 4096; + + struct agx_bo *bo = agx_bo_create(dev, bo_size, AGX_MEMORY_TYPE_SHADER); + dev->reload.bo = bo; + + for (unsigned i = 0; i < AGX_NUM_FORMATS; ++i) { + struct util_dynarray binary; + util_dynarray_init(&binary, NULL); + + nir_shader *s = nir_shader_clone(NULL, b.shader); + struct agx_shader_info info; + + struct agx_shader_key key = { + .fs.tib_formats[0] = i + }; + + agx_compile_shader_nir(s, &key, &binary, &info); + + assert(offset + binary.size < bo_size); + memcpy(((uint8_t *) bo->ptr.cpu) + offset, binary.data, binary.size); + + dev->reload.format[i] = bo->ptr.gpu + offset; + offset += ALIGN_POT(binary.size, 128); + + util_dynarray_fini(&binary); + } +} + +static void +agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter, + bool render_cond) +{ + util_blitter_save_vertex_buffer_slot(blitter, ctx->vertex_buffers); + util_blitter_save_vertex_elements(blitter, ctx->attributes); + 
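/* u_blitter implements blits by drawing with the context's own CSOs, so
 * every piece of bound state it may clobber is saved up front and restored
 * once util_blitter_blit() finishes; missing one of these saves would leak
 * blit state into the application's next draw. */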
util_blitter_save_vertex_shader(blitter, ctx->stage[PIPE_SHADER_VERTEX].shader); + util_blitter_save_rasterizer(blitter, ctx->rast); + util_blitter_save_viewport(blitter, &ctx->viewport); + util_blitter_save_scissor(blitter, &ctx->scissor); + util_blitter_save_fragment_shader(blitter, ctx->stage[PIPE_SHADER_FRAGMENT].shader); + util_blitter_save_blend(blitter, ctx->blend); + util_blitter_save_depth_stencil_alpha(blitter, &ctx->zs); + util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref); + util_blitter_save_so_targets(blitter, 0, NULL); + util_blitter_save_sample_mask(blitter, ctx->sample_mask); + + util_blitter_save_framebuffer(blitter, &ctx->framebuffer); + util_blitter_save_fragment_sampler_states(blitter, + ctx->stage[PIPE_SHADER_FRAGMENT].sampler_count, + (void **)(ctx->stage[PIPE_SHADER_FRAGMENT].samplers)); + util_blitter_save_fragment_sampler_views(blitter, + ctx->stage[PIPE_SHADER_FRAGMENT].texture_count, + (struct pipe_sampler_view **)ctx->stage[PIPE_SHADER_FRAGMENT].textures); + util_blitter_save_fragment_constant_buffer_slot(blitter, + ctx->stage[PIPE_SHADER_FRAGMENT].cb); + + if (!render_cond) { + util_blitter_save_render_condition(blitter, + (struct pipe_query *) ctx->cond_query, + ctx->cond_cond, ctx->cond_mode); + } +} + +void +agx_blit(struct pipe_context *pipe, + const struct pipe_blit_info *info) +{ + //if (info->render_condition_enable && + // !agx_render_condition_check(pan_context(pipe))) + // return; + + struct agx_context *ctx = agx_context(pipe); + + if (!util_blitter_is_blit_supported(ctx->blitter, info)) + unreachable("Unsupported blit\n"); + + agx_blitter_save(ctx, ctx->blitter, info->render_condition_enable); + util_blitter_blit(ctx->blitter, info); +} + +/* We need some fixed shaders for common rendering tasks. When colour buffer + * reload is not in use, a shader is used to clear a particular colour. At the + * end of rendering a tile, a shader is used to write it out. These shaders are + * too trivial to go through the compiler at this stage. */ +#define AGX_STOP \ + 0x88, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, \ + 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00 \ + +#define AGX_BLEND \ + 0x09, 0x00, 0x00, 0x04, 0xf0, 0xfc, 0x80, 0x03 + +/* Clears the tilebuffer, where u6-u7 are preloaded with the FP16 clear colour + + 0: 7e018c098040 bitop_mov r0, u6 + 6: 7e058e098000 bitop_mov r1, u7 + c: 09000004f0fc8003 TODO.blend + */ + +static uint8_t shader_clear[] = { + 0x7e, 0x01, 0x8c, 0x09, 0x80, 0x40, + 0x7e, 0x05, 0x8e, 0x09, 0x80, 0x00, + AGX_BLEND, + AGX_STOP +}; + +static uint8_t shader_store[] = { + 0x7e, 0x00, 0x04, 0x09, 0x80, 0x00, + 0xb1, 0x80, 0x00, 0x80, 0x00, 0x4a, 0x00, 0x00, 0x0a, 0x00, + AGX_STOP +}; + +void +agx_internal_shaders(struct agx_device *dev) +{ + unsigned clear_offset = 0; + unsigned store_offset = 1024; + + struct agx_bo *bo = agx_bo_create(dev, 4096, AGX_MEMORY_TYPE_SHADER); + memcpy(((uint8_t *) bo->ptr.cpu) + clear_offset, shader_clear, sizeof(shader_clear)); + memcpy(((uint8_t *) bo->ptr.cpu) + store_offset, shader_store, sizeof(shader_store)); + + dev->internal.bo = bo; + dev->internal.clear = bo->ptr.gpu + clear_offset; + dev->internal.store = bo->ptr.gpu + store_offset; + + agx_build_reload_shader(dev); +} diff --git a/lib/mesa/src/gallium/drivers/asahi/agx_pipe.c b/lib/mesa/src/gallium/drivers/asahi/agx_pipe.c new file mode 100644 index 000000000..bb4306ed3 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/agx_pipe.c @@ -0,0 +1,1142 @@ +/* + * Copyright 2010 Red Hat Inc. 
+ * Copyright © 2014-2017 Broadcom + * Copyright (C) 2019-2020 Collabora, Ltd. + * Copyright 2006 VMware, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include <stdio.h> +#include <errno.h> +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_screen.h" +#include "util/u_inlines.h" +#include "util/format/u_format.h" +#include "util/u_upload_mgr.h" +#include "util/half_float.h" +#include "frontend/winsys_handle.h" +#include "frontend/sw_winsys.h" +#include "gallium/auxiliary/util/u_transfer.h" +#include "gallium/auxiliary/util/u_surface.h" +#include "gallium/auxiliary/util/u_framebuffer.h" +#include "agx_public.h" +#include "agx_state.h" +#include "magic.h" +#include "asahi/compiler/agx_compile.h" +#include "asahi/lib/decode.h" +#include "asahi/lib/tiling.h" +#include "asahi/lib/agx_formats.h" + +static const struct debug_named_value agx_debug_options[] = { + {"trace", AGX_DBG_TRACE, "Trace the command stream"}, + {"deqp", AGX_DBG_DEQP, "Hacks for dEQP"}, + {"no16", AGX_DBG_NO16, "Disable 16-bit support"}, + DEBUG_NAMED_VALUE_END +}; + +void agx_init_state_functions(struct pipe_context *ctx); + +static struct pipe_query * +agx_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) +{ + struct agx_query *query = CALLOC_STRUCT(agx_query); + + return (struct pipe_query *)query; +} + +static void +agx_destroy_query(struct pipe_context *ctx, struct pipe_query *query) +{ + FREE(query); +} + +static bool +agx_begin_query(struct pipe_context *ctx, struct pipe_query *query) +{ + return true; +} + +static bool +agx_end_query(struct pipe_context *ctx, struct pipe_query *query) +{ + return true; +} + +static bool +agx_get_query_result(struct pipe_context *ctx, + struct pipe_query *query, + bool wait, + union pipe_query_result *vresult) +{ + uint64_t *result = (uint64_t*)vresult; + + *result = 0; + return true; +} + +static void +agx_set_active_query_state(struct pipe_context *pipe, bool enable) +{ +} + + +/* + * resource + */ + +static struct pipe_resource * +agx_resource_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *templat, + struct winsys_handle *whandle, + unsigned usage) +{ + unreachable("Imports todo"); +} + +static bool +agx_resource_get_handle(struct pipe_screen *pscreen, + struct pipe_context *ctx, + struct pipe_resource *pt, + struct winsys_handle *handle, + 
unsigned usage) +{ + unreachable("Handles todo"); +} + +static inline bool +agx_is_2d(const struct agx_resource *pres) +{ + switch (pres->base.target) { + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_CUBE: + return true; + default: + return false; + } +} + +static bool +agx_must_tile(const struct agx_resource *pres) +{ + switch (pres->base.target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_3D: + /* We don't know how to do linear for these */ + return true; + default: + break; + } + + return false; +} + +static bool +agx_should_tile(const struct agx_resource *pres) +{ + const unsigned valid_binding = + PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_BLENDABLE | + PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED; + + unsigned bpp = util_format_get_blocksizebits(pres->base.format); + + bool can_tile = agx_is_2d(pres) + && (bpp == 32) + && ((pres->base.bind & ~valid_binding) == 0); + + bool should_tile = (pres->base.usage != PIPE_USAGE_STREAM); + bool must_tile = agx_must_tile(pres); + + assert(!(must_tile && !can_tile)); + return must_tile || (can_tile && should_tile); +} + +static struct pipe_resource * +agx_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + struct agx_device *dev = agx_device(screen); + struct agx_resource *nresource; + + nresource = CALLOC_STRUCT(agx_resource); + if (!nresource) + return NULL; + + nresource->base = *templ; + nresource->base.screen = screen; + + nresource->modifier = agx_should_tile(nresource) ? + DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER : DRM_FORMAT_MOD_LINEAR; + + unsigned offset = 0; + + for (unsigned l = 0; l <= templ->last_level; ++l) { + unsigned width = u_minify(templ->width0, l); + unsigned height = u_minify(templ->height0, l); + + if (nresource->modifier == DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER) { + width = ALIGN_POT(width, 64); + height = ALIGN_POT(height, 64); + } + + nresource->slices[l].line_stride = + util_format_get_stride(templ->format, width); + + nresource->slices[l].offset = offset; + offset += ALIGN_POT(nresource->slices[l].line_stride * height, 0x80); + } + + /* Arrays and cubemaps have the entire miptree duplicated */ + nresource->array_stride = ALIGN_POT(offset, 64); + unsigned size = ALIGN_POT(nresource->array_stride * templ->array_size, 4096); + + pipe_reference_init(&nresource->base.reference, 1); + + struct sw_winsys *winsys = ((struct agx_screen *) screen)->winsys; + + if (templ->bind & (PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) { + unsigned width0 = templ->width0, height0 = templ->height0; + + if (nresource->modifier == DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER) { + width0 = ALIGN_POT(width0, 64); + height0 = ALIGN_POT(height0, 64); + } + + nresource->dt = winsys->displaytarget_create(winsys, + templ->bind, + templ->format, + width0, + height0, + 64, + NULL /*map_front_private*/, + &nresource->dt_stride); + + nresource->slices[0].line_stride = nresource->dt_stride; + assert((nresource->dt_stride & 0xF) == 0); + + offset = nresource->slices[0].line_stride * ALIGN_POT(templ->height0, 64); + + if (nresource->dt == NULL) { + FREE(nresource); + return NULL; + } + } + + nresource->bo = agx_bo_create(dev, size, AGX_MEMORY_TYPE_FRAMEBUFFER); + + if (!nresource->bo) { + FREE(nresource); + return NULL; + } + + return &nresource->base; +} + +static uint8_t * +agx_rsrc_offset(struct agx_resource *rsrc, unsigned level, unsigned z) +{ + struct agx_bo *bo = rsrc->bo; + uint8_t *map = 
((uint8_t *) bo->ptr.cpu) + rsrc->slices[level].offset; + map += z * rsrc->array_stride; + + return map; +} + +static void +agx_resource_destroy(struct pipe_screen *screen, + struct pipe_resource *prsrc) +{ + struct agx_resource *rsrc = (struct agx_resource *)prsrc; + + if (rsrc->dt) { + /* display target */ + struct agx_screen *agx_screen = (struct agx_screen*)screen; + struct sw_winsys *winsys = agx_screen->winsys; + winsys->displaytarget_destroy(winsys, rsrc->dt); + } + + agx_bo_unreference(rsrc->bo); + FREE(rsrc); +} + + +/* + * transfer + */ + +static void +agx_transfer_flush_region(struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ +} + +static void * +agx_transfer_map(struct pipe_context *pctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, /* a combination of PIPE_MAP_x */ + const struct pipe_box *box, + struct pipe_transfer **out_transfer) +{ + struct agx_context *ctx = agx_context(pctx); + struct agx_resource *rsrc = agx_resource(resource); + unsigned bytes_per_pixel = util_format_get_blocksize(resource->format); + + /* Can't map tiled/compressed directly */ + if ((usage & PIPE_MAP_DIRECTLY) && rsrc->modifier != DRM_FORMAT_MOD_LINEAR) + return NULL; + + if (ctx->batch->cbufs[0] && resource == ctx->batch->cbufs[0]->texture) + pctx->flush(pctx, NULL, 0); + if (ctx->batch->zsbuf && resource == ctx->batch->zsbuf->texture) + pctx->flush(pctx, NULL, 0); + + struct agx_transfer *transfer = CALLOC_STRUCT(agx_transfer); + transfer->base.level = level; + transfer->base.usage = usage; + transfer->base.box = *box; + + pipe_resource_reference(&transfer->base.resource, resource); + *out_transfer = &transfer->base; + + if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER) { + transfer->base.stride = box->width * bytes_per_pixel; + transfer->base.layer_stride = transfer->base.stride * box->height; + transfer->map = calloc(transfer->base.layer_stride, box->depth); + assert(box->depth == 1); + + if ((usage & PIPE_MAP_READ) && BITSET_TEST(rsrc->data_valid, level)) { + for (unsigned z = 0; z < box->depth; ++z) { + uint8_t *map = agx_rsrc_offset(rsrc, level, box->z + z); + + agx_detile(map, transfer->map, + u_minify(resource->width0, level), bytes_per_pixel * 8, + transfer->base.stride / bytes_per_pixel, + box->x, box->y, box->x + box->width, box->y + box->height); + } + } + + return transfer->map; + } else { + assert (rsrc->modifier == DRM_FORMAT_MOD_LINEAR); + + transfer->base.stride = rsrc->slices[level].line_stride; + transfer->base.layer_stride = rsrc->array_stride; + + /* Be conservative for direct writes */ + + if ((usage & PIPE_MAP_WRITE) && (usage & PIPE_MAP_DIRECTLY)) + BITSET_SET(rsrc->data_valid, level); + + return agx_rsrc_offset(rsrc, level, box->z) + + transfer->base.box.y * rsrc->slices[level].line_stride + + transfer->base.box.x * bytes_per_pixel; + } +} + +static void +agx_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + /* Gallium expects writeback here, so we tile */ + + struct agx_transfer *trans = agx_transfer(transfer); + struct pipe_resource *prsrc = transfer->resource; + struct agx_resource *rsrc = (struct agx_resource *) prsrc; + unsigned bytes_per_pixel = util_format_get_blocksize(prsrc->format); + + if (transfer->usage & PIPE_MAP_WRITE) + BITSET_SET(rsrc->data_valid, transfer->level); + + /* Tiling will occur in software from a staging cpu buffer */ + if ((transfer->usage & PIPE_MAP_WRITE) && + rsrc->modifier == DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER) { + 
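/* trans->map is the linear staging buffer allocated in agx_transfer_map;
 * agx_tile() below re-tiles each written layer from that staging copy into
 * the resource's 64x64 Morton-order layout. */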
assert(trans->map != NULL); + + for (unsigned z = 0; z < transfer->box.depth; ++z) { + uint8_t *map = agx_rsrc_offset(rsrc, transfer->level, + transfer->box.z + z); + + agx_tile(map, trans->map, + u_minify(transfer->resource->width0, transfer->level), + bytes_per_pixel * 8, + transfer->stride / bytes_per_pixel, + transfer->box.x, transfer->box.y, + transfer->box.x + transfer->box.width, + transfer->box.y + transfer->box.height); + } + } + + /* Free the transfer */ + free(trans->map); + pipe_resource_reference(&transfer->resource, NULL); + FREE(transfer); +} + +/* + * clear/copy + */ +static void +agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor_state *scissor_state, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->batch->clear |= buffers; + memcpy(ctx->batch->clear_color, color->f, sizeof(color->f)); +} + + +static void +agx_flush_resource(struct pipe_context *ctx, + struct pipe_resource *resource) +{ +} + +/* + * context + */ +static void +agx_flush(struct pipe_context *pctx, + struct pipe_fence_handle **fence, + unsigned flags) +{ + struct agx_context *ctx = agx_context(pctx); + + if (fence) + *fence = NULL; + + /* TODO */ + if (!ctx->batch->cbufs[0]) + return; + + /* Nothing to do */ + if (!(ctx->batch->draw | ctx->batch->clear)) + return; + + /* Finalize the encoder */ + uint8_t stop[5 + 64] = { 0x00, 0x00, 0x00, 0xc0, 0x00 }; + memcpy(ctx->batch->encoder_current, stop, sizeof(stop)); + + /* Emit the commandbuffer */ + uint64_t pipeline_clear = 0; + bool clear_pipeline_textures = false; + + struct agx_device *dev = agx_device(pctx->screen); + + if (ctx->batch->clear & PIPE_CLEAR_COLOR0) { + uint16_t clear_colour[4] = { + _mesa_float_to_half(ctx->batch->clear_color[0]), + _mesa_float_to_half(ctx->batch->clear_color[1]), + _mesa_float_to_half(ctx->batch->clear_color[2]), + _mesa_float_to_half(ctx->batch->clear_color[3]) + }; + + + pipeline_clear = agx_build_clear_pipeline(ctx, + dev->internal.clear, + agx_pool_upload(&ctx->batch->pool, clear_colour, sizeof(clear_colour))); + } else { + enum pipe_format fmt = ctx->batch->cbufs[0]->format; + enum agx_format internal = agx_pixel_format[fmt].internal; + uint32_t shader = dev->reload.format[internal]; + + pipeline_clear = agx_build_reload_pipeline(ctx, shader, + ctx->batch->cbufs[0]); + + clear_pipeline_textures = true; + } + + uint64_t pipeline_store = + agx_build_store_pipeline(ctx, + dev->internal.store, + agx_pool_upload(&ctx->batch->pool, ctx->render_target[0], sizeof(ctx->render_target))); + + /* Pipelines must be 64-byte aligned */ + struct agx_ptr pipeline_null = + agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, 64, 64); + memset(pipeline_null.cpu, 0, 64); + + struct agx_resource *rt0 = agx_resource(ctx->batch->cbufs[0]->texture); + BITSET_SET(rt0->data_valid, 0); + + struct agx_resource *zbuf = ctx->batch->zsbuf ? 
+ agx_resource(ctx->batch->zsbuf->texture) : NULL; + + if (zbuf) + BITSET_SET(zbuf->data_valid, 0); + + /* BO list for a given batch consists of: + * - BOs for the batch's framebuffer surfaces + * - BOs for the batch's pools + * - BOs for the encoder + * - BO for internal shaders + * - BOs added to the batch explicitly + */ + struct agx_batch *batch = ctx->batch; + + agx_batch_add_bo(batch, batch->encoder); + agx_batch_add_bo(batch, batch->scissor.bo); + agx_batch_add_bo(batch, dev->internal.bo); + agx_batch_add_bo(batch, dev->reload.bo); + + for (unsigned i = 0; i < batch->nr_cbufs; ++i) { + struct pipe_surface *surf = batch->cbufs[i]; + assert(surf != NULL && surf->texture != NULL); + struct agx_resource *rsrc = agx_resource(surf->texture); + agx_batch_add_bo(batch, rsrc->bo); + } + + if (batch->zsbuf) { + struct pipe_surface *surf = batch->zsbuf; + struct agx_resource *rsrc = agx_resource(surf->texture); + agx_batch_add_bo(batch, rsrc->bo); + } + + unsigned handle_count = + BITSET_COUNT(batch->bo_list) + + agx_pool_num_bos(&batch->pool) + + agx_pool_num_bos(&batch->pipeline_pool); + + uint32_t *handles = calloc(sizeof(uint32_t), handle_count); + unsigned handle = 0, handle_i = 0; + + BITSET_FOREACH_SET(handle, batch->bo_list, sizeof(batch->bo_list) * 8) { + handles[handle_i++] = handle; + } + + agx_pool_get_bo_handles(&batch->pool, handles + handle_i); + handle_i += agx_pool_num_bos(&batch->pool); + + agx_pool_get_bo_handles(&batch->pipeline_pool, handles + handle_i); + handle_i += agx_pool_num_bos(&batch->pipeline_pool); + + /* Size calculation should've been exact */ + assert(handle_i == handle_count); + + unsigned cmdbuf_id = agx_get_global_id(dev); + unsigned encoder_id = agx_get_global_id(dev); + + unsigned cmdbuf_size = demo_cmdbuf(dev->cmdbuf.ptr.cpu, + dev->cmdbuf.size, + &ctx->batch->pool, + ctx->batch->encoder->ptr.gpu, + encoder_id, + ctx->batch->scissor.bo->ptr.gpu, + ctx->batch->width, + ctx->batch->height, + pipeline_null.gpu, + pipeline_clear, + pipeline_store, + rt0->bo->ptr.gpu, + clear_pipeline_textures); + + /* Generate the mapping table from the BO list */ + demo_mem_map(dev->memmap.ptr.cpu, dev->memmap.size, handles, handle_count, + cmdbuf_id, encoder_id, cmdbuf_size); + + free(handles); + + agx_submit_cmdbuf(dev, dev->cmdbuf.handle, dev->memmap.handle, dev->queue.id); + + agx_wait_queue(dev->queue); + + if (dev->debug & AGX_DBG_TRACE) { + agxdecode_cmdstream(dev->cmdbuf.handle, dev->memmap.handle, true); + agxdecode_next_frame(); + } + + memset(batch->bo_list, 0, sizeof(batch->bo_list)); + agx_pool_cleanup(&ctx->batch->pool); + agx_pool_cleanup(&ctx->batch->pipeline_pool); + agx_pool_init(&ctx->batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true); + agx_pool_init(&ctx->batch->pipeline_pool, dev, AGX_MEMORY_TYPE_CMDBUF_32, true); + ctx->batch->clear = 0; + ctx->batch->draw = 0; + ctx->batch->encoder_current = ctx->batch->encoder->ptr.cpu; + ctx->batch->scissor.count = 0; + ctx->dirty = ~0; +} + +static void +agx_destroy_context(struct pipe_context *pctx) +{ + struct agx_context *ctx = agx_context(pctx); + + if (pctx->stream_uploader) + u_upload_destroy(pctx->stream_uploader); + + if (ctx->blitter) + util_blitter_destroy(ctx->blitter); + + util_unreference_framebuffer_state(&ctx->framebuffer); + + FREE(ctx); +} + +static void +agx_invalidate_resource(struct pipe_context *ctx, + struct pipe_resource *resource) +{ +} + +static struct pipe_context * +agx_create_context(struct pipe_screen *screen, + void *priv, unsigned flags) +{ + struct agx_context *ctx = 
CALLOC_STRUCT(agx_context); + struct pipe_context *pctx = &ctx->base; + + if (!ctx) + return NULL; + + pctx->screen = screen; + pctx->priv = priv; + + ctx->batch = CALLOC_STRUCT(agx_batch); + agx_pool_init(&ctx->batch->pool, + agx_device(screen), AGX_MEMORY_TYPE_FRAMEBUFFER, true); + agx_pool_init(&ctx->batch->pipeline_pool, + agx_device(screen), AGX_MEMORY_TYPE_SHADER, true); + ctx->batch->encoder = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER); + ctx->batch->encoder_current = ctx->batch->encoder->ptr.cpu; + ctx->batch->scissor.bo = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER); + + /* Upload fixed shaders (TODO: compile them?) */ + + pctx->stream_uploader = u_upload_create_default(pctx); + if (!pctx->stream_uploader) { + FREE(pctx); + return NULL; + } + pctx->const_uploader = pctx->stream_uploader; + + pctx->destroy = agx_destroy_context; + pctx->flush = agx_flush; + pctx->clear = agx_clear; + pctx->resource_copy_region = util_resource_copy_region; + pctx->blit = agx_blit; + pctx->flush_resource = agx_flush_resource; + pctx->create_query = agx_create_query; + pctx->destroy_query = agx_destroy_query; + pctx->begin_query = agx_begin_query; + pctx->end_query = agx_end_query; + pctx->get_query_result = agx_get_query_result; + pctx->set_active_query_state = agx_set_active_query_state; + pctx->buffer_map = agx_transfer_map; + pctx->texture_map = agx_transfer_map; + pctx->transfer_flush_region = agx_transfer_flush_region; + pctx->buffer_unmap = agx_transfer_unmap; + pctx->texture_unmap = agx_transfer_unmap; + pctx->buffer_subdata = u_default_buffer_subdata; + pctx->texture_subdata = u_default_texture_subdata; + pctx->invalidate_resource = agx_invalidate_resource; + agx_init_state_functions(pctx); + + + ctx->blitter = util_blitter_create(pctx); + + return pctx; +} + +static void +agx_flush_frontbuffer(struct pipe_screen *_screen, + struct pipe_context *pctx, + struct pipe_resource *prsrc, + unsigned level, unsigned layer, + void *context_private, struct pipe_box *box) +{ + struct agx_resource *rsrc = (struct agx_resource *) prsrc; + struct agx_screen *agx_screen = (struct agx_screen*)_screen; + struct sw_winsys *winsys = agx_screen->winsys; + + /* Dump the framebuffer */ + assert (rsrc->dt); + void *map = winsys->displaytarget_map(winsys, rsrc->dt, PIPE_USAGE_DEFAULT); + assert(map != NULL); + + if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER) { + agx_detile(rsrc->bo->ptr.cpu, map, + rsrc->base.width0, 32, rsrc->dt_stride / 4, + 0, 0, rsrc->base.width0, rsrc->base.height0); + } else { + memcpy(map, rsrc->bo->ptr.cpu, rsrc->dt_stride * rsrc->base.height0); + } + + winsys->displaytarget_display(winsys, rsrc->dt, context_private, box); +} + +static const char * +agx_get_vendor(struct pipe_screen* pscreen) +{ + return "Asahi"; +} + +static const char * +agx_get_device_vendor(struct pipe_screen* pscreen) +{ + return "Apple"; +} + +static const char * +agx_get_name(struct pipe_screen* pscreen) +{ + return "Apple M1 (G13G B0)"; +} + +static int +agx_get_param(struct pipe_screen* pscreen, enum pipe_cap param) +{ + bool is_deqp = agx_device(pscreen)->debug & AGX_DBG_DEQP; + + switch (param) { + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: + case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: + case PIPE_CAP_VERTEX_SHADER_SATURATE: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + case 
PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_CLIP_HALFZ: + return 1; + + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 0; + + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX: + return true; + + case PIPE_CAP_SAMPLER_VIEW_TARGET: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_ACCELERATED: + case PIPE_CAP_UMA: + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_TGSI_ARRAY_COMPONENTS: + case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED: + case PIPE_CAP_PACKED_UNIFORMS: + return 1; + + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + return is_deqp; + + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + return 0; + + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return is_deqp ? PIPE_MAX_SO_BUFFERS : 0; + + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return is_deqp ? PIPE_MAX_SO_OUTPUTS : 0; + + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: + return is_deqp ? 1 : 0; + + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return is_deqp ? 256 : 0; + + case PIPE_CAP_GLSL_FEATURE_LEVEL: + case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: + return 130; + case PIPE_CAP_ESSL_FEATURE_LEVEL: + return 120; + + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 16; + + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return 65536; + + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return 64; + + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + return 1; + + case PIPE_CAP_MAX_TEXTURE_2D_SIZE: + return 16384; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + return 0; + + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_TGSI_TEXCOORD: + case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + return true; + case PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL: + return false; + + case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: + return 0xffff; + + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 0; + + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + + case PIPE_CAP_VIDEO_MEMORY: { + uint64_t system_memory; + + if (!os_get_total_physical_memory(&system_memory)) + return 0; + + return (int)(system_memory >> 20); + } + + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + return 4; + + case PIPE_CAP_MAX_VARYINGS: + return 16; + + case PIPE_CAP_FLATSHADE: + case PIPE_CAP_TWO_SIDED_COLOR: + case PIPE_CAP_ALPHA_TEST: + case PIPE_CAP_CLIP_PLANES: + case PIPE_CAP_NIR_IMAGES_AS_DEREF: + return 0; + + case PIPE_CAP_SHAREABLE_SHADERS: + return 1; + + default: + return u_pipe_screen_get_param_defaults(pscreen, param); + } +} + +static float +agx_get_paramf(struct pipe_screen* pscreen, + enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 16.0; /* Off-by-one fixed point 4:4 
encoding */ + + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 511.95f; + + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0; + + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + + case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: + return 0.0f; + + default: + debug_printf("Unexpected PIPE_CAPF %d query\n", param); + return 0.0; + } +} + +static int +agx_get_shader_param(struct pipe_screen* pscreen, + enum pipe_shader_type shader, + enum pipe_shader_cap param) +{ + bool is_deqp = agx_device(pscreen)->debug & AGX_DBG_DEQP; + bool is_no16 = agx_device(pscreen)->debug & AGX_DBG_NO16; + + if (shader != PIPE_SHADER_VERTEX && + shader != PIPE_SHADER_FRAGMENT) + return 0; + + /* this is probably not totally correct.. but it's a start: */ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 1024; + + case PIPE_SHADER_CAP_MAX_INPUTS: + return 16; + + case PIPE_SHADER_CAP_MAX_OUTPUTS: + return shader == PIPE_SHADER_FRAGMENT ? 4 : 16; + + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */ + + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return 16 * 1024 * sizeof(float); + + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 16; + + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 0; + + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_SUBROUTINES: + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + return 0; + + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return is_deqp; + + case PIPE_SHADER_CAP_INTEGERS: + return true; + + case PIPE_SHADER_CAP_FP16: + case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS: + case PIPE_SHADER_CAP_FP16_DERIVATIVES: + case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: + case PIPE_SHADER_CAP_INT16: + return !is_no16; + + case PIPE_SHADER_CAP_INT64_ATOMICS: + case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + return 0; + + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + return 16; /* XXX: How many? 
*/ + + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_NIR; + + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_NIR_SERIALIZED); + + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; + + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: + case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: + case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: + return 0; + + default: + /* Other params are unknown */ + return 0; + } + + return 0; +} + +static int +agx_get_compute_param(struct pipe_screen *pscreen, + enum pipe_shader_ir ir_type, + enum pipe_compute_cap param, + void *ret) +{ + return 0; +} + +static bool +agx_is_format_supported(struct pipe_screen* pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned storage_sample_count, + unsigned usage) +{ + const struct util_format_description *format_desc; + + assert(target == PIPE_BUFFER || + target == PIPE_TEXTURE_1D || + target == PIPE_TEXTURE_1D_ARRAY || + target == PIPE_TEXTURE_2D || + target == PIPE_TEXTURE_2D_ARRAY || + target == PIPE_TEXTURE_RECT || + target == PIPE_TEXTURE_3D || + target == PIPE_TEXTURE_CUBE || + target == PIPE_TEXTURE_CUBE_ARRAY); + + format_desc = util_format_description(format); + + if (!format_desc) + return false; + + if (sample_count > 1) + return false; + + if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1)) + return false; + + if (usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) { + struct agx_pixel_format_entry ent = agx_pixel_format[format]; + + if (!agx_is_valid_pixel_format(format)) + return false; + + if ((usage & PIPE_BIND_RENDER_TARGET) && !ent.renderable) + return false; + } + + /* TODO: formats */ + if (usage & PIPE_BIND_VERTEX_BUFFER) { + switch (format) { + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return true; + default: + return false; + } + } + + /* TODO */ + return true; +} + +static uint64_t +agx_get_timestamp(struct pipe_screen *pscreen) +{ + return 0; +} + +static void +agx_destroy_screen(struct pipe_screen *screen) +{ + agx_close_device(agx_device(screen)); + ralloc_free(screen); +} + +static void +agx_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + +static bool +agx_fence_finish(struct pipe_screen *screen, + struct pipe_context *ctx, + struct pipe_fence_handle *fence, + uint64_t timeout) +{ + return true; +} + +static const void * +agx_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, + enum pipe_shader_type shader) +{ + return &agx_nir_options; +} + +struct pipe_screen * +agx_screen_create(struct sw_winsys *winsys) +{ + struct agx_screen *agx_screen; + struct pipe_screen *screen; + + agx_screen = rzalloc(NULL, struct agx_screen); + if (!agx_screen) + return NULL; + + screen = &agx_screen->pscreen; + agx_screen->winsys = winsys; + + /* Set debug before opening */ + agx_screen->dev.debug = + debug_get_flags_option("ASAHI_MESA_DEBUG", agx_debug_options, 0); + + /* Try to open an AGX device */ + if (!agx_open_device(screen, &agx_screen->dev)) { + ralloc_free(agx_screen); + return NULL; + } + + screen->destroy = 
agx_destroy_screen; + screen->get_name = agx_get_name; + screen->get_vendor = agx_get_vendor; + screen->get_device_vendor = agx_get_device_vendor; + screen->get_param = agx_get_param; + screen->get_shader_param = agx_get_shader_param; + screen->get_compute_param = agx_get_compute_param; + screen->get_paramf = agx_get_paramf; + screen->is_format_supported = agx_is_format_supported; + screen->context_create = agx_create_context; + screen->resource_create = agx_resource_create; + screen->resource_from_handle = agx_resource_from_handle; + screen->resource_get_handle = agx_resource_get_handle; + screen->resource_destroy = agx_resource_destroy; + screen->flush_frontbuffer = agx_flush_frontbuffer; + screen->get_timestamp = agx_get_timestamp; + screen->fence_reference = agx_fence_reference; + screen->fence_finish = agx_fence_finish; + screen->get_compiler_options = agx_get_compiler_options; + + agx_internal_shaders(&agx_screen->dev); + + return screen; +} diff --git a/lib/mesa/src/gallium/drivers/asahi/agx_public.h b/lib/mesa/src/gallium/drivers/asahi/agx_public.h new file mode 100644 index 000000000..4bf706286 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/agx_public.h @@ -0,0 +1,38 @@ +/* + * Copyright 2010 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef AGX_PUBLIC_H +#define AGX_PUBLIC_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_screen; +struct sw_winsys; +struct pipe_screen *agx_screen_create(struct sw_winsys *winsys); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/mesa/src/gallium/drivers/asahi/agx_state.c b/lib/mesa/src/gallium/drivers/asahi/agx_state.c new file mode 100644 index 000000000..6a9027a0d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/agx_state.c @@ -0,0 +1,1658 @@ +/* + * Copyright 2021 Alyssa Rosenzweig + * Copyright (C) 2019-2020 Collabora, Ltd. + * Copyright 2010 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include <stdio.h> +#include <errno.h> +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_transfer.h" +#include "gallium/auxiliary/util/u_draw.h" +#include "gallium/auxiliary/util/u_helpers.h" +#include "gallium/auxiliary/util/u_viewport.h" +#include "gallium/auxiliary/util/u_blend.h" +#include "gallium/auxiliary/util/u_framebuffer.h" +#include "gallium/auxiliary/tgsi/tgsi_from_mesa.h" +#include "gallium/auxiliary/nir/tgsi_to_nir.h" +#include "compiler/nir/nir.h" +#include "asahi/compiler/agx_compile.h" +#include "agx_state.h" +#include "asahi/lib/agx_pack.h" +#include "asahi/lib/agx_formats.h" + +static struct pipe_stream_output_target * +agx_create_stream_output_target(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct pipe_stream_output_target *target; + + target = &rzalloc(pctx, struct agx_streamout_target)->base; + + if (!target) + return NULL; + + pipe_reference_init(&target->reference, 1); + pipe_resource_reference(&target->buffer, prsc); + + target->context = pctx; + target->buffer_offset = buffer_offset; + target->buffer_size = buffer_size; + + return target; +} + +static void +agx_stream_output_target_destroy(struct pipe_context *pctx, + struct pipe_stream_output_target *target) +{ + pipe_resource_reference(&target->buffer, NULL); + ralloc_free(target); +} + +static void +agx_set_stream_output_targets(struct pipe_context *pctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct agx_context *ctx = agx_context(pctx); + struct agx_streamout *so = &ctx->streamout; + + assert(num_targets <= ARRAY_SIZE(so->targets)); + + for (unsigned i = 0; i < num_targets; i++) { + if (offsets[i] != -1) + agx_so_target(targets[i])->offset = offsets[i]; + + pipe_so_target_reference(&so->targets[i], targets[i]); + } + + for (unsigned i = 0; i < so->num_targets; i++) + pipe_so_target_reference(&so->targets[i], NULL); + + so->num_targets = num_targets; +} + +static void +agx_set_blend_color(struct pipe_context *pctx, + const struct pipe_blend_color *state) +{ + struct agx_context *ctx = agx_context(pctx); + + if (state) + memcpy(&ctx->blend_color, state, sizeof(*state)); +} + +static void * +agx_create_blend_state(struct 
pipe_context *ctx, + const struct pipe_blend_state *state) +{ + struct agx_blend *so = CALLOC_STRUCT(agx_blend); + + assert(!state->alpha_to_coverage); + assert(!state->alpha_to_coverage_dither); + assert(!state->alpha_to_one); + assert(!state->advanced_blend_func); + + if (state->logicop_enable) { + so->logicop_enable = true; + so->logicop_func = state->logicop_func; + return so; + } + + for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) { + unsigned rti = state->independent_blend_enable ? i : 0; + struct pipe_rt_blend_state rt = state->rt[rti]; + + if (!rt.blend_enable) { + static const nir_lower_blend_channel replace = { + .func = BLEND_FUNC_ADD, + .src_factor = BLEND_FACTOR_ZERO, + .invert_src_factor = true, + .dst_factor = BLEND_FACTOR_ZERO, + .invert_dst_factor = false, + }; + + so->rt[i].rgb = replace; + so->rt[i].alpha = replace; + } else { + so->rt[i].rgb.func = util_blend_func_to_shader(rt.rgb_func); + so->rt[i].rgb.src_factor = util_blend_factor_to_shader(rt.rgb_src_factor); + so->rt[i].rgb.invert_src_factor = util_blend_factor_is_inverted(rt.rgb_src_factor); + so->rt[i].rgb.dst_factor = util_blend_factor_to_shader(rt.rgb_dst_factor); + so->rt[i].rgb.invert_dst_factor = util_blend_factor_is_inverted(rt.rgb_dst_factor); + + so->rt[i].alpha.func = util_blend_func_to_shader(rt.alpha_func); + so->rt[i].alpha.src_factor = util_blend_factor_to_shader(rt.alpha_src_factor); + so->rt[i].alpha.invert_src_factor = util_blend_factor_is_inverted(rt.alpha_src_factor); + so->rt[i].alpha.dst_factor = util_blend_factor_to_shader(rt.alpha_dst_factor); + so->rt[i].alpha.invert_dst_factor = util_blend_factor_is_inverted(rt.alpha_dst_factor); + + so->blend_enable = true; + } + + so->rt[i].colormask = rt.colormask; + } + + return so; +} + +static void +agx_bind_blend_state(struct pipe_context *pctx, void *cso) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->blend = cso; +} + +static const enum agx_stencil_op agx_stencil_ops[PIPE_STENCIL_OP_INVERT + 1] = { + [PIPE_STENCIL_OP_KEEP] = AGX_STENCIL_OP_KEEP, + [PIPE_STENCIL_OP_ZERO] = AGX_STENCIL_OP_ZERO, + [PIPE_STENCIL_OP_REPLACE] = AGX_STENCIL_OP_REPLACE, + [PIPE_STENCIL_OP_INCR] = AGX_STENCIL_OP_INCR_SAT, + [PIPE_STENCIL_OP_DECR] = AGX_STENCIL_OP_DECR_SAT, + [PIPE_STENCIL_OP_INCR_WRAP] = AGX_STENCIL_OP_INCR_WRAP, + [PIPE_STENCIL_OP_DECR_WRAP] = AGX_STENCIL_OP_DECR_WRAP, + [PIPE_STENCIL_OP_INVERT] = AGX_STENCIL_OP_INVERT, +}; + +static void +agx_pack_rasterizer_face(struct agx_rasterizer_face_packed *out, + struct pipe_stencil_state st, + enum agx_zs_func z_func, + bool disable_z_write) +{ + agx_pack(out, RASTERIZER_FACE, cfg) { + cfg.depth_function = z_func; + cfg.disable_depth_write = disable_z_write; + + if (st.enabled) { + cfg.stencil_write_mask = st.writemask; + cfg.stencil_read_mask = st.valuemask; + + cfg.depth_pass = agx_stencil_ops[st.zpass_op]; + cfg.depth_fail = agx_stencil_ops[st.zfail_op]; + cfg.stencil_fail = agx_stencil_ops[st.fail_op]; + + cfg.stencil_compare = (enum agx_zs_func) st.func; + } else { + cfg.stencil_write_mask = 0xFF; + cfg.stencil_read_mask = 0xFF; + + cfg.depth_pass = AGX_STENCIL_OP_KEEP; + cfg.depth_fail = AGX_STENCIL_OP_KEEP; + cfg.stencil_fail = AGX_STENCIL_OP_KEEP; + + cfg.stencil_compare = AGX_ZS_FUNC_ALWAYS; + } + } +} + +static void * +agx_create_zsa_state(struct pipe_context *ctx, + const struct pipe_depth_stencil_alpha_state *state) +{ + struct agx_zsa *so = CALLOC_STRUCT(agx_zsa); + assert(!state->depth_bounds_test && "todo"); + + so->base = *state; + + /* Z func can be used as-is */ + 
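/* The STATIC_ASSERTs below verify that gallium's PIPE_FUNC_* values
 * coincide numerically with the hardware AGX_ZS_FUNC_* encoding, making
 * the cast from pipe enum to hardware enum a free translation rather
 * than a lookup table. */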
STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_NEVER == AGX_ZS_FUNC_NEVER); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_LESS == AGX_ZS_FUNC_LESS); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_EQUAL == AGX_ZS_FUNC_EQUAL); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_LEQUAL == AGX_ZS_FUNC_LEQUAL); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_GREATER == AGX_ZS_FUNC_GREATER); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_NOTEQUAL == AGX_ZS_FUNC_NOT_EQUAL); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_GEQUAL == AGX_ZS_FUNC_GEQUAL); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_ALWAYS == AGX_ZS_FUNC_ALWAYS); + + enum agx_zs_func z_func = state->depth_enabled ? + ((enum agx_zs_func) state->depth_func) : AGX_ZS_FUNC_ALWAYS; + + agx_pack_rasterizer_face(&so->front, + state->stencil[0], z_func, !state->depth_writemask); + + if (state->stencil[1].enabled) { + agx_pack_rasterizer_face(&so->back, + state->stencil[1], z_func, !state->depth_writemask); + } else { + /* One sided stencil */ + so->back = so->front; + } + + return so; +} + +static void +agx_bind_zsa_state(struct pipe_context *pctx, void *cso) +{ + struct agx_context *ctx = agx_context(pctx); + + if (cso) + memcpy(&ctx->zs, cso, sizeof(ctx->zs)); +} + +static void * +agx_create_rs_state(struct pipe_context *ctx, + const struct pipe_rasterizer_state *cso) +{ + struct agx_rasterizer *so = CALLOC_STRUCT(agx_rasterizer); + so->base = *cso; + + /* Line width is packed in a 4:4 fixed point format */ + unsigned line_width_fixed = ((unsigned) (cso->line_width * 16.0f)) - 1; + + /* Clamp to maximum line width */ + so->line_width = MIN2(line_width_fixed, 0xFF); + + agx_pack(so->cull, CULL, cfg) { + cfg.cull_front = cso->cull_face & PIPE_FACE_FRONT; + cfg.cull_back = cso->cull_face & PIPE_FACE_BACK; + cfg.front_face_ccw = cso->front_ccw; + cfg.depth_clip = cso->depth_clip_near; + cfg.depth_clamp = !cso->depth_clip_near; + }; + + return so; +} + +static void +agx_bind_rasterizer_state(struct pipe_context *pctx, void *cso) +{ + struct agx_context *ctx = agx_context(pctx); + struct agx_rasterizer *so = cso; + + /* Check if scissor state has changed, since scissor enable is part of the + * rasterizer state but everything else needed for scissors is part of + * viewport/scissor states */ + bool scissor_changed = (cso == NULL) || (ctx->rast == NULL) || + (ctx->rast->base.scissor != so->base.scissor); + + ctx->rast = so; + + if (scissor_changed) + ctx->dirty |= AGX_DIRTY_SCISSOR; +} + +static enum agx_wrap +agx_wrap_from_pipe(enum pipe_tex_wrap in) +{ + switch (in) { + case PIPE_TEX_WRAP_REPEAT: return AGX_WRAP_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return AGX_WRAP_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_REPEAT: return AGX_WRAP_MIRRORED_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return AGX_WRAP_CLAMP_TO_BORDER; + default: unreachable("todo: more wrap modes"); + } +} + +static enum agx_mip_filter +agx_mip_filter_from_pipe(enum pipe_tex_mipfilter in) +{ + switch (in) { + case PIPE_TEX_MIPFILTER_NEAREST: return AGX_MIP_FILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: return AGX_MIP_FILTER_LINEAR; + case PIPE_TEX_MIPFILTER_NONE: return AGX_MIP_FILTER_NONE; + } + + unreachable("Invalid mip filter"); +} + +static const enum agx_compare_func agx_compare_funcs[PIPE_FUNC_ALWAYS + 1] = { + [PIPE_FUNC_NEVER] = AGX_COMPARE_FUNC_NEVER, + [PIPE_FUNC_LESS] = AGX_COMPARE_FUNC_LESS, + [PIPE_FUNC_EQUAL] = AGX_COMPARE_FUNC_EQUAL, + [PIPE_FUNC_LEQUAL] = AGX_COMPARE_FUNC_LEQUAL, + [PIPE_FUNC_GREATER] = AGX_COMPARE_FUNC_GREATER, + [PIPE_FUNC_NOTEQUAL] = 
AGX_COMPARE_FUNC_NOT_EQUAL, + [PIPE_FUNC_GEQUAL] = AGX_COMPARE_FUNC_GEQUAL, + [PIPE_FUNC_ALWAYS] = AGX_COMPARE_FUNC_ALWAYS, +}; + +static void * +agx_create_sampler_state(struct pipe_context *pctx, + const struct pipe_sampler_state *state) +{ + struct agx_device *dev = agx_device(pctx->screen); + struct agx_bo *bo = agx_bo_create(dev, AGX_SAMPLER_LENGTH, + AGX_MEMORY_TYPE_FRAMEBUFFER); + + assert(state->min_lod == 0 && "todo: lod clamps"); + assert(state->lod_bias == 0 && "todo: lod bias"); + + agx_pack(bo->ptr.cpu, SAMPLER, cfg) { + cfg.magnify_linear = (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR); + cfg.minify_linear = (state->min_img_filter == PIPE_TEX_FILTER_LINEAR); + cfg.mip_filter = agx_mip_filter_from_pipe(state->min_mip_filter); + cfg.wrap_s = agx_wrap_from_pipe(state->wrap_s); + cfg.wrap_t = agx_wrap_from_pipe(state->wrap_t); + cfg.wrap_r = agx_wrap_from_pipe(state->wrap_r); + cfg.pixel_coordinates = !state->normalized_coords; + cfg.compare_func = agx_compare_funcs[state->compare_func]; + } + + struct agx_sampler_state *so = CALLOC_STRUCT(agx_sampler_state); + so->base = *state; + so->desc = bo; + + return so; +} + +static void +agx_delete_sampler_state(struct pipe_context *ctx, void *state) +{ + struct agx_bo *bo = state; + agx_bo_unreference(bo); +} + +static void +agx_bind_sampler_states(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned count, + void **states) +{ + struct agx_context *ctx = agx_context(pctx); + + ctx->stage[shader].sampler_count = states ? count : 0; + + memcpy(&ctx->stage[shader].samplers[start], states, + sizeof(struct agx_sampler_state *) * count); +} + +/* Channels agree for RGBA but are weird for force 0/1 */ + +static enum agx_channel +agx_channel_from_pipe(enum pipe_swizzle in) +{ + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_X == AGX_CHANNEL_R); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_Y == AGX_CHANNEL_G); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_Z == AGX_CHANNEL_B); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_W == AGX_CHANNEL_A); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_0 & 0x4); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_1 & 0x4); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_NONE & 0x4); + + if ((in & 0x4) == 0) + return (enum agx_channel) in; + else if (in == PIPE_SWIZZLE_1) + return AGX_CHANNEL_1; + else + return AGX_CHANNEL_0; +} + +static enum agx_layout +agx_translate_layout(uint64_t modifier) +{ + switch (modifier) { + case DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER: + return AGX_LAYOUT_TILED_64X64; + case DRM_FORMAT_MOD_LINEAR: + return AGX_LAYOUT_LINEAR; + default: + unreachable("Invalid modifier"); + } +} + +static enum agx_texture_dimension +agx_translate_texture_dimension(enum pipe_texture_target dim) +{ + switch (dim) { + case PIPE_TEXTURE_2D: return AGX_TEXTURE_DIMENSION_2D; + case PIPE_TEXTURE_CUBE: return AGX_TEXTURE_DIMENSION_CUBE; + default: unreachable("Unsupported texture dimension"); + } +} + +static struct pipe_sampler_view * +agx_create_sampler_view(struct pipe_context *pctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *state) +{ + struct agx_device *dev = agx_device(pctx->screen); + struct agx_resource *rsrc = agx_resource(texture); + struct agx_sampler_view *so = CALLOC_STRUCT(agx_sampler_view); + + if (!so) + return NULL; + + /* We prepare the descriptor at CSO create time */ + so->desc = agx_bo_create(dev, AGX_TEXTURE_LENGTH, + AGX_MEMORY_TYPE_FRAMEBUFFER); + + const struct util_format_description *desc = + 
util_format_description(state->format); + + /* We only have a single swizzle for the user swizzle and the format fixup, + * so compose them now. */ + uint8_t out_swizzle[4]; + uint8_t view_swizzle[4] = { + state->swizzle_r, state->swizzle_g, + state->swizzle_b, state->swizzle_a + }; + + util_format_compose_swizzles(desc->swizzle, view_swizzle, out_swizzle); + + unsigned level = state->u.tex.first_level; + assert(state->u.tex.first_layer == 0); + + /* Pack the descriptor into GPU memory */ + agx_pack(so->desc->ptr.cpu, TEXTURE, cfg) { + cfg.dimension = agx_translate_texture_dimension(state->target); + cfg.layout = agx_translate_layout(rsrc->modifier); + cfg.format = agx_pixel_format[state->format].hw; + cfg.swizzle_r = agx_channel_from_pipe(out_swizzle[0]); + cfg.swizzle_g = agx_channel_from_pipe(out_swizzle[1]); + cfg.swizzle_b = agx_channel_from_pipe(out_swizzle[2]); + cfg.swizzle_a = agx_channel_from_pipe(out_swizzle[3]); + cfg.width = u_minify(texture->width0, level); + cfg.height = u_minify(texture->height0, level); + cfg.levels = state->u.tex.last_level - level + 1; + cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); + cfg.address = rsrc->bo->ptr.gpu + rsrc->slices[level].offset; + cfg.unk_2 = false; + + cfg.stride = (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) ? + (rsrc->slices[level].line_stride - 16) : + AGX_RT_STRIDE_TILED; + } + + /* Initialize base object */ + so->base = *state; + so->base.texture = NULL; + pipe_resource_reference(&so->base.texture, texture); + pipe_reference_init(&so->base.reference, 1); + so->base.context = pctx; + return &so->base; +} + +static void +agx_set_sampler_views(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership, + struct pipe_sampler_view **views) +{ + struct agx_context *ctx = agx_context(pctx); + unsigned new_nr = 0; + unsigned i; + + assert(start == 0); + + if (!views) + count = 0; + + for (i = 0; i < count; ++i) { + if (views[i]) + new_nr = i + 1; + + if (take_ownership) { + pipe_sampler_view_reference((struct pipe_sampler_view **) + &ctx->stage[shader].textures[i], NULL); + ctx->stage[shader].textures[i] = (struct agx_sampler_view *)views[i]; + } else { + pipe_sampler_view_reference((struct pipe_sampler_view **) + &ctx->stage[shader].textures[i], views[i]); + } + } + + for (; i < ctx->stage[shader].texture_count; i++) { + pipe_sampler_view_reference((struct pipe_sampler_view **) + &ctx->stage[shader].textures[i], NULL); + } + ctx->stage[shader].texture_count = new_nr; +} + +static void +agx_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *pview) +{ + struct agx_sampler_view *view = (struct agx_sampler_view *) pview; + pipe_resource_reference(&view->base.texture, NULL); + agx_bo_unreference(view->desc); + FREE(view); +} + +static struct pipe_surface * +agx_create_surface(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_surface *surf_tmpl) +{ + struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); + + if (!surface) + return NULL; + pipe_reference_init(&surface->reference, 1); + pipe_resource_reference(&surface->texture, texture); + surface->context = ctx; + surface->format = surf_tmpl->format; + surface->width = texture->width0; + surface->height = texture->height0; + surface->texture = texture; + surface->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + surface->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + surface->u.tex.level = surf_tmpl->u.tex.level; + + return 
surface; +} + +static void +agx_set_clip_state(struct pipe_context *ctx, + const struct pipe_clip_state *state) +{ +} + +static void +agx_set_polygon_stipple(struct pipe_context *ctx, + const struct pipe_poly_stipple *state) +{ +} + +static void +agx_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ + struct agx_context *ctx = agx_context(pipe); + ctx->sample_mask = sample_mask; +} + +static void +agx_set_scissor_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissor) +{ + struct agx_context *ctx = agx_context(pctx); + + assert(start_slot == 0 && "no geometry shaders"); + assert(num_scissors == 1 && "no geometry shaders"); + + ctx->scissor = *scissor; + ctx->dirty |= AGX_DIRTY_SCISSOR; +} + +static void +agx_set_stencil_ref(struct pipe_context *pctx, + const struct pipe_stencil_ref state) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->stencil_ref = state; +} + +static void +agx_set_viewport_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *vp) +{ + struct agx_context *ctx = agx_context(pctx); + + assert(start_slot == 0 && "no geometry shaders"); + assert(num_viewports == 1 && "no geometry shaders"); + + ctx->dirty |= AGX_DIRTY_VIEWPORT; + ctx->viewport = *vp; +} + +struct agx_viewport_scissor { + uint64_t viewport; + unsigned scissor; +}; + +static struct agx_viewport_scissor +agx_upload_viewport_scissor(struct agx_pool *pool, + struct agx_batch *batch, + const struct pipe_viewport_state *vp, + const struct pipe_scissor_state *ss) +{ + struct agx_ptr T = agx_pool_alloc_aligned(pool, AGX_VIEWPORT_LENGTH, 64); + + float trans_x = vp->translate[0], trans_y = vp->translate[1]; + float abs_scale_x = fabsf(vp->scale[0]), abs_scale_y = fabsf(vp->scale[1]); + + /* Calculate the extent of the viewport. Note if a particular dimension of + * the viewport is an odd number of pixels, both the translate and the scale + * will have a fractional part of 0.5, so adding and subtracting them yields + * an integer. Therefore we don't need to round explicitly */ + unsigned minx = CLAMP((int) (trans_x - abs_scale_x), 0, batch->width); + unsigned miny = CLAMP((int) (trans_y - abs_scale_y), 0, batch->height); + unsigned maxx = CLAMP((int) (trans_x + abs_scale_x), 0, batch->width); + unsigned maxy = CLAMP((int) (trans_y + abs_scale_y), 0, batch->height); + + if (ss) { + minx = MAX2(ss->minx, minx); + miny = MAX2(ss->miny, miny); + maxx = MIN2(ss->maxx, maxx); + maxy = MIN2(ss->maxy, maxy); + } + + assert(maxx > minx && maxy > miny); + + float minz, maxz; + util_viewport_zmin_zmax(vp, false, &minz, &maxz); + + agx_pack(T.cpu, VIEWPORT, cfg) { + cfg.min_tile_x = minx / 32; + cfg.min_tile_y = miny / 32; + cfg.max_tile_x = DIV_ROUND_UP(maxx, 32); + cfg.max_tile_y = DIV_ROUND_UP(maxy, 32); + cfg.clip_tile = true; + + cfg.translate_x = vp->translate[0]; + cfg.translate_y = vp->translate[1]; + cfg.scale_x = vp->scale[0]; + cfg.scale_y = vp->scale[1]; + + /* Assumes [0, 1] clip coordinates. If half-z is not in use, lower_half_z + * is called to ensure this works. 
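Concretely, the hardware applies
+    * z' = translate_z + z * scale_z, so the values below map clip-space z in
+    * [0, 1] onto the [minz, maxz] depth range.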
*/ + cfg.translate_z = minz; + cfg.scale_z = maxz - minz; + }; + + /* Allocate a new scissor descriptor */ + struct agx_scissor_packed *ptr = batch->scissor.bo->ptr.cpu; + unsigned index = (batch->scissor.count++); + + agx_pack(ptr + index, SCISSOR, cfg) { + cfg.min_x = minx; + cfg.min_y = miny; + cfg.min_z = minz; + cfg.max_x = maxx; + cfg.max_y = maxy; + cfg.max_z = maxz; + } + + return (struct agx_viewport_scissor) { + .viewport = T.gpu, + .scissor = index + }; +} + +/* A framebuffer state can be reused across batches, so it doesn't make sense + * to add surfaces to the BO list here. Instead we added them when flushing. + */ + +static void +agx_set_framebuffer_state(struct pipe_context *pctx, + const struct pipe_framebuffer_state *state) +{ + struct agx_context *ctx = agx_context(pctx); + + if (!state) + return; + + /* XXX: eliminate this flush with batch tracking logic */ + pctx->flush(pctx, NULL, 0); + + util_copy_framebuffer_state(&ctx->framebuffer, state); + ctx->batch->width = state->width; + ctx->batch->height = state->height; + ctx->batch->nr_cbufs = state->nr_cbufs; + ctx->batch->cbufs[0] = state->cbufs[0]; + ctx->batch->zsbuf = state->zsbuf; + ctx->dirty = ~0; + + for (unsigned i = 0; i < state->nr_cbufs; ++i) { + struct pipe_surface *surf = state->cbufs[i]; + struct agx_resource *tex = agx_resource(surf->texture); + const struct util_format_description *desc = + util_format_description(surf->format); + + agx_pack(ctx->render_target[i], RENDER_TARGET, cfg) { + cfg.layout = agx_translate_layout(tex->modifier); + cfg.format = agx_pixel_format[surf->format].hw; + cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]); + cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]); + cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]); + cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]); + cfg.width = state->width; + cfg.height = state->height; + cfg.buffer = tex->bo->ptr.gpu; + + cfg.stride = (tex->modifier == DRM_FORMAT_MOD_LINEAR) ? + (tex->slices[0].line_stride - 4) : + AGX_RT_STRIDE_TILED; + }; + } +} + +/* Likewise constant buffers, textures, and samplers are handled in a common + * per-draw path, with dirty tracking to reduce the costs involved. 
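The CSO hooks
+ * below therefore only record the new bindings; the descriptors themselves
+ * are built and uploaded when a draw is encoded, via agx_build_pipeline and
+ * agx_push_location.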
+ */ + +static void +agx_set_constant_buffer(struct pipe_context *pctx, + enum pipe_shader_type shader, uint index, + bool take_ownership, + const struct pipe_constant_buffer *cb) +{ + struct agx_context *ctx = agx_context(pctx); + struct agx_stage *s = &ctx->stage[shader]; + + util_copy_constant_buffer(&s->cb[index], cb, take_ownership); + + unsigned mask = (1 << index); + + if (cb) + s->cb_mask |= mask; + else + s->cb_mask &= ~mask; +} + +static void +agx_surface_destroy(struct pipe_context *ctx, + struct pipe_surface *surface) +{ + pipe_resource_reference(&surface->texture, NULL); + FREE(surface); +} + +static void +agx_delete_state(struct pipe_context *ctx, void *state) +{ + FREE(state); +} + +/* BOs added to the batch in the uniform upload path */ + +static void +agx_set_vertex_buffers(struct pipe_context *pctx, + unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership, + const struct pipe_vertex_buffer *buffers) +{ + struct agx_context *ctx = agx_context(pctx); + + util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, + start_slot, count, unbind_num_trailing_slots, take_ownership); + + ctx->dirty |= AGX_DIRTY_VERTEX; +} + +static void * +agx_create_vertex_elements(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *state) +{ + assert(count < AGX_MAX_ATTRIBS); + + struct agx_attribute *attribs = calloc(sizeof(*attribs), AGX_MAX_ATTRIBS); + for (unsigned i = 0; i < count; ++i) { + const struct pipe_vertex_element ve = state[i]; + + const struct util_format_description *desc = + util_format_description(ve.src_format); + + unsigned chan_size = desc->channel[0].size / 8; + + assert(chan_size == 1 || chan_size == 2 || chan_size == 4); + assert(desc->nr_channels >= 1 && desc->nr_channels <= 4); + assert((ve.src_offset & (chan_size - 1)) == 0); + + attribs[i] = (struct agx_attribute) { + .buf = ve.vertex_buffer_index, + .src_offset = ve.src_offset / chan_size, + .nr_comps_minus_1 = desc->nr_channels - 1, + .format = agx_vertex_format[ve.src_format], + .divisor = ve.instance_divisor + }; + } + + return attribs; +} + +static void +agx_bind_vertex_elements_state(struct pipe_context *pctx, void *cso) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->attributes = cso; + ctx->dirty |= AGX_DIRTY_VERTEX; +} + +static uint32_t asahi_shader_key_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct asahi_shader_key)); +} + +static bool asahi_shader_key_equal(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct asahi_shader_key)) == 0; +} + +static void * +agx_create_shader_state(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct agx_uncompiled_shader *so = CALLOC_STRUCT(agx_uncompiled_shader); + + if (!so) + return NULL; + + so->base = *cso; + + if (cso->type == PIPE_SHADER_IR_NIR) { + so->nir = cso->ir.nir; + } else { + assert(cso->type == PIPE_SHADER_IR_TGSI); + so->nir = tgsi_to_nir(cso->tokens, pctx->screen, false); + } + + so->variants = _mesa_hash_table_create(NULL, asahi_shader_key_hash, asahi_shader_key_equal); + return so; +} + +static bool +agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out, + enum pipe_shader_type stage, struct asahi_shader_key *key) +{ + struct agx_uncompiled_shader *so = ctx->stage[stage].shader; + assert(so != NULL); + + struct hash_entry *he = _mesa_hash_table_search(so->variants, key); + + if (he) { + if ((*out) == he->data) + return false; + + *out = he->data; + return true; + } + + struct 
agx_compiled_shader *compiled = CALLOC_STRUCT(agx_compiled_shader); + struct util_dynarray binary; + util_dynarray_init(&binary, NULL); + + nir_shader *nir = nir_shader_clone(NULL, so->nir); + + if (key->blend.blend_enable) { + nir_lower_blend_options opts = { + .format = { key->rt_formats[0] }, + .scalar_blend_const = true + }; + + memcpy(opts.rt, key->blend.rt, sizeof(opts.rt)); + NIR_PASS_V(nir, nir_lower_blend, opts); + } else if (key->blend.logicop_enable) { + nir_lower_blend_options opts = { + .format = { key->rt_formats[0] }, + .logicop_enable = true, + .logicop_func = key->blend.logicop_func, + }; + + NIR_PASS_V(nir, nir_lower_blend, opts); + } + + if (stage == PIPE_SHADER_FRAGMENT) + NIR_PASS_V(nir, nir_lower_fragcolor, key->nr_cbufs); + + agx_compile_shader_nir(nir, &key->base, &binary, &compiled->info); + + struct agx_varyings *varyings = &compiled->info.varyings; + unsigned packed_varying_sz = (AGX_VARYING_HEADER_LENGTH + varyings->nr_descs * AGX_VARYING_LENGTH); + uint8_t *packed_varyings = alloca(packed_varying_sz); + + agx_pack(packed_varyings, VARYING_HEADER, cfg) { + cfg.triangle_slots = cfg.point_slots = varyings->nr_slots; + } + + memcpy(packed_varyings + AGX_VARYING_HEADER_LENGTH, varyings->packed, + varyings->nr_descs * AGX_VARYING_LENGTH); + + if (binary.size) { + struct agx_device *dev = agx_device(ctx->base.screen); + compiled->bo = agx_bo_create(dev, + ALIGN_POT(binary.size, 256) + (3 * packed_varying_sz), + AGX_MEMORY_TYPE_SHADER); + memcpy(compiled->bo->ptr.cpu, binary.data, binary.size); + + + /* TODO: Why is the varying descriptor duplicated 3x? */ + unsigned offs = ALIGN_POT(binary.size, 256); + for (unsigned copy = 0; copy < 3; ++copy) { + memcpy(((uint8_t *) compiled->bo->ptr.cpu) + offs, packed_varyings, packed_varying_sz); + offs += packed_varying_sz; + } + + compiled->varyings = compiled->bo->ptr.gpu + ALIGN_POT(binary.size, 256); + } + + ralloc_free(nir); + util_dynarray_fini(&binary); + + he = _mesa_hash_table_insert(so->variants, key, compiled); + *out = he->data; + return true; +} + +static bool +agx_update_vs(struct agx_context *ctx) +{ + struct agx_vs_shader_key key = { + .num_vbufs = util_last_bit(ctx->vb_mask), + .clip_halfz = ctx->rast->base.clip_halfz, + }; + + memcpy(key.attributes, ctx->attributes, + sizeof(key.attributes[0]) * AGX_MAX_ATTRIBS); + + u_foreach_bit(i, ctx->vb_mask) { + key.vbuf_strides[i] = ctx->vertex_buffers[i].stride; + } + + struct asahi_shader_key akey = { + .base.vs = key + }; + + return agx_update_shader(ctx, &ctx->vs, PIPE_SHADER_VERTEX, &akey); +} + +static bool +agx_update_fs(struct agx_context *ctx) +{ + struct asahi_shader_key key = { + .nr_cbufs = ctx->batch->nr_cbufs, + }; + + for (unsigned i = 0; i < key.nr_cbufs; ++i) { + struct pipe_surface *surf = ctx->batch->cbufs[i]; + + if (surf) { + enum pipe_format fmt = surf->format; + key.rt_formats[i] = fmt; + key.base.fs.tib_formats[i] = agx_pixel_format[fmt].internal; + } else { + key.rt_formats[i] = PIPE_FORMAT_NONE; + } + } + + memcpy(&key.blend, ctx->blend, sizeof(key.blend)); + + return agx_update_shader(ctx, &ctx->fs, PIPE_SHADER_FRAGMENT, &key); +} + +static void +agx_bind_shader_state(struct pipe_context *pctx, void *cso) +{ + if (!cso) + return; + + struct agx_context *ctx = agx_context(pctx); + struct agx_uncompiled_shader *so = cso; + + enum pipe_shader_type type = pipe_shader_type_from_mesa(so->nir->info.stage); + ctx->stage[type].shader = so; +} + +static void +agx_delete_compiled_shader(struct hash_entry *ent) +{ + struct agx_compiled_shader *so = 
ent->data; + agx_bo_unreference(so->bo); + FREE(so); +} + +static void +agx_delete_shader_state(struct pipe_context *ctx, + void *cso) +{ + struct agx_uncompiled_shader *so = cso; + _mesa_hash_table_destroy(so->variants, agx_delete_compiled_shader); + free(so); +} + +/* Pipeline consists of a sequence of binding commands followed by a set shader command */ +static uint32_t +agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum pipe_shader_type stage) +{ + /* Pipelines must be 64-byte aligned */ + struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, + (16 * AGX_BIND_UNIFORM_LENGTH) + // XXX: correct sizes, break up at compile time + (ctx->stage[stage].texture_count * AGX_BIND_TEXTURE_LENGTH) + + (PIPE_MAX_SAMPLERS * AGX_BIND_SAMPLER_LENGTH) + + AGX_SET_SHADER_EXTENDED_LENGTH + 8, + 64); + + uint8_t *record = ptr.cpu; + + /* There is a maximum number of half words we may push with a single + * BIND_UNIFORM record, so split up the range to fit. We only need to call + * agx_push_location once, however, which reduces the cost. */ + unsigned unif_records = 0; + + for (unsigned i = 0; i < cs->info.push_ranges; ++i) { + struct agx_push push = cs->info.push[i]; + uint64_t buffer = agx_push_location(ctx, push, stage); + unsigned halfs_per_record = 14; + unsigned records = DIV_ROUND_UP(push.length, halfs_per_record); + + /* Ensure we don't overflow */ + unif_records += records; + assert(unif_records < 16); + + for (unsigned j = 0; j < records; ++j) { + agx_pack(record, BIND_UNIFORM, cfg) { + cfg.start_halfs = push.base + (j * halfs_per_record); + cfg.size_halfs = MIN2(push.length - (j * halfs_per_record), halfs_per_record); + cfg.buffer = buffer + (j * halfs_per_record * 2); + } + + record += AGX_BIND_UNIFORM_LENGTH; + } + } + + for (unsigned i = 0; i < ctx->stage[stage].texture_count; ++i) { + struct agx_sampler_view *tex = ctx->stage[stage].textures[i]; + agx_batch_add_bo(ctx->batch, tex->desc); + agx_batch_add_bo(ctx->batch, agx_resource(tex->base.texture)->bo); + + + agx_pack(record, BIND_TEXTURE, cfg) { + cfg.start = i; + cfg.count = 1; + cfg.buffer = tex->desc->ptr.gpu; + } + + record += AGX_BIND_TEXTURE_LENGTH; + } + + for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + struct agx_sampler_state *sampler = ctx->stage[stage].samplers[i]; + + if (!sampler) + continue; + + struct agx_bo *bo = sampler->desc; + agx_batch_add_bo(ctx->batch, bo); + + agx_pack(record, BIND_SAMPLER, cfg) { + cfg.start = i; + cfg.count = 1; + cfg.buffer = bo->ptr.gpu; + } + + record += AGX_BIND_SAMPLER_LENGTH; + } + + /* TODO: Can we prepack this? */ + if (stage == PIPE_SHADER_FRAGMENT) { + agx_pack(record, SET_SHADER_EXTENDED, cfg) { + cfg.code = cs->bo->ptr.gpu; + cfg.register_quadwords = 0; + cfg.unk_3 = 0x8d; + cfg.unk_1 = 0x2010bd; + cfg.unk_2 = 0x0d; + cfg.unk_2b = 1; + cfg.unk_3b = 0x1; + cfg.unk_4 = 0x800; + cfg.preshader_unk = 0xc080; + cfg.spill_size = 0x2; + } + + record += AGX_SET_SHADER_EXTENDED_LENGTH; + } else { + agx_pack(record, SET_SHADER, cfg) { + cfg.code = cs->bo->ptr.gpu; + cfg.register_quadwords = 0; + cfg.unk_2b = cs->info.varyings.nr_slots; + cfg.unk_2 = 0x0d; + } + + record += AGX_SET_SHADER_LENGTH; + } + + /* End pipeline */ + memset(record, 0, 8); + assert(ptr.gpu < (1ull << 32)); + return ptr.gpu; +} + +/* Internal pipelines (TODO: refactor?) 
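These hand-rolled pipelines implement the
+ * tilebuffer clear, reload, and store passes around a render pass. Each is a
+ * short run of BIND_* records plus a SET_SHADER_EXTENDED record, terminated
+ * by eight zero bytes, mirroring agx_build_pipeline above.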
*/ +uint64_t +agx_build_clear_pipeline(struct agx_context *ctx, uint32_t code, uint64_t clear_buf) +{ + struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, + (1 * AGX_BIND_UNIFORM_LENGTH) + + AGX_SET_SHADER_EXTENDED_LENGTH + 8, + 64); + + uint8_t *record = ptr.cpu; + + agx_pack(record, BIND_UNIFORM, cfg) { + cfg.start_halfs = (6 * 2); + cfg.size_halfs = 4; + cfg.buffer = clear_buf; + } + + record += AGX_BIND_UNIFORM_LENGTH; + + /* TODO: Can we prepack this? */ + agx_pack(record, SET_SHADER_EXTENDED, cfg) { + cfg.code = code; + cfg.register_quadwords = 1; + cfg.unk_3 = 0x8d; + cfg.unk_2 = 0x0d; + cfg.unk_2b = 4; + cfg.frag_unk = 0x880100; + cfg.preshader_mode = 0; // XXX + } + + record += AGX_SET_SHADER_EXTENDED_LENGTH; + + /* End pipeline */ + memset(record, 0, 8); + return ptr.gpu; +} + +uint64_t +agx_build_reload_pipeline(struct agx_context *ctx, uint32_t code, struct pipe_surface *surf) +{ + struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, + (1 * AGX_BIND_TEXTURE_LENGTH) + + (1 * AGX_BIND_SAMPLER_LENGTH) + + AGX_SET_SHADER_EXTENDED_LENGTH + 8, + 64); + + uint8_t *record = ptr.cpu; + struct agx_ptr sampler = agx_pool_alloc_aligned(&ctx->batch->pool, AGX_SAMPLER_LENGTH, 64); + struct agx_ptr texture = agx_pool_alloc_aligned(&ctx->batch->pool, AGX_TEXTURE_LENGTH, 64); + + agx_pack(sampler.cpu, SAMPLER, cfg) { + cfg.magnify_linear = true; + cfg.minify_linear = false; + cfg.mip_filter = AGX_MIP_FILTER_NONE; + cfg.wrap_s = AGX_WRAP_CLAMP_TO_EDGE; + cfg.wrap_t = AGX_WRAP_CLAMP_TO_EDGE; + cfg.wrap_r = AGX_WRAP_CLAMP_TO_EDGE; + cfg.pixel_coordinates = true; + cfg.compare_func = AGX_COMPARE_FUNC_ALWAYS; + cfg.unk_2 = 0; + cfg.unk_3 = 0; + } + + agx_pack(texture.cpu, TEXTURE, cfg) { + struct agx_resource *rsrc = agx_resource(surf->texture); + const struct util_format_description *desc = + util_format_description(surf->format); + + cfg.layout = agx_translate_layout(rsrc->modifier); + cfg.format = agx_pixel_format[surf->format].hw; + cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]); + cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]); + cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]); + cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]); + cfg.width = surf->width; + cfg.height = surf->height; + cfg.levels = 1; + cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); + cfg.address = rsrc->bo->ptr.gpu; + cfg.unk_2 = false; + + cfg.stride = (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) ? + (rsrc->slices[0].line_stride - 16) : + AGX_RT_STRIDE_TILED; + } + + agx_pack(record, BIND_TEXTURE, cfg) { + cfg.start = 0; + cfg.count = 1; + cfg.buffer = texture.gpu; + } + + record += AGX_BIND_TEXTURE_LENGTH; + + agx_pack(record, BIND_SAMPLER, cfg) { + cfg.start = 0; + cfg.count = 1; + cfg.buffer = sampler.gpu; + } + + record += AGX_BIND_SAMPLER_LENGTH; + + /* TODO: Can we prepack this? 
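Everything below except cfg.code is constant
+    * across reloads, so the record could plausibly be packed once up front.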
*/ + agx_pack(record, SET_SHADER_EXTENDED, cfg) { + cfg.code = code; + cfg.register_quadwords = 0; + cfg.unk_3 = 0x8d; + cfg.unk_2 = 0x0d; + cfg.unk_2b = 4; + cfg.unk_4 = 0; + cfg.frag_unk = 0x880100; + cfg.preshader_mode = 0; // XXX + } + + record += AGX_SET_SHADER_EXTENDED_LENGTH; + + /* End pipeline */ + memset(record, 0, 8); + return ptr.gpu; +} + +uint64_t +agx_build_store_pipeline(struct agx_context *ctx, uint32_t code, + uint64_t render_target) +{ + struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, + (1 * AGX_BIND_TEXTURE_LENGTH) + + (1 * AGX_BIND_UNIFORM_LENGTH) + + AGX_SET_SHADER_EXTENDED_LENGTH + 8, + 64); + + uint8_t *record = ptr.cpu; + + agx_pack(record, BIND_TEXTURE, cfg) { + cfg.start = 0; + cfg.count = 1; + cfg.buffer = render_target; + } + + record += AGX_BIND_TEXTURE_LENGTH; + + uint32_t unk[] = { 0, ~0 }; + + agx_pack(record, BIND_UNIFORM, cfg) { + cfg.start_halfs = 4; + cfg.size_halfs = 4; + cfg.buffer = agx_pool_upload_aligned(&ctx->batch->pool, unk, sizeof(unk), 16); + } + + record += AGX_BIND_UNIFORM_LENGTH; + + /* TODO: Can we prepack this? */ + agx_pack(record, SET_SHADER_EXTENDED, cfg) { + cfg.code = code; + cfg.register_quadwords = 1; + cfg.unk_2 = 0xd; + cfg.unk_3 = 0x8d; + cfg.frag_unk = 0x880100; + cfg.preshader_mode = 0; // XXX + } + + record += AGX_SET_SHADER_EXTENDED_LENGTH; + + /* End pipeline */ + memset(record, 0, 8); + return ptr.gpu; +} + +static uint64_t +demo_launch_fragment(struct agx_context *ctx, struct agx_pool *pool, uint32_t pipeline, uint32_t varyings, unsigned input_count) +{ + struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_BIND_PIPELINE_LENGTH, 64); + + agx_pack(t.cpu, BIND_PIPELINE, cfg) { + cfg.tag = AGX_BIND_PIPELINE_FRAGMENT; + cfg.sampler_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count; + cfg.texture_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count; + cfg.input_count = input_count; + cfg.pipeline = pipeline; + cfg.fs_varyings = varyings; + }; + + return t.gpu; +} + +static uint64_t +demo_interpolation(struct agx_compiled_shader *fs, struct agx_pool *pool) +{ + struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_INTERPOLATION_LENGTH, 64); + + agx_pack(t.cpu, INTERPOLATION, cfg) { + cfg.varying_count = fs->info.varyings.nr_slots; + }; + + return t.gpu; +} + +static uint64_t +demo_linkage(struct agx_compiled_shader *vs, struct agx_pool *pool) +{ + struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_LINKAGE_LENGTH, 64); + + agx_pack(t.cpu, LINKAGE, cfg) { + cfg.varying_count = vs->info.varyings.nr_slots; + + // 0x2 for fragcoordz, 0x1 for varyings at all + cfg.unk_1 = 0x210000 | (vs->info.writes_psiz ? 0x40000 : 0); + }; + + return t.gpu; +} + +static uint64_t +demo_rasterizer(struct agx_context *ctx, struct agx_pool *pool, bool is_points) +{ + struct agx_rasterizer *rast = ctx->rast; + struct agx_rasterizer_packed out; + + agx_pack(&out, RASTERIZER, cfg) { + bool back_stencil = ctx->zs.base.stencil[1].enabled; + cfg.front.stencil_reference = ctx->stencil_ref.ref_value[0]; + cfg.back.stencil_reference = back_stencil ? + ctx->stencil_ref.ref_value[1] : + cfg.front.stencil_reference; + + cfg.front.line_width = cfg.back.line_width = rast->line_width; + cfg.front.polygon_mode = cfg.back.polygon_mode = AGX_POLYGON_MODE_FILL; + + cfg.unk_fill_lines = is_points; /* XXX: what is this? 
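Despite the name, it is set for point primitives here, not lines.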
*/ + + /* Always enable scissoring so we may scissor to the viewport (TODO: + * optimize this out if the viewport is the default and the app does not + * use the scissor test) */ + cfg.scissor_enable = true; + }; + + /* Words 2-3: front */ + out.opaque[2] |= ctx->zs.front.opaque[0]; + out.opaque[3] |= ctx->zs.front.opaque[1]; + + /* Words 4-5: back */ + out.opaque[4] |= ctx->zs.back.opaque[0]; + out.opaque[5] |= ctx->zs.back.opaque[1]; + + return agx_pool_upload_aligned(pool, &out, sizeof(out), 64); +} + +static uint64_t +demo_unk11(struct agx_pool *pool, bool prim_lines, bool prim_points, bool reads_tib) +{ +#define UNK11_FILL_MODE_LINES_1 (1 << 26) + +#define UNK11_FILL_MODE_LINES_2 (0x5004 << 16) +#define UNK11_LINES (0x10000000) +#define UNK11_POINTS (0x40000000) + +#define UNK11_READS_TIB (0x20000000) + + uint32_t unk[] = { + 0x200004a, + 0x200 | ((prim_lines || prim_points) ? UNK11_FILL_MODE_LINES_1 : 0) | (reads_tib ? UNK11_READS_TIB : 0), + 0x7e00000 | (prim_lines ? UNK11_LINES : 0) | (prim_points ? UNK11_POINTS : 0), + 0x7e00000 | (prim_lines ? UNK11_LINES : 0) | (prim_points ? UNK11_POINTS : 0), + + 0x1ffff + }; + + return agx_pool_upload(pool, unk, sizeof(unk)); +} + +static uint64_t +demo_unk12(struct agx_pool *pool) +{ + uint32_t unk[] = { + 0x410000, + 0x1e3ce508, + 0xa0 + }; + + return agx_pool_upload(pool, unk, sizeof(unk)); +} + +static uint64_t +agx_set_scissor_index(struct agx_pool *pool, unsigned index) +{ + struct agx_ptr T = agx_pool_alloc_aligned(pool, AGX_SET_SCISSOR_LENGTH, 64); + + agx_pack(T.cpu, SET_SCISSOR, cfg) { + cfg.index = index; + }; + + return T.gpu; +} + +static void +agx_push_record(uint8_t **out, unsigned size_words, uint64_t ptr) +{ + assert(ptr < (1ull << 40)); + assert(size_words < (1ull << 24)); + + uint64_t value = (size_words | (ptr << 24)); + memcpy(*out, &value, sizeof(value)); + *out += sizeof(value); +} + +static uint8_t * +agx_encode_state(struct agx_context *ctx, uint8_t *out, + uint32_t pipeline_vertex, uint32_t pipeline_fragment, uint32_t varyings, + bool is_lines, bool is_points) +{ + agx_pack(out, BIND_PIPELINE, cfg) { + cfg.tag = AGX_BIND_PIPELINE_VERTEX; + cfg.pipeline = pipeline_vertex; + cfg.vs_output_count_1 = ctx->vs->info.varyings.nr_slots; + cfg.vs_output_count_2 = ctx->vs->info.varyings.nr_slots; + cfg.sampler_count = ctx->stage[PIPE_SHADER_VERTEX].texture_count; + cfg.texture_count = ctx->stage[PIPE_SHADER_VERTEX].texture_count; + } + + /* yes, it's really 17 bytes */ + out += AGX_BIND_PIPELINE_LENGTH; + *(out++) = 0x0; + + struct agx_pool *pool = &ctx->batch->pool; + struct agx_ptr zero = agx_pool_alloc_aligned(pool, 16, 256); + memset(zero.cpu, 0, 16); + + bool reads_tib = ctx->fs->info.reads_tib; + + agx_push_record(&out, 0, zero.gpu); + agx_push_record(&out, 5, demo_interpolation(ctx->fs, pool)); + agx_push_record(&out, 5, demo_launch_fragment(ctx, pool, pipeline_fragment, varyings, ctx->fs->info.varyings.nr_descs)); + agx_push_record(&out, 4, demo_linkage(ctx->vs, pool)); + agx_push_record(&out, 7, demo_rasterizer(ctx, pool, is_points)); + agx_push_record(&out, 5, demo_unk11(pool, is_lines, is_points, reads_tib)); + + if (ctx->dirty & (AGX_DIRTY_VIEWPORT | AGX_DIRTY_SCISSOR)) { + struct agx_viewport_scissor vps = agx_upload_viewport_scissor(pool, + ctx->batch, &ctx->viewport, + ctx->rast->base.scissor ? 
&ctx->scissor : NULL); + + agx_push_record(&out, 10, vps.viewport); + agx_push_record(&out, 2, agx_set_scissor_index(pool, vps.scissor)); + } + + agx_push_record(&out, 3, demo_unk12(pool)); + agx_push_record(&out, 2, agx_pool_upload(pool, ctx->rast->cull, sizeof(ctx->rast->cull))); + + return (out - 1); // XXX: alignment fixup, or something +} + +static enum agx_primitive +agx_primitive_for_pipe(enum pipe_prim_type mode) +{ + switch (mode) { + case PIPE_PRIM_POINTS: return AGX_PRIMITIVE_POINTS; + case PIPE_PRIM_LINES: return AGX_PRIMITIVE_LINES; + case PIPE_PRIM_LINE_STRIP: return AGX_PRIMITIVE_LINE_STRIP; + case PIPE_PRIM_LINE_LOOP: return AGX_PRIMITIVE_LINE_LOOP; + case PIPE_PRIM_TRIANGLES: return AGX_PRIMITIVE_TRIANGLES; + case PIPE_PRIM_TRIANGLE_STRIP: return AGX_PRIMITIVE_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: return AGX_PRIMITIVE_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: return AGX_PRIMITIVE_QUADS; + case PIPE_PRIM_QUAD_STRIP: return AGX_PRIMITIVE_QUAD_STRIP; + default: unreachable("todo: other primitive types"); + } +} + +static uint64_t +agx_index_buffer_ptr(struct agx_batch *batch, + const struct pipe_draw_start_count_bias *draw, + const struct pipe_draw_info *info) +{ + off_t offset = draw->start * info->index_size; + + if (!info->has_user_indices) { + struct agx_bo *bo = agx_resource(info->index.resource)->bo; + agx_batch_add_bo(batch, bo); + + return bo->ptr.gpu + offset; + } else { + return agx_pool_upload_aligned(&batch->pool, + ((uint8_t *) info->index.user) + offset, + draw->count * info->index_size, 64); + } +} + +static bool +agx_scissor_culls_everything(struct agx_context *ctx) +{ + const struct pipe_scissor_state ss = ctx->scissor; + + return ctx->rast->base.scissor && + ((ss.minx == ss.maxx) || (ss.miny == ss.maxy)); +} + +static void +agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + if (num_draws > 1) { + util_draw_multi(pctx, info, drawid_offset, indirect, draws, num_draws); + return; + } + + if (info->index_size && draws->index_bias) + unreachable("todo: index bias"); + + struct agx_context *ctx = agx_context(pctx); + struct agx_batch *batch = ctx->batch; + + if (agx_scissor_culls_everything(ctx)) + return; + + /* TODO: masks */ + ctx->batch->draw |= ~0; + + /* TODO: Dirty track */ + agx_update_vs(ctx); + agx_update_fs(ctx); + + agx_batch_add_bo(batch, ctx->vs->bo); + agx_batch_add_bo(batch, ctx->fs->bo); + + bool is_lines = + (info->mode == PIPE_PRIM_LINES) || + (info->mode == PIPE_PRIM_LINE_STRIP) || + (info->mode == PIPE_PRIM_LINE_LOOP); + + uint8_t *out = agx_encode_state(ctx, batch->encoder_current, + agx_build_pipeline(ctx, ctx->vs, PIPE_SHADER_VERTEX), + agx_build_pipeline(ctx, ctx->fs, PIPE_SHADER_FRAGMENT), + ctx->fs->varyings, is_lines, info->mode == PIPE_PRIM_POINTS); + + enum agx_primitive prim = agx_primitive_for_pipe(info->mode); + unsigned idx_size = info->index_size; + + if (idx_size) { + uint64_t ib = agx_index_buffer_ptr(batch, draws, info); + + /* Index sizes are encoded logarithmically */ + STATIC_ASSERT(__builtin_ctz(1) == AGX_INDEX_SIZE_U8); + STATIC_ASSERT(__builtin_ctz(2) == AGX_INDEX_SIZE_U16); + STATIC_ASSERT(__builtin_ctz(4) == AGX_INDEX_SIZE_U32); + assert((idx_size == 1) || (idx_size == 2) || (idx_size == 4)); + + agx_pack(out, INDEXED_DRAW, cfg) { + cfg.restart_index = info->restart_index; + cfg.unk_2a = (ib >> 32); + cfg.primitive = prim; + 
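/* The 64-bit index buffer address is split: unk_2a above carries the
+       * upper 32 bits, index_buffer_offset below the lower 32. */
+      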
cfg.restart_enable = info->primitive_restart; + cfg.index_size = __builtin_ctz(idx_size); + cfg.index_buffer_offset = (ib & BITFIELD_MASK(32)); + cfg.index_buffer_size = ALIGN_POT(draws->count * idx_size, 4); + cfg.index_count = draws->count; + cfg.instance_count = info->instance_count; + cfg.base_vertex = draws->index_bias; + }; + + out += AGX_INDEXED_DRAW_LENGTH; + } else { + agx_pack(out, DRAW, cfg) { + cfg.primitive = prim; + cfg.vertex_start = draws->start; + cfg.vertex_count = draws->count; + cfg.instance_count = info->instance_count; + }; + + out += AGX_DRAW_LENGTH; + } + + batch->encoder_current = out; + ctx->dirty = 0; +} + +void agx_init_state_functions(struct pipe_context *ctx); + +void +agx_init_state_functions(struct pipe_context *ctx) +{ + ctx->create_blend_state = agx_create_blend_state; + ctx->create_depth_stencil_alpha_state = agx_create_zsa_state; + ctx->create_fs_state = agx_create_shader_state; + ctx->create_rasterizer_state = agx_create_rs_state; + ctx->create_sampler_state = agx_create_sampler_state; + ctx->create_sampler_view = agx_create_sampler_view; + ctx->create_surface = agx_create_surface; + ctx->create_vertex_elements_state = agx_create_vertex_elements; + ctx->create_vs_state = agx_create_shader_state; + ctx->bind_blend_state = agx_bind_blend_state; + ctx->bind_depth_stencil_alpha_state = agx_bind_zsa_state; + ctx->bind_sampler_states = agx_bind_sampler_states; + ctx->bind_fs_state = agx_bind_shader_state; + ctx->bind_rasterizer_state = agx_bind_rasterizer_state; + ctx->bind_vertex_elements_state = agx_bind_vertex_elements_state; + ctx->bind_vs_state = agx_bind_shader_state; + ctx->delete_blend_state = agx_delete_state; + ctx->delete_depth_stencil_alpha_state = agx_delete_state; + ctx->delete_fs_state = agx_delete_shader_state; + ctx->delete_rasterizer_state = agx_delete_state; + ctx->delete_sampler_state = agx_delete_sampler_state; + ctx->delete_vertex_elements_state = agx_delete_state; + ctx->delete_vs_state = agx_delete_state; + ctx->set_blend_color = agx_set_blend_color; + ctx->set_clip_state = agx_set_clip_state; + ctx->set_constant_buffer = agx_set_constant_buffer; + ctx->set_sampler_views = agx_set_sampler_views; + ctx->set_framebuffer_state = agx_set_framebuffer_state; + ctx->set_polygon_stipple = agx_set_polygon_stipple; + ctx->set_sample_mask = agx_set_sample_mask; + ctx->set_scissor_states = agx_set_scissor_states; + ctx->set_stencil_ref = agx_set_stencil_ref; + ctx->set_vertex_buffers = agx_set_vertex_buffers; + ctx->set_viewport_states = agx_set_viewport_states; + ctx->sampler_view_destroy = agx_sampler_view_destroy; + ctx->surface_destroy = agx_surface_destroy; + ctx->draw_vbo = agx_draw_vbo; + ctx->create_stream_output_target = agx_create_stream_output_target; + ctx->stream_output_target_destroy = agx_stream_output_target_destroy; + ctx->set_stream_output_targets = agx_set_stream_output_targets; +} diff --git a/lib/mesa/src/gallium/drivers/asahi/agx_state.h b/lib/mesa/src/gallium/drivers/asahi/agx_state.h new file mode 100644 index 000000000..a4abc0760 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/agx_state.h @@ -0,0 +1,308 @@ +/* + * Copyright 2021 Alyssa Rosenzweig + * Copyright (C) 2019-2021 Collabora, Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef AGX_STATE_H +#define AGX_STATE_H + +#include "gallium/include/pipe/p_context.h" +#include "gallium/include/pipe/p_state.h" +#include "gallium/include/pipe/p_screen.h" +#include "gallium/auxiliary/util/u_blitter.h" +#include "asahi/lib/agx_pack.h" +#include "asahi/lib/agx_bo.h" +#include "asahi/lib/agx_device.h" +#include "asahi/lib/pool.h" +#include "asahi/compiler/agx_compile.h" +#include "compiler/nir/nir_lower_blend.h" +#include "util/hash_table.h" +#include "util/bitset.h" + +struct agx_streamout_target { + struct pipe_stream_output_target base; + uint32_t offset; +}; + +struct agx_streamout { + struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS]; + unsigned num_targets; +}; + +static inline struct agx_streamout_target * +agx_so_target(struct pipe_stream_output_target *target) +{ + return (struct agx_streamout_target *)target; +} + +struct agx_compiled_shader { + /* Mapped executable memory */ + struct agx_bo *bo; + + /* Varying descriptor (TODO: is this the right place?) 
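This points at the
+    * packed VARYING_HEADER and VARYING records copied after the shader
+    * binary in agx_update_shader.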
*/ + uint64_t varyings; + + /* Metadata returned from the compiler */ + struct agx_shader_info info; +}; + +struct agx_uncompiled_shader { + struct pipe_shader_state base; + struct nir_shader *nir; + struct hash_table *variants; + + /* Set on VS, passed to FS for linkage */ + unsigned base_varying; +}; + +struct agx_stage { + struct agx_uncompiled_shader *shader; + uint32_t dirty; + + struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS]; + uint32_t cb_mask; + + /* Need full CSOs for u_blitter */ + struct agx_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + struct agx_sampler_view *textures[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + + unsigned sampler_count, texture_count; +}; + +/* Uploaded scissor descriptors */ +struct agx_scissors { + struct agx_bo *bo; + unsigned count; +}; + +struct agx_batch { + unsigned width, height, nr_cbufs; + struct pipe_surface *cbufs[8]; + struct pipe_surface *zsbuf; + + /* PIPE_CLEAR_* bitmask */ + uint32_t clear, draw; + + float clear_color[4]; + + /* Resource list requirements, represented as a bit set indexed by BO + * handles (GEM handles on Linux, or IOGPU's equivalent on macOS) */ + BITSET_WORD bo_list[256]; + + struct agx_pool pool, pipeline_pool; + struct agx_bo *encoder; + uint8_t *encoder_current; + + struct agx_scissors scissor; +}; + +struct agx_zsa { + struct pipe_depth_stencil_alpha_state base; + struct agx_rasterizer_face_packed front, back; +}; + +struct agx_blend { + bool logicop_enable, blend_enable; + + union { + nir_lower_blend_rt rt[8]; + unsigned logicop_func; + }; +}; + +struct asahi_shader_key { + struct agx_shader_key base; + struct agx_blend blend; + unsigned nr_cbufs; + enum pipe_format rt_formats[PIPE_MAX_COLOR_BUFS]; +}; + +enum agx_dirty { + AGX_DIRTY_VERTEX = BITFIELD_BIT(0), + AGX_DIRTY_VIEWPORT = BITFIELD_BIT(1), + AGX_DIRTY_SCISSOR = BITFIELD_BIT(2), +}; + +struct agx_context { + struct pipe_context base; + struct agx_compiled_shader *vs, *fs; + uint32_t dirty; + + struct agx_batch *batch; + + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + uint32_t vb_mask; + + struct agx_stage stage[PIPE_SHADER_TYPES]; + struct agx_attribute *attributes; + struct agx_rasterizer *rast; + struct agx_zsa zs; + struct agx_blend *blend; + struct pipe_blend_color blend_color; + struct pipe_viewport_state viewport; + struct pipe_scissor_state scissor; + struct pipe_stencil_ref stencil_ref; + struct agx_streamout streamout; + uint16_t sample_mask; + struct pipe_framebuffer_state framebuffer; + + struct pipe_query *cond_query; + bool cond_cond; + enum pipe_render_cond_flag cond_mode; + + bool is_noop; + + uint8_t render_target[8][AGX_RENDER_TARGET_LENGTH]; + + struct blitter_context *blitter; +}; + +static inline struct agx_context * +agx_context(struct pipe_context *pctx) +{ + return (struct agx_context *) pctx; +} + +struct agx_rasterizer { + struct pipe_rasterizer_state base; + uint8_t cull[AGX_CULL_LENGTH]; + uint8_t line_width; +}; + +struct agx_query { + unsigned query; +}; + +struct agx_sampler_state { + struct pipe_sampler_state base; + + /* Prepared descriptor */ + struct agx_bo *desc; +}; + +struct agx_sampler_view { + struct pipe_sampler_view base; + + /* Prepared descriptor */ + struct agx_bo *desc; +}; + +struct agx_screen { + struct pipe_screen pscreen; + struct agx_device dev; + struct sw_winsys *winsys; +}; + +static inline struct agx_screen * +agx_screen(struct pipe_screen *p) +{ + return (struct agx_screen *)p; +} + +static inline struct agx_device * +agx_device(struct pipe_screen *p) +{ + return &(agx_screen(p)->dev); +} + 
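+/* For illustration only: a typical state hook recovers the driver objects
+ * with the inline downcasts above before touching state, e.g.
+ *
+ *    struct agx_context *ctx = agx_context(pctx);
+ *    struct agx_device *dev = agx_device(pctx->screen);
+ */
+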
+/* TODO: UABI, fake for macOS */
+#ifndef DRM_FORMAT_MOD_LINEAR
+#define DRM_FORMAT_MOD_LINEAR 1
+#endif
+#define DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER (2)
+
+struct agx_resource {
+   struct pipe_resource base;
+   uint64_t modifier;
+
+   /* Hardware backing */
+   struct agx_bo *bo;
+
+   /* Software backing (XXX) */
+   struct sw_displaytarget *dt;
+   unsigned dt_stride;
+
+   BITSET_DECLARE(data_valid, PIPE_MAX_TEXTURE_LEVELS);
+
+   struct {
+      unsigned offset;
+      unsigned line_stride;
+   } slices[PIPE_MAX_TEXTURE_LEVELS];
+
+   /* Bytes from one miptree to the next */
+   unsigned array_stride;
+};
+
+static inline struct agx_resource *
+agx_resource(struct pipe_resource *pctx)
+{
+   return (struct agx_resource *) pctx;
+}
+
+struct agx_transfer {
+   struct pipe_transfer base;
+   void *map;
+   struct {
+      struct pipe_resource *rsrc;
+      struct pipe_box box;
+   } staging;
+};
+
+static inline struct agx_transfer *
+agx_transfer(struct pipe_transfer *p)
+{
+   return (struct agx_transfer *)p;
+}
+
+uint64_t
+agx_push_location(struct agx_context *ctx, struct agx_push push,
+                  enum pipe_shader_type stage);
+
+uint64_t
+agx_build_clear_pipeline(struct agx_context *ctx, uint32_t code, uint64_t clear_buf);
+
+uint64_t
+agx_build_store_pipeline(struct agx_context *ctx, uint32_t code,
+                         uint64_t render_target);
+
+uint64_t
+agx_build_reload_pipeline(struct agx_context *ctx, uint32_t code, struct pipe_surface *surf);
+
+/* Add a BO to a batch. This needs to be amortized O(1) since it's called in
+ * hot paths. To achieve this we model BO lists by bit sets */
+
+static inline void
+agx_batch_add_bo(struct agx_batch *batch, struct agx_bo *bo)
+{
+   /* A handle equal to the bit capacity would already overflow the set */
+   if (unlikely(bo->handle >= (sizeof(batch->bo_list) * 8)))
+      unreachable("todo: growable");
+
+   BITSET_SET(batch->bo_list, bo->handle);
+}
+
+/* Blit shaders */
+void agx_blit(struct pipe_context *pipe,
+              const struct pipe_blit_info *info);
+
+void agx_internal_shaders(struct agx_device *dev);
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/asahi/agx_uniforms.c b/lib/mesa/src/gallium/drivers/asahi/agx_uniforms.c
new file mode 100644
index 000000000..136503737
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/asahi/agx_uniforms.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2021 Alyssa Rosenzweig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ +#include <stdio.h> +#include "agx_state.h" +#include "asahi/lib/agx_pack.h" + +/* Computes the address for a push uniform, adding referenced BOs to the + * current batch as necessary. Note anything uploaded via the batch's pool does + * not require an update to the BO list, since the entire pool will be added + * once at submit time. */ + +static uint64_t +agx_const_buffer_ptr(struct agx_batch *batch, + struct pipe_constant_buffer *cb) +{ + if (cb->buffer) { + struct agx_bo *bo = agx_resource(cb->buffer)->bo; + agx_batch_add_bo(batch, bo); + + return bo->ptr.gpu + cb->buffer_offset; + } else { + return agx_pool_upload_aligned(&batch->pool, + ((uint8_t *) cb->user_buffer) + cb->buffer_offset, + cb->buffer_size - cb->buffer_offset, 64); + } +} + +static uint64_t +agx_push_location_direct(struct agx_context *ctx, struct agx_push push, + enum pipe_shader_type stage) +{ + struct agx_batch *batch = ctx->batch; + struct agx_stage *st = &ctx->stage[stage]; + + switch (push.type) { + case AGX_PUSH_UBO_BASES: { + unsigned count = util_last_bit(st->cb_mask); + struct agx_ptr ptr = agx_pool_alloc_aligned(&batch->pool, count * sizeof(uint64_t), 8); + uint64_t *addresses = ptr.cpu; + + for (unsigned i = 0; i < count; ++i) { + struct pipe_constant_buffer *cb = &st->cb[i]; + addresses[i] = agx_const_buffer_ptr(batch, cb); + } + + return ptr.gpu; + } + + case AGX_PUSH_VBO_BASES: { + unsigned count = util_last_bit(ctx->vb_mask); + struct agx_ptr ptr = agx_pool_alloc_aligned(&batch->pool, count * sizeof(uint64_t), 8); + uint64_t *addresses = ptr.cpu; + + u_foreach_bit(i, ctx->vb_mask) { + struct pipe_vertex_buffer vb = ctx->vertex_buffers[i]; + assert(!vb.is_user_buffer); + + struct agx_bo *bo = agx_resource(vb.buffer.resource)->bo; + agx_batch_add_bo(batch, bo); + + addresses[i] = bo->ptr.gpu + vb.buffer_offset; + } + + return ptr.gpu; + } + + case AGX_PUSH_BLEND_CONST: + { + return agx_pool_upload_aligned(&batch->pool, &ctx->blend_color, + sizeof(ctx->blend_color), 8); + } + + default: + unreachable("todo: push more"); + } +} + +uint64_t +agx_push_location(struct agx_context *ctx, struct agx_push push, + enum pipe_shader_type stage) +{ + uint64_t direct = agx_push_location_direct(ctx, push, stage); + struct agx_pool *pool = &ctx->batch->pool; + + if (push.indirect) + return agx_pool_upload(pool, &direct, sizeof(direct)); + else + return direct; +} diff --git a/lib/mesa/src/gallium/drivers/asahi/magic.c b/lib/mesa/src/gallium/drivers/asahi/magic.c new file mode 100644 index 000000000..fa56ede4f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/magic.c @@ -0,0 +1,211 @@ +/* + * Copyright 2021 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +# +#include <stdint.h> +#include "agx_state.h" +#include "magic.h" + +/* The structures managed in this file appear to be software defined (either in + * the macOS kernel driver or in the AGX firmware) */ + +/* Odd pattern */ +static uint64_t +demo_unk6(struct agx_pool *pool) +{ + struct agx_ptr ptr = agx_pool_alloc_aligned(pool, 0x4000 * sizeof(uint64_t), 64); + uint64_t *buf = ptr.cpu; + memset(buf, 0, sizeof(*buf)); + + for (unsigned i = 1; i < 0x3ff; ++i) + buf[i] = (i + 1); + + return ptr.gpu; +} + +static uint64_t +demo_zero(struct agx_pool *pool, unsigned count) +{ + struct agx_ptr ptr = agx_pool_alloc_aligned(pool, count, 64); + memset(ptr.cpu, 0, count); + return ptr.gpu; +} + +unsigned +demo_cmdbuf(uint64_t *buf, size_t size, + struct agx_pool *pool, + uint64_t encoder_ptr, + uint64_t encoder_id, + uint64_t scissor_ptr, + unsigned width, unsigned height, + uint32_t pipeline_null, + uint32_t pipeline_clear, + uint32_t pipeline_store, + uint64_t rt0, + bool clear_pipeline_textures) +{ + uint32_t *map = (uint32_t *) buf; + memset(map, 0, 474 * 4); + + map[54] = 0x6b0003; + map[55] = 0x3a0012; + map[56] = 1; + + map[106] = 1; + map[108] = 0x1c; + map[112] = 0xffffffff; + map[113] = 0xffffffff; + map[114] = 0xffffffff; + + uint64_t unk_buffer = demo_zero(pool, 0x1000); + uint64_t unk_buffer_2 = demo_zero(pool, 0x8000); + + // This is a pipeline bind + map[156] = 0xffff8002 | (clear_pipeline_textures ? 0x210 : 0); + map[158] = pipeline_clear | 0x4; + map[163] = 0x12; + map[164] = pipeline_store | 0x4; + map[166] = scissor_ptr & 0xFFFFFFFF; + map[167] = scissor_ptr >> 32; + map[168] = unk_buffer & 0xFFFFFFFF; + map[169] = unk_buffer >> 32; + + map[220] = 4; + map[222] = 0xc000; + map[224] = width; + map[225] = height; + map[226] = unk_buffer_2 & 0xFFFFFFFF; + map[227] = unk_buffer_2 >> 32; + + float depth_clear = 1.0; + uint8_t stencil_clear = 0; + + map[278] = fui(depth_clear); + map[279] = (0x3 << 8) | stencil_clear; + map[282] = 0x1000000; + map[284] = 0xffffffff; + map[285] = 0xffffffff; + map[286] = 0xffffffff; + + map[298] = 0xffff8212; + map[300] = pipeline_null | 0x4; + map[305] = 0x12; + map[306] = pipeline_store | 0x4; + map[352] = 1; + map[360] = 0x1c; + map[362] = encoder_id; + map[365] = 0xffffffff; + map[366] = 1; + + uint64_t unk6 = demo_unk6(pool); + map[370] = unk6 & 0xFFFFFFFF; + map[371] = unk6 >> 32; + + map[374] = width; + map[375] = height; + map[376] = 1; + map[377] = 8; + map[378] = 8; + + map[393] = 8; + map[394] = 32; + map[395] = 32; + map[396] = 1; + + unsigned offset_unk = (458 * 4); + unsigned offset_attachments = (470 * 4); + unsigned nr_attachments = 1; + + map[473] = nr_attachments; + + /* A single attachment follows, depth/stencil have their own attachments */ + agx_pack((map + (offset_attachments / 4) + 4), IOGPU_ATTACHMENT, cfg) { + cfg.address = rt0; + cfg.type = AGX_IOGPU_ATTACHMENT_TYPE_COLOUR; + cfg.unk_1 = 0x80000000; + cfg.unk_2 = 0x5; + cfg.bytes_per_pixel = 4; + cfg.percent = 100; + } + + unsigned total_size = offset_attachments + (AGX_IOGPU_ATTACHMENT_LENGTH * nr_attachments) + 16; + + agx_pack(map, IOGPU_HEADER, cfg) { + cfg.total_size = total_size; + cfg.attachment_offset_1 = offset_attachments; + cfg.attachment_offset_2 = offset_attachments; + 
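/* Both attachment offsets reference the same array */
+      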
cfg.attachment_length = nr_attachments * AGX_IOGPU_ATTACHMENT_LENGTH; + cfg.unknown_offset = offset_unk; + cfg.encoder = encoder_ptr; + } + + return total_size; +} + +static struct agx_map_header +demo_map_header(uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size, unsigned count) +{ + return (struct agx_map_header) { + .cmdbuf_id = cmdbuf_id, + .unk2 = 0x1, + .unk3 = 0x528, // 1320 + .encoder_id = encoder_id, + .unk6 = 0x0, + .cmdbuf_size = cmdbuf_size, + + /* +1 for the sentinel ending */ + .nr_entries = count + 1, + .nr_handles = count + 1, + .indices = {0x0b}, + }; +} + +void +demo_mem_map(void *map, size_t size, unsigned *handles, unsigned count, + uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size) +{ + struct agx_map_header *header = map; + struct agx_map_entry *entries = (struct agx_map_entry *) (((uint8_t *) map) + 0x40); + struct agx_map_entry *end = (struct agx_map_entry *) (((uint8_t *) map) + size); + + /* Header precedes the entry */ + *header = demo_map_header(cmdbuf_id, encoder_id, cmdbuf_size, count); + + /* Add an entry for each BO mapped */ + for (unsigned i = 0; i < count; ++i) { + assert((entries + i) < end); + entries[i] = (struct agx_map_entry) { + .unkAAA = 0x20, + .unkBBB = 0x1, + .unka = 0x1ffff, + .indices = {handles[i]} + }; + } + + /* Final entry is a sentinel */ + assert((entries + count) < end); + entries[count] = (struct agx_map_entry) { + .unkAAA = 0x40, + .unkBBB = 0x1, + .unka = 0x1ffff, + }; +} diff --git a/lib/mesa/src/gallium/drivers/asahi/magic.h b/lib/mesa/src/gallium/drivers/asahi/magic.h new file mode 100644 index 000000000..98215d367 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/magic.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2021 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifndef __ASAHI_MAGIC_H
+#define __ASAHI_MAGIC_H
+
+unsigned
+demo_cmdbuf(uint64_t *buf, size_t size,
+            struct agx_pool *pool,
+            uint64_t encoder_ptr,
+            uint64_t encoder_id,
+            uint64_t scissor_ptr,
+            unsigned width, unsigned height,
+            uint32_t pipeline_null,
+            uint32_t pipeline_clear,
+            uint32_t pipeline_store,
+            uint64_t rt0,
+            bool clear_pipeline_textures);
+
+void
+demo_mem_map(void *map, size_t size, unsigned *handles,
+             unsigned count, uint64_t cmdbuf_id,
+             uint64_t encoder_id, unsigned cmdbuf_size);
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/asahi/meson.build b/lib/mesa/src/gallium/drivers/asahi/meson.build
new file mode 100644
index 000000000..24da3bc10
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/asahi/meson.build
@@ -0,0 +1,41 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_asahi = files(
+  'agx_blit.c',
+  'agx_pipe.c',
+  'agx_state.c',
+  'agx_uniforms.c',
+  'magic.c',
+)
+
+libasahi = static_library(
+  'asahi',
+  files_asahi,
+  include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src],
+  c_args : [c_msvc_compat_args],
+  gnu_symbol_visibility : 'hidden',
+  dependencies : [idep_nir, idep_agx_pack],
+)
+
+driver_asahi = declare_dependency(
+  compile_args : '-DGALLIUM_ASAHI',
+  link_with : [libasahi, libasahi_compiler, libasahi_lib, libasahi_decode]
+)