author | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-02-24 01:57:18 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-02-24 01:57:18 +0000 |
commit | b24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (patch) | |
tree | 658ca4e6b41655f49463c85edbaeda48979c394c /lib/mesa/src/gallium/drivers/asahi | |
parent | 57768bbb154c2879d34ec20e401b19472e77aaf7 (diff) | |
Import Mesa 21.3.7
Diffstat (limited to 'lib/mesa/src/gallium/drivers/asahi')
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/agx_blit.c | 182 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/agx_pipe.c | 1142 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/agx_public.h | 38 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/agx_state.c | 1658 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/agx_state.h | 308 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/agx_uniforms.c | 109 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/magic.c | 211 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/magic.h | 45 |
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/meson.build | 41 |
9 files changed, 3734 insertions, 0 deletions
diff --git a/lib/mesa/src/gallium/drivers/asahi/agx_blit.c b/lib/mesa/src/gallium/drivers/asahi/agx_blit.c new file mode 100644 index 000000000..213761ade --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/agx_blit.c @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2021 Alyssa Rosenzweig + * Copyright (C) 2020-2021 Collabora, Ltd. + * Copyright (C) 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "agx_state.h" +#include "compiler/nir/nir_builder.h" +#include "asahi/compiler/agx_compile.h" +#include "gallium/auxiliary/util/u_blitter.h" + +static void +agx_build_reload_shader(struct agx_device *dev) +{ + nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, + &agx_nir_options, "agx_reload"); + b.shader->info.internal = true; + + nir_variable *out = nir_variable_create(b.shader, nir_var_shader_out, + glsl_vector_type(GLSL_TYPE_FLOAT, 4), "output"); + out->data.location = FRAG_RESULT_DATA0; + + nir_ssa_def *fragcoord = nir_load_frag_coord(&b); + nir_ssa_def *coord = nir_channels(&b, fragcoord, 0x3); + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + tex->dest_type = nir_type_float32; + tex->sampler_dim = GLSL_SAMPLER_DIM_RECT; + tex->op = nir_texop_tex; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(coord); + tex->coord_components = 2; + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); + nir_builder_instr_insert(&b, &tex->instr); + nir_store_var(&b, out, &tex->dest.ssa, 0xFF); + + unsigned offset = 0; + unsigned bo_size = 4096; + + struct agx_bo *bo = agx_bo_create(dev, bo_size, AGX_MEMORY_TYPE_SHADER); + dev->reload.bo = bo; + + for (unsigned i = 0; i < AGX_NUM_FORMATS; ++i) { + struct util_dynarray binary; + util_dynarray_init(&binary, NULL); + + nir_shader *s = nir_shader_clone(NULL, b.shader); + struct agx_shader_info info; + + struct agx_shader_key key = { + .fs.tib_formats[0] = i + }; + + agx_compile_shader_nir(s, &key, &binary, &info); + + assert(offset + binary.size < bo_size); + memcpy(((uint8_t *) bo->ptr.cpu) + offset, binary.data, binary.size); + + dev->reload.format[i] = bo->ptr.gpu + offset; + offset += ALIGN_POT(binary.size, 128); + + util_dynarray_fini(&binary); + } +} + +static void +agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter, + bool render_cond) +{ + util_blitter_save_vertex_buffer_slot(blitter, ctx->vertex_buffers); + util_blitter_save_vertex_elements(blitter, ctx->attributes); + 
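/* u_blitter implements blits by drawing with the context's own CSOs, so
 * every piece of bound state it may clobber is saved up front and restored
 * once util_blitter_blit() finishes; missing one of these saves would leak
 * blit state into the application's next draw. */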
util_blitter_save_vertex_shader(blitter, ctx->stage[PIPE_SHADER_VERTEX].shader); + util_blitter_save_rasterizer(blitter, ctx->rast); + util_blitter_save_viewport(blitter, &ctx->viewport); + util_blitter_save_scissor(blitter, &ctx->scissor); + util_blitter_save_fragment_shader(blitter, ctx->stage[PIPE_SHADER_FRAGMENT].shader); + util_blitter_save_blend(blitter, ctx->blend); + util_blitter_save_depth_stencil_alpha(blitter, &ctx->zs); + util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref); + util_blitter_save_so_targets(blitter, 0, NULL); + util_blitter_save_sample_mask(blitter, ctx->sample_mask); + + util_blitter_save_framebuffer(blitter, &ctx->framebuffer); + util_blitter_save_fragment_sampler_states(blitter, + ctx->stage[PIPE_SHADER_FRAGMENT].sampler_count, + (void **)(ctx->stage[PIPE_SHADER_FRAGMENT].samplers)); + util_blitter_save_fragment_sampler_views(blitter, + ctx->stage[PIPE_SHADER_FRAGMENT].texture_count, + (struct pipe_sampler_view **)ctx->stage[PIPE_SHADER_FRAGMENT].textures); + util_blitter_save_fragment_constant_buffer_slot(blitter, + ctx->stage[PIPE_SHADER_FRAGMENT].cb); + + if (!render_cond) { + util_blitter_save_render_condition(blitter, + (struct pipe_query *) ctx->cond_query, + ctx->cond_cond, ctx->cond_mode); + } +} + +void +agx_blit(struct pipe_context *pipe, + const struct pipe_blit_info *info) +{ + //if (info->render_condition_enable && + // !agx_render_condition_check(pan_context(pipe))) + // return; + + struct agx_context *ctx = agx_context(pipe); + + if (!util_blitter_is_blit_supported(ctx->blitter, info)) + unreachable("Unsupported blit\n"); + + agx_blitter_save(ctx, ctx->blitter, info->render_condition_enable); + util_blitter_blit(ctx->blitter, info); +} + +/* We need some fixed shaders for common rendering tasks. When colour buffer + * reload is not in use, a shader is used to clear a particular colour. At the + * end of rendering a tile, a shader is used to write it out. These shaders are + * too trivial to go through the compiler at this stage. */ +#define AGX_STOP \ + 0x88, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, \ + 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00 \ + +#define AGX_BLEND \ + 0x09, 0x00, 0x00, 0x04, 0xf0, 0xfc, 0x80, 0x03 + +/* Clears the tilebuffer, where u6-u7 are preloaded with the FP16 clear colour + + 0: 7e018c098040 bitop_mov r0, u6 + 6: 7e058e098000 bitop_mov r1, u7 + c: 09000004f0fc8003 TODO.blend + */ + +static uint8_t shader_clear[] = { + 0x7e, 0x01, 0x8c, 0x09, 0x80, 0x40, + 0x7e, 0x05, 0x8e, 0x09, 0x80, 0x00, + AGX_BLEND, + AGX_STOP +}; + +static uint8_t shader_store[] = { + 0x7e, 0x00, 0x04, 0x09, 0x80, 0x00, + 0xb1, 0x80, 0x00, 0x80, 0x00, 0x4a, 0x00, 0x00, 0x0a, 0x00, + AGX_STOP +}; + +void +agx_internal_shaders(struct agx_device *dev) +{ + unsigned clear_offset = 0; + unsigned store_offset = 1024; + + struct agx_bo *bo = agx_bo_create(dev, 4096, AGX_MEMORY_TYPE_SHADER); + memcpy(((uint8_t *) bo->ptr.cpu) + clear_offset, shader_clear, sizeof(shader_clear)); + memcpy(((uint8_t *) bo->ptr.cpu) + store_offset, shader_store, sizeof(shader_store)); + + dev->internal.bo = bo; + dev->internal.clear = bo->ptr.gpu + clear_offset; + dev->internal.store = bo->ptr.gpu + store_offset; + + agx_build_reload_shader(dev); +} diff --git a/lib/mesa/src/gallium/drivers/asahi/agx_pipe.c b/lib/mesa/src/gallium/drivers/asahi/agx_pipe.c new file mode 100644 index 000000000..bb4306ed3 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/agx_pipe.c @@ -0,0 +1,1142 @@ +/* + * Copyright 2010 Red Hat Inc. 
+ * Copyright © 2014-2017 Broadcom + * Copyright (C) 2019-2020 Collabora, Ltd. + * Copyright 2006 VMware, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include <stdio.h> +#include <errno.h> +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_screen.h" +#include "util/u_inlines.h" +#include "util/format/u_format.h" +#include "util/u_upload_mgr.h" +#include "util/half_float.h" +#include "frontend/winsys_handle.h" +#include "frontend/sw_winsys.h" +#include "gallium/auxiliary/util/u_transfer.h" +#include "gallium/auxiliary/util/u_surface.h" +#include "gallium/auxiliary/util/u_framebuffer.h" +#include "agx_public.h" +#include "agx_state.h" +#include "magic.h" +#include "asahi/compiler/agx_compile.h" +#include "asahi/lib/decode.h" +#include "asahi/lib/tiling.h" +#include "asahi/lib/agx_formats.h" + +static const struct debug_named_value agx_debug_options[] = { + {"trace", AGX_DBG_TRACE, "Trace the command stream"}, + {"deqp", AGX_DBG_DEQP, "Hacks for dEQP"}, + {"no16", AGX_DBG_NO16, "Disable 16-bit support"}, + DEBUG_NAMED_VALUE_END +}; + +void agx_init_state_functions(struct pipe_context *ctx); + +static struct pipe_query * +agx_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) +{ + struct agx_query *query = CALLOC_STRUCT(agx_query); + + return (struct pipe_query *)query; +} + +static void +agx_destroy_query(struct pipe_context *ctx, struct pipe_query *query) +{ + FREE(query); +} + +static bool +agx_begin_query(struct pipe_context *ctx, struct pipe_query *query) +{ + return true; +} + +static bool +agx_end_query(struct pipe_context *ctx, struct pipe_query *query) +{ + return true; +} + +static bool +agx_get_query_result(struct pipe_context *ctx, + struct pipe_query *query, + bool wait, + union pipe_query_result *vresult) +{ + uint64_t *result = (uint64_t*)vresult; + + *result = 0; + return true; +} + +static void +agx_set_active_query_state(struct pipe_context *pipe, bool enable) +{ +} + + +/* + * resource + */ + +static struct pipe_resource * +agx_resource_from_handle(struct pipe_screen *pscreen, + const struct pipe_resource *templat, + struct winsys_handle *whandle, + unsigned usage) +{ + unreachable("Imports todo"); +} + +static bool +agx_resource_get_handle(struct pipe_screen *pscreen, + struct pipe_context *ctx, + struct pipe_resource *pt, + struct winsys_handle *handle, + 
unsigned usage) +{ + unreachable("Handles todo"); +} + +static inline bool +agx_is_2d(const struct agx_resource *pres) +{ + switch (pres->base.target) { + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_CUBE: + return true; + default: + return false; + } +} + +static bool +agx_must_tile(const struct agx_resource *pres) +{ + switch (pres->base.target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_3D: + /* We don't know how to do linear for these */ + return true; + default: + break; + } + + return false; +} + +static bool +agx_should_tile(const struct agx_resource *pres) +{ + const unsigned valid_binding = + PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_BLENDABLE | + PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED; + + unsigned bpp = util_format_get_blocksizebits(pres->base.format); + + bool can_tile = agx_is_2d(pres) + && (bpp == 32) + && ((pres->base.bind & ~valid_binding) == 0); + + bool should_tile = (pres->base.usage != PIPE_USAGE_STREAM); + bool must_tile = agx_must_tile(pres); + + assert(!(must_tile && !can_tile)); + return must_tile || (can_tile && should_tile); +} + +static struct pipe_resource * +agx_resource_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + struct agx_device *dev = agx_device(screen); + struct agx_resource *nresource; + + nresource = CALLOC_STRUCT(agx_resource); + if (!nresource) + return NULL; + + nresource->base = *templ; + nresource->base.screen = screen; + + nresource->modifier = agx_should_tile(nresource) ? + DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER : DRM_FORMAT_MOD_LINEAR; + + unsigned offset = 0; + + for (unsigned l = 0; l <= templ->last_level; ++l) { + unsigned width = u_minify(templ->width0, l); + unsigned height = u_minify(templ->height0, l); + + if (nresource->modifier == DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER) { + width = ALIGN_POT(width, 64); + height = ALIGN_POT(height, 64); + } + + nresource->slices[l].line_stride = + util_format_get_stride(templ->format, width); + + nresource->slices[l].offset = offset; + offset += ALIGN_POT(nresource->slices[l].line_stride * height, 0x80); + } + + /* Arrays and cubemaps have the entire miptree duplicated */ + nresource->array_stride = ALIGN_POT(offset, 64); + unsigned size = ALIGN_POT(nresource->array_stride * templ->array_size, 4096); + + pipe_reference_init(&nresource->base.reference, 1); + + struct sw_winsys *winsys = ((struct agx_screen *) screen)->winsys; + + if (templ->bind & (PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) { + unsigned width0 = templ->width0, height0 = templ->height0; + + if (nresource->modifier == DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER) { + width0 = ALIGN_POT(width0, 64); + height0 = ALIGN_POT(height0, 64); + } + + nresource->dt = winsys->displaytarget_create(winsys, + templ->bind, + templ->format, + width0, + height0, + 64, + NULL /*map_front_private*/, + &nresource->dt_stride); + + nresource->slices[0].line_stride = nresource->dt_stride; + assert((nresource->dt_stride & 0xF) == 0); + + offset = nresource->slices[0].line_stride * ALIGN_POT(templ->height0, 64); + + if (nresource->dt == NULL) { + FREE(nresource); + return NULL; + } + } + + nresource->bo = agx_bo_create(dev, size, AGX_MEMORY_TYPE_FRAMEBUFFER); + + if (!nresource->bo) { + FREE(nresource); + return NULL; + } + + return &nresource->base; +} + +static uint8_t * +agx_rsrc_offset(struct agx_resource *rsrc, unsigned level, unsigned z) +{ + struct agx_bo *bo = rsrc->bo; + uint8_t *map = 
((uint8_t *) bo->ptr.cpu) + rsrc->slices[level].offset; + map += z * rsrc->array_stride; + + return map; +} + +static void +agx_resource_destroy(struct pipe_screen *screen, + struct pipe_resource *prsrc) +{ + struct agx_resource *rsrc = (struct agx_resource *)prsrc; + + if (rsrc->dt) { + /* display target */ + struct agx_screen *agx_screen = (struct agx_screen*)screen; + struct sw_winsys *winsys = agx_screen->winsys; + winsys->displaytarget_destroy(winsys, rsrc->dt); + } + + agx_bo_unreference(rsrc->bo); + FREE(rsrc); +} + + +/* + * transfer + */ + +static void +agx_transfer_flush_region(struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) +{ +} + +static void * +agx_transfer_map(struct pipe_context *pctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, /* a combination of PIPE_MAP_x */ + const struct pipe_box *box, + struct pipe_transfer **out_transfer) +{ + struct agx_context *ctx = agx_context(pctx); + struct agx_resource *rsrc = agx_resource(resource); + unsigned bytes_per_pixel = util_format_get_blocksize(resource->format); + + /* Can't map tiled/compressed directly */ + if ((usage & PIPE_MAP_DIRECTLY) && rsrc->modifier != DRM_FORMAT_MOD_LINEAR) + return NULL; + + if (ctx->batch->cbufs[0] && resource == ctx->batch->cbufs[0]->texture) + pctx->flush(pctx, NULL, 0); + if (ctx->batch->zsbuf && resource == ctx->batch->zsbuf->texture) + pctx->flush(pctx, NULL, 0); + + struct agx_transfer *transfer = CALLOC_STRUCT(agx_transfer); + transfer->base.level = level; + transfer->base.usage = usage; + transfer->base.box = *box; + + pipe_resource_reference(&transfer->base.resource, resource); + *out_transfer = &transfer->base; + + if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER) { + transfer->base.stride = box->width * bytes_per_pixel; + transfer->base.layer_stride = transfer->base.stride * box->height; + transfer->map = calloc(transfer->base.layer_stride, box->depth); + assert(box->depth == 1); + + if ((usage & PIPE_MAP_READ) && BITSET_TEST(rsrc->data_valid, level)) { + for (unsigned z = 0; z < box->depth; ++z) { + uint8_t *map = agx_rsrc_offset(rsrc, level, box->z + z); + + agx_detile(map, transfer->map, + u_minify(resource->width0, level), bytes_per_pixel * 8, + transfer->base.stride / bytes_per_pixel, + box->x, box->y, box->x + box->width, box->y + box->height); + } + } + + return transfer->map; + } else { + assert (rsrc->modifier == DRM_FORMAT_MOD_LINEAR); + + transfer->base.stride = rsrc->slices[level].line_stride; + transfer->base.layer_stride = rsrc->array_stride; + + /* Be conservative for direct writes */ + + if ((usage & PIPE_MAP_WRITE) && (usage & PIPE_MAP_DIRECTLY)) + BITSET_SET(rsrc->data_valid, level); + + return agx_rsrc_offset(rsrc, level, box->z) + + transfer->base.box.y * rsrc->slices[level].line_stride + + transfer->base.box.x * bytes_per_pixel; + } +} + +static void +agx_transfer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + /* Gallium expects writeback here, so we tile */ + + struct agx_transfer *trans = agx_transfer(transfer); + struct pipe_resource *prsrc = transfer->resource; + struct agx_resource *rsrc = (struct agx_resource *) prsrc; + unsigned bytes_per_pixel = util_format_get_blocksize(prsrc->format); + + if (transfer->usage & PIPE_MAP_WRITE) + BITSET_SET(rsrc->data_valid, transfer->level); + + /* Tiling will occur in software from a staging cpu buffer */ + if ((transfer->usage & PIPE_MAP_WRITE) && + rsrc->modifier == DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER) { + 
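/* trans->map is the linear staging buffer allocated in agx_transfer_map;
 * agx_tile() below re-tiles each written layer from that staging copy into
 * the resource's 64x64 Morton-order layout. */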
assert(trans->map != NULL); + + for (unsigned z = 0; z < transfer->box.depth; ++z) { + uint8_t *map = agx_rsrc_offset(rsrc, transfer->level, + transfer->box.z + z); + + agx_tile(map, trans->map, + u_minify(transfer->resource->width0, transfer->level), + bytes_per_pixel * 8, + transfer->stride / bytes_per_pixel, + transfer->box.x, transfer->box.y, + transfer->box.x + transfer->box.width, + transfer->box.y + transfer->box.height); + } + } + + /* Free the transfer */ + free(trans->map); + pipe_resource_reference(&transfer->resource, NULL); + FREE(transfer); +} + +/* + * clear/copy + */ +static void +agx_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor_state *scissor_state, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->batch->clear |= buffers; + memcpy(ctx->batch->clear_color, color->f, sizeof(color->f)); +} + + +static void +agx_flush_resource(struct pipe_context *ctx, + struct pipe_resource *resource) +{ +} + +/* + * context + */ +static void +agx_flush(struct pipe_context *pctx, + struct pipe_fence_handle **fence, + unsigned flags) +{ + struct agx_context *ctx = agx_context(pctx); + + if (fence) + *fence = NULL; + + /* TODO */ + if (!ctx->batch->cbufs[0]) + return; + + /* Nothing to do */ + if (!(ctx->batch->draw | ctx->batch->clear)) + return; + + /* Finalize the encoder */ + uint8_t stop[5 + 64] = { 0x00, 0x00, 0x00, 0xc0, 0x00 }; + memcpy(ctx->batch->encoder_current, stop, sizeof(stop)); + + /* Emit the commandbuffer */ + uint64_t pipeline_clear = 0; + bool clear_pipeline_textures = false; + + struct agx_device *dev = agx_device(pctx->screen); + + if (ctx->batch->clear & PIPE_CLEAR_COLOR0) { + uint16_t clear_colour[4] = { + _mesa_float_to_half(ctx->batch->clear_color[0]), + _mesa_float_to_half(ctx->batch->clear_color[1]), + _mesa_float_to_half(ctx->batch->clear_color[2]), + _mesa_float_to_half(ctx->batch->clear_color[3]) + }; + + + pipeline_clear = agx_build_clear_pipeline(ctx, + dev->internal.clear, + agx_pool_upload(&ctx->batch->pool, clear_colour, sizeof(clear_colour))); + } else { + enum pipe_format fmt = ctx->batch->cbufs[0]->format; + enum agx_format internal = agx_pixel_format[fmt].internal; + uint32_t shader = dev->reload.format[internal]; + + pipeline_clear = agx_build_reload_pipeline(ctx, shader, + ctx->batch->cbufs[0]); + + clear_pipeline_textures = true; + } + + uint64_t pipeline_store = + agx_build_store_pipeline(ctx, + dev->internal.store, + agx_pool_upload(&ctx->batch->pool, ctx->render_target[0], sizeof(ctx->render_target))); + + /* Pipelines must be 64-byte aligned */ + struct agx_ptr pipeline_null = + agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, 64, 64); + memset(pipeline_null.cpu, 0, 64); + + struct agx_resource *rt0 = agx_resource(ctx->batch->cbufs[0]->texture); + BITSET_SET(rt0->data_valid, 0); + + struct agx_resource *zbuf = ctx->batch->zsbuf ? 
+ agx_resource(ctx->batch->zsbuf->texture) : NULL; + + if (zbuf) + BITSET_SET(zbuf->data_valid, 0); + + /* BO list for a given batch consists of: + * - BOs for the batch's framebuffer surfaces + * - BOs for the batch's pools + * - BOs for the encoder + * - BO for internal shaders + * - BOs added to the batch explicitly + */ + struct agx_batch *batch = ctx->batch; + + agx_batch_add_bo(batch, batch->encoder); + agx_batch_add_bo(batch, batch->scissor.bo); + agx_batch_add_bo(batch, dev->internal.bo); + agx_batch_add_bo(batch, dev->reload.bo); + + for (unsigned i = 0; i < batch->nr_cbufs; ++i) { + struct pipe_surface *surf = batch->cbufs[i]; + assert(surf != NULL && surf->texture != NULL); + struct agx_resource *rsrc = agx_resource(surf->texture); + agx_batch_add_bo(batch, rsrc->bo); + } + + if (batch->zsbuf) { + struct pipe_surface *surf = batch->zsbuf; + struct agx_resource *rsrc = agx_resource(surf->texture); + agx_batch_add_bo(batch, rsrc->bo); + } + + unsigned handle_count = + BITSET_COUNT(batch->bo_list) + + agx_pool_num_bos(&batch->pool) + + agx_pool_num_bos(&batch->pipeline_pool); + + uint32_t *handles = calloc(sizeof(uint32_t), handle_count); + unsigned handle = 0, handle_i = 0; + + BITSET_FOREACH_SET(handle, batch->bo_list, sizeof(batch->bo_list) * 8) { + handles[handle_i++] = handle; + } + + agx_pool_get_bo_handles(&batch->pool, handles + handle_i); + handle_i += agx_pool_num_bos(&batch->pool); + + agx_pool_get_bo_handles(&batch->pipeline_pool, handles + handle_i); + handle_i += agx_pool_num_bos(&batch->pipeline_pool); + + /* Size calculation should've been exact */ + assert(handle_i == handle_count); + + unsigned cmdbuf_id = agx_get_global_id(dev); + unsigned encoder_id = agx_get_global_id(dev); + + unsigned cmdbuf_size = demo_cmdbuf(dev->cmdbuf.ptr.cpu, + dev->cmdbuf.size, + &ctx->batch->pool, + ctx->batch->encoder->ptr.gpu, + encoder_id, + ctx->batch->scissor.bo->ptr.gpu, + ctx->batch->width, + ctx->batch->height, + pipeline_null.gpu, + pipeline_clear, + pipeline_store, + rt0->bo->ptr.gpu, + clear_pipeline_textures); + + /* Generate the mapping table from the BO list */ + demo_mem_map(dev->memmap.ptr.cpu, dev->memmap.size, handles, handle_count, + cmdbuf_id, encoder_id, cmdbuf_size); + + free(handles); + + agx_submit_cmdbuf(dev, dev->cmdbuf.handle, dev->memmap.handle, dev->queue.id); + + agx_wait_queue(dev->queue); + + if (dev->debug & AGX_DBG_TRACE) { + agxdecode_cmdstream(dev->cmdbuf.handle, dev->memmap.handle, true); + agxdecode_next_frame(); + } + + memset(batch->bo_list, 0, sizeof(batch->bo_list)); + agx_pool_cleanup(&ctx->batch->pool); + agx_pool_cleanup(&ctx->batch->pipeline_pool); + agx_pool_init(&ctx->batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true); + agx_pool_init(&ctx->batch->pipeline_pool, dev, AGX_MEMORY_TYPE_CMDBUF_32, true); + ctx->batch->clear = 0; + ctx->batch->draw = 0; + ctx->batch->encoder_current = ctx->batch->encoder->ptr.cpu; + ctx->batch->scissor.count = 0; + ctx->dirty = ~0; +} + +static void +agx_destroy_context(struct pipe_context *pctx) +{ + struct agx_context *ctx = agx_context(pctx); + + if (pctx->stream_uploader) + u_upload_destroy(pctx->stream_uploader); + + if (ctx->blitter) + util_blitter_destroy(ctx->blitter); + + util_unreference_framebuffer_state(&ctx->framebuffer); + + FREE(ctx); +} + +static void +agx_invalidate_resource(struct pipe_context *ctx, + struct pipe_resource *resource) +{ +} + +static struct pipe_context * +agx_create_context(struct pipe_screen *screen, + void *priv, unsigned flags) +{ + struct agx_context *ctx = 
CALLOC_STRUCT(agx_context); + struct pipe_context *pctx = &ctx->base; + + if (!ctx) + return NULL; + + pctx->screen = screen; + pctx->priv = priv; + + ctx->batch = CALLOC_STRUCT(agx_batch); + agx_pool_init(&ctx->batch->pool, + agx_device(screen), AGX_MEMORY_TYPE_FRAMEBUFFER, true); + agx_pool_init(&ctx->batch->pipeline_pool, + agx_device(screen), AGX_MEMORY_TYPE_SHADER, true); + ctx->batch->encoder = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER); + ctx->batch->encoder_current = ctx->batch->encoder->ptr.cpu; + ctx->batch->scissor.bo = agx_bo_create(agx_device(screen), 0x80000, AGX_MEMORY_TYPE_FRAMEBUFFER); + + /* Upload fixed shaders (TODO: compile them?) */ + + pctx->stream_uploader = u_upload_create_default(pctx); + if (!pctx->stream_uploader) { + FREE(pctx); + return NULL; + } + pctx->const_uploader = pctx->stream_uploader; + + pctx->destroy = agx_destroy_context; + pctx->flush = agx_flush; + pctx->clear = agx_clear; + pctx->resource_copy_region = util_resource_copy_region; + pctx->blit = agx_blit; + pctx->flush_resource = agx_flush_resource; + pctx->create_query = agx_create_query; + pctx->destroy_query = agx_destroy_query; + pctx->begin_query = agx_begin_query; + pctx->end_query = agx_end_query; + pctx->get_query_result = agx_get_query_result; + pctx->set_active_query_state = agx_set_active_query_state; + pctx->buffer_map = agx_transfer_map; + pctx->texture_map = agx_transfer_map; + pctx->transfer_flush_region = agx_transfer_flush_region; + pctx->buffer_unmap = agx_transfer_unmap; + pctx->texture_unmap = agx_transfer_unmap; + pctx->buffer_subdata = u_default_buffer_subdata; + pctx->texture_subdata = u_default_texture_subdata; + pctx->invalidate_resource = agx_invalidate_resource; + agx_init_state_functions(pctx); + + + ctx->blitter = util_blitter_create(pctx); + + return pctx; +} + +static void +agx_flush_frontbuffer(struct pipe_screen *_screen, + struct pipe_context *pctx, + struct pipe_resource *prsrc, + unsigned level, unsigned layer, + void *context_private, struct pipe_box *box) +{ + struct agx_resource *rsrc = (struct agx_resource *) prsrc; + struct agx_screen *agx_screen = (struct agx_screen*)_screen; + struct sw_winsys *winsys = agx_screen->winsys; + + /* Dump the framebuffer */ + assert (rsrc->dt); + void *map = winsys->displaytarget_map(winsys, rsrc->dt, PIPE_USAGE_DEFAULT); + assert(map != NULL); + + if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER) { + agx_detile(rsrc->bo->ptr.cpu, map, + rsrc->base.width0, 32, rsrc->dt_stride / 4, + 0, 0, rsrc->base.width0, rsrc->base.height0); + } else { + memcpy(map, rsrc->bo->ptr.cpu, rsrc->dt_stride * rsrc->base.height0); + } + + winsys->displaytarget_display(winsys, rsrc->dt, context_private, box); +} + +static const char * +agx_get_vendor(struct pipe_screen* pscreen) +{ + return "Asahi"; +} + +static const char * +agx_get_device_vendor(struct pipe_screen* pscreen) +{ + return "Apple"; +} + +static const char * +agx_get_name(struct pipe_screen* pscreen) +{ + return "Apple M1 (G13G B0)"; +} + +static int +agx_get_param(struct pipe_screen* pscreen, enum pipe_cap param) +{ + bool is_deqp = agx_device(pscreen)->debug & AGX_DBG_DEQP; + + switch (param) { + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: + case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: + case PIPE_CAP_VERTEX_SHADER_SATURATE: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + case 
PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_CLIP_HALFZ: + return 1; + + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 0; + + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX: + return true; + + case PIPE_CAP_SAMPLER_VIEW_TARGET: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_ACCELERATED: + case PIPE_CAP_UMA: + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_TGSI_ARRAY_COMPONENTS: + case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED: + case PIPE_CAP_PACKED_UNIFORMS: + return 1; + + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + return is_deqp; + + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + return 0; + + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return is_deqp ? PIPE_MAX_SO_BUFFERS : 0; + + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return is_deqp ? PIPE_MAX_SO_OUTPUTS : 0; + + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: + return is_deqp ? 1 : 0; + + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return is_deqp ? 256 : 0; + + case PIPE_CAP_GLSL_FEATURE_LEVEL: + case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: + return 130; + case PIPE_CAP_ESSL_FEATURE_LEVEL: + return 120; + + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 16; + + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return 65536; + + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return 64; + + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + return 1; + + case PIPE_CAP_MAX_TEXTURE_2D_SIZE: + return 16384; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + return 0; + + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_TGSI_TEXCOORD: + case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + return true; + case PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL: + return false; + + case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: + return 0xffff; + + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 0; + + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + + case PIPE_CAP_VIDEO_MEMORY: { + uint64_t system_memory; + + if (!os_get_total_physical_memory(&system_memory)) + return 0; + + return (int)(system_memory >> 20); + } + + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + return 4; + + case PIPE_CAP_MAX_VARYINGS: + return 16; + + case PIPE_CAP_FLATSHADE: + case PIPE_CAP_TWO_SIDED_COLOR: + case PIPE_CAP_ALPHA_TEST: + case PIPE_CAP_CLIP_PLANES: + case PIPE_CAP_NIR_IMAGES_AS_DEREF: + return 0; + + case PIPE_CAP_SHAREABLE_SHADERS: + return 1; + + default: + return u_pipe_screen_get_param_defaults(pscreen, param); + } +} + +static float +agx_get_paramf(struct pipe_screen* pscreen, + enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 16.0; /* Off-by-one fixed point 4:4 
encoding */ + + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 511.95f; + + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0; + + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + + case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: + return 0.0f; + + default: + debug_printf("Unexpected PIPE_CAPF %d query\n", param); + return 0.0; + } +} + +static int +agx_get_shader_param(struct pipe_screen* pscreen, + enum pipe_shader_type shader, + enum pipe_shader_cap param) +{ + bool is_deqp = agx_device(pscreen)->debug & AGX_DBG_DEQP; + bool is_no16 = agx_device(pscreen)->debug & AGX_DBG_NO16; + + if (shader != PIPE_SHADER_VERTEX && + shader != PIPE_SHADER_FRAGMENT) + return 0; + + /* this is probably not totally correct.. but it's a start: */ + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 1024; + + case PIPE_SHADER_CAP_MAX_INPUTS: + return 16; + + case PIPE_SHADER_CAP_MAX_OUTPUTS: + return shader == PIPE_SHADER_FRAGMENT ? 4 : 16; + + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */ + + case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: + return 16 * 1024 * sizeof(float); + + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return 16; + + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 0; + + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_SUBROUTINES: + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + return 0; + + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return is_deqp; + + case PIPE_SHADER_CAP_INTEGERS: + return true; + + case PIPE_SHADER_CAP_FP16: + case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS: + case PIPE_SHADER_CAP_FP16_DERIVATIVES: + case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: + case PIPE_SHADER_CAP_INT16: + return !is_no16; + + case PIPE_SHADER_CAP_INT64_ATOMICS: + case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: + case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: + return 0; + + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + return 16; /* XXX: How many? 
*/ + + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_NIR; + + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_NIR_SERIALIZED); + + case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: + return 32; + + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: + case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: + case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: + case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: + return 0; + + default: + /* Other params are unknown */ + return 0; + } + + return 0; +} + +static int +agx_get_compute_param(struct pipe_screen *pscreen, + enum pipe_shader_ir ir_type, + enum pipe_compute_cap param, + void *ret) +{ + return 0; +} + +static bool +agx_is_format_supported(struct pipe_screen* pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned storage_sample_count, + unsigned usage) +{ + const struct util_format_description *format_desc; + + assert(target == PIPE_BUFFER || + target == PIPE_TEXTURE_1D || + target == PIPE_TEXTURE_1D_ARRAY || + target == PIPE_TEXTURE_2D || + target == PIPE_TEXTURE_2D_ARRAY || + target == PIPE_TEXTURE_RECT || + target == PIPE_TEXTURE_3D || + target == PIPE_TEXTURE_CUBE || + target == PIPE_TEXTURE_CUBE_ARRAY); + + format_desc = util_format_description(format); + + if (!format_desc) + return false; + + if (sample_count > 1) + return false; + + if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1)) + return false; + + if (usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) { + struct agx_pixel_format_entry ent = agx_pixel_format[format]; + + if (!agx_is_valid_pixel_format(format)) + return false; + + if ((usage & PIPE_BIND_RENDER_TARGET) && !ent.renderable) + return false; + } + + /* TODO: formats */ + if (usage & PIPE_BIND_VERTEX_BUFFER) { + switch (format) { + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return true; + default: + return false; + } + } + + /* TODO */ + return true; +} + +static uint64_t +agx_get_timestamp(struct pipe_screen *pscreen) +{ + return 0; +} + +static void +agx_destroy_screen(struct pipe_screen *screen) +{ + agx_close_device(agx_device(screen)); + ralloc_free(screen); +} + +static void +agx_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + +static bool +agx_fence_finish(struct pipe_screen *screen, + struct pipe_context *ctx, + struct pipe_fence_handle *fence, + uint64_t timeout) +{ + return true; +} + +static const void * +agx_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, + enum pipe_shader_type shader) +{ + return &agx_nir_options; +} + +struct pipe_screen * +agx_screen_create(struct sw_winsys *winsys) +{ + struct agx_screen *agx_screen; + struct pipe_screen *screen; + + agx_screen = rzalloc(NULL, struct agx_screen); + if (!agx_screen) + return NULL; + + screen = &agx_screen->pscreen; + agx_screen->winsys = winsys; + + /* Set debug before opening */ + agx_screen->dev.debug = + debug_get_flags_option("ASAHI_MESA_DEBUG", agx_debug_options, 0); + + /* Try to open an AGX device */ + if (!agx_open_device(screen, &agx_screen->dev)) { + ralloc_free(agx_screen); + return NULL; + } + + screen->destroy = 
agx_destroy_screen; + screen->get_name = agx_get_name; + screen->get_vendor = agx_get_vendor; + screen->get_device_vendor = agx_get_device_vendor; + screen->get_param = agx_get_param; + screen->get_shader_param = agx_get_shader_param; + screen->get_compute_param = agx_get_compute_param; + screen->get_paramf = agx_get_paramf; + screen->is_format_supported = agx_is_format_supported; + screen->context_create = agx_create_context; + screen->resource_create = agx_resource_create; + screen->resource_from_handle = agx_resource_from_handle; + screen->resource_get_handle = agx_resource_get_handle; + screen->resource_destroy = agx_resource_destroy; + screen->flush_frontbuffer = agx_flush_frontbuffer; + screen->get_timestamp = agx_get_timestamp; + screen->fence_reference = agx_fence_reference; + screen->fence_finish = agx_fence_finish; + screen->get_compiler_options = agx_get_compiler_options; + + agx_internal_shaders(&agx_screen->dev); + + return screen; +} diff --git a/lib/mesa/src/gallium/drivers/asahi/agx_public.h b/lib/mesa/src/gallium/drivers/asahi/agx_public.h new file mode 100644 index 000000000..4bf706286 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/agx_public.h @@ -0,0 +1,38 @@ +/* + * Copyright 2010 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef AGX_PUBLIC_H +#define AGX_PUBLIC_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_screen; +struct sw_winsys; +struct pipe_screen *agx_screen_create(struct sw_winsys *winsys); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/mesa/src/gallium/drivers/asahi/agx_state.c b/lib/mesa/src/gallium/drivers/asahi/agx_state.c new file mode 100644 index 000000000..6a9027a0d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/agx_state.c @@ -0,0 +1,1658 @@ +/* + * Copyright 2021 Alyssa Rosenzweig + * Copyright (C) 2019-2020 Collabora, Ltd. + * Copyright 2010 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include <stdio.h> +#include <errno.h> +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_transfer.h" +#include "gallium/auxiliary/util/u_draw.h" +#include "gallium/auxiliary/util/u_helpers.h" +#include "gallium/auxiliary/util/u_viewport.h" +#include "gallium/auxiliary/util/u_blend.h" +#include "gallium/auxiliary/util/u_framebuffer.h" +#include "gallium/auxiliary/tgsi/tgsi_from_mesa.h" +#include "gallium/auxiliary/nir/tgsi_to_nir.h" +#include "compiler/nir/nir.h" +#include "asahi/compiler/agx_compile.h" +#include "agx_state.h" +#include "asahi/lib/agx_pack.h" +#include "asahi/lib/agx_formats.h" + +static struct pipe_stream_output_target * +agx_create_stream_output_target(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct pipe_stream_output_target *target; + + target = &rzalloc(pctx, struct agx_streamout_target)->base; + + if (!target) + return NULL; + + pipe_reference_init(&target->reference, 1); + pipe_resource_reference(&target->buffer, prsc); + + target->context = pctx; + target->buffer_offset = buffer_offset; + target->buffer_size = buffer_size; + + return target; +} + +static void +agx_stream_output_target_destroy(struct pipe_context *pctx, + struct pipe_stream_output_target *target) +{ + pipe_resource_reference(&target->buffer, NULL); + ralloc_free(target); +} + +static void +agx_set_stream_output_targets(struct pipe_context *pctx, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct agx_context *ctx = agx_context(pctx); + struct agx_streamout *so = &ctx->streamout; + + assert(num_targets <= ARRAY_SIZE(so->targets)); + + for (unsigned i = 0; i < num_targets; i++) { + if (offsets[i] != -1) + agx_so_target(targets[i])->offset = offsets[i]; + + pipe_so_target_reference(&so->targets[i], targets[i]); + } + + for (unsigned i = 0; i < so->num_targets; i++) + pipe_so_target_reference(&so->targets[i], NULL); + + so->num_targets = num_targets; +} + +static void +agx_set_blend_color(struct pipe_context *pctx, + const struct pipe_blend_color *state) +{ + struct agx_context *ctx = agx_context(pctx); + + if (state) + memcpy(&ctx->blend_color, state, sizeof(*state)); +} + +static void * +agx_create_blend_state(struct 
pipe_context *ctx, + const struct pipe_blend_state *state) +{ + struct agx_blend *so = CALLOC_STRUCT(agx_blend); + + assert(!state->alpha_to_coverage); + assert(!state->alpha_to_coverage_dither); + assert(!state->alpha_to_one); + assert(!state->advanced_blend_func); + + if (state->logicop_enable) { + so->logicop_enable = true; + so->logicop_func = state->logicop_func; + return so; + } + + for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) { + unsigned rti = state->independent_blend_enable ? i : 0; + struct pipe_rt_blend_state rt = state->rt[rti]; + + if (!rt.blend_enable) { + static const nir_lower_blend_channel replace = { + .func = BLEND_FUNC_ADD, + .src_factor = BLEND_FACTOR_ZERO, + .invert_src_factor = true, + .dst_factor = BLEND_FACTOR_ZERO, + .invert_dst_factor = false, + }; + + so->rt[i].rgb = replace; + so->rt[i].alpha = replace; + } else { + so->rt[i].rgb.func = util_blend_func_to_shader(rt.rgb_func); + so->rt[i].rgb.src_factor = util_blend_factor_to_shader(rt.rgb_src_factor); + so->rt[i].rgb.invert_src_factor = util_blend_factor_is_inverted(rt.rgb_src_factor); + so->rt[i].rgb.dst_factor = util_blend_factor_to_shader(rt.rgb_dst_factor); + so->rt[i].rgb.invert_dst_factor = util_blend_factor_is_inverted(rt.rgb_dst_factor); + + so->rt[i].alpha.func = util_blend_func_to_shader(rt.alpha_func); + so->rt[i].alpha.src_factor = util_blend_factor_to_shader(rt.alpha_src_factor); + so->rt[i].alpha.invert_src_factor = util_blend_factor_is_inverted(rt.alpha_src_factor); + so->rt[i].alpha.dst_factor = util_blend_factor_to_shader(rt.alpha_dst_factor); + so->rt[i].alpha.invert_dst_factor = util_blend_factor_is_inverted(rt.alpha_dst_factor); + + so->blend_enable = true; + } + + so->rt[i].colormask = rt.colormask; + } + + return so; +} + +static void +agx_bind_blend_state(struct pipe_context *pctx, void *cso) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->blend = cso; +} + +static const enum agx_stencil_op agx_stencil_ops[PIPE_STENCIL_OP_INVERT + 1] = { + [PIPE_STENCIL_OP_KEEP] = AGX_STENCIL_OP_KEEP, + [PIPE_STENCIL_OP_ZERO] = AGX_STENCIL_OP_ZERO, + [PIPE_STENCIL_OP_REPLACE] = AGX_STENCIL_OP_REPLACE, + [PIPE_STENCIL_OP_INCR] = AGX_STENCIL_OP_INCR_SAT, + [PIPE_STENCIL_OP_DECR] = AGX_STENCIL_OP_DECR_SAT, + [PIPE_STENCIL_OP_INCR_WRAP] = AGX_STENCIL_OP_INCR_WRAP, + [PIPE_STENCIL_OP_DECR_WRAP] = AGX_STENCIL_OP_DECR_WRAP, + [PIPE_STENCIL_OP_INVERT] = AGX_STENCIL_OP_INVERT, +}; + +static void +agx_pack_rasterizer_face(struct agx_rasterizer_face_packed *out, + struct pipe_stencil_state st, + enum agx_zs_func z_func, + bool disable_z_write) +{ + agx_pack(out, RASTERIZER_FACE, cfg) { + cfg.depth_function = z_func; + cfg.disable_depth_write = disable_z_write; + + if (st.enabled) { + cfg.stencil_write_mask = st.writemask; + cfg.stencil_read_mask = st.valuemask; + + cfg.depth_pass = agx_stencil_ops[st.zpass_op]; + cfg.depth_fail = agx_stencil_ops[st.zfail_op]; + cfg.stencil_fail = agx_stencil_ops[st.fail_op]; + + cfg.stencil_compare = (enum agx_zs_func) st.func; + } else { + cfg.stencil_write_mask = 0xFF; + cfg.stencil_read_mask = 0xFF; + + cfg.depth_pass = AGX_STENCIL_OP_KEEP; + cfg.depth_fail = AGX_STENCIL_OP_KEEP; + cfg.stencil_fail = AGX_STENCIL_OP_KEEP; + + cfg.stencil_compare = AGX_ZS_FUNC_ALWAYS; + } + } +} + +static void * +agx_create_zsa_state(struct pipe_context *ctx, + const struct pipe_depth_stencil_alpha_state *state) +{ + struct agx_zsa *so = CALLOC_STRUCT(agx_zsa); + assert(!state->depth_bounds_test && "todo"); + + so->base = *state; + + /* Z func can be used as-is */ + 
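/* The STATIC_ASSERTs below verify that gallium's PIPE_FUNC_* values
 * coincide numerically with the hardware AGX_ZS_FUNC_* encoding, making
 * the cast from pipe enum to hardware enum a free translation rather
 * than a lookup table. */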
STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_NEVER == AGX_ZS_FUNC_NEVER); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_LESS == AGX_ZS_FUNC_LESS); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_EQUAL == AGX_ZS_FUNC_EQUAL); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_LEQUAL == AGX_ZS_FUNC_LEQUAL); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_GREATER == AGX_ZS_FUNC_GREATER); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_NOTEQUAL == AGX_ZS_FUNC_NOT_EQUAL); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_GEQUAL == AGX_ZS_FUNC_GEQUAL); + STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_ALWAYS == AGX_ZS_FUNC_ALWAYS); + + enum agx_zs_func z_func = state->depth_enabled ? + ((enum agx_zs_func) state->depth_func) : AGX_ZS_FUNC_ALWAYS; + + agx_pack_rasterizer_face(&so->front, + state->stencil[0], z_func, !state->depth_writemask); + + if (state->stencil[1].enabled) { + agx_pack_rasterizer_face(&so->back, + state->stencil[1], z_func, !state->depth_writemask); + } else { + /* One sided stencil */ + so->back = so->front; + } + + return so; +} + +static void +agx_bind_zsa_state(struct pipe_context *pctx, void *cso) +{ + struct agx_context *ctx = agx_context(pctx); + + if (cso) + memcpy(&ctx->zs, cso, sizeof(ctx->zs)); +} + +static void * +agx_create_rs_state(struct pipe_context *ctx, + const struct pipe_rasterizer_state *cso) +{ + struct agx_rasterizer *so = CALLOC_STRUCT(agx_rasterizer); + so->base = *cso; + + /* Line width is packed in a 4:4 fixed point format */ + unsigned line_width_fixed = ((unsigned) (cso->line_width * 16.0f)) - 1; + + /* Clamp to maximum line width */ + so->line_width = MIN2(line_width_fixed, 0xFF); + + agx_pack(so->cull, CULL, cfg) { + cfg.cull_front = cso->cull_face & PIPE_FACE_FRONT; + cfg.cull_back = cso->cull_face & PIPE_FACE_BACK; + cfg.front_face_ccw = cso->front_ccw; + cfg.depth_clip = cso->depth_clip_near; + cfg.depth_clamp = !cso->depth_clip_near; + }; + + return so; +} + +static void +agx_bind_rasterizer_state(struct pipe_context *pctx, void *cso) +{ + struct agx_context *ctx = agx_context(pctx); + struct agx_rasterizer *so = cso; + + /* Check if scissor state has changed, since scissor enable is part of the + * rasterizer state but everything else needed for scissors is part of + * viewport/scissor states */ + bool scissor_changed = (cso == NULL) || (ctx->rast == NULL) || + (ctx->rast->base.scissor != so->base.scissor); + + ctx->rast = so; + + if (scissor_changed) + ctx->dirty |= AGX_DIRTY_SCISSOR; +} + +static enum agx_wrap +agx_wrap_from_pipe(enum pipe_tex_wrap in) +{ + switch (in) { + case PIPE_TEX_WRAP_REPEAT: return AGX_WRAP_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return AGX_WRAP_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_REPEAT: return AGX_WRAP_MIRRORED_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return AGX_WRAP_CLAMP_TO_BORDER; + default: unreachable("todo: more wrap modes"); + } +} + +static enum agx_mip_filter +agx_mip_filter_from_pipe(enum pipe_tex_mipfilter in) +{ + switch (in) { + case PIPE_TEX_MIPFILTER_NEAREST: return AGX_MIP_FILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: return AGX_MIP_FILTER_LINEAR; + case PIPE_TEX_MIPFILTER_NONE: return AGX_MIP_FILTER_NONE; + } + + unreachable("Invalid mip filter"); +} + +static const enum agx_compare_func agx_compare_funcs[PIPE_FUNC_ALWAYS + 1] = { + [PIPE_FUNC_NEVER] = AGX_COMPARE_FUNC_NEVER, + [PIPE_FUNC_LESS] = AGX_COMPARE_FUNC_LESS, + [PIPE_FUNC_EQUAL] = AGX_COMPARE_FUNC_EQUAL, + [PIPE_FUNC_LEQUAL] = AGX_COMPARE_FUNC_LEQUAL, + [PIPE_FUNC_GREATER] = AGX_COMPARE_FUNC_GREATER, + [PIPE_FUNC_NOTEQUAL] = 
AGX_COMPARE_FUNC_NOT_EQUAL, + [PIPE_FUNC_GEQUAL] = AGX_COMPARE_FUNC_GEQUAL, + [PIPE_FUNC_ALWAYS] = AGX_COMPARE_FUNC_ALWAYS, +}; + +static void * +agx_create_sampler_state(struct pipe_context *pctx, + const struct pipe_sampler_state *state) +{ + struct agx_device *dev = agx_device(pctx->screen); + struct agx_bo *bo = agx_bo_create(dev, AGX_SAMPLER_LENGTH, + AGX_MEMORY_TYPE_FRAMEBUFFER); + + assert(state->min_lod == 0 && "todo: lod clamps"); + assert(state->lod_bias == 0 && "todo: lod bias"); + + agx_pack(bo->ptr.cpu, SAMPLER, cfg) { + cfg.magnify_linear = (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR); + cfg.minify_linear = (state->min_img_filter == PIPE_TEX_FILTER_LINEAR); + cfg.mip_filter = agx_mip_filter_from_pipe(state->min_mip_filter); + cfg.wrap_s = agx_wrap_from_pipe(state->wrap_s); + cfg.wrap_t = agx_wrap_from_pipe(state->wrap_t); + cfg.wrap_r = agx_wrap_from_pipe(state->wrap_r); + cfg.pixel_coordinates = !state->normalized_coords; + cfg.compare_func = agx_compare_funcs[state->compare_func]; + } + + struct agx_sampler_state *so = CALLOC_STRUCT(agx_sampler_state); + so->base = *state; + so->desc = bo; + + return so; +} + +static void +agx_delete_sampler_state(struct pipe_context *ctx, void *state) +{ + struct agx_bo *bo = state; + agx_bo_unreference(bo); +} + +static void +agx_bind_sampler_states(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned count, + void **states) +{ + struct agx_context *ctx = agx_context(pctx); + + ctx->stage[shader].sampler_count = states ? count : 0; + + memcpy(&ctx->stage[shader].samplers[start], states, + sizeof(struct agx_sampler_state *) * count); +} + +/* Channels agree for RGBA but are weird for force 0/1 */ + +static enum agx_channel +agx_channel_from_pipe(enum pipe_swizzle in) +{ + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_X == AGX_CHANNEL_R); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_Y == AGX_CHANNEL_G); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_Z == AGX_CHANNEL_B); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_W == AGX_CHANNEL_A); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_0 & 0x4); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_1 & 0x4); + STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_NONE & 0x4); + + if ((in & 0x4) == 0) + return (enum agx_channel) in; + else if (in == PIPE_SWIZZLE_1) + return AGX_CHANNEL_1; + else + return AGX_CHANNEL_0; +} + +static enum agx_layout +agx_translate_layout(uint64_t modifier) +{ + switch (modifier) { + case DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER: + return AGX_LAYOUT_TILED_64X64; + case DRM_FORMAT_MOD_LINEAR: + return AGX_LAYOUT_LINEAR; + default: + unreachable("Invalid modifier"); + } +} + +static enum agx_texture_dimension +agx_translate_texture_dimension(enum pipe_texture_target dim) +{ + switch (dim) { + case PIPE_TEXTURE_2D: return AGX_TEXTURE_DIMENSION_2D; + case PIPE_TEXTURE_CUBE: return AGX_TEXTURE_DIMENSION_CUBE; + default: unreachable("Unsupported texture dimension"); + } +} + +static struct pipe_sampler_view * +agx_create_sampler_view(struct pipe_context *pctx, + struct pipe_resource *texture, + const struct pipe_sampler_view *state) +{ + struct agx_device *dev = agx_device(pctx->screen); + struct agx_resource *rsrc = agx_resource(texture); + struct agx_sampler_view *so = CALLOC_STRUCT(agx_sampler_view); + + if (!so) + return NULL; + + /* We prepare the descriptor at CSO create time */ + so->desc = agx_bo_create(dev, AGX_TEXTURE_LENGTH, + AGX_MEMORY_TYPE_FRAMEBUFFER); + + const struct util_format_description *desc = + 
util_format_description(state->format); + + /* We only have a single swizzle for the user swizzle and the format fixup, + * so compose them now. */ + uint8_t out_swizzle[4]; + uint8_t view_swizzle[4] = { + state->swizzle_r, state->swizzle_g, + state->swizzle_b, state->swizzle_a + }; + + util_format_compose_swizzles(desc->swizzle, view_swizzle, out_swizzle); + + unsigned level = state->u.tex.first_level; + assert(state->u.tex.first_layer == 0); + + /* Pack the descriptor into GPU memory */ + agx_pack(so->desc->ptr.cpu, TEXTURE, cfg) { + cfg.dimension = agx_translate_texture_dimension(state->target); + cfg.layout = agx_translate_layout(rsrc->modifier); + cfg.format = agx_pixel_format[state->format].hw; + cfg.swizzle_r = agx_channel_from_pipe(out_swizzle[0]); + cfg.swizzle_g = agx_channel_from_pipe(out_swizzle[1]); + cfg.swizzle_b = agx_channel_from_pipe(out_swizzle[2]); + cfg.swizzle_a = agx_channel_from_pipe(out_swizzle[3]); + cfg.width = u_minify(texture->width0, level); + cfg.height = u_minify(texture->height0, level); + cfg.levels = state->u.tex.last_level - level + 1; + cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); + cfg.address = rsrc->bo->ptr.gpu + rsrc->slices[level].offset; + cfg.unk_2 = false; + + cfg.stride = (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) ? + (rsrc->slices[level].line_stride - 16) : + AGX_RT_STRIDE_TILED; + } + + /* Initialize base object */ + so->base = *state; + so->base.texture = NULL; + pipe_resource_reference(&so->base.texture, texture); + pipe_reference_init(&so->base.reference, 1); + so->base.context = pctx; + return &so->base; +} + +static void +agx_set_sampler_views(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership, + struct pipe_sampler_view **views) +{ + struct agx_context *ctx = agx_context(pctx); + unsigned new_nr = 0; + unsigned i; + + assert(start == 0); + + if (!views) + count = 0; + + for (i = 0; i < count; ++i) { + if (views[i]) + new_nr = i + 1; + + if (take_ownership) { + pipe_sampler_view_reference((struct pipe_sampler_view **) + &ctx->stage[shader].textures[i], NULL); + ctx->stage[shader].textures[i] = (struct agx_sampler_view *)views[i]; + } else { + pipe_sampler_view_reference((struct pipe_sampler_view **) + &ctx->stage[shader].textures[i], views[i]); + } + } + + for (; i < ctx->stage[shader].texture_count; i++) { + pipe_sampler_view_reference((struct pipe_sampler_view **) + &ctx->stage[shader].textures[i], NULL); + } + ctx->stage[shader].texture_count = new_nr; +} + +static void +agx_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *pview) +{ + struct agx_sampler_view *view = (struct agx_sampler_view *) pview; + pipe_resource_reference(&view->base.texture, NULL); + agx_bo_unreference(view->desc); + FREE(view); +} + +static struct pipe_surface * +agx_create_surface(struct pipe_context *ctx, + struct pipe_resource *texture, + const struct pipe_surface *surf_tmpl) +{ + struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); + + if (!surface) + return NULL; + pipe_reference_init(&surface->reference, 1); + pipe_resource_reference(&surface->texture, texture); + surface->context = ctx; + surface->format = surf_tmpl->format; + surface->width = texture->width0; + surface->height = texture->height0; + surface->texture = texture; + surface->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + surface->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + surface->u.tex.level = surf_tmpl->u.tex.level; + + return 
surface; +} + +static void +agx_set_clip_state(struct pipe_context *ctx, + const struct pipe_clip_state *state) +{ +} + +static void +agx_set_polygon_stipple(struct pipe_context *ctx, + const struct pipe_poly_stipple *state) +{ +} + +static void +agx_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ + struct agx_context *ctx = agx_context(pipe); + ctx->sample_mask = sample_mask; +} + +static void +agx_set_scissor_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissor) +{ + struct agx_context *ctx = agx_context(pctx); + + assert(start_slot == 0 && "no geometry shaders"); + assert(num_scissors == 1 && "no geometry shaders"); + + ctx->scissor = *scissor; + ctx->dirty |= AGX_DIRTY_SCISSOR; +} + +static void +agx_set_stencil_ref(struct pipe_context *pctx, + const struct pipe_stencil_ref state) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->stencil_ref = state; +} + +static void +agx_set_viewport_states(struct pipe_context *pctx, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *vp) +{ + struct agx_context *ctx = agx_context(pctx); + + assert(start_slot == 0 && "no geometry shaders"); + assert(num_viewports == 1 && "no geometry shaders"); + + ctx->dirty |= AGX_DIRTY_VIEWPORT; + ctx->viewport = *vp; +} + +struct agx_viewport_scissor { + uint64_t viewport; + unsigned scissor; +}; + +static struct agx_viewport_scissor +agx_upload_viewport_scissor(struct agx_pool *pool, + struct agx_batch *batch, + const struct pipe_viewport_state *vp, + const struct pipe_scissor_state *ss) +{ + struct agx_ptr T = agx_pool_alloc_aligned(pool, AGX_VIEWPORT_LENGTH, 64); + + float trans_x = vp->translate[0], trans_y = vp->translate[1]; + float abs_scale_x = fabsf(vp->scale[0]), abs_scale_y = fabsf(vp->scale[1]); + + /* Calculate the extent of the viewport. Note if a particular dimension of + * the viewport is an odd number of pixels, both the translate and the scale + * will have a fractional part of 0.5, so adding and subtracting them yields + * an integer. Therefore we don't need to round explicitly */ + unsigned minx = CLAMP((int) (trans_x - abs_scale_x), 0, batch->width); + unsigned miny = CLAMP((int) (trans_y - abs_scale_y), 0, batch->height); + unsigned maxx = CLAMP((int) (trans_x + abs_scale_x), 0, batch->width); + unsigned maxy = CLAMP((int) (trans_y + abs_scale_y), 0, batch->height); + + if (ss) { + minx = MAX2(ss->minx, minx); + miny = MAX2(ss->miny, miny); + maxx = MIN2(ss->maxx, maxx); + maxy = MIN2(ss->maxy, maxy); + } + + assert(maxx > minx && maxy > miny); + + float minz, maxz; + util_viewport_zmin_zmax(vp, false, &minz, &maxz); + + agx_pack(T.cpu, VIEWPORT, cfg) { + cfg.min_tile_x = minx / 32; + cfg.min_tile_y = miny / 32; + cfg.max_tile_x = DIV_ROUND_UP(maxx, 32); + cfg.max_tile_y = DIV_ROUND_UP(maxy, 32); + cfg.clip_tile = true; + + cfg.translate_x = vp->translate[0]; + cfg.translate_y = vp->translate[1]; + cfg.scale_x = vp->scale[0]; + cfg.scale_y = vp->scale[1]; + + /* Assumes [0, 1] clip coordinates. If half-z is not in use, lower_half_z + * is called to ensure this works. 
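Concretely, the hardware applies
+    * z' = translate_z + z * scale_z, so the values below map clip-space z in
+    * [0, 1] onto the [minz, maxz] depth range.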
*/ + cfg.translate_z = minz; + cfg.scale_z = maxz - minz; + }; + + /* Allocate a new scissor descriptor */ + struct agx_scissor_packed *ptr = batch->scissor.bo->ptr.cpu; + unsigned index = (batch->scissor.count++); + + agx_pack(ptr + index, SCISSOR, cfg) { + cfg.min_x = minx; + cfg.min_y = miny; + cfg.min_z = minz; + cfg.max_x = maxx; + cfg.max_y = maxy; + cfg.max_z = maxz; + } + + return (struct agx_viewport_scissor) { + .viewport = T.gpu, + .scissor = index + }; +} + +/* A framebuffer state can be reused across batches, so it doesn't make sense + * to add surfaces to the BO list here. Instead we added them when flushing. + */ + +static void +agx_set_framebuffer_state(struct pipe_context *pctx, + const struct pipe_framebuffer_state *state) +{ + struct agx_context *ctx = agx_context(pctx); + + if (!state) + return; + + /* XXX: eliminate this flush with batch tracking logic */ + pctx->flush(pctx, NULL, 0); + + util_copy_framebuffer_state(&ctx->framebuffer, state); + ctx->batch->width = state->width; + ctx->batch->height = state->height; + ctx->batch->nr_cbufs = state->nr_cbufs; + ctx->batch->cbufs[0] = state->cbufs[0]; + ctx->batch->zsbuf = state->zsbuf; + ctx->dirty = ~0; + + for (unsigned i = 0; i < state->nr_cbufs; ++i) { + struct pipe_surface *surf = state->cbufs[i]; + struct agx_resource *tex = agx_resource(surf->texture); + const struct util_format_description *desc = + util_format_description(surf->format); + + agx_pack(ctx->render_target[i], RENDER_TARGET, cfg) { + cfg.layout = agx_translate_layout(tex->modifier); + cfg.format = agx_pixel_format[surf->format].hw; + cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]); + cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]); + cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]); + cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]); + cfg.width = state->width; + cfg.height = state->height; + cfg.buffer = tex->bo->ptr.gpu; + + cfg.stride = (tex->modifier == DRM_FORMAT_MOD_LINEAR) ? + (tex->slices[0].line_stride - 4) : + AGX_RT_STRIDE_TILED; + }; + } +} + +/* Likewise constant buffers, textures, and samplers are handled in a common + * per-draw path, with dirty tracking to reduce the costs involved. 
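The CSO hooks
+ * below therefore only record the new bindings; the descriptors themselves
+ * are built and uploaded when a draw is encoded, via agx_build_pipeline and
+ * agx_push_location.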
+ */ + +static void +agx_set_constant_buffer(struct pipe_context *pctx, + enum pipe_shader_type shader, uint index, + bool take_ownership, + const struct pipe_constant_buffer *cb) +{ + struct agx_context *ctx = agx_context(pctx); + struct agx_stage *s = &ctx->stage[shader]; + + util_copy_constant_buffer(&s->cb[index], cb, take_ownership); + + unsigned mask = (1 << index); + + if (cb) + s->cb_mask |= mask; + else + s->cb_mask &= ~mask; +} + +static void +agx_surface_destroy(struct pipe_context *ctx, + struct pipe_surface *surface) +{ + pipe_resource_reference(&surface->texture, NULL); + FREE(surface); +} + +static void +agx_delete_state(struct pipe_context *ctx, void *state) +{ + FREE(state); +} + +/* BOs added to the batch in the uniform upload path */ + +static void +agx_set_vertex_buffers(struct pipe_context *pctx, + unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership, + const struct pipe_vertex_buffer *buffers) +{ + struct agx_context *ctx = agx_context(pctx); + + util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, + start_slot, count, unbind_num_trailing_slots, take_ownership); + + ctx->dirty |= AGX_DIRTY_VERTEX; +} + +static void * +agx_create_vertex_elements(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *state) +{ + assert(count < AGX_MAX_ATTRIBS); + + struct agx_attribute *attribs = calloc(sizeof(*attribs), AGX_MAX_ATTRIBS); + for (unsigned i = 0; i < count; ++i) { + const struct pipe_vertex_element ve = state[i]; + + const struct util_format_description *desc = + util_format_description(ve.src_format); + + unsigned chan_size = desc->channel[0].size / 8; + + assert(chan_size == 1 || chan_size == 2 || chan_size == 4); + assert(desc->nr_channels >= 1 && desc->nr_channels <= 4); + assert((ve.src_offset & (chan_size - 1)) == 0); + + attribs[i] = (struct agx_attribute) { + .buf = ve.vertex_buffer_index, + .src_offset = ve.src_offset / chan_size, + .nr_comps_minus_1 = desc->nr_channels - 1, + .format = agx_vertex_format[ve.src_format], + .divisor = ve.instance_divisor + }; + } + + return attribs; +} + +static void +agx_bind_vertex_elements_state(struct pipe_context *pctx, void *cso) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->attributes = cso; + ctx->dirty |= AGX_DIRTY_VERTEX; +} + +static uint32_t asahi_shader_key_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct asahi_shader_key)); +} + +static bool asahi_shader_key_equal(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct asahi_shader_key)) == 0; +} + +static void * +agx_create_shader_state(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct agx_uncompiled_shader *so = CALLOC_STRUCT(agx_uncompiled_shader); + + if (!so) + return NULL; + + so->base = *cso; + + if (cso->type == PIPE_SHADER_IR_NIR) { + so->nir = cso->ir.nir; + } else { + assert(cso->type == PIPE_SHADER_IR_TGSI); + so->nir = tgsi_to_nir(cso->tokens, pctx->screen, false); + } + + so->variants = _mesa_hash_table_create(NULL, asahi_shader_key_hash, asahi_shader_key_equal); + return so; +} + +static bool +agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out, + enum pipe_shader_type stage, struct asahi_shader_key *key) +{ + struct agx_uncompiled_shader *so = ctx->stage[stage].shader; + assert(so != NULL); + + struct hash_entry *he = _mesa_hash_table_search(so->variants, key); + + if (he) { + if ((*out) == he->data) + return false; + + *out = he->data; + return true; + } + + struct 
agx_compiled_shader *compiled = CALLOC_STRUCT(agx_compiled_shader); + struct util_dynarray binary; + util_dynarray_init(&binary, NULL); + + nir_shader *nir = nir_shader_clone(NULL, so->nir); + + if (key->blend.blend_enable) { + nir_lower_blend_options opts = { + .format = { key->rt_formats[0] }, + .scalar_blend_const = true + }; + + memcpy(opts.rt, key->blend.rt, sizeof(opts.rt)); + NIR_PASS_V(nir, nir_lower_blend, opts); + } else if (key->blend.logicop_enable) { + nir_lower_blend_options opts = { + .format = { key->rt_formats[0] }, + .logicop_enable = true, + .logicop_func = key->blend.logicop_func, + }; + + NIR_PASS_V(nir, nir_lower_blend, opts); + } + + if (stage == PIPE_SHADER_FRAGMENT) + NIR_PASS_V(nir, nir_lower_fragcolor, key->nr_cbufs); + + agx_compile_shader_nir(nir, &key->base, &binary, &compiled->info); + + struct agx_varyings *varyings = &compiled->info.varyings; + unsigned packed_varying_sz = (AGX_VARYING_HEADER_LENGTH + varyings->nr_descs * AGX_VARYING_LENGTH); + uint8_t *packed_varyings = alloca(packed_varying_sz); + + agx_pack(packed_varyings, VARYING_HEADER, cfg) { + cfg.triangle_slots = cfg.point_slots = varyings->nr_slots; + } + + memcpy(packed_varyings + AGX_VARYING_HEADER_LENGTH, varyings->packed, + varyings->nr_descs * AGX_VARYING_LENGTH); + + if (binary.size) { + struct agx_device *dev = agx_device(ctx->base.screen); + compiled->bo = agx_bo_create(dev, + ALIGN_POT(binary.size, 256) + (3 * packed_varying_sz), + AGX_MEMORY_TYPE_SHADER); + memcpy(compiled->bo->ptr.cpu, binary.data, binary.size); + + + /* TODO: Why is the varying descriptor duplicated 3x? */ + unsigned offs = ALIGN_POT(binary.size, 256); + for (unsigned copy = 0; copy < 3; ++copy) { + memcpy(((uint8_t *) compiled->bo->ptr.cpu) + offs, packed_varyings, packed_varying_sz); + offs += packed_varying_sz; + } + + compiled->varyings = compiled->bo->ptr.gpu + ALIGN_POT(binary.size, 256); + } + + ralloc_free(nir); + util_dynarray_fini(&binary); + + he = _mesa_hash_table_insert(so->variants, key, compiled); + *out = he->data; + return true; +} + +static bool +agx_update_vs(struct agx_context *ctx) +{ + struct agx_vs_shader_key key = { + .num_vbufs = util_last_bit(ctx->vb_mask), + .clip_halfz = ctx->rast->base.clip_halfz, + }; + + memcpy(key.attributes, ctx->attributes, + sizeof(key.attributes[0]) * AGX_MAX_ATTRIBS); + + u_foreach_bit(i, ctx->vb_mask) { + key.vbuf_strides[i] = ctx->vertex_buffers[i].stride; + } + + struct asahi_shader_key akey = { + .base.vs = key + }; + + return agx_update_shader(ctx, &ctx->vs, PIPE_SHADER_VERTEX, &akey); +} + +static bool +agx_update_fs(struct agx_context *ctx) +{ + struct asahi_shader_key key = { + .nr_cbufs = ctx->batch->nr_cbufs, + }; + + for (unsigned i = 0; i < key.nr_cbufs; ++i) { + struct pipe_surface *surf = ctx->batch->cbufs[i]; + + if (surf) { + enum pipe_format fmt = surf->format; + key.rt_formats[i] = fmt; + key.base.fs.tib_formats[i] = agx_pixel_format[fmt].internal; + } else { + key.rt_formats[i] = PIPE_FORMAT_NONE; + } + } + + memcpy(&key.blend, ctx->blend, sizeof(key.blend)); + + return agx_update_shader(ctx, &ctx->fs, PIPE_SHADER_FRAGMENT, &key); +} + +static void +agx_bind_shader_state(struct pipe_context *pctx, void *cso) +{ + if (!cso) + return; + + struct agx_context *ctx = agx_context(pctx); + struct agx_uncompiled_shader *so = cso; + + enum pipe_shader_type type = pipe_shader_type_from_mesa(so->nir->info.stage); + ctx->stage[type].shader = so; +} + +static void +agx_delete_compiled_shader(struct hash_entry *ent) +{ + struct agx_compiled_shader *so = 
ent->data; + agx_bo_unreference(so->bo); + FREE(so); +} + +static void +agx_delete_shader_state(struct pipe_context *ctx, + void *cso) +{ + struct agx_uncompiled_shader *so = cso; + _mesa_hash_table_destroy(so->variants, agx_delete_compiled_shader); + free(so); +} + +/* Pipeline consists of a sequence of binding commands followed by a set shader command */ +static uint32_t +agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum pipe_shader_type stage) +{ + /* Pipelines must be 64-byte aligned */ + struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, + (16 * AGX_BIND_UNIFORM_LENGTH) + // XXX: correct sizes, break up at compile time + (ctx->stage[stage].texture_count * AGX_BIND_TEXTURE_LENGTH) + + (PIPE_MAX_SAMPLERS * AGX_BIND_SAMPLER_LENGTH) + + AGX_SET_SHADER_EXTENDED_LENGTH + 8, + 64); + + uint8_t *record = ptr.cpu; + + /* There is a maximum number of half words we may push with a single + * BIND_UNIFORM record, so split up the range to fit. We only need to call + * agx_push_location once, however, which reduces the cost. */ + unsigned unif_records = 0; + + for (unsigned i = 0; i < cs->info.push_ranges; ++i) { + struct agx_push push = cs->info.push[i]; + uint64_t buffer = agx_push_location(ctx, push, stage); + unsigned halfs_per_record = 14; + unsigned records = DIV_ROUND_UP(push.length, halfs_per_record); + + /* Ensure we don't overflow */ + unif_records += records; + assert(unif_records < 16); + + for (unsigned j = 0; j < records; ++j) { + agx_pack(record, BIND_UNIFORM, cfg) { + cfg.start_halfs = push.base + (j * halfs_per_record); + cfg.size_halfs = MIN2(push.length - (j * halfs_per_record), halfs_per_record); + cfg.buffer = buffer + (j * halfs_per_record * 2); + } + + record += AGX_BIND_UNIFORM_LENGTH; + } + } + + for (unsigned i = 0; i < ctx->stage[stage].texture_count; ++i) { + struct agx_sampler_view *tex = ctx->stage[stage].textures[i]; + agx_batch_add_bo(ctx->batch, tex->desc); + agx_batch_add_bo(ctx->batch, agx_resource(tex->base.texture)->bo); + + + agx_pack(record, BIND_TEXTURE, cfg) { + cfg.start = i; + cfg.count = 1; + cfg.buffer = tex->desc->ptr.gpu; + } + + record += AGX_BIND_TEXTURE_LENGTH; + } + + for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + struct agx_sampler_state *sampler = ctx->stage[stage].samplers[i]; + + if (!sampler) + continue; + + struct agx_bo *bo = sampler->desc; + agx_batch_add_bo(ctx->batch, bo); + + agx_pack(record, BIND_SAMPLER, cfg) { + cfg.start = i; + cfg.count = 1; + cfg.buffer = bo->ptr.gpu; + } + + record += AGX_BIND_SAMPLER_LENGTH; + } + + /* TODO: Can we prepack this? */ + if (stage == PIPE_SHADER_FRAGMENT) { + agx_pack(record, SET_SHADER_EXTENDED, cfg) { + cfg.code = cs->bo->ptr.gpu; + cfg.register_quadwords = 0; + cfg.unk_3 = 0x8d; + cfg.unk_1 = 0x2010bd; + cfg.unk_2 = 0x0d; + cfg.unk_2b = 1; + cfg.unk_3b = 0x1; + cfg.unk_4 = 0x800; + cfg.preshader_unk = 0xc080; + cfg.spill_size = 0x2; + } + + record += AGX_SET_SHADER_EXTENDED_LENGTH; + } else { + agx_pack(record, SET_SHADER, cfg) { + cfg.code = cs->bo->ptr.gpu; + cfg.register_quadwords = 0; + cfg.unk_2b = cs->info.varyings.nr_slots; + cfg.unk_2 = 0x0d; + } + + record += AGX_SET_SHADER_LENGTH; + } + + /* End pipeline */ + memset(record, 0, 8); + assert(ptr.gpu < (1ull << 32)); + return ptr.gpu; +} + +/* Internal pipelines (TODO: refactor?) 
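These hand-rolled pipelines implement the
+ * tilebuffer clear, reload, and store passes around a render pass. Each is a
+ * short run of BIND_* records plus a SET_SHADER_EXTENDED record, terminated
+ * by eight zero bytes, mirroring agx_build_pipeline above.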
*/ +uint64_t +agx_build_clear_pipeline(struct agx_context *ctx, uint32_t code, uint64_t clear_buf) +{ + struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, + (1 * AGX_BIND_UNIFORM_LENGTH) + + AGX_SET_SHADER_EXTENDED_LENGTH + 8, + 64); + + uint8_t *record = ptr.cpu; + + agx_pack(record, BIND_UNIFORM, cfg) { + cfg.start_halfs = (6 * 2); + cfg.size_halfs = 4; + cfg.buffer = clear_buf; + } + + record += AGX_BIND_UNIFORM_LENGTH; + + /* TODO: Can we prepack this? */ + agx_pack(record, SET_SHADER_EXTENDED, cfg) { + cfg.code = code; + cfg.register_quadwords = 1; + cfg.unk_3 = 0x8d; + cfg.unk_2 = 0x0d; + cfg.unk_2b = 4; + cfg.frag_unk = 0x880100; + cfg.preshader_mode = 0; // XXX + } + + record += AGX_SET_SHADER_EXTENDED_LENGTH; + + /* End pipeline */ + memset(record, 0, 8); + return ptr.gpu; +} + +uint64_t +agx_build_reload_pipeline(struct agx_context *ctx, uint32_t code, struct pipe_surface *surf) +{ + struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, + (1 * AGX_BIND_TEXTURE_LENGTH) + + (1 * AGX_BIND_SAMPLER_LENGTH) + + AGX_SET_SHADER_EXTENDED_LENGTH + 8, + 64); + + uint8_t *record = ptr.cpu; + struct agx_ptr sampler = agx_pool_alloc_aligned(&ctx->batch->pool, AGX_SAMPLER_LENGTH, 64); + struct agx_ptr texture = agx_pool_alloc_aligned(&ctx->batch->pool, AGX_TEXTURE_LENGTH, 64); + + agx_pack(sampler.cpu, SAMPLER, cfg) { + cfg.magnify_linear = true; + cfg.minify_linear = false; + cfg.mip_filter = AGX_MIP_FILTER_NONE; + cfg.wrap_s = AGX_WRAP_CLAMP_TO_EDGE; + cfg.wrap_t = AGX_WRAP_CLAMP_TO_EDGE; + cfg.wrap_r = AGX_WRAP_CLAMP_TO_EDGE; + cfg.pixel_coordinates = true; + cfg.compare_func = AGX_COMPARE_FUNC_ALWAYS; + cfg.unk_2 = 0; + cfg.unk_3 = 0; + } + + agx_pack(texture.cpu, TEXTURE, cfg) { + struct agx_resource *rsrc = agx_resource(surf->texture); + const struct util_format_description *desc = + util_format_description(surf->format); + + cfg.layout = agx_translate_layout(rsrc->modifier); + cfg.format = agx_pixel_format[surf->format].hw; + cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]); + cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]); + cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]); + cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]); + cfg.width = surf->width; + cfg.height = surf->height; + cfg.levels = 1; + cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); + cfg.address = rsrc->bo->ptr.gpu; + cfg.unk_2 = false; + + cfg.stride = (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) ? + (rsrc->slices[0].line_stride - 16) : + AGX_RT_STRIDE_TILED; + } + + agx_pack(record, BIND_TEXTURE, cfg) { + cfg.start = 0; + cfg.count = 1; + cfg.buffer = texture.gpu; + } + + record += AGX_BIND_TEXTURE_LENGTH; + + agx_pack(record, BIND_SAMPLER, cfg) { + cfg.start = 0; + cfg.count = 1; + cfg.buffer = sampler.gpu; + } + + record += AGX_BIND_SAMPLER_LENGTH; + + /* TODO: Can we prepack this? 
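Everything below except cfg.code is constant
+    * across reloads, so the record could plausibly be packed once up front.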
*/ + agx_pack(record, SET_SHADER_EXTENDED, cfg) { + cfg.code = code; + cfg.register_quadwords = 0; + cfg.unk_3 = 0x8d; + cfg.unk_2 = 0x0d; + cfg.unk_2b = 4; + cfg.unk_4 = 0; + cfg.frag_unk = 0x880100; + cfg.preshader_mode = 0; // XXX + } + + record += AGX_SET_SHADER_EXTENDED_LENGTH; + + /* End pipeline */ + memset(record, 0, 8); + return ptr.gpu; +} + +uint64_t +agx_build_store_pipeline(struct agx_context *ctx, uint32_t code, + uint64_t render_target) +{ + struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, + (1 * AGX_BIND_TEXTURE_LENGTH) + + (1 * AGX_BIND_UNIFORM_LENGTH) + + AGX_SET_SHADER_EXTENDED_LENGTH + 8, + 64); + + uint8_t *record = ptr.cpu; + + agx_pack(record, BIND_TEXTURE, cfg) { + cfg.start = 0; + cfg.count = 1; + cfg.buffer = render_target; + } + + record += AGX_BIND_TEXTURE_LENGTH; + + uint32_t unk[] = { 0, ~0 }; + + agx_pack(record, BIND_UNIFORM, cfg) { + cfg.start_halfs = 4; + cfg.size_halfs = 4; + cfg.buffer = agx_pool_upload_aligned(&ctx->batch->pool, unk, sizeof(unk), 16); + } + + record += AGX_BIND_UNIFORM_LENGTH; + + /* TODO: Can we prepack this? */ + agx_pack(record, SET_SHADER_EXTENDED, cfg) { + cfg.code = code; + cfg.register_quadwords = 1; + cfg.unk_2 = 0xd; + cfg.unk_3 = 0x8d; + cfg.frag_unk = 0x880100; + cfg.preshader_mode = 0; // XXX + } + + record += AGX_SET_SHADER_EXTENDED_LENGTH; + + /* End pipeline */ + memset(record, 0, 8); + return ptr.gpu; +} + +static uint64_t +demo_launch_fragment(struct agx_context *ctx, struct agx_pool *pool, uint32_t pipeline, uint32_t varyings, unsigned input_count) +{ + struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_BIND_PIPELINE_LENGTH, 64); + + agx_pack(t.cpu, BIND_PIPELINE, cfg) { + cfg.tag = AGX_BIND_PIPELINE_FRAGMENT; + cfg.sampler_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count; + cfg.texture_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count; + cfg.input_count = input_count; + cfg.pipeline = pipeline; + cfg.fs_varyings = varyings; + }; + + return t.gpu; +} + +static uint64_t +demo_interpolation(struct agx_compiled_shader *fs, struct agx_pool *pool) +{ + struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_INTERPOLATION_LENGTH, 64); + + agx_pack(t.cpu, INTERPOLATION, cfg) { + cfg.varying_count = fs->info.varyings.nr_slots; + }; + + return t.gpu; +} + +static uint64_t +demo_linkage(struct agx_compiled_shader *vs, struct agx_pool *pool) +{ + struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_LINKAGE_LENGTH, 64); + + agx_pack(t.cpu, LINKAGE, cfg) { + cfg.varying_count = vs->info.varyings.nr_slots; + + // 0x2 for fragcoordz, 0x1 for varyings at all + cfg.unk_1 = 0x210000 | (vs->info.writes_psiz ? 0x40000 : 0); + }; + + return t.gpu; +} + +static uint64_t +demo_rasterizer(struct agx_context *ctx, struct agx_pool *pool, bool is_points) +{ + struct agx_rasterizer *rast = ctx->rast; + struct agx_rasterizer_packed out; + + agx_pack(&out, RASTERIZER, cfg) { + bool back_stencil = ctx->zs.base.stencil[1].enabled; + cfg.front.stencil_reference = ctx->stencil_ref.ref_value[0]; + cfg.back.stencil_reference = back_stencil ? + ctx->stencil_ref.ref_value[1] : + cfg.front.stencil_reference; + + cfg.front.line_width = cfg.back.line_width = rast->line_width; + cfg.front.polygon_mode = cfg.back.polygon_mode = AGX_POLYGON_MODE_FILL; + + cfg.unk_fill_lines = is_points; /* XXX: what is this? 
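Despite the name, it is set for point primitives here, not lines.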
*/ + + /* Always enable scissoring so we may scissor to the viewport (TODO: + * optimize this out if the viewport is the default and the app does not + * use the scissor test) */ + cfg.scissor_enable = true; + }; + + /* Words 2-3: front */ + out.opaque[2] |= ctx->zs.front.opaque[0]; + out.opaque[3] |= ctx->zs.front.opaque[1]; + + /* Words 4-5: back */ + out.opaque[4] |= ctx->zs.back.opaque[0]; + out.opaque[5] |= ctx->zs.back.opaque[1]; + + return agx_pool_upload_aligned(pool, &out, sizeof(out), 64); +} + +static uint64_t +demo_unk11(struct agx_pool *pool, bool prim_lines, bool prim_points, bool reads_tib) +{ +#define UNK11_FILL_MODE_LINES_1 (1 << 26) + +#define UNK11_FILL_MODE_LINES_2 (0x5004 << 16) +#define UNK11_LINES (0x10000000) +#define UNK11_POINTS (0x40000000) + +#define UNK11_READS_TIB (0x20000000) + + uint32_t unk[] = { + 0x200004a, + 0x200 | ((prim_lines || prim_points) ? UNK11_FILL_MODE_LINES_1 : 0) | (reads_tib ? UNK11_READS_TIB : 0), + 0x7e00000 | (prim_lines ? UNK11_LINES : 0) | (prim_points ? UNK11_POINTS : 0), + 0x7e00000 | (prim_lines ? UNK11_LINES : 0) | (prim_points ? UNK11_POINTS : 0), + + 0x1ffff + }; + + return agx_pool_upload(pool, unk, sizeof(unk)); +} + +static uint64_t +demo_unk12(struct agx_pool *pool) +{ + uint32_t unk[] = { + 0x410000, + 0x1e3ce508, + 0xa0 + }; + + return agx_pool_upload(pool, unk, sizeof(unk)); +} + +static uint64_t +agx_set_scissor_index(struct agx_pool *pool, unsigned index) +{ + struct agx_ptr T = agx_pool_alloc_aligned(pool, AGX_SET_SCISSOR_LENGTH, 64); + + agx_pack(T.cpu, SET_SCISSOR, cfg) { + cfg.index = index; + }; + + return T.gpu; +} + +static void +agx_push_record(uint8_t **out, unsigned size_words, uint64_t ptr) +{ + assert(ptr < (1ull << 40)); + assert(size_words < (1ull << 24)); + + uint64_t value = (size_words | (ptr << 24)); + memcpy(*out, &value, sizeof(value)); + *out += sizeof(value); +} + +static uint8_t * +agx_encode_state(struct agx_context *ctx, uint8_t *out, + uint32_t pipeline_vertex, uint32_t pipeline_fragment, uint32_t varyings, + bool is_lines, bool is_points) +{ + agx_pack(out, BIND_PIPELINE, cfg) { + cfg.tag = AGX_BIND_PIPELINE_VERTEX; + cfg.pipeline = pipeline_vertex; + cfg.vs_output_count_1 = ctx->vs->info.varyings.nr_slots; + cfg.vs_output_count_2 = ctx->vs->info.varyings.nr_slots; + cfg.sampler_count = ctx->stage[PIPE_SHADER_VERTEX].texture_count; + cfg.texture_count = ctx->stage[PIPE_SHADER_VERTEX].texture_count; + } + + /* yes, it's really 17 bytes */ + out += AGX_BIND_PIPELINE_LENGTH; + *(out++) = 0x0; + + struct agx_pool *pool = &ctx->batch->pool; + struct agx_ptr zero = agx_pool_alloc_aligned(pool, 16, 256); + memset(zero.cpu, 0, 16); + + bool reads_tib = ctx->fs->info.reads_tib; + + agx_push_record(&out, 0, zero.gpu); + agx_push_record(&out, 5, demo_interpolation(ctx->fs, pool)); + agx_push_record(&out, 5, demo_launch_fragment(ctx, pool, pipeline_fragment, varyings, ctx->fs->info.varyings.nr_descs)); + agx_push_record(&out, 4, demo_linkage(ctx->vs, pool)); + agx_push_record(&out, 7, demo_rasterizer(ctx, pool, is_points)); + agx_push_record(&out, 5, demo_unk11(pool, is_lines, is_points, reads_tib)); + + if (ctx->dirty & (AGX_DIRTY_VIEWPORT | AGX_DIRTY_SCISSOR)) { + struct agx_viewport_scissor vps = agx_upload_viewport_scissor(pool, + ctx->batch, &ctx->viewport, + ctx->rast->base.scissor ? 
&ctx->scissor : NULL); + + agx_push_record(&out, 10, vps.viewport); + agx_push_record(&out, 2, agx_set_scissor_index(pool, vps.scissor)); + } + + agx_push_record(&out, 3, demo_unk12(pool)); + agx_push_record(&out, 2, agx_pool_upload(pool, ctx->rast->cull, sizeof(ctx->rast->cull))); + + return (out - 1); // XXX: alignment fixup, or something +} + +static enum agx_primitive +agx_primitive_for_pipe(enum pipe_prim_type mode) +{ + switch (mode) { + case PIPE_PRIM_POINTS: return AGX_PRIMITIVE_POINTS; + case PIPE_PRIM_LINES: return AGX_PRIMITIVE_LINES; + case PIPE_PRIM_LINE_STRIP: return AGX_PRIMITIVE_LINE_STRIP; + case PIPE_PRIM_LINE_LOOP: return AGX_PRIMITIVE_LINE_LOOP; + case PIPE_PRIM_TRIANGLES: return AGX_PRIMITIVE_TRIANGLES; + case PIPE_PRIM_TRIANGLE_STRIP: return AGX_PRIMITIVE_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: return AGX_PRIMITIVE_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: return AGX_PRIMITIVE_QUADS; + case PIPE_PRIM_QUAD_STRIP: return AGX_PRIMITIVE_QUAD_STRIP; + default: unreachable("todo: other primitive types"); + } +} + +static uint64_t +agx_index_buffer_ptr(struct agx_batch *batch, + const struct pipe_draw_start_count_bias *draw, + const struct pipe_draw_info *info) +{ + off_t offset = draw->start * info->index_size; + + if (!info->has_user_indices) { + struct agx_bo *bo = agx_resource(info->index.resource)->bo; + agx_batch_add_bo(batch, bo); + + return bo->ptr.gpu + offset; + } else { + return agx_pool_upload_aligned(&batch->pool, + ((uint8_t *) info->index.user) + offset, + draw->count * info->index_size, 64); + } +} + +static bool +agx_scissor_culls_everything(struct agx_context *ctx) +{ + const struct pipe_scissor_state ss = ctx->scissor; + + return ctx->rast->base.scissor && + ((ss.minx == ss.maxx) || (ss.miny == ss.maxy)); +} + +static void +agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + if (num_draws > 1) { + util_draw_multi(pctx, info, drawid_offset, indirect, draws, num_draws); + return; + } + + if (info->index_size && draws->index_bias) + unreachable("todo: index bias"); + + struct agx_context *ctx = agx_context(pctx); + struct agx_batch *batch = ctx->batch; + + if (agx_scissor_culls_everything(ctx)) + return; + + /* TODO: masks */ + ctx->batch->draw |= ~0; + + /* TODO: Dirty track */ + agx_update_vs(ctx); + agx_update_fs(ctx); + + agx_batch_add_bo(batch, ctx->vs->bo); + agx_batch_add_bo(batch, ctx->fs->bo); + + bool is_lines = + (info->mode == PIPE_PRIM_LINES) || + (info->mode == PIPE_PRIM_LINE_STRIP) || + (info->mode == PIPE_PRIM_LINE_LOOP); + + uint8_t *out = agx_encode_state(ctx, batch->encoder_current, + agx_build_pipeline(ctx, ctx->vs, PIPE_SHADER_VERTEX), + agx_build_pipeline(ctx, ctx->fs, PIPE_SHADER_FRAGMENT), + ctx->fs->varyings, is_lines, info->mode == PIPE_PRIM_POINTS); + + enum agx_primitive prim = agx_primitive_for_pipe(info->mode); + unsigned idx_size = info->index_size; + + if (idx_size) { + uint64_t ib = agx_index_buffer_ptr(batch, draws, info); + + /* Index sizes are encoded logarithmically */ + STATIC_ASSERT(__builtin_ctz(1) == AGX_INDEX_SIZE_U8); + STATIC_ASSERT(__builtin_ctz(2) == AGX_INDEX_SIZE_U16); + STATIC_ASSERT(__builtin_ctz(4) == AGX_INDEX_SIZE_U32); + assert((idx_size == 1) || (idx_size == 2) || (idx_size == 4)); + + agx_pack(out, INDEXED_DRAW, cfg) { + cfg.restart_index = info->restart_index; + cfg.unk_2a = (ib >> 32); + cfg.primitive = prim; + 
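/* The 64-bit index buffer address is split: unk_2a above carries the
+       * upper 32 bits, index_buffer_offset below the lower 32. */
+      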
cfg.restart_enable = info->primitive_restart; + cfg.index_size = __builtin_ctz(idx_size); + cfg.index_buffer_offset = (ib & BITFIELD_MASK(32)); + cfg.index_buffer_size = ALIGN_POT(draws->count * idx_size, 4); + cfg.index_count = draws->count; + cfg.instance_count = info->instance_count; + cfg.base_vertex = draws->index_bias; + }; + + out += AGX_INDEXED_DRAW_LENGTH; + } else { + agx_pack(out, DRAW, cfg) { + cfg.primitive = prim; + cfg.vertex_start = draws->start; + cfg.vertex_count = draws->count; + cfg.instance_count = info->instance_count; + }; + + out += AGX_DRAW_LENGTH; + } + + batch->encoder_current = out; + ctx->dirty = 0; +} + +void agx_init_state_functions(struct pipe_context *ctx); + +void +agx_init_state_functions(struct pipe_context *ctx) +{ + ctx->create_blend_state = agx_create_blend_state; + ctx->create_depth_stencil_alpha_state = agx_create_zsa_state; + ctx->create_fs_state = agx_create_shader_state; + ctx->create_rasterizer_state = agx_create_rs_state; + ctx->create_sampler_state = agx_create_sampler_state; + ctx->create_sampler_view = agx_create_sampler_view; + ctx->create_surface = agx_create_surface; + ctx->create_vertex_elements_state = agx_create_vertex_elements; + ctx->create_vs_state = agx_create_shader_state; + ctx->bind_blend_state = agx_bind_blend_state; + ctx->bind_depth_stencil_alpha_state = agx_bind_zsa_state; + ctx->bind_sampler_states = agx_bind_sampler_states; + ctx->bind_fs_state = agx_bind_shader_state; + ctx->bind_rasterizer_state = agx_bind_rasterizer_state; + ctx->bind_vertex_elements_state = agx_bind_vertex_elements_state; + ctx->bind_vs_state = agx_bind_shader_state; + ctx->delete_blend_state = agx_delete_state; + ctx->delete_depth_stencil_alpha_state = agx_delete_state; + ctx->delete_fs_state = agx_delete_shader_state; + ctx->delete_rasterizer_state = agx_delete_state; + ctx->delete_sampler_state = agx_delete_sampler_state; + ctx->delete_vertex_elements_state = agx_delete_state; + ctx->delete_vs_state = agx_delete_state; + ctx->set_blend_color = agx_set_blend_color; + ctx->set_clip_state = agx_set_clip_state; + ctx->set_constant_buffer = agx_set_constant_buffer; + ctx->set_sampler_views = agx_set_sampler_views; + ctx->set_framebuffer_state = agx_set_framebuffer_state; + ctx->set_polygon_stipple = agx_set_polygon_stipple; + ctx->set_sample_mask = agx_set_sample_mask; + ctx->set_scissor_states = agx_set_scissor_states; + ctx->set_stencil_ref = agx_set_stencil_ref; + ctx->set_vertex_buffers = agx_set_vertex_buffers; + ctx->set_viewport_states = agx_set_viewport_states; + ctx->sampler_view_destroy = agx_sampler_view_destroy; + ctx->surface_destroy = agx_surface_destroy; + ctx->draw_vbo = agx_draw_vbo; + ctx->create_stream_output_target = agx_create_stream_output_target; + ctx->stream_output_target_destroy = agx_stream_output_target_destroy; + ctx->set_stream_output_targets = agx_set_stream_output_targets; +} diff --git a/lib/mesa/src/gallium/drivers/asahi/agx_state.h b/lib/mesa/src/gallium/drivers/asahi/agx_state.h new file mode 100644 index 000000000..a4abc0760 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/agx_state.h @@ -0,0 +1,308 @@ +/* + * Copyright 2021 Alyssa Rosenzweig + * Copyright (C) 2019-2021 Collabora, Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef AGX_STATE_H +#define AGX_STATE_H + +#include "gallium/include/pipe/p_context.h" +#include "gallium/include/pipe/p_state.h" +#include "gallium/include/pipe/p_screen.h" +#include "gallium/auxiliary/util/u_blitter.h" +#include "asahi/lib/agx_pack.h" +#include "asahi/lib/agx_bo.h" +#include "asahi/lib/agx_device.h" +#include "asahi/lib/pool.h" +#include "asahi/compiler/agx_compile.h" +#include "compiler/nir/nir_lower_blend.h" +#include "util/hash_table.h" +#include "util/bitset.h" + +struct agx_streamout_target { + struct pipe_stream_output_target base; + uint32_t offset; +}; + +struct agx_streamout { + struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS]; + unsigned num_targets; +}; + +static inline struct agx_streamout_target * +agx_so_target(struct pipe_stream_output_target *target) +{ + return (struct agx_streamout_target *)target; +} + +struct agx_compiled_shader { + /* Mapped executable memory */ + struct agx_bo *bo; + + /* Varying descriptor (TODO: is this the right place?) 
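This points at the
+    * packed VARYING_HEADER and VARYING records copied after the shader
+    * binary in agx_update_shader.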
*/ + uint64_t varyings; + + /* Metadata returned from the compiler */ + struct agx_shader_info info; +}; + +struct agx_uncompiled_shader { + struct pipe_shader_state base; + struct nir_shader *nir; + struct hash_table *variants; + + /* Set on VS, passed to FS for linkage */ + unsigned base_varying; +}; + +struct agx_stage { + struct agx_uncompiled_shader *shader; + uint32_t dirty; + + struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS]; + uint32_t cb_mask; + + /* Need full CSOs for u_blitter */ + struct agx_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + struct agx_sampler_view *textures[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + + unsigned sampler_count, texture_count; +}; + +/* Uploaded scissor descriptors */ +struct agx_scissors { + struct agx_bo *bo; + unsigned count; +}; + +struct agx_batch { + unsigned width, height, nr_cbufs; + struct pipe_surface *cbufs[8]; + struct pipe_surface *zsbuf; + + /* PIPE_CLEAR_* bitmask */ + uint32_t clear, draw; + + float clear_color[4]; + + /* Resource list requirements, represented as a bit set indexed by BO + * handles (GEM handles on Linux, or IOGPU's equivalent on macOS) */ + BITSET_WORD bo_list[256]; + + struct agx_pool pool, pipeline_pool; + struct agx_bo *encoder; + uint8_t *encoder_current; + + struct agx_scissors scissor; +}; + +struct agx_zsa { + struct pipe_depth_stencil_alpha_state base; + struct agx_rasterizer_face_packed front, back; +}; + +struct agx_blend { + bool logicop_enable, blend_enable; + + union { + nir_lower_blend_rt rt[8]; + unsigned logicop_func; + }; +}; + +struct asahi_shader_key { + struct agx_shader_key base; + struct agx_blend blend; + unsigned nr_cbufs; + enum pipe_format rt_formats[PIPE_MAX_COLOR_BUFS]; +}; + +enum agx_dirty { + AGX_DIRTY_VERTEX = BITFIELD_BIT(0), + AGX_DIRTY_VIEWPORT = BITFIELD_BIT(1), + AGX_DIRTY_SCISSOR = BITFIELD_BIT(2), +}; + +struct agx_context { + struct pipe_context base; + struct agx_compiled_shader *vs, *fs; + uint32_t dirty; + + struct agx_batch *batch; + + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + uint32_t vb_mask; + + struct agx_stage stage[PIPE_SHADER_TYPES]; + struct agx_attribute *attributes; + struct agx_rasterizer *rast; + struct agx_zsa zs; + struct agx_blend *blend; + struct pipe_blend_color blend_color; + struct pipe_viewport_state viewport; + struct pipe_scissor_state scissor; + struct pipe_stencil_ref stencil_ref; + struct agx_streamout streamout; + uint16_t sample_mask; + struct pipe_framebuffer_state framebuffer; + + struct pipe_query *cond_query; + bool cond_cond; + enum pipe_render_cond_flag cond_mode; + + bool is_noop; + + uint8_t render_target[8][AGX_RENDER_TARGET_LENGTH]; + + struct blitter_context *blitter; +}; + +static inline struct agx_context * +agx_context(struct pipe_context *pctx) +{ + return (struct agx_context *) pctx; +} + +struct agx_rasterizer { + struct pipe_rasterizer_state base; + uint8_t cull[AGX_CULL_LENGTH]; + uint8_t line_width; +}; + +struct agx_query { + unsigned query; +}; + +struct agx_sampler_state { + struct pipe_sampler_state base; + + /* Prepared descriptor */ + struct agx_bo *desc; +}; + +struct agx_sampler_view { + struct pipe_sampler_view base; + + /* Prepared descriptor */ + struct agx_bo *desc; +}; + +struct agx_screen { + struct pipe_screen pscreen; + struct agx_device dev; + struct sw_winsys *winsys; +}; + +static inline struct agx_screen * +agx_screen(struct pipe_screen *p) +{ + return (struct agx_screen *)p; +} + +static inline struct agx_device * +agx_device(struct pipe_screen *p) +{ + return &(agx_screen(p)->dev); +} + 
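+/* For illustration only: a typical state hook recovers the driver objects
+ * with the inline downcasts above before touching state, e.g.
+ *
+ *    struct agx_context *ctx = agx_context(pctx);
+ *    struct agx_device *dev = agx_device(pctx->screen);
+ */
+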
+/* TODO: UABI, fake for macOS */
+#ifndef DRM_FORMAT_MOD_LINEAR
+#define DRM_FORMAT_MOD_LINEAR 1
+#endif
+#define DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER (2)
+
+struct agx_resource {
+   struct pipe_resource base;
+   uint64_t modifier;
+
+   /* Hardware backing */
+   struct agx_bo *bo;
+
+   /* Software backing (XXX) */
+   struct sw_displaytarget *dt;
+   unsigned dt_stride;
+
+   BITSET_DECLARE(data_valid, PIPE_MAX_TEXTURE_LEVELS);
+
+   struct {
+      unsigned offset;
+      unsigned line_stride;
+   } slices[PIPE_MAX_TEXTURE_LEVELS];
+
+   /* Bytes from one miptree to the next */
+   unsigned array_stride;
+};
+
+static inline struct agx_resource *
+agx_resource(struct pipe_resource *pctx)
+{
+   return (struct agx_resource *) pctx;
+}
+
+struct agx_transfer {
+   struct pipe_transfer base;
+   void *map;
+   struct {
+      struct pipe_resource *rsrc;
+      struct pipe_box box;
+   } staging;
+};
+
+static inline struct agx_transfer *
+agx_transfer(struct pipe_transfer *p)
+{
+   return (struct agx_transfer *)p;
+}
+
+uint64_t
+agx_push_location(struct agx_context *ctx, struct agx_push push,
+                  enum pipe_shader_type stage);
+
+uint64_t
+agx_build_clear_pipeline(struct agx_context *ctx, uint32_t code, uint64_t clear_buf);
+
+uint64_t
+agx_build_store_pipeline(struct agx_context *ctx, uint32_t code,
+                         uint64_t render_target);
+
+uint64_t
+agx_build_reload_pipeline(struct agx_context *ctx, uint32_t code, struct pipe_surface *surf);
+
+/* Add a BO to a batch. This needs to be amortized O(1) since it's called in
+ * hot paths. To achieve this we model BO lists by bit sets */
+
+static inline void
+agx_batch_add_bo(struct agx_batch *batch, struct agx_bo *bo)
+{
+   /* A handle equal to the bit capacity would already overflow the set */
+   if (unlikely(bo->handle >= (sizeof(batch->bo_list) * 8)))
+      unreachable("todo: growable");
+
+   BITSET_SET(batch->bo_list, bo->handle);
+}
+
+/* Blit shaders */
+void agx_blit(struct pipe_context *pipe,
+              const struct pipe_blit_info *info);
+
+void agx_internal_shaders(struct agx_device *dev);
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/asahi/agx_uniforms.c b/lib/mesa/src/gallium/drivers/asahi/agx_uniforms.c
new file mode 100644
index 000000000..136503737
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/asahi/agx_uniforms.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2021 Alyssa Rosenzweig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ +#include <stdio.h> +#include "agx_state.h" +#include "asahi/lib/agx_pack.h" + +/* Computes the address for a push uniform, adding referenced BOs to the + * current batch as necessary. Note anything uploaded via the batch's pool does + * not require an update to the BO list, since the entire pool will be added + * once at submit time. */ + +static uint64_t +agx_const_buffer_ptr(struct agx_batch *batch, + struct pipe_constant_buffer *cb) +{ + if (cb->buffer) { + struct agx_bo *bo = agx_resource(cb->buffer)->bo; + agx_batch_add_bo(batch, bo); + + return bo->ptr.gpu + cb->buffer_offset; + } else { + return agx_pool_upload_aligned(&batch->pool, + ((uint8_t *) cb->user_buffer) + cb->buffer_offset, + cb->buffer_size - cb->buffer_offset, 64); + } +} + +static uint64_t +agx_push_location_direct(struct agx_context *ctx, struct agx_push push, + enum pipe_shader_type stage) +{ + struct agx_batch *batch = ctx->batch; + struct agx_stage *st = &ctx->stage[stage]; + + switch (push.type) { + case AGX_PUSH_UBO_BASES: { + unsigned count = util_last_bit(st->cb_mask); + struct agx_ptr ptr = agx_pool_alloc_aligned(&batch->pool, count * sizeof(uint64_t), 8); + uint64_t *addresses = ptr.cpu; + + for (unsigned i = 0; i < count; ++i) { + struct pipe_constant_buffer *cb = &st->cb[i]; + addresses[i] = agx_const_buffer_ptr(batch, cb); + } + + return ptr.gpu; + } + + case AGX_PUSH_VBO_BASES: { + unsigned count = util_last_bit(ctx->vb_mask); + struct agx_ptr ptr = agx_pool_alloc_aligned(&batch->pool, count * sizeof(uint64_t), 8); + uint64_t *addresses = ptr.cpu; + + u_foreach_bit(i, ctx->vb_mask) { + struct pipe_vertex_buffer vb = ctx->vertex_buffers[i]; + assert(!vb.is_user_buffer); + + struct agx_bo *bo = agx_resource(vb.buffer.resource)->bo; + agx_batch_add_bo(batch, bo); + + addresses[i] = bo->ptr.gpu + vb.buffer_offset; + } + + return ptr.gpu; + } + + case AGX_PUSH_BLEND_CONST: + { + return agx_pool_upload_aligned(&batch->pool, &ctx->blend_color, + sizeof(ctx->blend_color), 8); + } + + default: + unreachable("todo: push more"); + } +} + +uint64_t +agx_push_location(struct agx_context *ctx, struct agx_push push, + enum pipe_shader_type stage) +{ + uint64_t direct = agx_push_location_direct(ctx, push, stage); + struct agx_pool *pool = &ctx->batch->pool; + + if (push.indirect) + return agx_pool_upload(pool, &direct, sizeof(direct)); + else + return direct; +} diff --git a/lib/mesa/src/gallium/drivers/asahi/magic.c b/lib/mesa/src/gallium/drivers/asahi/magic.c new file mode 100644 index 000000000..fa56ede4f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/magic.c @@ -0,0 +1,211 @@ +/* + * Copyright 2021 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +# +#include <stdint.h> +#include "agx_state.h" +#include "magic.h" + +/* The structures managed in this file appear to be software defined (either in + * the macOS kernel driver or in the AGX firmware) */ + +/* Odd pattern */ +static uint64_t +demo_unk6(struct agx_pool *pool) +{ + struct agx_ptr ptr = agx_pool_alloc_aligned(pool, 0x4000 * sizeof(uint64_t), 64); + uint64_t *buf = ptr.cpu; + memset(buf, 0, sizeof(*buf)); + + for (unsigned i = 1; i < 0x3ff; ++i) + buf[i] = (i + 1); + + return ptr.gpu; +} + +static uint64_t +demo_zero(struct agx_pool *pool, unsigned count) +{ + struct agx_ptr ptr = agx_pool_alloc_aligned(pool, count, 64); + memset(ptr.cpu, 0, count); + return ptr.gpu; +} + +unsigned +demo_cmdbuf(uint64_t *buf, size_t size, + struct agx_pool *pool, + uint64_t encoder_ptr, + uint64_t encoder_id, + uint64_t scissor_ptr, + unsigned width, unsigned height, + uint32_t pipeline_null, + uint32_t pipeline_clear, + uint32_t pipeline_store, + uint64_t rt0, + bool clear_pipeline_textures) +{ + uint32_t *map = (uint32_t *) buf; + memset(map, 0, 474 * 4); + + map[54] = 0x6b0003; + map[55] = 0x3a0012; + map[56] = 1; + + map[106] = 1; + map[108] = 0x1c; + map[112] = 0xffffffff; + map[113] = 0xffffffff; + map[114] = 0xffffffff; + + uint64_t unk_buffer = demo_zero(pool, 0x1000); + uint64_t unk_buffer_2 = demo_zero(pool, 0x8000); + + // This is a pipeline bind + map[156] = 0xffff8002 | (clear_pipeline_textures ? 0x210 : 0); + map[158] = pipeline_clear | 0x4; + map[163] = 0x12; + map[164] = pipeline_store | 0x4; + map[166] = scissor_ptr & 0xFFFFFFFF; + map[167] = scissor_ptr >> 32; + map[168] = unk_buffer & 0xFFFFFFFF; + map[169] = unk_buffer >> 32; + + map[220] = 4; + map[222] = 0xc000; + map[224] = width; + map[225] = height; + map[226] = unk_buffer_2 & 0xFFFFFFFF; + map[227] = unk_buffer_2 >> 32; + + float depth_clear = 1.0; + uint8_t stencil_clear = 0; + + map[278] = fui(depth_clear); + map[279] = (0x3 << 8) | stencil_clear; + map[282] = 0x1000000; + map[284] = 0xffffffff; + map[285] = 0xffffffff; + map[286] = 0xffffffff; + + map[298] = 0xffff8212; + map[300] = pipeline_null | 0x4; + map[305] = 0x12; + map[306] = pipeline_store | 0x4; + map[352] = 1; + map[360] = 0x1c; + map[362] = encoder_id; + map[365] = 0xffffffff; + map[366] = 1; + + uint64_t unk6 = demo_unk6(pool); + map[370] = unk6 & 0xFFFFFFFF; + map[371] = unk6 >> 32; + + map[374] = width; + map[375] = height; + map[376] = 1; + map[377] = 8; + map[378] = 8; + + map[393] = 8; + map[394] = 32; + map[395] = 32; + map[396] = 1; + + unsigned offset_unk = (458 * 4); + unsigned offset_attachments = (470 * 4); + unsigned nr_attachments = 1; + + map[473] = nr_attachments; + + /* A single attachment follows, depth/stencil have their own attachments */ + agx_pack((map + (offset_attachments / 4) + 4), IOGPU_ATTACHMENT, cfg) { + cfg.address = rt0; + cfg.type = AGX_IOGPU_ATTACHMENT_TYPE_COLOUR; + cfg.unk_1 = 0x80000000; + cfg.unk_2 = 0x5; + cfg.bytes_per_pixel = 4; + cfg.percent = 100; + } + + unsigned total_size = offset_attachments + (AGX_IOGPU_ATTACHMENT_LENGTH * nr_attachments) + 16; + + agx_pack(map, IOGPU_HEADER, cfg) { + cfg.total_size = total_size; + cfg.attachment_offset_1 = offset_attachments; + cfg.attachment_offset_2 = offset_attachments; + 
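/* Both attachment offsets reference the same array */
+      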
cfg.attachment_length = nr_attachments * AGX_IOGPU_ATTACHMENT_LENGTH; + cfg.unknown_offset = offset_unk; + cfg.encoder = encoder_ptr; + } + + return total_size; +} + +static struct agx_map_header +demo_map_header(uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size, unsigned count) +{ + return (struct agx_map_header) { + .cmdbuf_id = cmdbuf_id, + .unk2 = 0x1, + .unk3 = 0x528, // 1320 + .encoder_id = encoder_id, + .unk6 = 0x0, + .cmdbuf_size = cmdbuf_size, + + /* +1 for the sentinel ending */ + .nr_entries = count + 1, + .nr_handles = count + 1, + .indices = {0x0b}, + }; +} + +void +demo_mem_map(void *map, size_t size, unsigned *handles, unsigned count, + uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size) +{ + struct agx_map_header *header = map; + struct agx_map_entry *entries = (struct agx_map_entry *) (((uint8_t *) map) + 0x40); + struct agx_map_entry *end = (struct agx_map_entry *) (((uint8_t *) map) + size); + + /* Header precedes the entry */ + *header = demo_map_header(cmdbuf_id, encoder_id, cmdbuf_size, count); + + /* Add an entry for each BO mapped */ + for (unsigned i = 0; i < count; ++i) { + assert((entries + i) < end); + entries[i] = (struct agx_map_entry) { + .unkAAA = 0x20, + .unkBBB = 0x1, + .unka = 0x1ffff, + .indices = {handles[i]} + }; + } + + /* Final entry is a sentinel */ + assert((entries + count) < end); + entries[count] = (struct agx_map_entry) { + .unkAAA = 0x40, + .unkBBB = 0x1, + .unka = 0x1ffff, + }; +} diff --git a/lib/mesa/src/gallium/drivers/asahi/magic.h b/lib/mesa/src/gallium/drivers/asahi/magic.h new file mode 100644 index 000000000..98215d367 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/asahi/magic.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2021 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifndef __ASAHI_MAGIC_H
+#define __ASAHI_MAGIC_H
+
+unsigned
+demo_cmdbuf(uint64_t *buf, size_t size,
+            struct agx_pool *pool,
+            uint64_t encoder_ptr,
+            uint64_t encoder_id,
+            uint64_t scissor_ptr,
+            unsigned width, unsigned height,
+            uint32_t pipeline_null,
+            uint32_t pipeline_clear,
+            uint32_t pipeline_store,
+            uint64_t rt0,
+            bool clear_pipeline_textures);
+
+void
+demo_mem_map(void *map, size_t size, unsigned *handles,
+             unsigned count, uint64_t cmdbuf_id,
+             uint64_t encoder_id, unsigned cmdbuf_size);
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/asahi/meson.build b/lib/mesa/src/gallium/drivers/asahi/meson.build
new file mode 100644
index 000000000..24da3bc10
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/asahi/meson.build
@@ -0,0 +1,41 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_asahi = files(
+  'agx_blit.c',
+  'agx_pipe.c',
+  'agx_state.c',
+  'agx_uniforms.c',
+  'magic.c',
+)
+
+libasahi = static_library(
+  'asahi',
+  files_asahi,
+  include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src],
+  c_args : [c_msvc_compat_args],
+  gnu_symbol_visibility : 'hidden',
+  dependencies : [idep_nir, idep_agx_pack],
+)
+
+driver_asahi = declare_dependency(
+  compile_args : '-DGALLIUM_ASAHI',
+  link_with : [libasahi, libasahi_compiler, libasahi_lib, libasahi_decode]
+)