/*
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2019-2020 Collabora, Ltd.
 * Copyright 2014-2017 Broadcom
 * Copyright 2010 Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "agx_state.h"
#include <errno.h>
#include <stdio.h>
#include "asahi/compiler/agx_compile.h"
#include "asahi/lib/agx_formats.h"
#include "asahi/lib/agx_helpers.h"
#include "asahi/lib/agx_pack.h"
#include "asahi/lib/agx_ppp.h"
#include "asahi/lib/agx_usc.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_serialize.h"
#include "gallium/auxiliary/nir/tgsi_to_nir.h"
#include "gallium/auxiliary/tgsi/tgsi_from_mesa.h"
#include "gallium/auxiliary/util/u_blend.h"
#include "gallium/auxiliary/util/u_draw.h"
#include "gallium/auxiliary/util/u_framebuffer.h"
#include "gallium/auxiliary/util/u_helpers.h"
#include "gallium/auxiliary/util/u_viewport.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "util/compiler.h"
#include "util/format_srgb.h"
#include "util/half_float.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/u_resource.h"
#include "util/u_transfer.h"
#include "util/u_upload_mgr.h"
#include "agx_disk_cache.h"

static struct pipe_stream_output_target *
agx_create_stream_output_target(struct pipe_context *pctx,
                                struct pipe_resource *prsc,
                                unsigned buffer_offset, unsigned buffer_size)
{
   struct pipe_stream_output_target *target;

   target = &rzalloc(pctx, struct agx_streamout_target)->base;

   if (!target)
      return NULL;

   pipe_reference_init(&target->reference, 1);
   pipe_resource_reference(&target->buffer, prsc);

   target->context = pctx;
   target->buffer_offset = buffer_offset;
   target->buffer_size = buffer_size;

   return target;
}

static void
agx_stream_output_target_destroy(struct pipe_context *pctx,
                                 struct pipe_stream_output_target *target)
{
   pipe_resource_reference(&target->buffer, NULL);
   ralloc_free(target);
}

static void
agx_set_stream_output_targets(struct pipe_context *pctx, unsigned num_targets,
                              struct pipe_stream_output_target **targets,
                              const unsigned *offsets)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_streamout *so = &ctx->streamout;

   assert(num_targets <= ARRAY_SIZE(so->targets));

   for (unsigned i = 0; i < num_targets; i++) {
      if (offsets[i] != -1)
         agx_so_target(targets[i])->offset = offsets[i];

      pipe_so_target_reference(&so->targets[i], targets[i]);
   }

   /* Unbind only the trailing targets. Starting this loop at 0 would drop
    * the references we just took for the newly bound targets.
    */
   for (unsigned i = num_targets; i < so->num_targets; i++)
      pipe_so_target_reference(&so->targets[i], NULL);

   so->num_targets = num_targets;
}

static void
agx_set_shader_images(struct pipe_context *pctx, enum pipe_shader_type shader,
                      unsigned start_slot, unsigned count,
                      unsigned unbind_num_trailing_slots,
                      const struct pipe_image_view *iviews)
{
   struct agx_context *ctx = agx_context(pctx);
   ctx->stage[shader].dirty = ~0;

   /* Unbind start_slot...start_slot+count+unbind_num_trailing_slots */
   if (!iviews) {
      for (int i = start_slot;
           i < start_slot + count + unbind_num_trailing_slots; i++) {
         pipe_resource_reference(&ctx->stage[shader].images[i].resource, NULL);
      }

      /* Clear the mask for every slot unbound above, including the trailing
       * slots, so the mask stays in sync with the resource references.
       */
      ctx->stage[shader].image_mask &=
         ~(((1ull << (count + unbind_num_trailing_slots)) - 1) << start_slot);
      return;
   }

   /* Bind start_slot...start_slot+count */
   for (int i = 0; i < count; i++) {
      const struct pipe_image_view *image = &iviews[i];

      if (image->resource)
         ctx->stage[shader].image_mask |= BITFIELD_BIT(start_slot + i);
      else
         ctx->stage[shader].image_mask &= ~BITFIELD_BIT(start_slot + i);

      if (!image->resource) {
         util_copy_image_view(&ctx->stage[shader].images[start_slot + i],
                              NULL);
         continue;
      }

      /* FIXME: Decompress here once we have texture compression */
util_copy_image_view(&ctx->stage[shader].images[start_slot + i], image); } /* Unbind start_slot+count...start_slot+count+unbind_num_trailing_slots */ for (int i = 0; i < unbind_num_trailing_slots; i++) { ctx->stage[shader].image_mask &= ~BITFIELD_BIT(start_slot + count + i); util_copy_image_view(&ctx->stage[shader].images[start_slot + count + i], NULL); } } static void agx_set_shader_buffers(struct pipe_context *pctx, enum pipe_shader_type shader, unsigned start, unsigned count, const struct pipe_shader_buffer *buffers, unsigned writable_bitmask) { struct agx_context *ctx = agx_context(pctx); util_set_shader_buffers_mask(ctx->stage[shader].ssbo, &ctx->stage[shader].ssbo_mask, buffers, start, count); ctx->stage[shader].dirty = ~0; } static void agx_set_blend_color(struct pipe_context *pctx, const struct pipe_blend_color *state) { struct agx_context *ctx = agx_context(pctx); if (state) memcpy(&ctx->blend_color, state, sizeof(*state)); ctx->stage[PIPE_SHADER_FRAGMENT].dirty = ~0; } static void * agx_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) { struct agx_blend *so = CALLOC_STRUCT(agx_blend); assert(!state->alpha_to_coverage); assert(!state->alpha_to_one); if (state->logicop_enable) { so->logicop_enable = true; so->logicop_func = state->logicop_func; } for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) { unsigned rti = state->independent_blend_enable ? i : 0; struct pipe_rt_blend_state rt = state->rt[rti]; if (state->logicop_enable) { /* No blending, but we get the colour mask below */ } else if (!rt.blend_enable) { static const nir_lower_blend_channel replace = { .func = BLEND_FUNC_ADD, .src_factor = BLEND_FACTOR_ZERO, .invert_src_factor = true, .dst_factor = BLEND_FACTOR_ZERO, .invert_dst_factor = false, }; so->rt[i].rgb = replace; so->rt[i].alpha = replace; } else { so->rt[i].rgb.func = util_blend_func_to_shader(rt.rgb_func); so->rt[i].rgb.src_factor = util_blend_factor_to_shader(rt.rgb_src_factor); so->rt[i].rgb.invert_src_factor = util_blend_factor_is_inverted(rt.rgb_src_factor); so->rt[i].rgb.dst_factor = util_blend_factor_to_shader(rt.rgb_dst_factor); so->rt[i].rgb.invert_dst_factor = util_blend_factor_is_inverted(rt.rgb_dst_factor); so->rt[i].alpha.func = util_blend_func_to_shader(rt.alpha_func); so->rt[i].alpha.src_factor = util_blend_factor_to_shader(rt.alpha_src_factor); so->rt[i].alpha.invert_src_factor = util_blend_factor_is_inverted(rt.alpha_src_factor); so->rt[i].alpha.dst_factor = util_blend_factor_to_shader(rt.alpha_dst_factor); so->rt[i].alpha.invert_dst_factor = util_blend_factor_is_inverted(rt.alpha_dst_factor); so->blend_enable = true; } so->rt[i].colormask = rt.colormask; if (rt.colormask) so->store |= (PIPE_CLEAR_COLOR0 << i); } return so; } static void agx_bind_blend_state(struct pipe_context *pctx, void *cso) { struct agx_context *ctx = agx_context(pctx); ctx->blend = cso; ctx->dirty |= AGX_DIRTY_BLEND; } static const enum agx_stencil_op agx_stencil_ops[PIPE_STENCIL_OP_INVERT + 1] = { [PIPE_STENCIL_OP_KEEP] = AGX_STENCIL_OP_KEEP, [PIPE_STENCIL_OP_ZERO] = AGX_STENCIL_OP_ZERO, [PIPE_STENCIL_OP_REPLACE] = AGX_STENCIL_OP_REPLACE, [PIPE_STENCIL_OP_INCR] = AGX_STENCIL_OP_INCR_SAT, [PIPE_STENCIL_OP_DECR] = AGX_STENCIL_OP_DECR_SAT, [PIPE_STENCIL_OP_INCR_WRAP] = AGX_STENCIL_OP_INCR_WRAP, [PIPE_STENCIL_OP_DECR_WRAP] = AGX_STENCIL_OP_DECR_WRAP, [PIPE_STENCIL_OP_INVERT] = AGX_STENCIL_OP_INVERT, }; static void agx_pack_stencil(struct agx_fragment_stencil_packed *out, struct pipe_stencil_state st) { if (st.enabled) { agx_pack(out, 
FRAGMENT_STENCIL, cfg) { cfg.compare = (enum agx_zs_func)st.func; cfg.write_mask = st.writemask; cfg.read_mask = st.valuemask; cfg.depth_pass = agx_stencil_ops[st.zpass_op]; cfg.depth_fail = agx_stencil_ops[st.zfail_op]; cfg.stencil_fail = agx_stencil_ops[st.fail_op]; } } else { agx_pack(out, FRAGMENT_STENCIL, cfg) { cfg.compare = AGX_ZS_FUNC_ALWAYS; cfg.write_mask = 0xFF; cfg.read_mask = 0xFF; cfg.depth_pass = AGX_STENCIL_OP_KEEP; cfg.depth_fail = AGX_STENCIL_OP_KEEP; cfg.stencil_fail = AGX_STENCIL_OP_KEEP; } } } static void * agx_create_zsa_state(struct pipe_context *ctx, const struct pipe_depth_stencil_alpha_state *state) { struct agx_zsa *so = CALLOC_STRUCT(agx_zsa); assert(!state->depth_bounds_test && "todo"); so->base = *state; /* Z func can be used as-is */ STATIC_ASSERT((enum agx_zs_func)PIPE_FUNC_NEVER == AGX_ZS_FUNC_NEVER); STATIC_ASSERT((enum agx_zs_func)PIPE_FUNC_LESS == AGX_ZS_FUNC_LESS); STATIC_ASSERT((enum agx_zs_func)PIPE_FUNC_EQUAL == AGX_ZS_FUNC_EQUAL); STATIC_ASSERT((enum agx_zs_func)PIPE_FUNC_LEQUAL == AGX_ZS_FUNC_LEQUAL); STATIC_ASSERT((enum agx_zs_func)PIPE_FUNC_GREATER == AGX_ZS_FUNC_GREATER); STATIC_ASSERT((enum agx_zs_func)PIPE_FUNC_NOTEQUAL == AGX_ZS_FUNC_NOT_EQUAL); STATIC_ASSERT((enum agx_zs_func)PIPE_FUNC_GEQUAL == AGX_ZS_FUNC_GEQUAL); STATIC_ASSERT((enum agx_zs_func)PIPE_FUNC_ALWAYS == AGX_ZS_FUNC_ALWAYS); agx_pack(&so->depth, FRAGMENT_FACE, cfg) { cfg.depth_function = state->depth_enabled ? ((enum agx_zs_func)state->depth_func) : AGX_ZS_FUNC_ALWAYS; cfg.disable_depth_write = !state->depth_writemask; } agx_pack_stencil(&so->front_stencil, state->stencil[0]); if (state->stencil[1].enabled) { agx_pack_stencil(&so->back_stencil, state->stencil[1]); } else { /* One sided stencil */ so->back_stencil = so->front_stencil; } if (state->depth_enabled) { if (state->depth_func != PIPE_FUNC_NEVER && state->depth_func != PIPE_FUNC_ALWAYS) { so->load |= PIPE_CLEAR_DEPTH; } if (state->depth_writemask) so->store |= PIPE_CLEAR_DEPTH; } if (state->stencil[0].enabled) { so->load |= PIPE_CLEAR_STENCIL; /* TODO: Optimize */ so->store |= PIPE_CLEAR_STENCIL; } return so; } static void agx_bind_zsa_state(struct pipe_context *pctx, void *cso) { struct agx_context *ctx = agx_context(pctx); ctx->zs = cso; ctx->dirty |= AGX_DIRTY_ZS; } static enum agx_polygon_mode agx_translate_polygon_mode(unsigned mode) { switch (mode) { case PIPE_POLYGON_MODE_FILL: return AGX_POLYGON_MODE_FILL; case PIPE_POLYGON_MODE_POINT: return AGX_POLYGON_MODE_POINT; case PIPE_POLYGON_MODE_LINE: return AGX_POLYGON_MODE_LINE; default: unreachable("Unsupported polygon mode"); } } static void * agx_create_rs_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *cso) { struct agx_rasterizer *so = CALLOC_STRUCT(agx_rasterizer); so->base = *cso; /* Line width is packed in a 4:4 fixed point format */ unsigned line_width_fixed = ((unsigned)(cso->line_width * 16.0f)) - 1; /* Clamp to maximum line width */ so->line_width = MIN2(line_width_fixed, 0xFF); agx_pack(so->cull, CULL, cfg) { cfg.cull_front = cso->cull_face & PIPE_FACE_FRONT; cfg.cull_back = cso->cull_face & PIPE_FACE_BACK; cfg.front_face_ccw = cso->front_ccw; cfg.depth_clip = cso->depth_clip_near; cfg.depth_clamp = !cso->depth_clip_near; cfg.flat_shading_vertex = cso->flatshade_first ? AGX_PPP_VERTEX_0 : AGX_PPP_VERTEX_2; cfg.rasterizer_discard = cso->rasterizer_discard; }; /* Two-sided polygon mode doesn't seem to work on G13. Apple's OpenGL * implementation lowers to multiple draws with culling. Warn. 
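 * Rather than replicating those draws in the driver, we pick the front
 * fill mode for both faces below and accept the inaccuracy.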
 */
   if (unlikely(cso->fill_front != cso->fill_back)) {
      agx_msg("Warning: Two-sided fill modes are unsupported, "
              "rendering may be incorrect.\n");
   }

   so->polygon_mode = agx_translate_polygon_mode(cso->fill_front);
   return so;
}

static void
agx_bind_rasterizer_state(struct pipe_context *pctx, void *cso)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_rasterizer *so = cso;

   bool base_cso_changed = (cso == NULL) || (ctx->rast == NULL);

   /* Check if scissor or depth bias state has changed, since scissor/depth bias
    * enable is part of the rasterizer state but everything else needed for
    * scissors and depth bias is part of the scissor/depth bias arrays */
   bool scissor_zbias_changed =
      base_cso_changed || (ctx->rast->base.scissor != so->base.scissor) ||
      (ctx->rast->base.offset_tri != so->base.offset_tri);

   /* Compare against the outgoing CSO before replacing it. Assigning
    * ctx->rast first would make this a self-comparison that never fires.
    */
   bool sprite_coord_changed =
      base_cso_changed ||
      (ctx->rast->base.sprite_coord_mode != so->base.sprite_coord_mode);

   ctx->rast = so;
   ctx->dirty |= AGX_DIRTY_RS;

   if (scissor_zbias_changed)
      ctx->dirty |= AGX_DIRTY_SCISSOR_ZBIAS;

   if (sprite_coord_changed)
      ctx->dirty |= AGX_DIRTY_SPRITE_COORD_MODE;
}

static enum agx_wrap
agx_wrap_from_pipe(enum pipe_tex_wrap in)
{
   switch (in) {
   case PIPE_TEX_WRAP_REPEAT:
      return AGX_WRAP_REPEAT;
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      return AGX_WRAP_CLAMP_TO_EDGE;
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      return AGX_WRAP_MIRRORED_REPEAT;
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      return AGX_WRAP_CLAMP_TO_BORDER;
   case PIPE_TEX_WRAP_CLAMP:
      return AGX_WRAP_CLAMP_GL;
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      return AGX_WRAP_MIRRORED_CLAMP_TO_EDGE;
   default:
      unreachable("Invalid wrap mode");
   }
}

static enum agx_mip_filter
agx_mip_filter_from_pipe(enum pipe_tex_mipfilter in)
{
   switch (in) {
   case PIPE_TEX_MIPFILTER_NEAREST:
      return AGX_MIP_FILTER_NEAREST;
   case PIPE_TEX_MIPFILTER_LINEAR:
      return AGX_MIP_FILTER_LINEAR;
   case PIPE_TEX_MIPFILTER_NONE:
      return AGX_MIP_FILTER_NONE;
   }

   unreachable("Invalid mip filter");
}

static const enum agx_compare_func agx_compare_funcs[PIPE_FUNC_ALWAYS + 1] = {
   [PIPE_FUNC_NEVER] = AGX_COMPARE_FUNC_NEVER,
   [PIPE_FUNC_LESS] = AGX_COMPARE_FUNC_LESS,
   [PIPE_FUNC_EQUAL] = AGX_COMPARE_FUNC_EQUAL,
   [PIPE_FUNC_LEQUAL] = AGX_COMPARE_FUNC_LEQUAL,
   [PIPE_FUNC_GREATER] = AGX_COMPARE_FUNC_GREATER,
   [PIPE_FUNC_NOTEQUAL] = AGX_COMPARE_FUNC_NOT_EQUAL,
   [PIPE_FUNC_GEQUAL] = AGX_COMPARE_FUNC_GEQUAL,
   [PIPE_FUNC_ALWAYS] = AGX_COMPARE_FUNC_ALWAYS,
};

static enum pipe_format
fixup_border_zs(enum pipe_format orig, union pipe_color_union *c)
{
   switch (orig) {
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
   case PIPE_FORMAT_Z24X8_UNORM:
      /* Z24 is internally promoted to Z32F via transfer_helper. These formats
       * are normalized so should get clamped, but Z32F does not get clamped, so
       * we clamp here.
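       * (SATURATE clamps to [0, 1], matching the range a UNORM depth format
       * would have stored for the border colour.)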
*/ c->f[0] = SATURATE(c->f[0]); return PIPE_FORMAT_Z32_FLOAT; case PIPE_FORMAT_X24S8_UINT: case PIPE_FORMAT_X32_S8X24_UINT: /* Separate stencil is internally promoted */ return PIPE_FORMAT_S8_UINT; default: return orig; } } static void * agx_create_sampler_state(struct pipe_context *pctx, const struct pipe_sampler_state *state) { struct agx_sampler_state *so = CALLOC_STRUCT(agx_sampler_state); so->base = *state; /* We report a max texture LOD bias of 16, so clamp appropriately */ float lod_bias = CLAMP(state->lod_bias, -16.0, 16.0); so->lod_bias_as_fp16 = _mesa_float_to_half(lod_bias); agx_pack(&so->desc, SAMPLER, cfg) { cfg.minimum_lod = state->min_lod; cfg.maximum_lod = state->max_lod; cfg.maximum_anisotropy = util_next_power_of_two(MAX2(state->max_anisotropy, 1)); cfg.magnify_linear = (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR); cfg.minify_linear = (state->min_img_filter == PIPE_TEX_FILTER_LINEAR); cfg.mip_filter = agx_mip_filter_from_pipe(state->min_mip_filter); cfg.wrap_s = agx_wrap_from_pipe(state->wrap_s); cfg.wrap_t = agx_wrap_from_pipe(state->wrap_t); cfg.wrap_r = agx_wrap_from_pipe(state->wrap_r); cfg.pixel_coordinates = state->unnormalized_coords; cfg.compare_func = agx_compare_funcs[state->compare_func]; cfg.compare_enable = state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE; cfg.seamful_cube_maps = !state->seamless_cube_map; if (state->border_color_format != PIPE_FORMAT_NONE) { /* TODO: Optimize to use compact descriptors for black/white borders */ so->uses_custom_border = true; cfg.border_colour = AGX_BORDER_COLOUR_CUSTOM; } } if (so->uses_custom_border) { union pipe_color_union border = state->border_color; enum pipe_format format = fixup_border_zs(state->border_color_format, &border); agx_pack_border(&so->border, border.ui, format); } return so; } static void agx_delete_sampler_state(struct pipe_context *ctx, void *state) { struct agx_sampler_state *so = state; FREE(so); } static void agx_bind_sampler_states(struct pipe_context *pctx, enum pipe_shader_type shader, unsigned start, unsigned count, void **states) { struct agx_context *ctx = agx_context(pctx); ctx->stage[shader].dirty = ~0; for (unsigned i = 0; i < count; i++) { unsigned p = start + i; ctx->stage[shader].samplers[p] = states ? 
states[i] : NULL; if (ctx->stage[shader].samplers[p]) ctx->stage[shader].valid_samplers |= BITFIELD_BIT(p); else ctx->stage[shader].valid_samplers &= ~BITFIELD_BIT(p); } ctx->stage[shader].sampler_count = util_last_bit(ctx->stage[shader].valid_samplers); /* Recalculate whether we need custom borders */ ctx->stage[shader].custom_borders = false; u_foreach_bit(i, ctx->stage[shader].valid_samplers) { if (ctx->stage[shader].samplers[i]->uses_custom_border) ctx->stage[shader].custom_borders = true; } } /* Channels agree for RGBA but are weird for force 0/1 */ static enum agx_channel agx_channel_from_pipe(enum pipe_swizzle in) { STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_X == AGX_CHANNEL_R); STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_Y == AGX_CHANNEL_G); STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_Z == AGX_CHANNEL_B); STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_W == AGX_CHANNEL_A); STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_0 & 0x4); STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_1 & 0x4); STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_NONE & 0x4); if ((in & 0x4) == 0) return (enum agx_channel)in; else if (in == PIPE_SWIZZLE_1) return AGX_CHANNEL_1; else return AGX_CHANNEL_0; } static enum agx_layout agx_translate_layout(enum ail_tiling tiling) { switch (tiling) { case AIL_TILING_TWIDDLED: case AIL_TILING_TWIDDLED_COMPRESSED: return AGX_LAYOUT_TWIDDLED; case AIL_TILING_LINEAR: return AGX_LAYOUT_LINEAR; } unreachable("Invalid tiling"); } static enum agx_texture_dimension agx_translate_tex_dim(enum pipe_texture_target dim, unsigned samples) { assert(samples >= 1); switch (dim) { case PIPE_BUFFER: case PIPE_TEXTURE_1D: /* Lowered to 2D */ assert(samples == 1); return AGX_TEXTURE_DIMENSION_2D; case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_2D: return samples > 1 ? AGX_TEXTURE_DIMENSION_2D_MULTISAMPLED : AGX_TEXTURE_DIMENSION_2D; case PIPE_TEXTURE_1D_ARRAY: assert(samples == 1); /* Lowered to 2D */ FALLTHROUGH; case PIPE_TEXTURE_2D_ARRAY: return samples > 1 ? AGX_TEXTURE_DIMENSION_2D_ARRAY_MULTISAMPLED : AGX_TEXTURE_DIMENSION_2D_ARRAY; case PIPE_TEXTURE_3D: assert(samples == 1); return AGX_TEXTURE_DIMENSION_3D; case PIPE_TEXTURE_CUBE: assert(samples == 1); return AGX_TEXTURE_DIMENSION_CUBE; default: unreachable("Unsupported texture dimension"); } } static enum agx_sample_count agx_translate_sample_count(unsigned samples) { switch (samples) { case 2: return AGX_SAMPLE_COUNT_2; case 4: return AGX_SAMPLE_COUNT_4; default: unreachable("Invalid sample count"); } } static void agx_pack_texture(void *out, struct agx_resource *rsrc, enum pipe_format format /* override */, const struct pipe_sampler_view *state, bool include_bo) { const struct util_format_description *desc = util_format_description(format); assert(agx_is_valid_pixel_format(format)); uint8_t format_swizzle[4] = { desc->swizzle[0], desc->swizzle[1], desc->swizzle[2], desc->swizzle[3], }; if (util_format_has_stencil(desc)) { assert(!util_format_has_depth(desc) && "separate stencil always used"); /* Broadcast stencil */ format_swizzle[0] = 0; format_swizzle[1] = 0; format_swizzle[2] = 0; format_swizzle[3] = 0; } /* We only have a single swizzle for the user swizzle and the format fixup, * so compose them now. */ uint8_t out_swizzle[4]; uint8_t view_swizzle[4] = {state->swizzle_r, state->swizzle_g, state->swizzle_b, state->swizzle_a}; util_format_compose_swizzles(format_swizzle, view_swizzle, out_swizzle); unsigned first_layer = (state->target == PIPE_BUFFER) ? 
0 : state->u.tex.first_layer; /* Pack the descriptor into GPU memory */ agx_pack(out, TEXTURE, cfg) { cfg.dimension = agx_translate_tex_dim(state->target, util_res_sample_count(&rsrc->base)); cfg.layout = agx_translate_layout(rsrc->layout.tiling); cfg.channels = agx_pixel_format[format].channels; cfg.type = agx_pixel_format[format].type; cfg.swizzle_r = agx_channel_from_pipe(out_swizzle[0]); cfg.swizzle_g = agx_channel_from_pipe(out_swizzle[1]); cfg.swizzle_b = agx_channel_from_pipe(out_swizzle[2]); cfg.swizzle_a = agx_channel_from_pipe(out_swizzle[3]); if (state->target == PIPE_BUFFER) { unsigned size_el = agx_texture_buffer_size_el(format, state->u.buf.size); /* Use a 2D texture to increase the maximum size */ cfg.width = 1024; cfg.height = DIV_ROUND_UP(size_el, cfg.width); cfg.first_level = cfg.last_level = 0; /* Stash the actual size in an unused part of the texture descriptor, * which we'll read later to implement txs. */ cfg.acceleration_buffer = (size_el << 4); } else { cfg.width = rsrc->base.width0; cfg.height = rsrc->base.height0; cfg.first_level = state->u.tex.first_level; cfg.last_level = state->u.tex.last_level; } cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); cfg.unk_mipmapped = rsrc->mipmapped; cfg.srgb_2_channel = cfg.srgb && util_format_colormask(desc) == 0x3; if (ail_is_compressed(&rsrc->layout)) { cfg.compressed_1 = true; cfg.extended = true; } if (include_bo) { cfg.address = agx_map_texture_gpu(rsrc, first_layer); if (state->target == PIPE_BUFFER) cfg.address += state->u.buf.offset; if (ail_is_compressed(&rsrc->layout)) { cfg.acceleration_buffer = agx_map_texture_gpu(rsrc, 0) + rsrc->layout.metadata_offset_B + (first_layer * rsrc->layout.compression_layer_stride_B); } } if (state->target == PIPE_TEXTURE_3D) { cfg.depth = rsrc->base.depth0; } else if (state->target == PIPE_BUFFER) { cfg.depth = 1; } else { unsigned layers = state->u.tex.last_layer - state->u.tex.first_layer + 1; if ((state->target == PIPE_TEXTURE_CUBE) || (state->target == PIPE_TEXTURE_CUBE_ARRAY)) layers /= 6; if (rsrc->layout.tiling == AIL_TILING_LINEAR && state->target == PIPE_TEXTURE_2D_ARRAY) { cfg.depth_linear = layers; cfg.layer_stride_linear = (rsrc->layout.layer_stride_B - 0x80); cfg.extended = true; } else { assert((rsrc->layout.tiling != AIL_TILING_LINEAR) || (layers == 1)); cfg.depth = layers; } } if (rsrc->base.nr_samples > 1) cfg.samples = agx_translate_sample_count(rsrc->base.nr_samples); if (state->target == PIPE_BUFFER) { cfg.stride = (cfg.width * util_format_get_blocksize(format)) - 16; } else if (rsrc->layout.tiling == AIL_TILING_LINEAR) { cfg.stride = ail_get_linear_stride_B(&rsrc->layout, 0) - 16; } else { assert(rsrc->layout.tiling == AIL_TILING_TWIDDLED || rsrc->layout.tiling == AIL_TILING_TWIDDLED_COMPRESSED); cfg.page_aligned_layers = rsrc->layout.page_aligned_layers; } } } static struct pipe_sampler_view * agx_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *orig_texture, const struct pipe_sampler_view *state) { struct agx_resource *rsrc = agx_resource(orig_texture); struct agx_sampler_view *so = CALLOC_STRUCT(agx_sampler_view); if (!so) return NULL; struct pipe_resource *texture = orig_texture; enum pipe_format format = state->format; const struct util_format_description *desc = util_format_description(format); /* Separate stencil always used on G13, so we need to fix up for Z32S8 */ if (util_format_has_stencil(desc) && rsrc->separate_stencil) { if (util_format_has_depth(desc)) { /* Reinterpret as the depth-only part */ format = 
util_format_get_depth_only(format); } else { /* Use the stencil-only-part */ rsrc = rsrc->separate_stencil; texture = &rsrc->base; format = texture->format; } } /* Save off the resource that we actually use, with the stencil fixed up */ so->rsrc = rsrc; agx_pack_texture(&so->desc, rsrc, format, state, false); so->base = *state; so->base.texture = NULL; pipe_resource_reference(&so->base.texture, orig_texture); pipe_reference_init(&so->base.reference, 1); so->base.context = pctx; return &so->base; } static void agx_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader, unsigned start, unsigned count, unsigned unbind_num_trailing_slots, bool take_ownership, struct pipe_sampler_view **views) { struct agx_context *ctx = agx_context(pctx); unsigned new_nr = 0; unsigned i; assert(start == 0); if (!views) count = 0; for (i = 0; i < count; ++i) { if (views[i]) new_nr = i + 1; if (take_ownership) { pipe_sampler_view_reference( (struct pipe_sampler_view **)&ctx->stage[shader].textures[i], NULL); ctx->stage[shader].textures[i] = (struct agx_sampler_view *)views[i]; } else { pipe_sampler_view_reference( (struct pipe_sampler_view **)&ctx->stage[shader].textures[i], views[i]); } } for (; i < ctx->stage[shader].texture_count; i++) { pipe_sampler_view_reference( (struct pipe_sampler_view **)&ctx->stage[shader].textures[i], NULL); } ctx->stage[shader].texture_count = new_nr; ctx->stage[shader].dirty = ~0; } static void agx_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *pview) { struct agx_sampler_view *view = (struct agx_sampler_view *)pview; pipe_resource_reference(&view->base.texture, NULL); FREE(view); } static struct pipe_surface * agx_create_surface(struct pipe_context *ctx, struct pipe_resource *texture, const struct pipe_surface *surf_tmpl) { struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); if (!surface) return NULL; pipe_reference_init(&surface->reference, 1); pipe_resource_reference(&surface->texture, texture); surface->context = ctx; surface->format = surf_tmpl->format; surface->width = texture->width0; surface->height = texture->height0; surface->texture = texture; surface->u.tex.first_layer = surf_tmpl->u.tex.first_layer; surface->u.tex.last_layer = surf_tmpl->u.tex.last_layer; surface->u.tex.level = surf_tmpl->u.tex.level; return surface; } static void agx_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) { } static void agx_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state) { } static void agx_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) { struct agx_context *ctx = agx_context(pipe); ctx->sample_mask = sample_mask; } static void agx_set_scissor_states(struct pipe_context *pctx, unsigned start_slot, unsigned num_scissors, const struct pipe_scissor_state *scissor) { struct agx_context *ctx = agx_context(pctx); assert(start_slot == 0 && "no geometry shaders"); assert(num_scissors == 1 && "no geometry shaders"); ctx->scissor = *scissor; ctx->dirty |= AGX_DIRTY_SCISSOR_ZBIAS; } static void agx_set_stencil_ref(struct pipe_context *pctx, const struct pipe_stencil_ref state) { struct agx_context *ctx = agx_context(pctx); ctx->stencil_ref = state; ctx->dirty |= AGX_DIRTY_STENCIL_REF; } static void agx_set_viewport_states(struct pipe_context *pctx, unsigned start_slot, unsigned num_viewports, const struct pipe_viewport_state *vp) { struct agx_context *ctx = agx_context(pctx); assert(start_slot == 0 && "no geometry shaders"); assert(num_viewports == 1 && "no geometry 
shaders"); ctx->dirty |= AGX_DIRTY_VIEWPORT; ctx->viewport = *vp; } static void agx_get_scissor_extents(const struct pipe_viewport_state *vp, const struct pipe_scissor_state *ss, const struct pipe_framebuffer_state *fb, unsigned *minx, unsigned *miny, unsigned *maxx, unsigned *maxy) { float trans_x = vp->translate[0], trans_y = vp->translate[1]; float abs_scale_x = fabsf(vp->scale[0]), abs_scale_y = fabsf(vp->scale[1]); /* Calculate the extent of the viewport. Note if a particular dimension of * the viewport is an odd number of pixels, both the translate and the scale * will have a fractional part of 0.5, so adding and subtracting them yields * an integer. Therefore we don't need to round explicitly */ *minx = CLAMP((int)(trans_x - abs_scale_x), 0, fb->width); *miny = CLAMP((int)(trans_y - abs_scale_y), 0, fb->height); *maxx = CLAMP((int)(trans_x + abs_scale_x), 0, fb->width); *maxy = CLAMP((int)(trans_y + abs_scale_y), 0, fb->height); if (ss) { *minx = MAX2(ss->minx, *minx); *miny = MAX2(ss->miny, *miny); *maxx = MIN2(ss->maxx, *maxx); *maxy = MIN2(ss->maxy, *maxy); } } static void agx_upload_viewport_scissor(struct agx_pool *pool, struct agx_batch *batch, uint8_t **out, const struct pipe_viewport_state *vp, const struct pipe_scissor_state *ss) { unsigned minx, miny, maxx, maxy; agx_get_scissor_extents(vp, ss, &batch->key, &minx, &miny, &maxx, &maxy); assert(maxx > minx && maxy > miny); float minz, maxz; util_viewport_zmin_zmax(vp, false, &minz, &maxz); /* Allocate a new scissor descriptor */ unsigned index = batch->scissor.size / AGX_SCISSOR_LENGTH; void *ptr = util_dynarray_grow_bytes(&batch->scissor, 1, AGX_SCISSOR_LENGTH); agx_pack(ptr, SCISSOR, cfg) { cfg.min_x = minx; cfg.min_y = miny; cfg.min_z = minz; cfg.max_x = maxx; cfg.max_y = maxy; cfg.max_z = maxz; } /* Upload state */ struct agx_ppp_update ppp = agx_new_ppp_update(pool, (struct AGX_PPP_HEADER){ .depth_bias_scissor = true, .region_clip = true, .viewport = true, }); agx_ppp_push(&ppp, DEPTH_BIAS_SCISSOR, cfg) { cfg.scissor = index; /* Use the current depth bias, we allocate linearly */ unsigned count = batch->depth_bias.size / AGX_DEPTH_BIAS_LENGTH; cfg.depth_bias = count ? count - 1 : 0; }; agx_ppp_push(&ppp, REGION_CLIP, cfg) { cfg.enable = true; cfg.min_x = minx / 32; cfg.min_y = miny / 32; cfg.max_x = DIV_ROUND_UP(maxx, 32); cfg.max_y = DIV_ROUND_UP(maxy, 32); } agx_ppp_push(&ppp, VIEWPORT, cfg) { cfg.translate_x = vp->translate[0]; cfg.translate_y = vp->translate[1]; cfg.translate_z = vp->translate[2]; cfg.scale_x = vp->scale[0]; cfg.scale_y = vp->scale[1]; cfg.scale_z = vp->scale[2]; } agx_ppp_fini(out, &ppp); } static void agx_upload_depth_bias(struct agx_batch *batch, const struct pipe_rasterizer_state *rast) { void *ptr = util_dynarray_grow_bytes(&batch->depth_bias, 1, AGX_DEPTH_BIAS_LENGTH); agx_pack(ptr, DEPTH_BIAS, cfg) { cfg.depth_bias = rast->offset_units; cfg.slope_scale = rast->offset_scale; cfg.clamp = rast->offset_clamp; } } /* A framebuffer state can be reused across batches, so it doesn't make sense * to add surfaces to the BO list here. Instead we added them when flushing. 
*/ static void agx_set_framebuffer_state(struct pipe_context *pctx, const struct pipe_framebuffer_state *state) { struct agx_context *ctx = agx_context(pctx); if (!state) return; util_copy_framebuffer_state(&ctx->framebuffer, state); ctx->batch = NULL; agx_dirty_all(ctx); } uint64_t agx_batch_upload_pbe(struct agx_batch *batch, unsigned rt) { struct pipe_surface *surf = batch->key.cbufs[rt]; struct agx_resource *tex = agx_resource(surf->texture); const struct util_format_description *desc = util_format_description(surf->format); unsigned level = surf->u.tex.level; unsigned layer = surf->u.tex.first_layer; assert(surf->u.tex.last_layer == layer); struct agx_ptr T = agx_pool_alloc_aligned(&batch->pool, AGX_RENDER_TARGET_LENGTH, 256); agx_pack(T.cpu, RENDER_TARGET, cfg) { cfg.dimension = agx_translate_tex_dim(PIPE_TEXTURE_2D, util_res_sample_count(&tex->base)); cfg.layout = agx_translate_layout(tex->layout.tiling); cfg.channels = agx_pixel_format[surf->format].channels; cfg.type = agx_pixel_format[surf->format].type; assert(desc->nr_channels >= 1 && desc->nr_channels <= 4); cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]) & 3; if (desc->nr_channels >= 2) cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]) & 3; if (desc->nr_channels >= 3) cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]) & 3; if (desc->nr_channels >= 4) cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]) & 3; cfg.width = surf->texture->width0; cfg.height = surf->texture->height0; cfg.level = surf->u.tex.level; cfg.buffer = agx_map_texture_gpu(tex, layer); cfg.unk_mipmapped = tex->mipmapped; if (ail_is_compressed(&tex->layout)) { cfg.compressed_1 = true; cfg.extended = true; cfg.acceleration_buffer = agx_map_texture_gpu(tex, 0) + tex->layout.metadata_offset_B + (layer * tex->layout.compression_layer_stride_B); } if (tex->base.nr_samples > 1) cfg.samples = agx_translate_sample_count(tex->base.nr_samples); if (tex->layout.tiling == AIL_TILING_LINEAR) { cfg.stride = ail_get_linear_stride_B(&tex->layout, level) - 4; cfg.levels = 1; } else { cfg.page_aligned_layers = tex->layout.page_aligned_layers; cfg.levels = tex->base.last_level + 1; } }; return T.gpu; } /* Likewise constant buffers, textures, and samplers are handled in a common * per-draw path, with dirty tracking to reduce the costs involved. 
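 * (The setters here just flag the whole stage dirty with ~0; finer-grained
 * tracking happens in the draw-time upload path.)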
*/ static void agx_set_constant_buffer(struct pipe_context *pctx, enum pipe_shader_type shader, uint index, bool take_ownership, const struct pipe_constant_buffer *cb) { struct agx_context *ctx = agx_context(pctx); struct agx_stage *s = &ctx->stage[shader]; struct pipe_constant_buffer *constants = &s->cb[index]; util_copy_constant_buffer(&s->cb[index], cb, take_ownership); /* Upload user buffer immediately */ if (constants->user_buffer && !constants->buffer) { u_upload_data(ctx->base.const_uploader, 0, constants->buffer_size, 64, constants->user_buffer, &constants->buffer_offset, &constants->buffer); } unsigned mask = (1 << index); if (cb) s->cb_mask |= mask; else s->cb_mask &= ~mask; ctx->stage[shader].dirty = ~0; } static void agx_surface_destroy(struct pipe_context *ctx, struct pipe_surface *surface) { pipe_resource_reference(&surface->texture, NULL); FREE(surface); } static void agx_delete_state(struct pipe_context *ctx, void *state) { FREE(state); } /* BOs added to the batch in the uniform upload path */ static void agx_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot, unsigned count, unsigned unbind_num_trailing_slots, bool take_ownership, const struct pipe_vertex_buffer *buffers) { struct agx_context *ctx = agx_context(pctx); util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, count, unbind_num_trailing_slots, take_ownership); ctx->dirty |= AGX_DIRTY_VERTEX; ctx->stage[PIPE_SHADER_VERTEX].dirty = ~0; } static void * agx_create_vertex_elements(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_element *state) { assert(count <= AGX_MAX_ATTRIBS); struct agx_attribute *attribs = calloc(sizeof(*attribs), AGX_MAX_ATTRIBS); for (unsigned i = 0; i < count; ++i) { const struct pipe_vertex_element ve = state[i]; const struct util_format_description *desc = util_format_description(ve.src_format); unsigned chan_size = desc->channel[0].size / 8; assert((ve.src_offset & (chan_size - 1)) == 0); attribs[i] = (struct agx_attribute){ .buf = ve.vertex_buffer_index, .src_offset = ve.src_offset, .format = ve.src_format, .divisor = ve.instance_divisor, }; } return attribs; } static void agx_bind_vertex_elements_state(struct pipe_context *pctx, void *cso) { struct agx_context *ctx = agx_context(pctx); ctx->attributes = cso; ctx->dirty |= AGX_DIRTY_VERTEX; } static uint32_t asahi_vs_shader_key_hash(const void *key) { return _mesa_hash_data(key, sizeof(struct asahi_vs_shader_key)); } static bool asahi_vs_shader_key_equal(const void *a, const void *b) { return memcmp(a, b, sizeof(struct asahi_vs_shader_key)) == 0; } static uint32_t asahi_fs_shader_key_hash(const void *key) { return _mesa_hash_data(key, sizeof(struct asahi_fs_shader_key)); } static bool asahi_fs_shader_key_equal(const void *a, const void *b) { return memcmp(a, b, sizeof(struct asahi_fs_shader_key)) == 0; } /* No compute variants */ static uint32_t asahi_cs_shader_key_hash(const void *key) { return 0; } static bool asahi_cs_shader_key_equal(const void *a, const void *b) { return true; } static unsigned agx_find_linked_slot(struct agx_varyings_vs *vs, struct agx_varyings_fs *fs, gl_varying_slot slot, unsigned offset) { assert(offset < 4); assert(slot != VARYING_SLOT_PNTC && "point coords aren't linked"); if (slot == VARYING_SLOT_POS) { if (offset == 3) { return 0; /* W */ } else if (offset == 2) { assert(fs->reads_z); return 1; /* Z */ } else { unreachable("gl_Position.xy are not varyings"); } } unsigned vs_index = vs->slots[slot]; assert(vs_index >= 4 && "gl_Position should 
have been the first 4 slots"); assert(vs_index < vs->nr_index && "varyings not written by vertex shader are undefined"); assert((vs_index < vs->base_index_fp16) == ((vs_index + offset) < vs->base_index_fp16) && "a given varying must have a consistent type"); unsigned vs_user_index = (vs_index + offset) - 4; if (fs->reads_z) return vs_user_index + 2; else return vs_user_index + 1; } static unsigned agx_num_general_outputs(struct agx_varyings_vs *vs) { unsigned nr_vs = vs->nr_index; bool writes_psiz = vs->slots[VARYING_SLOT_PSIZ] < nr_vs; assert(nr_vs >= 4 && "gl_Position must be written"); if (writes_psiz) assert(nr_vs >= 5 && "gl_PointSize is written"); return nr_vs - (writes_psiz ? 5 : 4); } static uint32_t agx_link_varyings_vs_fs(struct agx_pool *pool, struct agx_varyings_vs *vs, struct agx_varyings_fs *fs, bool first_provoking_vertex) { /* If there are no bindings, there's nothing to emit */ if (fs->nr_bindings == 0) return 0; size_t linkage_size = AGX_CF_BINDING_HEADER_LENGTH + (fs->nr_bindings * AGX_CF_BINDING_LENGTH); void *tmp = alloca(linkage_size); struct agx_cf_binding_header_packed *header = tmp; struct agx_cf_binding_packed *bindings = (void *)(header + 1); unsigned nr_slots = agx_num_general_outputs(vs) + 1 + (fs->reads_z ? 1 : 0); agx_pack(header, CF_BINDING_HEADER, cfg) { cfg.number_of_32_bit_slots = nr_slots; cfg.number_of_coefficient_registers = fs->nr_cf; } for (unsigned i = 0; i < fs->nr_bindings; ++i) { agx_pack(bindings + i, CF_BINDING, cfg) { cfg.base_coefficient_register = fs->bindings[i].cf_base; cfg.components = fs->bindings[i].count; cfg.perspective = fs->bindings[i].perspective; cfg.shade_model = fs->bindings[i].smooth ? AGX_SHADE_MODEL_GOURAUD : first_provoking_vertex ? AGX_SHADE_MODEL_FLAT_VERTEX_0 : AGX_SHADE_MODEL_FLAT_VERTEX_2; if (fs->bindings[i].slot == VARYING_SLOT_PNTC) { assert(fs->bindings[i].offset == 0); cfg.point_sprite = true; } else { cfg.base_slot = agx_find_linked_slot(vs, fs, fs->bindings[i].slot, fs->bindings[i].offset); assert(cfg.base_slot + cfg.components <= nr_slots && "overflow slots"); } if (fs->bindings[i].slot == VARYING_SLOT_POS) { if (fs->bindings[i].offset == 2) cfg.fragcoord_z = true; else assert(!cfg.perspective && "W must not be perspective divided"); } assert(cfg.base_coefficient_register + cfg.components <= fs->nr_cf && "overflowed coefficient registers"); } } struct agx_ptr ptr = agx_pool_alloc_aligned(pool, (3 * linkage_size), 256); assert(ptr.gpu < (1ull << 32) && "varyings must be in low memory"); /* I don't understand why the data structures are repeated thrice */ for (unsigned i = 0; i < 3; ++i) { memcpy(((uint8_t *)ptr.cpu) + (i * linkage_size), (uint8_t *)tmp, linkage_size); } return ptr.gpu; } /* Does not take ownership of key. Clones if necessary. 
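 * (The NIR itself is cloned as well, so the uncompiled shader's copy stays
 * pristine for future variants.)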
*/ static struct agx_compiled_shader * agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so, struct util_debug_callback *debug, union asahi_shader_key *key_) { struct agx_compiled_shader *compiled = CALLOC_STRUCT(agx_compiled_shader); struct util_dynarray binary; util_dynarray_init(&binary, NULL); nir_shader *nir = nir_shader_clone(NULL, so->nir); /* This can happen at inopportune times and cause jank, log it */ perf_debug(dev, "Compiling shader variant #%u", _mesa_hash_table_num_entries(so->variants)); bool force_translucent = false; if (nir->info.stage == MESA_SHADER_VERTEX) { struct asahi_vs_shader_key *key = &key_->vs; NIR_PASS_V(nir, agx_nir_lower_vbo, &key->vbuf); } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { struct asahi_fs_shader_key *key = &key_->fs; struct agx_tilebuffer_layout tib = agx_build_tilebuffer_layout(key->rt_formats, key->nr_cbufs, 1); nir_lower_blend_options opts = { .scalar_blend_const = true, .logicop_enable = key->blend.logicop_enable, .logicop_func = key->blend.logicop_func, }; static_assert(ARRAY_SIZE(opts.format) == PIPE_MAX_COLOR_BUFS, "max RTs out of sync"); for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) opts.format[i] = key->rt_formats[i]; memcpy(opts.rt, key->blend.rt, sizeof(opts.rt)); /* It's more efficient to use masked stores (with * agx_nir_lower_tilebuffer) than to emulate colour masking with * nir_lower_blend. */ uint8_t colormasks[PIPE_MAX_COLOR_BUFS] = {0}; for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) { if (agx_tilebuffer_supports_mask(&tib, i)) { colormasks[i] = key->blend.rt[i].colormask; opts.rt[i].colormask = BITFIELD_MASK(4); } else { colormasks[i] = BITFIELD_MASK(4); } } NIR_PASS_V(nir, nir_lower_blend, &opts); NIR_PASS_V(nir, agx_nir_lower_tilebuffer, &tib, colormasks, &force_translucent); if (key->sprite_coord_enable) { NIR_PASS_V(nir, nir_lower_texcoord_replace_late, key->sprite_coord_enable, false /* point coord is sysval */); } if (key->clip_plane_enable) { NIR_PASS_V(nir, nir_lower_clip_fs, key->clip_plane_enable, false); } } struct agx_shader_key base_key = {0}; NIR_PASS_V(nir, agx_nir_lower_sysvals, compiled, &base_key.reserved_preamble); agx_compile_shader_nir(nir, &base_key, debug, &binary, &compiled->info); /* reads_tib => Translucent pass type */ compiled->info.reads_tib |= force_translucent; /* Could be optimized to use non-translucent pass types with the appropriate * HSR configuration, but that mechanism is not yet understood. Warn that * we're leaving perf on the table when used. */ if (force_translucent) perf_debug(dev, "Translucency forced due to colour masking"); if (binary.size) { compiled->bo = agx_bo_create(dev, binary.size, AGX_BO_EXEC | AGX_BO_LOW_VA, "Executable"); memcpy(compiled->bo->ptr.cpu, binary.data, binary.size); } ralloc_free(nir); util_dynarray_fini(&binary); return compiled; } static struct agx_compiled_shader * agx_get_shader_variant(struct agx_screen *screen, struct agx_uncompiled_shader *so, struct util_debug_callback *debug, union asahi_shader_key *key) { struct agx_compiled_shader *compiled = agx_disk_cache_retrieve(screen, so, key); if (!compiled) { compiled = agx_compile_variant(&screen->dev, so, debug, key); agx_disk_cache_store(screen->disk_cache, so, key, compiled); } /* key may be destroyed after we return, so clone it before using it as a * hash table key. The clone is logically owned by the hash table. 
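 * (ralloc'ing the clone against so->variants ties its lifetime to the
 * table's.)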
*/ union asahi_shader_key *cloned_key = rzalloc(so->variants, union asahi_shader_key); if (so->type == PIPE_SHADER_FRAGMENT) { memcpy(cloned_key, key, sizeof(struct asahi_fs_shader_key)); } else if (so->type == PIPE_SHADER_VERTEX) { memcpy(cloned_key, key, sizeof(struct asahi_vs_shader_key)); } else { assert(gl_shader_stage_is_compute(so->type)); /* No key */ } _mesa_hash_table_insert(so->variants, cloned_key, compiled); return compiled; } static void * agx_create_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *cso) { struct agx_uncompiled_shader *so = rzalloc(NULL, struct agx_uncompiled_shader); struct agx_device *dev = agx_device(pctx->screen); if (!so) return NULL; so->base = *cso; nir_shader *nir = cso->type == PIPE_SHADER_IR_NIR ? cso->ir.nir : tgsi_to_nir(cso->tokens, pctx->screen, false); /* The driver gets ownership of the nir_shader for graphics. The NIR is * ralloc'd. Free the NIR when we free the uncompiled shader. */ ralloc_steal(so, nir); if (nir->info.stage == MESA_SHADER_VERTEX) { so->variants = _mesa_hash_table_create(NULL, asahi_vs_shader_key_hash, asahi_vs_shader_key_equal); } else { so->variants = _mesa_hash_table_create(NULL, asahi_fs_shader_key_hash, asahi_fs_shader_key_equal); } so->type = pipe_shader_type_from_mesa(nir->info.stage); struct blob blob; blob_init(&blob); nir_serialize(&blob, nir, true); _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1); blob_finish(&blob); so->nir = nir; agx_preprocess_nir(nir, true); /* For shader-db, precompile a shader with a default key. This could be * improved but hopefully this is acceptable for now. */ if (dev->debug & AGX_DBG_PRECOMPILE) { union asahi_shader_key key = {0}; switch (so->nir->info.stage) { case MESA_SHADER_VERTEX: { key.vs.vbuf.count = AGX_MAX_VBUFS; for (unsigned i = 0; i < AGX_MAX_VBUFS; ++i) { key.vs.vbuf.strides[i] = 16; key.vs.vbuf.attributes[i] = (struct agx_attribute){ .buf = i, .format = PIPE_FORMAT_R32G32B32A32_FLOAT, }; } break; } case MESA_SHADER_FRAGMENT: key.fs.nr_cbufs = 1; for (unsigned i = 0; i < key.fs.nr_cbufs; ++i) { key.fs.rt_formats[i] = PIPE_FORMAT_R8G8B8A8_UNORM; key.fs.blend.rt[i].colormask = 0xF; const nir_lower_blend_channel replace = { .func = BLEND_FUNC_ADD, .src_factor = BLEND_FACTOR_ZERO, .invert_src_factor = true, .dst_factor = BLEND_FACTOR_ZERO, .invert_dst_factor = false, }; key.fs.blend.rt[i].rgb = replace; key.fs.blend.rt[i].alpha = replace; } break; default: unreachable("Unknown shader stage in shader-db precompile"); } agx_compile_variant(dev, so, &pctx->debug, &key); } return so; } static void * agx_create_compute_state(struct pipe_context *pctx, const struct pipe_compute_state *cso) { struct agx_uncompiled_shader *so = rzalloc(NULL, struct agx_uncompiled_shader); if (!so) return NULL; so->static_shared_mem = cso->static_shared_mem; so->variants = _mesa_hash_table_create(NULL, asahi_cs_shader_key_hash, asahi_cs_shader_key_equal); union asahi_shader_key key = {0}; assert(cso->ir_type == PIPE_SHADER_IR_NIR && "TGSI kernels unsupported"); nir_shader *nir = nir_shader_clone(NULL, cso->prog); so->type = pipe_shader_type_from_mesa(nir->info.stage); struct blob blob; blob_init(&blob); nir_serialize(&blob, nir, true); _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1); blob_finish(&blob); so->nir = nir; agx_preprocess_nir(nir, true); agx_get_shader_variant(agx_screen(pctx->screen), so, &pctx->debug, &key); /* We're done with the NIR, throw it away */ so->nir = NULL; ralloc_free(nir); return so; } /* Does not take ownership of key. Clones if necessary. 
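 * (Variant lookups memcmp the caller's key against the stored clones.)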
*/ static bool agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out, enum pipe_shader_type stage, union asahi_shader_key *key) { struct agx_uncompiled_shader *so = ctx->stage[stage].shader; assert(so != NULL); struct hash_entry *he = _mesa_hash_table_search(so->variants, key); if (he) { if ((*out) == he->data) return false; *out = he->data; return true; } struct agx_screen *screen = agx_screen(ctx->base.screen); *out = agx_get_shader_variant(screen, so, &ctx->base.debug, key); return true; } static bool agx_update_vs(struct agx_context *ctx) { /* Only proceed if the shader or anything the key depends on changes * * vb_mask, attributes, vertex_buffers: VERTEX */ if (!(ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_VERTEX))) return false; struct asahi_vs_shader_key key = { .vbuf.count = util_last_bit(ctx->vb_mask), }; memcpy(key.vbuf.attributes, ctx->attributes, sizeof(key.vbuf.attributes[0]) * AGX_MAX_ATTRIBS); u_foreach_bit(i, ctx->vb_mask) { key.vbuf.strides[i] = ctx->vertex_buffers[i].stride; } return agx_update_shader(ctx, &ctx->vs, PIPE_SHADER_VERTEX, (union asahi_shader_key *)&key); } static bool agx_update_fs(struct agx_batch *batch) { struct agx_context *ctx = batch->ctx; /* Only proceed if the shader or anything the key depends on changes * * batch->key: implicitly dirties everything, no explicit check * rast: RS * blend: BLEND */ if (!(ctx->dirty & (AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | AGX_DIRTY_BLEND))) return false; struct asahi_fs_shader_key key = { .nr_cbufs = batch->key.nr_cbufs, .clip_plane_enable = ctx->rast->base.clip_plane_enable, }; if (batch->reduced_prim == PIPE_PRIM_POINTS) key.sprite_coord_enable = ctx->rast->base.sprite_coord_enable; for (unsigned i = 0; i < key.nr_cbufs; ++i) { struct pipe_surface *surf = batch->key.cbufs[i]; key.rt_formats[i] = surf ? surf->format : PIPE_FORMAT_NONE; } memcpy(&key.blend, ctx->blend, sizeof(key.blend)); return agx_update_shader(ctx, &ctx->fs, PIPE_SHADER_FRAGMENT, (union asahi_shader_key *)&key); } static void agx_bind_shader_state(struct pipe_context *pctx, void *cso) { if (!cso) return; struct agx_context *ctx = agx_context(pctx); struct agx_uncompiled_shader *so = cso; if (so->type == PIPE_SHADER_VERTEX) ctx->dirty |= AGX_DIRTY_VS_PROG; else if (so->type == PIPE_SHADER_FRAGMENT) ctx->dirty |= AGX_DIRTY_FS_PROG; ctx->stage[so->type].shader = so; } static void agx_delete_compiled_shader(struct hash_entry *ent) { struct agx_compiled_shader *so = ent->data; agx_bo_unreference(so->bo); FREE(so); } static void agx_delete_shader_state(struct pipe_context *ctx, void *cso) { struct agx_uncompiled_shader *so = cso; _mesa_hash_table_destroy(so->variants, agx_delete_compiled_shader); ralloc_free(so); } static unsigned sampler_count(struct agx_context *ctx, struct agx_compiled_shader *cs, enum pipe_shader_type stage) { unsigned nr_samplers = ctx->stage[stage].sampler_count; if (cs->info.needs_dummy_sampler) nr_samplers = MAX2(nr_samplers, 1); return nr_samplers; } static inline enum agx_sampler_states translate_sampler_state_count(struct agx_context *ctx, struct agx_compiled_shader *cs, enum pipe_shader_type stage) { return agx_translate_sampler_state_count(sampler_count(ctx, cs, stage), ctx->stage[stage].custom_borders); } /* * Despite having both a layout *and* a flag that I only see Metal use with null * textures, AGX doesn't seem to have "real" null textures. Instead we need to * bind an arbitrary address and throw away the results to read all 0's. 
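 * The descriptor also swizzles every channel to constant 0, so whatever
 * memory is bound is never observable by the shader.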
* Accordingly, the caller must pass some address that lives at least as long as * the texture descriptor itself. */ static void agx_set_null_texture(struct agx_texture_packed *tex, uint64_t valid_address) { agx_pack(tex, TEXTURE, cfg) { cfg.layout = AGX_LAYOUT_NULL; cfg.channels = AGX_CHANNELS_R8; cfg.type = AGX_TEXTURE_TYPE_UNORM /* don't care */; cfg.swizzle_r = AGX_CHANNEL_0; cfg.swizzle_g = AGX_CHANNEL_0; cfg.swizzle_b = AGX_CHANNEL_0; cfg.swizzle_a = AGX_CHANNEL_0; cfg.address = valid_address; cfg.null = true; } } static uint32_t agx_build_pipeline(struct agx_batch *batch, struct agx_compiled_shader *cs, enum pipe_shader_type stage, unsigned variable_shared_mem) { struct agx_context *ctx = batch->ctx; unsigned nr_textures = cs->info.nr_bindful_textures; unsigned nr_active_textures = ctx->stage[stage].texture_count; unsigned nr_samplers = sampler_count(ctx, cs, stage); bool custom_borders = ctx->stage[stage].custom_borders; struct agx_ptr T_tex = agx_pool_alloc_aligned( &batch->pool, AGX_TEXTURE_LENGTH * nr_textures, 64); size_t sampler_length = AGX_SAMPLER_LENGTH + (custom_borders ? AGX_BORDER_LENGTH : 0); struct agx_ptr T_samp = agx_pool_alloc_aligned(&batch->pool, sampler_length * nr_samplers, 64); struct agx_texture_packed *textures = T_tex.cpu; /* TODO: Dirty track me to save some CPU cycles and maybe improve caching */ for (unsigned i = 0; i < MIN2(nr_textures, nr_active_textures); ++i) { struct agx_sampler_view *tex = ctx->stage[stage].textures[i]; if (tex == NULL) { agx_set_null_texture(&textures[i], T_tex.gpu); continue; } struct agx_resource *rsrc = tex->rsrc; agx_batch_reads(batch, tex->rsrc); unsigned first_layer = (tex->base.target == PIPE_BUFFER) ? 0 : tex->base.u.tex.first_layer; /* Without the address */ struct agx_texture_packed texture = tex->desc; /* Just the address */ struct agx_texture_packed texture2; agx_pack(&texture2, TEXTURE, cfg) { cfg.address = agx_map_texture_gpu(rsrc, first_layer); if (rsrc->base.target == PIPE_BUFFER) cfg.address += tex->base.u.buf.offset; if (ail_is_compressed(&rsrc->layout)) { cfg.acceleration_buffer = agx_map_texture_gpu(rsrc, 0) + rsrc->layout.metadata_offset_B + (first_layer * rsrc->layout.compression_layer_stride_B); } } agx_merge(texture, texture2, TEXTURE); textures[i] = texture; } for (unsigned i = nr_active_textures; i < nr_textures; ++i) agx_set_null_texture(&textures[i], T_tex.gpu); /* TODO: Dirty track me to save some CPU cycles and maybe improve caching */ uint8_t *out_sampler = T_samp.cpu; if (nr_samplers && ctx->stage[stage].sampler_count == 0) { /* Configuration is irrelevant for the dummy sampler */ agx_pack(out_sampler, SAMPLER, cfg) ; } else { for (unsigned i = 0; i < nr_samplers; ++i) { struct agx_sampler_state *sampler = ctx->stage[stage].samplers[i]; struct agx_sampler_packed *out = (struct agx_sampler_packed *)out_sampler; if (sampler) { *out = sampler->desc; if (custom_borders) { memcpy(out_sampler + AGX_SAMPLER_LENGTH, &sampler->border, AGX_BORDER_LENGTH); } else { assert(!sampler->uses_custom_border && "invalid combination"); } } else { memset(out, 0, sampler_length); } out_sampler += sampler_length; } } struct agx_usc_builder b = agx_alloc_usc_control(&batch->pipeline_pool, cs->push_range_count + 2); if (nr_textures) { agx_usc_pack(&b, TEXTURE, cfg) { cfg.start = 0; cfg.count = nr_textures; cfg.buffer = T_tex.gpu; } } if (nr_samplers) { agx_usc_pack(&b, SAMPLER, cfg) { cfg.start = 0; cfg.count = nr_samplers; cfg.buffer = T_samp.gpu; } } /* Must only upload uniforms after uploading textures so we can implement 
the * AGX_PUSH_TEXTURE_BASE sysval correctly. */ uint64_t uniform_tables[AGX_NUM_SYSVAL_TABLES] = { agx_upload_uniforms(batch, T_tex.gpu, stage), ctx->grid_info, }; for (unsigned i = 0; i < cs->push_range_count; ++i) { agx_usc_uniform(&b, cs->push[i].uniform, cs->push[i].length, uniform_tables[cs->push[i].table] + cs->push[i].offset); } if (stage == PIPE_SHADER_FRAGMENT) { agx_usc_tilebuffer(&b, &batch->tilebuffer_layout); } else if (stage == PIPE_SHADER_COMPUTE) { unsigned size = ctx->stage[PIPE_SHADER_COMPUTE].shader->static_shared_mem + variable_shared_mem; agx_usc_pack(&b, SHARED, cfg) { cfg.layout = AGX_SHARED_LAYOUT_VERTEX_COMPUTE; cfg.bytes_per_threadgroup = size > 0 ? size : 65536; cfg.uses_shared_memory = size > 0; } } else { agx_usc_shared_none(&b); } agx_usc_pack(&b, SHADER, cfg) { if (stage == PIPE_SHADER_FRAGMENT) cfg.loads_varyings = cs->info.varyings.fs.nr_bindings > 0; cfg.code = cs->bo->ptr.gpu + cs->info.main_offset; cfg.unk_2 = (stage == PIPE_SHADER_FRAGMENT) ? 2 : 3; } agx_usc_pack(&b, REGISTERS, cfg) { cfg.register_count = cs->info.nr_gprs; cfg.unk_1 = (stage == PIPE_SHADER_FRAGMENT); } if (stage == PIPE_SHADER_FRAGMENT) { agx_usc_pack(&b, FRAGMENT_PROPERTIES, cfg) { bool writes_sample_mask = ctx->fs->info.writes_sample_mask; cfg.early_z_testing = !writes_sample_mask; cfg.unk_4 = 0x2; cfg.unk_5 = 0x0; } } if (cs->info.has_preamble) { agx_usc_pack(&b, PRESHADER, cfg) { cfg.code = cs->bo->ptr.gpu + cs->info.preamble_offset; } } else { agx_usc_pack(&b, NO_PRESHADER, cfg) ; } return agx_usc_fini(&b); } uint64_t agx_build_meta(struct agx_batch *batch, bool store, bool partial_render) { struct agx_context *ctx = batch->ctx; /* Construct the key */ struct agx_meta_key key = {.tib = batch->tilebuffer_layout}; for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) { struct pipe_surface *surf = batch->key.cbufs[rt]; if (surf == NULL) continue; if (store) { /* TODO: Suppress stores to discarded render targets */ key.op[rt] = AGX_META_OP_STORE; } else { struct agx_resource *rsrc = agx_resource(surf->texture); bool valid = agx_resource_valid(rsrc, surf->u.tex.level); bool clear = (batch->clear & (PIPE_CLEAR_COLOR0 << rt)); bool load = valid && !clear; /* The background program used for partial renders must always load * whatever was stored in the mid-frame end-of-tile program. */ load |= partial_render; key.op[rt] = load ? AGX_META_OP_LOAD : clear ? AGX_META_OP_CLEAR : AGX_META_OP_NONE; } } /* Get the shader */ struct agx_meta_shader *shader = agx_get_meta_shader(&ctx->meta, &key); agx_batch_add_bo(batch, shader->bo); /* Begin building the pipeline */ struct agx_usc_builder b = agx_alloc_usc_control(&batch->pipeline_pool, 1 + PIPE_MAX_COLOR_BUFS); bool needs_sampler = false; for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) { if (key.op[rt] == AGX_META_OP_LOAD) { /* Each reloaded render target is textured */ struct agx_ptr texture = agx_pool_alloc_aligned(&batch->pool, AGX_TEXTURE_LENGTH, 64); struct pipe_surface *surf = batch->key.cbufs[rt]; assert(surf != NULL && "cannot load nonexistent attachment"); struct agx_resource *rsrc = agx_resource(surf->texture); agx_pack_texture(texture.cpu, rsrc, surf->format, &(struct pipe_sampler_view){ /* To reduce shader variants, we always use a 2D * texture. For reloads of arrays and cube maps, we * map a single layer as a 2D image. 
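 * (first_layer below selects which layer backs the reload.)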
*/ .target = PIPE_TEXTURE_2D, .swizzle_r = PIPE_SWIZZLE_X, .swizzle_g = PIPE_SWIZZLE_Y, .swizzle_b = PIPE_SWIZZLE_Z, .swizzle_a = PIPE_SWIZZLE_W, .u.tex = { .first_layer = surf->u.tex.first_layer, .last_layer = surf->u.tex.last_layer, .first_level = surf->u.tex.level, .last_level = surf->u.tex.level, }, }, true); agx_usc_pack(&b, TEXTURE, cfg) { cfg.start = rt; cfg.count = 1; cfg.buffer = texture.gpu; } needs_sampler = true; } else if (key.op[rt] == AGX_META_OP_CLEAR) { assert(batch->uploaded_clear_color[rt] && "set when cleared"); agx_usc_uniform(&b, 8 * rt, 8, batch->uploaded_clear_color[rt]); } else if (key.op[rt] == AGX_META_OP_STORE) { agx_usc_pack(&b, TEXTURE, cfg) { cfg.start = rt; cfg.count = 1; cfg.buffer = agx_batch_upload_pbe(batch, rt); } } } /* All render targets share a sampler */ if (needs_sampler) { struct agx_ptr sampler = agx_pool_alloc_aligned(&batch->pool, AGX_SAMPLER_LENGTH, 64); agx_pack(sampler.cpu, SAMPLER, cfg) { cfg.magnify_linear = true; cfg.minify_linear = false; cfg.mip_filter = AGX_MIP_FILTER_NONE; cfg.wrap_s = AGX_WRAP_CLAMP_TO_EDGE; cfg.wrap_t = AGX_WRAP_CLAMP_TO_EDGE; cfg.wrap_r = AGX_WRAP_CLAMP_TO_EDGE; cfg.pixel_coordinates = true; cfg.compare_func = AGX_COMPARE_FUNC_ALWAYS; } agx_usc_pack(&b, SAMPLER, cfg) { cfg.start = 0; cfg.count = 1; cfg.buffer = sampler.gpu; } } agx_usc_tilebuffer(&b, &batch->tilebuffer_layout); agx_usc_pack(&b, SHADER, cfg) { cfg.code = shader->ptr; cfg.unk_2 = 0; } agx_usc_pack(&b, REGISTERS, cfg) cfg.register_count = shader->info.nr_gprs; agx_usc_pack(&b, NO_PRESHADER, cfg) ; return agx_usc_fini(&b); } void agx_batch_init_state(struct agx_batch *batch) { /* Emit state on the batch that we don't change and so don't dirty track */ uint8_t *out = batch->encoder_current; struct agx_ppp_update ppp = agx_new_ppp_update(&batch->pool, (struct AGX_PPP_HEADER){ .w_clamp = true, .varying_word_1 = true, .cull_2 = true, .occlusion_query_2 = true, .output_unknown = true, .varying_word_2 = true, }); /* clang-format off */ agx_ppp_push(&ppp, W_CLAMP, cfg) cfg.w_clamp = 1e-10; agx_ppp_push(&ppp, VARYING_1, cfg); agx_ppp_push(&ppp, CULL_2, cfg); agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY_2, cfg); agx_ppp_push(&ppp, OUTPUT_UNKNOWN, cfg); agx_ppp_push(&ppp, VARYING_2, cfg); /* clang-format on */ agx_ppp_fini(&out, &ppp); batch->encoder_current = out; /* Choose a tilebuffer layout given the framebuffer key */ enum pipe_format formats[PIPE_MAX_COLOR_BUFS] = {0}; for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) { struct pipe_surface *surf = batch->key.cbufs[i]; if (surf) formats[i] = surf->format; } batch->tilebuffer_layout = agx_build_tilebuffer_layout( formats, batch->key.nr_cbufs, util_framebuffer_get_num_samples(&batch->key)); } static enum agx_object_type agx_point_object_type(struct agx_rasterizer *rast) { return (rast->base.sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) ? 
             AGX_OBJECT_TYPE_POINT_SPRITE_UV01 :
             AGX_OBJECT_TYPE_POINT_SPRITE_UV10;
}

static enum agx_pass_type
agx_pass_type_for_shader(struct agx_shader_info *info)
{
   if (info->reads_tib && info->writes_sample_mask)
      return AGX_PASS_TYPE_TRANSLUCENT_PUNCH_THROUGH;
   else if (info->reads_tib)
      return AGX_PASS_TYPE_TRANSLUCENT;
   else if (info->writes_sample_mask)
      return AGX_PASS_TYPE_PUNCH_THROUGH;
   else
      return AGX_PASS_TYPE_OPAQUE;
}

static enum agx_conservative_depth
agx_translate_depth_layout(enum gl_frag_depth_layout layout)
{
   switch (layout) {
   case FRAG_DEPTH_LAYOUT_ANY:
      return AGX_CONSERVATIVE_DEPTH_ANY;
   case FRAG_DEPTH_LAYOUT_LESS:
      return AGX_CONSERVATIVE_DEPTH_LESS;
   case FRAG_DEPTH_LAYOUT_GREATER:
      return AGX_CONSERVATIVE_DEPTH_GREATER;
   case FRAG_DEPTH_LAYOUT_UNCHANGED:
      return AGX_CONSERVATIVE_DEPTH_UNCHANGED;
   default:
      unreachable("depth layout should have been canonicalized");
   }
}

static void
agx_ppp_fragment_face_2(struct agx_ppp_update *ppp,
                        enum agx_object_type object_type,
                        struct agx_shader_info *info)
{
   agx_ppp_push(ppp, FRAGMENT_FACE_2, cfg) {
      cfg.object_type = object_type;
      cfg.conservative_depth = agx_translate_depth_layout(info->depth_layout);
   }
}

#define MAX_PPP_UPDATES 2

static uint8_t *
agx_encode_state(struct agx_batch *batch, uint8_t *out, bool is_lines,
                 bool is_points)
{
   struct agx_context *ctx = batch->ctx;
   struct agx_rasterizer *rast = ctx->rast;
   unsigned ppp_updates = 0;

#define IS_DIRTY(ST) !!(ctx->dirty & AGX_DIRTY_##ST)

   if (IS_DIRTY(VS)) {
      agx_pack(out, VDM_STATE, cfg) {
         cfg.vertex_shader_word_0_present = true;
         cfg.vertex_shader_word_1_present = true;
         cfg.vertex_outputs_present = true;
         cfg.vertex_unknown_present = true;
      }
      out += AGX_VDM_STATE_LENGTH;

      unsigned tex_count = ctx->vs->info.nr_bindful_textures;
      agx_pack(out, VDM_STATE_VERTEX_SHADER_WORD_0, cfg) {
         cfg.uniform_register_count = ctx->vs->info.push_count;
         cfg.preshader_register_count = ctx->vs->info.nr_preamble_gprs;
         cfg.texture_state_register_count = tex_count;
         cfg.sampler_state_register_count =
            translate_sampler_state_count(ctx, ctx->vs, PIPE_SHADER_VERTEX);
      }
      out += AGX_VDM_STATE_VERTEX_SHADER_WORD_0_LENGTH;

      agx_pack(out, VDM_STATE_VERTEX_SHADER_WORD_1, cfg) {
         cfg.pipeline =
            agx_build_pipeline(batch, ctx->vs, PIPE_SHADER_VERTEX, 0);
      }
      out += AGX_VDM_STATE_VERTEX_SHADER_WORD_1_LENGTH;

      agx_pack(out, VDM_STATE_VERTEX_OUTPUTS, cfg) {
         cfg.output_count_1 = ctx->vs->info.varyings.vs.nr_index;
         cfg.output_count_2 = cfg.output_count_1;
      }
      out += AGX_VDM_STATE_VERTEX_OUTPUTS_LENGTH;

      agx_pack(out, VDM_STATE_VERTEX_UNKNOWN, cfg) {
         cfg.flat_shading_control = ctx->rast->base.flatshade_first
                                       ? AGX_VDM_VERTEX_0
                                       : AGX_VDM_VERTEX_2;
         cfg.unknown_4 = cfg.unknown_5 = ctx->rast->base.rasterizer_discard;
      }
      out += AGX_VDM_STATE_VERTEX_UNKNOWN_LENGTH;

      /* Pad up to a multiple of 8 bytes */
      memset(out, 0, 4);
      out += 4;
   }

   struct agx_pool *pool = &batch->pool;
   struct agx_compiled_shader *vs = ctx->vs, *fs = ctx->fs;

   if ((ctx->dirty & AGX_DIRTY_RS) && ctx->rast->base.offset_tri) {
      agx_upload_depth_bias(batch, &ctx->rast->base);
      ctx->dirty |= AGX_DIRTY_SCISSOR_ZBIAS;
   }

   if (ctx->dirty & (AGX_DIRTY_VIEWPORT | AGX_DIRTY_SCISSOR_ZBIAS)) {
      agx_upload_viewport_scissor(pool, batch, &out, &ctx->viewport,
                                  ctx->rast->base.scissor ?
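                                     /* NULL means the scissor test is off,
                                      * so only the viewport bounds the
                                      * render area */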
                                     &ctx->scissor : NULL);
   }

   bool varyings_dirty = false;

   if (IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG) || IS_DIRTY(RS)) {
      batch->varyings = agx_link_varyings_vs_fs(
         &batch->pipeline_pool, &ctx->vs->info.varyings.vs,
         &ctx->fs->info.varyings.fs, ctx->rast->base.flatshade_first);

      varyings_dirty = true;
      ppp_updates++;
   }

   bool object_type_dirty =
      IS_DIRTY(PRIM) || (is_points && IS_DIRTY(SPRITE_COORD_MODE));

   bool fragment_face_dirty =
      IS_DIRTY(ZS) || IS_DIRTY(STENCIL_REF) || IS_DIRTY(RS);

   enum agx_object_type object_type = is_points ? agx_point_object_type(rast)
                                      : is_lines ? AGX_OBJECT_TYPE_LINE
                                                 : AGX_OBJECT_TYPE_TRIANGLE;

   struct AGX_PPP_HEADER dirty = {
      .fragment_control =
         IS_DIRTY(ZS) || IS_DIRTY(RS) || IS_DIRTY(PRIM) || IS_DIRTY(QUERY),
      .fragment_control_2 = IS_DIRTY(PRIM) || IS_DIRTY(FS_PROG) || IS_DIRTY(RS),
      .fragment_front_face = fragment_face_dirty,
      .fragment_front_face_2 = object_type_dirty || IS_DIRTY(FS_PROG),
      .fragment_front_stencil = IS_DIRTY(ZS),
      .fragment_back_face = fragment_face_dirty,
      .fragment_back_face_2 = object_type_dirty || IS_DIRTY(FS_PROG),
      .fragment_back_stencil = IS_DIRTY(ZS),
      .output_select = IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG),
      .varying_word_0 = IS_DIRTY(VS_PROG),
      .cull = IS_DIRTY(RS),
      .fragment_shader = IS_DIRTY(FS) || varyings_dirty,
      .occlusion_query = IS_DIRTY(QUERY),
      .output_size = IS_DIRTY(VS_PROG),
   };

   struct agx_ppp_update ppp = agx_new_ppp_update(pool, dirty);

   if (dirty.fragment_control) {
      agx_ppp_push(&ppp, FRAGMENT_CONTROL, cfg) {
         if (ctx->active_queries && ctx->occlusion_query) {
            if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER)
               cfg.visibility_mode = AGX_VISIBILITY_MODE_COUNTING;
            else
               cfg.visibility_mode = AGX_VISIBILITY_MODE_BOOLEAN;
         }

         cfg.stencil_test_enable = ctx->zs->base.stencil[0].enabled;
         cfg.two_sided_stencil = ctx->zs->base.stencil[1].enabled;
         cfg.depth_bias_enable = rast->base.offset_tri;

         /* Always enable scissoring so we may scissor to the viewport (TODO:
          * optimize this out if the viewport is the default and the app does
          * not use the scissor test)
          */
         cfg.scissor_enable = true;
      }
   }

   if (dirty.fragment_control_2) {
      agx_ppp_push(&ppp, FRAGMENT_CONTROL, cfg) {
         /* This avoids broken derivatives along primitive edges */
         cfg.disable_tri_merging =
            (is_lines || is_points || ctx->fs->info.disable_tri_merging);
         cfg.no_colour_output = ctx->fs->info.no_colour_output ||
                                ctx->rast->base.rasterizer_discard;
         cfg.pass_type = agx_pass_type_for_shader(&ctx->fs->info);
      }
   }

   if (dirty.fragment_front_face) {
      struct agx_fragment_face_packed front_face;

      agx_pack(&front_face, FRAGMENT_FACE, cfg) {
         cfg.stencil_reference = ctx->stencil_ref.ref_value[0];
         cfg.line_width = rast->line_width;
         cfg.polygon_mode = rast->polygon_mode;
      };

      front_face.opaque[0] |= ctx->zs->depth.opaque[0];

      agx_ppp_push_packed(&ppp, &front_face, FRAGMENT_FACE);
   }

   if (dirty.fragment_front_face_2)
      agx_ppp_fragment_face_2(&ppp, object_type, &ctx->fs->info);

   if (dirty.fragment_front_stencil) {
      agx_ppp_push_packed(&ppp, ctx->zs->front_stencil.opaque,
                          FRAGMENT_STENCIL);
   }

   if (dirty.fragment_back_face) {
      struct agx_fragment_face_packed back_face;

      agx_pack(&back_face, FRAGMENT_FACE, cfg) {
         bool twosided = ctx->zs->base.stencil[1].enabled;
         cfg.stencil_reference = ctx->stencil_ref.ref_value[twosided ?
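            /* Gallium supplies a second stencil reference only when
             * two-sided stenciling is enabled */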
                                                             1 : 0];
         cfg.line_width = rast->line_width;
         cfg.polygon_mode = rast->polygon_mode;
      };

      back_face.opaque[0] |= ctx->zs->depth.opaque[0];
      agx_ppp_push_packed(&ppp, &back_face, FRAGMENT_FACE);
   }

   if (dirty.fragment_back_face_2)
      agx_ppp_fragment_face_2(&ppp, object_type, &ctx->fs->info);

   if (dirty.fragment_back_stencil)
      agx_ppp_push_packed(&ppp, ctx->zs->back_stencil.opaque, FRAGMENT_STENCIL);

   if (dirty.output_select) {
      agx_ppp_push(&ppp, OUTPUT_SELECT, cfg) {
         cfg.varyings = !!fs->info.varyings.fs.nr_bindings;
         cfg.point_size = vs->info.writes_psiz;
         cfg.frag_coord_z = fs->info.varyings.fs.reads_z;
      }
   }

   if (dirty.varying_word_0) {
      agx_ppp_push(&ppp, VARYING_0, cfg) {
         cfg.count = agx_num_general_outputs(&ctx->vs->info.varyings.vs);
      }
   }

   if (dirty.cull)
      agx_ppp_push_packed(&ppp, ctx->rast->cull, CULL);

   if (dirty.fragment_shader) {
      unsigned frag_tex_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count;

      agx_ppp_push(&ppp, FRAGMENT_SHADER, cfg) {
         cfg.pipeline =
            agx_build_pipeline(batch, ctx->fs, PIPE_SHADER_FRAGMENT, 0);
         cfg.uniform_register_count = ctx->fs->info.push_count;
         cfg.preshader_register_count = ctx->fs->info.nr_preamble_gprs;
         cfg.texture_state_register_count = frag_tex_count;
         cfg.sampler_state_register_count =
            translate_sampler_state_count(ctx, ctx->fs, PIPE_SHADER_FRAGMENT);
         cfg.cf_binding_count = ctx->fs->info.varyings.fs.nr_bindings;
         cfg.cf_bindings = batch->varyings;

         /* XXX: This is probably wrong */
         cfg.unknown_30 = frag_tex_count >= 4;
      }
   }

   if (dirty.occlusion_query) {
      agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY, cfg) {
         if (ctx->active_queries && ctx->occlusion_query) {
            cfg.index = agx_get_oq_index(batch, ctx->occlusion_query);
         } else {
            cfg.index = 0;
         }
      }
   }

   if (dirty.output_size) {
      agx_ppp_push(&ppp, OUTPUT_SIZE, cfg)
         cfg.count = vs->info.varyings.vs.nr_index;
   }

   agx_ppp_fini(&out, &ppp);
   ppp_updates++;

#undef IS_DIRTY

   assert(ppp_updates <= MAX_PPP_UPDATES);
   return out;
}

static enum agx_primitive
agx_primitive_for_pipe(enum pipe_prim_type mode)
{
   switch (mode) {
   case PIPE_PRIM_POINTS:
      return AGX_PRIMITIVE_POINTS;
   case PIPE_PRIM_LINES:
      return AGX_PRIMITIVE_LINES;
   case PIPE_PRIM_LINE_STRIP:
      return AGX_PRIMITIVE_LINE_STRIP;
   case PIPE_PRIM_LINE_LOOP:
      return AGX_PRIMITIVE_LINE_LOOP;
   case PIPE_PRIM_TRIANGLES:
      return AGX_PRIMITIVE_TRIANGLES;
   case PIPE_PRIM_TRIANGLE_STRIP:
      return AGX_PRIMITIVE_TRIANGLE_STRIP;
   case PIPE_PRIM_TRIANGLE_FAN:
      return AGX_PRIMITIVE_TRIANGLE_FAN;
   case PIPE_PRIM_QUADS:
      return AGX_PRIMITIVE_QUADS;
   case PIPE_PRIM_QUAD_STRIP:
      return AGX_PRIMITIVE_QUAD_STRIP;
   default:
      unreachable("todo: other primitive types");
   }
}

static uint64_t
agx_index_buffer_rsrc_ptr(struct agx_batch *batch,
                          const struct pipe_draw_info *info, size_t *extent)
{
   assert(!info->has_user_indices && "cannot use user pointers with indirect");

   struct agx_resource *rsrc = agx_resource(info->index.resource);
   agx_batch_reads(batch, rsrc);

   *extent = ALIGN_POT(util_resource_size(&rsrc->base), 4);
   return rsrc->bo->ptr.gpu;
}

static uint64_t
agx_index_buffer_direct_ptr(struct agx_batch *batch,
                            const struct pipe_draw_start_count_bias *draw,
                            const struct pipe_draw_info *info, size_t *extent)
{
   off_t offset = draw->start * info->index_size;

   if (!info->has_user_indices) {
      uint64_t base = agx_index_buffer_rsrc_ptr(batch, info, extent);

      *extent = ALIGN_POT(*extent - offset, 4);
      return base + offset;
   } else {
      *extent = ALIGN_POT(draw->count * info->index_size, 4);

      return agx_pool_upload_aligned(&batch->pool,
                                     ((uint8_t *)info->index.user) + offset,
                                     draw->count * info->index_size, 64);
   }
}

static bool
agx_scissor_culls_everything(struct agx_context *ctx)
{
   unsigned minx, miny, maxx, maxy;

   agx_get_scissor_extents(&ctx->viewport,
                           ctx->rast->base.scissor ? &ctx->scissor : NULL,
                           &ctx->framebuffer, &minx, &miny, &maxx, &maxy);

   return (minx == maxx) || (miny == maxy);
}

static void
agx_ensure_cmdbuf_has_space(struct agx_batch *batch, size_t space)
{
   /* Assert that we have space for a link tag */
   assert((batch->encoder_current + AGX_VDM_STREAM_LINK_LENGTH) <=
             batch->encoder_end &&
          "Encoder overflowed");

   /* Always leave room for a link tag, in case we run out of space later,
    * plus padding because VDM apparently overreads?
    *
    * 0x200 is not enough. 0x400 seems to work. 0x800 for safety.
    */
   space += AGX_VDM_STREAM_LINK_LENGTH + 0x800;

   /* If there is room in the command buffer, we're done */
   if (likely((batch->encoder_end - batch->encoder_current) >= space))
      return;

   /* Otherwise, we need to allocate a new command buffer. We use memory owned
    * by the batch to simplify lifetime management for the BO.
    */
   size_t size = 65536;
   struct agx_ptr T = agx_pool_alloc_aligned(&batch->pool, size, 256);

   /* Jump from the old command buffer to the new command buffer */
   agx_pack(batch->encoder_current, VDM_STREAM_LINK, cfg) {
      cfg.target_lo = T.gpu & BITFIELD_MASK(32);
      cfg.target_hi = T.gpu >> 32;
   }

   /* Swap out the command buffer */
   batch->encoder_current = T.cpu;
   batch->encoder_end = batch->encoder_current + size;
}

static void
agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
             unsigned drawid_offset,
             const struct pipe_draw_indirect_info *indirect,
             const struct pipe_draw_start_count_bias *draws, unsigned num_draws)
{
   struct agx_context *ctx = agx_context(pctx);

   if (unlikely(!agx_render_condition_check(ctx)))
      return;

   if (num_draws > 1) {
      util_draw_multi(pctx, info, drawid_offset, indirect, draws, num_draws);
      return;
   }

   struct agx_batch *batch = agx_get_batch(ctx);

#ifndef NDEBUG
   if (unlikely(agx_device(pctx->screen)->debug & AGX_DBG_DIRTY))
      agx_dirty_all(ctx);
#endif

   if (agx_scissor_culls_everything(ctx))
      return;

   /* We don't support side effects in vertex stages, so this is trivial */
   if (ctx->rast->base.rasterizer_discard)
      return;

   /* Dirty track the reduced prim: lines vs points vs triangles */
   enum pipe_prim_type reduced_prim = u_reduced_prim(info->mode);
   if (reduced_prim != batch->reduced_prim)
      ctx->dirty |= AGX_DIRTY_PRIM;
   batch->reduced_prim = reduced_prim;

   /* Update batch masks based on current state */
   if (ctx->dirty & AGX_DIRTY_BLEND) {
      /* TODO: Any point to tracking load? */
      batch->draw |= ctx->blend->store;
      batch->resolve |= ctx->blend->store;
   }

   if (ctx->dirty & AGX_DIRTY_ZS) {
      batch->load |= ctx->zs->load;
      batch->draw |= ctx->zs->store;
      batch->resolve |= ctx->zs->store;
   }

   if (agx_update_vs(ctx))
      ctx->dirty |= AGX_DIRTY_VS | AGX_DIRTY_VS_PROG;
   else if (ctx->stage[PIPE_SHADER_VERTEX].dirty)
      ctx->dirty |= AGX_DIRTY_VS;

   if (agx_update_fs(batch))
      ctx->dirty |= AGX_DIRTY_FS | AGX_DIRTY_FS_PROG;
   else if (ctx->stage[PIPE_SHADER_FRAGMENT].dirty)
      ctx->dirty |= AGX_DIRTY_FS;

   agx_batch_add_bo(batch, ctx->vs->bo);
   agx_batch_add_bo(batch, ctx->fs->bo);

   /* When we approach the end of a command buffer, cycle it out for a new one.
    * We only need to do this once per draw as long as we conservatively
    * estimate the maximum bytes of VDM commands that this draw will emit.
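    * (The estimate below simply sums the LENGTH of every VDM/PPP word this
    * draw can possibly emit; words that end up unused only waste a little
    * reservation, never underestimate it.)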
    */
   agx_ensure_cmdbuf_has_space(
      batch, (AGX_VDM_STATE_LENGTH * 2) +
                (AGX_PPP_STATE_LENGTH * MAX_PPP_UPDATES) +
                AGX_VDM_STATE_RESTART_INDEX_LENGTH +
                AGX_VDM_STATE_VERTEX_SHADER_WORD_0_LENGTH +
                AGX_VDM_STATE_VERTEX_SHADER_WORD_1_LENGTH +
                AGX_VDM_STATE_VERTEX_OUTPUTS_LENGTH +
                AGX_VDM_STATE_VERTEX_UNKNOWN_LENGTH + 4 /* padding */ +
                ((!batch->any_draws) ? AGX_VDM_BARRIER_LENGTH : 0) +
                AGX_INDEX_LIST_LENGTH + AGX_INDEX_LIST_BUFFER_LO_LENGTH +
                AGX_INDEX_LIST_COUNT_LENGTH + AGX_INDEX_LIST_INSTANCES_LENGTH +
                AGX_INDEX_LIST_START_LENGTH +
                AGX_INDEX_LIST_BUFFER_SIZE_LENGTH);

   uint8_t *out = agx_encode_state(batch, batch->encoder_current,
                                   reduced_prim == PIPE_PRIM_LINES,
                                   reduced_prim == PIPE_PRIM_POINTS);

   enum agx_primitive prim = agx_primitive_for_pipe(info->mode);
   unsigned idx_size = info->index_size;
   uint64_t ib = 0;
   size_t ib_extent = 0;

   if (idx_size) {
      if (indirect != NULL)
         ib = agx_index_buffer_rsrc_ptr(batch, info, &ib_extent);
      else
         ib = agx_index_buffer_direct_ptr(batch, draws, info, &ib_extent);
   }

   if (idx_size) {
      /* Index sizes are encoded logarithmically */
      STATIC_ASSERT(__builtin_ctz(1) == AGX_INDEX_SIZE_U8);
      STATIC_ASSERT(__builtin_ctz(2) == AGX_INDEX_SIZE_U16);
      STATIC_ASSERT(__builtin_ctz(4) == AGX_INDEX_SIZE_U32);
      assert((idx_size == 1) || (idx_size == 2) || (idx_size == 4));

      agx_pack(out, VDM_STATE, cfg)
         cfg.restart_index_present = true;
      out += AGX_VDM_STATE_LENGTH;

      agx_pack(out, VDM_STATE_RESTART_INDEX, cfg) {
         cfg.value = info->restart_index;
      }
      out += AGX_VDM_STATE_RESTART_INDEX_LENGTH;
   }

   if (!batch->any_draws) {
      agx_pack(out, VDM_BARRIER, cfg) {
         cfg.usc_cache_inval = true;
      }
      out += AGX_VDM_BARRIER_LENGTH;
   }

   batch->any_draws = true;

   agx_pack(out, INDEX_LIST, cfg) {
      cfg.primitive = prim;
      cfg.instance_count_present = true;

      if (indirect != NULL) {
         cfg.indirect_buffer_present = true;
      } else {
         cfg.index_count_present = true;
         cfg.start_present = true;
      }

      if (idx_size) {
         cfg.restart_enable = info->primitive_restart;
         cfg.index_buffer_hi = (ib >> 32);
         cfg.index_size = __builtin_ctz(idx_size);
         cfg.index_buffer_present = true;
         cfg.index_buffer_size_present = true;
      }
   }
   out += AGX_INDEX_LIST_LENGTH;

   if (idx_size) {
      agx_pack(out, INDEX_LIST_BUFFER_LO, cfg) {
         cfg.buffer_lo = ib & BITFIELD_MASK(32);
      }
      out += AGX_INDEX_LIST_BUFFER_LO_LENGTH;
   }

   if (!indirect) {
      agx_pack(out, INDEX_LIST_COUNT, cfg)
         cfg.count = draws->count;
      out += AGX_INDEX_LIST_COUNT_LENGTH;
   }

   agx_pack(out, INDEX_LIST_INSTANCES, cfg)
      cfg.count = info->instance_count;
   out += AGX_INDEX_LIST_INSTANCES_LENGTH;

   if (indirect) {
      struct agx_resource *indirect_rsrc = agx_resource(indirect->buffer);
      uint64_t address = indirect_rsrc->bo->ptr.gpu + indirect->offset;

      agx_pack(out, INDEX_LIST_INDIRECT_BUFFER, cfg) {
         cfg.address_hi = address >> 32;
         cfg.address_lo = address & BITFIELD_MASK(32);
      }
      out += AGX_INDEX_LIST_INDIRECT_BUFFER_LENGTH;
   } else {
      agx_pack(out, INDEX_LIST_START, cfg) {
         cfg.start = idx_size ? draws->index_bias : draws->start;
      }
      out += AGX_INDEX_LIST_START_LENGTH;
   }

   if (idx_size) {
      agx_pack(out, INDEX_LIST_BUFFER_SIZE, cfg) {
         cfg.size = ib_extent;
      }
      out += AGX_INDEX_LIST_BUFFER_SIZE_LENGTH;
   }

   batch->encoder_current = out;
   assert((batch->encoder_current + AGX_VDM_STREAM_LINK_LENGTH) <=
             batch->encoder_end &&
          "Failed to reserve sufficient space in encoder");
   ctx->dirty = 0;

   assert(batch == agx_get_batch(ctx) && "batch should not change under us");

   /* The scissor/zbias arrays are indexed with 16-bit integers, imposing a
    * maximum of UINT16_MAX descriptors.
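    * (Each scissor descriptor occupies AGX_SCISSOR_LENGTH bytes and each
    * depth bias descriptor AGX_DEPTH_BIAS_LENGTH bytes, hence the divisions
    * below.)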
    * Flush if the next draw would overflow */
   if (unlikely((batch->scissor.size / AGX_SCISSOR_LENGTH) >= UINT16_MAX) ||
       (batch->depth_bias.size / AGX_DEPTH_BIAS_LENGTH) >= UINT16_MAX) {
      agx_flush_batch_for_reason(ctx, batch, "Scissor/depth bias overflow");
   }
}

static void
agx_texture_barrier(struct pipe_context *pipe, unsigned flags)
{
   struct agx_context *ctx = agx_context(pipe);
   agx_flush_all(ctx, "Texture barrier");
}

static void
agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
   struct agx_context *ctx = agx_context(pipe);
   struct agx_batch *batch = agx_get_compute_batch(ctx);

   /* To implement load_num_workgroups, the number of workgroups needs to be
    * available in GPU memory. This is either the indirect buffer, or just a
    * buffer we upload ourselves if not indirect.
    */
   if (info->indirect) {
      struct agx_resource *indirect = agx_resource(info->indirect);
      agx_batch_reads(batch, indirect);

      ctx->grid_info = indirect->bo->ptr.gpu + info->indirect_offset;
   } else {
      static_assert(sizeof(info->grid) == 12,
                    "matches indirect dispatch buffer");

      ctx->grid_info = agx_pool_upload_aligned(&batch->pool, info->grid,
                                               sizeof(info->grid), 4);
   }

   struct agx_uncompiled_shader *uncompiled =
      ctx->stage[PIPE_SHADER_COMPUTE].shader;

   /* There is exactly one variant, get it */
   struct agx_compiled_shader *cs =
      _mesa_hash_table_next_entry(uncompiled->variants, NULL)->data;

   agx_batch_add_bo(batch, cs->bo);

   /* TODO: Ensure space if we allow multiple kernels in a batch */
   uint8_t *out = batch->encoder_current;

   unsigned nr_textures = cs->info.nr_bindful_textures;
   agx_pack(out, CDM_HEADER, cfg) {
      if (info->indirect)
         cfg.mode = AGX_CDM_MODE_INDIRECT_GLOBAL;
      else
         cfg.mode = AGX_CDM_MODE_DIRECT;

      cfg.uniform_register_count = cs->info.push_count;
      cfg.preshader_register_count = cs->info.nr_preamble_gprs;
      cfg.texture_state_register_count = nr_textures;
      cfg.sampler_state_register_count =
         translate_sampler_state_count(ctx, cs, PIPE_SHADER_COMPUTE);
      cfg.pipeline = agx_build_pipeline(batch, cs, PIPE_SHADER_COMPUTE,
                                        info->variable_shared_mem);
   }
   out += AGX_CDM_HEADER_LENGTH;

   if (info->indirect) {
      agx_pack(out, CDM_INDIRECT, cfg) {
         cfg.address_hi = ctx->grid_info >> 32;
         cfg.address_lo = ctx->grid_info & BITFIELD64_MASK(32);
      }
      out += AGX_CDM_INDIRECT_LENGTH;
   } else {
      agx_pack(out, CDM_GLOBAL_SIZE, cfg) {
         cfg.x = info->grid[0] * info->block[0];
         cfg.y = info->grid[1] * info->block[1];
         cfg.z = info->grid[2] * info->block[2];
      }
      out += AGX_CDM_GLOBAL_SIZE_LENGTH;
   }

   agx_pack(out, CDM_LOCAL_SIZE, cfg) {
      cfg.x = info->block[0];
      cfg.y = info->block[1];
      cfg.z = info->block[2];
   }
   out += AGX_CDM_LOCAL_SIZE_LENGTH;

   agx_pack(out, CDM_LAUNCH, cfg)
      ;
   out += AGX_CDM_LAUNCH_LENGTH;

   batch->encoder_current = out;
   assert(batch->encoder_current <= batch->encoder_end &&
          "Failed to reserve sufficient space in encoder");

   /* TODO: Dirty tracking? */

   /* TODO: Allow multiple kernels in a batch?
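    * For now, every grid launch is flushed immediately below, which
    * serializes back-to-back dispatches.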
    */
   agx_flush_batch_for_reason(ctx, batch, "Compute kernel serialization");
   ctx->grid_info = 0;
}

void agx_init_state_functions(struct pipe_context *ctx);

void
agx_init_state_functions(struct pipe_context *ctx)
{
   ctx->create_blend_state = agx_create_blend_state;
   ctx->create_depth_stencil_alpha_state = agx_create_zsa_state;
   ctx->create_fs_state = agx_create_shader_state;
   ctx->create_rasterizer_state = agx_create_rs_state;
   ctx->create_sampler_state = agx_create_sampler_state;
   ctx->create_sampler_view = agx_create_sampler_view;
   ctx->create_surface = agx_create_surface;
   ctx->create_vertex_elements_state = agx_create_vertex_elements;
   ctx->create_vs_state = agx_create_shader_state;
   ctx->create_compute_state = agx_create_compute_state;
   ctx->bind_blend_state = agx_bind_blend_state;
   ctx->bind_depth_stencil_alpha_state = agx_bind_zsa_state;
   ctx->bind_sampler_states = agx_bind_sampler_states;
   ctx->bind_fs_state = agx_bind_shader_state;
   ctx->bind_rasterizer_state = agx_bind_rasterizer_state;
   ctx->bind_vertex_elements_state = agx_bind_vertex_elements_state;
   ctx->bind_vs_state = agx_bind_shader_state;
   ctx->bind_compute_state = agx_bind_shader_state;
   ctx->delete_blend_state = agx_delete_state;
   ctx->delete_depth_stencil_alpha_state = agx_delete_state;
   ctx->delete_fs_state = agx_delete_shader_state;
   ctx->delete_compute_state = agx_delete_shader_state;
   ctx->delete_rasterizer_state = agx_delete_state;
   ctx->delete_sampler_state = agx_delete_sampler_state;
   ctx->delete_vertex_elements_state = agx_delete_state;
   ctx->delete_vs_state = agx_delete_shader_state;
   ctx->set_blend_color = agx_set_blend_color;
   ctx->set_clip_state = agx_set_clip_state;
   ctx->set_constant_buffer = agx_set_constant_buffer;
   ctx->set_shader_buffers = agx_set_shader_buffers;
   ctx->set_shader_images = agx_set_shader_images;
   ctx->set_sampler_views = agx_set_sampler_views;
   ctx->set_framebuffer_state = agx_set_framebuffer_state;
   ctx->set_polygon_stipple = agx_set_polygon_stipple;
   ctx->set_sample_mask = agx_set_sample_mask;
   ctx->set_scissor_states = agx_set_scissor_states;
   ctx->set_stencil_ref = agx_set_stencil_ref;
   ctx->set_vertex_buffers = agx_set_vertex_buffers;
   ctx->set_viewport_states = agx_set_viewport_states;
   ctx->sampler_view_destroy = agx_sampler_view_destroy;
   ctx->surface_destroy = agx_surface_destroy;
   ctx->draw_vbo = agx_draw_vbo;
   ctx->launch_grid = agx_launch_grid;
   ctx->create_stream_output_target = agx_create_stream_output_target;
   ctx->stream_output_target_destroy = agx_stream_output_target_destroy;
   ctx->set_stream_output_targets = agx_set_stream_output_targets;
   ctx->texture_barrier = agx_texture_barrier;
}
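
/* The context-creation path (agx_create_context in agx_pipe.c) is expected
 * to call agx_init_state_functions() on the freshly allocated context,
 * roughly:
 *
 *    struct pipe_context *pctx = &ctx->base;
 *    agx_init_state_functions(pctx);
 *
 * after which the Gallium frontend drives everything in this file through
 * the hooks installed above.
 */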