diff options
-rw-r--r-- | lib/mesa/src/amd/ci/radv-bonaire-aco-fails.txt | 28 | ||||
-rw-r--r-- | lib/mesa/src/amd/ci/radv-bonaire-aco-skips.txt | 2 | ||||
-rw-r--r-- | lib/mesa/src/amd/ci/radv-pitcairn-aco-fails.txt | 37 | ||||
-rw-r--r-- | lib/mesa/src/amd/ci/radv-polaris10-aco-fails.txt | 19 | ||||
-rw-r--r-- | lib/mesa/src/amd/vulkan/radv_acceleration_structure.h | 82 | ||||
-rw-r--r-- | lib/mesa/src/asahi/compiler/agx_uniforms.c | 38 | ||||
-rw-r--r-- | lib/mesa/src/asahi/lib/io.h | 139 | ||||
-rw-r--r-- | lib/mesa/src/compiler/isaspec/decode.c | 11 | ||||
-rw-r--r-- | lib/mesa/src/freedreno/computerator/a4xx.c | 9 | ||||
-rw-r--r-- | lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c | 33 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/magic.c | 342 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/asahi/magic.h | 11 |
12 files changed, 253 insertions, 498 deletions
diff --git a/lib/mesa/src/amd/ci/radv-bonaire-aco-fails.txt b/lib/mesa/src/amd/ci/radv-bonaire-aco-fails.txt index 94f9700f6..e69de29bb 100644 --- a/lib/mesa/src/amd/ci/radv-bonaire-aco-fails.txt +++ b/lib/mesa/src/amd/ci/radv-bonaire-aco-fails.txt @@ -1,28 +0,0 @@ -dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.2_bit,Fail -dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.4_bit,Fail -dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.8_bit,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.2_bit,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.4_bit,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.8_bit,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.2_bit,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.4_bit,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.8_bit,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_linear,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_linear_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_nearest,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_nearest_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_linear,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_linear_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_nearest,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_nearest_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_linear,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_linear_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_nearest,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_nearest_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_linear,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_linear_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.linear_nearest,Fail -dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_linear,Fail -dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_nearest,Fail diff --git a/lib/mesa/src/amd/ci/radv-bonaire-aco-skips.txt b/lib/mesa/src/amd/ci/radv-bonaire-aco-skips.txt index e69de29bb..1238ee072 100644 --- a/lib/mesa/src/amd/ci/radv-bonaire-aco-skips.txt +++ b/lib/mesa/src/amd/ci/radv-bonaire-aco-skips.txt @@ -0,0 +1,2 @@ +# This subset of CTS randomly hangs but it's fine when using only one thread. +dEQP-VK.synchronization.* diff --git a/lib/mesa/src/amd/ci/radv-pitcairn-aco-fails.txt b/lib/mesa/src/amd/ci/radv-pitcairn-aco-fails.txt index 3fcf3dfd2..e69de29bb 100644 --- a/lib/mesa/src/amd/ci/radv-pitcairn-aco-fails.txt +++ b/lib/mesa/src/amd/ci/radv-pitcairn-aco-fails.txt @@ -1,37 +0,0 @@ -dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.2_bit,Fail -dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.4_bit,Fail -dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.8_bit,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.2_bit,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.4_bit,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.8_bit,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.2_bit,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.4_bit,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.8_bit,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_linear,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_linear_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_nearest,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_nearest_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_linear,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_linear_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_nearest,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_nearest_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_linear,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_linear_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_nearest,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_nearest_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_linear,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_linear_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.linear_nearest,Fail -dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_linear,Fail -dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_nearest,Fail -dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.2_bit_bind_offset,Fail -dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.4_bit_bind_offset,Fail -dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.8_bit_bind_offset,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.2_bit_bind_offset,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.4_bit_bind_offset,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.8_bit_bind_offset,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.2_bit_bind_offset,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.4_bit_bind_offset,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.8_bit_bind_offset,Fail diff --git a/lib/mesa/src/amd/ci/radv-polaris10-aco-fails.txt b/lib/mesa/src/amd/ci/radv-polaris10-aco-fails.txt index 7864a790e..e69de29bb 100644 --- a/lib/mesa/src/amd/ci/radv-polaris10-aco-fails.txt +++ b/lib/mesa/src/amd/ci/radv-polaris10-aco-fails.txt @@ -1,19 +0,0 @@ -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_linear,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_linear_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_nearest,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_nearest_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_linear,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_linear_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_nearest,Fail -dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_nearest_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_linear,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_linear_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_nearest,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_nearest_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_linear,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_linear_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest,Fail -dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest_integer_texel_coord,Fail -dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.linear_nearest,Fail -dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_linear,Fail -dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_nearest,Fail diff --git a/lib/mesa/src/amd/vulkan/radv_acceleration_structure.h b/lib/mesa/src/amd/vulkan/radv_acceleration_structure.h index df738d89c..062edde50 100644 --- a/lib/mesa/src/amd/vulkan/radv_acceleration_structure.h +++ b/lib/mesa/src/amd/vulkan/radv_acceleration_structure.h @@ -24,21 +24,79 @@ #ifndef RADV_ACCELERATION_STRUCTURE_H #define RADV_ACCELERATION_STRUCTURE_H -#include "bvh/bvh.h" +#include <stdint.h> +#include <vulkan/vulkan.h> -#include "radv_private.h" +struct radv_accel_struct_serialization_header { + uint8_t driver_uuid[VK_UUID_SIZE]; + uint8_t accel_struct_compat[VK_UUID_SIZE]; + uint64_t serialization_size; + uint64_t compacted_size; + uint64_t instance_count; + uint64_t instances[]; +}; + +struct radv_accel_struct_header { + uint32_t root_node_offset; + uint32_t reserved; + float aabb[2][3]; + + /* Everything after this gets updated/copied from the CPU. */ + uint64_t compacted_size; + uint64_t serialization_size; + uint32_t copy_dispatch_size[3]; + uint64_t instance_offset; + uint64_t instance_count; +}; + +struct radv_bvh_triangle_node { + float coords[3][3]; + uint32_t reserved[3]; + uint32_t triangle_id; + /* flags in upper 4 bits */ + uint32_t geometry_id_and_flags; + uint32_t reserved2; + uint32_t id; +}; -struct radv_acceleration_structure { - struct vk_object_base base; +struct radv_bvh_aabb_node { + float aabb[2][3]; + uint32_t primitive_id; + /* flags in upper 4 bits */ + uint32_t geometry_id_and_flags; + uint32_t reserved[8]; +}; + +struct radv_bvh_instance_node { + uint64_t base_ptr; + /* lower 24 bits are the custom instance index, upper 8 bits are the visibility mask */ + uint32_t custom_instance_and_mask; + /* lower 24 bits are the sbt offset, upper 8 bits are VkGeometryInstanceFlagsKHR */ + uint32_t sbt_offset_and_flags; + + /* The translation component is actually a pre-translation instead of a post-translation. If you + * want to get a proper matrix out of it you need to apply the directional component of the + * matrix to it. The pre-translation of the world->object matrix is the same as the + * post-translation of the object->world matrix so this way we can share data between both + * matrices. */ + float wto_matrix[12]; + float aabb[2][3]; + uint32_t instance_id; - struct radeon_winsys_bo *bo; - uint64_t mem_offset; - uint64_t size; - uint64_t va; - VkAccelerationStructureTypeKHR type; + /* Object to world matrix transposed from the initial transform. Translate part is store in the + * wto_matrix. */ + float otw_matrix[9]; }; -VK_DEFINE_NONDISP_HANDLE_CASTS(radv_acceleration_structure, base, VkAccelerationStructureKHR, - VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR) +struct radv_bvh_box16_node { + uint32_t children[4]; + uint32_t coords[4][3]; +}; + +struct radv_bvh_box32_node { + uint32_t children[4]; + float coords[4][2][3]; + uint32_t reserved[4]; +}; -#endif +#endif
\ No newline at end of file diff --git a/lib/mesa/src/asahi/compiler/agx_uniforms.c b/lib/mesa/src/asahi/compiler/agx_uniforms.c index bd8f0a1ed..f357b2f86 100644 --- a/lib/mesa/src/asahi/compiler/agx_uniforms.c +++ b/lib/mesa/src/asahi/compiler/agx_uniforms.c @@ -48,11 +48,8 @@ agx_indexed_sysval(agx_context *ctx, enum agx_push_type type, /* Otherwise, push */ assert(ctx->out->push_ranges < AGX_MAX_PUSH_RANGES); - ctx->out->push_count = ALIGN_POT(ctx->out->push_count, agx_size_align_16(size)); - - unsigned base = ctx->out->push_count; - ctx->out->push_count += length; - assert(ctx->out->push_count <= AGX_NUM_UNIFORMS); + unsigned base = ctx->push_base; + ctx->push_base += length; ctx->out->push[ctx->out->push_ranges++] = (struct agx_push) { .type = type, @@ -63,34 +60,3 @@ agx_indexed_sysval(agx_context *ctx, enum agx_push_type type, return agx_uniform(base + index, size); } - -agx_index -agx_vbo_base(agx_context *ctx, unsigned vbo) -{ - /* Check if we already pushed */ - for (unsigned i = 0; i < ctx->out->push_ranges; ++i) { - struct agx_push push = ctx->out->push[i]; - - if (push.type == AGX_PUSH_VBO_BASE && push.vbo == vbo) { - return agx_uniform(push.base, AGX_SIZE_64); - } - } - - /* Otherwise, push */ - assert(ctx->out->push_ranges < AGX_MAX_PUSH_RANGES); - - ctx->out->push_count = ALIGN_POT(ctx->out->push_count, 4); - - unsigned base = ctx->out->push_count; - ctx->out->push_count += 4; - assert(ctx->out->push_count <= AGX_NUM_UNIFORMS); - - ctx->out->push[ctx->out->push_ranges++] = (struct agx_push) { - .type = AGX_PUSH_VBO_BASE, - .base = base, - .length = 4, - .vbo = vbo, - }; - - return agx_uniform(base, AGX_SIZE_64); -} diff --git a/lib/mesa/src/asahi/lib/io.h b/lib/mesa/src/asahi/lib/io.h index 548df704a..bfac6fc42 100644 --- a/lib/mesa/src/asahi/lib/io.h +++ b/lib/mesa/src/asahi/lib/io.h @@ -32,19 +32,6 @@ #include <IOKit/IODataQueueClient.h> #endif -/* - * This file contains necessary defines for the macOS (IOKit) interface to the - * AGX accelerator, required to build a userspace graphics driver on macOS. - * - * They are not used under Linux. - * - * Information is this file was originally determined independently. More - * recently, names have been augmented via the oob_timestamp code sample from - * Project Zero [1] - * - * [1] https://bugs.chromium.org/p/project-zero/issues/detail?id=1986 - */ - #define AGX_SERVICE_TYPE 0x100005 enum agx_selector { @@ -60,7 +47,7 @@ enum agx_selector { AGX_SELECTOR_FREE_NOTIFICATION_QUEUE = 0x12, AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS = 0x1E, AGX_SELECTOR_GET_VERSION = 0x23, - AGX_NUM_SELECTORS = 0x32 + AGX_NUM_SELECTORS = 0x30 }; static const char *selector_table[AGX_NUM_SELECTORS] = { @@ -111,9 +98,7 @@ static const char *selector_table[AGX_NUM_SELECTORS] = { "unk2C", "unk2D", "unk2E", - "unk2F", - "unk30", - "unk31" + "unk2F" }; static inline const char * @@ -129,10 +114,9 @@ struct agx_create_command_queue_resp { } __attribute__((packed)); struct agx_create_shmem_resp { - /* IOAccelDeviceShmemData */ - void *map; - uint32_t size; - uint32_t id; + void *map; + uint32_t size; + uint32_t id; } __attribute__((packed)); struct agx_create_notification_queue_resp { @@ -146,18 +130,14 @@ struct agx_create_notification_queue_resp { } __attribute__((packed)); struct agx_submit_cmdbuf_req { - /* IOAccelCommandQueueSubmitArgs_Header */ - uint32_t unk0; - uint32_t count; - - /* IOAccelCommandQueueSubmitArgs_Command */ - uint32_t command_buffer_shmem_id; - uint32_t segment_list_shmem_id; - uint64_t unk1B; // 0, new in 12.x - uint64_t notify_1; - uint64_t notify_2; - uint32_t unk2; - uint32_t unk3; + uint32_t unk0; + uint32_t unk1; + uint32_t cmdbuf; + uint32_t mappings; + void *user_0; + void *user_1; + uint32_t unk2; + uint32_t unk3; } __attribute__((packed)); /* Memory allocation isn't really understood yet. By comparing SHADER/CMDBUF_32 @@ -187,58 +167,6 @@ agx_memory_type_name(uint32_t type) } } -struct agx_allocate_resource_req { - uint32_t unk0[5]; - uint32_t mode; - uint32_t unk6[6]; - uint64_t cpu_fixed; - uint64_t cpu_fixed_parent; - uint32_t size; - uint32_t unk17; - - /* Handle of the parent resource when a suballocation is requested. - * Based on an assertion failure, this corresponds to: - * - * -[IOGPUMetalBuffer initWithPrimaryBuffer:heapIndex:bufferIndex:bufferOffset:length:args:argsSize:] - */ - uint32_t parent; - - uint32_t unk19; - uint32_t flags; - uint32_t unk21[3]; -} __attribute__((packed)); - -struct agx_allocate_resource_resp { - /* Returned GPU virtual address */ - uint64_t gpu_va; - - /* Returned CPU virtual address */ - uint64_t cpu; - - uint32_t unk4[3]; - - /* Handle used to identify the resource in the segment list */ - uint32_t handle; - - /* Size of the root resource from which we are allocated. If this is not a - * suballocation, this is equal to the size. - */ - uint64_t root_size; - - /* Globally unique identifier for the resource, shown in Instruments */ - uint32_t guid; - - uint32_t unk11[7]; - - /* Maximum size of the suballocation. For a suballocation, this equals: - * - * sub_size = root_size - (sub_cpu - root_cpu) - * - * For root allocations, this equals the size. - */ - uint64_t sub_size; -} __attribute__((packed)); - struct agx_notification_queue { #ifdef __APPLE__ mach_port_t port; @@ -255,29 +183,32 @@ struct agx_command_queue { struct agx_notification_queue notif; }; -struct agx_map_header { - /* IOAccelSegmentListHeader */ - uint64_t cmdbuf_id; // GUID - uint32_t segment_count; - uint16_t length; - uint16_t unk; // 0x8000 - uint64_t encoder_id; // GUID +/* Not sure if this is hardware or software defined */ - /* IOAccelSegmentResourceListHeader */ - uint32_t kernel_commands_start_offset; - uint32_t kernel_commands_end_offset; - uint32_t padding[2]; - uint32_t total_resources; - uint32_t resource_group_count; +struct agx_map_header { + uint64_t cmdbuf_id; // GUID + uint32_t unk2; // 01 00 00 00 + uint32_t unk3; // 28 05 00 80 + uint64_t encoder_id; // GUID + uint32_t unk6; // 00 00 00 00 + uint32_t cmdbuf_size; + uint32_t nr_handles; + uint32_t nr_entries; + uint32_t indices[6]; } __attribute__((packed)); -/* IOAccelSegmentResourceList_ResourceGroup */ struct agx_map_entry { - uint32_t resource_id[6]; - uint32_t resource_unk[6]; - uint16_t resource_flags[6]; - uint16_t unka; // ff ff - uint16_t resource_count; + uint32_t unkAAA; // 20 00 00 00 + uint32_t unk2; // 00 00 00 00 + uint32_t unk3; // 00 00 00 00 + uint32_t unk4; // 00 00 00 00 + uint32_t unk5; // 00 00 00 00 + uint32_t unk6; // 00 00 00 00 + uint32_t unkBBB; // 01 00 00 00 + uint32_t unk8; // 00 00 00 00 + uint32_t unk9; // 00 00 00 00 + uint32_t unka; // ff ff 01 00 + uint32_t indices[6]; } __attribute__((packed)); uint64_t diff --git a/lib/mesa/src/compiler/isaspec/decode.c b/lib/mesa/src/compiler/isaspec/decode.c index 127773d66..633ef2f76 100644 --- a/lib/mesa/src/compiler/isaspec/decode.c +++ b/lib/mesa/src/compiler/isaspec/decode.c @@ -560,14 +560,6 @@ display_field(struct decode_scope *scope, const char *field_name) num_align = atoi(value); } - /* Special case ':algin=' should only do alignment */ - if (field_name == align) { - while (scope->state->line_column < num_align) - print(state, " "); - - return; - } - /* Special case 'NAME' maps to instruction/bitset name: */ if (!strncmp("NAME", field_name, field_name_len)) { if (options->field_cb) { @@ -765,7 +757,6 @@ void isa_decode(void *bin, int sz, FILE *out, const struct isa_decode_options *options) { const struct isa_decode_options default_options = { - .gpu_id = options ? options->gpu_id : 0, .branch_labels = options ? options->branch_labels : false }; struct decode_state *state; @@ -773,6 +764,8 @@ isa_decode(void *bin, int sz, FILE *out, const struct isa_decode_options *option if (!options) options = &default_options; + util_cpu_detect(); /* needed for _mesa_half_to_float() */ + state = rzalloc_size(NULL, sizeof(*state)); state->options = options; state->num_instr = sz / (BITMASK_WORDS * sizeof(BITSET_WORD)); diff --git a/lib/mesa/src/freedreno/computerator/a4xx.c b/lib/mesa/src/freedreno/computerator/a4xx.c index a3a4c179d..0dbb30712 100644 --- a/lib/mesa/src/freedreno/computerator/a4xx.c +++ b/lib/mesa/src/freedreno/computerator/a4xx.c @@ -135,10 +135,10 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) OUT_PKT0(ring, REG_A4XX_HLSQ_CL_CONTROL_0, 2); OUT_RING(ring, A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID(work_group_id) | - A4XX_HLSQ_CL_CONTROL_0_KERNELDIMCONSTID(regid(63, 0)) | + A4XX_HLSQ_CL_CONTROL_0_UNK12CONSTID(regid(63, 0)) | A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID(local_invocation_id)); OUT_RING(ring, A4XX_HLSQ_CL_CONTROL_1_UNK0CONSTID(regid(63, 0)) | - A4XX_HLSQ_CL_CONTROL_1_WORKGROUPSIZECONSTID(regid(63, 0))); + A4XX_HLSQ_CL_CONTROL_1_UNK12CONSTID(regid(63, 0))); OUT_PKT0(ring, REG_A4XX_HLSQ_CL_KERNEL_CONST, 1); OUT_RING(ring, A4XX_HLSQ_CL_KERNEL_CONST_UNK0CONSTID(regid(63, 0)) | @@ -161,7 +161,7 @@ emit_const(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t constid, { uint32_t align_sz; - assert((constid % 4) == 0); + debug_assert((constid % 4) == 0); /* Overwrite appropriate entries with buffer addresses */ struct fd_bo **replacements = calloc(sizedwords, sizeof(struct fd_bo *)); @@ -341,8 +341,7 @@ a4xx_init(struct fd_device *dev, const struct fd_dev_id *dev_id) .emit_grid = a4xx_emit_grid, }; - a4xx_backend->compiler = ir3_compiler_create(dev, dev_id, - &(struct ir3_compiler_options) {}); + a4xx_backend->compiler = ir3_compiler_create(dev, dev_id, false); a4xx_backend->dev = dev; return &a4xx_backend->base; diff --git a/lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c b/lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c index 6d9b9921e..7d13678c6 100644 --- a/lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c +++ b/lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c @@ -1,12 +1,30 @@ /* * Copyright © 2021 Igalia S.L. - * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ -#include "tu_device.h" +#include "tu_private.h" #include "tu_perfetto.h" -/* Including tu_device.h in tu_perfetto.cc doesn't work, so +/* Including tu_private.h in tu_perfetto.cc doesn't work, so * we need some helper methods to access tu_device. */ @@ -16,8 +34,15 @@ tu_device_get_perfetto_state(struct tu_device *dev) return &dev->perfetto; } +int +tu_device_get_timestamp(struct tu_device *dev, + uint64_t *ts) +{ + return tu_drm_get_timestamp(dev->physical_device, ts); +} + uint32_t -tu_u_trace_submission_data_get_submit_id(const struct tu_u_trace_submission_data *data) +tu_u_trace_flush_data_get_submit_id(const struct tu_u_trace_flush_data *data) { return data->submission_id; } diff --git a/lib/mesa/src/gallium/drivers/asahi/magic.c b/lib/mesa/src/gallium/drivers/asahi/magic.c index a5dfd62da..fa56ede4f 100644 --- a/lib/mesa/src/gallium/drivers/asahi/magic.c +++ b/lib/mesa/src/gallium/drivers/asahi/magic.c @@ -50,244 +50,109 @@ demo_zero(struct agx_pool *pool, unsigned count) return ptr.gpu; } -static size_t -asahi_size_resource(struct pipe_resource *prsrc, unsigned level) -{ - struct agx_resource *rsrc = agx_resource(prsrc); - size_t size = rsrc->layout.size_B; - - if (rsrc->separate_stencil) - size += asahi_size_resource(&rsrc->separate_stencil->base, level); - - return size; -} - -static size_t -asahi_size_surface(struct pipe_surface *surf) -{ - return asahi_size_resource(surf->texture, surf->u.tex.level); -} - -static size_t -asahi_size_attachments(struct pipe_framebuffer_state *framebuffer) -{ - size_t sum = 0; - - for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) - sum += asahi_size_surface(framebuffer->cbufs[i]); - - if (framebuffer->zsbuf) - sum += asahi_size_surface(framebuffer->zsbuf); - - return sum; -} - -static enum agx_iogpu_attachment_type -asahi_classify_attachment(enum pipe_format format) -{ - const struct util_format_description *desc = util_format_description(format); - - if (util_format_has_depth(desc)) - return AGX_IOGPU_ATTACHMENT_TYPE_DEPTH; - else if (util_format_has_stencil(desc)) - return AGX_IOGPU_ATTACHMENT_TYPE_STENCIL; - else - return AGX_IOGPU_ATTACHMENT_TYPE_COLOUR; -} - -static uint64_t -agx_map_surface_resource(struct pipe_surface *surf, struct agx_resource *rsrc) -{ - return agx_map_texture_gpu(rsrc, surf->u.tex.first_layer); -} - -static uint64_t -agx_map_surface(struct pipe_surface *surf) -{ - return agx_map_surface_resource(surf, agx_resource(surf->texture)); -} - -static void -asahi_pack_iogpu_attachment(void *out, struct agx_resource *rsrc, - unsigned total_size) -{ - agx_pack(out, IOGPU_ATTACHMENT, cfg) { - cfg.type = asahi_classify_attachment(rsrc->layout.format); - cfg.address = rsrc->bo->ptr.gpu; - cfg.size = rsrc->layout.size_B; - cfg.percent = (100 * cfg.size) / total_size; - } -} - -static unsigned -asahi_pack_iogpu_attachments(void *out, struct pipe_framebuffer_state *framebuffer) -{ - unsigned total_attachment_size = asahi_size_attachments(framebuffer); - struct agx_iogpu_attachment_packed *attachments = out; - unsigned nr = 0; - - for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) { - asahi_pack_iogpu_attachment(attachments + (nr++), - agx_resource(framebuffer->cbufs[i]->texture), - total_attachment_size); - } - - if (framebuffer->zsbuf) { - struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture); - - asahi_pack_iogpu_attachment(attachments + (nr++), - rsrc, total_attachment_size); - - if (rsrc->separate_stencil) { - asahi_pack_iogpu_attachment(attachments + (nr++), - rsrc->separate_stencil, - total_attachment_size); - } - } - - return nr; -} - unsigned demo_cmdbuf(uint64_t *buf, size_t size, struct agx_pool *pool, - struct pipe_framebuffer_state *framebuffer, uint64_t encoder_ptr, uint64_t encoder_id, uint64_t scissor_ptr, - uint64_t depth_bias_ptr, + unsigned width, unsigned height, + uint32_t pipeline_null, uint32_t pipeline_clear, - uint32_t pipeline_load, uint32_t pipeline_store, - bool clear_pipeline_textures, - unsigned clear_buffers, - double clear_depth, - unsigned clear_stencil) + uint64_t rt0, + bool clear_pipeline_textures) { - bool should_clear_depth = clear_buffers & PIPE_CLEAR_DEPTH; - bool should_clear_stencil = clear_buffers & PIPE_CLEAR_STENCIL; - uint32_t *map = (uint32_t *) buf; - memset(map, 0, 518 * 4); - - uint64_t deflake_buffer = demo_zero(pool, 0x7e0); - uint64_t deflake_1 = deflake_buffer + 0x2a0; - uint64_t deflake_2 = deflake_buffer + 0x20; - - uint64_t unk_buffer_2 = demo_zero(pool, 0x8000); - - uint64_t depth_buffer = 0; - uint64_t stencil_buffer = 0; - - agx_pack(map + 16, IOGPU_GRAPHICS, cfg) { - cfg.opengl_depth_clipping = true; - - cfg.deflake_1 = deflake_1; - cfg.deflake_2 = deflake_2; - cfg.deflake_3 = deflake_buffer; - - cfg.clear_pipeline_bind = 0xffff8002 | (clear_pipeline_textures ? 0x210 : 0); - cfg.clear_pipeline = pipeline_clear; - - /* store pipeline used when entire frame completes */ - cfg.store_pipeline_bind = 0x12; - cfg.store_pipeline = pipeline_store; - cfg.scissor_array = scissor_ptr; - cfg.depth_bias_array = depth_bias_ptr; - - if (framebuffer->zsbuf) { - struct pipe_surface *zsbuf = framebuffer->zsbuf; - const struct util_format_description *desc = - util_format_description(agx_resource(zsbuf->texture)->layout.format); - - assert(desc->format == PIPE_FORMAT_Z32_FLOAT || - desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || - desc->format == PIPE_FORMAT_S8_UINT); - - cfg.depth_width = framebuffer->width; - cfg.depth_height = framebuffer->height; - - if (util_format_has_depth(desc)) { - depth_buffer = agx_map_surface(zsbuf); - - cfg.zls_control.z_store_enable = true; - cfg.zls_control.z_load_enable = !should_clear_depth; - } else { - stencil_buffer = agx_map_surface(zsbuf); - cfg.zls_control.s_store_enable = true; - cfg.zls_control.s_load_enable = !should_clear_stencil; - } + memset(map, 0, 474 * 4); - if (agx_resource(zsbuf->texture)->separate_stencil) { - stencil_buffer = agx_map_surface_resource(zsbuf, - agx_resource(zsbuf->texture)->separate_stencil); + map[54] = 0x6b0003; + map[55] = 0x3a0012; + map[56] = 1; - cfg.zls_control.s_store_enable = true; - cfg.zls_control.s_load_enable = !should_clear_stencil; - } + map[106] = 1; + map[108] = 0x1c; + map[112] = 0xffffffff; + map[113] = 0xffffffff; + map[114] = 0xffffffff; - /* It's unclear how tile size is conveyed for depth/stencil targets, - * which interactions with mipmapping (for example of a 33x33 - * depth/stencil attachment) - */ - if (zsbuf->u.tex.level != 0) - unreachable("todo: mapping other levels"); - - cfg.depth_buffer_1 = depth_buffer; - cfg.depth_buffer_2 = depth_buffer; - - cfg.stencil_buffer_1 = stencil_buffer; - cfg.stencil_buffer_2 = stencil_buffer; - } - - cfg.width_1 = framebuffer->width; - cfg.height_1 = framebuffer->height; - cfg.pointer = unk_buffer_2; - - cfg.set_when_reloading_z_or_s_1 = clear_pipeline_textures; - - if (depth_buffer && !should_clear_depth) { - cfg.set_when_reloading_z_or_s_1 = true; - cfg.set_when_reloading_z_or_s_2 = true; - } - - if (stencil_buffer && !should_clear_stencil) { - cfg.set_when_reloading_z_or_s_1 = true; - cfg.set_when_reloading_z_or_s_2 = true; - } - - cfg.depth_clear_value = fui(clear_depth); - cfg.stencil_clear_value = clear_stencil & 0xff; - - cfg.partial_reload_pipeline_bind = 0xffff8212; - cfg.partial_reload_pipeline = pipeline_load; - - cfg.partial_store_pipeline_bind = 0x12; - cfg.partial_store_pipeline = pipeline_store; + uint64_t unk_buffer = demo_zero(pool, 0x1000); + uint64_t unk_buffer_2 = demo_zero(pool, 0x8000); - cfg.depth_buffer_3 = depth_buffer; - cfg.stencil_buffer_3 = stencil_buffer; - cfg.encoder_id = encoder_id; - cfg.unknown_buffer = demo_unk6(pool); - cfg.width_2 = framebuffer->width; - cfg.height_2 = framebuffer->height; - cfg.unk_352 = clear_pipeline_textures ? 0x0 : 0x1; + // This is a pipeline bind + map[156] = 0xffff8002 | (clear_pipeline_textures ? 0x210 : 0); + map[158] = pipeline_clear | 0x4; + map[163] = 0x12; + map[164] = pipeline_store | 0x4; + map[166] = scissor_ptr & 0xFFFFFFFF; + map[167] = scissor_ptr >> 32; + map[168] = unk_buffer & 0xFFFFFFFF; + map[169] = unk_buffer >> 32; + + map[220] = 4; + map[222] = 0xc000; + map[224] = width; + map[225] = height; + map[226] = unk_buffer_2 & 0xFFFFFFFF; + map[227] = unk_buffer_2 >> 32; + + float depth_clear = 1.0; + uint8_t stencil_clear = 0; + + map[278] = fui(depth_clear); + map[279] = (0x3 << 8) | stencil_clear; + map[282] = 0x1000000; + map[284] = 0xffffffff; + map[285] = 0xffffffff; + map[286] = 0xffffffff; + + map[298] = 0xffff8212; + map[300] = pipeline_null | 0x4; + map[305] = 0x12; + map[306] = pipeline_store | 0x4; + map[352] = 1; + map[360] = 0x1c; + map[362] = encoder_id; + map[365] = 0xffffffff; + map[366] = 1; + + uint64_t unk6 = demo_unk6(pool); + map[370] = unk6 & 0xFFFFFFFF; + map[371] = unk6 >> 32; + + map[374] = width; + map[375] = height; + map[376] = 1; + map[377] = 8; + map[378] = 8; + + map[393] = 8; + map[394] = 32; + map[395] = 32; + map[396] = 1; + + unsigned offset_unk = (458 * 4); + unsigned offset_attachments = (470 * 4); + unsigned nr_attachments = 1; + + map[473] = nr_attachments; + + /* A single attachment follows, depth/stencil have their own attachments */ + agx_pack((map + (offset_attachments / 4) + 4), IOGPU_ATTACHMENT, cfg) { + cfg.address = rt0; + cfg.type = AGX_IOGPU_ATTACHMENT_TYPE_COLOUR; + cfg.unk_1 = 0x80000000; + cfg.unk_2 = 0x5; + cfg.bytes_per_pixel = 4; + cfg.percent = 100; } - unsigned offset_unk = (484 * 4); - unsigned offset_attachments = (496 * 4); - - unsigned nr_attachments = - asahi_pack_iogpu_attachments(map + (offset_attachments / 4) + 4, - framebuffer); - - map[(offset_attachments / 4) + 3] = nr_attachments; - unsigned total_size = offset_attachments + (AGX_IOGPU_ATTACHMENT_LENGTH * nr_attachments) + 16; agx_pack(map, IOGPU_HEADER, cfg) { cfg.total_size = total_size; - cfg.attachment_offset = offset_attachments; + cfg.attachment_offset_1 = offset_attachments; + cfg.attachment_offset_2 = offset_attachments; cfg.attachment_length = nr_attachments * AGX_IOGPU_ATTACHMENT_LENGTH; cfg.unknown_offset = offset_unk; cfg.encoder = encoder_ptr; @@ -299,23 +164,18 @@ demo_cmdbuf(uint64_t *buf, size_t size, static struct agx_map_header demo_map_header(uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size, unsigned count) { - /* Structure: header followed by resource groups. For now, we use a single - * resource group for every resource. This could be optimized. - */ - unsigned length = sizeof(struct agx_map_header); - length += count * sizeof(struct agx_map_entry); - assert(length < 0x10000); - return (struct agx_map_header) { .cmdbuf_id = cmdbuf_id, - .segment_count = 1, - .length = length, + .unk2 = 0x1, + .unk3 = 0x528, // 1320 .encoder_id = encoder_id, - .kernel_commands_start_offset = 0, - .kernel_commands_end_offset = cmdbuf_size, - .total_resources = count, - .resource_group_count = count, - .unk = 0x8000, + .unk6 = 0x0, + .cmdbuf_size = cmdbuf_size, + + /* +1 for the sentinel ending */ + .nr_entries = count + 1, + .nr_handles = count + 1, + .indices = {0x0b}, }; } @@ -324,7 +184,7 @@ demo_mem_map(void *map, size_t size, unsigned *handles, unsigned count, uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size) { struct agx_map_header *header = map; - struct agx_map_entry *entries = (struct agx_map_entry *) (((uint8_t *) map) + sizeof(*header)); + struct agx_map_entry *entries = (struct agx_map_entry *) (((uint8_t *) map) + 0x40); struct agx_map_entry *end = (struct agx_map_entry *) (((uint8_t *) map) + size); /* Header precedes the entry */ @@ -334,10 +194,18 @@ demo_mem_map(void *map, size_t size, unsigned *handles, unsigned count, for (unsigned i = 0; i < count; ++i) { assert((entries + i) < end); entries[i] = (struct agx_map_entry) { - .resource_id = { handles[i] }, - .resource_unk = { 0x20 }, - .resource_flags = { 0x1 }, - .resource_count = 1 + .unkAAA = 0x20, + .unkBBB = 0x1, + .unka = 0x1ffff, + .indices = {handles[i]} }; } + + /* Final entry is a sentinel */ + assert((entries + count) < end); + entries[count] = (struct agx_map_entry) { + .unkAAA = 0x40, + .unkBBB = 0x1, + .unka = 0x1ffff, + }; } diff --git a/lib/mesa/src/gallium/drivers/asahi/magic.h b/lib/mesa/src/gallium/drivers/asahi/magic.h index 0231afdc2..98215d367 100644 --- a/lib/mesa/src/gallium/drivers/asahi/magic.h +++ b/lib/mesa/src/gallium/drivers/asahi/magic.h @@ -27,18 +27,15 @@ unsigned demo_cmdbuf(uint64_t *buf, size_t size, struct agx_pool *pool, - struct pipe_framebuffer_state *framebuffer, uint64_t encoder_ptr, uint64_t encoder_id, uint64_t scissor_ptr, - uint64_t depth_bias_ptr, + unsigned width, unsigned height, + uint32_t pipeline_null, uint32_t pipeline_clear, - uint32_t pipeline_load, uint32_t pipeline_store, - bool clear_pipeline_textures, - unsigned clear_buffers, - double clear_depth, - unsigned clear_stencil); + uint64_t rt0, + bool clear_pipeline_textures); void demo_mem_map(void *map, size_t size, unsigned *handles, |