summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/mesa/src/amd/ci/radv-bonaire-aco-fails.txt28
-rw-r--r--lib/mesa/src/amd/ci/radv-bonaire-aco-skips.txt2
-rw-r--r--lib/mesa/src/amd/ci/radv-pitcairn-aco-fails.txt37
-rw-r--r--lib/mesa/src/amd/ci/radv-polaris10-aco-fails.txt19
-rw-r--r--lib/mesa/src/amd/vulkan/radv_acceleration_structure.h82
-rw-r--r--lib/mesa/src/asahi/compiler/agx_uniforms.c38
-rw-r--r--lib/mesa/src/asahi/lib/io.h139
-rw-r--r--lib/mesa/src/compiler/isaspec/decode.c11
-rw-r--r--lib/mesa/src/freedreno/computerator/a4xx.c9
-rw-r--r--lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c33
-rw-r--r--lib/mesa/src/gallium/drivers/asahi/magic.c342
-rw-r--r--lib/mesa/src/gallium/drivers/asahi/magic.h11
12 files changed, 253 insertions, 498 deletions
diff --git a/lib/mesa/src/amd/ci/radv-bonaire-aco-fails.txt b/lib/mesa/src/amd/ci/radv-bonaire-aco-fails.txt
index 94f9700f6..e69de29bb 100644
--- a/lib/mesa/src/amd/ci/radv-bonaire-aco-fails.txt
+++ b/lib/mesa/src/amd/ci/radv-bonaire-aco-fails.txt
@@ -1,28 +0,0 @@
-dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.2_bit,Fail
-dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.4_bit,Fail
-dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.8_bit,Fail
-dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.2_bit,Fail
-dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.4_bit,Fail
-dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.8_bit,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.2_bit,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.4_bit,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.8_bit,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_linear,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_linear_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_nearest,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_nearest_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_linear,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_linear_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_nearest,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_nearest_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_linear,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_linear_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_nearest,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_nearest_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_linear,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_linear_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.linear_nearest,Fail
-dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_linear,Fail
-dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_nearest,Fail
diff --git a/lib/mesa/src/amd/ci/radv-bonaire-aco-skips.txt b/lib/mesa/src/amd/ci/radv-bonaire-aco-skips.txt
index e69de29bb..1238ee072 100644
--- a/lib/mesa/src/amd/ci/radv-bonaire-aco-skips.txt
+++ b/lib/mesa/src/amd/ci/radv-bonaire-aco-skips.txt
@@ -0,0 +1,2 @@
+# This subset of CTS randomly hangs but it's fine when using only one thread.
+dEQP-VK.synchronization.*
diff --git a/lib/mesa/src/amd/ci/radv-pitcairn-aco-fails.txt b/lib/mesa/src/amd/ci/radv-pitcairn-aco-fails.txt
index 3fcf3dfd2..e69de29bb 100644
--- a/lib/mesa/src/amd/ci/radv-pitcairn-aco-fails.txt
+++ b/lib/mesa/src/amd/ci/radv-pitcairn-aco-fails.txt
@@ -1,37 +0,0 @@
-dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.2_bit,Fail
-dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.4_bit,Fail
-dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.8_bit,Fail
-dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.2_bit,Fail
-dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.4_bit,Fail
-dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.8_bit,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.2_bit,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.4_bit,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.8_bit,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_linear,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_linear_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_nearest,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_nearest_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_linear,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_linear_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_nearest,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_nearest_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_linear,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_linear_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_nearest,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_nearest_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_linear,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_linear_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.linear_nearest,Fail
-dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_linear,Fail
-dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_nearest,Fail
-dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.2_bit_bind_offset,Fail
-dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.4_bit_bind_offset,Fail
-dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.layer_copy_before_resolving.8_bit_bind_offset,Fail
-dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.2_bit_bind_offset,Fail
-dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.4_bit_bind_offset,Fail
-dEQP-VK.api.copy_and_blit.core.resolve_image.layer_copy_before_resolving.8_bit_bind_offset,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.2_bit_bind_offset,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.4_bit_bind_offset,Fail
-dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.layer_copy_before_resolving.8_bit_bind_offset,Fail
diff --git a/lib/mesa/src/amd/ci/radv-polaris10-aco-fails.txt b/lib/mesa/src/amd/ci/radv-polaris10-aco-fails.txt
index 7864a790e..e69de29bb 100644
--- a/lib/mesa/src/amd/ci/radv-polaris10-aco-fails.txt
+++ b/lib/mesa/src/amd/ci/radv-polaris10-aco-fails.txt
@@ -1,19 +0,0 @@
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_linear,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_linear_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_nearest,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.linear_nearest_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_linear,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_linear_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_nearest,Fail
-dEQP-VK.texture.mipmap.2d.image_view_min_lod.base_level.nearest_nearest_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_linear,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_linear_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_nearest,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.linear_nearest_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_linear,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_linear_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest,Fail
-dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest_integer_texel_coord,Fail
-dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.linear_nearest,Fail
-dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_linear,Fail
-dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_nearest,Fail
diff --git a/lib/mesa/src/amd/vulkan/radv_acceleration_structure.h b/lib/mesa/src/amd/vulkan/radv_acceleration_structure.h
index df738d89c..062edde50 100644
--- a/lib/mesa/src/amd/vulkan/radv_acceleration_structure.h
+++ b/lib/mesa/src/amd/vulkan/radv_acceleration_structure.h
@@ -24,21 +24,79 @@
#ifndef RADV_ACCELERATION_STRUCTURE_H
#define RADV_ACCELERATION_STRUCTURE_H
-#include "bvh/bvh.h"
+#include <stdint.h>
+#include <vulkan/vulkan.h>
-#include "radv_private.h"
+struct radv_accel_struct_serialization_header {
+ uint8_t driver_uuid[VK_UUID_SIZE];
+ uint8_t accel_struct_compat[VK_UUID_SIZE];
+ uint64_t serialization_size;
+ uint64_t compacted_size;
+ uint64_t instance_count;
+ uint64_t instances[];
+};
+
+struct radv_accel_struct_header {
+ uint32_t root_node_offset;
+ uint32_t reserved;
+ float aabb[2][3];
+
+ /* Everything after this gets updated/copied from the CPU. */
+ uint64_t compacted_size;
+ uint64_t serialization_size;
+ uint32_t copy_dispatch_size[3];
+ uint64_t instance_offset;
+ uint64_t instance_count;
+};
+
+struct radv_bvh_triangle_node {
+ float coords[3][3];
+ uint32_t reserved[3];
+ uint32_t triangle_id;
+ /* flags in upper 4 bits */
+ uint32_t geometry_id_and_flags;
+ uint32_t reserved2;
+ uint32_t id;
+};
-struct radv_acceleration_structure {
- struct vk_object_base base;
+struct radv_bvh_aabb_node {
+ float aabb[2][3];
+ uint32_t primitive_id;
+ /* flags in upper 4 bits */
+ uint32_t geometry_id_and_flags;
+ uint32_t reserved[8];
+};
+
+struct radv_bvh_instance_node {
+ uint64_t base_ptr;
+ /* lower 24 bits are the custom instance index, upper 8 bits are the visibility mask */
+ uint32_t custom_instance_and_mask;
+ /* lower 24 bits are the sbt offset, upper 8 bits are VkGeometryInstanceFlagsKHR */
+ uint32_t sbt_offset_and_flags;
+
+ /* The translation component is actually a pre-translation instead of a post-translation. If you
+ * want to get a proper matrix out of it you need to apply the directional component of the
+ * matrix to it. The pre-translation of the world->object matrix is the same as the
+ * post-translation of the object->world matrix so this way we can share data between both
+ * matrices. */
+ float wto_matrix[12];
+ float aabb[2][3];
+ uint32_t instance_id;
- struct radeon_winsys_bo *bo;
- uint64_t mem_offset;
- uint64_t size;
- uint64_t va;
- VkAccelerationStructureTypeKHR type;
+ /* Object to world matrix transposed from the initial transform. The translation is stored in
+ * the wto_matrix. */
+ float otw_matrix[9];
};
-VK_DEFINE_NONDISP_HANDLE_CASTS(radv_acceleration_structure, base, VkAccelerationStructureKHR,
- VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
+struct radv_bvh_box16_node {
+ uint32_t children[4];
+ uint32_t coords[4][3];
+};
+
+struct radv_bvh_box32_node {
+ uint32_t children[4];
+ float coords[4][2][3];
+ uint32_t reserved[4];
+};
-#endif
+#endif \ No newline at end of file
diff --git a/lib/mesa/src/asahi/compiler/agx_uniforms.c b/lib/mesa/src/asahi/compiler/agx_uniforms.c
index bd8f0a1ed..f357b2f86 100644
--- a/lib/mesa/src/asahi/compiler/agx_uniforms.c
+++ b/lib/mesa/src/asahi/compiler/agx_uniforms.c
@@ -48,11 +48,8 @@ agx_indexed_sysval(agx_context *ctx, enum agx_push_type type,
/* Otherwise, push */
assert(ctx->out->push_ranges < AGX_MAX_PUSH_RANGES);
- ctx->out->push_count = ALIGN_POT(ctx->out->push_count, agx_size_align_16(size));
-
- unsigned base = ctx->out->push_count;
- ctx->out->push_count += length;
- assert(ctx->out->push_count <= AGX_NUM_UNIFORMS);
+ unsigned base = ctx->push_base;
+ ctx->push_base += length;
ctx->out->push[ctx->out->push_ranges++] = (struct agx_push) {
.type = type,
@@ -63,34 +60,3 @@ agx_indexed_sysval(agx_context *ctx, enum agx_push_type type,
return agx_uniform(base + index, size);
}
-
-agx_index
-agx_vbo_base(agx_context *ctx, unsigned vbo)
-{
- /* Check if we already pushed */
- for (unsigned i = 0; i < ctx->out->push_ranges; ++i) {
- struct agx_push push = ctx->out->push[i];
-
- if (push.type == AGX_PUSH_VBO_BASE && push.vbo == vbo) {
- return agx_uniform(push.base, AGX_SIZE_64);
- }
- }
-
- /* Otherwise, push */
- assert(ctx->out->push_ranges < AGX_MAX_PUSH_RANGES);
-
- ctx->out->push_count = ALIGN_POT(ctx->out->push_count, 4);
-
- unsigned base = ctx->out->push_count;
- ctx->out->push_count += 4;
- assert(ctx->out->push_count <= AGX_NUM_UNIFORMS);
-
- ctx->out->push[ctx->out->push_ranges++] = (struct agx_push) {
- .type = AGX_PUSH_VBO_BASE,
- .base = base,
- .length = 4,
- .vbo = vbo,
- };
-
- return agx_uniform(base, AGX_SIZE_64);
-}
diff --git a/lib/mesa/src/asahi/lib/io.h b/lib/mesa/src/asahi/lib/io.h
index 548df704a..bfac6fc42 100644
--- a/lib/mesa/src/asahi/lib/io.h
+++ b/lib/mesa/src/asahi/lib/io.h
@@ -32,19 +32,6 @@
#include <IOKit/IODataQueueClient.h>
#endif
-/*
- * This file contains necessary defines for the macOS (IOKit) interface to the
- * AGX accelerator, required to build a userspace graphics driver on macOS.
- *
- * They are not used under Linux.
- *
- * Information is this file was originally determined independently. More
- * recently, names have been augmented via the oob_timestamp code sample from
- * Project Zero [1]
- *
- * [1] https://bugs.chromium.org/p/project-zero/issues/detail?id=1986
- */
-
#define AGX_SERVICE_TYPE 0x100005
enum agx_selector {
@@ -60,7 +47,7 @@ enum agx_selector {
AGX_SELECTOR_FREE_NOTIFICATION_QUEUE = 0x12,
AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS = 0x1E,
AGX_SELECTOR_GET_VERSION = 0x23,
- AGX_NUM_SELECTORS = 0x32
+ AGX_NUM_SELECTORS = 0x30
};
static const char *selector_table[AGX_NUM_SELECTORS] = {
@@ -111,9 +98,7 @@ static const char *selector_table[AGX_NUM_SELECTORS] = {
"unk2C",
"unk2D",
"unk2E",
- "unk2F",
- "unk30",
- "unk31"
+ "unk2F"
};
static inline const char *
@@ -129,10 +114,9 @@ struct agx_create_command_queue_resp {
} __attribute__((packed));
struct agx_create_shmem_resp {
- /* IOAccelDeviceShmemData */
- void *map;
- uint32_t size;
- uint32_t id;
+ void *map;
+ uint32_t size;
+ uint32_t id;
} __attribute__((packed));
struct agx_create_notification_queue_resp {
@@ -146,18 +130,14 @@ struct agx_create_notification_queue_resp {
} __attribute__((packed));
struct agx_submit_cmdbuf_req {
- /* IOAccelCommandQueueSubmitArgs_Header */
- uint32_t unk0;
- uint32_t count;
-
- /* IOAccelCommandQueueSubmitArgs_Command */
- uint32_t command_buffer_shmem_id;
- uint32_t segment_list_shmem_id;
- uint64_t unk1B; // 0, new in 12.x
- uint64_t notify_1;
- uint64_t notify_2;
- uint32_t unk2;
- uint32_t unk3;
+ uint32_t unk0;
+ uint32_t unk1;
+ uint32_t cmdbuf;
+ uint32_t mappings;
+ void *user_0;
+ void *user_1;
+ uint32_t unk2;
+ uint32_t unk3;
} __attribute__((packed));
/* Memory allocation isn't really understood yet. By comparing SHADER/CMDBUF_32
@@ -187,58 +167,6 @@ agx_memory_type_name(uint32_t type)
}
}
-struct agx_allocate_resource_req {
- uint32_t unk0[5];
- uint32_t mode;
- uint32_t unk6[6];
- uint64_t cpu_fixed;
- uint64_t cpu_fixed_parent;
- uint32_t size;
- uint32_t unk17;
-
- /* Handle of the parent resource when a suballocation is requested.
- * Based on an assertion failure, this corresponds to:
- *
- * -[IOGPUMetalBuffer initWithPrimaryBuffer:heapIndex:bufferIndex:bufferOffset:length:args:argsSize:]
- */
- uint32_t parent;
-
- uint32_t unk19;
- uint32_t flags;
- uint32_t unk21[3];
-} __attribute__((packed));
-
-struct agx_allocate_resource_resp {
- /* Returned GPU virtual address */
- uint64_t gpu_va;
-
- /* Returned CPU virtual address */
- uint64_t cpu;
-
- uint32_t unk4[3];
-
- /* Handle used to identify the resource in the segment list */
- uint32_t handle;
-
- /* Size of the root resource from which we are allocated. If this is not a
- * suballocation, this is equal to the size.
- */
- uint64_t root_size;
-
- /* Globally unique identifier for the resource, shown in Instruments */
- uint32_t guid;
-
- uint32_t unk11[7];
-
- /* Maximum size of the suballocation. For a suballocation, this equals:
- *
- * sub_size = root_size - (sub_cpu - root_cpu)
- *
- * For root allocations, this equals the size.
- */
- uint64_t sub_size;
-} __attribute__((packed));
-
struct agx_notification_queue {
#ifdef __APPLE__
mach_port_t port;
@@ -255,29 +183,32 @@ struct agx_command_queue {
struct agx_notification_queue notif;
};
-struct agx_map_header {
- /* IOAccelSegmentListHeader */
- uint64_t cmdbuf_id; // GUID
- uint32_t segment_count;
- uint16_t length;
- uint16_t unk; // 0x8000
- uint64_t encoder_id; // GUID
+/* Not sure if this is hardware or software defined */
- /* IOAccelSegmentResourceListHeader */
- uint32_t kernel_commands_start_offset;
- uint32_t kernel_commands_end_offset;
- uint32_t padding[2];
- uint32_t total_resources;
- uint32_t resource_group_count;
+struct agx_map_header {
+ uint64_t cmdbuf_id; // GUID
+ uint32_t unk2; // 01 00 00 00
+ uint32_t unk3; // 28 05 00 80
+ uint64_t encoder_id; // GUID
+ uint32_t unk6; // 00 00 00 00
+ uint32_t cmdbuf_size;
+ uint32_t nr_handles;
+ uint32_t nr_entries;
+ uint32_t indices[6];
} __attribute__((packed));
-/* IOAccelSegmentResourceList_ResourceGroup */
struct agx_map_entry {
- uint32_t resource_id[6];
- uint32_t resource_unk[6];
- uint16_t resource_flags[6];
- uint16_t unka; // ff ff
- uint16_t resource_count;
+ uint32_t unkAAA; // 20 00 00 00
+ uint32_t unk2; // 00 00 00 00
+ uint32_t unk3; // 00 00 00 00
+ uint32_t unk4; // 00 00 00 00
+ uint32_t unk5; // 00 00 00 00
+ uint32_t unk6; // 00 00 00 00
+ uint32_t unkBBB; // 01 00 00 00
+ uint32_t unk8; // 00 00 00 00
+ uint32_t unk9; // 00 00 00 00
+ uint32_t unka; // ff ff 01 00
+ uint32_t indices[6];
} __attribute__((packed));
uint64_t
diff --git a/lib/mesa/src/compiler/isaspec/decode.c b/lib/mesa/src/compiler/isaspec/decode.c
index 127773d66..633ef2f76 100644
--- a/lib/mesa/src/compiler/isaspec/decode.c
+++ b/lib/mesa/src/compiler/isaspec/decode.c
@@ -560,14 +560,6 @@ display_field(struct decode_scope *scope, const char *field_name)
num_align = atoi(value);
}
- /* Special case ':algin=' should only do alignment */
- if (field_name == align) {
- while (scope->state->line_column < num_align)
- print(state, " ");
-
- return;
- }
-
/* Special case 'NAME' maps to instruction/bitset name: */
if (!strncmp("NAME", field_name, field_name_len)) {
if (options->field_cb) {
@@ -765,7 +757,6 @@ void
isa_decode(void *bin, int sz, FILE *out, const struct isa_decode_options *options)
{
const struct isa_decode_options default_options = {
- .gpu_id = options ? options->gpu_id : 0,
.branch_labels = options ? options->branch_labels : false
};
struct decode_state *state;
@@ -773,6 +764,8 @@ isa_decode(void *bin, int sz, FILE *out, const struct isa_decode_options *option
if (!options)
options = &default_options;
+ util_cpu_detect(); /* needed for _mesa_half_to_float() */
+
state = rzalloc_size(NULL, sizeof(*state));
state->options = options;
state->num_instr = sz / (BITMASK_WORDS * sizeof(BITSET_WORD));
diff --git a/lib/mesa/src/freedreno/computerator/a4xx.c b/lib/mesa/src/freedreno/computerator/a4xx.c
index a3a4c179d..0dbb30712 100644
--- a/lib/mesa/src/freedreno/computerator/a4xx.c
+++ b/lib/mesa/src/freedreno/computerator/a4xx.c
@@ -135,10 +135,10 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
OUT_PKT0(ring, REG_A4XX_HLSQ_CL_CONTROL_0, 2);
OUT_RING(ring, A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID(work_group_id) |
- A4XX_HLSQ_CL_CONTROL_0_KERNELDIMCONSTID(regid(63, 0)) |
+ A4XX_HLSQ_CL_CONTROL_0_UNK12CONSTID(regid(63, 0)) |
A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID(local_invocation_id));
OUT_RING(ring, A4XX_HLSQ_CL_CONTROL_1_UNK0CONSTID(regid(63, 0)) |
- A4XX_HLSQ_CL_CONTROL_1_WORKGROUPSIZECONSTID(regid(63, 0)));
+ A4XX_HLSQ_CL_CONTROL_1_UNK12CONSTID(regid(63, 0)));
OUT_PKT0(ring, REG_A4XX_HLSQ_CL_KERNEL_CONST, 1);
OUT_RING(ring, A4XX_HLSQ_CL_KERNEL_CONST_UNK0CONSTID(regid(63, 0)) |
@@ -161,7 +161,7 @@ emit_const(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t constid,
{
uint32_t align_sz;
- assert((constid % 4) == 0);
+ debug_assert((constid % 4) == 0);
/* Overwrite appropriate entries with buffer addresses */
struct fd_bo **replacements = calloc(sizedwords, sizeof(struct fd_bo *));
@@ -341,8 +341,7 @@ a4xx_init(struct fd_device *dev, const struct fd_dev_id *dev_id)
.emit_grid = a4xx_emit_grid,
};
- a4xx_backend->compiler = ir3_compiler_create(dev, dev_id,
- &(struct ir3_compiler_options) {});
+ a4xx_backend->compiler = ir3_compiler_create(dev, dev_id, false);
a4xx_backend->dev = dev;
return &a4xx_backend->base;
diff --git a/lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c b/lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c
index 6d9b9921e..7d13678c6 100644
--- a/lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c
+++ b/lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c
@@ -1,12 +1,30 @@
/*
* Copyright © 2021 Igalia S.L.
- * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
*/
-#include "tu_device.h"
+#include "tu_private.h"
#include "tu_perfetto.h"
-/* Including tu_device.h in tu_perfetto.cc doesn't work, so
+/* Including tu_private.h in tu_perfetto.cc doesn't work, so
* we need some helper methods to access tu_device.
*/
@@ -16,8 +34,15 @@ tu_device_get_perfetto_state(struct tu_device *dev)
return &dev->perfetto;
}
+int
+tu_device_get_timestamp(struct tu_device *dev,
+ uint64_t *ts)
+{
+ return tu_drm_get_timestamp(dev->physical_device, ts);
+}
+
uint32_t
-tu_u_trace_submission_data_get_submit_id(const struct tu_u_trace_submission_data *data)
+tu_u_trace_flush_data_get_submit_id(const struct tu_u_trace_flush_data *data)
{
return data->submission_id;
}
diff --git a/lib/mesa/src/gallium/drivers/asahi/magic.c b/lib/mesa/src/gallium/drivers/asahi/magic.c
index a5dfd62da..fa56ede4f 100644
--- a/lib/mesa/src/gallium/drivers/asahi/magic.c
+++ b/lib/mesa/src/gallium/drivers/asahi/magic.c
@@ -50,244 +50,109 @@ demo_zero(struct agx_pool *pool, unsigned count)
return ptr.gpu;
}
-static size_t
-asahi_size_resource(struct pipe_resource *prsrc, unsigned level)
-{
- struct agx_resource *rsrc = agx_resource(prsrc);
- size_t size = rsrc->layout.size_B;
-
- if (rsrc->separate_stencil)
- size += asahi_size_resource(&rsrc->separate_stencil->base, level);
-
- return size;
-}
-
-static size_t
-asahi_size_surface(struct pipe_surface *surf)
-{
- return asahi_size_resource(surf->texture, surf->u.tex.level);
-}
-
-static size_t
-asahi_size_attachments(struct pipe_framebuffer_state *framebuffer)
-{
- size_t sum = 0;
-
- for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i)
- sum += asahi_size_surface(framebuffer->cbufs[i]);
-
- if (framebuffer->zsbuf)
- sum += asahi_size_surface(framebuffer->zsbuf);
-
- return sum;
-}
-
-static enum agx_iogpu_attachment_type
-asahi_classify_attachment(enum pipe_format format)
-{
- const struct util_format_description *desc = util_format_description(format);
-
- if (util_format_has_depth(desc))
- return AGX_IOGPU_ATTACHMENT_TYPE_DEPTH;
- else if (util_format_has_stencil(desc))
- return AGX_IOGPU_ATTACHMENT_TYPE_STENCIL;
- else
- return AGX_IOGPU_ATTACHMENT_TYPE_COLOUR;
-}
-
-static uint64_t
-agx_map_surface_resource(struct pipe_surface *surf, struct agx_resource *rsrc)
-{
- return agx_map_texture_gpu(rsrc, surf->u.tex.first_layer);
-}
-
-static uint64_t
-agx_map_surface(struct pipe_surface *surf)
-{
- return agx_map_surface_resource(surf, agx_resource(surf->texture));
-}
-
-static void
-asahi_pack_iogpu_attachment(void *out, struct agx_resource *rsrc,
- unsigned total_size)
-{
- agx_pack(out, IOGPU_ATTACHMENT, cfg) {
- cfg.type = asahi_classify_attachment(rsrc->layout.format);
- cfg.address = rsrc->bo->ptr.gpu;
- cfg.size = rsrc->layout.size_B;
- cfg.percent = (100 * cfg.size) / total_size;
- }
-}
-
-static unsigned
-asahi_pack_iogpu_attachments(void *out, struct pipe_framebuffer_state *framebuffer)
-{
- unsigned total_attachment_size = asahi_size_attachments(framebuffer);
- struct agx_iogpu_attachment_packed *attachments = out;
- unsigned nr = 0;
-
- for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) {
- asahi_pack_iogpu_attachment(attachments + (nr++),
- agx_resource(framebuffer->cbufs[i]->texture),
- total_attachment_size);
- }
-
- if (framebuffer->zsbuf) {
- struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture);
-
- asahi_pack_iogpu_attachment(attachments + (nr++),
- rsrc, total_attachment_size);
-
- if (rsrc->separate_stencil) {
- asahi_pack_iogpu_attachment(attachments + (nr++),
- rsrc->separate_stencil,
- total_attachment_size);
- }
- }
-
- return nr;
-}
-
unsigned
demo_cmdbuf(uint64_t *buf, size_t size,
struct agx_pool *pool,
- struct pipe_framebuffer_state *framebuffer,
uint64_t encoder_ptr,
uint64_t encoder_id,
uint64_t scissor_ptr,
- uint64_t depth_bias_ptr,
+ unsigned width, unsigned height,
+ uint32_t pipeline_null,
uint32_t pipeline_clear,
- uint32_t pipeline_load,
uint32_t pipeline_store,
- bool clear_pipeline_textures,
- unsigned clear_buffers,
- double clear_depth,
- unsigned clear_stencil)
+ uint64_t rt0,
+ bool clear_pipeline_textures)
{
- bool should_clear_depth = clear_buffers & PIPE_CLEAR_DEPTH;
- bool should_clear_stencil = clear_buffers & PIPE_CLEAR_STENCIL;
-
uint32_t *map = (uint32_t *) buf;
- memset(map, 0, 518 * 4);
-
- uint64_t deflake_buffer = demo_zero(pool, 0x7e0);
- uint64_t deflake_1 = deflake_buffer + 0x2a0;
- uint64_t deflake_2 = deflake_buffer + 0x20;
-
- uint64_t unk_buffer_2 = demo_zero(pool, 0x8000);
-
- uint64_t depth_buffer = 0;
- uint64_t stencil_buffer = 0;
-
- agx_pack(map + 16, IOGPU_GRAPHICS, cfg) {
- cfg.opengl_depth_clipping = true;
-
- cfg.deflake_1 = deflake_1;
- cfg.deflake_2 = deflake_2;
- cfg.deflake_3 = deflake_buffer;
-
- cfg.clear_pipeline_bind = 0xffff8002 | (clear_pipeline_textures ? 0x210 : 0);
- cfg.clear_pipeline = pipeline_clear;
-
- /* store pipeline used when entire frame completes */
- cfg.store_pipeline_bind = 0x12;
- cfg.store_pipeline = pipeline_store;
- cfg.scissor_array = scissor_ptr;
- cfg.depth_bias_array = depth_bias_ptr;
-
- if (framebuffer->zsbuf) {
- struct pipe_surface *zsbuf = framebuffer->zsbuf;
- const struct util_format_description *desc =
- util_format_description(agx_resource(zsbuf->texture)->layout.format);
-
- assert(desc->format == PIPE_FORMAT_Z32_FLOAT ||
- desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
- desc->format == PIPE_FORMAT_S8_UINT);
-
- cfg.depth_width = framebuffer->width;
- cfg.depth_height = framebuffer->height;
-
- if (util_format_has_depth(desc)) {
- depth_buffer = agx_map_surface(zsbuf);
-
- cfg.zls_control.z_store_enable = true;
- cfg.zls_control.z_load_enable = !should_clear_depth;
- } else {
- stencil_buffer = agx_map_surface(zsbuf);
- cfg.zls_control.s_store_enable = true;
- cfg.zls_control.s_load_enable = !should_clear_stencil;
- }
+ memset(map, 0, 474 * 4);
- if (agx_resource(zsbuf->texture)->separate_stencil) {
- stencil_buffer = agx_map_surface_resource(zsbuf,
- agx_resource(zsbuf->texture)->separate_stencil);
+ map[54] = 0x6b0003;
+ map[55] = 0x3a0012;
+ map[56] = 1;
- cfg.zls_control.s_store_enable = true;
- cfg.zls_control.s_load_enable = !should_clear_stencil;
- }
+ map[106] = 1;
+ map[108] = 0x1c;
+ map[112] = 0xffffffff;
+ map[113] = 0xffffffff;
+ map[114] = 0xffffffff;
- /* It's unclear how tile size is conveyed for depth/stencil targets,
- * which interactions with mipmapping (for example of a 33x33
- * depth/stencil attachment)
- */
- if (zsbuf->u.tex.level != 0)
- unreachable("todo: mapping other levels");
-
- cfg.depth_buffer_1 = depth_buffer;
- cfg.depth_buffer_2 = depth_buffer;
-
- cfg.stencil_buffer_1 = stencil_buffer;
- cfg.stencil_buffer_2 = stencil_buffer;
- }
-
- cfg.width_1 = framebuffer->width;
- cfg.height_1 = framebuffer->height;
- cfg.pointer = unk_buffer_2;
-
- cfg.set_when_reloading_z_or_s_1 = clear_pipeline_textures;
-
- if (depth_buffer && !should_clear_depth) {
- cfg.set_when_reloading_z_or_s_1 = true;
- cfg.set_when_reloading_z_or_s_2 = true;
- }
-
- if (stencil_buffer && !should_clear_stencil) {
- cfg.set_when_reloading_z_or_s_1 = true;
- cfg.set_when_reloading_z_or_s_2 = true;
- }
-
- cfg.depth_clear_value = fui(clear_depth);
- cfg.stencil_clear_value = clear_stencil & 0xff;
-
- cfg.partial_reload_pipeline_bind = 0xffff8212;
- cfg.partial_reload_pipeline = pipeline_load;
-
- cfg.partial_store_pipeline_bind = 0x12;
- cfg.partial_store_pipeline = pipeline_store;
+ uint64_t unk_buffer = demo_zero(pool, 0x1000);
+ uint64_t unk_buffer_2 = demo_zero(pool, 0x8000);
- cfg.depth_buffer_3 = depth_buffer;
- cfg.stencil_buffer_3 = stencil_buffer;
- cfg.encoder_id = encoder_id;
- cfg.unknown_buffer = demo_unk6(pool);
- cfg.width_2 = framebuffer->width;
- cfg.height_2 = framebuffer->height;
- cfg.unk_352 = clear_pipeline_textures ? 0x0 : 0x1;
+ // This is a pipeline bind
+ map[156] = 0xffff8002 | (clear_pipeline_textures ? 0x210 : 0);
+ map[158] = pipeline_clear | 0x4;
+ map[163] = 0x12;
+ map[164] = pipeline_store | 0x4;
+ map[166] = scissor_ptr & 0xFFFFFFFF;
+ map[167] = scissor_ptr >> 32;
+ map[168] = unk_buffer & 0xFFFFFFFF;
+ map[169] = unk_buffer >> 32;
+
+ map[220] = 4;
+ map[222] = 0xc000;
+ map[224] = width;
+ map[225] = height;
+ map[226] = unk_buffer_2 & 0xFFFFFFFF;
+ map[227] = unk_buffer_2 >> 32;
+
+ float depth_clear = 1.0;
+ uint8_t stencil_clear = 0;
+
+ map[278] = fui(depth_clear);
+ map[279] = (0x3 << 8) | stencil_clear;
+ map[282] = 0x1000000;
+ map[284] = 0xffffffff;
+ map[285] = 0xffffffff;
+ map[286] = 0xffffffff;
+
+ map[298] = 0xffff8212;
+ map[300] = pipeline_null | 0x4;
+ map[305] = 0x12;
+ map[306] = pipeline_store | 0x4;
+ map[352] = 1;
+ map[360] = 0x1c;
+ map[362] = encoder_id;
+ map[365] = 0xffffffff;
+ map[366] = 1;
+
+ uint64_t unk6 = demo_unk6(pool);
+ map[370] = unk6 & 0xFFFFFFFF;
+ map[371] = unk6 >> 32;
+
+ map[374] = width;
+ map[375] = height;
+ map[376] = 1;
+ map[377] = 8;
+ map[378] = 8;
+
+ map[393] = 8;
+ map[394] = 32;
+ map[395] = 32;
+ map[396] = 1;
+
+ unsigned offset_unk = (458 * 4);
+ unsigned offset_attachments = (470 * 4);
+ unsigned nr_attachments = 1;
+
+ map[473] = nr_attachments;
+
+ /* A single attachment follows, depth/stencil have their own attachments */
+ agx_pack((map + (offset_attachments / 4) + 4), IOGPU_ATTACHMENT, cfg) {
+ cfg.address = rt0;
+ cfg.type = AGX_IOGPU_ATTACHMENT_TYPE_COLOUR;
+ cfg.unk_1 = 0x80000000;
+ cfg.unk_2 = 0x5;
+ cfg.bytes_per_pixel = 4;
+ cfg.percent = 100;
}
- unsigned offset_unk = (484 * 4);
- unsigned offset_attachments = (496 * 4);
-
- unsigned nr_attachments =
- asahi_pack_iogpu_attachments(map + (offset_attachments / 4) + 4,
- framebuffer);
-
- map[(offset_attachments / 4) + 3] = nr_attachments;
-
unsigned total_size = offset_attachments + (AGX_IOGPU_ATTACHMENT_LENGTH * nr_attachments) + 16;
agx_pack(map, IOGPU_HEADER, cfg) {
cfg.total_size = total_size;
- cfg.attachment_offset = offset_attachments;
+ cfg.attachment_offset_1 = offset_attachments;
+ cfg.attachment_offset_2 = offset_attachments;
cfg.attachment_length = nr_attachments * AGX_IOGPU_ATTACHMENT_LENGTH;
cfg.unknown_offset = offset_unk;
cfg.encoder = encoder_ptr;
@@ -299,23 +164,18 @@ demo_cmdbuf(uint64_t *buf, size_t size,
static struct agx_map_header
demo_map_header(uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size, unsigned count)
{
- /* Structure: header followed by resource groups. For now, we use a single
- * resource group for every resource. This could be optimized.
- */
- unsigned length = sizeof(struct agx_map_header);
- length += count * sizeof(struct agx_map_entry);
- assert(length < 0x10000);
-
return (struct agx_map_header) {
.cmdbuf_id = cmdbuf_id,
- .segment_count = 1,
- .length = length,
+ .unk2 = 0x1,
+ .unk3 = 0x528, // 0x528 == 1320 in decimal
.encoder_id = encoder_id,
- .kernel_commands_start_offset = 0,
- .kernel_commands_end_offset = cmdbuf_size,
- .total_resources = count,
- .resource_group_count = count,
- .unk = 0x8000,
+ .unk6 = 0x0,
+ .cmdbuf_size = cmdbuf_size,
+
+ /* count + 1: one extra slot for the sentinel entry that follows the real entries */
+ .nr_entries = count + 1,
+ .nr_handles = count + 1,
+ .indices = {0x0b},
};
}
@@ -324,7 +184,7 @@ demo_mem_map(void *map, size_t size, unsigned *handles, unsigned count,
uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size)
{
struct agx_map_header *header = map;
- struct agx_map_entry *entries = (struct agx_map_entry *) (((uint8_t *) map) + sizeof(*header));
+ struct agx_map_entry *entries = (struct agx_map_entry *) (((uint8_t *) map) + 0x40);
struct agx_map_entry *end = (struct agx_map_entry *) (((uint8_t *) map) + size);
/* Header precedes the entry */
@@ -334,10 +194,18 @@ demo_mem_map(void *map, size_t size, unsigned *handles, unsigned count,
for (unsigned i = 0; i < count; ++i) {
assert((entries + i) < end);
entries[i] = (struct agx_map_entry) {
- .resource_id = { handles[i] },
- .resource_unk = { 0x20 },
- .resource_flags = { 0x1 },
- .resource_count = 1
+ .unkAAA = 0x20,
+ .unkBBB = 0x1,
+ .unka = 0x1ffff,
+ .indices = {handles[i]}
};
}
+
+ /* Final entry is a sentinel: written at entries[count], with unkAAA 0x40 and no handle in indices */
+ assert((entries + count) < end);
+ entries[count] = (struct agx_map_entry) {
+ .unkAAA = 0x40,
+ .unkBBB = 0x1,
+ .unka = 0x1ffff,
+ };
}
diff --git a/lib/mesa/src/gallium/drivers/asahi/magic.h b/lib/mesa/src/gallium/drivers/asahi/magic.h
index 0231afdc2..98215d367 100644
--- a/lib/mesa/src/gallium/drivers/asahi/magic.h
+++ b/lib/mesa/src/gallium/drivers/asahi/magic.h
@@ -27,18 +27,15 @@
unsigned
demo_cmdbuf(uint64_t *buf, size_t size,
struct agx_pool *pool,
- struct pipe_framebuffer_state *framebuffer,
uint64_t encoder_ptr,
uint64_t encoder_id,
uint64_t scissor_ptr,
- uint64_t depth_bias_ptr,
+ unsigned width, unsigned height,
+ uint32_t pipeline_null,
uint32_t pipeline_clear,
- uint32_t pipeline_load,
uint32_t pipeline_store,
- bool clear_pipeline_textures,
- unsigned clear_buffers,
- double clear_depth,
- unsigned clear_stencil);
+ uint64_t rt0,
+ bool clear_pipeline_textures);
void
demo_mem_map(void *map, size_t size, unsigned *handles,