author    Jonathan Gray <jsg@cvs.openbsd.org>    2018-10-23 05:53:16 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org>    2018-10-23 05:53:16 +0000
commit    aea331c3cb69ef2756ffceb05da8ace5e6287314 (patch)
tree      2ec86f417bae9af0860d54ed89c822e701563645 /lib/mesa/src/amd
parent    f6666e4c3977a5d74f3da7464672ea48e44dff4b (diff)
Import Mesa 17.3.9
Diffstat (limited to 'lib/mesa/src/amd')
-rw-r--r--   lib/mesa/src/amd/common/ac_shader_abi.h     101
-rw-r--r--   lib/mesa/src/amd/vulkan/dev_icd.json.in       2
-rw-r--r--   lib/mesa/src/amd/vulkan/radv_debug.c        736
-rw-r--r--   lib/mesa/src/amd/vulkan/radv_extensions.c   407
-rw-r--r--   lib/mesa/src/amd/vulkan/radv_extensions.py  278
-rw-r--r--   lib/mesa/src/amd/vulkan/radv_pass.c          25
-rw-r--r--   lib/mesa/src/amd/vulkan/radv_shader.c       671
-rw-r--r--   lib/mesa/src/amd/vulkan/radv_shader.h       123
8 files changed, 2341 insertions, 2 deletions
diff --git a/lib/mesa/src/amd/common/ac_shader_abi.h b/lib/mesa/src/amd/common/ac_shader_abi.h
new file mode 100644
index 000000000..b04dc076d
--- /dev/null
+++ b/lib/mesa/src/amd/common/ac_shader_abi.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef AC_SHADER_ABI_H
+#define AC_SHADER_ABI_H
+
+#include <llvm-c/Core.h>
+
+enum ac_descriptor_type {
+ AC_DESC_IMAGE,
+ AC_DESC_FMASK,
+ AC_DESC_SAMPLER,
+ AC_DESC_BUFFER,
+};
+
+/* Document the shader ABI during compilation. This is what allows radeonsi and
+ * radv to share a compiler backend.
+ */
+struct ac_shader_abi {
+ LLVMValueRef base_vertex;
+ LLVMValueRef start_instance;
+ LLVMValueRef draw_id;
+ LLVMValueRef vertex_id;
+ LLVMValueRef instance_id;
+ LLVMValueRef frag_pos[4];
+ LLVMValueRef front_face;
+ LLVMValueRef ancillary;
+ LLVMValueRef sample_coverage;
+
+ /* For VS and PS: pre-loaded shader inputs.
+ *
+ * Currently only used for NIR shaders; indexed by variables'
+ * driver_location.
+ */
+ LLVMValueRef *inputs;
+
+ void (*emit_outputs)(struct ac_shader_abi *abi,
+ unsigned max_outputs,
+ LLVMValueRef *addrs);
+
+ LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
+
+ /**
+ * Load the descriptor for the given buffer.
+ *
+ * \param buffer the buffer as presented in NIR: this is the descriptor
+ * in Vulkan, and the buffer index in OpenGL/Gallium
+ * \param write whether buffer contents will be written
+ */
+ LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
+ LLVMValueRef buffer, bool write);
+
+ /**
+ * Load a descriptor associated with a sampler.
+ *
+ * \param descriptor_set the descriptor set index (only for Vulkan)
+ * \param base_index the base index of the sampler variable
+ * \param constant_index constant part of an array index (or 0, if the
+ * sampler variable is not an array)
+ * \param index non-constant part of an array index (may be NULL)
+ * \param desc_type the type of descriptor to load
+ * \param image whether the descriptor is loaded for an image operation
+ * \param write whether the descriptor will be used for image writes
+ */
+ LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi,
+ unsigned descriptor_set,
+ unsigned base_index,
+ unsigned constant_index,
+ LLVMValueRef index,
+ enum ac_descriptor_type desc_type,
+ bool image, bool write);
+
+ /* Whether to clamp the shadow reference value to [0,1] on VI. Radeonsi
+ * currently uses it due to promoting D16 to D32, but radv needs it off. */
+ bool clamp_shadow_reference;
+
+ /* Whether to work around GFX9 ignoring the stride for the buffer size if IDXEN=0
+ * and LLVM optimizes an indexed load with a constant index to IDXEN=0. */
+ bool gfx9_stride_size_workaround;
+};
+
+#endif /* AC_SHADER_ABI_H */
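
The value of this struct is the indirection it provides: shared compiler code
can fetch driver-specific resources through the callbacks without knowing
which driver is compiling. A minimal sketch of that pattern, assuming a
hypothetical emit_ubo_load() helper in the shared backend (the real callback
installation lives in each driver's shader setup code):

    /* Shared backend code: resolve a UBO index to a descriptor without
     * knowing whether radv (Vulkan descriptor sets) or radeonsi (Gallium
     * constant buffers) is driving the compile. */
    static LLVMValueRef
    emit_ubo_load(struct ac_shader_abi *abi, LLVMValueRef index)
    {
        /* The driver-installed callback applies its own binding model. */
        return abi->load_ubo(abi, index);
    }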
diff --git a/lib/mesa/src/amd/vulkan/dev_icd.json.in b/lib/mesa/src/amd/vulkan/dev_icd.json.in
index f726df02a..cc80641f5 100644
--- a/lib/mesa/src/amd/vulkan/dev_icd.json.in
+++ b/lib/mesa/src/amd/vulkan/dev_icd.json.in
@@ -1,7 +1,7 @@
{
"file_format_version": "1.0.0",
"ICD": {
- "library_path": "@build_libdir@/libvulkan_radeon.so",
+ "library_path": "@libvulkan_radeon_path@",
"api_version": "1.0.3"
}
}
diff --git a/lib/mesa/src/amd/vulkan/radv_debug.c b/lib/mesa/src/amd/vulkan/radv_debug.c
new file mode 100644
index 000000000..b69c05b64
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_debug.c
@@ -0,0 +1,736 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/utsname.h>
+
+#include "sid.h"
+#include "gfx9d.h"
+#include "ac_debug.h"
+#include "radv_debug.h"
+#include "radv_shader.h"
+
+#define TRACE_BO_SIZE 4096
+
+#define COLOR_RESET "\033[0m"
+#define COLOR_RED "\033[31m"
+#define COLOR_GREEN "\033[1;32m"
+#define COLOR_YELLOW "\033[1;33m"
+#define COLOR_CYAN "\033[1;36m"
+
+/* Trace BO layout (offsets are 4 bytes):
+ *
+ * [0]: primary trace ID
+ * [1]: secondary trace ID
+ * [2-3]: 64-bit GFX pipeline pointer
+ * [4-5]: 64-bit COMPUTE pipeline pointer
+ * [6-7]: 64-bit descriptor set #0 pointer
+ * ...
+ * [68-69]: 64-bit descriptor set #31 pointer
+ */
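
Since every pointer slot is read back as a 64-bit value, the 4-byte offsets
above correspond to a uint64_t view of the mapped buffer; a small sketch of
that indexing (hypothetical helper, mirroring radv_get_saved_graphics_pipeline()
further down in this file):

    /* Viewed as 64-bit slots: qword 0 holds the two trace IDs, qword 1 the
     * GFX pipeline, qword 2 the COMPUTE pipeline, and qwords 3..34 the 32
     * descriptor set pointers. */
    static uint64_t
    trace_slot(const void *trace_id_ptr, unsigned qword)
    {
        return ((const uint64_t *)trace_id_ptr)[qword];
    }
    /* e.g. descriptor set #5 lives at dwords [16-17], i.e. trace_slot(p, 3 + 5). */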
+
+bool
+radv_init_trace(struct radv_device *device)
+{
+ struct radeon_winsys *ws = device->ws;
+
+ device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8,
+ RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS);
+ if (!device->trace_bo)
+ return false;
+
+ device->trace_id_ptr = ws->buffer_map(device->trace_bo);
+ if (!device->trace_id_ptr)
+ return false;
+
+ memset(device->trace_id_ptr, 0, TRACE_BO_SIZE);
+
+ ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
+ &device->dmesg_timestamp, NULL);
+
+ return true;
+}
+
+static void
+radv_dump_trace(struct radv_device *device, struct radeon_winsys_cs *cs)
+{
+ const char *filename = getenv("RADV_TRACE_FILE");
+ FILE *f = fopen(filename, "w");
+
+ if (!f) {
+ fprintf(stderr, "Failed to write trace dump to %s\n", filename);
+ return;
+ }
+
+ fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
+ device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2);
+ fclose(f);
+}
+
+static void
+radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
+{
+ struct radeon_winsys *ws = device->ws;
+ uint32_t value;
+
+ if (ws->read_registers(ws, offset, 1, &value))
+ ac_dump_reg(f, device->physical_device->rad_info.chip_class,
+ offset, value, ~0);
+}
+
+static void
+radv_dump_debug_registers(struct radv_device *device, FILE *f)
+{
+ struct radeon_info *info = &device->physical_device->rad_info;
+
+ if (info->drm_major == 2 && info->drm_minor < 42)
+ return; /* no radeon support */
+
+ fprintf(f, "Memory-mapped registers:\n");
+ radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
+
+ /* No other registers can be read on DRM < 3.1.0. */
+ if (info->drm_major < 3 || info->drm_minor < 1) {
+ fprintf(f, "\n");
+ return;
+ }
+
+ radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
+ radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
+ radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
+ radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
+ radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
+ radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
+ radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
+ if (info->chip_class <= VI) {
+ radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
+ radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
+ radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
+ }
+ radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
+ radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
+ radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
+ radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
+ radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
+ radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
+ radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
+ radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
+ radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
+ radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
+ fprintf(f, "\n");
+}
+
+static const char *
+radv_get_descriptor_name(enum VkDescriptorType type)
+{
+ switch (type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ return "SAMPLER";
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ return "COMBINED_IMAGE_SAMPLER";
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ return "SAMPLED_IMAGE";
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ return "STORAGE_IMAGE";
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ return "UNIFORM_TEXEL_BUFFER";
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ return "STORAGE_TEXEL_BUFFER";
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ return "UNIFORM_BUFFER";
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ return "STORAGE_BUFFER";
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ return "UNIFORM_BUFFER_DYNAMIC";
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ return "STORAGE_BUFFER_DYNAMIC";
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ return "INPUT_ATTACHMENT";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static void
+radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc,
+ FILE *f)
+{
+ fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
+ for (unsigned j = 0; j < 4; j++)
+ ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4,
+ desc[j], 0xffffffff);
+}
+
+static void
+radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc,
+ FILE *f)
+{
+ fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
+ for (unsigned j = 0; j < 8; j++)
+ ac_dump_reg(f, chip_class, R_008F10_SQ_IMG_RSRC_WORD0 + j * 4,
+ desc[j], 0xffffffff);
+
+ fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
+ for (unsigned j = 0; j < 8; j++)
+ ac_dump_reg(f, chip_class, R_008F10_SQ_IMG_RSRC_WORD0 + j * 4,
+ desc[8 + j], 0xffffffff);
+}
+
+static void
+radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
+ FILE *f)
+{
+ fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
+ for (unsigned j = 0; j < 4; j++) {
+ ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4,
+ desc[j], 0xffffffff);
+ }
+}
+
+static void
+radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class,
+ const uint32_t *desc, FILE *f)
+{
+ radv_dump_image_descriptor(chip_class, desc, f);
+ radv_dump_sampler_descriptor(chip_class, desc + 16, f);
+}
+
+static void
+radv_dump_descriptor_set(enum chip_class chip_class,
+ struct radv_descriptor_set *set, unsigned id, FILE *f)
+{
+ const struct radv_descriptor_set_layout *layout;
+ int i;
+
+ if (!set)
+ return;
+ layout = set->layout;
+
+ fprintf(f, "** descriptor set (%d) **\n", id);
+ fprintf(f, "va: 0x%"PRIx64"\n", set->va);
+ fprintf(f, "size: %d\n", set->size);
+ fprintf(f, "mapped_ptr:\n");
+
+ for (i = 0; i < set->size / 4; i++) {
+ fprintf(f, "\t[0x%x] = 0x%08x\n", i, set->mapped_ptr[i]);
+ }
+ fprintf(f, "\n");
+
+ fprintf(f, "\t*** layout ***\n");
+ fprintf(f, "\tbinding_count: %d\n", layout->binding_count);
+ fprintf(f, "\tsize: %d\n", layout->size);
+ fprintf(f, "\tshader_stages: %x\n", layout->shader_stages);
+ fprintf(f, "\tdynamic_shader_stages: %x\n",
+ layout->dynamic_shader_stages);
+ fprintf(f, "\tbuffer_count: %d\n", layout->buffer_count);
+ fprintf(f, "\tdynamic_offset_count: %d\n",
+ layout->dynamic_offset_count);
+ fprintf(f, "\n");
+
+ for (i = 0; i < set->layout->binding_count; i++) {
+ uint32_t *desc =
+ set->mapped_ptr + layout->binding[i].offset / 4;
+
+ fprintf(f, "\t\t**** binding layout (%d) ****\n", i);
+ fprintf(f, "\t\ttype: %s\n",
+ radv_get_descriptor_name(layout->binding[i].type));
+ fprintf(f, "\t\tarray_size: %d\n",
+ layout->binding[i].array_size);
+ fprintf(f, "\t\toffset: %d\n",
+ layout->binding[i].offset);
+ fprintf(f, "\t\tbuffer_offset: %d\n",
+ layout->binding[i].buffer_offset);
+ fprintf(f, "\t\tdynamic_offset_offset: %d\n",
+ layout->binding[i].dynamic_offset_offset);
+ fprintf(f, "\t\tdynamic_offset_count: %d\n",
+ layout->binding[i].dynamic_offset_count);
+ fprintf(f, "\t\tsize: %d\n",
+ layout->binding[i].size);
+ fprintf(f, "\t\timmutable_samplers_offset: %d\n",
+ layout->binding[i].immutable_samplers_offset);
+ fprintf(f, "\t\timmutable_samplers_equal: %d\n",
+ layout->binding[i].immutable_samplers_equal);
+ fprintf(f, "\n");
+
+ switch (layout->binding[i].type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ radv_dump_buffer_descriptor(chip_class, desc, f);
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ radv_dump_image_descriptor(chip_class, desc, f);
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ radv_dump_sampler_descriptor(chip_class, desc, f);
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ /* todo */
+ break;
+ default:
+ assert(!"unknown descriptor type");
+ break;
+ }
+ fprintf(f, "\n");
+ }
+ fprintf(f, "\n\n");
+}
+
+static void
+radv_dump_descriptors(struct radv_pipeline *pipeline, FILE *f)
+{
+ struct radv_device *device = pipeline->device;
+ enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+ uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+ int i;
+
+ fprintf(f, "List of descriptors:\n");
+ for (i = 0; i < MAX_SETS; i++) {
+ struct radv_descriptor_set *set =
+ (struct radv_descriptor_set *)ptr[i + 3];
+
+ radv_dump_descriptor_set(chip_class, set, i, f);
+ }
+}
+
+struct radv_shader_inst {
+ char text[160]; /* one disasm line */
+ unsigned offset; /* instruction offset */
+ unsigned size; /* instruction size = 4 or 8 */
+};
+
+/* Split a disassembly string into lines and add them to the array pointed
+ * to by "instructions". */
+static void si_add_split_disasm(const char *disasm,
+ uint64_t start_addr,
+ unsigned *num,
+ struct radv_shader_inst *instructions)
+{
+ struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
+ char *next;
+
+ while ((next = strchr(disasm, '\n'))) {
+ struct radv_shader_inst *inst = &instructions[*num];
+ unsigned len = next - disasm;
+
+ assert(len < ARRAY_SIZE(inst->text));
+ memcpy(inst->text, disasm, len);
+ inst->text[len] = 0;
+ inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;
+
+ const char *semicolon = strchr(disasm, ';');
+ assert(semicolon);
+ /* More than 16 chars after ";" means the instruction is 8 bytes long. */
+ inst->size = next - semicolon > 16 ? 8 : 4;
+
+ snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
+ " [PC=0x%"PRIx64", off=%u, size=%u]",
+ start_addr + inst->offset, inst->offset, inst->size);
+
+ last_inst = inst;
+ (*num)++;
+ disasm = next + 1;
+ }
+}
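
A worked example of the size heuristic above, with made-up encodings in
LLVM's "mnemonic ; hex" disassembly format:

    /* "s_mov_b32 s0, s1 ; BE800081"                        9 chars after ';'  -> size 4
     * "s_load_dword s0, s[0:1], 0x10 ; C0020000 00000010"  18 chars after ';' -> size 8
     *
     * The offsets then accumulate (inst[n].offset = inst[n-1].offset +
     * inst[n-1].size), which is what lets the annotated dump below match a
     * wave's PC to a specific disassembly line. */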
+
+static void
+radv_dump_annotated_shader(struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader,
+ gl_shader_stage stage,
+ struct ac_wave_info *waves, unsigned num_waves,
+ FILE *f)
+{
+ uint64_t start_addr, end_addr;
+ unsigned i;
+
+ if (!shader)
+ return;
+
+ start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ end_addr = start_addr + shader->code_size;
+
+ /* See if any wave executes the shader. */
+ for (i = 0; i < num_waves; i++) {
+ if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
+ break;
+ }
+
+ if (i == num_waves)
+ return; /* the shader is not being executed */
+
+ /* Remember the first found wave. The waves are sorted according to PC. */
+ waves = &waves[i];
+ num_waves -= i;
+
+ /* Get the list of instructions.
+ * Buffer size / 4 is the upper bound of the instruction count.
+ */
+ unsigned num_inst = 0;
+ struct radv_shader_inst *instructions =
+ calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
+
+ si_add_split_disasm(shader->disasm_string,
+ start_addr, &num_inst, instructions);
+
+ fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
+ radv_get_shader_name(shader, stage));
+
+ /* Print instructions with annotations. */
+ for (i = 0; i < num_inst; i++) {
+ struct radv_shader_inst *inst = &instructions[i];
+
+ fprintf(f, "%s\n", inst->text);
+
+ /* Print which waves execute the instruction right now. */
+ while (num_waves && start_addr + inst->offset == waves->pc) {
+ fprintf(f,
+ " " COLOR_GREEN "^ SE%u SH%u CU%u "
+ "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
+ waves->se, waves->sh, waves->cu, waves->simd,
+ waves->wave, waves->exec);
+
+ if (inst->size == 4) {
+ fprintf(f, "INST32=%08X" COLOR_RESET "\n",
+ waves->inst_dw0);
+ } else {
+ fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
+ waves->inst_dw0, waves->inst_dw1);
+ }
+
+ waves->matched = true;
+ waves = &waves[1];
+ num_waves--;
+ }
+ }
+
+ fprintf(f, "\n\n");
+ free(instructions);
+}
+
+static void
+radv_dump_annotated_shaders(struct radv_pipeline *pipeline,
+ struct radv_shader_variant *compute_shader,
+ FILE *f)
+{
+ struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
+ unsigned num_waves = ac_get_wave_info(waves);
+ unsigned mask;
+
+ fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
+ "\n\n", num_waves);
+
+ /* Dump annotated active graphics shaders. */
+ mask = pipeline->active_stages;
+ while (mask) {
+ int stage = u_bit_scan(&mask);
+
+ radv_dump_annotated_shader(pipeline, pipeline->shaders[stage],
+ stage, waves, num_waves, f);
+ }
+
+ radv_dump_annotated_shader(pipeline, compute_shader,
+ MESA_SHADER_COMPUTE, waves, num_waves, f);
+
+ /* Print waves executing shaders that are not currently bound. */
+ unsigned i;
+ bool found = false;
+ for (i = 0; i < num_waves; i++) {
+ if (waves[i].matched)
+ continue;
+
+ if (!found) {
+ fprintf(f, COLOR_CYAN
+ "Waves not executing currently-bound shaders:"
+ COLOR_RESET "\n");
+ found = true;
+ }
+ fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
+ " INST=%08X %08X PC=%"PRIx64"\n",
+ waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
+ waves[i].wave, waves[i].exec, waves[i].inst_dw0,
+ waves[i].inst_dw1, waves[i].pc);
+ }
+ if (found)
+ fprintf(f, "\n\n");
+}
+
+static void
+radv_dump_shader(struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader, gl_shader_stage stage,
+ FILE *f)
+{
+ if (!shader)
+ return;
+
+ fprintf(f, "%s:\n\n", radv_get_shader_name(shader, stage));
+
+ if (shader->spirv) {
+ fprintf(f, "SPIRV:\n");
+ radv_print_spirv(shader->spirv, shader->spirv_size, f);
+ }
+
+ if (shader->nir) {
+ fprintf(f, "NIR:\n");
+ nir_print_shader(shader->nir, f);
+ }
+
+ fprintf(stderr, "DISASM:\n%s\n", shader->disasm_string);
+
+ radv_shader_dump_stats(pipeline->device, shader, stage, f);
+}
+
+static void
+radv_dump_shaders(struct radv_pipeline *pipeline,
+ struct radv_shader_variant *compute_shader, FILE *f)
+{
+ unsigned mask;
+
+ /* Dump active graphics shaders. */
+ mask = pipeline->active_stages;
+ while (mask) {
+ int stage = u_bit_scan(&mask);
+
+ radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f);
+ }
+
+ radv_dump_shader(pipeline, compute_shader, MESA_SHADER_COMPUTE, f);
+}
+
+static void
+radv_dump_graphics_state(struct radv_pipeline *graphics_pipeline,
+ struct radv_pipeline *compute_pipeline, FILE *f)
+{
+ struct radv_shader_variant *compute_shader =
+ compute_pipeline ? compute_pipeline->shaders[MESA_SHADER_COMPUTE] : NULL;
+
+ if (!graphics_pipeline)
+ return;
+
+ radv_dump_shaders(graphics_pipeline, compute_shader, f);
+ radv_dump_annotated_shaders(graphics_pipeline, compute_shader, f);
+ radv_dump_descriptors(graphics_pipeline, f);
+}
+
+static void
+radv_dump_compute_state(struct radv_pipeline *compute_pipeline, FILE *f)
+{
+ if (!compute_pipeline)
+ return;
+
+ radv_dump_shaders(compute_pipeline,
+ compute_pipeline->shaders[MESA_SHADER_COMPUTE], f);
+ radv_dump_annotated_shaders(compute_pipeline,
+ compute_pipeline->shaders[MESA_SHADER_COMPUTE],
+ f);
+ radv_dump_descriptors(compute_pipeline, f);
+}
+
+static struct radv_pipeline *
+radv_get_saved_graphics_pipeline(struct radv_device *device)
+{
+ uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+
+ return (struct radv_pipeline *)ptr[1];
+}
+
+static struct radv_pipeline *
+radv_get_saved_compute_pipeline(struct radv_device *device)
+{
+ uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+
+ return (struct radv_pipeline *)ptr[2];
+}
+
+static void
+radv_dump_dmesg(FILE *f)
+{
+ char line[2000];
+ FILE *p;
+
+ p = popen("dmesg | tail -n60", "r");
+ if (!p)
+ return;
+
+ fprintf(f, "\nLast 60 lines of dmesg:\n\n");
+ while (fgets(line, sizeof(line), p))
+ fputs(line, f);
+ fprintf(f, "\n");
+
+ pclose(p);
+}
+
+static void
+radv_dump_enabled_options(struct radv_device *device, FILE *f)
+{
+ uint64_t mask;
+
+ fprintf(f, "Enabled debug options: ");
+
+ mask = device->instance->debug_flags;
+ while (mask) {
+ int i = u_bit_scan64(&mask);
+ fprintf(f, "%s, ", radv_get_debug_option_name(i));
+ }
+ fprintf(f, "\n");
+
+ fprintf(f, "Enabled perftest options: ");
+
+ mask = device->instance->perftest_flags;
+ while (mask) {
+ int i = u_bit_scan64(&mask);
+ fprintf(f, "%s, ", radv_get_perftest_option_name(i));
+ }
+ fprintf(f, "\n");
+}
+
+static void
+radv_dump_device_name(struct radv_device *device, FILE *f)
+{
+ struct radeon_info *info = &device->physical_device->rad_info;
+ char llvm_string[32] = {}, kernel_version[128] = {};
+ struct utsname uname_data;
+ const char *chip_name;
+
+ chip_name = device->ws->get_chip_name(device->ws);
+
+ if (uname(&uname_data) == 0)
+ snprintf(kernel_version, sizeof(kernel_version),
+ " / %s", uname_data.release);
+
+ if (HAVE_LLVM > 0) {
+ snprintf(llvm_string, sizeof(llvm_string),
+ ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
+ HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+ }
+
+ fprintf(f, "Device name: %s (%s DRM %i.%i.%i%s%s)\n\n",
+ chip_name, device->physical_device->name,
+ info->drm_major, info->drm_minor, info->drm_patchlevel,
+ kernel_version, llvm_string);
+}
+
+static bool
+radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
+{
+ struct radeon_winsys *ws = queue->device->ws;
+
+ if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx))
+ return true;
+
+ return false;
+}
+
+void
+radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_winsys_cs *cs)
+{
+ struct radv_pipeline *graphics_pipeline, *compute_pipeline;
+ struct radv_device *device = queue->device;
+ enum ring_type ring;
+ uint64_t addr;
+
+ ring = radv_queue_family_to_ring(queue->queue_family_index);
+
+ bool hang_occurred = radv_gpu_hang_occured(queue, ring);
+ bool vm_fault_occurred = false;
+ if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
+ vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
+ &device->dmesg_timestamp, &addr);
+ if (!hang_occurred && !vm_fault_occurred)
+ return;
+
+ graphics_pipeline = radv_get_saved_graphics_pipeline(device);
+ compute_pipeline = radv_get_saved_compute_pipeline(device);
+
+ fprintf(stderr, "GPU hang report:\n\n");
+ radv_dump_device_name(device, stderr);
+
+ radv_dump_enabled_options(device, stderr);
+ radv_dump_dmesg(stderr);
+
+ if (vm_fault_occurred) {
+ fprintf(stderr, "VM fault report.\n\n");
+ fprintf(stderr, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
+ }
+
+ radv_dump_debug_registers(device, stderr);
+
+ switch (ring) {
+ case RING_GFX:
+ radv_dump_graphics_state(graphics_pipeline, compute_pipeline,
+ stderr);
+ break;
+ case RING_COMPUTE:
+ radv_dump_compute_state(compute_pipeline, stderr);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ radv_dump_trace(queue->device, cs);
+ abort();
+}
+
+void
+radv_print_spirv(uint32_t *data, uint32_t size, FILE *fp)
+{
+ char path[] = "/tmp/fileXXXXXX";
+ char line[2048], command[128];
+ FILE *p;
+ int fd;
+
+ /* Dump the binary into a temporary file. */
+ fd = mkstemp(path);
+ if (fd < 0)
+ return;
+
+ if (write(fd, data, size) == -1)
+ goto fail;
+
+ sprintf(command, "spirv-dis %s", path);
+
+ /* Disassemble using spirv-dis if installed. */
+ p = popen(command, "r");
+ if (p) {
+ while (fgets(line, sizeof(line), p))
+ fprintf(fp, "%s", line);
+ pclose(p);
+ }
+
+fail:
+ close(fd);
+ unlink(path);
+}
diff --git a/lib/mesa/src/amd/vulkan/radv_extensions.c b/lib/mesa/src/amd/vulkan/radv_extensions.c
new file mode 100644
index 000000000..f9268dfbe
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_extensions.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "radv_private.h"
+
+#include "vk_util.h"
+
+/* Convert the VK_USE_PLATFORM_* defines to booleans */
+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+# undef VK_USE_PLATFORM_ANDROID_KHR
+# define VK_USE_PLATFORM_ANDROID_KHR true
+#else
+# define VK_USE_PLATFORM_ANDROID_KHR false
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+# undef VK_USE_PLATFORM_WAYLAND_KHR
+# define VK_USE_PLATFORM_WAYLAND_KHR true
+#else
+# define VK_USE_PLATFORM_WAYLAND_KHR false
+#endif
+#ifdef VK_USE_PLATFORM_XCB_KHR
+# undef VK_USE_PLATFORM_XCB_KHR
+# define VK_USE_PLATFORM_XCB_KHR true
+#else
+# define VK_USE_PLATFORM_XCB_KHR false
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_KHR
+# undef VK_USE_PLATFORM_XLIB_KHR
+# define VK_USE_PLATFORM_XLIB_KHR true
+#else
+# define VK_USE_PLATFORM_XLIB_KHR false
+#endif
+
+/* And ANDROID too */
+#ifdef ANDROID
+# undef ANDROID
+# define ANDROID true
+#else
+# define ANDROID false
+#endif
+
+#define RADV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || VK_USE_PLATFORM_XCB_KHR || VK_USE_PLATFORM_XLIB_KHR)
+
+bool
+radv_instance_extension_supported(const char *name)
+{
+ if (strcmp(name, "VK_KHR_external_memory_capabilities") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_external_semaphore_capabilities") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_get_physical_device_properties2") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_surface") == 0)
+ return RADV_HAS_SURFACE;
+ if (strcmp(name, "VK_KHR_wayland_surface") == 0)
+ return VK_USE_PLATFORM_WAYLAND_KHR;
+ if (strcmp(name, "VK_KHR_xcb_surface") == 0)
+ return VK_USE_PLATFORM_XCB_KHR;
+ if (strcmp(name, "VK_KHR_xlib_surface") == 0)
+ return VK_USE_PLATFORM_XLIB_KHR;
+ return false;
+}
+
+VkResult radv_EnumerateInstanceExtensionProperties(
+ const char* pLayerName,
+ uint32_t* pPropertyCount,
+ VkExtensionProperties* pProperties)
+{
+ VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_memory_capabilities",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_semaphore_capabilities",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_get_physical_device_properties2",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (RADV_HAS_SURFACE) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_surface",
+ .specVersion = 25,
+ };
+ }
+ }
+ if (VK_USE_PLATFORM_WAYLAND_KHR) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_wayland_surface",
+ .specVersion = 6,
+ };
+ }
+ }
+ if (VK_USE_PLATFORM_XCB_KHR) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_xcb_surface",
+ .specVersion = 6,
+ };
+ }
+ }
+ if (VK_USE_PLATFORM_XLIB_KHR) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_xlib_surface",
+ .specVersion = 6,
+ };
+ }
+ }
+
+ return vk_outarray_status(&out);
+}
+
+uint32_t
+radv_physical_device_api_version(struct radv_physical_device *dev)
+{
+ return VK_MAKE_VERSION(1, 0, 57);
+}
+
+bool
+radv_physical_device_extension_supported(struct radv_physical_device *device,
+ const char *name)
+{
+ if (strcmp(name, "VK_KHR_bind_memory2") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_dedicated_allocation") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_descriptor_update_template") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_external_memory") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_external_memory_fd") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_external_semaphore") == 0)
+ return device->rad_info.has_syncobj;
+ if (strcmp(name, "VK_KHR_external_semaphore_fd") == 0)
+ return device->rad_info.has_syncobj;
+ if (strcmp(name, "VK_KHR_get_memory_requirements2") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_image_format_list") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_incremental_present") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_maintenance1") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_maintenance2") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_push_descriptor") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_relaxed_block_layout") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_sampler_mirror_clamp_to_edge") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_shader_draw_parameters") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_storage_buffer_storage_class") == 0)
+ return true;
+ if (strcmp(name, "VK_KHR_swapchain") == 0)
+ return RADV_HAS_SURFACE;
+ if (strcmp(name, "VK_KHR_variable_pointers") == 0)
+ return true;
+ if (strcmp(name, "VK_KHX_multiview") == 0)
+ return false;
+ if (strcmp(name, "VK_EXT_global_priority") == 0)
+ return device->rad_info.has_ctx_priority;
+ if (strcmp(name, "VK_AMD_draw_indirect_count") == 0)
+ return true;
+ if (strcmp(name, "VK_AMD_rasterization_order") == 0)
+ return device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2;
+ return false;
+}
+
+VkResult radv_EnumerateDeviceExtensionProperties(
+ VkPhysicalDevice physicalDevice,
+ const char* pLayerName,
+ uint32_t* pPropertyCount,
+ VkExtensionProperties* pProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+ (void)device;
+
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_bind_memory2",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_dedicated_allocation",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_descriptor_update_template",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_memory",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_memory_fd",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (device->rad_info.has_syncobj) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_semaphore",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (device->rad_info.has_syncobj) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_external_semaphore_fd",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_get_memory_requirements2",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_image_format_list",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_incremental_present",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_maintenance1",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_maintenance2",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_push_descriptor",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_relaxed_block_layout",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_sampler_mirror_clamp_to_edge",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_shader_draw_parameters",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_storage_buffer_storage_class",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (RADV_HAS_SURFACE) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_swapchain",
+ .specVersion = 68,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHR_variable_pointers",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (false) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_KHX_multiview",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (device->rad_info.has_ctx_priority) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_EXT_global_priority",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (true) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_AMD_draw_indirect_count",
+ .specVersion = 1,
+ };
+ }
+ }
+ if (device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "VK_AMD_rasterization_order",
+ .specVersion = 1,
+ };
+ }
+ }
+
+ return vk_outarray_status(&out);
+}
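
For reference, applications consume both of these entry points through the
standard Vulkan two-call idiom that the VK_OUTARRAY_* macros are built to
serve; a sketch for the instance-level query (error handling elided,
<vulkan/vulkan.h> and the usual libc headers assumed):

    uint32_t count = 0;
    /* First call: NULL array, just ask for the element count. */
    vkEnumerateInstanceExtensionProperties(NULL, &count, NULL);
    VkExtensionProperties *props = malloc(count * sizeof(*props));
    /* Second call: fill the array; count is clamped to what fits. */
    vkEnumerateInstanceExtensionProperties(NULL, &count, props);
    for (uint32_t i = 0; i < count; i++)
        printf("%s (rev %u)\n", props[i].extensionName, props[i].specVersion);
    free(props);

The device-level query works the same way through
vkEnumerateDeviceExtensionProperties, with a VkPhysicalDevice as the first
argument.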
diff --git a/lib/mesa/src/amd/vulkan/radv_extensions.py b/lib/mesa/src/amd/vulkan/radv_extensions.py
new file mode 100644
index 000000000..43c0fa740
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_extensions.py
@@ -0,0 +1,278 @@
+COPYRIGHT = """\
+/*
+ * Copyright 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+"""
+
+import argparse
+import copy
+import re
+import xml.etree.cElementTree as et
+
+from mako.template import Template
+
+MAX_API_VERSION = '1.0.57'
+
+class Extension:
+    def __init__(self, name, ext_version, enable):
+        self.name = name
+        self.ext_version = int(ext_version)
+        if enable is True:
+            self.enable = 'true'
+        elif enable is False:
+            self.enable = 'false'
+        else:
+            self.enable = enable
+
+# On Android, we disable all surface and swapchain extensions. Android's Vulkan
+# loader implements VK_KHR_surface and VK_KHR_swapchain, and applications
+# cannot access the driver's implementation. Moreover, if the driver exposes
+# those extension strings, then tests dEQP-VK.api.info.instance.extensions
+# and dEQP-VK.api.info.device fail due to the duplicated strings.
+EXTENSIONS = [
+ Extension('VK_KHR_bind_memory2', 1, True),
+ Extension('VK_KHR_dedicated_allocation', 1, True),
+ Extension('VK_KHR_descriptor_update_template', 1, True),
+ Extension('VK_KHR_external_memory', 1, True),
+ Extension('VK_KHR_external_memory_capabilities', 1, True),
+ Extension('VK_KHR_external_memory_fd', 1, True),
+ Extension('VK_KHR_external_semaphore', 1, 'device->rad_info.has_syncobj'),
+ Extension('VK_KHR_external_semaphore_capabilities', 1, True),
+ Extension('VK_KHR_external_semaphore_fd', 1, 'device->rad_info.has_syncobj'),
+ Extension('VK_KHR_get_memory_requirements2', 1, True),
+ Extension('VK_KHR_get_physical_device_properties2', 1, True),
+ Extension('VK_KHR_image_format_list', 1, True),
+ Extension('VK_KHR_incremental_present', 1, True),
+ Extension('VK_KHR_maintenance1', 1, True),
+ Extension('VK_KHR_maintenance2', 1, True),
+ Extension('VK_KHR_push_descriptor', 1, True),
+ Extension('VK_KHR_relaxed_block_layout', 1, True),
+ Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True),
+ Extension('VK_KHR_shader_draw_parameters', 1, True),
+ Extension('VK_KHR_storage_buffer_storage_class', 1, True),
+ Extension('VK_KHR_surface', 25, 'RADV_HAS_SURFACE'),
+ Extension('VK_KHR_swapchain', 68, 'RADV_HAS_SURFACE'),
+ Extension('VK_KHR_variable_pointers', 1, True),
+ Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'),
+ Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'),
+ Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
+ Extension('VK_KHX_multiview', 1, False),
+ Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'),
+ Extension('VK_AMD_draw_indirect_count', 1, True),
+ Extension('VK_AMD_rasterization_order', 1, 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),
+]
+
+class VkVersion:
+    def __init__(self, string):
+        split = string.split('.')
+        self.major = int(split[0])
+        self.minor = int(split[1])
+        if len(split) > 2:
+            assert len(split) == 3
+            self.patch = int(split[2])
+        else:
+            self.patch = None
+
+        # Sanity check. The range bits are required by the definition of the
+        # VK_MAKE_VERSION macro.
+        assert self.major < 1024 and self.minor < 1024
+        assert self.patch is None or self.patch < 4096
+        assert str(self) == string
+
+    def __str__(self):
+        ver_list = [str(self.major), str(self.minor)]
+        if self.patch is not None:
+            ver_list.append(str(self.patch))
+        return '.'.join(ver_list)
+
+    def c_vk_version(self):
+        ver_list = [str(self.major), str(self.minor), str(self.patch)]
+        return 'VK_MAKE_VERSION(' + ', '.join(ver_list) + ')'
+
+    def __int_ver(self):
+        # This is just an expansion of VK_VERSION.
+        patch = self.patch if self.patch is not None else 0
+        return (self.major << 22) | (self.minor << 12) | patch
+
+    def __cmp__(self, other):
+        # If only one of them has a patch version, "ignore" it by making
+        # other's patch version match self.
+        if (self.patch is None) != (other.patch is None):
+            other = copy.copy(other)
+            other.patch = self.patch
+
+        return self.__int_ver().__cmp__(other.__int_ver())
+
+MAX_API_VERSION = VkVersion(MAX_API_VERSION)
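
__int_ver() above mirrors Vulkan's own version packing; as a worked example
in C (VK_MAKE_VERSION is the standard macro from vulkan.h):

    /* (major << 22) | (minor << 12) | patch, so for 1.0.57: */
    uint32_t v = VK_MAKE_VERSION(1, 0, 57);  /* (1<<22) | (0<<12) | 57 = 0x400039 */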
+
+def _init_exts_from_xml(xml):
+    """ Walk the Vulkan XML and fill out extra extension information. """
+
+    xml = et.parse(xml)
+
+    ext_name_map = {}
+    for ext in EXTENSIONS:
+        ext_name_map[ext.name] = ext
+
+    for ext_elem in xml.findall('.extensions/extension'):
+        ext_name = ext_elem.attrib['name']
+        if ext_name not in ext_name_map:
+            continue
+
+        # Workaround for VK_ANDROID_native_buffer. Its <extension> element in
+        # vk.xml lists it as supported="disabled" and provides only a stub
+        # definition. Its <extension> element in Mesa's custom
+        # vk_android_native_buffer.xml, though, lists it as
+        # supported='android-vendor' and fully defines the extension. We want
+        # to skip the <extension> element in vk.xml.
+        if ext_elem.attrib['supported'] == 'disabled':
+            assert ext_name == 'VK_ANDROID_native_buffer'
+            continue
+
+        ext = ext_name_map[ext_name]
+        ext.type = ext_elem.attrib['type']
+
+_TEMPLATE = Template(COPYRIGHT + """
+#include "radv_private.h"
+
+#include "vk_util.h"
+
+/* Convert the VK_USE_PLATFORM_* defines to booleans */
+%for platform in ['ANDROID', 'WAYLAND', 'XCB', 'XLIB']:
+#ifdef VK_USE_PLATFORM_${platform}_KHR
+# undef VK_USE_PLATFORM_${platform}_KHR
+# define VK_USE_PLATFORM_${platform}_KHR true
+#else
+# define VK_USE_PLATFORM_${platform}_KHR false
+#endif
+%endfor
+
+/* And ANDROID too */
+#ifdef ANDROID
+# undef ANDROID
+# define ANDROID true
+#else
+# define ANDROID false
+#endif
+
+#define RADV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || \\
+ VK_USE_PLATFORM_XCB_KHR || \\
+ VK_USE_PLATFORM_XLIB_KHR)
+
+bool
+radv_instance_extension_supported(const char *name)
+{
+%for ext in instance_extensions:
+ if (strcmp(name, "${ext.name}") == 0)
+ return ${ext.enable};
+%endfor
+ return false;
+}
+
+VkResult radv_EnumerateInstanceExtensionProperties(
+ const char* pLayerName,
+ uint32_t* pPropertyCount,
+ VkExtensionProperties* pProperties)
+{
+ VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+
+%for ext in instance_extensions:
+ if (${ext.enable}) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "${ext.name}",
+ .specVersion = ${ext.ext_version},
+ };
+ }
+ }
+%endfor
+
+ return vk_outarray_status(&out);
+}
+
+uint32_t
+radv_physical_device_api_version(struct radv_physical_device *dev)
+{
+ return ${MAX_API_VERSION.c_vk_version()};
+}
+
+bool
+radv_physical_device_extension_supported(struct radv_physical_device *device,
+ const char *name)
+{
+%for ext in device_extensions:
+ if (strcmp(name, "${ext.name}") == 0)
+ return ${ext.enable};
+%endfor
+ return false;
+}
+
+VkResult radv_EnumerateDeviceExtensionProperties(
+ VkPhysicalDevice physicalDevice,
+ const char* pLayerName,
+ uint32_t* pPropertyCount,
+ VkExtensionProperties* pProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+ (void)device;
+
+%for ext in device_extensions:
+ if (${ext.enable}) {
+ vk_outarray_append(&out, prop) {
+ *prop = (VkExtensionProperties) {
+ .extensionName = "${ext.name}",
+ .specVersion = ${ext.ext_version},
+ };
+ }
+ }
+%endfor
+
+ return vk_outarray_status(&out);
+}
+""")
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--out', help='Output C file.', required=True)
+    parser.add_argument('--xml',
+                        help='Vulkan API XML file.',
+                        required=True,
+                        action='append',
+                        dest='xml_files')
+    args = parser.parse_args()
+
+    for filename in args.xml_files:
+        _init_exts_from_xml(filename)
+
+    for ext in EXTENSIONS:
+        assert ext.type == 'instance' or ext.type == 'device'
+
+    template_env = {
+        'MAX_API_VERSION': MAX_API_VERSION,
+        'instance_extensions': [e for e in EXTENSIONS if e.type == 'instance'],
+        'device_extensions': [e for e in EXTENSIONS if e.type == 'device'],
+    }
+
+    with open(args.out, 'w') as f:
+        f.write(_TEMPLATE.render(**template_env))
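
A typical invocation from the build would look like
`python radv_extensions.py --xml path/to/vk.xml --out radv_extensions.c`
(paths illustrative); the generated output is exactly the radv_extensions.c
shown above.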
diff --git a/lib/mesa/src/amd/vulkan/radv_pass.c b/lib/mesa/src/amd/vulkan/radv_pass.c
index 17eff3937..a52dae39d 100644
--- a/lib/mesa/src/amd/vulkan/radv_pass.c
+++ b/lib/mesa/src/amd/vulkan/radv_pass.c
@@ -26,6 +26,8 @@
*/
#include "radv_private.h"
+#include "vk_util.h"
+
VkResult radv_CreateRenderPass(
VkDevice _device,
const VkRenderPassCreateInfo* pCreateInfo,
@@ -36,6 +38,7 @@ VkResult radv_CreateRenderPass(
struct radv_render_pass *pass;
size_t size;
size_t attachments_offset;
+ VkRenderPassMultiviewCreateInfoKHX *multiview_info = NULL;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
@@ -54,6 +57,16 @@ VkResult radv_CreateRenderPass(
pass->subpass_count = pCreateInfo->subpassCount;
pass->attachments = (void *) pass + attachments_offset;
+ vk_foreach_struct(ext, pCreateInfo->pNext) {
+ switch(ext->sType) {
+ case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHX:
+ multiview_info = (VkRenderPassMultiviewCreateInfoKHX *)ext;
+ break;
+ default:
+ break;
+ }
+ }
+
for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
struct radv_render_pass_attachment *att = &pass->attachments[i];
@@ -97,6 +110,8 @@ VkResult radv_CreateRenderPass(
subpass->input_count = desc->inputAttachmentCount;
subpass->color_count = desc->colorAttachmentCount;
+ if (multiview_info)
+ subpass->view_mask = multiview_info->pViewMasks[i];
if (desc->inputAttachmentCount > 0) {
subpass->input_attachments = p;
@@ -105,6 +120,8 @@ VkResult radv_CreateRenderPass(
for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
subpass->input_attachments[j]
= desc->pInputAttachments[j];
+ if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED)
+ pass->attachments[desc->pInputAttachments[j].attachment].view_mask |= subpass->view_mask;
}
}
@@ -115,6 +132,8 @@ VkResult radv_CreateRenderPass(
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
subpass->color_attachments[j]
= desc->pColorAttachments[j];
+ if (desc->pColorAttachments[j].attachment != VK_ATTACHMENT_UNUSED)
+ pass->attachments[desc->pColorAttachments[j].attachment].view_mask |= subpass->view_mask;
}
}
@@ -127,14 +146,18 @@ VkResult radv_CreateRenderPass(
uint32_t a = desc->pResolveAttachments[j].attachment;
subpass->resolve_attachments[j]
= desc->pResolveAttachments[j];
- if (a != VK_ATTACHMENT_UNUSED)
+ if (a != VK_ATTACHMENT_UNUSED) {
subpass->has_resolve = true;
+ pass->attachments[desc->pResolveAttachments[j].attachment].view_mask |= subpass->view_mask;
+ }
}
}
if (desc->pDepthStencilAttachment) {
subpass->depth_stencil_attachment =
*desc->pDepthStencilAttachment;
+ if (desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED)
+ pass->attachments[desc->pDepthStencilAttachment->attachment].view_mask |= subpass->view_mask;
} else {
subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
}
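
The view masks consumed above come from a structure the application chains
into the render pass create info; a sketch of the producer side, using this
Mesa version's provisional KHX names (one subpass rendering views 0 and 1):

    uint32_t view_masks[1] = { 0x3 };
    VkRenderPassMultiviewCreateInfoKHX multiview = {
        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHX,
        .subpassCount = 1,
        .pViewMasks = view_masks,
    };
    VkRenderPassCreateInfo rp_info = {
        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
        .pNext = &multiview,
        /* attachments, subpasses, dependencies as usual ... */
    };

Note that radv_physical_device_extension_supported() still reports
VK_KHX_multiview as disabled in this import, so the chain walk is effectively
dormant.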
diff --git a/lib/mesa/src/amd/vulkan/radv_shader.c b/lib/mesa/src/amd/vulkan/radv_shader.c
new file mode 100644
index 000000000..83e2e675e
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_shader.c
@@ -0,0 +1,671 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/mesa-sha1.h"
+#include "util/u_atomic.h"
+#include "radv_debug.h"
+#include "radv_private.h"
+#include "radv_shader.h"
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "spirv/nir_spirv.h"
+
+#include <llvm-c/Core.h>
+#include <llvm-c/TargetMachine.h>
+
+#include "sid.h"
+#include "gfx9d.h"
+#include "ac_binary.h"
+#include "ac_llvm_util.h"
+#include "ac_nir_to_llvm.h"
+#include "vk_format.h"
+#include "util/debug.h"
+#include "ac_exp_param.h"
+
+static const struct nir_shader_compiler_options nir_options = {
+ .vertex_id_zero_based = true,
+ .lower_scmp = true,
+ .lower_flrp32 = true,
+ .lower_fsat = true,
+ .lower_fdiv = true,
+ .lower_sub = true,
+ .lower_pack_snorm_2x16 = true,
+ .lower_pack_snorm_4x8 = true,
+ .lower_pack_unorm_2x16 = true,
+ .lower_pack_unorm_4x8 = true,
+ .lower_unpack_snorm_2x16 = true,
+ .lower_unpack_snorm_4x8 = true,
+ .lower_unpack_unorm_2x16 = true,
+ .lower_unpack_unorm_4x8 = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+ .lower_ffma = true,
+ .max_unroll_iterations = 32
+};
+
+VkResult radv_CreateShaderModule(
+ VkDevice _device,
+ const VkShaderModuleCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkShaderModule* pShaderModule)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_shader_module *module;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
+ assert(pCreateInfo->flags == 0);
+
+ module = vk_alloc2(&device->alloc, pAllocator,
+ sizeof(*module) + pCreateInfo->codeSize, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (module == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ module->nir = NULL;
+ module->size = pCreateInfo->codeSize;
+ memcpy(module->data, pCreateInfo->pCode, module->size);
+
+ _mesa_sha1_compute(module->data, module->size, module->sha1);
+
+ *pShaderModule = radv_shader_module_to_handle(module);
+
+ return VK_SUCCESS;
+}
+
+void radv_DestroyShaderModule(
+ VkDevice _device,
+ VkShaderModule _module,
+ const VkAllocationCallbacks* pAllocator)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_shader_module, module, _module);
+
+ if (!module)
+ return;
+
+ vk_free2(&device->alloc, pAllocator, module);
+}
+
+bool
+radv_lower_indirect_derefs(struct nir_shader *nir,
+ struct radv_physical_device *device)
+{
+ /* While it would be nice not to have this flag, we are constrained
+ * by the reality that LLVM 5.0 doesn't have working VGPR indexing
+ * on GFX9.
+ */
+ bool llvm_has_working_vgpr_indexing =
+ device->rad_info.chip_class <= VI;
+
+ /* TODO: Indirect indexing of GS inputs is unimplemented.
+ *
+ * TCS and TES load inputs directly from LDS or offchip memory, so
+ * indirect indexing is trivial.
+ */
+ nir_variable_mode indirect_mask = 0;
+ if (nir->info.stage == MESA_SHADER_GEOMETRY ||
+ (nir->info.stage != MESA_SHADER_TESS_CTRL &&
+ nir->info.stage != MESA_SHADER_TESS_EVAL &&
+ !llvm_has_working_vgpr_indexing)) {
+ indirect_mask |= nir_var_shader_in;
+ }
+ if (!llvm_has_working_vgpr_indexing &&
+ nir->info.stage != MESA_SHADER_TESS_CTRL)
+ indirect_mask |= nir_var_shader_out;
+
+ /* TODO: We shouldn't need to do this; however, LLVM isn't currently
+ * smart enough to handle indirects without excess spilling, which
+ * causes the GPU to hang.
+ *
+ * See the following thread for more details of the problem:
+ * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
+ */
+ indirect_mask |= nir_var_local;
+
+ return nir_lower_indirect_derefs(nir, indirect_mask);
+}
+
+void
+radv_optimize_nir(struct nir_shader *shader)
+{
+ bool progress;
+
+ do {
+ progress = false;
+
+ NIR_PASS_V(shader, nir_lower_vars_to_ssa);
+ NIR_PASS_V(shader, nir_lower_64bit_pack);
+ NIR_PASS_V(shader, nir_lower_alu_to_scalar);
+ NIR_PASS_V(shader, nir_lower_phis_to_scalar);
+
+ NIR_PASS(progress, shader, nir_copy_prop);
+ NIR_PASS(progress, shader, nir_opt_remove_phis);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ if (nir_opt_trivial_continues(shader)) {
+ progress = true;
+ NIR_PASS(progress, shader, nir_copy_prop);
+ NIR_PASS(progress, shader, nir_opt_remove_phis);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ }
+ NIR_PASS(progress, shader, nir_opt_if);
+ NIR_PASS(progress, shader, nir_opt_dead_cf);
+ NIR_PASS(progress, shader, nir_opt_cse);
+ NIR_PASS(progress, shader, nir_opt_peephole_select, 8);
+ NIR_PASS(progress, shader, nir_opt_algebraic);
+ NIR_PASS(progress, shader, nir_opt_constant_folding);
+ NIR_PASS(progress, shader, nir_opt_undef);
+ NIR_PASS(progress, shader, nir_opt_conditional_discard);
+ if (shader->options->max_unroll_iterations) {
+ NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
+ }
+ } while (progress);
+}
+
+nir_shader *
+radv_shader_compile_to_nir(struct radv_device *device,
+ struct radv_shader_module *module,
+ const char *entrypoint_name,
+ gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info)
+{
+ if (strcmp(entrypoint_name, "main") != 0) {
+ radv_finishme("Multiple shaders per module not really supported");
+ }
+
+ nir_shader *nir;
+ nir_function *entry_point;
+ if (module->nir) {
+ /* Some things, such as our meta clear/blit code, will give us a NIR
+ * shader directly. In that case, we ignore the SPIR-V entirely and
+ * just use the NIR shader. */
+ nir = module->nir;
+ nir->options = &nir_options;
+ nir_validate_shader(nir);
+
+ assert(exec_list_length(&nir->functions) == 1);
+ struct exec_node *node = exec_list_get_head(&nir->functions);
+ entry_point = exec_node_data(nir_function, node, node);
+ } else {
+ uint32_t *spirv = (uint32_t *) module->data;
+ assert(module->size % 4 == 0);
+
+ if (device->instance->debug_flags & RADV_DEBUG_DUMP_SPIRV)
+ radv_print_spirv(spirv, module->size, stderr);
+
+ uint32_t num_spec_entries = 0;
+ struct nir_spirv_specialization *spec_entries = NULL;
+ if (spec_info && spec_info->mapEntryCount > 0) {
+ num_spec_entries = spec_info->mapEntryCount;
+ spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
+ for (uint32_t i = 0; i < num_spec_entries; i++) {
+ VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
+ const void *data = spec_info->pData + entry.offset;
+ assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
+
+ spec_entries[i].id = spec_info->pMapEntries[i].constantID;
+ if (entry.size == 8)
+ spec_entries[i].data64 = *(const uint64_t *)data;
+ else
+ spec_entries[i].data32 = *(const uint32_t *)data;
+ }
+ }
+ const struct nir_spirv_supported_extensions supported_ext = {
+ .draw_parameters = true,
+ .float64 = true,
+ .image_read_without_format = true,
+ .image_write_without_format = true,
+ .tessellation = true,
+ .int64 = true,
+ .multiview = true,
+ .variable_pointers = true,
+ };
+ entry_point = spirv_to_nir(spirv, module->size / 4,
+ spec_entries, num_spec_entries,
+ stage, entrypoint_name, &supported_ext, &nir_options);
+ nir = entry_point->shader;
+ assert(nir->info.stage == stage);
+ nir_validate_shader(nir);
+
+ free(spec_entries);
+
+ /* We have to lower away local constant initializers right before we
+ * inline functions. That way they get properly initialized at the top
+ * of the function and not at the top of its caller.
+ */
+ NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
+ NIR_PASS_V(nir, nir_lower_returns);
+ NIR_PASS_V(nir, nir_inline_functions);
+
+ /* Pick off the single entrypoint that we want */
+ foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
+ if (func != entry_point)
+ exec_node_remove(&func->node);
+ }
+ assert(exec_list_length(&nir->functions) == 1);
+ entry_point->name = ralloc_strdup(entry_point, "main");
+
+ NIR_PASS_V(nir, nir_remove_dead_variables,
+ nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
+
+ /* Now that we've deleted all but the main function, we can go ahead and
+ * lower the rest of the constant initializers.
+ */
+ NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
+ NIR_PASS_V(nir, nir_lower_system_values);
+ NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
+ }
+
+ /* Vulkan uses the separate-shader linking model */
+ nir->info.separate_shader = true;
+
+ nir_shader_gather_info(nir, entry_point->impl);
+
+ static const nir_lower_tex_options tex_options = {
+ .lower_txp = ~0,
+ };
+
+ nir_lower_tex(nir, &tex_options);
+
+ nir_lower_vars_to_ssa(nir);
+ nir_lower_var_copies(nir);
+ nir_lower_global_vars_to_local(nir);
+ nir_remove_dead_variables(nir, nir_var_local);
+ radv_lower_indirect_derefs(nir, device->physical_device);
+ radv_optimize_nir(nir);
+
+ return nir;
+}
+
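+/* First-fit allocation out of a list of slab BOs. Shaders in a slab are
+ * kept sorted by offset, so a free hole is found by comparing each
+ * shader's start offset against the 256-byte-aligned end of the
+ * previous one; when no slab has room, a new 256 KiB slab is created
+ * and persistently mapped.
+ */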
+void *
+radv_alloc_shader_memory(struct radv_device *device,
+ struct radv_shader_variant *shader)
+{
+ mtx_lock(&device->shader_slab_mutex);
+ list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
+ uint64_t offset = 0;
+ list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list) {
+ if (s->bo_offset - offset >= shader->code_size) {
+ shader->bo = slab->bo;
+ shader->bo_offset = offset;
+ list_addtail(&shader->slab_list, &s->slab_list);
+ mtx_unlock(&device->shader_slab_mutex);
+ return slab->ptr + offset;
+ }
+ offset = align_u64(s->bo_offset + s->code_size, 256);
+ }
+ if (slab->size - offset >= shader->code_size) {
+ shader->bo = slab->bo;
+ shader->bo_offset = offset;
+ list_addtail(&shader->slab_list, &slab->shaders);
+ mtx_unlock(&device->shader_slab_mutex);
+ return slab->ptr + offset;
+ }
+ }
+
+ mtx_unlock(&device->shader_slab_mutex);
+ struct radv_shader_slab *slab = calloc(1, sizeof(struct radv_shader_slab));
+
+ slab->size = 256 * 1024;
+ slab->bo = device->ws->buffer_create(device->ws, slab->size, 256,
+ RADEON_DOMAIN_VRAM, 0);
+ slab->ptr = (char*)device->ws->buffer_map(slab->bo);
+ list_inithead(&slab->shaders);
+
+ mtx_lock(&device->shader_slab_mutex);
+ list_add(&slab->slabs, &device->shader_slabs);
+
+ shader->bo = slab->bo;
+ shader->bo_offset = 0;
+ list_add(&shader->slab_list, &slab->shaders);
+ mtx_unlock(&device->shader_slab_mutex);
+ return slab->ptr;
+}
+
+void
+radv_destroy_shader_slabs(struct radv_device *device)
+{
+ list_for_each_entry_safe(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
+ device->ws->buffer_destroy(slab->bo);
+ free(slab);
+ }
+ mtx_destroy(&device->shader_slab_mutex);
+}
+
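+/* Derive the SPI_SHADER_PGM_RSRC1/RSRC2 programming from the compiled
+ * shader config: VGPR/SGPR counts and float mode go into RSRC1, while
+ * user SGPRs, scratch enable and the per-stage extras (OC_LDS, LDS
+ * size, compute thread-group bits) go into RSRC2. The machine code is
+ * then copied into slab memory.
+ */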
+static void
+radv_fill_shader_variant(struct radv_device *device,
+ struct radv_shader_variant *variant,
+ struct ac_shader_binary *binary,
+ gl_shader_stage stage)
+{
+ bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0;
+ unsigned vgpr_comp_cnt = 0;
+
+ if (scratch_enabled && !device->llvm_supports_spill)
+ radv_finishme("shader scratch support only available with LLVM 4.0");
+
+ variant->code_size = binary->code_size;
+ variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
+ S_00B12C_SCRATCH_EN(scratch_enabled);
+
+ variant->rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) |
+ S_00B848_SGPRS((variant->config.num_sgprs - 1) / 8) |
+ S_00B848_DX10_CLAMP(1) |
+ S_00B848_FLOAT_MODE(variant->config.float_mode);
+
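+ /* vgpr_comp_cnt selects how many system-value input VGPRs the
+ * hardware preloads for the stage (vertex/instance IDs for VS,
+ * tessellation coordinates for TES); on GFX9 the merged HS stage
+ * reuses the count of its LS (vertex) half.
+ */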
+ switch (stage) {
+ case MESA_SHADER_TESS_EVAL:
+ vgpr_comp_cnt = 3;
+ variant->rsrc2 |= S_00B12C_OC_LDS_EN(1);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt;
+ else
+ variant->rsrc2 |= S_00B12C_OC_LDS_EN(1);
+ break;
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_GEOMETRY:
+ vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ break;
+ case MESA_SHADER_COMPUTE:
+ variant->rsrc2 |=
+ S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) |
+ S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) |
+ S_00B84C_TG_SIZE_EN(1) |
+ S_00B84C_LDS_SIZE(variant->config.lds_size);
+ break;
+ default:
+ unreachable("unsupported shader type");
+ break;
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ stage == MESA_SHADER_GEOMETRY) {
+ /* TODO: Figure out how many we actually need. */
+ variant->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(3);
+ variant->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(3) |
+ S_00B22C_OC_LDS_EN(1);
+ } else if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ stage == MESA_SHADER_TESS_CTRL)
+ variant->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
+ else
+ variant->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
+
+ void *ptr = radv_alloc_shader_memory(device, variant);
+ memcpy(ptr, binary->code, binary->code_size);
+}
+
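+/* Common back end of shader compilation: build an LLVM target machine
+ * for the device (honouring the spill and sisched options), run the ac
+ * NIR compiler or the GS copy-shader generator to get a binary, fill
+ * in the hardware config and upload the code, and optionally hand the
+ * raw binary back to the caller.
+ */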
+static struct radv_shader_variant *
+shader_variant_create(struct radv_device *device,
+ struct radv_shader_module *module,
+ struct nir_shader * const *shaders,
+ int shader_count,
+ gl_shader_stage stage,
+ struct ac_nir_compiler_options *options,
+ bool gs_copy_shader,
+ void **code_out,
+ unsigned *code_size_out)
+{
+ enum radeon_family chip_family = device->physical_device->rad_info.family;
+ bool dump_shaders = device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS;
+ enum ac_target_machine_options tm_options = 0;
+ struct radv_shader_variant *variant;
+ struct ac_shader_binary binary;
+ LLVMTargetMachineRef tm;
+
+ variant = calloc(1, sizeof(struct radv_shader_variant));
+ if (!variant)
+ return NULL;
+
+ options->family = chip_family;
+ options->chip_class = device->physical_device->rad_info.chip_class;
+
+ if (options->supports_spill)
+ tm_options |= AC_TM_SUPPORTS_SPILL;
+ if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
+ tm_options |= AC_TM_SISCHED;
+ tm = ac_create_target_machine(chip_family, tm_options);
+
+ if (gs_copy_shader) {
+ assert(shader_count == 1);
+ ac_create_gs_copy_shader(tm, *shaders, &binary, &variant->config,
+ &variant->info, options, dump_shaders);
+ } else {
+ ac_compile_nir_shader(tm, &binary, &variant->config,
+ &variant->info, shaders, shader_count, options,
+ dump_shaders);
+ }
+
+ LLVMDisposeTargetMachine(tm);
+
+ radv_fill_shader_variant(device, variant, &binary, stage);
+
+ if (code_out) {
+ *code_out = binary.code;
+ *code_size_out = binary.code_size;
+ } else
+ free(binary.code);
+ free(binary.config);
+ free(binary.rodata);
+ free(binary.global_symbol_offsets);
+ free(binary.relocs);
+ variant->ref_count = 1;
+
+ if (device->trace_bo) {
+ variant->disasm_string = binary.disasm_string;
+ if (!gs_copy_shader && !module->nir) {
+ variant->nir = *shaders;
+ variant->spirv = (uint32_t *)module->data;
+ variant->spirv_size = module->size;
+ }
+ } else {
+ free(binary.disasm_string);
+ }
+
+ return variant;
+}
+
+struct radv_shader_variant *
+radv_shader_variant_create(struct radv_device *device,
+ struct radv_shader_module *module,
+ struct nir_shader *const *shaders,
+ int shader_count,
+ struct radv_pipeline_layout *layout,
+ const struct ac_shader_variant_key *key,
+ void **code_out,
+ unsigned *code_size_out)
+{
+ struct ac_nir_compiler_options options = {0};
+
+ options.layout = layout;
+ if (key)
+ options.key = *key;
+
+ options.unsafe_math = !!(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH);
+ options.supports_spill = device->llvm_supports_spill;
+
+ return shader_variant_create(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage,
+ &options, false, code_out, code_size_out);
+}
+
+struct radv_shader_variant *
+radv_create_gs_copy_shader(struct radv_device *device,
+ struct nir_shader *shader,
+ void **code_out,
+ unsigned *code_size_out,
+ bool multiview)
+{
+ struct ac_nir_compiler_options options = {0};
+
+ options.key.has_multiview_view_index = multiview;
+
+ return shader_variant_create(device, NULL, &shader, 1, MESA_SHADER_VERTEX,
+ &options, true, code_out, code_size_out);
+}
+
+void
+radv_shader_variant_destroy(struct radv_device *device,
+ struct radv_shader_variant *variant)
+{
+ if (!p_atomic_dec_zero(&variant->ref_count))
+ return;
+
+ mtx_lock(&device->shader_slab_mutex);
+ list_del(&variant->slab_list);
+ mtx_unlock(&device->shader_slab_mutex);
+
+ ralloc_free(variant->nir);
+ free(variant->disasm_string);
+ free(variant);
+}
+
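+/* Return the SPI_SHADER_USER_DATA_*_0 register for the hardware stage
+ * an API stage actually runs on: with tessellation, VS runs as LS and
+ * TES takes over the VS (or ES) slot; with geometry, VS/TES run as ES;
+ * and on GFX9, HS and GS are merged with their LS/ES halves, so the
+ * LS/ES register banks are used for them as well.
+ */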
+uint32_t
+radv_shader_stage_to_user_data_0(gl_shader_stage stage, enum chip_class chip_class,
+ bool has_gs, bool has_tess)
+{
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
+ return R_00B030_SPI_SHADER_USER_DATA_PS_0;
+ case MESA_SHADER_VERTEX:
+ if (chip_class >= GFX9) {
+ return has_tess ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
+ has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
+ R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ }
+ if (has_tess)
+ return R_00B530_SPI_SHADER_USER_DATA_LS_0;
+ else
+ return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ case MESA_SHADER_GEOMETRY:
+ return chip_class >= GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
+ R_00B230_SPI_SHADER_USER_DATA_GS_0;
+ case MESA_SHADER_COMPUTE:
+ return R_00B900_COMPUTE_USER_DATA_0;
+ case MESA_SHADER_TESS_CTRL:
+ return chip_class >= GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
+ R_00B430_SPI_SHADER_USER_DATA_HS_0;
+ case MESA_SHADER_TESS_EVAL:
+ if (chip_class >= GFX9) {
+ return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
+ R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ }
+ if (has_gs)
+ return R_00B330_SPI_SHADER_USER_DATA_ES_0;
+ else
+ return R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ default:
+ unreachable("unknown shader");
+ }
+}
+
+const char *
+radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage)
+{
+ switch (stage) {
+ case MESA_SHADER_VERTEX: return var->info.vs.as_ls ? "Vertex Shader as LS" : var->info.vs.as_es ? "Vertex Shader as ES" : "Vertex Shader as VS";
+ case MESA_SHADER_GEOMETRY: return "Geometry Shader";
+ case MESA_SHADER_FRAGMENT: return "Pixel Shader";
+ case MESA_SHADER_COMPUTE: return "Compute Shader";
+ case MESA_SHADER_TESS_CTRL: return "Tessellation Control Shader";
+ case MESA_SHADER_TESS_EVAL: return var->info.tes.as_es ? "Tessellation Evaluation Shader as ES" : "Tessellation Evaluation Shader as VS";
+ default:
+ return "Unknown shader";
+ }
+}
+
+void
+radv_shader_dump_stats(struct radv_device *device,
+ struct radv_shader_variant *variant,
+ gl_shader_stage stage,
+ FILE *file)
+{
+ unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
+ struct ac_shader_config *conf;
+ unsigned max_simd_waves;
+ unsigned lds_per_wave = 0;
+
+ switch (device->physical_device->rad_info.family) {
+ /* These always have 8 waves: */
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ max_simd_waves = 8;
+ break;
+ default:
+ max_simd_waves = 10;
+ }
+
+ conf = &variant->config;
+
+ if (stage == MESA_SHADER_FRAGMENT) {
+ lds_per_wave = conf->lds_size * lds_increment +
+ align(variant->info.fs.num_interp * 48,
+ lds_increment);
+ }
+
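+ /* The wave count is also limited by how many times the shader's
+ * register usage fits into the per-SIMD pools: 800 SGPRs on VI+
+ * (512 on older chips) and 256 VGPRs.
+ */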
+ if (conf->num_sgprs) {
+ if (device->physical_device->rad_info.chip_class >= VI)
+ max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
+ else
+ max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
+ }
+
+ if (conf->num_vgprs)
+ max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
+
+ /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
+ * that PS can use.
+ */
+ if (lds_per_wave)
+ max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
+
+ fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage));
+
+ if (stage == MESA_SHADER_FRAGMENT) {
+ fprintf(file, "*** SHADER CONFIG ***\n"
+ "SPI_PS_INPUT_ADDR = 0x%04x\n"
+ "SPI_PS_INPUT_ENA = 0x%04x\n",
+ conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+ }
+
+ fprintf(file, "*** SHADER STATS ***\n"
+ "SGPRS: %d\n"
+ "VGPRS: %d\n"
+ "Spilled SGPRs: %d\n"
+ "Spilled VGPRs: %d\n"
+ "Code Size: %d bytes\n"
+ "LDS: %d blocks\n"
+ "Scratch: %d bytes per wave\n"
+ "Max Waves: %d\n"
+ "********************\n\n\n",
+ conf->num_sgprs, conf->num_vgprs,
+ conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size,
+ conf->lds_size, conf->scratch_bytes_per_wave,
+ max_simd_waves);
+}
diff --git a/lib/mesa/src/amd/vulkan/radv_shader.h b/lib/mesa/src/amd/vulkan/radv_shader.h
new file mode 100644
index 000000000..6e4e9966c
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_shader.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef RADV_SHADER_H
+#define RADV_SHADER_H
+
+#include "radv_private.h"
+
+#include "nir/nir.h"
+
+struct radv_shader_module {
+ struct nir_shader *nir;
+ unsigned char sha1[20];
+ uint32_t size;
+ char data[0];
+};
+
+struct radv_shader_variant {
+ uint32_t ref_count;
+
+ struct radeon_winsys_bo *bo;
+ uint64_t bo_offset;
+ struct ac_shader_config config;
+ uint32_t code_size;
+ struct ac_shader_variant_info info;
+ unsigned rsrc1;
+ unsigned rsrc2;
+
+ /* debug only */
+ uint32_t *spirv;
+ uint32_t spirv_size;
+ struct nir_shader *nir;
+ char *disasm_string;
+
+ struct list_head slab_list;
+};
+
+struct radv_shader_slab {
+ struct list_head slabs;
+ struct list_head shaders;
+ struct radeon_winsys_bo *bo;
+ uint64_t size;
+ char *ptr;
+};
+
+void
+radv_optimize_nir(struct nir_shader *shader);
+
+nir_shader *
+radv_shader_compile_to_nir(struct radv_device *device,
+ struct radv_shader_module *module,
+ const char *entrypoint_name,
+ gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info);
+
+void *
+radv_alloc_shader_memory(struct radv_device *device,
+ struct radv_shader_variant *shader);
+
+void
+radv_destroy_shader_slabs(struct radv_device *device);
+
+struct radv_shader_variant *
+radv_shader_variant_create(struct radv_device *device,
+ struct radv_shader_module *module,
+ struct nir_shader *const *shaders,
+ int shader_count,
+ struct radv_pipeline_layout *layout,
+ const struct ac_shader_variant_key *key,
+ void **code_out,
+ unsigned *code_size_out);
+
+struct radv_shader_variant *
+radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir,
+ void **code_out, unsigned *code_size_out,
+ bool multiview);
+
+void
+radv_shader_variant_destroy(struct radv_device *device,
+ struct radv_shader_variant *variant);
+
+bool
+radv_lower_indirect_derefs(struct nir_shader *nir,
+ struct radv_physical_device *device);
+
+uint32_t
+radv_shader_stage_to_user_data_0(gl_shader_stage stage, enum chip_class chip_class,
+ bool has_gs, bool has_tess);
+
+const char *
+radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage);
+
+void
+radv_shader_dump_stats(struct radv_device *device,
+ struct radv_shader_variant *variant,
+ gl_shader_stage stage,
+ FILE *file);
+
+#endif