diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2018-10-23 05:53:16 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2018-10-23 05:53:16 +0000 |
commit | aea331c3cb69ef2756ffceb05da8ace5e6287314 (patch) | |
tree | 2ec86f417bae9af0860d54ed89c822e701563645 /lib/mesa/src/amd | |
parent | f6666e4c3977a5d74f3da7464672ea48e44dff4b (diff) |
Import Mesa 17.3.9
Diffstat (limited to 'lib/mesa/src/amd')
-rw-r--r-- | lib/mesa/src/amd/common/ac_shader_abi.h | 101 | ||||
-rw-r--r-- | lib/mesa/src/amd/vulkan/dev_icd.json.in | 2 | ||||
-rw-r--r-- | lib/mesa/src/amd/vulkan/radv_debug.c | 736 | ||||
-rw-r--r-- | lib/mesa/src/amd/vulkan/radv_extensions.c | 407 | ||||
-rw-r--r-- | lib/mesa/src/amd/vulkan/radv_extensions.py | 278 | ||||
-rw-r--r-- | lib/mesa/src/amd/vulkan/radv_pass.c | 25 | ||||
-rw-r--r-- | lib/mesa/src/amd/vulkan/radv_shader.c | 671 | ||||
-rw-r--r-- | lib/mesa/src/amd/vulkan/radv_shader.h | 123 |
8 files changed, 2341 insertions, 2 deletions
diff --git a/lib/mesa/src/amd/common/ac_shader_abi.h b/lib/mesa/src/amd/common/ac_shader_abi.h new file mode 100644 index 000000000..b04dc076d --- /dev/null +++ b/lib/mesa/src/amd/common/ac_shader_abi.h @@ -0,0 +1,101 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef AC_SHADER_ABI_H +#define AC_SHADER_ABI_H + +#include <llvm-c/Core.h> + +enum ac_descriptor_type { + AC_DESC_IMAGE, + AC_DESC_FMASK, + AC_DESC_SAMPLER, + AC_DESC_BUFFER, +}; + +/* Document the shader ABI during compilation. This is what allows radeonsi and + * radv to share a compiler backend. 
+ */ +struct ac_shader_abi { + LLVMValueRef base_vertex; + LLVMValueRef start_instance; + LLVMValueRef draw_id; + LLVMValueRef vertex_id; + LLVMValueRef instance_id; + LLVMValueRef frag_pos[4]; + LLVMValueRef front_face; + LLVMValueRef ancillary; + LLVMValueRef sample_coverage; + + /* For VS and PS: pre-loaded shader inputs. + * + * Currently only used for NIR shaders; indexed by variables' + * driver_location. + */ + LLVMValueRef *inputs; + + void (*emit_outputs)(struct ac_shader_abi *abi, + unsigned max_outputs, + LLVMValueRef *addrs); + + LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index); + + /** + * Load the descriptor for the given buffer. + * + * \param buffer the buffer as presented in NIR: this is the descriptor + * in Vulkan, and the buffer index in OpenGL/Gallium + * \param write whether buffer contents will be written + */ + LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi, + LLVMValueRef buffer, bool write); + + /** + * Load a descriptor associated to a sampler. + * + * \param descriptor_set the descriptor set index (only for Vulkan) + * \param base_index the base index of the sampler variable + * \param constant_index constant part of an array index (or 0, if the + * sampler variable is not an array) + * \param index non-constant part of an array index (may be NULL) + * \param desc_type the type of descriptor to load + * \param image whether the descriptor is loaded for an image operation + */ + LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi, + unsigned descriptor_set, + unsigned base_index, + unsigned constant_index, + LLVMValueRef index, + enum ac_descriptor_type desc_type, + bool image, bool write); + + /* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently + * uses it due to promoting D16 to D32, but radv needs it off. 
*/ + bool clamp_shadow_reference; + + /* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0 + * and LLVM optimizes an indexed load with constant index to IDXEN=0. */ + bool gfx9_stride_size_workaround; +}; + +#endif /* AC_SHADER_ABI_H */ diff --git a/lib/mesa/src/amd/vulkan/dev_icd.json.in b/lib/mesa/src/amd/vulkan/dev_icd.json.in index f726df02a..cc80641f5 100644 --- a/lib/mesa/src/amd/vulkan/dev_icd.json.in +++ b/lib/mesa/src/amd/vulkan/dev_icd.json.in @@ -1,7 +1,7 @@ { "file_format_version": "1.0.0", "ICD": { - "library_path": "@build_libdir@/libvulkan_radeon.so", + "library_path": "@libvulkan_radeon_path@", "api_version": "1.0.3" } } diff --git a/lib/mesa/src/amd/vulkan/radv_debug.c b/lib/mesa/src/amd/vulkan/radv_debug.c new file mode 100644 index 000000000..b69c05b64 --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_debug.c @@ -0,0 +1,736 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <sys/utsname.h> + +#include "sid.h" +#include "gfx9d.h" +#include "ac_debug.h" +#include "radv_debug.h" +#include "radv_shader.h" + +#define TRACE_BO_SIZE 4096 + +#define COLOR_RESET "\033[0m" +#define COLOR_RED "\033[31m" +#define COLOR_GREEN "\033[1;32m" +#define COLOR_YELLOW "\033[1;33m" +#define COLOR_CYAN "\033[1;36m" + +/* Trace BO layout (offsets are 4 bytes): + * + * [0]: primary trace ID + * [1]: secondary trace ID + * [2-3]: 64-bit GFX pipeline pointer + * [4-5]: 64-bit COMPUTE pipeline pointer + * [6-7]: 64-bit descriptor set #0 pointer + * ... + * [68-69]: 64-bit descriptor set #31 pointer + */ + +bool +radv_init_trace(struct radv_device *device) +{ + struct radeon_winsys *ws = device->ws; + + device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8, + RADEON_DOMAIN_VRAM, + RADEON_FLAG_CPU_ACCESS); + if (!device->trace_bo) + return false; + + device->trace_id_ptr = ws->buffer_map(device->trace_bo); + if (!device->trace_id_ptr) + return false; + + memset(device->trace_id_ptr, 0, TRACE_BO_SIZE); + + ac_vm_fault_occured(device->physical_device->rad_info.chip_class, + &device->dmesg_timestamp, NULL); + + return true; +} + +static void +radv_dump_trace(struct radv_device *device, struct radeon_winsys_cs *cs) +{ + const char *filename = getenv("RADV_TRACE_FILE"); + FILE *f = fopen(filename, "w"); + + if (!f) { + fprintf(stderr, "Failed to write trace dump to %s\n", filename); + return; + } + + fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr); + device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2); + fclose(f); +} + +static void +radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset) +{ + struct 
radeon_winsys *ws = device->ws; + uint32_t value; + + if (ws->read_registers(ws, offset, 1, &value)) + ac_dump_reg(f, device->physical_device->rad_info.chip_class, + offset, value, ~0); +} + +static void +radv_dump_debug_registers(struct radv_device *device, FILE *f) +{ + struct radeon_info *info = &device->physical_device->rad_info; + + if (info->drm_major == 2 && info->drm_minor < 42) + return; /* no radeon support */ + + fprintf(f, "Memory-mapped registers:\n"); + radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS); + + /* No other registers can be read on DRM < 3.1.0. */ + if (info->drm_major < 3 || info->drm_minor < 1) { + fprintf(f, "\n"); + return; + } + + radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2); + radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0); + radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1); + radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2); + radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3); + radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG); + radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG); + if (info->chip_class <= VI) { + radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS); + radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2); + radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3); + } + radv_dump_mmapped_reg(device, f, R_008680_CP_STAT); + radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1); + radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2); + radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3); + radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS); + radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT); + radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1); + radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS); + radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT); + radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1); + fprintf(f, "\n"); +} + +static const char * 
+radv_get_descriptor_name(enum VkDescriptorType type) +{ + switch (type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + return "SAMPLER"; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + return "COMBINED_IMAGE_SAMPLER"; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + return "SAMPLED_IMAGE"; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + return "STORAGE_IMAGE"; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + return "UNIFORM_TEXEL_BUFFER"; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + return "STORAGE_TEXEL_BUFFER"; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + return "UNIFORM_BUFFER"; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + return "STORAGE_BUFFER"; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + return "UNIFORM_BUFFER_DYNAMIC"; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + return "STORAGE_BUFFER_DYNAMIC"; + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + return "INPUT_ATTACHMENT"; + default: + return "UNKNOWN"; + } +} + +static void +radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, + FILE *f) +{ + fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); + for (unsigned j = 0; j < 4; j++) + ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, + desc[j], 0xffffffff); +} + +static void +radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, + FILE *f) +{ + fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n"); + for (unsigned j = 0; j < 8; j++) + ac_dump_reg(f, chip_class, R_008F10_SQ_IMG_RSRC_WORD0 + j * 4, + desc[j], 0xffffffff); + + fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n"); + for (unsigned j = 0; j < 8; j++) + ac_dump_reg(f, chip_class, R_008F10_SQ_IMG_RSRC_WORD0 + j * 4, + desc[8 + j], 0xffffffff); +} + +static void +radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, + FILE *f) +{ + fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); + for (unsigned j = 0; j < 4; j++) { + ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, + desc[j], 0xffffffff); + } +} + 
+static void +radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, + const uint32_t *desc, FILE *f) +{ + radv_dump_image_descriptor(chip_class, desc, f); + radv_dump_sampler_descriptor(chip_class, desc + 16, f); +} + +static void +radv_dump_descriptor_set(enum chip_class chip_class, + struct radv_descriptor_set *set, unsigned id, FILE *f) +{ + const struct radv_descriptor_set_layout *layout; + int i; + + if (!set) + return; + layout = set->layout; + + fprintf(f, "** descriptor set (%d) **\n", id); + fprintf(f, "va: 0x%"PRIx64"\n", set->va); + fprintf(f, "size: %d\n", set->size); + fprintf(f, "mapped_ptr:\n"); + + for (i = 0; i < set->size / 4; i++) { + fprintf(f, "\t[0x%x] = 0x%08x\n", i, set->mapped_ptr[i]); + } + fprintf(f, "\n"); + + fprintf(f, "\t*** layout ***\n"); + fprintf(f, "\tbinding_count: %d\n", layout->binding_count); + fprintf(f, "\tsize: %d\n", layout->size); + fprintf(f, "\tshader_stages: %x\n", layout->shader_stages); + fprintf(f, "\tdynamic_shader_stages: %x\n", + layout->dynamic_shader_stages); + fprintf(f, "\tbuffer_count: %d\n", layout->buffer_count); + fprintf(f, "\tdynamic_offset_count: %d\n", + layout->dynamic_offset_count); + fprintf(f, "\n"); + + for (i = 0; i < set->layout->binding_count; i++) { + uint32_t *desc = + set->mapped_ptr + layout->binding[i].offset / 4; + + fprintf(f, "\t\t**** binding layout (%d) ****\n", i); + fprintf(f, "\t\ttype: %s\n", + radv_get_descriptor_name(layout->binding[i].type)); + fprintf(f, "\t\tarray_size: %d\n", + layout->binding[i].array_size); + fprintf(f, "\t\toffset: %d\n", + layout->binding[i].offset); + fprintf(f, "\t\tbuffer_offset: %d\n", + layout->binding[i].buffer_offset); + fprintf(f, "\t\tdynamic_offset_offset: %d\n", + layout->binding[i].dynamic_offset_offset); + fprintf(f, "\t\tdynamic_offset_count: %d\n", + layout->binding[i].dynamic_offset_count); + fprintf(f, "\t\tsize: %d\n", + layout->binding[i].size); + fprintf(f, "\t\timmutable_samplers_offset: %d\n", + 
layout->binding[i].immutable_samplers_offset); + fprintf(f, "\t\timmutable_samplers_equal: %d\n", + layout->binding[i].immutable_samplers_equal); + fprintf(f, "\n"); + + switch (layout->binding[i].type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + radv_dump_buffer_descriptor(chip_class, desc, f); + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + radv_dump_image_descriptor(chip_class, desc, f); + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + radv_dump_combined_image_sampler_descriptor(chip_class, desc, f); + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + radv_dump_sampler_descriptor(chip_class, desc, f); + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + /* todo */ + break; + default: + assert(!"unknown descriptor type"); + break; + } + fprintf(f, "\n"); + } + fprintf(f, "\n\n"); +} + +static void +radv_dump_descriptors(struct radv_pipeline *pipeline, FILE *f) +{ + struct radv_device *device = pipeline->device; + enum chip_class chip_class = device->physical_device->rad_info.chip_class; + uint64_t *ptr = (uint64_t *)device->trace_id_ptr; + int i; + + fprintf(f, "List of descriptors:\n"); + for (i = 0; i < MAX_SETS; i++) { + struct radv_descriptor_set *set = + (struct radv_descriptor_set *)ptr[i + 3]; + + radv_dump_descriptor_set(chip_class, set, i, f); + } +} + +struct radv_shader_inst { + char text[160]; /* one disasm line */ + unsigned offset; /* instruction offset */ + unsigned size; /* instruction size = 4 or 8 */ +}; + +/* Split a disassembly string into lines and add them to the array pointed + * to by "instructions". 
*/ +static void si_add_split_disasm(const char *disasm, + uint64_t start_addr, + unsigned *num, + struct radv_shader_inst *instructions) +{ + struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL; + char *next; + + while ((next = strchr(disasm, '\n'))) { + struct radv_shader_inst *inst = &instructions[*num]; + unsigned len = next - disasm; + + assert(len < ARRAY_SIZE(inst->text)); + memcpy(inst->text, disasm, len); + inst->text[len] = 0; + inst->offset = last_inst ? last_inst->offset + last_inst->size : 0; + + const char *semicolon = strchr(disasm, ';'); + assert(semicolon); + /* More than 16 chars after ";" means the instruction is 8 bytes long. */ + inst->size = next - semicolon > 16 ? 8 : 4; + + snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len, + " [PC=0x%"PRIx64", off=%u, size=%u]", + start_addr + inst->offset, inst->offset, inst->size); + + last_inst = inst; + (*num)++; + disasm = next + 1; + } +} + +static void +radv_dump_annotated_shader(struct radv_pipeline *pipeline, + struct radv_shader_variant *shader, + gl_shader_stage stage, + struct ac_wave_info *waves, unsigned num_waves, + FILE *f) +{ + uint64_t start_addr, end_addr; + unsigned i; + + if (!shader) + return; + + start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset; + end_addr = start_addr + shader->code_size; + + /* See if any wave executes the shader. */ + for (i = 0; i < num_waves; i++) { + if (start_addr <= waves[i].pc && waves[i].pc <= end_addr) + break; + } + + if (i == num_waves) + return; /* the shader is not being executed */ + + /* Remember the first found wave. The waves are sorted according to PC. */ + waves = &waves[i]; + num_waves -= i; + + /* Get the list of instructions. + * Buffer size / 4 is the upper bound of the instruction count. 
+ */ + unsigned num_inst = 0; + struct radv_shader_inst *instructions = + calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); + + si_add_split_disasm(shader->disasm_string, + start_addr, &num_inst, instructions); + + fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", + radv_get_shader_name(shader, stage)); + + /* Print instructions with annotations. */ + for (i = 0; i < num_inst; i++) { + struct radv_shader_inst *inst = &instructions[i]; + + fprintf(f, "%s\n", inst->text); + + /* Print which waves execute the instruction right now. */ + while (num_waves && start_addr + inst->offset == waves->pc) { + fprintf(f, + " " COLOR_GREEN "^ SE%u SH%u CU%u " + "SIMD%u WAVE%u EXEC=%016"PRIx64 " ", + waves->se, waves->sh, waves->cu, waves->simd, + waves->wave, waves->exec); + + if (inst->size == 4) { + fprintf(f, "INST32=%08X" COLOR_RESET "\n", + waves->inst_dw0); + } else { + fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", + waves->inst_dw0, waves->inst_dw1); + } + + waves->matched = true; + waves = &waves[1]; + num_waves--; + } + } + + fprintf(f, "\n\n"); + free(instructions); +} + +static void +radv_dump_annotated_shaders(struct radv_pipeline *pipeline, + struct radv_shader_variant *compute_shader, + FILE *f) +{ + struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; + unsigned num_waves = ac_get_wave_info(waves); + unsigned mask; + + fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET + "\n\n", num_waves); + + /* Dump annotated active graphics shaders. */ + mask = pipeline->active_stages; + while (mask) { + int stage = u_bit_scan(&mask); + + radv_dump_annotated_shader(pipeline, pipeline->shaders[stage], + stage, waves, num_waves, f); + } + + radv_dump_annotated_shader(pipeline, compute_shader, + MESA_SHADER_COMPUTE, waves, num_waves, f); + + /* Print waves executing shaders that are not currently bound. 
*/ + unsigned i; + bool found = false; + for (i = 0; i < num_waves; i++) { + if (waves[i].matched) + continue; + + if (!found) { + fprintf(f, COLOR_CYAN + "Waves not executing currently-bound shaders:" + COLOR_RESET "\n"); + found = true; + } + fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64 + " INST=%08X %08X PC=%"PRIx64"\n", + waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, + waves[i].wave, waves[i].exec, waves[i].inst_dw0, + waves[i].inst_dw1, waves[i].pc); + } + if (found) + fprintf(f, "\n\n"); +} + +static void +radv_dump_shader(struct radv_pipeline *pipeline, + struct radv_shader_variant *shader, gl_shader_stage stage, + FILE *f) +{ + if (!shader) + return; + + fprintf(f, "%s:\n\n", radv_get_shader_name(shader, stage)); + + if (shader->spirv) { + fprintf(f, "SPIRV:\n"); + radv_print_spirv(shader->spirv, shader->spirv_size, f); + } + + if (shader->nir) { + fprintf(f, "NIR:\n"); + nir_print_shader(shader->nir, f); + } + + fprintf(stderr, "DISASM:\n%s\n", shader->disasm_string); + + radv_shader_dump_stats(pipeline->device, shader, stage, f); +} + +static void +radv_dump_shaders(struct radv_pipeline *pipeline, + struct radv_shader_variant *compute_shader, FILE *f) +{ + unsigned mask; + + /* Dump active graphics shaders. */ + mask = pipeline->active_stages; + while (mask) { + int stage = u_bit_scan(&mask); + + radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f); + } + + radv_dump_shader(pipeline, compute_shader, MESA_SHADER_COMPUTE, f); +} + +static void +radv_dump_graphics_state(struct radv_pipeline *graphics_pipeline, + struct radv_pipeline *compute_pipeline, FILE *f) +{ + struct radv_shader_variant *compute_shader = + compute_pipeline ? 
compute_pipeline->shaders[MESA_SHADER_COMPUTE] : NULL; + + if (!graphics_pipeline) + return; + + radv_dump_shaders(graphics_pipeline, compute_shader, f); + radv_dump_annotated_shaders(graphics_pipeline, compute_shader, f); + radv_dump_descriptors(graphics_pipeline, f); +} + +static void +radv_dump_compute_state(struct radv_pipeline *compute_pipeline, FILE *f) +{ + if (!compute_pipeline) + return; + + radv_dump_shaders(compute_pipeline, + compute_pipeline->shaders[MESA_SHADER_COMPUTE], f); + radv_dump_annotated_shaders(compute_pipeline, + compute_pipeline->shaders[MESA_SHADER_COMPUTE], + f); + radv_dump_descriptors(compute_pipeline, f); +} + +static struct radv_pipeline * +radv_get_saved_graphics_pipeline(struct radv_device *device) +{ + uint64_t *ptr = (uint64_t *)device->trace_id_ptr; + + return (struct radv_pipeline *)ptr[1]; +} + +static struct radv_pipeline * +radv_get_saved_compute_pipeline(struct radv_device *device) +{ + uint64_t *ptr = (uint64_t *)device->trace_id_ptr; + + return (struct radv_pipeline *)ptr[2]; +} + +static void +radv_dump_dmesg(FILE *f) +{ + char line[2000]; + FILE *p; + + p = popen("dmesg | tail -n60", "r"); + if (!p) + return; + + fprintf(f, "\nLast 60 lines of dmesg:\n\n"); + while (fgets(line, sizeof(line), p)) + fputs(line, f); + fprintf(f, "\n"); + + pclose(p); +} + +static void +radv_dump_enabled_options(struct radv_device *device, FILE *f) +{ + uint64_t mask; + + fprintf(f, "Enabled debug options: "); + + mask = device->instance->debug_flags; + while (mask) { + int i = u_bit_scan64(&mask); + fprintf(f, "%s, ", radv_get_debug_option_name(i)); + } + fprintf(f, "\n"); + + fprintf(f, "Enabled perftest options: "); + + mask = device->instance->perftest_flags; + while (mask) { + int i = u_bit_scan64(&mask); + fprintf(f, "%s, ", radv_get_perftest_option_name(i)); + } + fprintf(f, "\n"); +} + +static void +radv_dump_device_name(struct radv_device *device, FILE *f) +{ + struct radeon_info *info = &device->physical_device->rad_info; + char 
llvm_string[32] = {}, kernel_version[128] = {}; + struct utsname uname_data; + const char *chip_name; + + chip_name = device->ws->get_chip_name(device->ws); + + if (uname(&uname_data) == 0) + snprintf(kernel_version, sizeof(kernel_version), + " / %s", uname_data.release); + + if (HAVE_LLVM > 0) { + snprintf(llvm_string, sizeof(llvm_string), + ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, + HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); + } + + fprintf(f, "Device name: %s (%s DRM %i.%i.%i%s%s)\n\n", + chip_name, device->physical_device->name, + info->drm_major, info->drm_minor, info->drm_patchlevel, + kernel_version, llvm_string); +} + +static bool +radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring) +{ + struct radeon_winsys *ws = queue->device->ws; + + if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx)) + return true; + + return false; +} + +void +radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_winsys_cs *cs) +{ + struct radv_pipeline *graphics_pipeline, *compute_pipeline; + struct radv_device *device = queue->device; + enum ring_type ring; + uint64_t addr; + + ring = radv_queue_family_to_ring(queue->queue_family_index); + + bool hang_occurred = radv_gpu_hang_occured(queue, ring); + bool vm_fault_occurred = false; + if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS) + vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class, + &device->dmesg_timestamp, &addr); + if (!hang_occurred && !vm_fault_occurred) + return; + + graphics_pipeline = radv_get_saved_graphics_pipeline(device); + compute_pipeline = radv_get_saved_compute_pipeline(device); + + fprintf(stderr, "GPU hang report:\n\n"); + radv_dump_device_name(device, stderr); + + radv_dump_enabled_options(device, stderr); + radv_dump_dmesg(stderr); + + if (vm_fault_occurred) { + fprintf(stderr, "VM fault report.\n\n"); + fprintf(stderr, "Failing VM page: 0x%08"PRIx64"\n\n", addr); + } + + radv_dump_debug_registers(device, stderr); + + 
switch (ring) { + case RING_GFX: + radv_dump_graphics_state(graphics_pipeline, compute_pipeline, + stderr); + break; + case RING_COMPUTE: + radv_dump_compute_state(compute_pipeline, stderr); + break; + default: + assert(0); + break; + } + + radv_dump_trace(queue->device, cs); + abort(); +} + +void +radv_print_spirv(uint32_t *data, uint32_t size, FILE *fp) +{ + char path[] = "/tmp/fileXXXXXX"; + char line[2048], command[128]; + FILE *p; + int fd; + + /* Dump the binary into a temporary file. */ + fd = mkstemp(path); + if (fd < 0) + return; + + if (write(fd, data, size) == -1) + goto fail; + + sprintf(command, "spirv-dis %s", path); + + /* Disassemble using spirv-dis if installed. */ + p = popen(command, "r"); + if (p) { + while (fgets(line, sizeof(line), p)) + fprintf(fp, "%s", line); + pclose(p); + } + +fail: + close(fd); + unlink(path); +} diff --git a/lib/mesa/src/amd/vulkan/radv_extensions.c b/lib/mesa/src/amd/vulkan/radv_extensions.c new file mode 100644 index 000000000..f9268dfbe --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_extensions.c @@ -0,0 +1,407 @@ +/* + * Copyright 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "radv_private.h" + +#include "vk_util.h" + +/* Convert the VK_USE_PLATFORM_* defines to booleans */ +#ifdef VK_USE_PLATFORM_ANDROID_KHR +# undef VK_USE_PLATFORM_ANDROID_KHR +# define VK_USE_PLATFORM_ANDROID_KHR true +#else +# define VK_USE_PLATFORM_ANDROID_KHR false +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR +# undef VK_USE_PLATFORM_WAYLAND_KHR +# define VK_USE_PLATFORM_WAYLAND_KHR true +#else +# define VK_USE_PLATFORM_WAYLAND_KHR false +#endif +#ifdef VK_USE_PLATFORM_XCB_KHR +# undef VK_USE_PLATFORM_XCB_KHR +# define VK_USE_PLATFORM_XCB_KHR true +#else +# define VK_USE_PLATFORM_XCB_KHR false +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR +# undef VK_USE_PLATFORM_XLIB_KHR +# define VK_USE_PLATFORM_XLIB_KHR true +#else +# define VK_USE_PLATFORM_XLIB_KHR false +#endif + +/* And ANDROID too */ +#ifdef ANDROID +# undef ANDROID +# define ANDROID true +#else +# define ANDROID false +#endif + +#define RADV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || VK_USE_PLATFORM_XCB_KHR || VK_USE_PLATFORM_XLIB_KHR) + +bool +radv_instance_extension_supported(const char *name) +{ + if (strcmp(name, "VK_KHR_external_memory_capabilities") == 0) + return true; + if (strcmp(name, "VK_KHR_external_semaphore_capabilities") == 0) + return true; + if (strcmp(name, "VK_KHR_get_physical_device_properties2") == 0) + return true; + if (strcmp(name, "VK_KHR_surface") == 0) + return RADV_HAS_SURFACE; + if (strcmp(name, "VK_KHR_wayland_surface") == 0) + return VK_USE_PLATFORM_WAYLAND_KHR; + if (strcmp(name, "VK_KHR_xcb_surface") == 0) + return VK_USE_PLATFORM_XCB_KHR; + if (strcmp(name, "VK_KHR_xlib_surface") == 0) + return VK_USE_PLATFORM_XLIB_KHR; + return false; +} + +VkResult radv_EnumerateInstanceExtensionProperties( + 
const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); + + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_memory_capabilities", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_semaphore_capabilities", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_get_physical_device_properties2", + .specVersion = 1, + }; + } + } + if (RADV_HAS_SURFACE) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_surface", + .specVersion = 25, + }; + } + } + if (VK_USE_PLATFORM_WAYLAND_KHR) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_wayland_surface", + .specVersion = 6, + }; + } + } + if (VK_USE_PLATFORM_XCB_KHR) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_xcb_surface", + .specVersion = 6, + }; + } + } + if (VK_USE_PLATFORM_XLIB_KHR) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_xlib_surface", + .specVersion = 6, + }; + } + } + + return vk_outarray_status(&out); +} + +uint32_t +radv_physical_device_api_version(struct radv_physical_device *dev) +{ + return VK_MAKE_VERSION(1, 0, 57); +} + +bool +radv_physical_device_extension_supported(struct radv_physical_device *device, + const char *name) +{ + if (strcmp(name, "VK_KHR_bind_memory2") == 0) + return true; + if (strcmp(name, "VK_KHR_dedicated_allocation") == 0) + return true; + if (strcmp(name, "VK_KHR_descriptor_update_template") == 0) + return true; + if (strcmp(name, "VK_KHR_external_memory") == 0) + return true; + if (strcmp(name, "VK_KHR_external_memory_fd") == 0) + 
return true; + if (strcmp(name, "VK_KHR_external_semaphore") == 0) + return device->rad_info.has_syncobj; + if (strcmp(name, "VK_KHR_external_semaphore_fd") == 0) + return device->rad_info.has_syncobj; + if (strcmp(name, "VK_KHR_get_memory_requirements2") == 0) + return true; + if (strcmp(name, "VK_KHR_image_format_list") == 0) + return true; + if (strcmp(name, "VK_KHR_incremental_present") == 0) + return true; + if (strcmp(name, "VK_KHR_maintenance1") == 0) + return true; + if (strcmp(name, "VK_KHR_maintenance2") == 0) + return true; + if (strcmp(name, "VK_KHR_push_descriptor") == 0) + return true; + if (strcmp(name, "VK_KHR_relaxed_block_layout") == 0) + return true; + if (strcmp(name, "VK_KHR_sampler_mirror_clamp_to_edge") == 0) + return true; + if (strcmp(name, "VK_KHR_shader_draw_parameters") == 0) + return true; + if (strcmp(name, "VK_KHR_storage_buffer_storage_class") == 0) + return true; + if (strcmp(name, "VK_KHR_swapchain") == 0) + return RADV_HAS_SURFACE; + if (strcmp(name, "VK_KHR_variable_pointers") == 0) + return true; + if (strcmp(name, "VK_KHX_multiview") == 0) + return false; + if (strcmp(name, "VK_EXT_global_priority") == 0) + return device->rad_info.has_ctx_priority; + if (strcmp(name, "VK_AMD_draw_indirect_count") == 0) + return true; + if (strcmp(name, "VK_AMD_rasterization_order") == 0) + return device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2; + return false; +} + +VkResult radv_EnumerateDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); + VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); + (void)device; + + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_bind_memory2", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + 
.extensionName = "VK_KHR_dedicated_allocation", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_descriptor_update_template", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_memory", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_memory_fd", + .specVersion = 1, + }; + } + } + if (device->rad_info.has_syncobj) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_semaphore", + .specVersion = 1, + }; + } + } + if (device->rad_info.has_syncobj) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_external_semaphore_fd", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_get_memory_requirements2", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_image_format_list", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_incremental_present", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_maintenance1", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_maintenance2", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_push_descriptor", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, 
prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_relaxed_block_layout", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_sampler_mirror_clamp_to_edge", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_shader_draw_parameters", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_storage_buffer_storage_class", + .specVersion = 1, + }; + } + } + if (RADV_HAS_SURFACE) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_swapchain", + .specVersion = 68, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHR_variable_pointers", + .specVersion = 1, + }; + } + } + if (false) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_KHX_multiview", + .specVersion = 1, + }; + } + } + if (device->rad_info.has_ctx_priority) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_EXT_global_priority", + .specVersion = 1, + }; + } + } + if (true) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_AMD_draw_indirect_count", + .specVersion = 1, + }; + } + } + if (device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "VK_AMD_rasterization_order", + .specVersion = 1, + }; + } + } + + return vk_outarray_status(&out); +} diff --git a/lib/mesa/src/amd/vulkan/radv_extensions.py b/lib/mesa/src/amd/vulkan/radv_extensions.py new file mode 100644 index 000000000..43c0fa740 --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_extensions.py @@ -0,0 +1,278 @@ 
+COPYRIGHT = """\ +/* + * Copyright 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +""" + +import argparse +import copy +import re +import xml.etree.cElementTree as et + +from mako.template import Template + +MAX_API_VERSION = '1.0.57' + +class Extension: + def __init__(self, name, ext_version, enable): + self.name = name + self.ext_version = int(ext_version) + if enable is True: + self.enable = 'true'; + elif enable is False: + self.enable = 'false'; + else: + self.enable = enable; + +# On Android, we disable all surface and swapchain extensions. Android's Vulkan +# loader implements VK_KHR_surface and VK_KHR_swapchain, and applications +# cannot access the driver's implementation. Moreoever, if the driver exposes +# the those extension strings, then tests dEQP-VK.api.info.instance.extensions +# and dEQP-VK.api.info.device fail due to the duplicated strings. 
+EXTENSIONS = [ + Extension('VK_KHR_bind_memory2', 1, True), + Extension('VK_KHR_dedicated_allocation', 1, True), + Extension('VK_KHR_descriptor_update_template', 1, True), + Extension('VK_KHR_external_memory', 1, True), + Extension('VK_KHR_external_memory_capabilities', 1, True), + Extension('VK_KHR_external_memory_fd', 1, True), + Extension('VK_KHR_external_semaphore', 1, 'device->rad_info.has_syncobj'), + Extension('VK_KHR_external_semaphore_capabilities', 1, True), + Extension('VK_KHR_external_semaphore_fd', 1, 'device->rad_info.has_syncobj'), + Extension('VK_KHR_get_memory_requirements2', 1, True), + Extension('VK_KHR_get_physical_device_properties2', 1, True), + Extension('VK_KHR_image_format_list', 1, True), + Extension('VK_KHR_incremental_present', 1, True), + Extension('VK_KHR_maintenance1', 1, True), + Extension('VK_KHR_maintenance2', 1, True), + Extension('VK_KHR_push_descriptor', 1, True), + Extension('VK_KHR_relaxed_block_layout', 1, True), + Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True), + Extension('VK_KHR_shader_draw_parameters', 1, True), + Extension('VK_KHR_storage_buffer_storage_class', 1, True), + Extension('VK_KHR_surface', 25, 'RADV_HAS_SURFACE'), + Extension('VK_KHR_swapchain', 68, 'RADV_HAS_SURFACE'), + Extension('VK_KHR_variable_pointers', 1, True), + Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'), + Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'), + Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'), + Extension('VK_KHX_multiview', 1, False), + Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'), + Extension('VK_AMD_draw_indirect_count', 1, True), + Extension('VK_AMD_rasterization_order', 1, 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'), +] + +class VkVersion: + def __init__(self, string): + split = string.split('.') + self.major = int(split[0]) + self.minor = int(split[1]) + if len(split) > 2: + assert len(split) == 3 + self.patch 
= int(split[2]) + else: + self.patch = None + + # Sanity check. The range bits are required by the definition of the + # VK_MAKE_VERSION macro + assert self.major < 1024 and self.minor < 1024 + assert self.patch is None or self.patch < 4096 + assert(str(self) == string) + + def __str__(self): + ver_list = [str(self.major), str(self.minor)] + if self.patch is not None: + ver_list.append(str(self.patch)) + return '.'.join(ver_list) + + def c_vk_version(self): + ver_list = [str(self.major), str(self.minor), str(self.patch)] + return 'VK_MAKE_VERSION(' + ', '.join(ver_list) + ')' + + def __int_ver(self): + # This is just an expansion of VK_VERSION + patch = self.patch if self.patch is not None else 0 + return (self.major << 22) | (self.minor << 12) | patch + + def __cmp__(self, other): + # If only one of them has a patch version, "ignore" it by making + # other's patch version match self. + if (self.patch is None) != (other.patch is None): + other = copy.copy(other) + other.patch = self.patch + + return self.__int_ver().__cmp__(other.__int_ver()) + +MAX_API_VERSION = VkVersion(MAX_API_VERSION) + +def _init_exts_from_xml(xml): + """ Walk the Vulkan XML and fill out extra extension information. """ + + xml = et.parse(xml) + + ext_name_map = {} + for ext in EXTENSIONS: + ext_name_map[ext.name] = ext + + for ext_elem in xml.findall('.extensions/extension'): + ext_name = ext_elem.attrib['name'] + if ext_name not in ext_name_map: + continue + + # Workaround for VK_ANDROID_native_buffer. Its <extension> element in + # vk.xml lists it as supported="disabled" and provides only a stub + # definition. Its <extension> element in Mesa's custom + # vk_android_native_buffer.xml, though, lists it as + # supported='android-vendor' and fully defines the extension. We want + # to skip the <extension> element in vk.xml. 
+ if ext_elem.attrib['supported'] == 'disabled': + assert ext_name == 'VK_ANDROID_native_buffer' + continue + + ext = ext_name_map[ext_name] + ext.type = ext_elem.attrib['type'] + +_TEMPLATE = Template(COPYRIGHT + """ +#include "radv_private.h" + +#include "vk_util.h" + +/* Convert the VK_USE_PLATFORM_* defines to booleans */ +%for platform in ['ANDROID', 'WAYLAND', 'XCB', 'XLIB']: +#ifdef VK_USE_PLATFORM_${platform}_KHR +# undef VK_USE_PLATFORM_${platform}_KHR +# define VK_USE_PLATFORM_${platform}_KHR true +#else +# define VK_USE_PLATFORM_${platform}_KHR false +#endif +%endfor + +/* And ANDROID too */ +#ifdef ANDROID +# undef ANDROID +# define ANDROID true +#else +# define ANDROID false +#endif + +#define RADV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || \\ + VK_USE_PLATFORM_XCB_KHR || \\ + VK_USE_PLATFORM_XLIB_KHR) + +bool +radv_instance_extension_supported(const char *name) +{ +%for ext in instance_extensions: + if (strcmp(name, "${ext.name}") == 0) + return ${ext.enable}; +%endfor + return false; +} + +VkResult radv_EnumerateInstanceExtensionProperties( + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); + +%for ext in instance_extensions: + if (${ext.enable}) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "${ext.name}", + .specVersion = ${ext.ext_version}, + }; + } + } +%endfor + + return vk_outarray_status(&out); +} + +uint32_t +radv_physical_device_api_version(struct radv_physical_device *dev) +{ + return ${MAX_API_VERSION.c_vk_version()}; +} + +bool +radv_physical_device_extension_supported(struct radv_physical_device *device, + const char *name) +{ +%for ext in device_extensions: + if (strcmp(name, "${ext.name}") == 0) + return ${ext.enable}; +%endfor + return false; +} + +VkResult radv_EnumerateDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, + const char* pLayerName, + uint32_t* pPropertyCount, + 
VkExtensionProperties* pProperties) +{ + RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); + VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); + (void)device; + +%for ext in device_extensions: + if (${ext.enable}) { + vk_outarray_append(&out, prop) { + *prop = (VkExtensionProperties) { + .extensionName = "${ext.name}", + .specVersion = ${ext.ext_version}, + }; + } + } +%endfor + + return vk_outarray_status(&out); +} +""") + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--out', help='Output C file.', required=True) + parser.add_argument('--xml', + help='Vulkan API XML file.', + required=True, + action='append', + dest='xml_files') + args = parser.parse_args() + + for filename in args.xml_files: + _init_exts_from_xml(filename) + + for ext in EXTENSIONS: + assert ext.type == 'instance' or ext.type == 'device' + + template_env = { + 'MAX_API_VERSION': MAX_API_VERSION, + 'instance_extensions': [e for e in EXTENSIONS if e.type == 'instance'], + 'device_extensions': [e for e in EXTENSIONS if e.type == 'device'], + } + + with open(args.out, 'w') as f: + f.write(_TEMPLATE.render(**template_env)) diff --git a/lib/mesa/src/amd/vulkan/radv_pass.c b/lib/mesa/src/amd/vulkan/radv_pass.c index 17eff3937..a52dae39d 100644 --- a/lib/mesa/src/amd/vulkan/radv_pass.c +++ b/lib/mesa/src/amd/vulkan/radv_pass.c @@ -26,6 +26,8 @@ */ #include "radv_private.h" +#include "vk_util.h" + VkResult radv_CreateRenderPass( VkDevice _device, const VkRenderPassCreateInfo* pCreateInfo, @@ -36,6 +38,7 @@ VkResult radv_CreateRenderPass( struct radv_render_pass *pass; size_t size; size_t attachments_offset; + VkRenderPassMultiviewCreateInfoKHX *multiview_info = NULL; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); @@ -54,6 +57,16 @@ VkResult radv_CreateRenderPass( pass->subpass_count = pCreateInfo->subpassCount; pass->attachments = (void *) pass + attachments_offset; + vk_foreach_struct(ext, pCreateInfo->pNext) { + 
switch(ext->sType) { + case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHX: + multiview_info = ( VkRenderPassMultiviewCreateInfoKHX*)ext; + break; + default: + break; + } + } + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { struct radv_render_pass_attachment *att = &pass->attachments[i]; @@ -97,6 +110,8 @@ VkResult radv_CreateRenderPass( subpass->input_count = desc->inputAttachmentCount; subpass->color_count = desc->colorAttachmentCount; + if (multiview_info) + subpass->view_mask = multiview_info->pViewMasks[i]; if (desc->inputAttachmentCount > 0) { subpass->input_attachments = p; @@ -105,6 +120,8 @@ VkResult radv_CreateRenderPass( for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { subpass->input_attachments[j] = desc->pInputAttachments[j]; + if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED) + pass->attachments[desc->pInputAttachments[j].attachment].view_mask |= subpass->view_mask; } } @@ -115,6 +132,8 @@ VkResult radv_CreateRenderPass( for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { subpass->color_attachments[j] = desc->pColorAttachments[j]; + if (desc->pColorAttachments[j].attachment != VK_ATTACHMENT_UNUSED) + pass->attachments[desc->pColorAttachments[j].attachment].view_mask |= subpass->view_mask; } } @@ -127,14 +146,18 @@ VkResult radv_CreateRenderPass( uint32_t a = desc->pResolveAttachments[j].attachment; subpass->resolve_attachments[j] = desc->pResolveAttachments[j]; - if (a != VK_ATTACHMENT_UNUSED) + if (a != VK_ATTACHMENT_UNUSED) { subpass->has_resolve = true; + pass->attachments[desc->pResolveAttachments[j].attachment].view_mask |= subpass->view_mask; + } } } if (desc->pDepthStencilAttachment) { subpass->depth_stencil_attachment = *desc->pDepthStencilAttachment; + if (desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) + pass->attachments[desc->pDepthStencilAttachment->attachment].view_mask |= subpass->view_mask; } else { subpass->depth_stencil_attachment.attachment = 
VK_ATTACHMENT_UNUSED; } diff --git a/lib/mesa/src/amd/vulkan/radv_shader.c b/lib/mesa/src/amd/vulkan/radv_shader.c new file mode 100644 index 000000000..83e2e675e --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_shader.c @@ -0,0 +1,671 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "util/mesa-sha1.h" +#include "util/u_atomic.h" +#include "radv_debug.h" +#include "radv_private.h" +#include "radv_shader.h" +#include "nir/nir.h" +#include "nir/nir_builder.h" +#include "spirv/nir_spirv.h" + +#include <llvm-c/Core.h> +#include <llvm-c/TargetMachine.h> + +#include "sid.h" +#include "gfx9d.h" +#include "ac_binary.h" +#include "ac_llvm_util.h" +#include "ac_nir_to_llvm.h" +#include "vk_format.h" +#include "util/debug.h" +#include "ac_exp_param.h" + +static const struct nir_shader_compiler_options nir_options = { + .vertex_id_zero_based = true, + .lower_scmp = true, + .lower_flrp32 = true, + .lower_fsat = true, + .lower_fdiv = true, + .lower_sub = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_snorm_4x8 = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_snorm_4x8 = true, + .lower_unpack_unorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_ffma = true, + .max_unroll_iterations = 32 +}; + +VkResult radv_CreateShaderModule( + VkDevice _device, + const VkShaderModuleCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkShaderModule* pShaderModule) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + struct radv_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + module = vk_alloc2(&device->alloc, pAllocator, + sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (module == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + module->nir = NULL; + module->size = pCreateInfo->codeSize; + memcpy(module->data, pCreateInfo->pCode, module->size); + + _mesa_sha1_compute(module->data, module->size, module->sha1); + + *pShaderModule = radv_shader_module_to_handle(module); + + return VK_SUCCESS; +} + +void radv_DestroyShaderModule( + VkDevice _device, + 
VkShaderModule _module, + const VkAllocationCallbacks* pAllocator) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + RADV_FROM_HANDLE(radv_shader_module, module, _module); + + if (!module) + return; + + vk_free2(&device->alloc, pAllocator, module); +} + +bool +radv_lower_indirect_derefs(struct nir_shader *nir, + struct radv_physical_device *device) +{ + /* While it would be nice not to have this flag, we are constrained + * by the reality that LLVM 5.0 doesn't have working VGPR indexing + * on GFX9. + */ + bool llvm_has_working_vgpr_indexing = + device->rad_info.chip_class <= VI; + + /* TODO: Indirect indexing of GS inputs is unimplemented. + * + * TCS and TES load inputs directly from LDS or offchip memory, so + * indirect indexing is trivial. + */ + nir_variable_mode indirect_mask = 0; + if (nir->info.stage == MESA_SHADER_GEOMETRY || + (nir->info.stage != MESA_SHADER_TESS_CTRL && + nir->info.stage != MESA_SHADER_TESS_EVAL && + !llvm_has_working_vgpr_indexing)) { + indirect_mask |= nir_var_shader_in; + } + if (!llvm_has_working_vgpr_indexing && + nir->info.stage != MESA_SHADER_TESS_CTRL) + indirect_mask |= nir_var_shader_out; + + /* TODO: We shouldn't need to do this, however LLVM isn't currently + * smart enough to handle indirects without causing excess spilling + * causing the gpu to hang. 
+ * + * See the following thread for more details of the problem: + * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html + */ + indirect_mask |= nir_var_local; + + return nir_lower_indirect_derefs(nir, indirect_mask); +} + +void +radv_optimize_nir(struct nir_shader *shader) +{ + bool progress; + + do { + progress = false; + + NIR_PASS_V(shader, nir_lower_vars_to_ssa); + NIR_PASS_V(shader, nir_lower_64bit_pack); + NIR_PASS_V(shader, nir_lower_alu_to_scalar); + NIR_PASS_V(shader, nir_lower_phis_to_scalar); + + NIR_PASS(progress, shader, nir_copy_prop); + NIR_PASS(progress, shader, nir_opt_remove_phis); + NIR_PASS(progress, shader, nir_opt_dce); + if (nir_opt_trivial_continues(shader)) { + progress = true; + NIR_PASS(progress, shader, nir_copy_prop); + NIR_PASS(progress, shader, nir_opt_remove_phis); + NIR_PASS(progress, shader, nir_opt_dce); + } + NIR_PASS(progress, shader, nir_opt_if); + NIR_PASS(progress, shader, nir_opt_dead_cf); + NIR_PASS(progress, shader, nir_opt_cse); + NIR_PASS(progress, shader, nir_opt_peephole_select, 8); + NIR_PASS(progress, shader, nir_opt_algebraic); + NIR_PASS(progress, shader, nir_opt_constant_folding); + NIR_PASS(progress, shader, nir_opt_undef); + NIR_PASS(progress, shader, nir_opt_conditional_discard); + if (shader->options->max_unroll_iterations) { + NIR_PASS(progress, shader, nir_opt_loop_unroll, 0); + } + } while (progress); +} + +nir_shader * +radv_shader_compile_to_nir(struct radv_device *device, + struct radv_shader_module *module, + const char *entrypoint_name, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info) +{ + if (strcmp(entrypoint_name, "main") != 0) { + radv_finishme("Multiple shaders per module not really supported"); + } + + nir_shader *nir; + nir_function *entry_point; + if (module->nir) { + /* Some things such as our meta clear/blit code will give us a NIR + * shader directly. 
In that case, we just ignore the SPIR-V entirely + * and just use the NIR shader */ + nir = module->nir; + nir->options = &nir_options; + nir_validate_shader(nir); + + assert(exec_list_length(&nir->functions) == 1); + struct exec_node *node = exec_list_get_head(&nir->functions); + entry_point = exec_node_data(nir_function, node, node); + } else { + uint32_t *spirv = (uint32_t *) module->data; + assert(module->size % 4 == 0); + + if (device->instance->debug_flags & RADV_DEBUG_DUMP_SPIRV) + radv_print_spirv(spirv, module->size, stderr); + + uint32_t num_spec_entries = 0; + struct nir_spirv_specialization *spec_entries = NULL; + if (spec_info && spec_info->mapEntryCount > 0) { + num_spec_entries = spec_info->mapEntryCount; + spec_entries = malloc(num_spec_entries * sizeof(*spec_entries)); + for (uint32_t i = 0; i < num_spec_entries; i++) { + VkSpecializationMapEntry entry = spec_info->pMapEntries[i]; + const void *data = spec_info->pData + entry.offset; + assert(data + entry.size <= spec_info->pData + spec_info->dataSize); + + spec_entries[i].id = spec_info->pMapEntries[i].constantID; + if (spec_info->dataSize == 8) + spec_entries[i].data64 = *(const uint64_t *)data; + else + spec_entries[i].data32 = *(const uint32_t *)data; + } + } + const struct nir_spirv_supported_extensions supported_ext = { + .draw_parameters = true, + .float64 = true, + .image_read_without_format = true, + .image_write_without_format = true, + .tessellation = true, + .int64 = true, + .multiview = true, + .variable_pointers = true, + }; + entry_point = spirv_to_nir(spirv, module->size / 4, + spec_entries, num_spec_entries, + stage, entrypoint_name, &supported_ext, &nir_options); + nir = entry_point->shader; + assert(nir->info.stage == stage); + nir_validate_shader(nir); + + free(spec_entries); + + /* We have to lower away local constant initializers right before we + * inline functions. That way they get properly initialized at the top + * of the function and not at the top of its caller. 
+ */ + NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local); + NIR_PASS_V(nir, nir_lower_returns); + NIR_PASS_V(nir, nir_inline_functions); + + /* Pick off the single entrypoint that we want */ + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (func != entry_point) + exec_node_remove(&func->node); + } + assert(exec_list_length(&nir->functions) == 1); + entry_point->name = ralloc_strdup(entry_point, "main"); + + NIR_PASS_V(nir, nir_remove_dead_variables, + nir_var_shader_in | nir_var_shader_out | nir_var_system_value); + + /* Now that we've deleted all but the main function, we can go ahead and + * lower the rest of the constant initializers. + */ + NIR_PASS_V(nir, nir_lower_constant_initializers, ~0); + NIR_PASS_V(nir, nir_lower_system_values); + NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); + } + + /* Vulkan uses the separate-shader linking model */ + nir->info.separate_shader = true; + + nir_shader_gather_info(nir, entry_point->impl); + + static const nir_lower_tex_options tex_options = { + .lower_txp = ~0, + }; + + nir_lower_tex(nir, &tex_options); + + nir_lower_vars_to_ssa(nir); + nir_lower_var_copies(nir); + nir_lower_global_vars_to_local(nir); + nir_remove_dead_variables(nir, nir_var_local); + radv_lower_indirect_derefs(nir, device->physical_device); + radv_optimize_nir(nir); + + return nir; +} + +void * +radv_alloc_shader_memory(struct radv_device *device, + struct radv_shader_variant *shader) +{ + mtx_lock(&device->shader_slab_mutex); + list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs) { + uint64_t offset = 0; + list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list) { + if (s->bo_offset - offset >= shader->code_size) { + shader->bo = slab->bo; + shader->bo_offset = offset; + list_addtail(&shader->slab_list, &s->slab_list); + mtx_unlock(&device->shader_slab_mutex); + return slab->ptr + offset; + } + offset = align_u64(s->bo_offset + s->code_size, 256); + } + if 
(slab->size - offset >= shader->code_size) { + shader->bo = slab->bo; + shader->bo_offset = offset; + list_addtail(&shader->slab_list, &slab->shaders); + mtx_unlock(&device->shader_slab_mutex); + return slab->ptr + offset; + } + } + + mtx_unlock(&device->shader_slab_mutex); + struct radv_shader_slab *slab = calloc(1, sizeof(struct radv_shader_slab)); + + slab->size = 256 * 1024; + slab->bo = device->ws->buffer_create(device->ws, slab->size, 256, + RADEON_DOMAIN_VRAM, 0); + slab->ptr = (char*)device->ws->buffer_map(slab->bo); + list_inithead(&slab->shaders); + + mtx_lock(&device->shader_slab_mutex); + list_add(&slab->slabs, &device->shader_slabs); + + shader->bo = slab->bo; + shader->bo_offset = 0; + list_add(&shader->slab_list, &slab->shaders); + mtx_unlock(&device->shader_slab_mutex); + return slab->ptr; +} + +void +radv_destroy_shader_slabs(struct radv_device *device) +{ + list_for_each_entry_safe(struct radv_shader_slab, slab, &device->shader_slabs, slabs) { + device->ws->buffer_destroy(slab->bo); + free(slab); + } + mtx_destroy(&device->shader_slab_mutex); +} + +static void +radv_fill_shader_variant(struct radv_device *device, + struct radv_shader_variant *variant, + struct ac_shader_binary *binary, + gl_shader_stage stage) +{ + bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0; + unsigned vgpr_comp_cnt = 0; + + if (scratch_enabled && !device->llvm_supports_spill) + radv_finishme("shader scratch support only available with LLVM 4.0"); + + variant->code_size = binary->code_size; + variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) | + S_00B12C_SCRATCH_EN(scratch_enabled); + + variant->rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) | + S_00B848_SGPRS((variant->config.num_sgprs - 1) / 8) | + S_00B848_DX10_CLAMP(1) | + S_00B848_FLOAT_MODE(variant->config.float_mode); + + switch (stage) { + case MESA_SHADER_TESS_EVAL: + vgpr_comp_cnt = 3; + variant->rsrc2 |= S_00B12C_OC_LDS_EN(1); + break; + case MESA_SHADER_TESS_CTRL: + 
		/* On GFX9 the TCS runs merged with the LS (vertex) stage, so
		 * it inherits the vertex shader's input VGPR count. */
		if (device->physical_device->rad_info.chip_class >= GFX9)
			vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt;
		else
			variant->rsrc2 |= S_00B12C_OC_LDS_EN(1);
		break;
	case MESA_SHADER_VERTEX:
	case MESA_SHADER_GEOMETRY:
		vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt;
		break;
	case MESA_SHADER_FRAGMENT:
		break;
	case MESA_SHADER_COMPUTE:
		/* Enable all three workgroup-ID SGPRs, three thread-ID VGPR
		 * components, the TG size SGPR, and declare LDS usage. */
		variant->rsrc2 |=
			S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) |
			S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) |
			S_00B84C_TG_SIZE_EN(1) |
			S_00B84C_LDS_SIZE(variant->config.lds_size);
		break;
	default:
		unreachable("unsupported shader type");
		break;
	}

	/* GFX9 merged stages encode the input VGPR counts in different
	 * register fields than the standalone stages do. */
	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    stage == MESA_SHADER_GEOMETRY) {
		/* TODO: Figure out how many we actually need. */
		variant->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(3);
		variant->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(3) |
				  S_00B22C_OC_LDS_EN(1);
	} else if (device->physical_device->rad_info.chip_class >= GFX9 &&
		   stage == MESA_SHADER_TESS_CTRL)
		variant->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
	else
		variant->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);

	/* Upload the machine code into GPU-visible slab memory. */
	void *ptr = radv_alloc_shader_memory(device, variant);
	memcpy(ptr, binary->code, binary->code_size);
}

/* Shared worker for normal compilation and GS copy-shader creation: runs
 * the LLVM backend, derives the hardware registers, uploads the code, and
 * (when a trace BO exists) retains disassembly/NIR/SPIR-V for hang debug.
 * Returns NULL only on variant allocation failure. */
static struct radv_shader_variant *
shader_variant_create(struct radv_device *device,
		      struct radv_shader_module *module,
		      struct nir_shader * const *shaders,
		      int shader_count,
		      gl_shader_stage stage,
		      struct ac_nir_compiler_options *options,
		      bool gs_copy_shader,
		      void **code_out,
		      unsigned *code_size_out)
{
	enum radeon_family chip_family = device->physical_device->rad_info.family;
	bool dump_shaders = device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS;
	enum ac_target_machine_options tm_options = 0;
	struct radv_shader_variant *variant;
	struct ac_shader_binary binary;
	LLVMTargetMachineRef tm;

	variant = calloc(1, sizeof(struct radv_shader_variant));
	if (!variant)
		return NULL;

	options->family = chip_family;
	options->chip_class = device->physical_device->rad_info.chip_class;

	if (options->supports_spill)
		tm_options |= AC_TM_SUPPORTS_SPILL;
	if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
		tm_options |= AC_TM_SISCHED;
	tm = ac_create_target_machine(chip_family, tm_options);

	if (gs_copy_shader) {
		/* A GS copy shader is built from exactly one NIR shader. */
		assert(shader_count == 1);
		ac_create_gs_copy_shader(tm, *shaders, &binary, &variant->config,
					 &variant->info, options, dump_shaders);
	} else {
		ac_compile_nir_shader(tm, &binary, &variant->config,
				      &variant->info, shaders, shader_count, options,
				      dump_shaders);
	}

	LLVMDisposeTargetMachine(tm);

	radv_fill_shader_variant(device, variant, &binary, stage);

	/* Hand the raw code to the caller if requested (caller takes
	 * ownership); otherwise it is no longer needed after the upload. */
	if (code_out) {
		*code_out = binary.code;
		*code_size_out = binary.code_size;
	} else
		free(binary.code);
	free(binary.config);
	free(binary.rodata);
	free(binary.global_symbol_offsets);
	free(binary.relocs);
	variant->ref_count = 1;

	if (device->trace_bo) {
		/* Keep debug artifacts alive for GPU hang reports. */
		variant->disasm_string = binary.disasm_string;
		if (!gs_copy_shader && !module->nir) {
			variant->nir = *shaders;
			variant->spirv = (uint32_t *)module->data;
			variant->spirv_size = module->size;
		}
	} else {
		free(binary.disasm_string);
	}

	return variant;
}

/* Public entry point: compile an array of NIR shaders into one variant.
 * The stage is taken from the last shader in the array (on GFX9 several
 * API stages may be merged into one hardware shader). */
struct radv_shader_variant *
radv_shader_variant_create(struct radv_device *device,
			   struct radv_shader_module *module,
			   struct nir_shader *const *shaders,
			   int shader_count,
			   struct radv_pipeline_layout *layout,
			   const struct ac_shader_variant_key *key,
			   void **code_out,
			   unsigned *code_size_out)
{
	struct ac_nir_compiler_options options = {0};

	options.layout = layout;
	if (key)
		options.key = *key;

	options.unsafe_math = !!(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH);
	options.supports_spill = device->llvm_supports_spill;

	return shader_variant_create(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage,
				     &options, false, code_out, code_size_out);
}

struct radv_shader_variant *
/* Build the hardware-VS "copy" shader that reads the GS output ring and
 * writes the final vertex parameters; compiled as MESA_SHADER_VERTEX with
 * no module attached. */
radv_create_gs_copy_shader(struct radv_device *device,
			   struct nir_shader *shader,
			   void **code_out,
			   unsigned *code_size_out,
			   bool multiview)
{
	struct ac_nir_compiler_options options = {0};

	options.key.has_multiview_view_index = multiview;

	return shader_variant_create(device, NULL, &shader, 1, MESA_SHADER_VERTEX,
				     &options, true, code_out, code_size_out);
}

/* Drop one reference; on the last release, unlink the variant from its
 * slab (the slab's GPU memory is recycled, not freed) and free the
 * CPU-side debug data and the variant itself. */
void
radv_shader_variant_destroy(struct radv_device *device,
			    struct radv_shader_variant *variant)
{
	if (!p_atomic_dec_zero(&variant->ref_count))
		return;

	mtx_lock(&device->shader_slab_mutex);
	list_del(&variant->slab_list);
	mtx_unlock(&device->shader_slab_mutex);

	ralloc_free(variant->nir);
	free(variant->disasm_string);
	free(variant);
}

/* Map an API shader stage to its SPI_SHADER_USER_DATA_*_0 register base.
 * On GFX9 stages are merged, so a VS/TES may actually execute as the LS,
 * ES, or VS hardware stage depending on which later stages are enabled. */
uint32_t
radv_shader_stage_to_user_data_0(gl_shader_stage stage, enum chip_class chip_class,
				 bool has_gs, bool has_tess)
{
	switch (stage) {
	case MESA_SHADER_FRAGMENT:
		return R_00B030_SPI_SHADER_USER_DATA_PS_0;
	case MESA_SHADER_VERTEX:
		if (chip_class >= GFX9) {
			return has_tess ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
			       has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
			       R_00B130_SPI_SHADER_USER_DATA_VS_0;
		}
		if (has_tess)
			return R_00B530_SPI_SHADER_USER_DATA_LS_0;
		else
			return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B130_SPI_SHADER_USER_DATA_VS_0;
	case MESA_SHADER_GEOMETRY:
		return chip_class >= GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
		       R_00B230_SPI_SHADER_USER_DATA_GS_0;
	case MESA_SHADER_COMPUTE:
		return R_00B900_COMPUTE_USER_DATA_0;
	case MESA_SHADER_TESS_CTRL:
		/* Both arms are address 0xB430: the GFX9 LS base and the
		 * pre-GFX9 HS base coincide; the names differ per chip. */
		return chip_class >= GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
		       R_00B430_SPI_SHADER_USER_DATA_HS_0;
	case MESA_SHADER_TESS_EVAL:
		if (chip_class >= GFX9) {
			return has_gs ?
			       R_00B330_SPI_SHADER_USER_DATA_ES_0 :
			       R_00B130_SPI_SHADER_USER_DATA_VS_0;
		}
		if (has_gs)
			return R_00B330_SPI_SHADER_USER_DATA_ES_0;
		else
			return R_00B130_SPI_SHADER_USER_DATA_VS_0;
	default:
		unreachable("unknown shader");
	}
}

/* Human-readable stage name for debug dumps; for VS/TES it also names the
 * hardware stage the shader actually runs as (LS/ES/VS). */
const char *
radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage)
{
	switch (stage) {
	case MESA_SHADER_VERTEX: return var->info.vs.as_ls ? "Vertex Shader as LS" : var->info.vs.as_es ? "Vertex Shader as ES" : "Vertex Shader as VS";
	case MESA_SHADER_GEOMETRY: return "Geometry Shader";
	case MESA_SHADER_FRAGMENT: return "Pixel Shader";
	case MESA_SHADER_COMPUTE: return "Compute Shader";
	case MESA_SHADER_TESS_CTRL: return "Tessellation Control Shader";
	case MESA_SHADER_TESS_EVAL: return var->info.tes.as_es ? "Tessellation Evaluation Shader as ES" : "Tessellation Evaluation Shader as VS";
	default:
		return "Unknown shader";
	};
}

/* Print register usage, LDS/scratch consumption, and the resulting
 * per-SIMD wave-occupancy estimate for a compiled variant to "file". */
void
radv_shader_dump_stats(struct radv_device *device,
		       struct radv_shader_variant *variant,
		       gl_shader_stage stage,
		       FILE *file)
{
	/* LDS allocation granularity: 512 bytes on CIK+, 256 before. */
	unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ?
512 : 256; + struct ac_shader_config *conf; + unsigned max_simd_waves; + unsigned lds_per_wave = 0; + + switch (device->physical_device->rad_info.family) { + /* These always have 8 waves: */ + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + max_simd_waves = 8; + break; + default: + max_simd_waves = 10; + } + + conf = &variant->config; + + if (stage == MESA_SHADER_FRAGMENT) { + lds_per_wave = conf->lds_size * lds_increment + + align(variant->info.fs.num_interp * 48, + lds_increment); + } + + if (conf->num_sgprs) { + if (device->physical_device->rad_info.chip_class >= VI) + max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs); + else + max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs); + } + + if (conf->num_vgprs) + max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs); + + /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD + * that PS can use. + */ + if (lds_per_wave) + max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave); + + fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage)); + + if (stage == MESA_SHADER_FRAGMENT) { + fprintf(file, "*** SHADER CONFIG ***\n" + "SPI_PS_INPUT_ADDR = 0x%04x\n" + "SPI_PS_INPUT_ENA = 0x%04x\n", + conf->spi_ps_input_addr, conf->spi_ps_input_ena); + } + + fprintf(file, "*** SHADER STATS ***\n" + "SGPRS: %d\n" + "VGPRS: %d\n" + "Spilled SGPRs: %d\n" + "Spilled VGPRs: %d\n" + "Code Size: %d bytes\n" + "LDS: %d blocks\n" + "Scratch: %d bytes per wave\n" + "Max Waves: %d\n" + "********************\n\n\n", + conf->num_sgprs, conf->num_vgprs, + conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size, + conf->lds_size, conf->scratch_bytes_per_wave, + max_simd_waves); +} diff --git a/lib/mesa/src/amd/vulkan/radv_shader.h b/lib/mesa/src/amd/vulkan/radv_shader.h new file mode 100644 index 000000000..6e4e9966c --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_shader.h @@ -0,0 +1,123 @@ +/* + * Copyright © 2016 Red Hat. 
+ * Copyright © 2016 Bas Nieuwenhuizen + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
 */

#ifndef RADV_SHADER_H
#define RADV_SHADER_H

#include "radv_private.h"

#include "nir/nir.h"

/* A VkShaderModule: the SPIR-V words (data/size) plus a hash of the
 * contents, or a pre-built NIR shader for driver-internal meta shaders. */
struct radv_shader_module {
	struct nir_shader *nir;		/* non-NULL only for internal meta shaders */
	unsigned char sha1[20];		/* hash of the module contents */
	uint32_t size;			/* byte size of data[] */
	char data[0];			/* trailing SPIR-V words; NOTE(review):
					 * [0] is the pre-C99 flexible-array idiom */
};

/* A compiled shader: GPU machine code resident in a shared slab buffer,
 * plus the register values and metadata needed to bind and debug it.
 * Reference counted; see radv_shader_variant_destroy(). */
struct radv_shader_variant {
	uint32_t ref_count;

	struct radeon_winsys_bo *bo;	/* slab buffer holding the code */
	uint64_t bo_offset;		/* byte offset of the code within bo */
	struct ac_shader_config config;
	uint32_t code_size;
	struct ac_shader_variant_info info;
	unsigned rsrc1;			/* SPI_SHADER_PGM_RSRC1_* dword */
	unsigned rsrc2;			/* SPI_SHADER_PGM_RSRC2_* dword */

	/* debug only */
	uint32_t *spirv;
	uint32_t spirv_size;
	struct nir_shader *nir;
	char *disasm_string;

	struct list_head slab_list;	/* position within the owning slab */
};

/* One GPU buffer sub-allocated among shader variants (256 KiB each, per
 * radv_alloc_shader_memory). */
struct radv_shader_slab {
	struct list_head slabs;		/* link in device->shader_slabs */
	struct list_head shaders;	/* resident variants, in offset order */
	struct radeon_winsys_bo *bo;
	uint64_t size;
	char *ptr;			/* persistent CPU mapping of bo */
};

void
radv_optimize_nir(struct nir_shader *shader);

nir_shader *
radv_shader_compile_to_nir(struct radv_device *device,
			   struct radv_shader_module *module,
			   const char *entrypoint_name,
			   gl_shader_stage stage,
			   const VkSpecializationInfo *spec_info);

void *
radv_alloc_shader_memory(struct radv_device *device,
			 struct radv_shader_variant *shader);

void
radv_destroy_shader_slabs(struct radv_device *device);

struct radv_shader_variant *
radv_shader_variant_create(struct radv_device *device,
			   struct radv_shader_module *module,
			   struct nir_shader *const *shaders,
			   int shader_count,
			   struct radv_pipeline_layout *layout,
			   const struct ac_shader_variant_key *key,
			   void **code_out,
			   unsigned *code_size_out);

struct radv_shader_variant *
radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir,
			   void **code_out, unsigned *code_size_out,
			   bool multiview);

void
radv_shader_variant_destroy(struct radv_device *device,
			    struct radv_shader_variant *variant);

bool
radv_lower_indirect_derefs(struct nir_shader *nir,
			   struct radv_physical_device *device);

uint32_t
radv_shader_stage_to_user_data_0(gl_shader_stage stage, enum chip_class chip_class,
				 bool has_gs, bool has_tess);

const char *
radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage);

void
radv_shader_dump_stats(struct radv_device *device,
		       struct radv_shader_variant *variant,
		       gl_shader_stage stage,
		       FILE *file);

#endif