author     Jonathan Gray <jsg@cvs.openbsd.org>    2020-01-22 02:13:18 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>    2020-01-22 02:13:18 +0000
commit     fdcc03929065b5bf5dd93553db219ea3e05c8c34 (patch)
tree       ca90dc8d9e89febdcd4160956c1b8ec098a4efc9 /lib/mesa/src/compiler/glsl
parent     3c9de4a7e13712b5696750bbd59a18c848742022 (diff)
Import Mesa 19.2.8
Diffstat (limited to 'lib/mesa/src/compiler/glsl')
27 files changed, 2232 insertions, 263 deletions
diff --git a/lib/mesa/src/compiler/glsl/float64.glsl b/lib/mesa/src/compiler/glsl/float64.glsl
index 415dde390..0433d925a 100644
--- a/lib/mesa/src/compiler/glsl/float64.glsl
+++ b/lib/mesa/src/compiler/glsl/float64.glsl
@@ -218,6 +218,18 @@ __fge64(uint64_t a, uint64_t b)
    return !__flt64_nonnan(a, b);
 }
 
+uint64_t
+__fsat64(uint64_t __a)
+{
+   if (__flt64(__a, 0ul))
+      return 0ul;
+
+   if (__fge64(__a, 0x3FF0000000000000ul /* 1.0 */))
+      return 0x3FF0000000000000ul;
+
+   return __a;
+}
+
 /* Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
  * value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so
  * any carry out is lost. The result is broken into two 32-bit pieces which
@@ -1681,17 +1693,22 @@ __fround64(uint64_t __a)
 
    if (unbiasedExp < 20) {
       if (unbiasedExp < 0) {
+         if ((aHi & 0x80000000u) != 0u && aLo == 0u) {
+            return 0;
+         }
          aHi &= 0x80000000u;
-         if (unbiasedExp == -1 && aLo != 0u)
-            aHi |= (1023u << 20);
+         if ((a.y & 0x000FFFFFu) == 0u && a.x == 0u) {
+            aLo = 0u;
+            return packUint2x32(uvec2(aLo, aHi));
+         }
+         aHi = mix(aHi, (aHi | 0x3FF00000u), unbiasedExp == -1);
          aLo = 0u;
       } else {
          uint maskExp = 0x000FFFFFu >> unbiasedExp;
-         /* a is an integral value */
-         if (((aHi & maskExp) == 0u) && (aLo == 0u))
-            return __a;
-
+         uint lastBit = maskExp + 1;
          aHi += 0x00080000u >> unbiasedExp;
+         if ((aHi & maskExp) == 0u)
+            aHi &= ~lastBit;
          aHi &= ~maskExp;
          aLo = 0u;
       }
@@ -1708,9 +1725,7 @@ __fround64(uint64_t __a)
       aLo &= ~maskExp;
    }
 
-   a.x = aLo;
-   a.y = aHi;
-   return packUint2x32(a);
+   return packUint2x32(uvec2(aLo, aHi));
 }
 
 uint64_t
diff --git a/lib/mesa/src/compiler/glsl/gl_nir.h b/lib/mesa/src/compiler/glsl/gl_nir.h
index 59d5f65e6..9907f732e 100644
--- a/lib/mesa/src/compiler/glsl/gl_nir.h
+++ b/lib/mesa/src/compiler/glsl/gl_nir.h
@@ -35,11 +35,17 @@ bool gl_nir_lower_atomics(nir_shader *shader,
                           const struct gl_shader_program *shader_program,
                           bool use_binding_as_idx);
 
+bool gl_nir_lower_bindless_images(nir_shader *shader);
 bool gl_nir_lower_samplers(nir_shader *shader,
                            const struct gl_shader_program *shader_program);
 bool gl_nir_lower_samplers_as_deref(nir_shader *shader,
                                     const struct gl_shader_program *shader_program);
 
+bool gl_nir_lower_buffers(nir_shader *shader,
+                          const struct gl_shader_program *shader_program);
+
+bool gl_nir_opt_access(nir_shader *shader);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/mesa/src/compiler/glsl/gl_nir_link_uniform_blocks.c b/lib/mesa/src/compiler/glsl/gl_nir_link_uniform_blocks.c
new file mode 100644
index 000000000..28faa7e76
--- /dev/null
+++ b/lib/mesa/src/compiler/glsl/gl_nir_link_uniform_blocks.c
@@ -0,0 +1,643 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "gl_nir_linker.h"
+#include "ir_uniform.h" /* for gl_uniform_storage */
+#include "linker_util.h"
+#include "main/mtypes.h"
+
+/* Summary: This file contains code to do NIR-based linking of uniform
+ * blocks. This includes UBOs and SSBOs. Note that it is tailored to
+ * ARB_gl_spirv needs and particularities.
+ *
+ * More details:
+ *
+ * 1. Linking doesn't use names: GLSL linking uses names as a core concept.
+ *    But in SPIR-V, the uniform block name, field names, and other names are
+ *    considered optional debug info, so they might not be present. So the
+ *    linking should work without them, and handling them at all is optional.
+ *    From the ARB_gl_spirv spec:
+ *
+ *    "19. How should the program interface query operations behave for
+ *     program objects created from SPIR-V shaders?
+ *
+ *     DISCUSSION: we previously said we didn't need reflection to work for
+ *     SPIR-V shaders (at least for the first version), however we are left
+ *     with specifying how it should "not work". The primary issue is that
+ *     SPIR-V binaries are not required to have names associated with
+ *     variables. They can be associated in debug information, but there is no
+ *     requirement for that to be present, and it should not be relied upon.
+ *
+ *     Options:
+ *
+ *     <skip>
+ *
+ *     C) Allow as much as possible to work "naturally". You can query for the
+ *     number of active resources, and for details about them. Anything that
+ *     doesn't query by name will work as expected. Queries for maximum length
+ *     of names return one. Queries for anything "by name" return INVALID_INDEX
+ *     (or -1). Querying the name property of a resource returns an empty
+ *     string. This may allow many queries to work, but it's not clear how
+ *     useful it would be if you can't actually know which specific variable
+ *     you are retrieving information on. If everything is specified a-priori
+ *     by location/binding/offset/index/component in the shader, this may be
+ *     sufficient.
+ *
+ *     RESOLVED. Pick (c), but also allow debug names to be returned if an
+ *     implementation wants to."
+ *
+ *    This implementation doesn't care about the names, as the main objective
+ *    is to be functional, not to support optional debug features.
+ *
+ * 2. Terminology: this file handles both UBOs and SSBOs, treating both as
+ *    "uniform blocks" analogously to what is done in the GLSL (IR) path.
+ *
+ *    From the ARB_gl_spirv spec:
+ *      "Mapping of Storage Classes:
+ *       <skip>
+ *       uniform blockN { ... } ...; -> Uniform, with Block decoration
+ *       <skip>
+ *       buffer blockN { ... } ...;  -> Uniform, with BufferBlock decoration"
+ *
+ * 3. Explicit data: The code assumes that all structure members have an
+ *    Offset decoration, all arrays have an ArrayStride and all matrices have
+ *    a MatrixStride, even for nested structures. That way we don't have to
+ *    worry about the different layout modes. This is explicitly required in
+ *    the SPIR-V spec:
+ *
+ *    "Composite objects in the UniformConstant, Uniform, and PushConstant
+ *     Storage Classes must be explicitly laid out. The following apply to all
+ *     the aggregate and matrix types describing such an object, recursively
+ *     through their nested types:
+ *
+ *    – Each structure-type member must have an Offset Decoration.
+ *    – Each array type must have an ArrayStride Decoration.
+ *    – Each structure-type member that is a matrix or array-of-matrices must
+ *      be decorated with a MatrixStride Decoration, and one of the
+ *      RowMajor or ColMajor Decorations."
+ *
+ *    Additionally, the structure members are expected to be presented in
+ *    increasing offset order:
+ *
+ *    "a structure has lower-numbered members appearing at smaller offsets
+ *     than higher-numbered members"
+ */
+
+enum block_type {
+   BLOCK_UBO,
+   BLOCK_SSBO
+};
+
+/*
+ * It is worth noting that the ARB_gl_spirv spec doesn't require us to do this
+ * validation, but at the same time, it allows us to do it. The following
+ * validation is easy and a nice-to-have.
+ */
+static bool
+link_blocks_are_compatible(const struct gl_uniform_block *a,
+                           const struct gl_uniform_block *b)
+{
+   /*
+    * Names in ARB_gl_spirv are optional, so we ignore them. So while in
+    * the equivalent GLSL method the matching is done using the name, here
+    * we use the binding, which for SPIR-V binaries is explicit and
+    * mandatory. From the OpenGL 4.6 spec, section "7.4.2. SPIR-V Shader
+    * Interface Matching":
+    *
+    *    "Uniform and shader storage block variables must also be decorated
+    *     with a Binding"
+    */
+   if (a->Binding != b->Binding)
+      return false;
+
+   /* We are explicitly ignoring the names, so it would be good to check that
+    * this is happening.
+    */
+   assert(a->Name == NULL);
+   assert(b->Name == NULL);
+
+   if (a->NumUniforms != b->NumUniforms)
+      return false;
+
+   if (a->_Packing != b->_Packing)
+      return false;
+
+   if (a->_RowMajor != b->_RowMajor)
+      return false;
+
+   for (unsigned i = 0; i < a->NumUniforms; i++) {
+      if (a->Uniforms[i].Type != b->Uniforms[i].Type)
+         return false;
+
+      if (a->Uniforms[i].RowMajor != b->Uniforms[i].RowMajor)
+         return false;
+
+      if (a->Uniforms[i].Offset != b->Uniforms[i].Offset)
+         return false;
+
+      /* See comment on the previous assert */
+      assert(a->Uniforms[i].Name == NULL);
+      assert(b->Uniforms[i].Name == NULL);
+   }
+
+   return true;
+}
+
+/**
+ * Merges a buffer block into an array of buffer blocks that may or may not
+ * already contain a copy of it.
+ *
+ * Returns the index of the block in the array (new if it was needed, or the
+ * index of the copy of it). Returns -1 if there are two incompatible block
+ * definitions with the same binding.
+ */
+static int
+link_cross_validate_uniform_block(void *mem_ctx,
+                                  struct gl_uniform_block **linked_blocks,
+                                  unsigned int *num_linked_blocks,
+                                  struct gl_uniform_block *new_block)
+{
+   /* We first check if new_block was already linked */
+   for (unsigned int i = 0; i < *num_linked_blocks; i++) {
+      struct gl_uniform_block *old_block = &(*linked_blocks)[i];
+
+      if (old_block->Binding == new_block->Binding)
+         return link_blocks_are_compatible(old_block, new_block) ? i : -1;
+   }
+
+   *linked_blocks = reralloc(mem_ctx, *linked_blocks,
+                             struct gl_uniform_block,
+                             *num_linked_blocks + 1);
+   int linked_block_index = (*num_linked_blocks)++;
+   struct gl_uniform_block *linked_block = &(*linked_blocks)[linked_block_index];
+
+   memcpy(linked_block, new_block, sizeof(*new_block));
+   linked_block->Uniforms = ralloc_array(*linked_blocks,
+                                         struct gl_uniform_buffer_variable,
+                                         linked_block->NumUniforms);
+
+   memcpy(linked_block->Uniforms,
+          new_block->Uniforms,
+          sizeof(*linked_block->Uniforms) * linked_block->NumUniforms);
+
+   return linked_block_index;
+}
+
+
+/**
+ * Accumulates the array of buffer blocks and checks that all definitions of
+ * blocks agree on their contents.
+ */
+static bool
+nir_interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog,
+                                             enum block_type block_type)
+{
+   int *interfaceBlockStageIndex[MESA_SHADER_STAGES];
+   struct gl_uniform_block *blks = NULL;
+   unsigned *num_blks = block_type == BLOCK_SSBO ?
+      &prog->data->NumShaderStorageBlocks :
+      &prog->data->NumUniformBlocks;
+
+   unsigned max_num_buffer_blocks = 0;
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (prog->_LinkedShaders[i]) {
+         if (block_type == BLOCK_SSBO) {
+            max_num_buffer_blocks +=
+               prog->_LinkedShaders[i]->Program->info.num_ssbos;
+         } else {
+            max_num_buffer_blocks +=
+               prog->_LinkedShaders[i]->Program->info.num_ubos;
+         }
+      }
+   }
+
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_linked_shader *sh = prog->_LinkedShaders[i];
+
+      interfaceBlockStageIndex[i] = malloc(max_num_buffer_blocks * sizeof(int));
+      for (unsigned int j = 0; j < max_num_buffer_blocks; j++)
+         interfaceBlockStageIndex[i][j] = -1;
+
+      if (sh == NULL)
+         continue;
+
+      unsigned sh_num_blocks;
+      struct gl_uniform_block **sh_blks;
+      if (block_type == BLOCK_SSBO) {
+         sh_num_blocks = prog->_LinkedShaders[i]->Program->info.num_ssbos;
+         sh_blks = sh->Program->sh.ShaderStorageBlocks;
+      } else {
+         sh_num_blocks = prog->_LinkedShaders[i]->Program->info.num_ubos;
+         sh_blks = sh->Program->sh.UniformBlocks;
+      }
+
+      for (unsigned int j = 0; j < sh_num_blocks; j++) {
+         int index = link_cross_validate_uniform_block(prog->data, &blks,
+                                                       num_blks, sh_blks[j]);
+
+         if (index == -1) {
+            /* We use the binding as we are ignoring the names */
+            linker_error(prog, "buffer block with binding `%i' has mismatching "
+                         "definitions\n", sh_blks[j]->Binding);
+
+            for (unsigned k = 0; k <= i; k++) {
+               free(interfaceBlockStageIndex[k]);
+            }
+
+            /* Reset the block count. This will help avoid various segfaults
+             * from api calls that assume the array exists due to the count
+             * being non-zero.
+             */
+            *num_blks = 0;
+            return false;
+         }
+
+         interfaceBlockStageIndex[i][index] = j;
+      }
+   }
+
+   /* Update per-stage block pointers to point to the program list. */
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      for (unsigned j = 0; j < *num_blks; j++) {
+         int stage_index = interfaceBlockStageIndex[i][j];
+
+         if (stage_index != -1) {
+            struct gl_linked_shader *sh = prog->_LinkedShaders[i];
+
+            struct gl_uniform_block **sh_blks = block_type == BLOCK_SSBO ?
+               sh->Program->sh.ShaderStorageBlocks :
+               sh->Program->sh.UniformBlocks;
+
+            blks[j].stageref |= sh_blks[stage_index]->stageref;
+            sh_blks[stage_index] = &blks[j];
+         }
+      }
+   }
+
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      free(interfaceBlockStageIndex[i]);
+   }
+
+   if (block_type == BLOCK_SSBO)
+      prog->data->ShaderStorageBlocks = blks;
+   else
+      prog->data->UniformBlocks = blks;
+
+   return true;
+}
+
+/*
+ * Iterates over @type to compute how many individual leaf variables it
+ * contains.
+ */
+static void
+iterate_type_count_variables(const struct glsl_type *type,
+                             unsigned int *num_variables)
+{
+   for (unsigned i = 0; i < glsl_get_length(type); i++) {
+      const struct glsl_type *field_type;
+
+      if (glsl_type_is_struct_or_ifc(type))
+         field_type = glsl_get_struct_field(type, i);
+      else
+         field_type = glsl_get_array_element(type);
+
+      if (glsl_type_is_leaf(field_type))
+         (*num_variables)++;
+      else
+         iterate_type_count_variables(field_type, num_variables);
+   }
+}
+
+
+static void
+fill_individual_variable(const struct glsl_type *type,
+                         struct gl_uniform_buffer_variable *variables,
+                         unsigned int *variable_index,
+                         unsigned int *offset,
+                         struct gl_shader_program *prog,
+                         struct gl_uniform_block *block)
+{
+   /* ARB_gl_spirv: allowed to ignore names. Thus, we don't need to initialize
+    * the variable's Name or IndexName.
+    */
+   variables[*variable_index].Type = type;
+
+   if (glsl_type_is_matrix(type)) {
+      variables[*variable_index].RowMajor = glsl_matrix_type_is_row_major(type);
+   } else {
+      /* default value, better than potentially meaningless garbage */
+      variables[*variable_index].RowMajor = false;
+   }
+
+   /**
+    * Although ARB_gl_spirv points out that the offsets need to be included
+    * (see "Mappings of layouts"), in the end those are only valid for
+    * root variables, and we would need to recompute offsets when we iterate
+    * over non-trivial types, like arrays of arrays. So we always compute the
+    * offset.
+    */
+   variables[*variable_index].Offset = *offset;
+   (*offset) += glsl_get_explicit_size(type, true);
+
+   (*variable_index)++;
+}
+
+static void
+iterate_type_fill_variables(const struct glsl_type *type,
+                            struct gl_uniform_buffer_variable *variables,
+                            unsigned int *variable_index,
+                            unsigned int *offset,
+                            struct gl_shader_program *prog,
+                            struct gl_uniform_block *block)
+{
+   unsigned int struct_base_offset;
+
+   for (unsigned i = 0; i < glsl_get_length(type); i++) {
+      const struct glsl_type *field_type;
+
+      if (glsl_type_is_struct_or_ifc(type)) {
+         field_type = glsl_get_struct_field(type, i);
+
+         if (i == 0) {
+            struct_base_offset = *offset;
+         }
+
+         *offset = struct_base_offset + glsl_get_struct_field_offset(type, i);
+      } else {
+         field_type = glsl_get_array_element(type);
+      }
+
+      if (glsl_type_is_leaf(field_type)) {
+         fill_individual_variable(field_type, variables, variable_index,
+                                  offset, prog, block);
+      } else {
+         iterate_type_fill_variables(field_type, variables, variable_index,
+                                     offset, prog, block);
+      }
+   }
+}
+
+/*
+ * Unlike the equivalent GLSL one, this one only allocates the needed
+ * space. We do an initial count here, just to avoid re-allocating for each
+ * block we find.
+ */
+static void
+allocate_uniform_blocks(void *mem_ctx,
+                        struct gl_linked_shader *shader,
+                        struct gl_uniform_block **out_blks, unsigned *num_blocks,
+                        struct gl_uniform_buffer_variable **out_variables,
+                        unsigned *num_variables,
+                        enum block_type block_type)
+{
+   *num_variables = 0;
+   *num_blocks = 0;
+
+   nir_foreach_variable(var, &shader->Program->nir->uniforms) {
+      if (block_type == BLOCK_UBO && !nir_variable_is_in_ubo(var))
+         continue;
+
+      if (block_type == BLOCK_SSBO && !nir_variable_is_in_ssbo(var))
+         continue;
+
+      const struct glsl_type *type = glsl_without_array(var->type);
+      unsigned aoa_size = glsl_get_aoa_size(var->type);
+      unsigned buffer_count = aoa_size == 0 ?
1 : aoa_size; + + *num_blocks += buffer_count; + + unsigned int block_variables = 0; + iterate_type_count_variables(type, &block_variables); + + *num_variables += block_variables * buffer_count; + } + + if (*num_blocks == 0) { + assert(*num_variables == 0); + return; + } + + assert(*num_variables != 0); + + struct gl_uniform_block *blocks = + rzalloc_array(mem_ctx, struct gl_uniform_block, *num_blocks); + + struct gl_uniform_buffer_variable *variables = + rzalloc_array(blocks, struct gl_uniform_buffer_variable, *num_variables); + + *out_blks = blocks; + *out_variables = variables; +} + +static void +fill_block(struct gl_uniform_block *block, + nir_variable *var, + struct gl_uniform_buffer_variable *variables, + unsigned *variable_index, + unsigned array_index, + struct gl_shader_program *prog, + const gl_shader_stage stage) +{ + const struct glsl_type *type = glsl_without_array(var->type); + + block->Name = NULL; /* ARB_gl_spirv: allowed to ignore names */ + /* From ARB_gl_spirv spec: + * "Vulkan uses only one binding point for a resource array, + * while OpenGL still uses multiple binding points, so binding + * numbers are counted differently for SPIR-V used in Vulkan + * and OpenGL + */ + block->Binding = var->data.binding + array_index; + block->Uniforms = &variables[*variable_index]; + block->stageref = 1U << stage; + + /* From SPIR-V 1.0 spec, 3.20, Decoration: + * "RowMajor + * Applies only to a member of a structure type. + * Only valid on a matrix or array whose most basic + * element is a matrix. Indicates that components + * within a row are contiguous in memory." + * + * So the SPIR-V binary doesn't report if the block was defined as RowMajor + * or not. In any case, for the components it is mandatory to set it, so it + * is not needed a default RowMajor value to know it. + * + * Setting to the default, but it should be ignored. + */ + block->_RowMajor = false; + + /* From ARB_gl_spirv spec: + * "Mapping of layouts + * + * std140/std430 -> explicit *Offset*, *ArrayStride*, and + * *MatrixStride* Decoration on struct members + * shared/packed -> not allowed" + * + * So we would not have a value for _Packing, and in fact it would be + * useless so far. Using a default value. It should be ignored. + */ + block->_Packing = 0; + block->linearized_array_index = array_index; + + unsigned old_variable_index = *variable_index; + unsigned offset = 0; + iterate_type_fill_variables(type, variables, variable_index, &offset, prog, block); + block->NumUniforms = *variable_index - old_variable_index; + + block->UniformBufferSize = glsl_get_explicit_size(type, false); + + /* From OpenGL 4.6 spec, section 7.6.2.3, "SPIR-V Uniform Offsets and + * strides" + * + * "If the variable is decorated as a BufferBlock , its offsets and + * strides must not contradict std430 alignment and minimum offset + * requirements. Otherwise, its offsets and strides must not contradict + * std140 alignment and minimum offset requirements." + * + * So although we are computing the size based on the offsets and + * array/matrix strides, at the end we need to ensure that the alignment is + * the same that with std140. 
From ARB_uniform_buffer_object spec: + * + * "For uniform blocks laid out according to [std140] rules, the minimum + * buffer object size returned by the UNIFORM_BLOCK_DATA_SIZE query is + * derived by taking the offset of the last basic machine unit consumed + * by the last uniform of the uniform block (including any end-of-array + * or end-of-structure padding), adding one, and rounding up to the next + * multiple of the base alignment required for a vec4." + */ + block->UniformBufferSize = glsl_align(block->UniformBufferSize, 16); +} + +/* + * Link ubos/ssbos for a given linked_shader/stage. + */ +static void +link_linked_shader_uniform_blocks(void *mem_ctx, + struct gl_context *ctx, + struct gl_shader_program *prog, + struct gl_linked_shader *shader, + struct gl_uniform_block **blocks, + unsigned *num_blocks, + enum block_type block_type) +{ + struct gl_uniform_buffer_variable *variables = NULL; + unsigned num_variables = 0; + + allocate_uniform_blocks(mem_ctx, shader, + blocks, num_blocks, + &variables, &num_variables, + block_type); + + /* Fill the content of uniforms and variables */ + unsigned block_index = 0; + unsigned variable_index = 0; + struct gl_uniform_block *blks = *blocks; + + nir_foreach_variable(var, &shader->Program->nir->uniforms) { + if (block_type == BLOCK_UBO && !nir_variable_is_in_ubo(var)) + continue; + + if (block_type == BLOCK_SSBO && !nir_variable_is_in_ssbo(var)) + continue; + + unsigned aoa_size = glsl_get_aoa_size(var->type); + unsigned buffer_count = aoa_size == 0 ? 1 : aoa_size; + + for (unsigned array_index = 0; array_index < buffer_count; array_index++) { + fill_block(&blks[block_index], var, variables, &variable_index, + array_index, prog, shader->Stage); + block_index++; + } + } + + assert(block_index == *num_blocks); + assert(variable_index == num_variables); +} + +bool +gl_nir_link_uniform_blocks(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + void *mem_ctx = ralloc_context(NULL); + + for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) { + struct gl_linked_shader *const linked = prog->_LinkedShaders[stage]; + struct gl_uniform_block *ubo_blocks = NULL; + unsigned num_ubo_blocks = 0; + struct gl_uniform_block *ssbo_blocks = NULL; + unsigned num_ssbo_blocks = 0; + + if (!linked) + continue; + + link_linked_shader_uniform_blocks(mem_ctx, ctx, prog, linked, + &ubo_blocks, &num_ubo_blocks, + BLOCK_UBO); + + link_linked_shader_uniform_blocks(mem_ctx, ctx, prog, linked, + &ssbo_blocks, &num_ssbo_blocks, + BLOCK_SSBO); + + if (!prog->data->LinkStatus) { + return false; + } + + prog->data->linked_stages |= 1 << stage; + + /* Copy ubo blocks to linked shader list */ + linked->Program->sh.UniformBlocks = + ralloc_array(linked, struct gl_uniform_block *, num_ubo_blocks); + ralloc_steal(linked, ubo_blocks); + for (unsigned i = 0; i < num_ubo_blocks; i++) { + linked->Program->sh.UniformBlocks[i] = &ubo_blocks[i]; + } + + /* We need to set it twice to avoid the value being overwritten by the + * one from nir in brw_shader_gather_info. TODO: get a way to set the + * info once, and being able to gather properly the info. 
+ */ + linked->Program->nir->info.num_ubos = num_ubo_blocks; + linked->Program->info.num_ubos = num_ubo_blocks; + + /* Copy ssbo blocks to linked shader list */ + linked->Program->sh.ShaderStorageBlocks = + ralloc_array(linked, struct gl_uniform_block *, num_ssbo_blocks); + ralloc_steal(linked, ssbo_blocks); + for (unsigned i = 0; i < num_ssbo_blocks; i++) { + linked->Program->sh.ShaderStorageBlocks[i] = &ssbo_blocks[i]; + } + + /* See previous comment on num_ubo_blocks */ + linked->Program->nir->info.num_ssbos = num_ssbo_blocks; + linked->Program->info.num_ssbos = num_ssbo_blocks; + } + + if (!nir_interstage_cross_validate_uniform_blocks(prog, BLOCK_UBO)) + return false; + + if (!nir_interstage_cross_validate_uniform_blocks(prog, BLOCK_SSBO)) + return false; + + return true; +} diff --git a/lib/mesa/src/compiler/glsl/gl_nir_link_uniform_initializers.c b/lib/mesa/src/compiler/glsl/gl_nir_link_uniform_initializers.c index 8eefa71c8..1c4529c93 100644 --- a/lib/mesa/src/compiler/glsl/gl_nir_link_uniform_initializers.c +++ b/lib/mesa/src/compiler/glsl/gl_nir_link_uniform_initializers.c @@ -118,31 +118,37 @@ copy_constant_to_storage(union gl_constant_value *storage, const enum glsl_base_type base_type = glsl_get_base_type(type); const unsigned n_columns = glsl_get_matrix_columns(type); const unsigned n_rows = glsl_get_vector_elements(type); + unsigned dmul = glsl_base_type_is_64bit(base_type) ? 2 : 1; int i = 0; - for (unsigned int column = 0; column < n_columns; column++) { + if (n_columns > 1) { + const struct glsl_type *column_type = glsl_get_column_type(type); + for (unsigned int column = 0; column < n_columns; column++) { + copy_constant_to_storage(&storage[i], val->elements[column], + column_type, boolean_true); + i += n_rows * dmul; + } + } else { for (unsigned int row = 0; row < n_rows; row++) { switch (base_type) { case GLSL_TYPE_UINT: - storage[i].u = val->values[column].u32[row]; + storage[i].u = val->values[row].u32; break; case GLSL_TYPE_INT: case GLSL_TYPE_SAMPLER: - storage[i].i = val->values[column].i32[row]; + storage[i].i = val->values[row].i32; break; case GLSL_TYPE_FLOAT: - storage[i].f = val->values[column].f32[row]; + storage[i].f = val->values[row].f32; break; case GLSL_TYPE_DOUBLE: case GLSL_TYPE_UINT64: case GLSL_TYPE_INT64: /* XXX need to check on big-endian */ - memcpy(&storage[i * 2].u, - &val->values[column].f64[row], - sizeof(double)); + memcpy(&storage[i].u, &val->values[row].f64, sizeof(double)); break; case GLSL_TYPE_BOOL: - storage[i].b = val->values[column].u32[row] ? boolean_true : 0; + storage[i].b = val->values[row].u32 ? 
boolean_true : 0; break; case GLSL_TYPE_ARRAY: case GLSL_TYPE_STRUCT: @@ -164,7 +170,7 @@ copy_constant_to_storage(union gl_constant_value *storage, assert(!"Should not get here."); break; } - i++; + i += dmul; } } } @@ -184,7 +190,7 @@ set_uniform_initializer(struct set_uniform_initializer_closure *data, { const struct glsl_type *t_without_array = glsl_without_array(type); - if (glsl_type_is_struct(type)) { + if (glsl_type_is_struct_or_ifc(type)) { for (unsigned int i = 0; i < glsl_get_length(type); i++) { const struct glsl_type *field_type = glsl_get_struct_field(type, i); set_uniform_initializer(data, field_type, val->elements[i]); @@ -192,7 +198,7 @@ set_uniform_initializer(struct set_uniform_initializer_closure *data, return; } - if (glsl_type_is_struct(t_without_array) || + if (glsl_type_is_struct_or_ifc(t_without_array) || (glsl_type_is_array(type) && glsl_type_is_array(glsl_get_array_element(type)))) { const struct glsl_type *element_type = glsl_get_array_element(type); @@ -289,4 +295,7 @@ gl_nir_set_uniform_initializers(struct gl_context *ctx, } } } + memcpy(prog->data->UniformDataDefaults, prog->data->UniformDataSlots, + sizeof(union gl_constant_value) * prog->data->NumUniformDataSlots); + } diff --git a/lib/mesa/src/compiler/glsl/gl_nir_link_uniforms.c b/lib/mesa/src/compiler/glsl/gl_nir_link_uniforms.c index 1a491dc2e..6323d2940 100644 --- a/lib/mesa/src/compiler/glsl/gl_nir_link_uniforms.c +++ b/lib/mesa/src/compiler/glsl/gl_nir_link_uniforms.c @@ -54,6 +54,10 @@ nir_setup_uniform_remap_tables(struct gl_context *ctx, } prog->data->UniformDataSlots = data; + prog->data->UniformDataDefaults = + rzalloc_array(prog->data->UniformStorage, + union gl_constant_value, prog->data->NumUniformDataSlots); + unsigned data_pos = 0; /* Reserve all the explicit locations of the active uniforms. */ @@ -130,20 +134,82 @@ nir_setup_uniform_remap_tables(struct gl_context *ctx, } } +static void +mark_stage_as_active(struct gl_uniform_storage *uniform, + unsigned stage) +{ + uniform->active_shader_mask |= 1 << stage; +} + +/** + * Finds, returns, and updates the stage info for any uniform in UniformStorage + * defined by @var. In general this is done using the explicit location, + * except: + * + * * UBOs/SSBOs: as they lack explicit location, binding is used to locate + * them. That means that more that one entry at the uniform storage can be + * found. In that case all of them are updated, and the first entry is + * returned, in order to update the location of the nir variable. + * + * * Special uniforms: like atomic counters. They lack a explicit location, + * so they are skipped. They will be handled and assigned a location later. + * + */ static struct gl_uniform_storage * -find_previous_uniform_storage(struct gl_shader_program *prog, - int location) +find_and_update_previous_uniform_storage(struct gl_shader_program *prog, + nir_variable *var, + unsigned stage) { - /* This would only work for uniform with explicit location, as all the - * uniforms without location (ie: atomic counters) would have a initial - * location equal to -1. We early return in that case. + if (nir_variable_is_in_block(var)) { + struct gl_uniform_storage *uniform = NULL; + + unsigned num_blks = nir_variable_is_in_ubo(var) ? + prog->data->NumUniformBlocks : + prog->data->NumShaderStorageBlocks; + + struct gl_uniform_block *blks = nir_variable_is_in_ubo(var) ? 
+ prog->data->UniformBlocks : prog->data->ShaderStorageBlocks; + + for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) { + /* UniformStorage contains both variables from ubos and ssbos */ + if ( prog->data->UniformStorage[i].is_shader_storage != + nir_variable_is_in_ssbo(var)) + continue; + + int block_index = prog->data->UniformStorage[i].block_index; + if (block_index != -1) { + assert(block_index < num_blks); + + if (var->data.binding == blks[block_index].Binding) { + if (!uniform) + uniform = &prog->data->UniformStorage[i]; + mark_stage_as_active(&prog->data->UniformStorage[i], + stage); + } + } + } + + return uniform; + } + + /* Beyond blocks, there are still some corner cases of uniforms without + * location (ie: atomic counters) that would have a initial location equal + * to -1. We just return on that case. Those uniforms will be handled + * later. */ - if (location == -1) + if (var->data.location == -1) return NULL; - for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) - if (prog->data->UniformStorage[i].remap_location == location) + /* TODO: following search can be problematic with shaders with a lot of + * uniforms. Would it be better to use some type of hash + */ + for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) { + if (prog->data->UniformStorage[i].remap_location == var->data.location) { + mark_stage_as_active(&prog->data->UniformStorage[i], stage); + return &prog->data->UniformStorage[i]; + } + } return NULL; } @@ -184,7 +250,12 @@ struct nir_link_uniforms_state { unsigned shader_samplers_used; unsigned shader_shadow_samplers; + /* per-variable */ nir_variable *current_var; + int offset; + bool var_is_in_block; + int top_level_array_size; + int top_level_array_stride; struct type_tree_entry *current_type; }; @@ -204,7 +275,7 @@ build_type_tree_for_type(const struct glsl_type *type) entry->array_size = glsl_get_length(type); entry->children = build_type_tree_for_type(glsl_get_array_element(type)); entry->children->parent = entry; - } else if (glsl_type_is_struct(type)) { + } else if (glsl_type_is_struct_or_ifc(type)) { struct type_tree_entry *last = NULL; for (unsigned i = 0; i < glsl_get_length(type); i++) { @@ -282,34 +353,61 @@ nir_link_uniform(struct gl_context *ctx, struct gl_program *stage_program, gl_shader_stage stage, const struct glsl_type *type, + const struct glsl_type *parent_type, + unsigned index_in_parent, int location, struct nir_link_uniforms_state *state) { struct gl_uniform_storage *uniform = NULL; + if (parent_type == state->current_var->type && + nir_variable_is_in_ssbo(state->current_var)) { + /* Type is the top level SSBO member */ + if (glsl_type_is_array(type) && + (glsl_type_is_array(glsl_get_array_element(type)) || + glsl_type_is_struct_or_ifc(glsl_get_array_element(type)))) { + /* Type is a top-level array (array of aggregate types) */ + state->top_level_array_size = glsl_get_length(type); + state->top_level_array_stride = glsl_get_explicit_stride(type); + } else { + state->top_level_array_size = 1; + state->top_level_array_stride = 0; + } + } + /* gl_uniform_storage can cope with one level of array, so if the type is a * composite type or an array where each element occupies more than one * location than we need to recursively process it. 
*/ - if (glsl_type_is_struct(type) || + if (glsl_type_is_struct_or_ifc(type) || (glsl_type_is_array(type) && (glsl_type_is_array(glsl_get_array_element(type)) || - glsl_type_is_struct(glsl_get_array_element(type))))) { + glsl_type_is_struct_or_ifc(glsl_get_array_element(type))))) { int location_count = 0; struct type_tree_entry *old_type = state->current_type; + unsigned int struct_base_offset = state->offset; state->current_type = old_type->children; for (unsigned i = 0; i < glsl_get_length(type); i++) { const struct glsl_type *field_type; - if (glsl_type_is_struct(type)) + if (glsl_type_is_struct_or_ifc(type)) { field_type = glsl_get_struct_field(type, i); - else + /* Use the offset inside the struct only for variables backed by + * a buffer object. For variables not backed by a buffer object, + * offset is -1. + */ + if (state->var_is_in_block) { + state->offset = + struct_base_offset + glsl_get_struct_field_offset(type, i); + } + } else { field_type = glsl_get_array_element(type); + } int entries = nir_link_uniform(ctx, prog, stage_program, stage, - field_type, location, + field_type, type, i, location, state); if (entries == -1) return -1; @@ -318,7 +416,7 @@ nir_link_uniform(struct gl_context *ctx, location += entries; location_count += entries; - if (glsl_type_is_struct(type)) + if (glsl_type_is_struct_or_ifc(type)) state->current_type = state->current_type->next_sibling; } @@ -356,6 +454,9 @@ nir_link_uniform(struct gl_context *ctx, uniform->type = type; uniform->array_elements = 0; } + uniform->top_level_array_size = state->top_level_array_size; + uniform->top_level_array_stride = state->top_level_array_stride; + uniform->active_shader_mask |= 1 << stage; if (location >= 0) { @@ -369,20 +470,79 @@ nir_link_uniform(struct gl_context *ctx, if (uniform->hidden) state->num_hidden_uniforms++; + uniform->is_shader_storage = nir_variable_is_in_ssbo(state->current_var); + + /* Set fields whose default value depend on the variable being inside a + * block. + * + * From the OpenGL 4.6 spec, 7.3 Program objects: + * + * "For the property ARRAY_STRIDE, ... For active variables not declared + * as an array of basic types, zero is written to params. For active + * variables not backed by a buffer object, -1 is written to params, + * regardless of the variable type." + * + * "For the property MATRIX_STRIDE, ... For active variables not declared + * as a matrix or array of matrices, zero is written to params. For active + * variables not backed by a buffer object, -1 is written to params, + * regardless of the variable type." + * + * For the property IS_ROW_MAJOR, ... For active variables backed by a + * buffer object, declared as a single matrix or array of matrices, and + * stored in row-major order, one is written to params. For all other + * active variables, zero is written to params. + */ + uniform->array_stride = -1; + uniform->matrix_stride = -1; + uniform->row_major = false; + + if (state->var_is_in_block) { + uniform->array_stride = glsl_type_is_array(type) ? + glsl_get_explicit_stride(type) : 0; + + if (glsl_type_is_matrix(type)) { + assert(parent_type); + uniform->matrix_stride = glsl_get_explicit_stride(type); + + uniform->row_major = glsl_matrix_type_is_row_major(type); + } else { + uniform->matrix_stride = 0; + } + } + + uniform->offset = state->var_is_in_block ? state->offset : -1; + + int buffer_block_index = -1; + /* If the uniform is inside a uniform block determine its block index by + * comparing the bindings, we can not use names. 
+ */ + if (state->var_is_in_block) { + struct gl_uniform_block *blocks = nir_variable_is_in_ssbo(state->current_var) ? + prog->data->ShaderStorageBlocks : prog->data->UniformBlocks; + + int num_blocks = nir_variable_is_in_ssbo(state->current_var) ? + prog->data->NumShaderStorageBlocks : prog->data->NumUniformBlocks; + + for (unsigned i = 0; i < num_blocks; i++) { + if (state->current_var->data.binding == blocks[i].Binding) { + buffer_block_index = i; + break; + } + } + assert(buffer_block_index >= 0); + + /* Compute the next offset. */ + state->offset += glsl_get_explicit_size(type, true); + } + + uniform->block_index = buffer_block_index; + /* @FIXME: the initialization of the following will be done as we * implement support for their specific features, like SSBO, atomics, * etc. */ - uniform->block_index = -1; - uniform->offset = -1; - uniform->matrix_stride = -1; - uniform->array_stride = -1; - uniform->row_major = false; uniform->builtin = false; - uniform->is_shader_storage = false; uniform->atomic_buffer_index = -1; - uniform->top_level_array_size = 0; - uniform->top_level_array_stride = 0; uniform->is_bindless = false; /* The following are not for features not supported by ARB_gl_spirv */ @@ -443,7 +603,8 @@ nir_link_uniform(struct gl_context *ctx, state->num_shader_uniform_components += values; state->num_values += values; - if (state->max_uniform_location < uniform->remap_location + entries) + if (uniform->remap_location != UNMAPPED_UNIFORM_LOC && + state->max_uniform_location < uniform->remap_location + entries) state->max_uniform_location = uniform->remap_location + entries; return MAX2(uniform->array_elements, 1); @@ -483,9 +644,8 @@ gl_nir_link_uniforms(struct gl_context *ctx, * other stage. If so, validate they are compatible and update * the active stage mask. */ - uniform = find_previous_uniform_storage(prog, var->data.location); + uniform = find_and_update_previous_uniform_storage(prog, var, shader_type); if (uniform) { - uniform->active_shader_mask |= 1 << shader_type; var->data.location = uniform - prog->data->UniformStorage; continue; @@ -496,13 +656,58 @@ gl_nir_link_uniforms(struct gl_context *ctx, var->data.location = prog->data->NumUniformStorage; state.current_var = var; + state.offset = 0; + state.var_is_in_block = nir_variable_is_in_block(var); + state.top_level_array_size = 0; + state.top_level_array_stride = 0; + + /* + * From ARB_program_interface spec, issue (16): + * + * "RESOLVED: We will follow the default rule for enumerating block + * members in the OpenGL API, which is: + * + * * If a variable is a member of an interface block without an + * instance name, it is enumerated using just the variable name. + * + * * If a variable is a member of an interface block with an + * instance name, it is enumerated as "BlockName.Member", where + * "BlockName" is the name of the interface block (not the + * instance name) and "Member" is the name of the variable. + * + * For example, in the following code: + * + * uniform Block1 { + * int member1; + * }; + * uniform Block2 { + * int member2; + * } instance2; + * uniform Block3 { + * int member3; + * } instance3[2]; // uses two separate buffer bindings + * + * the three uniforms (if active) are enumerated as "member1", + * "Block2.member2", and "Block3.member3"." + * + * Note that in the last example, with an array of ubo, only one + * uniform is generated. For that reason, while unrolling the + * uniforms of a ubo, or the variables of a ssbo, we need to treat + * arrays of instance as a single block. 
+ */ + const struct glsl_type *type = var->type; + if (state.var_is_in_block && glsl_type_is_array(type)) { + type = glsl_without_array(type); + } struct type_tree_entry *type_tree = - build_type_tree_for_type(var->type); + build_type_tree_for_type(type); state.current_type = type_tree; - int res = nir_link_uniform(ctx, prog, sh->Program, shader_type, var->type, - location, &state); + int res = nir_link_uniform(ctx, prog, sh->Program, shader_type, type, + NULL, 0, + location, + &state); free_type_tree(type_tree); diff --git a/lib/mesa/src/compiler/glsl/gl_nir_link_xfb.c b/lib/mesa/src/compiler/glsl/gl_nir_link_xfb.c index bcef1e186..5b80a3e03 100644 --- a/lib/mesa/src/compiler/glsl/gl_nir_link_xfb.c +++ b/lib/mesa/src/compiler/glsl/gl_nir_link_xfb.c @@ -22,10 +22,11 @@ */ #include "nir.h" +#include "nir_xfb_info.h" #include "gl_nir_linker.h" -#include "ir_uniform.h" /* for gl_uniform_storage */ #include "linker_util.h" #include "main/context.h" +#include "util/u_math.h" /* * This file does the linking of GLSL transform feedback using NIR. @@ -34,160 +35,6 @@ * particularities. */ -struct active_xfb_buffer { - GLuint stride; - GLuint num_varyings; -}; - -struct active_xfb_varyings { - unsigned num_varyings; - unsigned num_outputs; - unsigned buffer_size; - struct nir_variable **varyings; - struct active_xfb_buffer buffers[MAX_FEEDBACK_BUFFERS]; -}; - -static unsigned -get_num_outputs(nir_variable *var) -{ - return glsl_count_attribute_slots(var->type, - false /* is_vertex_input */); -} - -static void -add_xfb_varying(struct active_xfb_varyings *active_varyings, - nir_variable *var) -{ - if (active_varyings->num_varyings >= active_varyings->buffer_size) { - if (active_varyings->buffer_size == 0) - active_varyings->buffer_size = 1; - else - active_varyings->buffer_size *= 2; - - active_varyings->varyings = realloc(active_varyings->varyings, - sizeof(nir_variable*) * - active_varyings->buffer_size); - } - - active_varyings->varyings[active_varyings->num_varyings++] = var; - - active_varyings->num_outputs += get_num_outputs(var); -} - -static int -cmp_xfb_offset(const void *x_generic, const void *y_generic) -{ - const nir_variable *const *x = x_generic; - const nir_variable *const *y = y_generic; - - if ((*x)->data.xfb_buffer != (*y)->data.xfb_buffer) - return (*x)->data.xfb_buffer - (*y)->data.xfb_buffer; - return (*x)->data.offset - (*y)->data.offset; -} - -static void -get_active_xfb_varyings(struct gl_shader_program *prog, - struct active_xfb_varyings *active_varyings) -{ - for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) { - struct gl_linked_shader *sh = prog->_LinkedShaders[i]; - if (sh == NULL) - continue; - - nir_shader *nir = sh->Program->nir; - - nir_foreach_variable(var, &nir->outputs) { - if (var->data.explicit_xfb_buffer && - var->data.explicit_xfb_stride) { - assert(var->data.xfb_buffer < MAX_FEEDBACK_BUFFERS); - active_varyings->buffers[var->data.xfb_buffer].stride = - var->data.xfb_stride; - } - - if (!var->data.explicit_xfb_buffer || - !var->data.explicit_offset) - continue; - - active_varyings->buffers[var->data.xfb_buffer].num_varyings++; - - add_xfb_varying(active_varyings, var); - } - } - - /* The xfb_offset qualifier does not have to be used in increasing order - * however some drivers expect to receive the list of transform feedback - * declarations in order so sort it now for convenience. 
- */ - qsort(active_varyings->varyings, - active_varyings->num_varyings, - sizeof(*active_varyings->varyings), - cmp_xfb_offset); -} - -static unsigned -add_varying_outputs(nir_variable *var, - const struct glsl_type *type, - unsigned location_offset, - unsigned dest_offset, - struct gl_transform_feedback_output *output) -{ - unsigned num_outputs = 0; - - if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) { - unsigned length = glsl_get_length(type); - const struct glsl_type *child_type = glsl_get_array_element(type); - unsigned component_slots = glsl_get_component_slots(child_type); - - for (unsigned i = 0; i < length; i++) { - unsigned child_outputs = add_varying_outputs(var, - child_type, - location_offset, - dest_offset, - output + num_outputs); - num_outputs += child_outputs; - location_offset += child_outputs; - dest_offset += component_slots; - } - } else if (glsl_type_is_struct(type)) { - unsigned length = glsl_get_length(type); - for (unsigned i = 0; i < length; i++) { - const struct glsl_type *child_type = glsl_get_struct_field(type, i); - unsigned child_outputs = add_varying_outputs(var, - child_type, - location_offset, - dest_offset, - output + num_outputs); - num_outputs += child_outputs; - location_offset += child_outputs; - dest_offset += glsl_get_component_slots(child_type); - } - } else { - unsigned location = var->data.location + location_offset; - unsigned location_frac = var->data.location_frac; - unsigned num_components = glsl_get_component_slots(type); - - while (num_components > 0) { - unsigned output_size = MIN2(num_components, 4 - location_frac); - - output->OutputRegister = location; - output->OutputBuffer = var->data.xfb_buffer; - output->NumComponents = output_size; - output->StreamId = var->data.stream; - output->DstOffset = var->data.offset / 4 + dest_offset; - output->ComponentOffset = location_frac; - - dest_offset += output_size; - num_components -= output_size; - num_outputs++; - output++; - location++; - location_frac = 0; - } - } - - return num_outputs; -} - void gl_nir_link_assign_xfb_resources(struct gl_context *ctx, struct gl_shader_program *prog) @@ -220,36 +67,57 @@ gl_nir_link_assign_xfb_resources(struct gl_context *ctx, free(prog->TransformFeedback.VaryingNames[i]); free(prog->TransformFeedback.VaryingNames); - struct active_xfb_varyings active_varyings = { 0 }; + nir_xfb_info *xfb_info = NULL; + nir_xfb_varyings_info *varyings_info = NULL; - get_active_xfb_varyings(prog, &active_varyings); + /* Find last stage before fragment shader */ + for (int stage = MESA_SHADER_FRAGMENT - 1; stage >= 0; stage--) { + struct gl_linked_shader *sh = prog->_LinkedShaders[stage]; - for (unsigned buf = 0; buf < MAX_FEEDBACK_BUFFERS; buf++) - prog->TransformFeedback.BufferStride[buf] = active_varyings.buffers[buf].stride; - - prog->TransformFeedback.NumVarying = active_varyings.num_varyings; - prog->TransformFeedback.VaryingNames = - malloc(sizeof(GLchar *) * active_varyings.num_varyings); + if (sh && stage != MESA_SHADER_TESS_CTRL) { + xfb_info = nir_gather_xfb_info_with_varyings(sh->Program->nir, NULL, &varyings_info); + break; + } + } struct gl_transform_feedback_info *linked_xfb = rzalloc(xfb_prog, struct gl_transform_feedback_info); xfb_prog->sh.LinkedTransformFeedback = linked_xfb; + if (!xfb_info) { + prog->TransformFeedback.NumVarying = 0; + linked_xfb->NumOutputs = 0; + linked_xfb->NumVarying = 0; + linked_xfb->ActiveBuffers = 0; + return; + } + + for (unsigned buf = 0; buf < MAX_FEEDBACK_BUFFERS; buf++) + prog->TransformFeedback.BufferStride[buf] = 
xfb_info->buffers[buf].stride; + + prog->TransformFeedback.NumVarying = varyings_info->varying_count; + prog->TransformFeedback.VaryingNames = + malloc(sizeof(GLchar *) * varyings_info->varying_count); + linked_xfb->Outputs = rzalloc_array(xfb_prog, struct gl_transform_feedback_output, - active_varyings.num_outputs); - linked_xfb->NumOutputs = active_varyings.num_outputs; + xfb_info->output_count); + linked_xfb->NumOutputs = xfb_info->output_count; linked_xfb->Varyings = rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info, - active_varyings.num_varyings); - linked_xfb->NumVarying = active_varyings.num_varyings; + varyings_info->varying_count); + linked_xfb->NumVarying = varyings_info->varying_count; + + int buffer_index = 0; /* Corresponds to GL_TRANSFORM_FEEDBACK_BUFFER_INDEX */ + int xfb_buffer = + (varyings_info->varying_count > 0) ? + xfb_info->outputs[0].buffer : 0; - struct gl_transform_feedback_output *output = linked_xfb->Outputs; - for (unsigned i = 0; i < active_varyings.num_varyings; i++) { - struct nir_variable *var = active_varyings.varyings[i]; + for (unsigned i = 0; i < varyings_info->varying_count; i++) { + nir_xfb_varying_info *xfb_varying = &varyings_info->varyings[i]; /* From ARB_gl_spirv spec: * @@ -277,23 +145,35 @@ gl_nir_link_assign_xfb_resources(struct gl_context *ctx, */ prog->TransformFeedback.VaryingNames[i] = NULL; - unsigned varying_outputs = add_varying_outputs(var, - var->type, - 0, /* location_offset */ - 0, /* dest_offset */ - output); - assert(varying_outputs == get_num_outputs(var)); - output = output + varying_outputs; + if (xfb_buffer != xfb_varying->buffer) { + buffer_index++; + xfb_buffer = xfb_varying->buffer; + } struct gl_transform_feedback_varying_info *varying = linked_xfb->Varyings + i; /* ARB_gl_spirv: see above. */ varying->Name = NULL; - varying->Type = glsl_get_gl_type(var->type); - varying->BufferIndex = var->data.xfb_buffer; - varying->Size = glsl_get_length(var->type); - varying->Offset = var->data.offset; + varying->Type = glsl_get_gl_type(xfb_varying->type); + varying->BufferIndex = buffer_index; + varying->Size = glsl_type_is_array(xfb_varying->type) ? 
+ glsl_get_length(xfb_varying->type) : 1; + varying->Offset = xfb_varying->offset; + } + + for (unsigned i = 0; i < xfb_info->output_count; i++) { + nir_xfb_output_info *xfb_output = &xfb_info->outputs[i]; + + struct gl_transform_feedback_output *output = + linked_xfb->Outputs + i; + + output->OutputRegister = xfb_output->location; + output->OutputBuffer = xfb_output->buffer; + output->NumComponents = util_bitcount(xfb_output->component_mask); + output->StreamId = xfb_info->buffer_to_stream[xfb_output->buffer]; + output->DstOffset = xfb_output->offset / 4; + output->ComponentOffset = xfb_output->component_offset; } /* Make sure MaxTransformFeedbackBuffers is <= 32 so the bitmask for @@ -303,14 +183,14 @@ gl_nir_link_assign_xfb_resources(struct gl_context *ctx, assert(ctx->Const.MaxTransformFeedbackBuffers <= sizeof(buffers) * 8); for (unsigned buf = 0; buf < MAX_FEEDBACK_BUFFERS; buf++) { - if (active_varyings.buffers[buf].stride > 0) { - linked_xfb->Buffers[buf].Stride = active_varyings.buffers[buf].stride / 4; - linked_xfb->Buffers[buf].NumVaryings = active_varyings.buffers[buf].num_varyings; + if (xfb_info->buffers[buf].stride > 0) { + linked_xfb->Buffers[buf].Stride = xfb_info->buffers[buf].stride / 4; + linked_xfb->Buffers[buf].NumVaryings = xfb_info->buffers[buf].varying_count; buffers |= 1 << buf; } } linked_xfb->ActiveBuffers = buffers; - free(active_varyings.varyings); + ralloc_free(xfb_info); } diff --git a/lib/mesa/src/compiler/glsl/gl_nir_linker.c b/lib/mesa/src/compiler/glsl/gl_nir_linker.c index 92e90714e..1b041d384 100644 --- a/lib/mesa/src/compiler/glsl/gl_nir_linker.c +++ b/lib/mesa/src/compiler/glsl/gl_nir_linker.c @@ -33,6 +33,96 @@ * Also note that this is tailored for ARB_gl_spirv needs and particularities */ +static bool +add_vars_from_list(const struct gl_context *ctx, + struct gl_shader_program *prog, struct set *resource_set, + const struct exec_list *var_list, unsigned stage, + GLenum programInterface) +{ + nir_foreach_variable(var, var_list) { + if (var->data.how_declared == nir_var_hidden) + continue; + + int loc_bias = 0; + switch(var->data.mode) { + case nir_var_system_value: + case nir_var_shader_in: + if (programInterface != GL_PROGRAM_INPUT) + continue; + loc_bias = (stage == MESA_SHADER_VERTEX) ? VERT_ATTRIB_GENERIC0 + : VARYING_SLOT_VAR0; + break; + case nir_var_shader_out: + if (programInterface != GL_PROGRAM_OUTPUT) + continue; + loc_bias = (stage == MESA_SHADER_FRAGMENT) ? FRAG_RESULT_DATA0 + : VARYING_SLOT_VAR0; + break; + default: + continue; + } + + if (var->data.patch) + loc_bias = VARYING_SLOT_PATCH0; + + struct gl_shader_variable *sh_var = + rzalloc(prog, struct gl_shader_variable); + + /* In the ARB_gl_spirv spec, names are considered optional debug info, so + * the linker needs to work without them. Returning them is optional. + * For simplicity, we ignore names. 
+ */ + sh_var->name = NULL; + sh_var->type = var->type; + sh_var->location = var->data.location - loc_bias; + sh_var->index = var->data.index; + + if (!link_util_add_program_resource(prog, resource_set, + programInterface, + sh_var, 1 << stage)) { + return false; + } + } + + return true; +} + +static bool +add_interface_variables(const struct gl_context *ctx, + struct gl_shader_program *prog, + struct set *resource_set, + unsigned stage, GLenum programInterface) +{ + struct gl_linked_shader *sh = prog->_LinkedShaders[stage]; + if (!sh) + return true; + + nir_shader *nir = sh->Program->nir; + assert(nir); + + switch (programInterface) { + case GL_PROGRAM_INPUT: { + bool result = add_vars_from_list(ctx, prog, resource_set, + &nir->inputs, stage, programInterface); + result &= add_vars_from_list(ctx, prog, resource_set, &nir->system_values, + stage, programInterface); + return result; + } + case GL_PROGRAM_OUTPUT: + return add_vars_from_list(ctx, prog, resource_set, &nir->outputs, stage, + programInterface); + default: + assert("!Should not get here"); + break; + } + + return false; +} + +/* TODO: as we keep adding features, this method is becoming more and more + * similar to its GLSL counterpart at linker.cpp. Eventually it would be good + * to check if they could be refactored, and reduce code duplication somehow + */ void nir_build_program_resource_list(struct gl_context *ctx, struct gl_shader_program *prog) @@ -44,13 +134,73 @@ nir_build_program_resource_list(struct gl_context *ctx, prog->data->NumProgramResourceList = 0; } + int input_stage = MESA_SHADER_STAGES, output_stage = 0; + + /* Determine first input and final output stage. These are used to + * detect which variables should be enumerated in the resource list + * for GL_PROGRAM_INPUT and GL_PROGRAM_OUTPUT. + */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (!prog->_LinkedShaders[i]) + continue; + if (input_stage == MESA_SHADER_STAGES) + input_stage = i; + output_stage = i; + } + + /* Empty shader, no resources. */ + if (input_stage == MESA_SHADER_STAGES && output_stage == 0) + return; + struct set *resource_set = _mesa_pointer_set_create(NULL); + /* Add inputs and outputs to the resource list. */ + if (!add_interface_variables(ctx, prog, resource_set, input_stage, + GL_PROGRAM_INPUT)) { + return; + } + + if (!add_interface_variables(ctx, prog, resource_set, output_stage, + GL_PROGRAM_OUTPUT)) { + return; + } + + /* Add transform feedback varyings and buffers. */ + if (prog->last_vert_prog) { + struct gl_transform_feedback_info *linked_xfb = + prog->last_vert_prog->sh.LinkedTransformFeedback; + + /* Add varyings. */ + if (linked_xfb->NumVarying > 0) { + for (int i = 0; i < linked_xfb->NumVarying; i++) { + if (!link_util_add_program_resource(prog, resource_set, + GL_TRANSFORM_FEEDBACK_VARYING, + &linked_xfb->Varyings[i], 0)) + return; + } + } + + /* Add buffers. */ + for (unsigned i = 0; i < ctx->Const.MaxTransformFeedbackBuffers; i++) { + if ((linked_xfb->ActiveBuffers >> i) & 1) { + linked_xfb->Buffers[i].Binding = i; + if (!link_util_add_program_resource(prog, resource_set, + GL_TRANSFORM_FEEDBACK_BUFFER, + &linked_xfb->Buffers[i], 0)) + return; + } + } + } + /* Add uniforms * * Here, it is expected that nir_link_uniforms() has already been * called, so that UniformStorage table is already available. 
*/ + int top_level_array_base_offset = -1; + int top_level_array_size_in_bytes = -1; + int second_element_offset = -1; + int block_index = -1; for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) { struct gl_uniform_storage *uniform = &prog->data->UniformStorage[i]; @@ -58,12 +208,59 @@ nir_build_program_resource_list(struct gl_context *ctx, if (uniform->hidden) continue; - if (!link_util_add_program_resource(prog, resource_set, GL_UNIFORM, uniform, + if (!link_util_should_add_buffer_variable(prog, uniform, + top_level_array_base_offset, + top_level_array_size_in_bytes, + second_element_offset, block_index)) + continue; + + + if (prog->data->UniformStorage[i].offset >= second_element_offset) { + top_level_array_base_offset = + prog->data->UniformStorage[i].offset; + + top_level_array_size_in_bytes = + prog->data->UniformStorage[i].top_level_array_size * + prog->data->UniformStorage[i].top_level_array_stride; + + /* Set or reset the second element offset. For non arrays this + * will be set to -1. + */ + second_element_offset = top_level_array_size_in_bytes ? + top_level_array_base_offset + + prog->data->UniformStorage[i].top_level_array_stride : -1; + } + block_index = uniform->block_index; + + + GLenum interface = uniform->is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM; + if (!link_util_add_program_resource(prog, resource_set, interface, uniform, uniform->active_shader_mask)) { return; } } + for (unsigned i = 0; i < prog->data->NumUniformBlocks; i++) { + if (!link_util_add_program_resource(prog, resource_set, GL_UNIFORM_BLOCK, + &prog->data->UniformBlocks[i], + prog->data->UniformBlocks[i].stageref)) + return; + } + + for (unsigned i = 0; i < prog->data->NumShaderStorageBlocks; i++) { + if (!link_util_add_program_resource(prog, resource_set, GL_SHADER_STORAGE_BLOCK, + &prog->data->ShaderStorageBlocks[i], + prog->data->ShaderStorageBlocks[i].stageref)) + return; + } + + /* Add atomic counter buffers. */ + for (unsigned i = 0; i < prog->data->NumAtomicBuffers; i++) { + if (!link_util_add_program_resource(prog, resource_set, GL_ATOMIC_COUNTER_BUFFER, + &prog->data->AtomicBuffers[i], 0)) + return; + } + _mesa_set_destroy(resource_set, NULL); } diff --git a/lib/mesa/src/compiler/glsl/gl_nir_linker.h b/lib/mesa/src/compiler/glsl/gl_nir_linker.h index 29ca27d3d..20ed35fa0 100644 --- a/lib/mesa/src/compiler/glsl/gl_nir_linker.h +++ b/lib/mesa/src/compiler/glsl/gl_nir_linker.h @@ -46,6 +46,9 @@ void gl_nir_link_assign_atomic_counter_resources(struct gl_context *ctx, void gl_nir_link_assign_xfb_resources(struct gl_context *ctx, struct gl_shader_program *prog); +bool gl_nir_link_uniform_blocks(struct gl_context *ctx, + struct gl_shader_program *prog); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/lib/mesa/src/compiler/glsl/gl_nir_lower_bindless_images.c b/lib/mesa/src/compiler/glsl/gl_nir_lower_bindless_images.c new file mode 100644 index 000000000..4c7697719 --- /dev/null +++ b/lib/mesa/src/compiler/glsl/gl_nir_lower_bindless_images.c @@ -0,0 +1,97 @@ +/* + * Copyright © 2019 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * + * Lower bindless image operations by turning the image_deref_* into a + * bindless_image_* intrinsic and adding a load_deref on the previous deref + * source. All applicable indices are also set so that the backend no longer + * needs to fetch the variable. + */ + +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_deref.h" + +#include "compiler/glsl/gl_nir.h" + +static bool +lower_impl(nir_builder *b, nir_instr *instr) { + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr); + + nir_deref_instr *deref; + nir_variable *var; + + switch (intrinsic->intrinsic) { + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_min: + case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_atomic_fadd: + case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_deref_size: + case nir_intrinsic_image_deref_store: { + deref = nir_src_as_deref(intrinsic->src[0]); + var = nir_deref_instr_get_variable(deref); + break; + } + default: + return false; + } + + if (deref->mode == nir_var_uniform && !var->data.bindless) + return false; + + b->cursor = nir_before_instr(instr); + nir_ssa_def *handle = nir_load_deref(b, deref); + nir_rewrite_image_intrinsic(intrinsic, handle, true); + return true; +} + +bool +gl_nir_lower_bindless_images(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl) { + nir_builder b; + nir_builder_init(&b, function->impl); + + nir_foreach_block(block, function->impl) + nir_foreach_instr(instr, block) + progress |= lower_impl(&b, instr); + } + } + + return progress; +} diff --git a/lib/mesa/src/compiler/glsl/gl_nir_lower_buffers.c b/lib/mesa/src/compiler/glsl/gl_nir_lower_buffers.c new file mode 100644 index 000000000..c7b58583d --- /dev/null +++ b/lib/mesa/src/compiler/glsl/gl_nir_lower_buffers.c @@ -0,0 +1,323 @@ +/* + * Copyright © 2019 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * 
copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "gl_nir.h" +#include "ir_uniform.h" + +#include "main/compiler.h" +#include "main/mtypes.h" + +static nir_ssa_def * +get_block_array_index(nir_builder *b, nir_deref_instr *deref, + const struct gl_shader_program *shader_program) +{ + unsigned array_elements = 1; + + /* Build a block name such as "block[2][0]" for looking the block up in + * the list of blocks later on, as well as an optional dynamic index + * which gets added to the block index. + */ + int binding = 0; + const char *block_name = ""; + nir_ssa_def *nonconst_index = NULL; + while (deref->deref_type == nir_deref_type_array) { + nir_deref_instr *parent = nir_deref_instr_parent(deref); + assert(parent && glsl_type_is_array(parent->type)); + unsigned arr_size = glsl_get_length(parent->type); + + if (nir_src_is_const(deref->arr.index)) { + unsigned arr_index = nir_src_as_uint(deref->arr.index); + arr_index = MIN2(arr_index, arr_size - 1); + + /* We're walking the deref from the tail so prepend the array index */ + block_name = ralloc_asprintf(b->shader, "[%u]%s", arr_index, + block_name); + + binding += arr_index * array_elements; + } else { + nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1); + arr_index = nir_umin(b, arr_index, nir_imm_int(b, arr_size - 1)); + nir_ssa_def *arr_offset = nir_imul_imm(b, arr_index, array_elements); + if (nonconst_index) + nonconst_index = nir_iadd(b, nonconst_index, arr_offset); + else + nonconst_index = arr_offset; + + /* We're walking the deref from the tail so prepend the array index */ + block_name = ralloc_asprintf(b->shader, "[0]%s", block_name); + } + + array_elements *= arr_size; + deref = parent; + } + + assert(deref->deref_type == nir_deref_type_var); + binding += deref->var->data.binding; + block_name = ralloc_asprintf(b->shader, "%s%s", + glsl_get_type_name(deref->var->interface_type), + block_name); + + struct gl_linked_shader *linked_shader = + shader_program->_LinkedShaders[b->shader->info.stage]; + + unsigned num_blocks; + struct gl_uniform_block **blocks; + if (deref->mode == nir_var_mem_ubo) { + num_blocks = linked_shader->Program->info.num_ubos; + blocks = linked_shader->Program->sh.UniformBlocks; + } else { + assert(deref->mode == nir_var_mem_ssbo); + num_blocks = linked_shader->Program->info.num_ssbos; + blocks = linked_shader->Program->sh.ShaderStorageBlocks; + } + + /* Block names are optional with ARB_gl_spirv so use the binding instead. 
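+ * With GLSL-linked programs we instead match on the block name + * reconstructed above (e.g. "block[2][0]").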
*/ + bool use_bindings = shader_program->data->spirv; + + for (unsigned i = 0; i < num_blocks; i++) { + if (( use_bindings && binding == blocks[i]->Binding) || + (!use_bindings && strcmp(block_name, blocks[i]->Name) == 0)) { + if (nonconst_index) + return nir_iadd_imm(b, nonconst_index, i); + else + return nir_imm_int(b, i); + } + } + + /* TODO: Investigate if we could change the code to assign Bindings to the + * blocks that were not explicitly assigned, so we can always compare + * bindings. + */ + + if (use_bindings) + unreachable("Failed to find the block by binding"); + else + unreachable("Failed to find the block by name"); +} + +static void +get_block_index_offset(nir_variable *var, + const struct gl_shader_program *shader_program, + gl_shader_stage stage, + unsigned *index, unsigned *offset) +{ + + struct gl_linked_shader *linked_shader = + shader_program->_LinkedShaders[stage]; + + unsigned num_blocks; + struct gl_uniform_block **blocks; + if (var->data.mode == nir_var_mem_ubo) { + num_blocks = linked_shader->Program->info.num_ubos; + blocks = linked_shader->Program->sh.UniformBlocks; + } else { + assert(var->data.mode == nir_var_mem_ssbo); + num_blocks = linked_shader->Program->info.num_ssbos; + blocks = linked_shader->Program->sh.ShaderStorageBlocks; + } + + /* Block names are optional with ARB_gl_spirv so use the binding instead. */ + bool use_bindings = shader_program->data->spirv; + + for (unsigned i = 0; i < num_blocks; i++) { + const char *block_name = glsl_get_type_name(var->interface_type); + if (( use_bindings && blocks[i]->Binding == var->data.binding) || + (!use_bindings && strcmp(block_name, blocks[i]->Name) == 0)) { + *index = i; + *offset = blocks[i]->Uniforms[var->data.location].Offset; + return; + } + } + + if (use_bindings) + unreachable("Failed to find the block by binding"); + else + unreachable("Failed to find the block by name"); +} + +static bool +lower_buffer_interface_derefs_impl(nir_function_impl *impl, + const struct gl_shader_program *shader_program) +{ + bool progress = false; + + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + switch (instr->type) { + case nir_instr_type_deref: { + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (!(deref->mode & (nir_var_mem_ubo | nir_var_mem_ssbo))) + break; + + /* We use nir_address_format_32bit_index_offset */ + assert(deref->dest.is_ssa); + assert(deref->dest.ssa.bit_size == 32); + deref->dest.ssa.num_components = 2; + + progress = true; + + b.cursor = nir_before_instr(&deref->instr); + + nir_ssa_def *ptr; + if (deref->deref_type == nir_deref_type_var && + !glsl_type_is_interface(glsl_without_array(deref->var->type))) { + /* This variable is contained in an interface block rather than + * containing one. We need the block index and its offset + * inside that block + */ + unsigned index, offset; + get_block_index_offset(deref->var, shader_program, + b.shader->info.stage, + &index, &offset); + ptr = nir_imm_ivec2(&b, index, offset); + } else if (glsl_type_is_interface(deref->type)) { + /* This is the last deref before the block boundary. + * Everything after this point is a byte offset and will be + * handled by nir_lower_explicit_io(). + */ + nir_ssa_def *index = get_block_array_index(&b, deref, + shader_program); + ptr = nir_vec2(&b, index, nir_imm_int(&b, 0)); + } else { + /* This will get handled by nir_lower_explicit_io(). 
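+ * (intermediate struct and array derefs within the block, which reduce + * to plain byte offsets).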
*/ + break; + } + + nir_deref_instr *cast = nir_build_deref_cast(&b, ptr, deref->mode, + deref->type, 0); + nir_ssa_def_rewrite_uses(&deref->dest.ssa, + nir_src_for_ssa(&cast->dest.ssa)); + nir_deref_instr_remove_if_unused(deref); + break; + } + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_load_deref: { + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + if (!(deref->mode & (nir_var_mem_ubo | nir_var_mem_ssbo))) + break; + + /* UBO and SSBO Booleans are 32-bit integers where any non-zero + * value is considered true. NIR Booleans, on the other hand, + * are 1-bit values until you get to a very late stage of the + * compilation process. We need to turn those 1-bit loads into + * a 32-bit load wrapped in an i2b to get a proper NIR boolean + * from the SSBO. + */ + if (glsl_type_is_boolean(deref->type)) { + assert(intrin->dest.is_ssa); + b.cursor = nir_after_instr(&intrin->instr); + intrin->dest.ssa.bit_size = 32; + nir_ssa_def *bval = nir_i2b(&b, &intrin->dest.ssa); + nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, + nir_src_for_ssa(bval), + bval->parent_instr); + progress = true; + } + break; + } + + case nir_intrinsic_store_deref: { + nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); + if (!(deref->mode & (nir_var_mem_ubo | nir_var_mem_ssbo))) + break; + + /* SSBO Booleans are 32-bit integers where any non-zero value + * is considered true. NIR Booleans, on the other hand, are + * 1-bit values until you get to a very late stage of the + * compilation process. We need to turn those 1-bit stores + * into a b2i32 followed by a 32-bit store. Technically the + * value we write doesn't have to be 0/1, so once Booleans are + * lowered to 32-bit values, we have an unneeded sanitization + * step, but in practice it doesn't cost much. + */ + if (glsl_type_is_boolean(deref->type)) { + assert(intrin->src[1].is_ssa); + b.cursor = nir_before_instr(&intrin->instr); + nir_ssa_def *ival = nir_b2i32(&b, intrin->src[1].ssa); + nir_instr_rewrite_src(&intrin->instr, &intrin->src[1], + nir_src_for_ssa(ival)); + progress = true; + } + break; + } + + case nir_intrinsic_copy_deref: + unreachable("copy_deref should be lowered by now"); + break; + + default: + /* Nothing to do */ + break; + } + break; + } + + default: + break; /* Nothing to do */ + } + } + } + + if (progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + } + + return progress; +} + +bool +gl_nir_lower_buffers(nir_shader *shader, + const struct gl_shader_program *shader_program) +{ + bool progress = false; + + /* First, we lower the derefs to turn block variable and array derefs into + * a nir_address_format_32bit_index_offset pointer. From there forward, + * we leave the derefs in place and let nir_lower_explicit_io handle them. + */ + nir_foreach_function(function, shader) { + if (function->impl && + lower_buffer_interface_derefs_impl(function->impl, shader_program)) + progress = true; + } + + /* If that did something, we validate and then call nir_lower_explicit_io + * to finish the process. 
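+ * nir_lower_explicit_io() replaces the remaining UBO/SSBO derefs with + * explicit index+offset intrinsics such as load_ubo and store_ssbo.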
+ */ + if (progress) { + nir_validate_shader(shader, "Lowering buffer interface derefs"); + nir_lower_explicit_io(shader, nir_var_mem_ubo | nir_var_mem_ssbo, + nir_address_format_32bit_index_offset); + } + + return progress; +} diff --git a/lib/mesa/src/compiler/glsl/gl_nir_lower_samplers_as_deref.c b/lib/mesa/src/compiler/glsl/gl_nir_lower_samplers_as_deref.c index 87718112d..5907abfd9 100644 --- a/lib/mesa/src/compiler/glsl/gl_nir_lower_samplers_as_deref.c +++ b/lib/mesa/src/compiler/glsl/gl_nir_lower_samplers_as_deref.c @@ -104,7 +104,7 @@ remove_struct_derefs_prep(nir_deref_instr **p, char **name, } case nir_deref_type_struct: { - *location += glsl_get_record_location_offset(cur->type, next->strct.index); + *location += glsl_get_struct_location_offset(cur->type, next->strct.index); ralloc_asprintf_append(name, ".%s", glsl_get_struct_elem_name(cur->type, next->strct.index)); @@ -205,6 +205,26 @@ lower_deref(nir_builder *b, struct lower_samplers_as_deref_state *state, return new_deref; } +static void +record_textures_used(struct shader_info *info, + nir_deref_instr *deref, + nir_texop op) +{ + nir_variable *var = nir_deref_instr_get_variable(deref); + + /* Structs have been lowered already, so get_aoa_size is sufficient. */ + const unsigned size = + glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1; + unsigned mask = ((1ull << MAX2(size, 1)) - 1) << var->data.binding; + + info->textures_used |= mask; + + if (op == nir_texop_txf || + op == nir_texop_txf_ms || + op == nir_texop_txf_ms_mcs) + info->textures_used_by_txf |= mask; +} + static bool lower_sampler(nir_tex_instr *instr, struct lower_samplers_as_deref_state *state, nir_builder *b) @@ -225,6 +245,7 @@ lower_sampler(nir_tex_instr *instr, struct lower_samplers_as_deref_state *state, if (texture_deref) { nir_instr_rewrite_src(&instr->instr, &instr->src[texture_idx].src, nir_src_for_ssa(&texture_deref->dest.ssa)); + record_textures_used(&b->shader->info, texture_deref, instr->op); } } @@ -305,6 +326,9 @@ gl_nir_lower_samplers_as_deref(nir_shader *shader, state.remap_table = _mesa_hash_table_create(NULL, _mesa_key_hash_string, _mesa_key_string_equal); + shader->info.textures_used = 0; + shader->info.textures_used_by_txf = 0; + nir_foreach_function(function, shader) { if (function->impl) progress |= lower_impl(function->impl, &state); diff --git a/lib/mesa/src/compiler/glsl/gl_nir_opt_access.c b/lib/mesa/src/compiler/glsl/gl_nir_opt_access.c new file mode 100644 index 000000000..760214fbb --- /dev/null +++ b/lib/mesa/src/compiler/glsl/gl_nir_opt_access.c @@ -0,0 +1,333 @@ +/* + * Copyright © 2019 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir/nir.h" +#include "gl_nir.h" + +/* This pass optimizes GL access qualifiers. So far it does two things: + * + * - Infer readonly when it's missing. + * - Infer ACCESS_CAN_REORDER when the following are true: + * - Either there are no writes, or ACCESS_NON_WRITEABLE and ACCESS_RESTRICT + * are both set. In either case there are no writes to the underlying + * memory. + * - If ACCESS_COHERENT is set, then there must be no memory barriers + * involving the access. Coherent accesses may return different results + * before and after barriers. + * - ACCESS_VOLATILE is not set. + * + * If these conditions are true, then image and buffer reads may be treated as + * if they were uniform buffer reads, i.e. they may be arbitrarily moved, + * combined, rematerialized, etc. + */ + +struct access_state { + struct set *vars_written; + bool images_written; + bool buffers_written; + bool image_barriers; + bool buffer_barriers; +}; + +static void +gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr) +{ + nir_variable *var; + switch (instr->intrinsic) { + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_min: + case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_atomic_fadd: + var = nir_intrinsic_get_var(instr, 0); + + /* In OpenGL, buffer images use normal buffer objects, whereas other + * image types use textures which cannot alias with buffer objects. + * Therefore we have to group buffer samplers together with SSBOs. 
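+ * A write through an imageBuffer may therefore alias an SSBO, while + * writes through e.g. an image2D never can.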
+ */ + if (glsl_get_sampler_dim(glsl_without_array(var->type)) == + GLSL_SAMPLER_DIM_BUF) + state->buffers_written = true; + else + state->images_written = true; + + if (var->data.mode == nir_var_uniform) + _mesa_set_add(state->vars_written, var); + break; + + case nir_intrinsic_bindless_image_store: + case nir_intrinsic_bindless_image_atomic_add: + case nir_intrinsic_bindless_image_atomic_min: + case nir_intrinsic_bindless_image_atomic_max: + case nir_intrinsic_bindless_image_atomic_and: + case nir_intrinsic_bindless_image_atomic_or: + case nir_intrinsic_bindless_image_atomic_xor: + case nir_intrinsic_bindless_image_atomic_exchange: + case nir_intrinsic_bindless_image_atomic_comp_swap: + case nir_intrinsic_bindless_image_atomic_fadd: + if (nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_BUF) + state->buffers_written = true; + else + state->images_written = true; + break; + + case nir_intrinsic_store_deref: + case nir_intrinsic_deref_atomic_add: + case nir_intrinsic_deref_atomic_imin: + case nir_intrinsic_deref_atomic_umin: + case nir_intrinsic_deref_atomic_imax: + case nir_intrinsic_deref_atomic_umax: + case nir_intrinsic_deref_atomic_and: + case nir_intrinsic_deref_atomic_or: + case nir_intrinsic_deref_atomic_xor: + case nir_intrinsic_deref_atomic_exchange: + case nir_intrinsic_deref_atomic_comp_swap: + case nir_intrinsic_deref_atomic_fadd: + case nir_intrinsic_deref_atomic_fmin: + case nir_intrinsic_deref_atomic_fmax: + case nir_intrinsic_deref_atomic_fcomp_swap: + var = nir_intrinsic_get_var(instr, 0); + if (var->data.mode != nir_var_mem_ssbo) + break; + + _mesa_set_add(state->vars_written, var); + state->buffers_written = true; + break; + + case nir_intrinsic_memory_barrier: + state->buffer_barriers = true; + state->image_barriers = true; + break; + + case nir_intrinsic_memory_barrier_buffer: + state->buffer_barriers = true; + break; + + case nir_intrinsic_memory_barrier_image: + state->image_barriers = true; + break; + + default: + break; + } +} + +static bool +process_variable(struct access_state *state, nir_variable *var) +{ + if (var->data.mode != nir_var_mem_ssbo && + !(var->data.mode == nir_var_uniform && + glsl_type_is_image(var->type))) + return false; + + /* Ignore variables we've already marked */ + if (var->data.image.access & ACCESS_CAN_REORDER) + return false; + + if (!(var->data.image.access & ACCESS_NON_WRITEABLE) && + !_mesa_set_search(state->vars_written, var)) { + var->data.image.access |= ACCESS_NON_WRITEABLE; + return true; + } + + return false; +} + +static bool +can_reorder(struct access_state *state, enum gl_access_qualifier access, + bool is_buffer, bool is_ssbo) +{ + bool is_any_written = is_buffer ? state->buffers_written : + state->images_written; + + /* Can we guarantee that the underlying memory is never written? */ + if (!is_any_written || + ((access & ACCESS_NON_WRITEABLE) && + (access & ACCESS_RESTRICT))) { + /* Note: memoryBarrierBuffer() is only guaranteed to flush buffer + * variables and not imageBuffers, so we only consider the GL-level + * type here. + */ + bool is_any_barrier = is_ssbo ? 
+ state->buffer_barriers : state->image_barriers; + + return (!is_any_barrier || !(access & ACCESS_COHERENT)) && + !(access & ACCESS_VOLATILE); + } + + return false; +} + +static bool +process_intrinsic(struct access_state *state, nir_intrinsic_instr *instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_bindless_image_load: + if (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER) + return false; + + /* We have less information about bindless intrinsics, since we can't + * always trace uses back to the variable. Don't try to infer if it's + * read-only, unless there are no image writes at all. + */ + bool progress = false; + bool is_buffer = + nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_BUF; + + bool is_any_written = + is_buffer ? state->buffers_written : state->images_written; + + if (!(nir_intrinsic_access(instr) & ACCESS_NON_WRITEABLE) && + !is_any_written) { + progress = true; + nir_intrinsic_set_access(instr, + nir_intrinsic_access(instr) | + ACCESS_NON_WRITEABLE); + } + + if (can_reorder(state, nir_intrinsic_access(instr), is_buffer, false)) { + progress = true; + nir_intrinsic_set_access(instr, + nir_intrinsic_access(instr) | + ACCESS_CAN_REORDER); + } + + return progress; + + case nir_intrinsic_load_deref: + case nir_intrinsic_image_deref_load: { + nir_variable *var = nir_intrinsic_get_var(instr, 0); + + if (instr->intrinsic == nir_intrinsic_load_deref && + var->data.mode != nir_var_mem_ssbo) + return false; + + if (nir_intrinsic_access(instr) & ACCESS_CAN_REORDER) + return false; + + bool progress = false; + + /* Check if we were able to mark the whole variable non-writeable */ + if (!(nir_intrinsic_access(instr) & ACCESS_NON_WRITEABLE) && + var->data.image.access & ACCESS_NON_WRITEABLE) { + progress = true; + nir_intrinsic_set_access(instr, + nir_intrinsic_access(instr) | + ACCESS_NON_WRITEABLE); + } + + bool is_ssbo = var->data.mode == nir_var_mem_ssbo; + + bool is_buffer = is_ssbo || + glsl_get_sampler_dim(glsl_without_array(var->type)) == GLSL_SAMPLER_DIM_BUF; + + if (can_reorder(state, nir_intrinsic_access(instr), is_buffer, is_ssbo)) { + progress = true; + nir_intrinsic_set_access(instr, + nir_intrinsic_access(instr) | + ACCESS_CAN_REORDER); + } + + return progress; + } + + default: + return false; + } +} + +static bool +opt_access_impl(struct access_state *state, + nir_function_impl *impl) +{ + bool progress = false; + + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + if (instr->type == nir_instr_type_intrinsic) + progress |= process_intrinsic(state, + nir_instr_as_intrinsic(instr)); + } + } + + if (progress) { + nir_metadata_preserve(impl, + nir_metadata_block_index | + nir_metadata_dominance | + nir_metadata_live_ssa_defs | + nir_metadata_loop_analysis); + } + + + return progress; +} + +bool +gl_nir_opt_access(nir_shader *shader) +{ + struct access_state state = { + .vars_written = _mesa_pointer_set_create(NULL), + }; + + bool var_progress = false; + bool progress = false; + + nir_foreach_function(func, shader) { + if (func->impl) { + nir_foreach_block(block, func->impl) { + nir_foreach_instr(instr, block) { + if (instr->type == nir_instr_type_intrinsic) + gather_intrinsic(&state, nir_instr_as_intrinsic(instr)); + } + } + } + } + + nir_foreach_variable(var, &shader->uniforms) + var_progress |= process_variable(&state, var); + + nir_foreach_function(func, shader) { + if (func->impl) { + progress |= opt_access_impl(&state, func->impl); + + /* If we make a change to the uniforms, update all the impls. 
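* The access flags live on the shared nir_variables, so a variable-only * change effectively modifies every impl that can see those variables, * not just the ones where an intrinsic was rewritten.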
*/ + if (var_progress) { + nir_metadata_preserve(func->impl, + nir_metadata_block_index | + nir_metadata_dominance | + nir_metadata_live_ssa_defs | + nir_metadata_loop_analysis); + } + } + } + + progress |= var_progress; + + _mesa_set_destroy(state.vars_written, NULL); + return progress; +} + diff --git a/lib/mesa/src/compiler/glsl/glcpp/meson.build b/lib/mesa/src/compiler/glsl/glcpp/meson.build index 677baf1df..5a665444f 100644 --- a/lib/mesa/src/compiler/glsl/glcpp/meson.build +++ b/lib/mesa/src/compiler/glsl/glcpp/meson.build @@ -38,7 +38,7 @@ glcpp_lex = custom_target( libglcpp = static_library( 'glcpp', [glcpp_lex, glcpp_parse, files('glcpp.h', 'pp.c')], - link_with : libmesa_util, + dependencies : idep_mesautil, include_directories : [inc_common], c_args : [c_vis_args, no_override_init_args, c_msvc_compat_args], cpp_args : [cpp_vis_args, cpp_msvc_compat_args], diff --git a/lib/mesa/src/compiler/glsl/int64.glsl b/lib/mesa/src/compiler/glsl/int64.glsl new file mode 100644 index 000000000..538f56cff --- /dev/null +++ b/lib/mesa/src/compiler/glsl/int64.glsl @@ -0,0 +1,121 @@ +/* Compile with: + * + * glsl_compiler --version 400 --dump-builder int64.glsl > builtin_int64.h + * + * Version 4.00+ is required for umulExtended. + */ +#version 400 +#extension GL_ARB_gpu_shader_int64: require +#extension GL_ARB_shading_language_420pack: require + +uvec2 +umul64(uvec2 a, uvec2 b) +{ + uvec2 result; + + umulExtended(a.x, b.x, result.y, result.x); + result.y += a.x * b.y + a.y * b.x; + + return result; +} + +ivec2 +sign64(ivec2 a) +{ + ivec2 result; + + result.y = a.y >> 31; + result.x = result.y | int((a.x | a.y) != 0); + + return result; +} + +uvec4 +udivmod64(uvec2 n, uvec2 d) +{ + uvec2 quot = uvec2(0U, 0U); + int log2_denom = findMSB(d.y) + 32; + + /* If the upper 32 bits of denom are non-zero, it is impossible for shifts + * greater than 32 bits to occur. If the upper 32 bits of the numerator + * are zero, it is impossible for (denom << [63, 32]) <= numer unless + * denom == 0. + */ + if (d.y == 0 && n.y >= d.x) { + log2_denom = findMSB(d.x); + + /* Since the upper 32 bits of denom are zero, log2_denom <= 31 and we + * don't have to compare log2_denom inside the loop as is done in the + * general case (below). + */ + for (int i = 31; i >= 1; i--) { + if (log2_denom <= 31 - i && (d.x << i) <= n.y) { + n.y -= d.x << i; + quot.y |= 1U << i; + } + } + + /* log2_denom is always <= 31, so manually peel the last loop + * iteration. + */ + if (d.x <= n.y) { + n.y -= d.x; + quot.y |= 1U; + } + } + + uint64_t d64 = packUint2x32(d); + uint64_t n64 = packUint2x32(n); + for (int i = 31; i >= 1; i--) { + if (log2_denom <= 63 - i && (d64 << i) <= n64) { + n64 -= d64 << i; + quot.x |= 1U << i; + } + } + + /* log2_denom is always <= 63, so manually peel the last loop + * iteration. + */ + if (d64 <= n64) { + n64 -= d64; + quot.x |= 1U; + } + + return uvec4(quot, unpackUint2x32(n64)); +} + +uvec2 +udiv64(uvec2 n, uvec2 d) +{ + return udivmod64(n, d).xy; +} + +ivec2 +idiv64(ivec2 _n, ivec2 _d) +{ + const bool negate = (_n.y < 0) != (_d.y < 0); + uvec2 n = unpackUint2x32(uint64_t(abs(packInt2x32(_n)))); + uvec2 d = unpackUint2x32(uint64_t(abs(packInt2x32(_d)))); + + uvec2 quot = udivmod64(n, d).xy; + + return negate ? 
unpackInt2x32(-int64_t(packUint2x32(quot))) : ivec2(quot); +} + +uvec2 +umod64(uvec2 n, uvec2 d) +{ + return udivmod64(n, d).zw; +} + +ivec2 +imod64(ivec2 _n, ivec2 _d) +{ + const bool negate = (_n.y < 0) != (_d.y < 0); + uvec2 n = unpackUint2x32(uint64_t(abs(packInt2x32(_n)))); + uvec2 d = unpackUint2x32(uint64_t(abs(packInt2x32(_d)))); + + uvec2 rem = udivmod64(n, d).zw; + + return negate ? unpackInt2x32(-int64_t(packUint2x32(rem))) : ivec2(rem); +} diff --git a/lib/mesa/src/compiler/glsl/linker_util.cpp b/lib/mesa/src/compiler/glsl/linker_util.cpp index d2724c239..99e3693b5 100644 --- a/lib/mesa/src/compiler/glsl/linker_util.cpp +++ b/lib/mesa/src/compiler/glsl/linker_util.cpp @@ -28,6 +28,43 @@ /* Utility methods shared between the GLSL IR and the NIR */ +/* From the OpenGL 4.6 specification, 7.3.1.1 Naming Active Resources: + * + * "For an active shader storage block member declared as an array of an + * aggregate type, an entry will be generated only for the first array + * element, regardless of its type. Such block members are referred to as + * top-level arrays. If the block member is an aggregate type, the + * enumeration rules are then applied recursively." + */ +bool +link_util_should_add_buffer_variable(struct gl_shader_program *prog, + struct gl_uniform_storage *uniform, + int top_level_array_base_offset, + int top_level_array_size_in_bytes, + int second_element_offset, + int block_index) +{ + /* If the uniform is not a shader storage buffer or is not an array, + * return true. + */ + if (!uniform->is_shader_storage || top_level_array_size_in_bytes == 0) + return true; + + int after_top_level_array = top_level_array_base_offset + + top_level_array_size_in_bytes; + + /* Check for a new block, or that we are not dealing with array elements + * of a top-level member array other than the first element. + */ + if (block_index != uniform->block_index || + uniform->offset >= after_top_level_array || + uniform->offset < second_element_offset) { + return true; + } + + return false; +} + bool link_util_add_program_resource(struct gl_shader_program *prog, struct set *resource_set, diff --git a/lib/mesa/src/compiler/glsl/linker_util.h b/lib/mesa/src/compiler/glsl/linker_util.h index 1c3674f35..20a7b9752 100644 --- a/lib/mesa/src/compiler/glsl/linker_util.h +++ b/lib/mesa/src/compiler/glsl/linker_util.h @@ -51,6 +51,14 @@ void linker_warning(struct gl_shader_program *prog, const char *fmt, ...); bool +link_util_should_add_buffer_variable(struct gl_shader_program *prog, + struct gl_uniform_storage *uniform, + int top_level_array_base_offset, + int top_level_array_size_in_bytes, + int second_element_offset, + int block_index); + +bool link_util_add_program_resource(struct gl_shader_program *prog, struct set *resource_set, GLenum type, const void *data, uint8_t stages); diff --git a/lib/mesa/src/compiler/glsl/loop_analysis.cpp b/lib/mesa/src/compiler/glsl/loop_analysis.cpp index e90eb1ccd..aae8fc62c 100644 --- a/lib/mesa/src/compiler/glsl/loop_analysis.cpp +++ b/lib/mesa/src/compiler/glsl/loop_analysis.cpp @@ -107,7 +107,7 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, return -1; } - if (!iter->type->is_integer()) { + if (!iter->type->is_integer_32()) { const ir_expression_operation op = iter->type->is_double() ? 
ir_unop_d2i : ir_unop_f2i; ir_rvalue *cast = @@ -288,6 +288,9 @@ loop_state::get(const ir_loop *ir) loop_variable * loop_variable_state::get(const ir_variable *ir) { + if (ir == NULL) + return NULL; + hash_entry *entry = _mesa_hash_table_search(this->var_hash, ir); return entry ? (loop_variable *) entry->data : NULL; } diff --git a/lib/mesa/src/compiler/glsl/loop_unroll.cpp b/lib/mesa/src/compiler/glsl/loop_unroll.cpp index 874f41856..7e97c3cdd 100644 --- a/lib/mesa/src/compiler/glsl/loop_unroll.cpp +++ b/lib/mesa/src/compiler/glsl/loop_unroll.cpp @@ -180,6 +180,11 @@ loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) void *const mem_ctx = ralloc_parent(ir); loop_variable_state *const ls = this->state->get(ir); + /* If there are no terminators, then the loop iteration count must be 1. + * This is the 'do { } while (false);' case. + */ + assert(!ls->terminators.is_empty() || iterations == 1); + ir_instruction *first_ir = (ir_instruction *) ir->body_instructions.get_head(); @@ -221,7 +226,8 @@ loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) * the loop, or if the exit branch contains instructions. This ensures we * execute any instructions before the terminator or in its exit branch. */ - if (limit_if != first_ir->as_if() || exit_branch_has_instructions) + if (!ls->terminators.is_empty() && + (limit_if != first_ir->as_if() || exit_branch_has_instructions)) iterations++; for (int i = 0; i < iterations; i++) { diff --git a/lib/mesa/src/compiler/glsl/lower_shared_reference.cpp b/lib/mesa/src/compiler/glsl/lower_shared_reference.cpp index 5954ccce4..fb6af0c08 100644 --- a/lib/mesa/src/compiler/glsl/lower_shared_reference.cpp +++ b/lib/mesa/src/compiler/glsl/lower_shared_reference.cpp @@ -354,7 +354,7 @@ lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) ir_rvalue *deref = (ir_rvalue *) inst; assert(deref->type->is_scalar() && - (deref->type->is_integer() || deref->type->is_float())); + (deref->type->is_integer_32() || deref->type->is_float())); ir_variable *var = deref->variable_referenced(); assert(var); diff --git a/lib/mesa/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp b/lib/mesa/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp index 6fe4fe62b..c22789c39 100644 --- a/lib/mesa/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp +++ b/lib/mesa/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp @@ -280,7 +280,7 @@ struct switch_generator { unsigned middle = (begin + end) >> 1; - assert(index->type->is_integer()); + assert(index->type->is_integer_32()); ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT) ? new(body.mem_ctx) ir_constant((unsigned)middle) diff --git a/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp b/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp index 2aae30d82..0c09630fa 100644 --- a/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp +++ b/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp @@ -63,6 +63,16 @@ vector_deref_visitor::visit_enter(ir_assignment *ir) if (!deref->array->type->is_vector()) return ir_rvalue_enter_visitor::visit_enter(ir); + /* SSBOs and shared variables are backed by memory and may be accessed by + * multiple threads simultaneously. It's not safe to lower a single + * component store to a load-vec-store because it may race with writes to + * other components. 
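+ * For example, rewriting buf.v.x = 1.0 as buf.v = vec4(1.0, buf.v.yzw) + * could lose a write to buf.v.y that lands between the load and the + * store.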
+ */ + ir_variable *var = deref->variable_referenced(); + if (var->data.mode == ir_var_shader_storage || + var->data.mode == ir_var_shader_shared) + return ir_rvalue_enter_visitor::visit_enter(ir); + ir_rvalue *const new_lhs = deref->array; void *mem_ctx = ralloc_parent(ir); @@ -150,6 +160,17 @@ vector_deref_visitor::handle_rvalue(ir_rvalue **rv) if (!deref->array->type->is_vector()) return; + /* Back-ends need to be able to handle derefs on vectors for SSBOs, UBOs, + * and shared variables. They have to handle it for writes anyway so we + * may as well require it for reads. + */ + ir_variable *var = deref->variable_referenced(); + if (var && (var->data.mode == ir_var_shader_storage || + var->data.mode == ir_var_shader_shared || + (var->data.mode == ir_var_uniform && + var->get_interface_type()))) + return; + void *mem_ctx = ralloc_parent(deref); *rv = new(mem_ctx) ir_expression(ir_binop_vector_extract, deref->array, diff --git a/lib/mesa/src/compiler/glsl/meson.build b/lib/mesa/src/compiler/glsl/meson.build index 8096f2d04..3f5f2463e 100644 --- a/lib/mesa/src/compiler/glsl/meson.build +++ b/lib/mesa/src/compiler/glsl/meson.build @@ -74,14 +74,18 @@ files_libglsl = files( 'builtin_variables.cpp', 'generate_ir.cpp', 'gl_nir_lower_atomics.c', + 'gl_nir_lower_bindless_images.c', + 'gl_nir_lower_buffers.c', 'gl_nir_lower_samplers.c', 'gl_nir_lower_samplers_as_deref.c', 'gl_nir_link_atomics.c', + 'gl_nir_link_uniform_blocks.c', 'gl_nir_link_uniform_initializers.c', 'gl_nir_link_uniforms.c', 'gl_nir_link_xfb.c', 'gl_nir_linker.c', 'gl_nir_linker.h', + 'gl_nir_opt_access.c', 'gl_nir.h', 'glsl_parser_extras.cpp', 'glsl_parser_extras.h', @@ -236,8 +240,8 @@ libglsl_standalone = static_library( c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args], cpp_args : [cpp_vis_args, cpp_msvc_compat_args], include_directories : [inc_common], - link_with : [libglsl, libglsl_util, libmesa_util], - dependencies : [dep_thread], + link_with : [libglsl, libglsl_util], + dependencies : idep_mesautil, build_by_default : false, ) diff --git a/lib/mesa/src/compiler/glsl/opt_dead_builtin_varyings.cpp b/lib/mesa/src/compiler/glsl/opt_dead_builtin_varyings.cpp index 6ed00128e..853847ebf 100644 --- a/lib/mesa/src/compiler/glsl/opt_dead_builtin_varyings.cpp +++ b/lib/mesa/src/compiler/glsl/opt_dead_builtin_varyings.cpp @@ -323,14 +323,14 @@ public: if (!(external_color_usage & (1 << i))) { if (info->color[i]) { - util_snprintf(name, 32, "gl_%s_FrontColor%i_dummy", mode_str, i); + snprintf(name, 32, "gl_%s_FrontColor%i_dummy", mode_str, i); this->new_color[i] = new (ctx) ir_variable(glsl_type::vec4_type, name, ir_var_temporary); } if (info->backcolor[i]) { - util_snprintf(name, 32, "gl_%s_BackColor%i_dummy", mode_str, i); + snprintf(name, 32, "gl_%s_BackColor%i_dummy", mode_str, i); this->new_backcolor[i] = new (ctx) ir_variable(glsl_type::vec4_type, name, ir_var_temporary); @@ -342,7 +342,7 @@ public: info->fog) { char name[32]; - util_snprintf(name, 32, "gl_%s_FogFragCoord_dummy", mode_str); + snprintf(name, 32, "gl_%s_FogFragCoord_dummy", mode_str); this->new_fog = new (ctx) ir_variable(glsl_type::float_type, name, ir_var_temporary); } @@ -366,13 +366,13 @@ public: if (!(external_usage & (1 << i))) { /* This varying is unused in the next stage. Declare * a temporary instead of an output. 
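* Stores to the temporary are then trivially dead and later * optimization passes delete them.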
*/ - util_snprintf(name, 32, "gl_%s_%s%i_dummy", mode_str, var_name, i); + snprintf(name, 32, "gl_%s_%s%i_dummy", mode_str, var_name, i); new_var[i] = new (ctx) ir_variable(glsl_type::vec4_type, name, ir_var_temporary); } else { - util_snprintf(name, 32, "gl_%s_%s%i", mode_str, var_name, i); + snprintf(name, 32, "gl_%s_%s%i", mode_str, var_name, i); new_var[i] = new(ctx) ir_variable(glsl_type::vec4_type, name, this->info->mode); diff --git a/lib/mesa/src/compiler/glsl/serialize.cpp b/lib/mesa/src/compiler/glsl/serialize.cpp index ad258f8bc..552300f7e 100644 --- a/lib/mesa/src/compiler/glsl/serialize.cpp +++ b/lib/mesa/src/compiler/glsl/serialize.cpp @@ -435,7 +435,11 @@ write_uniforms(struct blob *metadata, struct gl_shader_program *prog) for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) { encode_type_to_blob(metadata, prog->data->UniformStorage[i].type); blob_write_uint32(metadata, prog->data->UniformStorage[i].array_elements); - blob_write_string(metadata, prog->data->UniformStorage[i].name); + if (prog->data->UniformStorage[i].name) { + blob_write_string(metadata, prog->data->UniformStorage[i].name); + } else { + blob_write_string(metadata, ""); + } blob_write_uint32(metadata, prog->data->UniformStorage[i].builtin); blob_write_uint32(metadata, prog->data->UniformStorage[i].remap_location); blob_write_uint32(metadata, prog->data->UniformStorage[i].block_index); @@ -786,7 +790,11 @@ write_program_resource_data(struct blob *metadata, encode_type_to_blob(metadata, var->interface_type); encode_type_to_blob(metadata, var->outermost_struct_type); - blob_write_string(metadata, var->name); + if (var->name) { + blob_write_string(metadata, var->name); + } else { + blob_write_string(metadata, ""); + } size_t s_var_size, s_var_ptrs; get_shader_var_and_pointer_sizes(&s_var_size, &s_var_ptrs, var); @@ -1065,6 +1073,7 @@ write_shader_metadata(struct blob *metadata, gl_linked_shader *shader) sizeof(glprog->sh.SamplerTargets)); blob_write_uint32(metadata, glprog->ShadowSamplers); blob_write_uint32(metadata, glprog->ExternalSamplersUsed); + blob_write_uint32(metadata, glprog->sh.ShaderStorageBlocksWriteAccess); blob_write_bytes(metadata, glprog->sh.ImageAccess, sizeof(glprog->sh.ImageAccess)); @@ -1119,6 +1128,7 @@ read_shader_metadata(struct blob_reader *metadata, sizeof(glprog->sh.SamplerTargets)); glprog->ShadowSamplers = blob_read_uint32(metadata); glprog->ExternalSamplersUsed = blob_read_uint32(metadata); + glprog->sh.ShaderStorageBlocksWriteAccess = blob_read_uint32(metadata); blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageAccess, sizeof(glprog->sh.ImageAccess)); diff --git a/lib/mesa/src/compiler/glsl/tests/array_refcount_test.cpp b/lib/mesa/src/compiler/glsl/tests/array_refcount_test.cpp index 45c204dc2..edd9cf42a 100644 --- a/lib/mesa/src/compiler/glsl/tests/array_refcount_test.cpp +++ b/lib/mesa/src/compiler/glsl/tests/array_refcount_test.cpp @@ -477,7 +477,7 @@ TEST_F(array_refcount_test, do_not_process_array_inside_structure) }; const glsl_type *const record_of_array_3_of_int = - glsl_type::get_record_instance(fields, ARRAY_SIZE(fields), "S"); + glsl_type::get_struct_instance(fields, ARRAY_SIZE(fields), "S"); ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::int_type, "a", diff --git a/lib/mesa/src/compiler/glsl/tests/meson.build b/lib/mesa/src/compiler/glsl/tests/meson.build index 02f3355c4..dc017ca40 100644 --- a/lib/mesa/src/compiler/glsl/tests/meson.build +++ b/lib/mesa/src/compiler/glsl/tests/meson.build @@ -30,19 +30,20 @@ test( suite : ['compiler', 'glsl'], ) 
-test( - 'cache_test', - executable( +if with_shader_cache + test( 'cache_test', - 'cache_test.c', - c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args], - include_directories : [inc_common, inc_glsl], - link_with : [libglsl], - dependencies : [dep_clock, dep_thread], - ), - suite : ['compiler', 'glsl'], -) - + executable( + 'cache_test', + 'cache_test.c', + c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args], + include_directories : [inc_common, inc_glsl], + link_with : [libglsl], + dependencies : [dep_clock, dep_thread], + ), + suite : ['compiler', 'glsl'], + ) +endif test( 'general_ir_test', @@ -110,3 +111,14 @@ test( ], suite : ['compiler', 'glsl'], ) + +if with_tools.contains('glsl') + test( + 'glsl dump-builder test', + glsl_compiler, + args : ['--version', '110', '--dump-builder', + join_paths(meson.current_source_dir(), 'standalone_dump-builder.frag'), + ], + suite : ['compiler', 'glsl'], + ) +endif diff --git a/lib/mesa/src/compiler/glsl/tests/standalone_dump-builder.frag b/lib/mesa/src/compiler/glsl/tests/standalone_dump-builder.frag new file mode 100644 index 000000000..680f33474 --- /dev/null +++ b/lib/mesa/src/compiler/glsl/tests/standalone_dump-builder.frag @@ -0,0 +1,12 @@ +varying vec2 Var_18; +uniform sampler2D Var_19; +uniform sampler2D Var_1A; +uniform float Var_1B; +void main() +{ + vec3 Var_21 = texture2D(Var_19, Var_18).xyz; + vec3 Var_22 = texture2D(Var_1A, Var_18).xyz; + vec3 Var_23 = ((Var_22 - Var_21) / Var_1B); + (Var_22 += (Var_23 * Var_1B)); + (gl_FragColor = vec4(Var_22, 1.0)); +} |
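A note on udivmod64() from int64.glsl above: the returned uvec4 packs the quotient in .xy and the remainder in .zw, each as (low, high) 32-bit halves. A minimal check-shader sketch for the division identity n == q * d + r, assuming the same #version 400 / GL_ARB_gpu_shader_int64 environment and that udivmod64() is pasted in above main() (the uniform names and pass/fail colors here are illustrative, not part of the import):

#version 400
#extension GL_ARB_gpu_shader_int64: require

uniform uvec2 n;   /* numerator as (low, high) 32-bit halves */
uniform uvec2 d;   /* non-zero denominator as (low, high) halves */
out vec4 color;

/* udivmod64() from int64.glsl is assumed to be inserted here. */

void main()
{
   uvec4 qr = udivmod64(n, d);   /* .xy = quotient, .zw = remainder */
   uint64_t q = packUint2x32(qr.xy);
   uint64_t r = packUint2x32(qr.zw);

   /* n == q * d + r must hold for any non-zero d; green on pass. */
   bool ok = packUint2x32(n) == q * packUint2x32(d) + r;
   color = ok ? vec4(0.0, 1.0, 0.0, 1.0) : vec4(1.0, 0.0, 0.0, 1.0);
}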