author    | Jonathan Gray <jsg@cvs.openbsd.org> | 2019-05-23 05:33:34 +0000
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2019-05-23 05:33:34 +0000
commit    | 9886815a25d84be79f51e65ebd8e458bb5d26ca8 (patch)
tree      | a65edf018dd992543337433f7303fb29a6c8e8cf /lib/mesa/src/gallium/auxiliary/gallivm
parent    | e2a3acb64af2657b1181806818eacad061103c23 (diff)
Merge Mesa 19.0.5
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary/gallivm')
9 files changed, 88 insertions, 906 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c index f34883320..057c50ed2 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1992,6 +1992,8 @@ arch_rounding_available(const struct lp_type type) else if ((util_cpu_caps.has_altivec && (type.width == 32 && type.length == 4))) return TRUE; + else if (util_cpu_caps.has_neon) + return TRUE; return FALSE; } @@ -2099,7 +2101,7 @@ lp_build_round_arch(struct lp_build_context *bld, LLVMValueRef a, enum lp_build_round_mode mode) { - if (util_cpu_caps.has_sse4_1) { + if (util_cpu_caps.has_sse4_1 || util_cpu_caps.has_neon) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const char *intrinsic_root; @@ -2477,7 +2479,7 @@ lp_build_iround(struct lp_build_context *bld, else { LLVMValueRef half; - half = lp_build_const_vec(bld->gallivm, type, 0.5); + half = lp_build_const_vec(bld->gallivm, type, nextafterf(0.5, 0.0)); if (type.sign) { LLVMTypeRef vec_type = bld->vec_type; diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format.h index 6540caaa2..b1e95c4e6 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -165,8 +165,12 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, LLVMValueRef j); +/* + * S3TC + */ + LLVMValueRef -lp_build_fetch_cached_texels(struct gallivm_state *gallivm, +lp_build_fetch_s3tc_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, unsigned n, LLVMValueRef base_ptr, diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index b52acca1b..21680dba7 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -464,6 +464,7 @@ lp_build_pack_rgba_aos(struct gallivm_state *gallivm, * \param ptr address of the pixel block (or the texel if uncompressed) * \param i, j the sub-block pixel coordinates. For non-compressed formats * these will always be (0, 0). + * \param cache optional value pointing to a lp_build_format_cache structure * \return a 4 element vector with the pixel's RGBA values. */ LLVMValueRef @@ -728,7 +729,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, * s3tc rgb formats */ - if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && cache) { + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { struct lp_type tmp_type; LLVMValueRef tmp; @@ -737,7 +738,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, tmp_type.length = num_pixels * 4; tmp_type.norm = TRUE; - tmp = lp_build_fetch_cached_texels(gallivm, + tmp = lp_build_fetch_s3tc_rgba_aos(gallivm, format_desc, num_pixels, base_ptr, diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c deleted file mode 100644 index b683e7f96..000000000 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c +++ /dev/null @@ -1,374 +0,0 @@ -/************************************************************************** - * - * Copyright 2015 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "lp_bld_format.h" -#include "lp_bld_type.h" -#include "lp_bld_struct.h" -#include "lp_bld_const.h" -#include "lp_bld_flow.h" -#include "lp_bld_swizzle.h" - -#include "util/u_math.h" - - -/** - * @file - * Complex block-compression based formats are handled here by using a cache, - * so re-decoding of every pixel is not required. - * Especially for bilinear filtering, texel reuse is very high hence even - * a small cache helps. - * The elements in the cache are the decoded blocks - currently things - * are restricted to formats which are 4x4 block based, and the decoded - * texels must fit into 4x8 bits. - * The cache is direct mapped so hitrates aren't all that great and cache - * thrashing could happen. 
- * - * @author Roland Scheidegger <sroland@vmware.com> - */ - - -#if LP_BUILD_FORMAT_CACHE_DEBUG -static void -update_cache_access(struct gallivm_state *gallivm, - LLVMValueRef ptr, - unsigned count, - unsigned index) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef member_ptr, cache_access; - - assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL || - index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); - - member_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, ""); - cache_access = LLVMBuildLoad(builder, member_ptr, "cache_access"); - cache_access = LLVMBuildAdd(builder, cache_access, - LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), - count, 0), ""); - LLVMBuildStore(builder, cache_access, member_ptr); -} -#endif - - -static void -store_cached_block(struct gallivm_state *gallivm, - LLVMValueRef *col, - LLVMValueRef tag_value, - LLVMValueRef hash_index, - LLVMValueRef cache) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef ptr, indices[3]; - LLVMTypeRef type_ptr4x32; - unsigned count; - - type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0); - indices[0] = lp_build_const_int32(gallivm, 0); - indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); - indices[2] = hash_index; - ptr = LLVMBuildGEP(builder, cache, indices, Elements(indices), ""); - LLVMBuildStore(builder, tag_value, ptr); - - indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); - hash_index = LLVMBuildMul(builder, hash_index, - lp_build_const_int32(gallivm, 16), ""); - for (count = 0; count < 4; count++) { - indices[2] = hash_index; - ptr = LLVMBuildGEP(builder, cache, indices, Elements(indices), ""); - ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, ""); - LLVMBuildStore(builder, col[count], ptr); - hash_index = LLVMBuildAdd(builder, hash_index, - lp_build_const_int32(gallivm, 4), ""); - } -} - - -static LLVMValueRef -lookup_cached_pixel(struct gallivm_state *gallivm, - LLVMValueRef ptr, - LLVMValueRef index) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef member_ptr, indices[3]; - - indices[0] = lp_build_const_int32(gallivm, 0); - indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); - indices[2] = index; - member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); - return LLVMBuildLoad(builder, member_ptr, "cache_data"); -} - - -static LLVMValueRef -lookup_tag_data(struct gallivm_state *gallivm, - LLVMValueRef ptr, - LLVMValueRef index) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef member_ptr, indices[3]; - - indices[0] = lp_build_const_int32(gallivm, 0); - indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); - indices[2] = index; - member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), ""); - return LLVMBuildLoad(builder, member_ptr, "tag_data"); -} - - -static void -update_cached_block(struct gallivm_state *gallivm, - const struct util_format_description *format_desc, - LLVMValueRef ptr_addr, - LLVMValueRef hash_index, - LLVMValueRef cache) - -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); - LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); - LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); - LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4); - LLVMValueRef function; - LLVMValueRef tag_value, tmp_ptr; - LLVMValueRef col[4]; - unsigned i, j; - - /* - * Use 
format_desc->fetch_rgba_8unorm() for each pixel in the block. - * This doesn't actually make any sense whatsoever, someone would need - * to write a function doing this for all pixels in a block (either as - * an external c function or with generated code). Don't ask. - */ - - { - /* - * Function to call looks like: - * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) - */ - LLVMTypeRef ret_type; - LLVMTypeRef arg_types[4]; - LLVMTypeRef function_type; - - assert(format_desc->fetch_rgba_8unorm); - - ret_type = LLVMVoidTypeInContext(gallivm->context); - arg_types[0] = pi8t; - arg_types[1] = pi8t; - arg_types[2] = i32t; - arg_types[3] = i32t; - function_type = LLVMFunctionType(ret_type, arg_types, - Elements(arg_types), 0); - - /* make const pointer for the C fetch_rgba_8unorm function */ - function = lp_build_const_int_pointer(gallivm, - func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); - - /* cast the callee pointer to the function's type */ - function = LLVMBuildBitCast(builder, function, - LLVMPointerType(function_type, 0), - "cast callee"); - } - - tmp_ptr = lp_build_array_alloca(gallivm, i32x4, - lp_build_const_int32(gallivm, 16), - "tmp_decode_store"); - tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); - - /* - * Invoke format_desc->fetch_rgba_8unorm() for each pixel. - * This is going to be really really slow. - * Note: the block store format is actually - * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ... - */ - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) { - LLVMValueRef args[4]; - LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4); - - /* - * Note we actually supply a pointer to the start of the block, - * not the start of the texture. - */ - args[0] = LLVMBuildGEP(gallivm->builder, tmp_ptr, &dst_offset, 1, ""); - args[1] = ptr_addr; - args[2] = LLVMConstInt(i32t, i, 0); - args[3] = LLVMConstInt(i32t, j, 0); - LLVMBuildCall(builder, function, args, Elements(args), ""); - } - } - - /* Finally store the block - pointless mem copy + update tag. */ - tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), ""); - for (i = 0; i < 4; ++i) { - LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i); - LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, tmp_ptr, &tmp_offset, 1, ""); - col[i] = LLVMBuildLoad(builder, ptr, ""); - } - - tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr, - LLVMInt64TypeInContext(gallivm->context), ""); - store_cached_block(gallivm, col, tag_value, hash_index, cache); -} - - -/* - * Do a cached lookup. 
- * - * Returns (vectors of) 4x8 rgba aos value - */ -LLVMValueRef -lp_build_fetch_cached_texels(struct gallivm_state *gallivm, - const struct util_format_description *format_desc, - unsigned n, - LLVMValueRef base_ptr, - LLVMValueRef offset, - LLVMValueRef i, - LLVMValueRef j, - LLVMValueRef cache) - -{ - LLVMBuilderRef builder = gallivm->builder; - unsigned count, low_bit, log2size; - LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp; - LLVMValueRef ij_index, hash_index, hash_mask, block_index; - LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); - LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); - LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context); - struct lp_type type; - struct lp_build_context bld32; - memset(&type, 0, sizeof type); - type.width = 32; - type.length = n; - - assert(format_desc->block.width == 4); - assert(format_desc->block.height == 4); - - lp_build_context_init(&bld32, gallivm, type); - - /* - * compute hash - we use direct mapped cache, the hash function could - * be better but it needs to be simple - * per-element: - * compare offset with offset stored at tag (hash) - * if not equal decode/store block, update tag - * extract color from cache - * assemble result vector - */ - - /* TODO: not ideal with 32bit pointers... */ - - low_bit = util_logbase2(format_desc->block.bits / 8); - log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE); - addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, ""); - ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, ""); - ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc); - /* For the hash function, first mask off the unused lowest bits. Then just - do some xor with address bits - only use lower 32bits */ - ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, ""); - ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, - lp_build_const_int_vec(gallivm, type, low_bit), ""); - /* This only really makes sense for size 64,128,256 */ - hash_index = ptr_addrtrunc; - ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, - lp_build_const_int_vec(gallivm, type, 2*log2size), ""); - hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, ""); - tmp = LLVMBuildLShr(builder, hash_index, - lp_build_const_int_vec(gallivm, type, log2size), ""); - hash_index = LLVMBuildXor(builder, hash_index, tmp, ""); - - hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1); - hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, ""); - ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), ""); - ij_index = LLVMBuildAdd(builder, ij_index, j, ""); - block_index = LLVMBuildShl(builder, hash_index, - lp_build_const_int_vec(gallivm, type, 4), ""); - block_index = LLVMBuildAdd(builder, ij_index, block_index, ""); - - if (n > 1) { - color = LLVMGetUndef(LLVMVectorType(i32t, n)); - for (count = 0; count < n; count++) { - LLVMValueRef index, cond, colorx; - LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx; - struct lp_build_if_state if_ctx; - - index = lp_build_const_int32(gallivm, count); - offsetx = LLVMBuildExtractElement(builder, offset, index, ""); - addrx = LLVMBuildZExt(builder, offsetx, i64t, ""); - addrx = LLVMBuildAdd(builder, addrx, addr, ""); - block_indexx = LLVMBuildExtractElement(builder, block_index, index, ""); - hash_indexx = LLVMBuildLShr(builder, block_indexx, - lp_build_const_int32(gallivm, 4), ""); - offset_stored = lookup_tag_data(gallivm, cache, hash_indexx); - cond = LLVMBuildICmp(builder, LLVMIntNE, 
offset_stored, addrx, ""); - - lp_build_if(&if_ctx, gallivm, cond); - { - ptr_addrx = LLVMBuildIntToPtr(builder, addrx, - LLVMPointerType(i8t, 0), ""); - update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache); -#if LP_BUILD_FORMAT_CACHE_DEBUG - update_cache_access(gallivm, cache, 1, - LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); -#endif - } - lp_build_endif(&if_ctx); - - colorx = lookup_cached_pixel(gallivm, cache, block_indexx); - - color = LLVMBuildInsertElement(builder, color, colorx, - lp_build_const_int32(gallivm, count), ""); - } - } - else { - LLVMValueRef cond; - struct lp_build_if_state if_ctx; - - tmp = LLVMBuildZExt(builder, offset, i64t, ""); - addr = LLVMBuildAdd(builder, tmp, addr, ""); - offset_stored = lookup_tag_data(gallivm, cache, hash_index); - cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, ""); - - lp_build_if(&if_ctx, gallivm, cond); - { - tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), ""); - update_cached_block(gallivm, format_desc, tmp, hash_index, cache); -#if LP_BUILD_FORMAT_CACHE_DEBUG - update_cache_access(gallivm, cache, 1, - LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); -#endif - } - lp_build_endif(&if_ctx); - - color = lookup_cached_pixel(gallivm, cache, block_index); - } -#if LP_BUILD_FORMAT_CACHE_DEBUG - update_cache_access(gallivm, cache, n, - LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL); -#endif - return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), ""); -} - diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.c index 74ed16f33..cf1f058ff 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_intr.c @@ -241,6 +241,16 @@ lp_build_intrinsic(LLVMBuilderRef builder, function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args); + /* + * If llvm removes an intrinsic we use, we'll hit this abort (rather + * than a call to address zero in the jited code). + */ + if (LLVMGetIntrinsicID(function) == 0) { + _debug_printf("llvm (version 0x%x) found no intrinsic for %s, going to crash...\n", + HAVE_LLVM, name); + abort(); + } + if (!set_callsite_attrs) lp_add_func_attributes(function, attr_mask); diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index fcbdd5050..f307c26d4 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -556,11 +556,11 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, llvm::SmallVector<std::string, 16> MAttrs; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) -#if HAVE_LLVM >= 0x0400 - /* llvm-3.7+ implements sys::getHostCPUFeatures for x86, - * which allows us to enable/disable code generation based - * on the results of cpuid. +#if HAVE_LLVM >= 0x0400 && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) || defined(PIPE_ARCH_ARM)) + /* llvm-3.3+ implements sys::getHostCPUFeatures for Arm + * and llvm-3.7+ for x86, which allows us to enable/disable + * code generation based on the results of cpuid on these + * architectures. */ llvm::StringMap<bool> features; llvm::sys::getHostCPUFeatures(features); @@ -570,7 +570,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, ++f) { MAttrs.push_back(((*f).second ? 
"+" : "-") + (*f).first().str()); } -#else +#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) /* * We need to unset attributes because sometimes LLVM mistakenly assumes * certain features are present given the processor name. @@ -625,6 +625,12 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, MAttrs.push_back("-avx512vl"); #endif #endif +#if defined(PIPE_ARCH_ARM) + if (!util_cpu_caps.has_neon) { + MAttrs.push_back("-neon"); + MAttrs.push_back("-crypto"); + MAttrs.push_back("-vfp2"); + } #endif #if defined(PIPE_ARCH_PPC) diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index c46749dba..ad3a9e4a4 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -132,68 +132,6 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, /** - * Build LLVM code for texture coord wrapping, for nearest filtering, - * for float texcoords. - * \param coord the incoming texcoord (s,t or r) - * \param length the texture size along one dimension - * \param offset the texel offset along the coord axis - * \param is_pot if TRUE, length is a power of two - * \param wrap_mode one of PIPE_TEX_WRAP_x - * \param icoord the texcoord after wrapping, as int - */ -static void -lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld, - LLVMValueRef coord, - LLVMValueRef length, - LLVMValueRef offset, - boolean is_pot, - unsigned wrap_mode, - LLVMValueRef *icoord) -{ - struct lp_build_context *coord_bld = &bld->coord_bld; - LLVMValueRef length_minus_one; - - switch(wrap_mode) { - case PIPE_TEX_WRAP_REPEAT: - if (offset) { - /* this is definitely not ideal for POT case */ - offset = lp_build_int_to_float(coord_bld, offset); - offset = lp_build_div(coord_bld, offset, length); - coord = lp_build_add(coord_bld, coord, offset); - } - /* take fraction, unnormalize */ - coord = lp_build_fract_safe(coord_bld, coord); - coord = lp_build_mul(coord_bld, coord, length); - *icoord = lp_build_itrunc(coord_bld, coord); - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one); - if (bld->static_sampler_state->normalized_coords) { - /* scale coord to length */ - coord = lp_build_mul(coord_bld, coord, length); - } - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - coord = lp_build_add(coord_bld, coord, offset); - } - coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, - length_minus_one); - *icoord = lp_build_itrunc(coord_bld, coord); - break; - - case PIPE_TEX_WRAP_CLAMP: - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - case PIPE_TEX_WRAP_MIRROR_REPEAT: - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - default: - assert(0); - } -} - - -/** * Helper to compute the first coord and the weight for * linear wrap repeat npot textures */ @@ -425,129 +363,6 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, /** - * Build LLVM code for texture coord wrapping, for linear filtering, - * for float texcoords. 
- * \param block_length is the length of the pixel block along the - * coordinate axis - * \param coord the incoming texcoord (s,t or r) - * \param length the texture size along one dimension - * \param offset the texel offset along the coord axis - * \param is_pot if TRUE, length is a power of two - * \param wrap_mode one of PIPE_TEX_WRAP_x - * \param coord0 the first texcoord after wrapping, as int - * \param coord1 the second texcoord after wrapping, as int - * \param weight the filter weight as int (0-255) - * \param force_nearest if this coord actually uses nearest filtering - */ -static void -lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld, - unsigned block_length, - LLVMValueRef coord, - LLVMValueRef length, - LLVMValueRef offset, - boolean is_pot, - unsigned wrap_mode, - LLVMValueRef *coord0, - LLVMValueRef *coord1, - LLVMValueRef *weight, - unsigned force_nearest) -{ - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - struct lp_build_context *coord_bld = &bld->coord_bld; - LLVMBuilderRef builder = bld->gallivm->builder; - LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); - LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one); - - switch(wrap_mode) { - case PIPE_TEX_WRAP_REPEAT: - if (is_pot) { - /* mul by size and subtract 0.5 */ - coord = lp_build_mul(coord_bld, coord, length); - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - coord = lp_build_add(coord_bld, coord, offset); - } - if (!force_nearest) - coord = lp_build_sub(coord_bld, coord, half); - *coord1 = lp_build_add(coord_bld, coord, coord_bld->one); - /* convert to int, compute lerp weight */ - lp_build_ifloor_fract(coord_bld, coord, coord0, weight); - *coord1 = lp_build_ifloor(coord_bld, *coord1); - /* repeat wrap */ - length_minus_one = lp_build_itrunc(coord_bld, length_minus_one); - *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, ""); - *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, ""); - } - else { - LLVMValueRef mask; - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - offset = lp_build_div(coord_bld, offset, length); - coord = lp_build_add(coord_bld, coord, offset); - } - /* wrap with normalized floats is just fract */ - coord = lp_build_fract(coord_bld, coord); - /* unnormalize */ - coord = lp_build_mul(coord_bld, coord, length); - /* - * we avoided the 0.5/length division, have to fix up wrong - * edge cases with selects - */ - *coord1 = lp_build_add(coord_bld, coord, half); - coord = lp_build_sub(coord_bld, coord, half); - *weight = lp_build_fract(coord_bld, coord); - /* - * It is important for this comparison to be unordered - * (or need fract_safe above). 
- */ - mask = lp_build_compare(coord_bld->gallivm, coord_bld->type, - PIPE_FUNC_LESS, coord, coord_bld->zero); - *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord); - *coord0 = lp_build_itrunc(coord_bld, *coord0); - mask = lp_build_compare(coord_bld->gallivm, coord_bld->type, - PIPE_FUNC_LESS, *coord1, length); - *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero); - *coord1 = lp_build_itrunc(coord_bld, *coord1); - } - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - if (bld->static_sampler_state->normalized_coords) { - /* mul by tex size */ - coord = lp_build_mul(coord_bld, coord, length); - } - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - coord = lp_build_add(coord_bld, coord, offset); - } - /* subtract 0.5 */ - if (!force_nearest) { - coord = lp_build_sub(coord_bld, coord, half); - } - /* clamp to [0, length - 1] */ - coord = lp_build_min_ext(coord_bld, coord, length_minus_one, - GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); - coord = lp_build_max(coord_bld, coord, coord_bld->zero); - *coord1 = lp_build_add(coord_bld, coord, coord_bld->one); - /* convert to int, compute lerp weight */ - lp_build_ifloor_fract(coord_bld, coord, coord0, weight); - /* coord1 = min(coord1, length-1) */ - *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one); - *coord1 = lp_build_itrunc(coord_bld, *coord1); - break; - default: - assert(0); - *coord0 = int_coord_bld->zero; - *coord1 = int_coord_bld->zero; - *weight = coord_bld->zero; - break; - } - *weight = lp_build_mul_imm(coord_bld, *weight, 256); - *weight = lp_build_itrunc(coord_bld, *weight); - return; -} - - -/** * Fetch texels for image with nearest sampling. * Return filtered color as two vectors of 16-bit fixed point values. */ @@ -737,96 +552,6 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, /** - * Sample a single texture image with nearest sampling. - * If sampling a cube texture, r = cube face in [0,5]. - * Return filtered color as two vectors of 16-bit fixed point values. - * Does address calcs (except offsets) with floats. - * Useful for AVX which has support for 8x32 floats but not 8x32 ints. 
- */ -static void -lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld, - LLVMValueRef int_size, - LLVMValueRef row_stride_vec, - LLVMValueRef img_stride_vec, - LLVMValueRef data_ptr, - LLVMValueRef mipoffsets, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef r, - const LLVMValueRef *offsets, - LLVMValueRef *colors) - { - const unsigned dims = bld->dims; - LLVMValueRef width_vec, height_vec, depth_vec; - LLVMValueRef offset; - LLVMValueRef x_subcoord, y_subcoord; - LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL; - LLVMValueRef flt_size; - - flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size); - - lp_build_extract_image_sizes(bld, - &bld->float_size_bld, - bld->coord_type, - flt_size, - &width_vec, - &height_vec, - &depth_vec); - - /* Do texcoord wrapping */ - lp_build_sample_wrap_nearest_float(bld, - s, width_vec, offsets[0], - bld->static_texture_state->pot_width, - bld->static_sampler_state->wrap_s, - &x_icoord); - - if (dims >= 2) { - lp_build_sample_wrap_nearest_float(bld, - t, height_vec, offsets[1], - bld->static_texture_state->pot_height, - bld->static_sampler_state->wrap_t, - &y_icoord); - - if (dims >= 3) { - lp_build_sample_wrap_nearest_float(bld, - r, depth_vec, offsets[2], - bld->static_texture_state->pot_depth, - bld->static_sampler_state->wrap_r, - &z_icoord); - } - } - if (has_layer_coord(bld->static_texture_state->target)) { - z_icoord = r; - } - - /* - * From here on we deal with ints, and we should split up the 256bit - * vectors manually for better generated code. - */ - - /* - * compute texel offsets - - * cannot do offset calc with floats, difficult for block-based formats, - * and not enough precision anyway. - */ - lp_build_sample_offset(&bld->int_coord_bld, - bld->format_desc, - x_icoord, y_icoord, - z_icoord, - row_stride_vec, img_stride_vec, - &offset, - &x_subcoord, &y_subcoord); - if (mipoffsets) { - offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets); - } - - lp_build_sample_fetch_image_nearest(bld, data_ptr, offset, - x_subcoord, y_subcoord, - colors); -} - - -/** * Fetch texels for image with linear sampling. * Return filtered color as two vectors of 16-bit fixed point values. */ @@ -1213,175 +938,6 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /** - * Sample a single texture image with (bi-)(tri-)linear sampling. - * Return filtered color as two vectors of 16-bit fixed point values. - * Does address calcs (except offsets) with floats. - * Useful for AVX which has support for 8x32 floats but not 8x32 ints. 
- */ -static void -lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld, - LLVMValueRef int_size, - LLVMValueRef row_stride_vec, - LLVMValueRef img_stride_vec, - LLVMValueRef data_ptr, - LLVMValueRef mipoffsets, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef r, - const LLVMValueRef *offsets, - LLVMValueRef *colors) -{ - const unsigned dims = bld->dims; - LLVMValueRef width_vec, height_vec, depth_vec; - LLVMValueRef s_fpart; - LLVMValueRef t_fpart = NULL; - LLVMValueRef r_fpart = NULL; - LLVMValueRef x_stride, y_stride, z_stride; - LLVMValueRef x_offset0, x_offset1; - LLVMValueRef y_offset0, y_offset1; - LLVMValueRef z_offset0, z_offset1; - LLVMValueRef offset[2][2][2]; /* [z][y][x] */ - LLVMValueRef x_subcoord[2], y_subcoord[2]; - LLVMValueRef flt_size; - LLVMValueRef x_icoord0, x_icoord1; - LLVMValueRef y_icoord0, y_icoord1; - LLVMValueRef z_icoord0, z_icoord1; - unsigned x, y, z; - - flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size); - - lp_build_extract_image_sizes(bld, - &bld->float_size_bld, - bld->coord_type, - flt_size, - &width_vec, - &height_vec, - &depth_vec); - - /* do texcoord wrapping and compute texel offsets */ - lp_build_sample_wrap_linear_float(bld, - bld->format_desc->block.width, - s, width_vec, offsets[0], - bld->static_texture_state->pot_width, - bld->static_sampler_state->wrap_s, - &x_icoord0, &x_icoord1, - &s_fpart, - bld->static_sampler_state->force_nearest_s); - - if (dims >= 2) { - lp_build_sample_wrap_linear_float(bld, - bld->format_desc->block.height, - t, height_vec, offsets[1], - bld->static_texture_state->pot_height, - bld->static_sampler_state->wrap_t, - &y_icoord0, &y_icoord1, - &t_fpart, - bld->static_sampler_state->force_nearest_t); - - if (dims >= 3) { - lp_build_sample_wrap_linear_float(bld, - 1, /* block length (depth) */ - r, depth_vec, offsets[2], - bld->static_texture_state->pot_depth, - bld->static_sampler_state->wrap_r, - &z_icoord0, &z_icoord1, - &r_fpart, 0); - } - } - - /* - * From here on we deal with ints, and we should split up the 256bit - * vectors manually for better generated code. - */ - - /* get pixel, row and image strides */ - x_stride = lp_build_const_vec(bld->gallivm, - bld->int_coord_bld.type, - bld->format_desc->block.bits/8); - y_stride = row_stride_vec; - z_stride = img_stride_vec; - - /* - * compute texel offset - - * cannot do offset calc with floats, difficult for block-based formats, - * and not enough precision anyway. 
- */ - lp_build_sample_partial_offset(&bld->int_coord_bld, - bld->format_desc->block.width, - x_icoord0, x_stride, - &x_offset0, &x_subcoord[0]); - lp_build_sample_partial_offset(&bld->int_coord_bld, - bld->format_desc->block.width, - x_icoord1, x_stride, - &x_offset1, &x_subcoord[1]); - - /* add potential cube/array/mip offsets now as they are constant per pixel */ - if (has_layer_coord(bld->static_texture_state->target)) { - LLVMValueRef z_offset; - z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); - /* The r coord is the cube face in [0,5] or array layer */ - x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset); - x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset); - } - if (mipoffsets) { - x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets); - x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets); - } - - for (z = 0; z < 2; z++) { - for (y = 0; y < 2; y++) { - offset[z][y][0] = x_offset0; - offset[z][y][1] = x_offset1; - } - } - - if (dims >= 2) { - lp_build_sample_partial_offset(&bld->int_coord_bld, - bld->format_desc->block.height, - y_icoord0, y_stride, - &y_offset0, &y_subcoord[0]); - lp_build_sample_partial_offset(&bld->int_coord_bld, - bld->format_desc->block.height, - y_icoord1, y_stride, - &y_offset1, &y_subcoord[1]); - for (z = 0; z < 2; z++) { - for (x = 0; x < 2; x++) { - offset[z][0][x] = lp_build_add(&bld->int_coord_bld, - offset[z][0][x], y_offset0); - offset[z][1][x] = lp_build_add(&bld->int_coord_bld, - offset[z][1][x], y_offset1); - } - } - } - - if (dims >= 3) { - LLVMValueRef z_subcoord[2]; - lp_build_sample_partial_offset(&bld->int_coord_bld, - 1, - z_icoord0, z_stride, - &z_offset0, &z_subcoord[0]); - lp_build_sample_partial_offset(&bld->int_coord_bld, - 1, - z_icoord1, z_stride, - &z_offset1, &z_subcoord[1]); - for (y = 0; y < 2; y++) { - for (x = 0; x < 2; x++) { - offset[0][y][x] = lp_build_add(&bld->int_coord_bld, - offset[0][y][x], z_offset0); - offset[1][y][x] = lp_build_add(&bld->int_coord_bld, - offset[1][y][x], z_offset1); - } - } - } - - lp_build_sample_fetch_image_linear(bld, data_ptr, offset, - x_subcoord, y_subcoord, - s_fpart, t_fpart, r_fpart, - colors); -} - - -/** * Sample the texture/mipmap using given image filter and mip filter. * data0_ptr and data1_ptr point to the two mipmap levels to sample * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. 
@@ -1413,9 +969,6 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef mipoff1 = NULL; LLVMValueRef colors0; LLVMValueRef colors1; - boolean use_floats = util_cpu_caps.has_avx && - !util_cpu_caps.has_avx2 && - bld->coord_type.length > 4; /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, @@ -1430,39 +983,20 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); } - if (use_floats) { - if (img_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest_afloat(bld, - size0, - row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, offsets, - &colors0); - } - else { - assert(img_filter == PIPE_TEX_FILTER_LINEAR); - lp_build_sample_image_linear_afloat(bld, - size0, - row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, offsets, - &colors0); - } + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + size0, + row_stride0_vec, img_stride0_vec, + data_ptr0, mipoff0, s, t, r, offsets, + &colors0); } else { - if (img_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest(bld, - size0, - row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, offsets, - &colors0); - } - else { - assert(img_filter == PIPE_TEX_FILTER_LINEAR); - lp_build_sample_image_linear(bld, - size0, - row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, offsets, - &colors0); - } + assert(img_filter == PIPE_TEX_FILTER_LINEAR); + lp_build_sample_image_linear(bld, + size0, + row_stride0_vec, img_stride0_vec, + data_ptr0, mipoff0, s, t, r, offsets, + &colors0); } /* Store the first level's colors in the output variables */ @@ -1521,37 +1055,19 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); } - if (use_floats) { - if (img_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest_afloat(bld, - size1, - row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, offsets, - &colors1); - } - else { - lp_build_sample_image_linear_afloat(bld, - size1, - row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, offsets, - &colors1); - } + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, mipoff1, s, t, r, offsets, + &colors1); } else { - if (img_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest(bld, - size1, - row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, offsets, - &colors1); - } - else { - lp_build_sample_image_linear(bld, - size1, - row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, offsets, - &colors1); - } + lp_build_sample_image_linear(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, mipoff1, s, t, r, offsets, + &colors1); } /* interpolate samples from the two mipmap levels */ diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 018cca8f9..a6662c5e0 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -3549,10 +3549,6 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm, const struct util_format_description *format_desc; format_desc = util_format_description(static_texture_state->format); if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { - /* - * This is not 100% correct, if we have cache but the - * 
util_format_s3tc_prefer is true the cache won't get used - * regardless (could hook up the block decode there...) */ need_cache = TRUE; } } diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 5fecad4ea..d6af1d844 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -41,6 +41,7 @@ #include "util/u_debug.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_prim.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_info.h" @@ -1059,7 +1060,8 @@ emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, static LLVMValueRef get_indirect_index(struct lp_build_tgsi_soa_context *bld, unsigned reg_file, unsigned reg_index, - const struct tgsi_ind_register *indirect_reg) + const struct tgsi_ind_register *indirect_reg, + int index_limit) { LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; @@ -1106,9 +1108,9 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld, * larger than the declared size but smaller than the buffer size. */ if (reg_file != TGSI_FILE_CONSTANT) { + assert(index_limit >= 0); max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm, - uint_bld->type, - bld->bld_base.info->file_max[reg_file]); + uint_bld->type, index_limit); assert(!uint_bld->type.sign); index = lp_build_min(uint_bld, index, max_index); @@ -1225,7 +1227,8 @@ emit_fetch_constant( indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); /* All fetches are from the same constant buffer, so * we need to propagate the size to a vector to do a @@ -1364,7 +1367,8 @@ emit_fetch_immediate( indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); /* * Unlike for other reg classes, adding pixel offsets is unnecessary - * immediates are stored as full vectors (FIXME??? - might be better @@ -1438,7 +1442,8 @@ emit_fetch_input( indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); index_vec = get_soa_array_offsets(&bld_base->uint_bld, indirect_index, @@ -1524,19 +1529,33 @@ emit_fetch_gs_input( } if (reg->Register.Indirect) { + /* + * XXX: this is possibly not quite the right value, since file_max may be + * larger than the max attrib index, due to it being the max of declared + * inputs AND the max vertices per prim (which is 6 for tri adj). + * It should however be safe to use (since we always allocate + * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit). + */ + int index_limit = info->file_max[reg->Register.File]; attrib_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + index_limit); } else { attrib_index = lp_build_const_int32(gallivm, reg->Register.Index); } if (reg->Dimension.Indirect) { + /* + * A fixed 6 should do as well (which is what we allocate). 
+ */ + int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]); vertex_index = get_indirect_index(bld, reg->Register.File, reg->Dimension.Index, - ®->DimIndirect); + ®->DimIndirect, + index_limit); } else { vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); } @@ -1591,7 +1610,8 @@ emit_fetch_temporary( indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); index_vec = get_soa_array_offsets(&bld_base->uint_bld, indirect_index, @@ -1811,7 +1831,8 @@ emit_store_chan( indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); } else { assert(reg->Register.Index <= bld_base->info->file_max[reg->Register.File]); |
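
The last hunks above give `get_indirect_index()` an explicit `index_limit` parameter and clamp the computed register index against it with `lp_build_min()`. As a rough illustration of that idea outside of gallivm, here is a minimal scalar C sketch of the same clamp; the names `clamp_indirect_index`, `reg_base` and `rel_offset` are invented for this example and are not part of Mesa, and the real code operates on integer vectors rather than scalars.

```c
#include <assert.h>
#include <stdio.h>

/*
 * Simplified scalar analogue of the clamp done in get_indirect_index():
 * a base register index plus a runtime-relative offset, limited to the
 * declared maximum index for that register file so an out-of-range
 * indirect access cannot read past the file.
 */
static unsigned
clamp_indirect_index(unsigned reg_base, int rel_offset, int index_limit)
{
   int index = (int)reg_base + rel_offset;

   assert(index_limit >= 0);
   if (index < 0)
      index = 0;                /* negative indirections clamp to 0 */
   if (index > index_limit)
      index = index_limit;      /* mirrors lp_build_min() against max_index */
   return (unsigned)index;
}

int main(void)
{
   /* e.g. CONST[3 + ADDR[0].x] with ADDR holding 100 and file_max == 15 */
   printf("%u\n", clamp_indirect_index(3, 100, 15)); /* prints 15 */
   return 0;
}
```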