summaryrefslogtreecommitdiff
path: root/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2022-02-24 01:57:18 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2022-02-24 01:57:18 +0000
commitb24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (patch)
tree658ca4e6b41655f49463c85edbaeda48979c394c /lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
parent57768bbb154c2879d34ec20e401b19472e77aaf7 (diff)
Import Mesa 21.3.7
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c')
-rw-r--r--lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c250
1 file changed, 196 insertions(+), 54 deletions(-)
diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
index 38afac47d..1ce4be0ec 100644
--- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
@@ -27,14 +27,17 @@
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_quad.h"
#include "lp_bld_flow.h"
+#include "lp_bld_intr.h"
#include "lp_bld_struct.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "nir_deref.h"
+#include "nir_search_helpers.h"
static void visit_cf_list(struct lp_build_nir_context *bld_base,
struct exec_list *list);
@@ -47,7 +50,7 @@ static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRe
case nir_type_float:
switch (bit_size) {
case 16:
- return LLVMBuildBitCast(builder, val, LLVMVectorType(LLVMHalfTypeInContext(bld_base->base.gallivm->context), bld_base->base.type.length), "");
+ return LLVMBuildBitCast(builder, val, bld_base->half_bld.vec_type, "");
case 32:
return LLVMBuildBitCast(builder, val, bld_base->base.vec_type, "");
case 64:
@@ -222,6 +225,8 @@ static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base,
LLVMValueRef result = lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);
if (src_bit_size == 64)
result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
+ if (src_bit_size == 16)
+ result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
return result;
}
@@ -240,6 +245,8 @@ static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base,
result = lp_build_cmp(flt_bld, compare, src[0], src[1]);
if (src_bit_size == 64)
result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
+ else if (src_bit_size == 16)
+ result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
return result;
}
@@ -306,6 +313,9 @@ static LLVMValueRef emit_b2f(struct lp_build_nir_context *bld_base,
"");
result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, "");
switch (bitsize) {
+ case 16:
+ result = LLVMBuildFPTrunc(builder, result, bld_base->half_bld.vec_type, "");
+ break;
case 32:
break;
case 64:
@@ -447,6 +457,43 @@ merge_16bit(struct lp_build_nir_context *bld_base,
return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
+static LLVMValueRef get_signed_divisor(struct gallivm_state *gallivm,
+ struct lp_build_context *int_bld,
+ struct lp_build_context *mask_bld,
+ int src_bit_size,
+ LLVMValueRef src, LLVMValueRef divisor)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ /* However for signed divides SIGFPE can occur if the numerator is INT_MIN
+ and divisor is -1. */
+ /* set mask if numerator == INT_MIN */
+ long long min_val;
+ switch (src_bit_size) {
+ case 8:
+ min_val = INT8_MIN;
+ break;
+ case 16:
+ min_val = INT16_MIN;
+ break;
+ default:
+ case 32:
+ min_val = INT_MIN;
+ break;
+ case 64:
+ min_val = INT64_MIN;
+ break;
+ }
+ LLVMValueRef div_mask2 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src,
+ lp_build_const_int_vec(gallivm, int_bld->type, min_val));
+ /* set another mask if divisor is - 1 */
+ LLVMValueRef div_mask3 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, divisor,
+ lp_build_const_int_vec(gallivm, int_bld->type, -1));
+ div_mask2 = LLVMBuildAnd(builder, div_mask2, div_mask3, "");
+
+ divisor = lp_build_select(mask_bld, div_mask2, int_bld->one, divisor);
+ return divisor;
+}
+
static LLVMValueRef
do_int_divide(struct lp_build_nir_context *bld_base,
bool is_unsigned, unsigned src_bit_size,
@@ -456,16 +503,16 @@ do_int_divide(struct lp_build_nir_context *bld_base,
LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
+
+ /* avoid divide by 0. Converted divisor from 0 to -1 */
LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
mask_bld->zero);
+ LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, "");
if (!is_unsigned) {
- /* INT_MIN (0x80000000) / -1 (0xffffffff) causes sigfpe, seen with blender. */
- div_mask = LLVMBuildAnd(builder, div_mask, lp_build_const_int_vec(gallivm, int_bld->type, 0x7fffffff), "");
+ divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
+ src_bit_size, src, divisor);
}
- LLVMValueRef divisor = LLVMBuildOr(builder,
- div_mask,
- src2, "");
LLVMValueRef result = lp_build_div(int_bld, src, divisor);
if (!is_unsigned) {
@@ -485,11 +532,16 @@ do_int_mod(struct lp_build_nir_context *bld_base,
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
- LLVMValueRef div_mask = lp_build_cmp(int_bld, PIPE_FUNC_EQUAL, src2,
- int_bld->zero);
+ struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
+ LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
+ mask_bld->zero);
LLVMValueRef divisor = LLVMBuildOr(builder,
div_mask,
src2, "");
+ if (!is_unsigned) {
+ divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
+ src_bit_size, src, divisor);
+ }
LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
return LLVMBuildOr(builder, div_mask, result, "");
}
@@ -502,7 +554,7 @@ do_quantize_to_f16(struct lp_build_nir_context *bld_base,
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef result, cond, cond2, temp;
- result = LLVMBuildFPTrunc(builder, src, LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), "");
+ result = LLVMBuildFPTrunc(builder, src, bld_base->half_bld.vec_type, "");
result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");
temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
@@ -516,13 +568,18 @@ do_quantize_to_f16(struct lp_build_nir_context *bld_base,
}
static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
- nir_op op, unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS], LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
+ const nir_alu_instr *instr,
+ unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
+ LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef result;
- enum gallivm_nan_behavior minmax_nan = bld_base->shader->info.stage == MESA_SHADER_KERNEL ? GALLIVM_NAN_RETURN_OTHER : GALLIVM_NAN_BEHAVIOR_UNDEFINED;
- switch (op) {
+
+ switch (instr->op) {
+ case nir_op_b2f16:
+ result = emit_b2f(bld_base, src[0], 16);
+ break;
case nir_op_b2f32:
result = emit_b2f(bld_base, src[0], 32);
break;
@@ -546,6 +603,10 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
break;
case nir_op_bit_count:
result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
+ if (src_bit_size[0] < 32)
+ result = LLVMBuildZExt(builder, result, bld_base->int_bld.vec_type, "");
+ else if (src_bit_size[0] > 32)
+ result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
break;
case nir_op_bitfield_select:
result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
@@ -561,7 +622,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
src[0] = LLVMBuildFPTrunc(builder, src[0],
bld_base->base.vec_type, "");
result = LLVMBuildFPTrunc(builder, src[0],
- LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), "");
+ bld_base->half_bld.vec_type, "");
break;
case nir_op_f2f32:
if (src_bit_size[0] < 32)
@@ -624,17 +685,17 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_fcos:
- result = lp_build_cos(&bld_base->base, src[0]);
+ result = lp_build_cos(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_fddx:
case nir_op_fddx_coarse:
case nir_op_fddx_fine:
- result = lp_build_ddx(&bld_base->base, src[0]);
+ result = lp_build_ddx(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_fddy:
case nir_op_fddy_coarse:
case nir_op_fddy_fine:
- result = lp_build_ddy(&bld_base->base, src[0]);
+ result = lp_build_ddy(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_fdiv:
result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
@@ -644,7 +705,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
break;
case nir_op_fexp2:
- result = lp_build_exp2(&bld_base->base, src[0]);
+ result = lp_build_exp2(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_ffloor:
result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
@@ -670,16 +731,45 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
break;
}
+ case nir_op_fisfinite32:
+ unreachable("Should have been lowered in nir_opt_algebraic_late.");
case nir_op_flog2:
- result = lp_build_log2_safe(&bld_base->base, src[0]);
+ result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_flt:
case nir_op_flt32:
result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
break;
- case nir_op_fmin:
- result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan);
+ case nir_op_fmax:
+ case nir_op_fmin: {
+ enum gallivm_nan_behavior minmax_nan;
+ int first = 0;
+
+ /* If one of the sources is known to be a number (i.e., not NaN), then
+ * better code can be generated by passing that information along.
+ */
+ if (is_a_number(bld_base->range_ht, instr, 1,
+ 0 /* unused num_components */,
+ NULL /* unused swizzle */)) {
+ minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
+ } else if (is_a_number(bld_base->range_ht, instr, 0,
+ 0 /* unused num_components */,
+ NULL /* unused swizzle */)) {
+ first = 1;
+ minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
+ } else {
+ minmax_nan = GALLIVM_NAN_RETURN_OTHER;
+ }
+
+ if (instr->op == nir_op_fmin) {
+ result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
+ src[first], src[1 - first], minmax_nan);
+ } else {
+ result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
+ src[first], src[1 - first], minmax_nan);
+ }
break;
+ }
case nir_op_fmod: {
struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
result = lp_build_div(flt_bld, src[0], src[1]);
@@ -692,9 +782,6 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
src[0], src[1]);
break;
- case nir_op_fmax:
- result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan);
- break;
case nir_op_fneu32:
result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
break;
@@ -702,7 +789,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_fpow:
- result = lp_build_pow(&bld_base->base, src[0], src[1]);
+ result = lp_build_pow(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
break;
case nir_op_fquantize2f16:
result = do_quantize_to_f16(bld_base, src[0]);
@@ -711,7 +798,13 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_fround_even:
- result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
+ if (src_bit_size[0] == 16) {
+ struct lp_build_context *bld = get_flt_bld(bld_base, 16);
+ char intrinsic[64];
+ lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
+ result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
+ } else
+ result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_frsq:
result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
@@ -723,7 +816,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_fsin:
- result = lp_build_sin(&bld_base->base, src[0]);
+ result = lp_build_sin(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
case nir_op_fsqrt:
result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
@@ -734,6 +827,10 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
case nir_op_i2b32:
result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
break;
+ case nir_op_i2f16:
+ result = LLVMBuildSIToFP(builder, src[0],
+ bld_base->half_bld.vec_type, "");
+ break;
case nir_op_i2f32:
result = lp_build_int_to_float(&bld_base->base, src[0]);
break;
@@ -874,6 +971,10 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, "");
break;
}
+ case nir_op_u2f16:
+ result = LLVMBuildUIToFP(builder, src[0],
+ bld_base->half_bld.vec_type, "");
+ break;
case nir_op_u2f32:
result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
break;
@@ -970,14 +1071,14 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr
case nir_op_unpack_half_2x16:
src_components = 1;
break;
- case nir_op_cube_face_coord:
- case nir_op_cube_face_index:
+ case nir_op_cube_face_coord_amd:
+ case nir_op_cube_face_index_amd:
src_components = 3;
break;
case nir_op_fsum2:
case nir_op_fsum3:
case nir_op_fsum4:
- src_components = nir_src_num_components(instr->src[0].src);
+ src_components = nir_op_infos[instr->op].input_sizes[0];
break;
default:
src_components = num_components;
@@ -994,7 +1095,7 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr
result[i] = cast_type(bld_base, src[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
}
} else if (instr->op == nir_op_fsum4 || instr->op == nir_op_fsum3 || instr->op == nir_op_fsum2) {
- for (unsigned c = 0; c < nir_src_num_components(instr->src[0].src); c++) {
+ for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) {
LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder,
src[0], c, "");
temp_chan = cast_type(bld_base, temp_chan, nir_op_infos[instr->op].input_types[0], src_bit_size[0]);
@@ -1012,7 +1113,7 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr
src_chan[i] = src[i];
src_chan[i] = cast_type(bld_base, src_chan[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
}
- result[c] = do_alu_action(bld_base, instr->op, src_bit_size, src_chan);
+ result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan);
result[c] = cast_type(bld_base, result[c], nir_op_infos[instr->op].output_type, nir_dest_bit_size(instr->dest.dest));
}
}
@@ -1026,6 +1127,7 @@ static void visit_load_const(struct lp_build_nir_context *bld_base,
struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
for (unsigned i = 0; i < instr->def.num_components; i++)
result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
+ memset(&result[instr->def.num_components], 0, NIR_MAX_VEC_COMPONENTS - instr->def.num_components);
assign_ssa_dest(bld_base, &instr->def, result);
}
@@ -1240,7 +1342,7 @@ static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
nir_intrinsic_instr *instr,
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
- LLVMValueRef idx = get_src(bld_base, instr->src[0]);
+ LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
LLVMValueRef offset = get_src(bld_base, instr->src[1]);
bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
idx, offset, result);
@@ -1250,7 +1352,7 @@ static void visit_store_ssbo(struct lp_build_nir_context *bld_base,
nir_intrinsic_instr *instr)
{
LLVMValueRef val = get_src(bld_base, instr->src[0]);
- LLVMValueRef idx = get_src(bld_base, instr->src[1]);
+ LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_uint, 32);
LLVMValueRef offset = get_src(bld_base, instr->src[2]);
int writemask = instr->const_index[0];
int nc = nir_src_num_components(instr->src[0]);
@@ -1262,7 +1364,7 @@ static void visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
nir_intrinsic_instr *instr,
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
- LLVMValueRef idx = get_src(bld_base, instr->src[0]);
+ LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
result[0] = bld_base->get_ssbo_size(bld_base, idx);
}
@@ -1270,7 +1372,7 @@ static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
nir_intrinsic_instr *instr,
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
- LLVMValueRef idx = get_src(bld_base, instr->src[0]);
+ LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
LLVMValueRef offset = get_src(bld_base, instr->src[1]);
LLVMValueRef val = get_src(bld_base, instr->src[2]);
LLVMValueRef val2 = NULL;
@@ -1662,13 +1764,14 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base,
case nir_intrinsic_load_base_instance:
case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_first_vertex:
- case nir_intrinsic_load_work_group_id:
+ case nir_intrinsic_load_workgroup_id:
case nir_intrinsic_load_local_invocation_id:
- case nir_intrinsic_load_num_work_groups:
+ case nir_intrinsic_load_local_invocation_index:
+ case nir_intrinsic_load_num_workgroups:
case nir_intrinsic_load_invocation_id:
case nir_intrinsic_load_front_face:
case nir_intrinsic_load_draw_id:
- case nir_intrinsic_load_local_group_size:
+ case nir_intrinsic_load_workgroup_size:
case nir_intrinsic_load_work_dim:
case nir_intrinsic_load_tess_coord:
case nir_intrinsic_load_tess_level_outer:
@@ -1888,7 +1991,7 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef coords[5];
LLVMValueRef offsets[3] = { NULL };
- LLVMValueRef explicit_lod = NULL, projector = NULL, ms_index = NULL;
+ LLVMValueRef explicit_lod = NULL, ms_index = NULL;
struct lp_sampler_params params;
struct lp_derivatives derivs;
unsigned sample_key = 0;
@@ -1935,9 +2038,6 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
case nir_tex_src_sampler_deref:
sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
break;
- case nir_tex_src_projector:
- projector = lp_build_rcp(&bld_base->base, cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32));
- break;
case nir_tex_src_comparator:
sample_key |= LP_SAMPLER_SHADOW;
coords[4] = get_src(bld_base, instr->src[i].src);
@@ -2038,13 +2138,6 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
coords[1] = coord_undef;
}
- if (projector) {
- for (unsigned chan = 0; chan < instr->coord_components; ++chan)
- coords[chan] = lp_build_mul(&bld_base->base, coords[chan], projector);
- if (sample_key & LP_SAMPLER_SHADOW)
- coords[4] = lp_build_mul(&bld_base->base, coords[4], projector);
- }
-
uint32_t samp_base_index = 0, tex_base_index = 0;
if (!sampler_deref_instr) {
int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
@@ -2081,8 +2174,38 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
params.texel = texel;
params.lod = explicit_lod;
params.ms_index = ms_index;
+ params.aniso_filter_table = bld_base->aniso_filter_table;
bld_base->tex(bld_base, &params);
+
+ if (nir_dest_bit_size(instr->dest) != 32) {
+ assert(nir_dest_bit_size(instr->dest) == 16);
+ LLVMTypeRef vec_type = NULL;
+ bool is_float = false;
+ switch (nir_alu_type_get_base_type(instr->dest_type)) {
+ case nir_type_float:
+ is_float = true;
+ break;
+ case nir_type_int:
+ vec_type = bld_base->int16_bld.vec_type;
+ break;
+ case nir_type_uint:
+ vec_type = bld_base->uint16_bld.vec_type;
+ break;
+ default:
+ unreachable("unexpected alu type");
+ }
+ for (int i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+ if (is_float) {
+ texel[i] = lp_build_float_to_half(gallivm, texel[i]);
+ } else {
+ texel[i] = LLVMBuildBitCast(builder, texel[i], bld_base->int_bld.vec_type, "");
+ texel[i] = LLVMBuildTrunc(builder, texel[i], vec_type, "");
+ }
+ }
+ }
+
assign_dest(bld_base, &instr->dest, texel);
+
}
static void visit_ssa_undef(struct lp_build_nir_context *bld_base,
@@ -2093,6 +2216,7 @@ static void visit_ssa_undef(struct lp_build_nir_context *bld_base,
struct lp_build_context *undef_bld = get_int_bld(bld_base, true, instr->def.bit_size);
for (unsigned i = 0; i < num_components; i++)
undef[i] = LLVMGetUndef(undef_bld->vec_type);
+ memset(&undef[num_components], 0, NIR_MAX_VEC_COMPONENTS - num_components);
assign_ssa_dest(bld_base, &instr->def, undef);
}
@@ -2279,6 +2403,7 @@ bool lp_build_nir_llvm(
_mesa_key_pointer_equal);
bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
+ bld_base->range_ht = _mesa_pointer_hash_table_create(NULL);
func = (struct nir_function *)exec_list_get_head(&nir->functions);
@@ -2295,6 +2420,7 @@ bool lp_build_nir_llvm(
free(bld_base->ssa_defs);
ralloc_free(bld_base->vars);
ralloc_free(bld_base->regs);
+ ralloc_free(bld_base->range_ht);
return true;
}
@@ -2305,6 +2431,7 @@ void lp_build_opt_nir(struct nir_shader *nir)
static const struct nir_lower_tex_options lower_tex_options = {
.lower_tg4_offsets = true,
+ .lower_txp = ~0u,
};
NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
NIR_PASS_V(nir, nir_lower_frexp);
@@ -2313,21 +2440,36 @@ void lp_build_opt_nir(struct nir_shader *nir)
NIR_PASS_V(nir, nir_lower_fp16_casts);
do {
progress = false;
- NIR_PASS_V(nir, nir_opt_constant_folding);
- NIR_PASS_V(nir, nir_opt_algebraic);
- NIR_PASS_V(nir, nir_lower_pack);
+ NIR_PASS(progress, nir, nir_opt_constant_folding);
+ NIR_PASS(progress, nir, nir_opt_algebraic);
+ NIR_PASS(progress, nir, nir_lower_pack);
- nir_lower_tex_options options = { .lower_tex_without_implicit_lod = true };
+ nir_lower_tex_options options = { 0, };
NIR_PASS_V(nir, nir_lower_tex, &options);
const nir_lower_subgroups_options subgroups_options = {
.subgroup_size = lp_native_vector_width / 32,
.ballot_bit_size = 32,
+ .ballot_components = 1,
.lower_to_scalar = true,
.lower_subgroup_masks = true,
};
NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options);
} while (progress);
- nir_lower_bool_to_int32(nir);
+
+ do {
+ progress = false;
+ NIR_PASS(progress, nir, nir_opt_algebraic_late);
+ if (progress) {
+ NIR_PASS_V(nir, nir_copy_prop);
+ NIR_PASS_V(nir, nir_opt_dce);
+ NIR_PASS_V(nir, nir_opt_cse);
+ }
+ } while (progress);
+
+ if (nir_lower_bool_to_int32(nir)) {
+ NIR_PASS_V(nir, nir_copy_prop);
+ NIR_PASS_V(nir, nir_opt_dce);
+ }
}