diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-02-24 01:57:18 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-02-24 01:57:18 +0000 |
commit | b24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (patch) | |
tree | 658ca4e6b41655f49463c85edbaeda48979c394c /lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c | |
parent | 57768bbb154c2879d34ec20e401b19472e77aaf7 (diff) |
Import Mesa 21.3.7
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c')
-rw-r--r-- | lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c | 250 |
1 files changed, 196 insertions, 54 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c index 38afac47d..1ce4be0ec 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -27,14 +27,17 @@ #include "lp_bld_arit.h" #include "lp_bld_bitarit.h" #include "lp_bld_const.h" +#include "lp_bld_conv.h" #include "lp_bld_gather.h" #include "lp_bld_logic.h" #include "lp_bld_quad.h" #include "lp_bld_flow.h" +#include "lp_bld_intr.h" #include "lp_bld_struct.h" #include "lp_bld_debug.h" #include "lp_bld_printf.h" #include "nir_deref.h" +#include "nir_search_helpers.h" static void visit_cf_list(struct lp_build_nir_context *bld_base, struct exec_list *list); @@ -47,7 +50,7 @@ static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRe case nir_type_float: switch (bit_size) { case 16: - return LLVMBuildBitCast(builder, val, LLVMVectorType(LLVMHalfTypeInContext(bld_base->base.gallivm->context), bld_base->base.type.length), ""); + return LLVMBuildBitCast(builder, val, bld_base->half_bld.vec_type, ""); case 32: return LLVMBuildBitCast(builder, val, bld_base->base.vec_type, ""); case 64: @@ -222,6 +225,8 @@ static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base, LLVMValueRef result = lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero); if (src_bit_size == 64) result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, ""); + if (src_bit_size == 16) + result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, ""); return result; } @@ -240,6 +245,8 @@ static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base, result = lp_build_cmp(flt_bld, compare, src[0], src[1]); if (src_bit_size == 64) result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, ""); + else if (src_bit_size == 16) + result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, ""); return result; } @@ -306,6 +313,9 @@ static LLVMValueRef emit_b2f(struct lp_build_nir_context *bld_base, ""); result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, ""); switch (bitsize) { + case 16: + result = LLVMBuildFPTrunc(builder, result, bld_base->half_bld.vec_type, ""); + break; case 32: break; case 64: @@ -447,6 +457,43 @@ merge_16bit(struct lp_build_nir_context *bld_base, return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), ""); } +static LLVMValueRef get_signed_divisor(struct gallivm_state *gallivm, + struct lp_build_context *int_bld, + struct lp_build_context *mask_bld, + int src_bit_size, + LLVMValueRef src, LLVMValueRef divisor) +{ + LLVMBuilderRef builder = gallivm->builder; + /* However for signed divides SIGFPE can occur if the numerator is INT_MIN + and divisor is -1. */ + /* set mask if numerator == INT_MIN */ + long long min_val; + switch (src_bit_size) { + case 8: + min_val = INT8_MIN; + break; + case 16: + min_val = INT16_MIN; + break; + default: + case 32: + min_val = INT_MIN; + break; + case 64: + min_val = INT64_MIN; + break; + } + LLVMValueRef div_mask2 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src, + lp_build_const_int_vec(gallivm, int_bld->type, min_val)); + /* set another mask if divisor is - 1 */ + LLVMValueRef div_mask3 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, divisor, + lp_build_const_int_vec(gallivm, int_bld->type, -1)); + div_mask2 = LLVMBuildAnd(builder, div_mask2, div_mask3, ""); + + divisor = lp_build_select(mask_bld, div_mask2, int_bld->one, divisor); + return divisor; +} + static LLVMValueRef do_int_divide(struct lp_build_nir_context *bld_base, bool is_unsigned, unsigned src_bit_size, @@ -456,16 +503,16 @@ do_int_divide(struct lp_build_nir_context *bld_base, LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size); struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size); + + /* avoid divide by 0. Converted divisor from 0 to -1 */ LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2, mask_bld->zero); + LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, ""); if (!is_unsigned) { - /* INT_MIN (0x80000000) / -1 (0xffffffff) causes sigfpe, seen with blender. */ - div_mask = LLVMBuildAnd(builder, div_mask, lp_build_const_int_vec(gallivm, int_bld->type, 0x7fffffff), ""); + divisor = get_signed_divisor(gallivm, int_bld, mask_bld, + src_bit_size, src, divisor); } - LLVMValueRef divisor = LLVMBuildOr(builder, - div_mask, - src2, ""); LLVMValueRef result = lp_build_div(int_bld, src, divisor); if (!is_unsigned) { @@ -485,11 +532,16 @@ do_int_mod(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size); - LLVMValueRef div_mask = lp_build_cmp(int_bld, PIPE_FUNC_EQUAL, src2, - int_bld->zero); + struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size); + LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2, + mask_bld->zero); LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, ""); + if (!is_unsigned) { + divisor = get_signed_divisor(gallivm, int_bld, mask_bld, + src_bit_size, src, divisor); + } LLVMValueRef result = lp_build_mod(int_bld, src, divisor); return LLVMBuildOr(builder, div_mask, result, ""); } @@ -502,7 +554,7 @@ do_quantize_to_f16(struct lp_build_nir_context *bld_base, LLVMBuilderRef builder = gallivm->builder; LLVMValueRef result, cond, cond2, temp; - result = LLVMBuildFPTrunc(builder, src, LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), ""); + result = LLVMBuildFPTrunc(builder, src, bld_base->half_bld.vec_type, ""); result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, ""); temp = lp_build_abs(get_flt_bld(bld_base, 32), result); @@ -516,13 +568,18 @@ do_quantize_to_f16(struct lp_build_nir_context *bld_base, } static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, - nir_op op, unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS], LLVMValueRef src[NIR_MAX_VEC_COMPONENTS]) + const nir_alu_instr *instr, + unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS], + LLVMValueRef src[NIR_MAX_VEC_COMPONENTS]) { struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef result; - enum gallivm_nan_behavior minmax_nan = bld_base->shader->info.stage == MESA_SHADER_KERNEL ? GALLIVM_NAN_RETURN_OTHER : GALLIVM_NAN_BEHAVIOR_UNDEFINED; - switch (op) { + + switch (instr->op) { + case nir_op_b2f16: + result = emit_b2f(bld_base, src[0], 16); + break; case nir_op_b2f32: result = emit_b2f(bld_base, src[0], 32); break; @@ -546,6 +603,10 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, break; case nir_op_bit_count: result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]); + if (src_bit_size[0] < 32) + result = LLVMBuildZExt(builder, result, bld_base->int_bld.vec_type, ""); + else if (src_bit_size[0] > 32) + result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, ""); break; case nir_op_bitfield_select: result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2]))); @@ -561,7 +622,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, src[0] = LLVMBuildFPTrunc(builder, src[0], bld_base->base.vec_type, ""); result = LLVMBuildFPTrunc(builder, src[0], - LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), ""); + bld_base->half_bld.vec_type, ""); break; case nir_op_f2f32: if (src_bit_size[0] < 32) @@ -624,17 +685,17 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fcos: - result = lp_build_cos(&bld_base->base, src[0]); + result = lp_build_cos(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fddx: case nir_op_fddx_coarse: case nir_op_fddx_fine: - result = lp_build_ddx(&bld_base->base, src[0]); + result = lp_build_ddx(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fddy: case nir_op_fddy_coarse: case nir_op_fddy_fine: - result = lp_build_ddy(&bld_base->base, src[0]); + result = lp_build_ddy(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fdiv: result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]), @@ -644,7 +705,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src); break; case nir_op_fexp2: - result = lp_build_exp2(&bld_base->base, src[0]); + result = lp_build_exp2(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_ffloor: result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]); @@ -670,16 +731,45 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, ""); break; } + case nir_op_fisfinite32: + unreachable("Should have been lowered in nir_opt_algebraic_late."); case nir_op_flog2: - result = lp_build_log2_safe(&bld_base->base, src[0]); + result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_flt: case nir_op_flt32: result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src); break; - case nir_op_fmin: - result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan); + case nir_op_fmax: + case nir_op_fmin: { + enum gallivm_nan_behavior minmax_nan; + int first = 0; + + /* If one of the sources is known to be a number (i.e., not NaN), then + * better code can be generated by passing that information along. + */ + if (is_a_number(bld_base->range_ht, instr, 1, + 0 /* unused num_components */, + NULL /* unused swizzle */)) { + minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN; + } else if (is_a_number(bld_base->range_ht, instr, 0, + 0 /* unused num_components */, + NULL /* unused swizzle */)) { + first = 1; + minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN; + } else { + minmax_nan = GALLIVM_NAN_RETURN_OTHER; + } + + if (instr->op == nir_op_fmin) { + result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]), + src[first], src[1 - first], minmax_nan); + } else { + result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]), + src[first], src[1 - first], minmax_nan); + } break; + } case nir_op_fmod: { struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]); result = lp_build_div(flt_bld, src[0], src[1]); @@ -692,9 +782,6 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]); break; - case nir_op_fmax: - result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan); - break; case nir_op_fneu32: result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src); break; @@ -702,7 +789,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fpow: - result = lp_build_pow(&bld_base->base, src[0], src[1]); + result = lp_build_pow(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]); break; case nir_op_fquantize2f16: result = do_quantize_to_f16(bld_base, src[0]); @@ -711,7 +798,13 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fround_even: - result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + if (src_bit_size[0] == 16) { + struct lp_build_context *bld = get_flt_bld(bld_base, 16); + char intrinsic[64]; + lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type); + result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]); + } else + result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_frsq: result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]); @@ -723,7 +816,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fsin: - result = lp_build_sin(&bld_base->base, src[0]); + result = lp_build_sin(get_flt_bld(bld_base, src_bit_size[0]), src[0]); break; case nir_op_fsqrt: result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]); @@ -734,6 +827,10 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, case nir_op_i2b32: result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]); break; + case nir_op_i2f16: + result = LLVMBuildSIToFP(builder, src[0], + bld_base->half_bld.vec_type, ""); + break; case nir_op_i2f32: result = lp_build_int_to_float(&bld_base->base, src[0]); break; @@ -874,6 +971,10 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, ""); break; } + case nir_op_u2f16: + result = LLVMBuildUIToFP(builder, src[0], + bld_base->half_bld.vec_type, ""); + break; case nir_op_u2f32: result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, ""); break; @@ -970,14 +1071,14 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr case nir_op_unpack_half_2x16: src_components = 1; break; - case nir_op_cube_face_coord: - case nir_op_cube_face_index: + case nir_op_cube_face_coord_amd: + case nir_op_cube_face_index_amd: src_components = 3; break; case nir_op_fsum2: case nir_op_fsum3: case nir_op_fsum4: - src_components = nir_src_num_components(instr->src[0].src); + src_components = nir_op_infos[instr->op].input_sizes[0]; break; default: src_components = num_components; @@ -994,7 +1095,7 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr result[i] = cast_type(bld_base, src[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]); } } else if (instr->op == nir_op_fsum4 || instr->op == nir_op_fsum3 || instr->op == nir_op_fsum2) { - for (unsigned c = 0; c < nir_src_num_components(instr->src[0].src); c++) { + for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) { LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder, src[0], c, ""); temp_chan = cast_type(bld_base, temp_chan, nir_op_infos[instr->op].input_types[0], src_bit_size[0]); @@ -1012,7 +1113,7 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr src_chan[i] = src[i]; src_chan[i] = cast_type(bld_base, src_chan[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]); } - result[c] = do_alu_action(bld_base, instr->op, src_bit_size, src_chan); + result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan); result[c] = cast_type(bld_base, result[c], nir_op_infos[instr->op].output_type, nir_dest_bit_size(instr->dest.dest)); } } @@ -1026,6 +1127,7 @@ static void visit_load_const(struct lp_build_nir_context *bld_base, struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size); for (unsigned i = 0; i < instr->def.num_components; i++) result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64); + memset(&result[instr->def.num_components], 0, NIR_MAX_VEC_COMPONENTS - instr->def.num_components); assign_ssa_dest(bld_base, &instr->def, result); } @@ -1240,7 +1342,7 @@ static void visit_load_ssbo(struct lp_build_nir_context *bld_base, nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) { - LLVMValueRef idx = get_src(bld_base, instr->src[0]); + LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32); LLVMValueRef offset = get_src(bld_base, instr->src[1]); bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest), idx, offset, result); @@ -1250,7 +1352,7 @@ static void visit_store_ssbo(struct lp_build_nir_context *bld_base, nir_intrinsic_instr *instr) { LLVMValueRef val = get_src(bld_base, instr->src[0]); - LLVMValueRef idx = get_src(bld_base, instr->src[1]); + LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_uint, 32); LLVMValueRef offset = get_src(bld_base, instr->src[2]); int writemask = instr->const_index[0]; int nc = nir_src_num_components(instr->src[0]); @@ -1262,7 +1364,7 @@ static void visit_get_ssbo_size(struct lp_build_nir_context *bld_base, nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) { - LLVMValueRef idx = get_src(bld_base, instr->src[0]); + LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32); result[0] = bld_base->get_ssbo_size(bld_base, idx); } @@ -1270,7 +1372,7 @@ static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base, nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) { - LLVMValueRef idx = get_src(bld_base, instr->src[0]); + LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32); LLVMValueRef offset = get_src(bld_base, instr->src[1]); LLVMValueRef val = get_src(bld_base, instr->src[2]); LLVMValueRef val2 = NULL; @@ -1662,13 +1764,14 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base, case nir_intrinsic_load_base_instance: case nir_intrinsic_load_base_vertex: case nir_intrinsic_load_first_vertex: - case nir_intrinsic_load_work_group_id: + case nir_intrinsic_load_workgroup_id: case nir_intrinsic_load_local_invocation_id: - case nir_intrinsic_load_num_work_groups: + case nir_intrinsic_load_local_invocation_index: + case nir_intrinsic_load_num_workgroups: case nir_intrinsic_load_invocation_id: case nir_intrinsic_load_front_face: case nir_intrinsic_load_draw_id: - case nir_intrinsic_load_local_group_size: + case nir_intrinsic_load_workgroup_size: case nir_intrinsic_load_work_dim: case nir_intrinsic_load_tess_coord: case nir_intrinsic_load_tess_level_outer: @@ -1888,7 +1991,7 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst LLVMBuilderRef builder = gallivm->builder; LLVMValueRef coords[5]; LLVMValueRef offsets[3] = { NULL }; - LLVMValueRef explicit_lod = NULL, projector = NULL, ms_index = NULL; + LLVMValueRef explicit_lod = NULL, ms_index = NULL; struct lp_sampler_params params; struct lp_derivatives derivs; unsigned sample_key = 0; @@ -1935,9 +2038,6 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst case nir_tex_src_sampler_deref: sampler_deref_instr = nir_src_as_deref(instr->src[i].src); break; - case nir_tex_src_projector: - projector = lp_build_rcp(&bld_base->base, cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32)); - break; case nir_tex_src_comparator: sample_key |= LP_SAMPLER_SHADOW; coords[4] = get_src(bld_base, instr->src[i].src); @@ -2038,13 +2138,6 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst coords[1] = coord_undef; } - if (projector) { - for (unsigned chan = 0; chan < instr->coord_components; ++chan) - coords[chan] = lp_build_mul(&bld_base->base, coords[chan], projector); - if (sample_key & LP_SAMPLER_SHADOW) - coords[4] = lp_build_mul(&bld_base->base, coords[4], projector); - } - uint32_t samp_base_index = 0, tex_base_index = 0; if (!sampler_deref_instr) { int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle); @@ -2081,8 +2174,38 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst params.texel = texel; params.lod = explicit_lod; params.ms_index = ms_index; + params.aniso_filter_table = bld_base->aniso_filter_table; bld_base->tex(bld_base, ¶ms); + + if (nir_dest_bit_size(instr->dest) != 32) { + assert(nir_dest_bit_size(instr->dest) == 16); + LLVMTypeRef vec_type = NULL; + bool is_float = false; + switch (nir_alu_type_get_base_type(instr->dest_type)) { + case nir_type_float: + is_float = true; + break; + case nir_type_int: + vec_type = bld_base->int16_bld.vec_type; + break; + case nir_type_uint: + vec_type = bld_base->uint16_bld.vec_type; + break; + default: + unreachable("unexpected alu type"); + } + for (int i = 0; i < nir_dest_num_components(instr->dest); ++i) { + if (is_float) { + texel[i] = lp_build_float_to_half(gallivm, texel[i]); + } else { + texel[i] = LLVMBuildBitCast(builder, texel[i], bld_base->int_bld.vec_type, ""); + texel[i] = LLVMBuildTrunc(builder, texel[i], vec_type, ""); + } + } + } + assign_dest(bld_base, &instr->dest, texel); + } static void visit_ssa_undef(struct lp_build_nir_context *bld_base, @@ -2093,6 +2216,7 @@ static void visit_ssa_undef(struct lp_build_nir_context *bld_base, struct lp_build_context *undef_bld = get_int_bld(bld_base, true, instr->def.bit_size); for (unsigned i = 0; i < num_components; i++) undef[i] = LLVMGetUndef(undef_bld->vec_type); + memset(&undef[num_components], 0, NIR_MAX_VEC_COMPONENTS - num_components); assign_ssa_dest(bld_base, &instr->def, undef); } @@ -2279,6 +2403,7 @@ bool lp_build_nir_llvm( _mesa_key_pointer_equal); bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + bld_base->range_ht = _mesa_pointer_hash_table_create(NULL); func = (struct nir_function *)exec_list_get_head(&nir->functions); @@ -2295,6 +2420,7 @@ bool lp_build_nir_llvm( free(bld_base->ssa_defs); ralloc_free(bld_base->vars); ralloc_free(bld_base->regs); + ralloc_free(bld_base->range_ht); return true; } @@ -2305,6 +2431,7 @@ void lp_build_opt_nir(struct nir_shader *nir) static const struct nir_lower_tex_options lower_tex_options = { .lower_tg4_offsets = true, + .lower_txp = ~0u, }; NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options); NIR_PASS_V(nir, nir_lower_frexp); @@ -2313,21 +2440,36 @@ void lp_build_opt_nir(struct nir_shader *nir) NIR_PASS_V(nir, nir_lower_fp16_casts); do { progress = false; - NIR_PASS_V(nir, nir_opt_constant_folding); - NIR_PASS_V(nir, nir_opt_algebraic); - NIR_PASS_V(nir, nir_lower_pack); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_lower_pack); - nir_lower_tex_options options = { .lower_tex_without_implicit_lod = true }; + nir_lower_tex_options options = { 0, }; NIR_PASS_V(nir, nir_lower_tex, &options); const nir_lower_subgroups_options subgroups_options = { .subgroup_size = lp_native_vector_width / 32, .ballot_bit_size = 32, + .ballot_components = 1, .lower_to_scalar = true, .lower_subgroup_masks = true, }; NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options); } while (progress); - nir_lower_bool_to_int32(nir); + + do { + progress = false; + NIR_PASS(progress, nir, nir_opt_algebraic_late); + if (progress) { + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_dce); + NIR_PASS_V(nir, nir_opt_cse); + } + } while (progress); + + if (nir_lower_bool_to_int32(nir)) { + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_dce); + } } |