Import Mesa 21.3.7

author: Jonathan Gray <jsg@cvs.openbsd.org> 2022-02-24 01:57:18 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org> 2022-02-24 01:57:18 +0000
commit: b24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (patch)
tree: 658ca4e6b41655f49463c85edbaeda48979c394c /lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
parent: 57768bbb154c2879d34ec20e401b19472e77aaf7 (diff)
1 files changed, 196 insertions, 54 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
index 38afac47d..1ce4be0ec 100644
--- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
@@ -27,14 +27,17 @@
 #include "lp_bld_arit.h"
 #include "lp_bld_bitarit.h"
 #include "lp_bld_const.h"
+#include "lp_bld_conv.h"
 #include "lp_bld_gather.h"
 #include "lp_bld_logic.h"
 #include "lp_bld_quad.h"
 #include "lp_bld_flow.h"
+#include "lp_bld_intr.h"
 #include "lp_bld_struct.h"
 #include "lp_bld_debug.h"
 #include "lp_bld_printf.h"
 #include "nir_deref.h"
+#include "nir_search_helpers.h"
 
 static void visit_cf_list(struct lp_build_nir_context *bld_base,
                           struct exec_list *list);
@@ -47,7 +50,7 @@ static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRe
    case nir_type_float:
       switch (bit_size) {
       case 16:
-         return LLVMBuildBitCast(builder, val, LLVMVectorType(LLVMHalfTypeInContext(bld_base->base.gallivm->context), bld_base->base.type.length), "");
+         return LLVMBuildBitCast(builder, val, bld_base->half_bld.vec_type, "");
       case 32:
          return LLVMBuildBitCast(builder, val, bld_base->base.vec_type, "");
       case 64:
@@ -222,6 +225,8 @@ static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base,
    LLVMValueRef result = lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);
    if (src_bit_size == 64)
       result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
+   if (src_bit_size == 16)
+      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
    return result;
 }
 
@@ -240,6 +245,8 @@ static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base,
       result = lp_build_cmp(flt_bld, compare, src[0], src[1]);
    if (src_bit_size == 64)
       result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
+   else if (src_bit_size == 16)
+      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
    return result;
 }
 
@@ -306,6 +313,9 @@ static LLVMValueRef emit_b2f(struct lp_build_nir_context *bld_base,
                                       "");
    result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, "");
    switch (bitsize) {
+   case 16:
+      result = LLVMBuildFPTrunc(builder, result, bld_base->half_bld.vec_type, "");
+      break;
    case 32:
       break;
    case 64:
@@ -447,6 +457,43 @@ merge_16bit(struct lp_build_nir_context *bld_base,
    return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
 }
 
+static LLVMValueRef get_signed_divisor(struct gallivm_state *gallivm,
+                                       struct lp_build_context *int_bld,
+                                       struct lp_build_context *mask_bld,
+                                       int src_bit_size,
+                                       LLVMValueRef src, LLVMValueRef divisor)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   /* However for signed divides SIGFPE can occur if the numerator is INT_MIN
+      and divisor is -1. */
+   /* set mask if numerator == INT_MIN */
+   long long min_val;
+   switch (src_bit_size) {
+   case 8:
+      min_val = INT8_MIN;
+      break;
+   case 16:
+      min_val = INT16_MIN;
+      break;
+   default:
+   case 32:
+      min_val = INT_MIN;
+      break;
+   case 64:
+      min_val = INT64_MIN;
+      break;
+   }
+   LLVMValueRef div_mask2 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src,
+                                         lp_build_const_int_vec(gallivm, int_bld->type, min_val));
+   /* set another mask if divisor is - 1 */
+   LLVMValueRef div_mask3 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, divisor,
+                                         lp_build_const_int_vec(gallivm, int_bld->type, -1));
+   div_mask2 = LLVMBuildAnd(builder, div_mask2, div_mask3, "");
+
+   divisor = lp_build_select(mask_bld, div_mask2, int_bld->one, divisor);
+   return divisor;
+}
+
 static LLVMValueRef
 do_int_divide(struct lp_build_nir_context *bld_base,
               bool is_unsigned, unsigned src_bit_size,
@@ -456,16 +503,16 @@ do_int_divide(struct lp_build_nir_context *bld_base,
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
    struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
+
+   /* avoid divide by 0. Converted divisor from 0 to -1 */
    LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                         mask_bld->zero);
 
+   LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, "");
    if (!is_unsigned) {
-      /* INT_MIN (0x80000000) / -1 (0xffffffff) causes sigfpe, seen with blender. */
-      div_mask = LLVMBuildAnd(builder, div_mask, lp_build_const_int_vec(gallivm, int_bld->type, 0x7fffffff), "");
+      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
+                                   src_bit_size, src, divisor);
    }
-   LLVMValueRef divisor = LLVMBuildOr(builder,
-                                      div_mask,
-                                      src2, "");
    LLVMValueRef result = lp_build_div(int_bld, src, divisor);
 
    if (!is_unsigned) {
@@ -485,11 +532,16 @@ do_int_mod(struct lp_build_nir_context *bld_base,
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
-   LLVMValueRef div_mask = lp_build_cmp(int_bld, PIPE_FUNC_EQUAL, src2,
-                                        int_bld->zero);
+   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
+   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
+                                        mask_bld->zero);
    LLVMValueRef divisor = LLVMBuildOr(builder,
                                       div_mask,
                                       src2, "");
+   if (!is_unsigned) {
+      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
+                                   src_bit_size, src, divisor);
+   }
    LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
    return LLVMBuildOr(builder, div_mask, result, "");
 }
@@ -502,7 +554,7 @@ do_quantize_to_f16(struct lp_build_nir_context *bld_base,
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef result, cond, cond2, temp;
 
-   result = LLVMBuildFPTrunc(builder, src, LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), "");
+   result = LLVMBuildFPTrunc(builder, src, bld_base->half_bld.vec_type, "");
    result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");
 
    temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
@@ -516,13 +568,18 @@ do_quantize_to_f16(struct lp_build_nir_context *bld_base,
 }
 
 static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
-                                  nir_op op, unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS], LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
+                                  const nir_alu_instr *instr,
+                                  unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
+                                  LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
 {
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef result;
-   enum gallivm_nan_behavior minmax_nan = bld_base->shader->info.stage == MESA_SHADER_KERNEL ? GALLIVM_NAN_RETURN_OTHER : GALLIVM_NAN_BEHAVIOR_UNDEFINED;
-   switch (op) {
+
+   switch (instr->op) {
+   case nir_op_b2f16:
+      result = emit_b2f(bld_base, src[0], 16);
+      break;
    case nir_op_b2f32:
       result = emit_b2f(bld_base, src[0], 32);
       break;
@@ -546,6 +603,10 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       break;
    case nir_op_bit_count:
       result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
+      if (src_bit_size[0] < 32)
+         result = LLVMBuildZExt(builder, result, bld_base->int_bld.vec_type, "");
+      else if (src_bit_size[0] > 32)
+         result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
       break;
    case nir_op_bitfield_select:
       result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
@@ -561,7 +622,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
          src[0] = LLVMBuildFPTrunc(builder, src[0],
                                    bld_base->base.vec_type, "");
       result = LLVMBuildFPTrunc(builder, src[0],
-                                LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), "");
+                                bld_base->half_bld.vec_type, "");
       break;
    case nir_op_f2f32:
       if (src_bit_size[0] < 32)
@@ -624,17 +685,17 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_fcos:
-      result = lp_build_cos(&bld_base->base, src[0]);
+      result = lp_build_cos(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_fddx:
    case nir_op_fddx_coarse:
    case nir_op_fddx_fine:
-      result = lp_build_ddx(&bld_base->base, src[0]);
+      result = lp_build_ddx(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_fddy:
    case nir_op_fddy_coarse:
    case nir_op_fddy_fine:
-      result = lp_build_ddy(&bld_base->base, src[0]);
+      result = lp_build_ddy(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_fdiv:
       result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
@@ -644,7 +705,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
       break;
    case nir_op_fexp2:
-      result = lp_build_exp2(&bld_base->base, src[0]);
+      result = lp_build_exp2(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_ffloor:
       result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
@@ -670,16 +731,45 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
          result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
       break;
    }
+   case nir_op_fisfinite32:
+      unreachable("Should have been lowered in nir_opt_algebraic_late.");
    case nir_op_flog2:
-      result = lp_build_log2_safe(&bld_base->base, src[0]);
+      result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_flt:
    case nir_op_flt32:
       result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
       break;
-   case nir_op_fmin:
-      result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan);
+   case nir_op_fmax:
+   case nir_op_fmin: {
+      enum gallivm_nan_behavior minmax_nan;
+      int first = 0;
+
+      /* If one of the sources is known to be a number (i.e., not NaN), then
+       * better code can be generated by passing that information along.
+       */
+      if (is_a_number(bld_base->range_ht, instr, 1,
+                      0 /* unused num_components */,
+                      NULL /* unused swizzle */)) {
+         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
+      } else if (is_a_number(bld_base->range_ht, instr, 0,
+                             0 /* unused num_components */,
+                             NULL /* unused swizzle */)) {
+         first = 1;
+         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
+      } else {
+         minmax_nan = GALLIVM_NAN_RETURN_OTHER;
+      }
+
+      if (instr->op == nir_op_fmin) {
+         result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
+                                   src[first], src[1 - first], minmax_nan);
+      } else {
+         result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
+                                   src[first], src[1 - first], minmax_nan);
+      }
       break;
+   }
    case nir_op_fmod: {
       struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
       result = lp_build_div(flt_bld, src[0], src[1]);
@@ -692,9 +782,6 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
                             src[0], src[1]);
       break;
-   case nir_op_fmax:
-      result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan);
-      break;
    case nir_op_fneu32:
       result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
       break;
@@ -702,7 +789,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_fpow:
-      result = lp_build_pow(&bld_base->base, src[0], src[1]);
+      result = lp_build_pow(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
       break;
    case nir_op_fquantize2f16:
       result = do_quantize_to_f16(bld_base, src[0]);
@@ -711,7 +798,13 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_fround_even:
-      result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
+      if (src_bit_size[0] == 16) {
+	 struct lp_build_context *bld = get_flt_bld(bld_base, 16);
+	 char intrinsic[64];
+	 lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
+	 result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
+      } else
+	 result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_frsq:
       result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
@@ -723,7 +816,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_fsin:
-      result = lp_build_sin(&bld_base->base, src[0]);
+      result = lp_build_sin(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
    case nir_op_fsqrt:
       result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
@@ -734,6 +827,10 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
    case nir_op_i2b32:
       result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
       break;
+   case nir_op_i2f16:
+      result = LLVMBuildSIToFP(builder, src[0],
+                               bld_base->half_bld.vec_type, "");
+      break;
    case nir_op_i2f32:
       result = lp_build_int_to_float(&bld_base->base, src[0]);
       break;
@@ -874,6 +971,10 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, "");
       break;
    }
+   case nir_op_u2f16:
+      result = LLVMBuildUIToFP(builder, src[0],
+                               bld_base->half_bld.vec_type, "");
+      break;
    case nir_op_u2f32:
       result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
       break;
@@ -970,14 +1071,14 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr
    case nir_op_unpack_half_2x16:
       src_components = 1;
       break;
-   case nir_op_cube_face_coord:
-   case nir_op_cube_face_index:
+   case nir_op_cube_face_coord_amd:
+   case nir_op_cube_face_index_amd:
       src_components = 3;
       break;
    case nir_op_fsum2:
    case nir_op_fsum3:
    case nir_op_fsum4:
-      src_components = nir_src_num_components(instr->src[0].src);
+      src_components = nir_op_infos[instr->op].input_sizes[0];
       break;
    default:
       src_components = num_components;
@@ -994,7 +1095,7 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr
          result[i] = cast_type(bld_base, src[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
       }
    } else if (instr->op == nir_op_fsum4 || instr->op == nir_op_fsum3 || instr->op == nir_op_fsum2) {
-      for (unsigned c = 0; c < nir_src_num_components(instr->src[0].src); c++) {
+      for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) {
          LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder,
                                                           src[0], c, "");
          temp_chan = cast_type(bld_base, temp_chan, nir_op_infos[instr->op].input_types[0], src_bit_size[0]);
@@ -1012,7 +1113,7 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr
                src_chan[i] = src[i];
             src_chan[i] = cast_type(bld_base, src_chan[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
          }
-         result[c] = do_alu_action(bld_base, instr->op, src_bit_size, src_chan);
+         result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan);
          result[c] = cast_type(bld_base, result[c], nir_op_infos[instr->op].output_type, nir_dest_bit_size(instr->dest.dest));
       }
    }
@@ -1026,6 +1127,7 @@ static void visit_load_const(struct lp_build_nir_context *bld_base,
    struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
    for (unsigned i = 0; i < instr->def.num_components; i++)
       result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
+   memset(&result[instr->def.num_components], 0, NIR_MAX_VEC_COMPONENTS - instr->def.num_components);
    assign_ssa_dest(bld_base, &instr->def, result);
 }
 
@@ -1240,7 +1342,7 @@ static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr,
                            LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
-   LLVMValueRef idx = get_src(bld_base, instr->src[0]);
+   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
    LLVMValueRef offset = get_src(bld_base, instr->src[1]);
    bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                        idx, offset, result);
@@ -1250,7 +1352,7 @@ static void visit_store_ssbo(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr)
 {
    LLVMValueRef val = get_src(bld_base, instr->src[0]);
-   LLVMValueRef idx = get_src(bld_base, instr->src[1]);
+   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_uint, 32);
    LLVMValueRef offset = get_src(bld_base, instr->src[2]);
    int writemask = instr->const_index[0];
    int nc = nir_src_num_components(instr->src[0]);
@@ -1262,7 +1364,7 @@ static void visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                 nir_intrinsic_instr *instr,
                                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
-   LLVMValueRef idx = get_src(bld_base, instr->src[0]);
+   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
    result[0] = bld_base->get_ssbo_size(bld_base, idx);
 }
 
@@ -1270,7 +1372,7 @@ static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
-   LLVMValueRef idx = get_src(bld_base, instr->src[0]);
+   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
    LLVMValueRef offset = get_src(bld_base, instr->src[1]);
    LLVMValueRef val = get_src(bld_base, instr->src[2]);
    LLVMValueRef val2 = NULL;
@@ -1662,13 +1764,14 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base,
    case nir_intrinsic_load_base_instance:
    case nir_intrinsic_load_base_vertex:
    case nir_intrinsic_load_first_vertex:
-   case nir_intrinsic_load_work_group_id:
+   case nir_intrinsic_load_workgroup_id:
    case nir_intrinsic_load_local_invocation_id:
-   case nir_intrinsic_load_num_work_groups:
+   case nir_intrinsic_load_local_invocation_index:
+   case nir_intrinsic_load_num_workgroups:
    case nir_intrinsic_load_invocation_id:
    case nir_intrinsic_load_front_face:
    case nir_intrinsic_load_draw_id:
-   case nir_intrinsic_load_local_group_size:
+   case nir_intrinsic_load_workgroup_size:
    case nir_intrinsic_load_work_dim:
    case nir_intrinsic_load_tess_coord:
    case nir_intrinsic_load_tess_level_outer:
@@ -1888,7 +1991,7 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef coords[5];
    LLVMValueRef offsets[3] = { NULL };
-   LLVMValueRef explicit_lod = NULL, projector = NULL, ms_index = NULL;
+   LLVMValueRef explicit_lod = NULL, ms_index = NULL;
    struct lp_sampler_params params;
    struct lp_derivatives derivs;
    unsigned sample_key = 0;
@@ -1935,9 +2038,6 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
       case nir_tex_src_sampler_deref:
          sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
          break;
-      case nir_tex_src_projector:
-         projector = lp_build_rcp(&bld_base->base, cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32));
-         break;
       case nir_tex_src_comparator:
          sample_key |= LP_SAMPLER_SHADOW;
          coords[4] = get_src(bld_base, instr->src[i].src);
@@ -2038,13 +2138,6 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
       coords[1] = coord_undef;
    }
 
-   if (projector) {
-      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
-         coords[chan] = lp_build_mul(&bld_base->base, coords[chan], projector);
-      if (sample_key & LP_SAMPLER_SHADOW)
-         coords[4] = lp_build_mul(&bld_base->base, coords[4], projector);
-   }
-
    uint32_t samp_base_index = 0, tex_base_index = 0;
    if (!sampler_deref_instr) {
       int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
@@ -2081,8 +2174,38 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
    params.texel = texel;
    params.lod = explicit_lod;
    params.ms_index = ms_index;
+   params.aniso_filter_table = bld_base->aniso_filter_table;
    bld_base->tex(bld_base, &params);
+
+   if (nir_dest_bit_size(instr->dest) != 32) {
+      assert(nir_dest_bit_size(instr->dest) == 16);
+      LLVMTypeRef vec_type = NULL;
+      bool is_float = false;
+      switch (nir_alu_type_get_base_type(instr->dest_type)) {
+      case nir_type_float:
+         is_float = true;
+	 break;
+      case nir_type_int:
+         vec_type = bld_base->int16_bld.vec_type;
+         break;
+      case nir_type_uint:
+         vec_type = bld_base->uint16_bld.vec_type;
+         break;
+      default:
+         unreachable("unexpected alu type");
+      }
+      for (int i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+         if (is_float) {
+            texel[i] = lp_build_float_to_half(gallivm, texel[i]);
+         } else {
+            texel[i] = LLVMBuildBitCast(builder, texel[i], bld_base->int_bld.vec_type, "");
+            texel[i] = LLVMBuildTrunc(builder, texel[i], vec_type, "");
+         }
+      }
+   }
+
    assign_dest(bld_base, &instr->dest, texel);
+
 }
 
 static void visit_ssa_undef(struct lp_build_nir_context *bld_base,
@@ -2093,6 +2216,7 @@ static void visit_ssa_undef(struct lp_build_nir_context *bld_base,
    struct lp_build_context *undef_bld = get_int_bld(bld_base, true, instr->def.bit_size);
    for (unsigned i = 0; i < num_components; i++)
       undef[i] = LLVMGetUndef(undef_bld->vec_type);
+   memset(&undef[num_components], 0, NIR_MAX_VEC_COMPONENTS - num_components);
    assign_ssa_dest(bld_base, &instr->def, undef);
 }
 
@@ -2279,6 +2403,7 @@ bool lp_build_nir_llvm(
                                             _mesa_key_pointer_equal);
    bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                             _mesa_key_pointer_equal);
+   bld_base->range_ht = _mesa_pointer_hash_table_create(NULL);
 
    func = (struct nir_function *)exec_list_get_head(&nir->functions);
 
@@ -2295,6 +2420,7 @@ bool lp_build_nir_llvm(
    free(bld_base->ssa_defs);
    ralloc_free(bld_base->vars);
    ralloc_free(bld_base->regs);
+   ralloc_free(bld_base->range_ht);
    return true;
 }
 
@@ -2305,6 +2431,7 @@ void lp_build_opt_nir(struct nir_shader *nir)
 
    static const struct nir_lower_tex_options lower_tex_options = {
       .lower_tg4_offsets = true,
+      .lower_txp = ~0u,
    };
    NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
    NIR_PASS_V(nir, nir_lower_frexp);
@@ -2313,21 +2440,36 @@ void lp_build_opt_nir(struct nir_shader *nir)
    NIR_PASS_V(nir, nir_lower_fp16_casts);
    do {
       progress = false;
-      NIR_PASS_V(nir, nir_opt_constant_folding);
-      NIR_PASS_V(nir, nir_opt_algebraic);
-      NIR_PASS_V(nir, nir_lower_pack);
+      NIR_PASS(progress, nir, nir_opt_constant_folding);
+      NIR_PASS(progress, nir, nir_opt_algebraic);
+      NIR_PASS(progress, nir, nir_lower_pack);
 
-      nir_lower_tex_options options = { .lower_tex_without_implicit_lod = true };
+      nir_lower_tex_options options = { 0, };
       NIR_PASS_V(nir, nir_lower_tex, &options);
 
       const nir_lower_subgroups_options subgroups_options = {
 	.subgroup_size = lp_native_vector_width / 32,
 	.ballot_bit_size = 32,
+        .ballot_components = 1,
 	.lower_to_scalar = true,
 	.lower_subgroup_masks = true,
       };
       NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options);
 
    } while (progress);
-   nir_lower_bool_to_int32(nir);
+
+   do {
+      progress = false;
+      NIR_PASS(progress, nir, nir_opt_algebraic_late);
+      if (progress) {
+         NIR_PASS_V(nir, nir_copy_prop);
+         NIR_PASS_V(nir, nir_opt_dce);
+         NIR_PASS_V(nir, nir_opt_cse);
+      }
+   } while (progress);
+
+   if (nir_lower_bool_to_int32(nir)) {
+      NIR_PASS_V(nir, nir_copy_prop);
+      NIR_PASS_V(nir, nir_opt_dce);
+   }
 }
author	Jonathan Gray <jsg@cvs.openbsd.org>	2022-02-24 01:57:18 +0000
committer	Jonathan Gray <jsg@cvs.openbsd.org>	2022-02-24 01:57:18 +0000
commit	b24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (patch)
tree	658ca4e6b41655f49463c85edbaeda48979c394c /lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
parent	57768bbb154c2879d34ec20e401b19472e77aaf7 (diff)