diff options
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c')
-rw-r--r-- | lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c | 653 |
1 files changed, 285 insertions, 368 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c index 203572010..2f82d9dee 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c @@ -33,8 +33,6 @@ #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_arit_overflow.h" -#include "gallivm/lp_bld_bitarit.h" -#include "gallivm/lp_bld_gather.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_swizzle.h" @@ -63,7 +61,8 @@ static void -draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var, + boolean elts); struct draw_gs_llvm_iface { @@ -352,9 +351,9 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm, LLVMTypeRef elem_types[4]; LLVMTypeRef vb_type; - elem_types[0] = LLVMInt16TypeInContext(gallivm->context); - elem_types[1] = LLVMInt8TypeInContext(gallivm->context); - elem_types[2] = LLVMInt32TypeInContext(gallivm->context); + elem_types[0] = + elem_types[1] = LLVMInt32TypeInContext(gallivm->context); + elem_types[2] = elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); vb_type = LLVMStructTypeInContext(gallivm->context, elem_types, @@ -363,12 +362,8 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm, (void) target; /* silence unused var warning for non-debug build */ LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, target, vb_type, 0); - LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, is_user_buffer, - target, vb_type, 1); LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, - target, vb_type, 2); - LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer.resource, - target, vb_type, 3); + target, vb_type, 1); LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type); @@ -580,13 +575,17 @@ draw_llvm_create_variant(struct draw_llvm *llvm, variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); - draw_llvm_generate(llvm, variant); + draw_llvm_generate(llvm, variant, FALSE); /* linear */ + draw_llvm_generate(llvm, variant, TRUE); /* elts */ gallivm_compile_module(variant->gallivm); variant->jit_func = (draw_jit_vert_func) gallivm_jit_function(variant->gallivm, variant->function); + variant->jit_func_elts = (draw_jit_vert_func_elts) + gallivm_jit_function(variant->gallivm, variant->function_elts); + gallivm_free_ir(variant->gallivm); variant->list_item_global.base = variant; @@ -657,134 +656,101 @@ generate_vs(struct draw_llvm_variant *variant, } } - static void -fetch_instanced(struct gallivm_state *gallivm, - const struct util_format_description *format_desc, - struct lp_type vs_type, - LLVMValueRef vb_stride, - LLVMValueRef map_ptr, - LLVMValueRef buffer_size_adj, - LLVMValueRef *inputs, - LLVMValueRef index) +generate_fetch(struct gallivm_state *gallivm, + struct draw_context *draw, + const struct util_format_description *format_desc, + LLVMValueRef vb_stride, + LLVMValueRef stride_fixed, + LLVMValueRef map_ptr, + LLVMValueRef buffer_size_adj, + LLVMValueRef ofbit, + LLVMValueRef *res, + LLVMValueRef index) { - LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context); - LLVMTypeRef aosf_t, aosi_t; - LLVMValueRef zero = LLVMConstNull(i32_t); + LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef stride, buffer_overflowed, aos, index_valid; - unsigned i; + LLVMValueRef stride; + LLVMValueRef buffer_overflowed; + LLVMValueRef temp_ptr = + lp_build_alloca(gallivm, + lp_build_vec_type(gallivm, lp_float32_vec4_type()), ""); + struct lp_build_if_state if_ctx; - aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type()); - aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type()); + if (format_desc->format == PIPE_FORMAT_NONE) { + *res = lp_build_const_vec(gallivm, lp_float32_vec4_type(), 0); + return; + } - /* This mul can overflow. Wraparound is ok. */ - stride = LLVMBuildMul(builder, vb_stride, index, ""); + stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit); + stride = lp_build_uadd_overflow(gallivm, stride, stride_fixed, &ofbit); - buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE, + buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGT, stride, buffer_size_adj, "buffer_overflowed"); + buffer_overflowed = LLVMBuildOr(builder, buffer_overflowed, ofbit, ""); if (0) { - lp_build_print_value(gallivm, " instance index = ", index); + lp_build_printf(gallivm, " stride = %u\n", stride); + lp_build_printf(gallivm, " buffer size adj = %u\n", buffer_size_adj); lp_build_print_value(gallivm, " buffer overflowed = ", buffer_overflowed); } - index_valid = LLVMBuildNot(builder, buffer_overflowed, ""); - index_valid = LLVMBuildSExt(builder, index_valid, i32_t, ""); - stride = LLVMBuildAnd(builder, stride, index_valid, ""); - - aos = lp_build_fetch_rgba_aos(gallivm, - format_desc, - lp_float32_vec4_type(), - FALSE, - map_ptr, - stride, zero, zero, - NULL); - - index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid); - aos = LLVMBuildBitCast(builder, aos, aosi_t, ""); - aos = LLVMBuildAnd(builder, aos, index_valid, ""); - aos = LLVMBuildBitCast(builder, aos, aosf_t, ""); - - for (i = 0; i < TGSI_NUM_CHANNELS; i++) { - LLVMValueRef index = lp_build_const_int32(gallivm, i); - inputs[i] = lp_build_extract_broadcast(gallivm, - lp_float32_vec4_type(), - vs_type, aos, index); + lp_build_if(&if_ctx, gallivm, buffer_overflowed); + { + LLVMValueRef val = + lp_build_const_vec(gallivm, lp_float32_vec4_type(), 0); + LLVMBuildStore(builder, val, temp_ptr); } -} + lp_build_else(&if_ctx); + { + LLVMValueRef val; + map_ptr = LLVMBuildGEP(builder, map_ptr, &stride, 1, ""); + + val = lp_build_fetch_rgba_aos(gallivm, + format_desc, + lp_float32_vec4_type(), + FALSE, + map_ptr, + zero, zero, zero, + NULL); + LLVMBuildStore(builder, val, temp_ptr); + } + lp_build_endif(&if_ctx); + *res = LLVMBuildLoad(builder, temp_ptr, "aos"); +} static void -fetch_vector(struct gallivm_state *gallivm, - const struct util_format_description *format_desc, - struct lp_type vs_type, - LLVMValueRef vb_stride, - LLVMValueRef map_ptr, - LLVMValueRef buffer_size_adj, - LLVMValueRef *inputs, - LLVMValueRef indices) +convert_to_soa(struct gallivm_state *gallivm, + LLVMValueRef src_aos[LP_MAX_VECTOR_WIDTH / 32], + LLVMValueRef (*dst_soa)[TGSI_NUM_CHANNELS], + unsigned attrib, const struct lp_type soa_type) { - LLVMBuilderRef builder = gallivm->builder; - struct lp_build_context blduivec; - struct lp_type fetch_type = vs_type; - LLVMValueRef offset, valid_mask; - unsigned i; - - lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type)); + unsigned j, k; + struct lp_type aos_channel_type = soa_type; - vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride); - buffer_size_adj = lp_build_broadcast_scalar(&blduivec, buffer_size_adj); + LLVMValueRef aos_channels[TGSI_NUM_CHANNELS]; + unsigned pixels_per_channel = soa_type.length / TGSI_NUM_CHANNELS; - /* This mul can overflow. Wraparound is ok. */ - offset = lp_build_mul(&blduivec, vb_stride, indices); + debug_assert(TGSI_NUM_CHANNELS == 4); + debug_assert((soa_type.length % TGSI_NUM_CHANNELS) == 0); - valid_mask = lp_build_compare(gallivm, blduivec.type, - PIPE_FUNC_LESS, offset, buffer_size_adj); + aos_channel_type.length >>= 1; - /* not valid elements use offset 0 */ - offset = LLVMBuildAnd(builder, offset, valid_mask, ""); + for (j = 0; j < TGSI_NUM_CHANNELS; ++j) { + LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 }; - if (0) { - lp_build_print_value(gallivm, " indices = ", indices); - lp_build_print_value(gallivm, " offsets = ", offset); - lp_build_print_value(gallivm, " valid_mask = ", valid_mask); - } - - /* - * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches. - * This should always produce better code. - */ + assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH); - /* The type handling is annoying here... */ - if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && - format_desc->channel[0].pure_integer) { - if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { - fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length); - } - else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { - fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length); + for (k = 0; k < pixels_per_channel; ++k) { + channel[k] = src_aos[j + TGSI_NUM_CHANNELS * k]; } - } - lp_build_fetch_rgba_soa(gallivm, format_desc, - fetch_type, FALSE, map_ptr, offset, - blduivec.zero, blduivec.zero, - NULL, inputs); - - for (i = 0; i < TGSI_NUM_CHANNELS; i++) { - inputs[i] = LLVMBuildBitCast(builder, inputs[i], - lp_build_vec_type(gallivm, vs_type), ""); + aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel); } - /* out-of-bound fetches return all zeros */ - for (i = 0; i < TGSI_NUM_CHANNELS; i++) { - inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, ""); - inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, ""); - inputs[i] = LLVMBuildBitCast(builder, inputs[i], - lp_build_vec_type(gallivm, vs_type), ""); - } + lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa[attrib]); } @@ -1336,21 +1302,20 @@ generate_clipmask(struct draw_llvm *llvm, /** * Returns boolean if any clipping has occurred - * Used zero/one i8 value to represent boolean + * Used zero/non-zero i32 value to represent boolean */ static LLVMValueRef -clipmask_booli8(struct gallivm_state *gallivm, - const struct lp_type vs_type, - LLVMValueRef clipmask_bool_ptr, - boolean edgeflag_in_clipmask) +clipmask_booli32(struct gallivm_state *gallivm, + const struct lp_type vs_type, + LLVMValueRef clipmask_bool_ptr, + boolean edgeflag_in_clipmask) { LLVMBuilderRef builder = gallivm->builder; - LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context); + LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, ""); - LLVMValueRef ret; - struct lp_build_context bldivec; - - lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type)); + LLVMValueRef ret = LLVMConstNull(int32_type); + LLVMValueRef temp; + int i; /* * We need to invert the edgeflag bit from the clipmask here @@ -1358,20 +1323,19 @@ clipmask_booli8(struct gallivm_state *gallivm, * and we (may) need it if edgeflag was 0). */ if (edgeflag_in_clipmask) { - LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type, + struct lp_type i32_type = lp_int_type(vs_type); + LLVMValueRef edge = lp_build_const_int_vec(gallivm, i32_type, 1LL << DRAW_TOTAL_CLIP_PLANES); clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, ""); } - /* - * XXX: probably should mask off bits from the mask which come from - * vertices which were beyond the count (i.e. indices_valid for - * linear fetches, for elts ones we don't have the correct mask - * right now). Otherwise might run the pipeline for nothing, - * though everything should still work. + * Could do much better with just cmp/movmskps. */ - ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool); - ret = LLVMBuildZExt(builder, ret, int8_type, ""); + for (i=0; i < vs_type.length; i++) { + temp = LLVMBuildExtractElement(builder, clipmask_bool, + lp_build_const_int32(gallivm, i) , ""); + ret = LLVMBuildOr(builder, ret, temp, ""); + } return ret; } @@ -1518,38 +1482,43 @@ draw_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base, } static void -draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, + boolean elts) { struct gallivm_state *gallivm = variant->gallivm; LLVMContextRef context = gallivm->context; LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); LLVMTypeRef arg_types[11]; - unsigned num_arg_types = ARRAY_SIZE(arg_types); + unsigned num_arg_types = + elts ? ARRAY_SIZE(arg_types) : ARRAY_SIZE(arg_types) - 1; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; char func_name[64]; struct lp_type vs_type; - LLVMValueRef count, fetch_elts, start_or_maxelt; + LLVMValueRef end, start; + LLVMValueRef count, fetch_elts, fetch_elt_max, fetch_count; LLVMValueRef vertex_id_offset, start_instance; LLVMValueRef stride, step, io_itr; - LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; - LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS]; - LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS]; - LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS]; - LLVMValueRef instance_index[PIPE_MAX_ATTRIBS]; - LLVMValueRef fake_buf_ptr, fake_buf; + LLVMValueRef zero = lp_build_const_int32(gallivm, 0); + LLVMValueRef one = lp_build_const_int32(gallivm, 1); + LLVMValueRef vb_stride[PIPE_MAX_SHADER_INPUTS]; + LLVMValueRef map_ptr[PIPE_MAX_SHADER_INPUTS]; + LLVMValueRef buffer_size_adj[PIPE_MAX_SHADER_INPUTS]; + LLVMValueRef stride_fixed[PIPE_MAX_SHADER_INPUTS]; + LLVMValueRef ofbit[PIPE_MAX_SHADER_INPUTS]; + LLVMValueRef instance_index[PIPE_MAX_SHADER_INPUTS]; struct draw_context *draw = llvm->draw; const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info; unsigned i, j; - struct lp_build_context bld, blduivec; + struct lp_build_context bld; struct lp_build_loop_state lp_loop; - struct lp_build_if_state if_ctx; const int vector_length = lp_native_vector_width / 32; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef fetch_max; struct lp_build_sampler_soa *sampler = 0; LLVMValueRef ret, clipmask_bool_ptr; struct draw_llvm_variant_key *key = &variant->key; @@ -1561,80 +1530,93 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) * the values). */ const boolean bypass_viewport = key->has_gs || key->bypass_viewport || - vs_info->writes_viewport_index; + llvm->draw->vs.vertex_shader->info.writes_viewport_index; const boolean enable_cliptest = !key->has_gs && (key->clip_xy || key->clip_z || key->clip_user || key->need_edgeflags); LLVMValueRef variant_func; - const unsigned pos = draw->vs.position_output; - const unsigned cv = draw->vs.clipvertex_output; + const unsigned pos = llvm->draw->vs.position_output; + const unsigned cv = llvm->draw->vs.clipvertex_output; boolean have_clipdist = FALSE; struct lp_bld_tgsi_system_values system_values; memset(&system_values, 0, sizeof(system_values)); - util_snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant%u", - variant->shader->variants_cached); + util_snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant%u_%s", + variant->shader->variants_cached, elts ? "elts" : "linear"); i = 0; arg_types[i++] = get_context_ptr_type(variant); /* context */ arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */ arg_types[i++] = get_buffer_ptr_type(variant); /* vbuffers */ - arg_types[i++] = int32_type; /* count */ - arg_types[i++] = int32_type; /* start/fetch_elt_max */ - arg_types[i++] = int32_type; /* stride */ - arg_types[i++] = get_vb_ptr_type(variant); /* pipe_vertex_buffer's */ - arg_types[i++] = int32_type; /* instance_id */ - arg_types[i++] = int32_type; /* vertex_id_offset */ - arg_types[i++] = int32_type; /* start_instance */ - arg_types[i++] = LLVMPointerType(int32_type, 0); /* fetch_elts */ - - func_type = LLVMFunctionType(LLVMInt8TypeInContext(context), - arg_types, num_arg_types, 0); + if (elts) { + arg_types[i++] = LLVMPointerType(int32_type, 0);/* fetch_elts */ + arg_types[i++] = int32_type; /* fetch_elt_max */ + } else + arg_types[i++] = int32_type; /* start */ + arg_types[i++] = int32_type; /* fetch_count / count */ + arg_types[i++] = int32_type; /* stride */ + arg_types[i++] = get_vb_ptr_type(variant); /* pipe_vertex_buffer's */ + arg_types[i++] = int32_type; /* instance_id */ + arg_types[i++] = int32_type; /* vertex_id_offset */ + arg_types[i++] = int32_type; /* start_instance */ + + func_type = LLVMFunctionType(int32_type, arg_types, num_arg_types, 0); variant_func = LLVMAddFunction(gallivm->module, func_name, func_type); - variant->function = variant_func; + + if (elts) + variant->function_elts = variant_func; + else + variant->function = variant_func; LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); for (i = 0; i < num_arg_types; ++i) if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS); + LLVMAddAttribute(LLVMGetParam(variant_func, i), + LLVMNoAliasAttribute); context_ptr = LLVMGetParam(variant_func, 0); io_ptr = LLVMGetParam(variant_func, 1); vbuffers_ptr = LLVMGetParam(variant_func, 2); - count = LLVMGetParam(variant_func, 3); - /* - * XXX: the maxelt part is unused. Not really useful, since we cannot - * get index buffer overflows due to vsplit (which provides its own - * elts buffer, with a different size than what's passed in here). - */ - start_or_maxelt = LLVMGetParam(variant_func, 4); /* * XXX: stride is actually unused. The stride we use is strictly calculated * from the number of outputs (including the draw_extra outputs). * Should probably fix some day (we need a new vs just because of extra * outputs which the generated vs won't touch). */ - stride = LLVMGetParam(variant_func, 5); - vb_ptr = LLVMGetParam(variant_func, 6); - system_values.instance_id = LLVMGetParam(variant_func, 7); - vertex_id_offset = LLVMGetParam(variant_func, 8); - start_instance = LLVMGetParam(variant_func, 9); - fetch_elts = LLVMGetParam(variant_func, 10); + stride = LLVMGetParam(variant_func, 5 + (elts ? 1 : 0)); + vb_ptr = LLVMGetParam(variant_func, 6 + (elts ? 1 : 0)); + system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0)); + vertex_id_offset = LLVMGetParam(variant_func, 8 + (elts ? 1 : 0)); + start_instance = LLVMGetParam(variant_func, 9 + (elts ? 1 : 0)); lp_build_name(context_ptr, "context"); lp_build_name(io_ptr, "io"); lp_build_name(vbuffers_ptr, "vbuffers"); - lp_build_name(count, "count"); - lp_build_name(start_or_maxelt, "start_or_maxelt"); lp_build_name(stride, "stride"); lp_build_name(vb_ptr, "vb"); lp_build_name(system_values.instance_id, "instance_id"); lp_build_name(vertex_id_offset, "vertex_id_offset"); lp_build_name(start_instance, "start_instance"); - lp_build_name(fetch_elts, "fetch_elts"); + + if (elts) { + fetch_elts = LLVMGetParam(variant_func, 3); + fetch_elt_max = LLVMGetParam(variant_func, 4); + fetch_count = LLVMGetParam(variant_func, 5); + lp_build_name(fetch_elts, "fetch_elts"); + lp_build_name(fetch_elt_max, "fetch_elt_max"); + lp_build_name(fetch_count, "fetch_count"); + start = count = NULL; + } + else { + start = LLVMGetParam(variant_func, 3); + count = LLVMGetParam(variant_func, 4); + lp_build_name(start, "start"); + lp_build_name(count, "count"); + fetch_elts = fetch_count = NULL; + } /* * Function body @@ -1644,6 +1626,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) builder = gallivm->builder; LLVMPositionBuilderAtEnd(builder, block); + lp_build_context_init(&bld, gallivm, lp_type_int(32)); + memset(&vs_type, 0, sizeof vs_type); vs_type.floating = TRUE; /* floating point values */ vs_type.sign = TRUE; /* values are signed */ @@ -1651,143 +1635,87 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) vs_type.width = 32; /* 32-bit float */ vs_type.length = vector_length; - lp_build_context_init(&bld, gallivm, lp_type_uint(32)); - lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type)); - /* hold temporary "bool" clipmask */ - clipmask_bool_ptr = lp_build_alloca(gallivm, blduivec.vec_type, ""); - - fake_buf = lp_build_alloca_undef(gallivm, - LLVMVectorType(LLVMInt64TypeInContext(context), 4), ""); - fake_buf = LLVMBuildBitCast(builder, fake_buf, - LLVMPointerType(LLVMInt8TypeInContext(context), 0), ""); - fake_buf_ptr = LLVMBuildGEP(builder, fake_buf, &bld.zero, 1, ""); + clipmask_bool_ptr = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, vs_type), ""); + LLVMBuildStore(builder, lp_build_zero(gallivm, lp_int_type(vs_type)), clipmask_bool_ptr); /* code generated texture sampling */ sampler = draw_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key)); - step = lp_build_const_int32(gallivm, vector_length); - - ind_vec = blduivec.undef; - for (i = 0; i < vs_type.length; i++) { - LLVMValueRef index = lp_build_const_int32(gallivm, i); - ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, ""); + if (elts) { + start = zero; + end = fetch_count; + count = fetch_count; + } + else { + end = lp_build_add(&bld, start, count); } - have_elts = LLVMBuildICmp(builder, LLVMIntNE, - LLVMConstPointerNull(arg_types[10]), fetch_elts, ""); + step = lp_build_const_int32(gallivm, vector_length); - fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max"); - fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max); - /* - * Only needed for non-indexed path. - */ - start_vec = lp_build_broadcast_scalar(&blduivec, start_or_maxelt); + fetch_max = LLVMBuildSub(builder, end, one, "fetch_max"); /* * Pre-calculate everything which is constant per shader invocation. */ - for (j = 0; j < key->nr_vertex_elements; ++j) { - LLVMValueRef vb_buffer_offset, buffer_size, temp_ptr; - LLVMValueRef vb_info, vbuffer_ptr, buf_offset, ofbit; - struct pipe_vertex_element *velem = &key->vertex_element[j]; + for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { + LLVMValueRef vb_buffer_offset, buffer_size; + LLVMValueRef vb_info, vbuffer_ptr; + struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; LLVMValueRef vb_index = lp_build_const_int32(gallivm, velem->vertex_buffer_index); LLVMValueRef bsize = lp_build_const_int32(gallivm, util_format_get_blocksize(velem->src_format)); LLVMValueRef src_offset = lp_build_const_int32(gallivm, velem->src_offset); - struct lp_build_if_state if_ctx; - - if (velem->src_format != PIPE_FORMAT_NONE) { - vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &vb_index, 1, ""); - vb_info = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); - vb_stride[j] = draw_jit_vbuffer_stride(gallivm, vb_info); - vb_stride[j] = LLVMBuildZExt(gallivm->builder, vb_stride[j], - LLVMInt32TypeInContext(context), ""); - vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vb_info); - map_ptr[j] = draw_jit_dvbuffer_map(gallivm, vbuffer_ptr); - buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr); - - ofbit = NULL; - /* - * We'll set buffer_size_adj to zero if we have of, so it will - * always overflow later automatically without having to keep ofbit. - * Overflows (with normal wraparound) doing the actual offset - * calculation should be ok, just not for the buffer size calc. - * It would also be possible to detect such overflows and return - * zeros if that happens, but this would be more complex. - */ - buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset); - tmp = lp_build_sub(&bld, bsize, bld.one); - buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp, - &ofbit); - buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j], - buf_offset, &ofbit); - - /* - * We can't easily set fake vertex buffers outside the generated code. - * Hence, set fake vertex buffers here instead basically, so fetch - * code can always fetch using offset 0, eliminating all control flow - * inside the main loop. - * (Alternatively, could have control flow per vector skipping fetch - * if ofbit is true.) - */ - if (velem->instance_divisor) { - /* - * Index is equal to the start instance plus the number of current - * instance divided by the divisor. In this case we compute it as: - * index = start_instance + (instance_id / divisor). - * Note we could actually do the fetch here, outside the loop - - * it's all constant, hopefully llvm recognizes this. - */ - LLVMValueRef current_instance; - current_instance = LLVMBuildUDiv(builder, system_values.instance_id, - lp_build_const_int32(gallivm, - velem->instance_divisor), - "instance_divisor"); - instance_index[j] = lp_build_uadd_overflow(gallivm, start_instance, - current_instance, &ofbit); - } - - buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero, - buffer_size_adj[j], ""); - temp_ptr = lp_build_alloca_undef(gallivm, - LLVMPointerType(LLVMInt8TypeInContext(context), 0), ""); + vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &vb_index, 1, ""); + vb_info = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); + vb_stride[j] = draw_jit_vbuffer_stride(gallivm, vb_info); + vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vb_info); + map_ptr[j] = draw_jit_dvbuffer_map(gallivm, vbuffer_ptr); + buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr); + + ofbit[j] = NULL; + stride_fixed[j] = lp_build_uadd_overflow(gallivm, vb_buffer_offset, + src_offset, &ofbit[j]); + buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, bsize, + &ofbit[j]); + + if (velem->instance_divisor) { + /* Index is equal to the start instance plus the number of current + * instance divided by the divisor. In this case we compute it as: + * index = start_instance + (instance_id / divisor) + */ + LLVMValueRef current_instance; + current_instance = LLVMBuildUDiv(builder, system_values.instance_id, + lp_build_const_int32(gallivm, + velem->instance_divisor), + "instance_divisor"); + instance_index[j] = lp_build_uadd_overflow(gallivm, start_instance, + current_instance, &ofbit[j]); + } - lp_build_if(&if_ctx, gallivm, ofbit); - { - LLVMBuildStore(builder, fake_buf_ptr, temp_ptr); - } - lp_build_else(&if_ctx); - { - map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, ""); - LLVMBuildStore(builder, map_ptr[j], temp_ptr); - } - lp_build_endif(&if_ctx); - map_ptr[j] = LLVMBuildLoad(builder, temp_ptr, "map_ptr"); - - if (0) { - lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n", - lp_build_const_int32(gallivm, j), - vb_index, vb_stride[j]); - lp_build_printf(gallivm, - " vb_buffer_offset = %u, src_offset = %u, buf_offset = %u\n", - vb_buffer_offset, src_offset, buf_offset); - lp_build_printf(gallivm, " buffer size = %u, blocksize = %u\n", - buffer_size, bsize); - lp_build_printf(gallivm, " instance_id = %u\n", system_values.instance_id); - } + if (0) { + lp_build_printf(gallivm, "vbuf index = %u, vb_stride is %u\n", + vb_index, vb_stride[j]); + lp_build_printf(gallivm, " vb_buffer_offset = %u, src_offset is %u\n", + vb_buffer_offset, src_offset); + lp_build_print_value(gallivm, " blocksize = ", bsize); + lp_build_printf(gallivm, " instance_id = %u\n", system_values.instance_id); + lp_build_printf(gallivm, " buffer size = %u\n", buffer_size); } } - lp_build_loop_begin(&lp_loop, gallivm, bld.zero); + lp_build_loop_begin(&lp_loop, gallivm, zero); { LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef aos_attribs[LP_MAX_VECTOR_WIDTH / 32] = { 0 }; LLVMValueRef io; LLVMValueRef clipmask; /* holds the clipmask value */ - LLVMValueRef true_index_array, index_store; + LLVMValueRef true_index_array = lp_build_zero(gallivm, + lp_type_uint_vec(32, 32*vector_length)); + LLVMValueRef true_indices[LP_MAX_VECTOR_WIDTH / 32]; const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS]; io_itr = lp_loop.counter; @@ -1798,86 +1726,73 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) io_itr, io, lp_loop.counter); #endif - true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter); - true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, ""); - - /* - * Limit indices to fetch_max, otherwise might try to access indices - * beyond index buffer (or rather vsplit elt buffer) size. - * Could probably safely (?) skip this for non-indexed draws and - * simplify things minimally (by removing it could combine the ind_vec - * and start_vec adds). I think the only effect for non-indexed draws will - * be that for the invalid elements they will be all fetched from the - * same location as the last valid one, but noone should really care. - */ - true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max); - - index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store"); - - lp_build_if(&if_ctx, gallivm, have_elts); - { - /* - * Note: you'd expect some comparison/clamp against fetch_elt_max - * here. - * There used to be one here but it was incorrect: overflow was - * detected if index > fetch_elt_max - but the correct condition - * would be index >= fetch_elt_max (since this is just size of elts - * buffer / element size). - * Using the correct condition however will cause failures - due to - * vsplit/vcache code which rebases indices. So, as an example, if - * fetch_elt_max is just 1 and fetch_count 2, vsplit cache will - * replace all invalid indices with 0 - which in case of elt_bias - * not being zero will get a different fetch index than the valid - * index 0. So, just rely on vsplit code preventing out-of-bounds - * fetches. This is also why it's safe to do elts fetch even if there - * was no index buffer bound - the real buffer is never seen here, at - * least not if there are index buffer overflows... - */ - - /* - * XXX should not have to do this, as scale can be handled - * natively by loads (hits asserts though). - */ - tmp = lp_build_shl_imm(&blduivec, true_index_array, 2); - fetch_elts = LLVMBuildBitCast(builder, fetch_elts, - LLVMPointerType(LLVMInt8TypeInContext(context), - 0), ""); - tmp = lp_build_gather(gallivm, vs_type.length, - 32, bld.type, TRUE, - fetch_elts, tmp, FALSE); - LLVMBuildStore(builder, tmp, index_store); - } - lp_build_else(&if_ctx); - { - tmp = LLVMBuildAdd(builder, true_index_array, start_vec, ""); - LLVMBuildStore(builder, tmp, index_store); + for (i = 0; i < vector_length; ++i) { + LLVMValueRef vert_index = + LLVMBuildAdd(builder, + lp_loop.counter, + lp_build_const_int32(gallivm, i), ""); + LLVMValueRef true_index = + LLVMBuildAdd(builder, start, vert_index, ""); + + /* make sure we're not out of bounds which can happen + * if fetch_count % 4 != 0, because on the last iteration + * a few of the 4 vertex fetches will be out of bounds */ + true_index = lp_build_min(&bld, true_index, fetch_max); + + if (elts) { + LLVMValueRef fetch_ptr; + LLVMValueRef index_overflowed; + LLVMValueRef index_ptr = + lp_build_alloca( + gallivm, + lp_build_vec_type(gallivm, lp_type_int(32)), ""); + struct lp_build_if_state if_ctx; + index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT, + true_index, fetch_elt_max, + "index_overflowed"); + + lp_build_if(&if_ctx, gallivm, index_overflowed); + { + /* Generate maximum possible index so that + * generate_fetch can treat it just like + * any other overflow and return zeros. + * We don't have to worry about the restart + * primitive index because it has already been + * handled + */ + LLVMValueRef val = + lp_build_const_int32(gallivm, 0xffffffff); + LLVMBuildStore(builder, val, index_ptr); + } + lp_build_else(&if_ctx); + { + LLVMValueRef val; + fetch_ptr = LLVMBuildGEP(builder, fetch_elts, + &true_index, 1, ""); + val = LLVMBuildLoad(builder, fetch_ptr, ""); + LLVMBuildStore(builder, val, index_ptr); + } + lp_build_endif(&if_ctx); + true_index = LLVMBuildLoad(builder, index_ptr, "true_index"); + } + true_indices[i] = true_index; + true_index_array = LLVMBuildInsertElement( + gallivm->builder, true_index_array, true_index, + lp_build_const_int32(gallivm, i), ""); } - lp_build_endif(&if_ctx); - - true_index_array = LLVMBuildLoad(builder, index_store, ""); for (j = 0; j < key->nr_vertex_elements; ++j) { - struct pipe_vertex_element *velem = &key->vertex_element[j]; + struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; const struct util_format_description *format_desc = util_format_description(velem->src_format); - if (format_desc->format == PIPE_FORMAT_NONE) { - for (i = 0; i < TGSI_NUM_CHANNELS; i++) { - inputs[j][i] = lp_build_zero(gallivm, vs_type); - } - } - else if (velem->instance_divisor) { - fetch_instanced(gallivm, format_desc, vs_type, - vb_stride[j], map_ptr[j], - buffer_size_adj[j], - inputs[j], instance_index[j]); - } - else { - fetch_vector(gallivm, format_desc, vs_type, - vb_stride[j], map_ptr[j], - buffer_size_adj[j], - inputs[j], true_index_array); + for (i = 0; i < vector_length; ++i) { + generate_fetch(gallivm, draw, format_desc, + vb_stride[j], stride_fixed[j], map_ptr[j], + buffer_size_adj[j], ofbit[j], &aos_attribs[i], + velem->instance_divisor ? instance_index[j] : true_indices[i]); } + convert_to_soa(gallivm, aos_attribs, inputs, j, vs_type); } /* In the paths with elts vertex id has to be unaffected by the @@ -1891,8 +1806,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) * most 4095-vertices) we need to back out the original start * index out of our vertex id here. */ - system_values.basevertex = lp_build_broadcast_scalar(&blduivec, - vertex_id_offset); + system_values.basevertex = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, + lp_type_uint_vec(32, 32*vector_length)), + vertex_id_offset); system_values.vertex_id = true_index_array; system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array, system_values.basevertex, ""); @@ -1927,7 +1843,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMBuildStore(builder, temp, clipmask_bool_ptr); } else { - clipmask = blduivec.zero; + clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0); } /* do viewport mapping */ @@ -1936,7 +1852,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) } } else { - clipmask = blduivec.zero; + clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0); } /* store clipmask in vertex header, @@ -1952,8 +1868,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) sampler->destroy(sampler); /* return clipping boolean value for function */ - ret = clipmask_booli8(gallivm, vs_type, clipmask_bool_ptr, - enable_cliptest && key->need_edgeflags); + ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr, + enable_cliptest && key->need_edgeflags); LLVMBuildRet(builder, ret); @@ -2072,7 +1988,7 @@ draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key) void draw_llvm_set_mapped_texture(struct draw_context *draw, - enum pipe_shader_type shader_stage, + unsigned shader_stage, unsigned sview_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t first_level, uint32_t last_level, @@ -2117,7 +2033,7 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, void draw_llvm_set_sampler_state(struct draw_context *draw, - enum pipe_shader_type shader_type) + unsigned shader_type) { unsigned i; @@ -2273,7 +2189,8 @@ draw_gs_llvm_generate(struct draw_llvm *llvm, for (i = 0; i < ARRAY_SIZE(arg_types); ++i) if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS); + LLVMAddAttribute(LLVMGetParam(variant_func, i), + LLVMNoAliasAttribute); context_ptr = LLVMGetParam(variant_func, 0); input_array = LLVMGetParam(variant_func, 1); |