diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-09-02 05:47:02 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-09-02 05:47:02 +0000 |
commit | 0dbbf1e0708df85a357d70e2708c0a11aeb5480e (patch) | |
tree | 6656ff8eb8b15a2fc1c02888973caf618388cfd0 /lib/mesa/src/gallium/auxiliary | |
parent | 5f66494d31f735486b8222ecfa0a0c9046e92543 (diff) |
Merge Mesa 22.1.7
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary')
70 files changed, 1038 insertions, 2716 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.c b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.c index 97b4da0af..111de3f0e 100644 --- a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.c @@ -49,7 +49,7 @@ #include "cso_cache/cso_cache.h" #include "cso_cache/cso_hash.h" #include "cso_context.h" - +#include "driver_trace/tr_dump.h" /** * Per-shader sampler information. @@ -296,6 +296,9 @@ void cso_unbind_context(struct cso_context *ctx) { unsigned i; + bool dumping = trace_dumping_enabled_locked(); + if (dumping) + trace_dumping_stop_locked(); if (ctx->pipe) { ctx->pipe->bind_blend_state( ctx->pipe, NULL ); ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL ); @@ -399,6 +402,8 @@ void cso_unbind_context(struct cso_context *ctx) ctx->pipe->set_sample_mask(ctx->pipe, ctx->sample_mask); if (ctx->pipe->set_min_samples) ctx->pipe->set_min_samples(ctx->pipe, ctx->min_samples); + if (dumping) + trace_dumping_start_locked(); } /** @@ -1118,20 +1123,24 @@ cso_restore_vertex_elements(struct cso_context *ctx) void cso_set_vertex_buffers(struct cso_context *ctx, unsigned start_slot, unsigned count, + unsigned unbind_trailing_count, + bool take_ownership, const struct pipe_vertex_buffer *buffers) { struct u_vbuf *vbuf = ctx->vbuf_current; - if (!count) + if (!count && !unbind_trailing_count) return; if (vbuf) { - u_vbuf_set_vertex_buffers(vbuf, start_slot, count, 0, false, buffers); + u_vbuf_set_vertex_buffers(vbuf, start_slot, count, unbind_trailing_count, + take_ownership, buffers); return; } struct pipe_context *pipe = ctx->pipe; - pipe->set_vertex_buffers(pipe, start_slot, count, 0, false, buffers); + pipe->set_vertex_buffers(pipe, start_slot, count, unbind_trailing_count, + take_ownership, buffers); } /** diff --git a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.h b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.h index 6507bd026..b153e8347 100644 --- a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_context.h @@ -90,6 +90,8 @@ enum pipe_error cso_set_vertex_elements(struct cso_context *ctx, void cso_set_vertex_buffers(struct cso_context *ctx, unsigned start_slot, unsigned count, + unsigned unbind_trailing_count, + bool take_ownership, const struct pipe_vertex_buffer *buffers); void cso_set_stream_outputs(struct cso_context *ctx, diff --git a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_hash.c b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_hash.c index dae3a2dbc..0ae1bfd1b 100644 --- a/lib/mesa/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/lib/mesa/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -178,7 +178,7 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, struct cso_node **nextNode = cso_hash_find_node(hash, key); struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode); if (!node) { - struct cso_hash_iter null_iter = {hash, 0}; + struct cso_hash_iter null_iter = {hash, NULL}; return null_iter; } @@ -188,8 +188,8 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, void cso_hash_init(struct cso_hash *hash) { - hash->fakeNext = 0; - hash->buckets = 0; + hash->fakeNext = NULL; + hash->buckets = NULL; hash->size = 0; hash->userNumBits = (short)MinNumBits; hash->numBits = 0; diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_context.c b/lib/mesa/src/gallium/auxiliary/draw/draw_context.c index 7a9416889..39d3a61b9 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_context.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_context.c @@ -945,6 +945,8 @@ draw_current_shader_outputs(const struct draw_context *draw) { if (draw->gs.geometry_shader) return draw->gs.num_gs_outputs; + if (draw->tes.tess_eval_shader) + return draw->tes.num_tes_outputs; return draw->vs.num_vs_outputs; } diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c index 483bf62b4..e0d0ebad0 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c @@ -3397,6 +3397,8 @@ draw_tcs_llvm_generate(struct draw_llvm *llvm, LLVMSetFunctionCallConv(variant_coro, LLVMCCallConv); + LLVMAddTargetDependentFunctionAttr(variant_coro, "coroutine.presplit", "0"); + for (i = 0; i < ARRAY_SIZE(arg_types); ++i) { if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) { lp_add_function_attr(variant_coro, i + 1, LP_FUNC_ATTR_NOALIAS); diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 49d5c65b6..7dac40785 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -281,9 +281,6 @@ generate_aaline_fs(struct aaline_stage *aaline) const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS; aaline_fs = *orig_fs; /* copy to init */ - aaline_fs.tokens = tgsi_alloc_tokens(newLen); - if (aaline_fs.tokens == NULL) - return FALSE; memset(&transform, 0, sizeof(transform)); transform.colorOutput = -1; @@ -296,9 +293,9 @@ generate_aaline_fs(struct aaline_stage *aaline) transform.base.transform_instruction = aa_transform_inst; transform.base.transform_declaration = aa_transform_decl; - tgsi_transform_shader(orig_fs->tokens, - (struct tgsi_token *) aaline_fs.tokens, - newLen, &transform.base); + aaline_fs.tokens = tgsi_transform_shader(orig_fs->tokens, newLen, &transform.base); + if (!aaline_fs.tokens) + return false; #if 0 /* DEBUG */ debug_printf("draw_aaline, orig shader:\n"); diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 08adcddf6..16ebba3fd 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -368,9 +368,6 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) aapoint_fs = *orig_fs; /* copy to init */ assert(aapoint_fs.type == PIPE_SHADER_IR_TGSI); - aapoint_fs.tokens = tgsi_alloc_tokens(newLen); - if (aapoint_fs.tokens == NULL) - return FALSE; memset(&transform, 0, sizeof(transform)); transform.colorOutput = -1; @@ -383,9 +380,9 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) transform.base.transform_instruction = aa_transform_inst; transform.base.transform_declaration = aa_transform_decl; - tgsi_transform_shader(orig_fs->tokens, - (struct tgsi_token *) aapoint_fs.tokens, - newLen, &transform.base); + aapoint_fs.tokens = tgsi_transform_shader(orig_fs->tokens, newLen, &transform.base); + if (!aapoint_fs.tokens) + return false; #if 0 /* DEBUG */ debug_printf("draw_aapoint, orig shader:\n"); diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_user_cull.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_user_cull.c index fcc177e04..86968a257 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_user_cull.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_user_cull.c @@ -38,11 +38,6 @@ struct user_cull_stage { struct draw_stage stage; }; -static inline struct user_cull_stage *user_cull_stage( struct draw_stage *stage ) -{ - return (struct user_cull_stage *)stage; -} - static inline boolean cull_distance_is_out(float dist) { diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_private.h b/lib/mesa/src/gallium/auxiliary/draw/draw_private.h index ffdc8b987..bf3fc0810 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_private.h +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_private.h @@ -359,6 +359,7 @@ struct draw_context struct { struct draw_tess_eval_shader *tess_eval_shader; + uint num_tes_outputs; /**< convenience, from tess_eval_shader */ uint position_output; uint clipvertex_output; diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c index f1821878b..8a25b6ea1 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c @@ -154,7 +154,7 @@ draw_pt_arrays(struct draw_context *draw, if (count >= first) frontend->run( frontend, draw_info[i].start, count ); - if (draw->pt.user.increment_draw_id) + if (num_draws > 1 && draw->pt.user.increment_draw_id) draw->pt.user.drawid++; } @@ -524,11 +524,14 @@ draw_vbo(struct draw_context *draw, num_draws = 1; } - if (info->index_size) + if (info->index_size) { assert(draw->pt.user.elts); - - draw->pt.user.min_index = use_info->index_bounds_valid ? use_info->min_index : 0; - draw->pt.user.max_index = use_info->index_bounds_valid ? use_info->max_index : ~0; + draw->pt.user.min_index = use_info->index_bounds_valid ? use_info->min_index : 0; + draw->pt.user.max_index = use_info->index_bounds_valid ? use_info->max_index : ~0; + } else { + draw->pt.user.min_index = 0; + draw->pt.user.max_index = ~0; + } draw->pt.user.eltSize = use_info->index_size ? draw->pt.user.eltSizeIB : 0; draw->pt.user.drawid = drawid_offset; draw->pt.user.increment_draw_id = use_info->increment_draw_id; diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_tess.c b/lib/mesa/src/gallium/auxiliary/draw/draw_tess.c index 7aa64f076..c1742eb03 100644 --- a/lib/mesa/src/gallium/auxiliary/draw/draw_tess.c +++ b/lib/mesa/src/gallium/auxiliary/draw/draw_tess.c @@ -602,6 +602,7 @@ void draw_bind_tess_eval_shader(struct draw_context *draw, draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); if (dtes) { draw->tes.tess_eval_shader = dtes; + draw->tes.num_tes_outputs = dtes->info.num_outputs; draw->tes.position_output = dtes->position_output; draw->tes.clipvertex_output = dtes->clipvertex_output; } else { diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 1c71c0508..9cff0162b 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1887,6 +1887,8 @@ arch_rounding_available(const struct lp_type type) return TRUE; else if (util_get_cpu_caps()->has_neon) return TRUE; + else if (util_get_cpu_caps()->family == CPU_S390X) + return TRUE; return FALSE; } @@ -1994,7 +1996,8 @@ lp_build_round_arch(struct lp_build_context *bld, LLVMValueRef a, enum lp_build_round_mode mode) { - if (util_get_cpu_caps()->has_sse4_1 || util_get_cpu_caps()->has_neon) { + if (util_get_cpu_caps()->has_sse4_1 || util_get_cpu_caps()->has_neon || + util_get_cpu_caps()->family == CPU_S390X) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const char *intrinsic_root; diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index bedc8f87b..8e57a5e34 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -987,6 +987,7 @@ lp_build_pack_rgba_soa(struct gallivm_state *gallivm, { unsigned chan; struct lp_build_context bld; + LLVMValueRef rgba_swiz[4]; assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); @@ -995,13 +996,16 @@ lp_build_pack_rgba_soa(struct gallivm_state *gallivm, assert(type.width == 32); lp_build_context_init(&bld, gallivm, type); + + lp_build_format_swizzle_soa(format_desc, &bld, rgba_in, rgba_swiz); + for (chan = 0; chan < format_desc->nr_channels; ++chan) { struct util_format_channel_description chan_desc = format_desc->channel[chan]; lp_build_insert_soa_chan(&bld, format_desc->block.bits, chan_desc, packed, - rgba_in[chan]); + rgba_swiz[chan]); } } diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c index 97deffe1d..3b346f37d 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c @@ -335,7 +335,6 @@ rgb_to_rgba_aos(struct gallivm_state *gallivm, */ #if UTIL_ARCH_LITTLE_ENDIAN - r = r; g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), ""); b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), ""); a = lp_build_const_int_vec(gallivm, type, 0xff000000); diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h index 55778f2b6..0e3af4e40 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -59,7 +59,7 @@ #define LP_MAX_TGSI_SHADER_BUFFER_SIZE (1 << 27) -#define LP_MAX_TGSI_SHADER_IMAGES 16 +#define LP_MAX_TGSI_SHADER_IMAGES 32 /* * For quick access we cache registers in statically diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index be288ab02..016a0c78a 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -64,6 +64,9 @@ #include <llvm/Support/PrettyStackTrace.h> #include <llvm/ExecutionEngine/ObjectCache.h> #include <llvm/Support/TargetSelect.h> +#if LLVM_VERSION_MAJOR >= 15 +#include <llvm/Support/MemoryBuffer.h> +#endif #if LLVM_VERSION_MAJOR < 11 #include <llvm/IR/CallSite.h> diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 19bd8463d..f04b69220 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -445,6 +445,10 @@ lp_build_rho(struct lp_build_sample_context *bld, } } } + + LLVMValueRef rho_is_inf = lp_build_is_inf_or_nan(gallivm, coord_bld->type, rho); + rho = lp_build_select(coord_bld, rho_is_inf, coord_bld->zero, rho); + if (rho_per_quad) { /* * rho_vec contains per-pixel rho, convert to scalar per quad. diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index ce58db67e..940a51d4e 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -2390,10 +2390,11 @@ lp_build_sample_aniso(struct lp_build_sample_context *bld, wnz = LLVMBuildSExt(builder, wnz, bld->int_coord_bld.vec_type, ""); wnz = lp_build_any_true_range(&bld->coord_bld, bld->coord_bld.type.length, wnz); lp_build_if(&noloadw0, gallivm, wnz); - LLVMValueRef new_coords[3]; + LLVMValueRef new_coords[4]; new_coords[0] = lp_build_div(coord_bld, lp_build_int_to_float(coord_bld, u_val), width_dim); new_coords[1] = lp_build_div(coord_bld, lp_build_int_to_float(coord_bld, v_val), height_dim); new_coords[2] = coords[2]; + new_coords[3] = coords[3]; /* lookup q in filter table */ LLVMValueRef temp_colors[4]; @@ -3200,9 +3201,16 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld, * Could use min/max above instead of out-of-bounds comparisons * if we don't care about the result returned for out-of-bounds. */ + LLVMValueRef oob[4] = { + bld->texel_bld.zero, + bld->texel_bld.zero, + bld->texel_bld.zero, + bld->texel_bld.zero, + }; + lp_build_format_swizzle_soa(bld->format_desc, &bld->texel_bld, oob, oob); for (chan = 0; chan < 4; chan++) { colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds, - bld->texel_bld.zero, colors_out[chan]); + oob[chan], colors_out[chan]); } } } @@ -4660,7 +4668,7 @@ lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state, out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height); out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1); } - if (dims >= 3) { + if (dims >= 3 || layer_coord) { out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1); } diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 31aacef74..afaf35e5b 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1562,6 +1562,11 @@ emit_fetch_system_value( atype = TGSI_TYPE_UNSIGNED; break; + case TGSI_SEMANTIC_SAMPLEID: + res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id); + atype = TGSI_TYPE_UNSIGNED; + break; + case TGSI_SEMANTIC_TESSOUTER: res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type, bld->system_values.tess_outer, diff --git a/lib/mesa/src/gallium/auxiliary/hud/hud_context.c b/lib/mesa/src/gallium/auxiliary/hud/hud_context.c index b1887aa84..5fd3cdfea 100644 --- a/lib/mesa/src/gallium/auxiliary/hud/hud_context.c +++ b/lib/mesa/src/gallium/auxiliary/hud/hud_context.c @@ -93,7 +93,7 @@ hud_draw_colored_prims(struct hud_context *hud, unsigned prim, u_upload_unmap(hud->pipe->stream_uploader); vbuffer.stride = 2 * sizeof(float); - cso_set_vertex_buffers(cso, 0, 1, &vbuffer); + cso_set_vertex_buffers(cso, 0, 1, 0, false, &vbuffer); pipe_resource_reference(&vbuffer.buffer.resource, NULL); cso_set_fragment_shader_handle(hud->cso, hud->fs_color); cso_draw_arrays(cso, prim, 0, num_vertices); @@ -562,7 +562,7 @@ hud_draw_results(struct hud_context *hud, struct pipe_resource *tex) pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, false, &hud->constbuf); - cso_set_vertex_buffers(cso, 0, 1, &hud->bg.vbuf); + cso_set_vertex_buffers(cso, 0, 1, 0, false, &hud->bg.vbuf); cso_draw_arrays(cso, PIPE_PRIM_QUADS, 0, hud->bg.num_vertices); } pipe_resource_reference(&hud->bg.vbuf.buffer.resource, NULL); @@ -570,7 +570,7 @@ hud_draw_results(struct hud_context *hud, struct pipe_resource *tex) /* draw accumulated vertices for text */ if (hud->text.num_vertices) { cso_set_vertex_shader_handle(cso, hud->vs_text); - cso_set_vertex_buffers(cso, 0, 1, &hud->text.vbuf); + cso_set_vertex_buffers(cso, 0, 1, 0, false, &hud->text.vbuf); cso_set_fragment_shader_handle(hud->cso, hud->fs_text); cso_draw_arrays(cso, PIPE_PRIM_QUADS, 0, hud->text.num_vertices); } @@ -594,7 +594,7 @@ hud_draw_results(struct hud_context *hud, struct pipe_resource *tex) if (hud->whitelines.num_vertices) { cso_set_vertex_shader_handle(cso, hud->vs_color); - cso_set_vertex_buffers(cso, 0, 1, &hud->whitelines.vbuf); + cso_set_vertex_buffers(cso, 0, 1, 0, false, &hud->whitelines.vbuf); cso_set_fragment_shader_handle(hud->cso, hud->fs_color); cso_draw_arrays(cso, PIPE_PRIM_LINES, 0, hud->whitelines.num_vertices); } @@ -1682,11 +1682,11 @@ hud_set_draw_context(struct hud_context *hud, struct cso_context *cso, "FRAG\n" "DCL IN[0], GENERIC[0], LINEAR\n" "DCL SAMP[0]\n" - "DCL SVIEW[0], RECT, FLOAT\n" + "DCL SVIEW[0], 2D, FLOAT\n" "DCL OUT[0], COLOR[0]\n" "DCL TEMP[0]\n" - "TEX TEMP[0], IN[0], SAMP[0], RECT\n" + "TEX TEMP[0], IN[0], SAMP[0], 2D\n" "MOV OUT[0], TEMP[0].xxxx\n" "END\n" }; @@ -1753,6 +1753,7 @@ hud_set_draw_context(struct hud_context *hud, struct cso_context *cso, "DCL CONST[0][0..2]\n" "DCL TEMP[0]\n" "IMM[0] FLT32 { -1, 0, 0, 1 }\n" + "IMM[1] FLT32 { 0.0078125, 0.00390625, 1, 1 }\n" // 1.0 / 128, 1.0 / 256, 1, 1 /* v = in * (xscale, yscale) + (xoffset, yoffset) */ "MAD TEMP[0].xy, IN[0], CONST[0][2].xyyy, CONST[0][1].zwww\n" @@ -1760,7 +1761,7 @@ hud_set_draw_context(struct hud_context *hud, struct cso_context *cso, "MAD OUT[0].xy, TEMP[0], CONST[0][1].xyyy, IMM[0].xxxx\n" "MOV OUT[0].zw, IMM[0]\n" - "MOV OUT[1], IN[1]\n" + "MUL OUT[1], IN[1], IMM[1]\n" "END\n" }; @@ -1935,7 +1936,7 @@ hud_create(struct cso_context *cso, struct st_context_iface *st, hud->font_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; hud->font_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; hud->font_sampler_state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - hud->font_sampler_state.normalized_coords = 0; + hud->font_sampler_state.normalized_coords = 1; /* constants */ hud->constbuf.buffer_size = sizeof(hud->constants); diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_indices.c b/lib/mesa/src/gallium/auxiliary/indices/u_indices.c deleted file mode 100644 index d0a5fb8c1..000000000 --- a/lib/mesa/src/gallium/auxiliary/indices/u_indices.c +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "u_indices.h" -#include "u_indices_priv.h" - -static void translate_memcpy_ushort( const void *in, - unsigned start, - unsigned in_nr, - unsigned out_nr, - unsigned restart_index, - void *out ) -{ - memcpy(out, &((short *)in)[start], out_nr*sizeof(short)); -} - -static void translate_memcpy_uint( const void *in, - unsigned start, - unsigned in_nr, - unsigned out_nr, - unsigned restart_index, - void *out ) -{ - memcpy(out, &((int *)in)[start], out_nr*sizeof(int)); -} - -static void translate_byte_to_ushort( const void *in, - unsigned start, - UNUSED unsigned in_nr, - unsigned out_nr, - UNUSED unsigned restart_index, - void *out ) -{ - uint8_t *src = (uint8_t *)in + start; - uint16_t *dst = out; - while (out_nr--) { - *dst++ = *src++; - } -} - -enum pipe_prim_type -u_index_prim_type_convert(unsigned hw_mask, enum pipe_prim_type prim, bool pv_matches) -{ - if ((hw_mask & (1<<prim)) && pv_matches) - return prim; - - switch (prim) { - case PIPE_PRIM_POINTS: - return PIPE_PRIM_POINTS; - case PIPE_PRIM_LINES: - case PIPE_PRIM_LINE_STRIP: - case PIPE_PRIM_LINE_LOOP: - return PIPE_PRIM_LINES; - case PIPE_PRIM_TRIANGLES: - case PIPE_PRIM_TRIANGLE_STRIP: - case PIPE_PRIM_TRIANGLE_FAN: - case PIPE_PRIM_QUADS: - case PIPE_PRIM_QUAD_STRIP: - case PIPE_PRIM_POLYGON: - return PIPE_PRIM_TRIANGLES; - case PIPE_PRIM_LINES_ADJACENCY: - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - return PIPE_PRIM_LINES_ADJACENCY; - case PIPE_PRIM_TRIANGLES_ADJACENCY: - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - return PIPE_PRIM_TRIANGLES_ADJACENCY; - case PIPE_PRIM_PATCHES: - return PIPE_PRIM_PATCHES; - default: - assert(0); - break; - } - return PIPE_PRIM_POINTS; -} - -/** - * Translate indexes when a driver can't support certain types - * of drawing. Example include: - * - Translate 1-byte indexes into 2-byte indexes - * - Translate PIPE_PRIM_QUADS into PIPE_PRIM_TRIANGLES when the hardware - * doesn't support the former. - * - Translate from first provoking vertex to last provoking vertex and - * vice versa. - * - * Note that this function is used for indexed primitives. - * - * \param hw_mask mask of (1 << PIPE_PRIM_x) flags indicating which types - * of primitives are supported by the hardware. - * \param prim incoming PIPE_PRIM_x - * \param in_index_size bytes per index value (1, 2 or 4) - * \param nr number of incoming vertices - * \param in_pv incoming provoking vertex convention (PV_FIRST or PV_LAST) - * \param out_pv desired provoking vertex convention (PV_FIRST or PV_LAST) - * \param prim_restart whether primitive restart is disable or enabled - * \param out_prim returns new PIPE_PRIM_x we'll translate to - * \param out_index_size returns bytes per new index value (2 or 4) - * \param out_nr returns number of new vertices - * \param out_translate returns the translation function to use by the caller - */ -enum indices_mode -u_index_translator(unsigned hw_mask, - enum pipe_prim_type prim, - unsigned in_index_size, - unsigned nr, - unsigned in_pv, - unsigned out_pv, - unsigned prim_restart, - enum pipe_prim_type *out_prim, - unsigned *out_index_size, - unsigned *out_nr, - u_translate_func *out_translate) -{ - unsigned in_idx; - unsigned out_idx; - enum indices_mode ret = U_TRANSLATE_NORMAL; - - assert(in_index_size == 1 || - in_index_size == 2 || - in_index_size == 4); - - u_index_init(); - - in_idx = in_size_idx(in_index_size); - *out_index_size = u_index_size_convert(in_index_size); - out_idx = out_size_idx(*out_index_size); - - if ((hw_mask & (1<<prim)) && - in_pv == out_pv) - { - if (in_index_size == 4) - *out_translate = translate_memcpy_uint; - else if (in_index_size == 2) - *out_translate = translate_memcpy_ushort; - else - *out_translate = translate_byte_to_ushort; - - *out_prim = prim; - *out_nr = nr; - - return U_TRANSLATE_MEMCPY; - } - *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim_restart][prim]; - *out_prim = u_index_prim_type_convert(hw_mask, prim, in_pv == out_pv); - *out_nr = u_index_count_converted_indices(hw_mask, in_pv == out_pv, prim, nr); - - return ret; -} - -unsigned -u_index_count_converted_indices(unsigned hw_mask, bool pv_matches, enum pipe_prim_type prim, unsigned nr) -{ - if ((hw_mask & (1<<prim)) && pv_matches) - return nr; - - switch (prim) { - case PIPE_PRIM_POINTS: - case PIPE_PRIM_PATCHES: - return nr; - case PIPE_PRIM_LINES: - return nr; - case PIPE_PRIM_LINE_STRIP: - return (nr - 1) * 2; - case PIPE_PRIM_LINE_LOOP: - return nr * 2; - case PIPE_PRIM_TRIANGLES: - return nr; - case PIPE_PRIM_TRIANGLE_STRIP: - return (nr - 2) * 3; - case PIPE_PRIM_TRIANGLE_FAN: - return (nr - 2) * 3; - case PIPE_PRIM_QUADS: - return (nr / 4) * 6; - case PIPE_PRIM_QUAD_STRIP: - return (nr - 2) * 3; - case PIPE_PRIM_POLYGON: - return (nr - 2) * 3; - case PIPE_PRIM_LINES_ADJACENCY: - return nr; - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - return (nr - 3) * 4; - case PIPE_PRIM_TRIANGLES_ADJACENCY: - return nr; - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - return ((nr - 4) / 2) * 6; - default: - assert(0); - break; - } - return nr; -} - - -/** - * If a driver does not support a particular gallium primitive type - * (such as PIPE_PRIM_QUAD_STRIP) this function can be used to help - * convert the primitive into a simpler type (like PIPE_PRIM_TRIANGLES). - * - * The generator functions generates a number of ushort or uint indexes - * for drawing the new type of primitive. - * - * Note that this function is used for non-indexed primitives. - * - * \param hw_mask a bitmask of (1 << PIPE_PRIM_x) values that indicates - * kind of primitives are supported by the driver. - * \param prim the PIPE_PRIM_x that the user wants to draw - * \param start index of first vertex to draw - * \param nr number of vertices to draw - * \param in_pv user's provoking vertex (PV_FIRST/LAST) - * \param out_pv desired proking vertex for the hardware (PV_FIRST/LAST) - * \param out_prim returns the new primitive type for the driver - * \param out_index_size returns OUT_USHORT or OUT_UINT - * \param out_nr returns new number of vertices to draw - * \param out_generate returns pointer to the generator function - */ -enum indices_mode -u_index_generator(unsigned hw_mask, - enum pipe_prim_type prim, - unsigned start, - unsigned nr, - unsigned in_pv, - unsigned out_pv, - enum pipe_prim_type *out_prim, - unsigned *out_index_size, - unsigned *out_nr, - u_generate_func *out_generate) -{ - unsigned out_idx; - - u_index_init(); - - *out_index_size = ((start + nr) > 0xfffe) ? 4 : 2; - out_idx = out_size_idx(*out_index_size); - *out_prim = u_index_prim_type_convert(hw_mask, prim, in_pv == out_pv); - *out_nr = u_index_count_converted_indices(hw_mask, in_pv == out_pv, prim, nr); - - if ((hw_mask & (1<<prim)) && - (in_pv == out_pv)) { - - *out_generate = generate[out_idx][in_pv][out_pv][PIPE_PRIM_POINTS]; - return U_GENERATE_LINEAR; - } - *out_generate = generate[out_idx][in_pv][out_pv][prim]; - return prim == PIPE_PRIM_LINE_LOOP ? U_GENERATE_ONE_OFF : U_GENERATE_REUSABLE; -} diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_indices.h b/lib/mesa/src/gallium/auxiliary/indices/u_indices.h deleted file mode 100644 index e01201e4b..000000000 --- a/lib/mesa/src/gallium/auxiliary/indices/u_indices.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef U_INDICES_H -#define U_INDICES_H - -#include "pipe/p_compiler.h" - -#define PV_FIRST 0 -#define PV_LAST 1 -#define PV_COUNT 2 - -/* primitive restart disable/enable flags */ -#define PR_DISABLE 0 -#define PR_ENABLE 1 -#define PR_COUNT 2 -/** - * Index translator function (for glDrawElements() case) - * - * \param in the input index buffer - * \param start the index of the first vertex (pipe_draw_info::start) - * \param nr the number of vertices (pipe_draw_info::count) - * \param out output buffer big enough or nr vertices (of - * @out_index_size bytes each) - */ -typedef void (*u_translate_func)( const void *in, - unsigned start, - unsigned in_nr, - unsigned out_nr, - unsigned restart_index, - void *out ); - -/** - * Index generator function (for glDrawArrays() case) - * - * \param start the index of the first vertex (pipe_draw_info::start) - * \param nr the number of vertices (pipe_draw_info::count) - * \param out output buffer big enough or nr vertices (of - * @out_index_size bytes each) - */ -typedef void (*u_generate_func)( unsigned start, - unsigned nr, - void *out ); - - -/* Return codes describe the translate/generate operation. Caller may - * be able to reuse translated indices under some circumstances. - */ -#define U_TRANSLATE_ERROR -1 -#define U_TRANSLATE_NORMAL 1 -#define U_TRANSLATE_MEMCPY 2 -#define U_GENERATE_LINEAR 3 -#define U_GENERATE_REUSABLE 4 -#define U_GENERATE_ONE_OFF 5 - - -void u_index_init( void ); - -int u_index_translator( unsigned hw_mask, - unsigned prim, - unsigned in_index_size, - unsigned nr, - unsigned in_pv, /* API */ - unsigned out_pv, /* hardware */ - unsigned prim_restart, - unsigned *out_prim, - unsigned *out_index_size, - unsigned *out_nr, - u_translate_func *out_translate ); - -/* Note that even when generating it is necessary to know what the - * API's PV is, as the indices generated will depend on whether it is - * the same as hardware or not, and in the case of triangle strips, - * whether it is first or last. - */ -int u_index_generator( unsigned hw_mask, - unsigned prim, - unsigned start, - unsigned nr, - unsigned in_pv, /* API */ - unsigned out_pv, /* hardware */ - unsigned *out_prim, - unsigned *out_index_size, - unsigned *out_nr, - u_generate_func *out_generate ); - - -void u_unfilled_init( void ); - -int u_unfilled_translator( unsigned prim, - unsigned in_index_size, - unsigned nr, - unsigned unfilled_mode, - unsigned *out_prim, - unsigned *out_index_size, - unsigned *out_nr, - u_translate_func *out_translate ); - -int u_unfilled_generator( unsigned prim, - unsigned start, - unsigned nr, - unsigned unfilled_mode, - unsigned *out_prim, - unsigned *out_index_size, - unsigned *out_nr, - u_generate_func *out_generate ); - - - - -#endif diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_indices_gen.py b/lib/mesa/src/gallium/auxiliary/indices/u_indices_gen.py deleted file mode 100644 index 71c049449..000000000 --- a/lib/mesa/src/gallium/auxiliary/indices/u_indices_gen.py +++ /dev/null @@ -1,462 +0,0 @@ -copyright = ''' -/* - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -''' - -GENERATE, UBYTE, USHORT, UINT = 'generate', 'ubyte', 'ushort', 'uint' -FIRST, LAST = 'first', 'last' -PRDISABLE, PRENABLE = 'prdisable', 'prenable' - -INTYPES = (GENERATE, UBYTE, USHORT, UINT) -OUTTYPES = (USHORT, UINT) -PVS=(FIRST, LAST) -PRS=(PRDISABLE, PRENABLE) -PRIMS=('points', - 'lines', - 'linestrip', - 'lineloop', - 'tris', - 'trifan', - 'tristrip', - 'quads', - 'quadstrip', - 'polygon', - 'linesadj', - 'linestripadj', - 'trisadj', - 'tristripadj') - -LONGPRIMS=('PIPE_PRIM_POINTS', - 'PIPE_PRIM_LINES', - 'PIPE_PRIM_LINE_STRIP', - 'PIPE_PRIM_LINE_LOOP', - 'PIPE_PRIM_TRIANGLES', - 'PIPE_PRIM_TRIANGLE_FAN', - 'PIPE_PRIM_TRIANGLE_STRIP', - 'PIPE_PRIM_QUADS', - 'PIPE_PRIM_QUAD_STRIP', - 'PIPE_PRIM_POLYGON', - 'PIPE_PRIM_LINES_ADJACENCY', - 'PIPE_PRIM_LINE_STRIP_ADJACENCY', - 'PIPE_PRIM_TRIANGLES_ADJACENCY', - 'PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY') - -longprim = dict(zip(PRIMS, LONGPRIMS)) -intype_idx = dict(ubyte='IN_UBYTE', ushort='IN_USHORT', uint='IN_UINT') -outtype_idx = dict(ushort='OUT_USHORT', uint='OUT_UINT') -pv_idx = dict(first='PV_FIRST', last='PV_LAST') -pr_idx = dict(prdisable='PR_DISABLE', prenable='PR_ENABLE') - -def prolog(): - print('''/* File automatically generated by u_indices_gen.py */''') - print(copyright) - print(r''' - -/** - * @file - * Functions to translate and generate index lists - */ - -#include "indices/u_indices_priv.h" -#include "util/u_debug.h" -#include "util/u_memory.h" - - -static unsigned out_size_idx( unsigned index_size ) -{ - switch (index_size) { - case 4: return OUT_UINT; - case 2: return OUT_USHORT; - default: assert(0); return OUT_USHORT; - } -} - -static unsigned in_size_idx( unsigned index_size ) -{ - switch (index_size) { - case 4: return IN_UINT; - case 2: return IN_USHORT; - case 1: return IN_UBYTE; - default: assert(0); return IN_UBYTE; - } -} - - -static u_translate_func translate[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PR_COUNT][PRIM_COUNT]; -static u_generate_func generate[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT]; - - -''') - -def vert( intype, outtype, v0 ): - if intype == GENERATE: - return '(' + outtype + ')(' + v0 + ')' - else: - return '(' + outtype + ')in[' + v0 + ']' - -def point( intype, outtype, ptr, v0 ): - print(' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';') - -def line( intype, outtype, ptr, v0, v1 ): - print(' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';') - print(' (' + ptr + ')[1] = ' + vert( intype, outtype, v1 ) + ';') - -def tri( intype, outtype, ptr, v0, v1, v2 ): - print(' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';') - print(' (' + ptr + ')[1] = ' + vert( intype, outtype, v1 ) + ';') - print(' (' + ptr + ')[2] = ' + vert( intype, outtype, v2 ) + ';') - -def lineadj( intype, outtype, ptr, v0, v1, v2, v3 ): - print(' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';') - print(' (' + ptr + ')[1] = ' + vert( intype, outtype, v1 ) + ';') - print(' (' + ptr + ')[2] = ' + vert( intype, outtype, v2 ) + ';') - print(' (' + ptr + ')[3] = ' + vert( intype, outtype, v3 ) + ';') - -def triadj( intype, outtype, ptr, v0, v1, v2, v3, v4, v5 ): - print(' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';') - print(' (' + ptr + ')[1] = ' + vert( intype, outtype, v1 ) + ';') - print(' (' + ptr + ')[2] = ' + vert( intype, outtype, v2 ) + ';') - print(' (' + ptr + ')[3] = ' + vert( intype, outtype, v3 ) + ';') - print(' (' + ptr + ')[4] = ' + vert( intype, outtype, v4 ) + ';') - print(' (' + ptr + ')[5] = ' + vert( intype, outtype, v5 ) + ';') - -def do_point( intype, outtype, ptr, v0 ): - point( intype, outtype, ptr, v0 ) - -def do_line( intype, outtype, ptr, v0, v1, inpv, outpv ): - if inpv == outpv: - line( intype, outtype, ptr, v0, v1 ) - else: - line( intype, outtype, ptr, v1, v0 ) - -def do_tri( intype, outtype, ptr, v0, v1, v2, inpv, outpv ): - if inpv == outpv: - tri( intype, outtype, ptr, v0, v1, v2 ) - else: - if inpv == FIRST: - tri( intype, outtype, ptr, v1, v2, v0 ) - else: - tri( intype, outtype, ptr, v2, v0, v1 ) - -def do_quad( intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv ): - if inpv == LAST: - do_tri( intype, outtype, ptr+'+0', v0, v1, v3, inpv, outpv ); - do_tri( intype, outtype, ptr+'+3', v1, v2, v3, inpv, outpv ); - else: - do_tri( intype, outtype, ptr+'+0', v0, v1, v2, inpv, outpv ); - do_tri( intype, outtype, ptr+'+3', v0, v2, v3, inpv, outpv ); - -def do_lineadj( intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv ): - if inpv == outpv: - lineadj( intype, outtype, ptr, v0, v1, v2, v3 ) - else: - lineadj( intype, outtype, ptr, v3, v2, v1, v0 ) - -def do_triadj( intype, outtype, ptr, v0, v1, v2, v3, v4, v5, inpv, outpv ): - if inpv == outpv: - triadj( intype, outtype, ptr, v0, v1, v2, v3, v4, v5 ) - else: - triadj( intype, outtype, ptr, v4, v5, v0, v1, v2, v3 ) - -def name(intype, outtype, inpv, outpv, pr, prim): - if intype == GENERATE: - return 'generate_' + prim + '_' + outtype + '_' + inpv + '2' + outpv - else: - return 'translate_' + prim + '_' + intype + '2' + outtype + '_' + inpv + '2' + outpv + '_' + pr - -def preamble(intype, outtype, inpv, outpv, pr, prim): - print('static void ' + name( intype, outtype, inpv, outpv, pr, prim ) + '(') - if intype != GENERATE: - print(' const void * restrict _in,') - print(' unsigned start,') - if intype != GENERATE: - print(' unsigned in_nr,') - print(' unsigned out_nr,') - if intype != GENERATE: - print(' unsigned restart_index,') - print(' void * restrict _out )') - print('{') - if intype != GENERATE: - print(' const ' + intype + '* restrict in = (const ' + intype + '* restrict)_in;') - print(' ' + outtype + ' * restrict out = (' + outtype + '* restrict)_out;') - print(' unsigned i, j;') - print(' (void)j;') - -def postamble(): - print('}') - -def prim_restart(in_verts, out_verts, out_prims, close_func = None): - print('restart:') - print(' if (i + ' + str(in_verts) + ' > in_nr) {') - for i in range(out_prims): - for j in range(out_verts): - print(' (out+j+' + str(out_verts * i) + ')[' + str(j) + '] = restart_index;') - print(' continue;') - print(' }') - for i in range(in_verts): - print(' if (in[i + ' + str(i) + '] == restart_index) {') - print(' i += ' + str(i + 1) + ';') - - if close_func is not None: - close_func(i) - - print(' goto restart;') - print(' }') - -def points(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='points') - print(' for (i = start, j = 0; j < out_nr; j++, i++) { ') - do_point( intype, outtype, 'out+j', 'i' ); - print(' }') - postamble() - -def lines(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='lines') - print(' for (i = start, j = 0; j < out_nr; j+=2, i+=2) { ') - do_line( intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv ); - print(' }') - postamble() - -def linestrip(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='linestrip') - print(' for (i = start, j = 0; j < out_nr; j+=2, i++) { ') - do_line( intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv ); - print(' }') - postamble() - -def lineloop(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='lineloop') - print(' unsigned end = start;') - print(' for (i = start, j = 0; j < out_nr - 2; j+=2, i++) { ') - if pr == PRENABLE: - def close_func(index): - do_line( intype, outtype, 'out+j', 'end', 'start', inpv, outpv ) - print(' start = i;') - print(' end = start;') - print(' j += 2;') - - prim_restart(2, 2, 1, close_func) - - do_line( intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv ); - print(' end = i+1;') - print(' }') - do_line( intype, outtype, 'out+j', 'end', 'start', inpv, outpv ); - postamble() - -def tris(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='tris') - print(' for (i = start, j = 0; j < out_nr; j+=3, i+=3) { ') - do_tri( intype, outtype, 'out+j', 'i', 'i+1', 'i+2', inpv, outpv ); - print(' }') - postamble() - - -def tristrip(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='tristrip') - print(' for (i = start, j = 0; j < out_nr; j+=3, i++) { ') - if inpv == FIRST: - do_tri( intype, outtype, 'out+j', 'i', 'i+1+(i&1)', 'i+2-(i&1)', inpv, outpv ); - else: - do_tri( intype, outtype, 'out+j', 'i+(i&1)', 'i+1-(i&1)', 'i+2', inpv, outpv ); - print(' }') - postamble() - - -def trifan(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='trifan') - print(' for (i = start, j = 0; j < out_nr; j+=3, i++) { ') - - if pr == PRENABLE: - def close_func(index): - print(' start = i;') - prim_restart(3, 3, 1, close_func) - - if inpv == FIRST: - do_tri( intype, outtype, 'out+j', 'i+1', 'i+2', 'start', inpv, outpv ); - else: - do_tri( intype, outtype, 'out+j', 'start', 'i+1', 'i+2', inpv, outpv ); - - print(' }') - postamble() - - - -def polygon(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='polygon') - print(' for (i = start, j = 0; j < out_nr; j+=3, i++) { ') - if pr == PRENABLE: - def close_func(index): - print(' start = i;') - prim_restart(3, 3, 1, close_func) - - if inpv == FIRST: - do_tri( intype, outtype, 'out+j', 'start', 'i+1', 'i+2', inpv, outpv ); - else: - do_tri( intype, outtype, 'out+j', 'i+1', 'i+2', 'start', inpv, outpv ); - print(' }') - postamble() - - -def quads(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='quads') - print(' for (i = start, j = 0; j < out_nr; j+=6, i+=4) { ') - if pr == PRENABLE: - prim_restart(4, 3, 2) - - do_quad( intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv ); - print(' }') - postamble() - - -def quadstrip(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='quadstrip') - print(' for (i = start, j = 0; j < out_nr; j+=6, i+=2) { ') - if pr == PRENABLE: - prim_restart(4, 3, 2) - - if inpv == LAST: - do_quad( intype, outtype, 'out+j', 'i+2', 'i+0', 'i+1', 'i+3', inpv, outpv ); - else: - do_quad( intype, outtype, 'out+j', 'i+0', 'i+1', 'i+3', 'i+2', inpv, outpv ); - print(' }') - postamble() - - -def linesadj(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='linesadj') - print(' for (i = start, j = 0; j < out_nr; j+=4, i+=4) { ') - do_lineadj( intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv ) - print(' }') - postamble() - - -def linestripadj(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='linestripadj') - print(' for (i = start, j = 0; j < out_nr; j+=4, i++) {') - do_lineadj( intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv ) - print(' }') - postamble() - - -def trisadj(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='trisadj') - print(' for (i = start, j = 0; j < out_nr; j+=6, i+=6) { ') - do_triadj( intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', - 'i+4', 'i+5', inpv, outpv ) - print(' }') - postamble() - - -def tristripadj(intype, outtype, inpv, outpv, pr): - preamble(intype, outtype, inpv, outpv, pr, prim='tristripadj') - print(' for (i = start, j = 0; j < out_nr; i+=2, j+=6) { ') - print(' if (i % 4 == 0) {') - print(' /* even triangle */') - do_triadj( intype, outtype, 'out+j', - 'i+0', 'i+1', 'i+2', 'i+3', 'i+4', 'i+5', inpv, outpv ) - print(' } else {') - print(' /* odd triangle */') - do_triadj( intype, outtype, 'out+j', - 'i+2', 'i-2', 'i+0', 'i+3', 'i+4', 'i+6', inpv, outpv ) - print(' }') - print(' }') - postamble() - - -def emit_funcs(): - for intype in INTYPES: - for outtype in OUTTYPES: - for inpv in (FIRST, LAST): - for outpv in (FIRST, LAST): - for pr in (PRDISABLE, PRENABLE): - if pr == PRENABLE and intype == GENERATE: - continue - points(intype, outtype, inpv, outpv, pr) - lines(intype, outtype, inpv, outpv, pr) - linestrip(intype, outtype, inpv, outpv, pr) - lineloop(intype, outtype, inpv, outpv, pr) - tris(intype, outtype, inpv, outpv, pr) - tristrip(intype, outtype, inpv, outpv, pr) - trifan(intype, outtype, inpv, outpv, pr) - quads(intype, outtype, inpv, outpv, pr) - quadstrip(intype, outtype, inpv, outpv, pr) - polygon(intype, outtype, inpv, outpv, pr) - linesadj(intype, outtype, inpv, outpv, pr) - linestripadj(intype, outtype, inpv, outpv, pr) - trisadj(intype, outtype, inpv, outpv, pr) - tristripadj(intype, outtype, inpv, outpv, pr) - -def init(intype, outtype, inpv, outpv, pr, prim): - if intype == GENERATE: - print ('generate[' + - outtype_idx[outtype] + - '][' + pv_idx[inpv] + - '][' + pv_idx[outpv] + - '][' + longprim[prim] + - '] = ' + name( intype, outtype, inpv, outpv, pr, prim ) + ';') - else: - print ('translate[' + - intype_idx[intype] + - '][' + outtype_idx[outtype] + - '][' + pv_idx[inpv] + - '][' + pv_idx[outpv] + - '][' + pr_idx[pr] + - '][' + longprim[prim] + - '] = ' + name( intype, outtype, inpv, outpv, pr, prim ) + ';') - - -def emit_all_inits(): - for intype in INTYPES: - for outtype in OUTTYPES: - for inpv in PVS: - for outpv in PVS: - for pr in PRS: - for prim in PRIMS: - init(intype, outtype, inpv, outpv, pr, prim) - -def emit_init(): - print('void u_index_init( void )') - print('{') - print(' static int firsttime = 1;') - print(' if (!firsttime) return;') - print(' firsttime = 0;') - emit_all_inits() - print('}') - - - - -def epilog(): - print('#include "indices/u_indices.c"') - - -def main(): - prolog() - emit_funcs() - emit_init() - epilog() - - -if __name__ == '__main__': - main() diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_indices_priv.h b/lib/mesa/src/gallium/auxiliary/indices/u_indices_priv.h deleted file mode 100644 index 9acf1ff48..000000000 --- a/lib/mesa/src/gallium/auxiliary/indices/u_indices_priv.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef U_INDICES_PRIV_H -#define U_INDICES_PRIV_H - -#include "pipe/p_compiler.h" -#include "u_indices.h" - -#define IN_UBYTE 0 -#define IN_USHORT 1 -#define IN_UINT 2 -#define IN_COUNT 3 - -#define OUT_USHORT 0 -#define OUT_UINT 1 -#define OUT_COUNT 2 - - -#define PRIM_COUNT (PIPE_PRIM_POLYGON + 1) - -#endif diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.c b/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.c deleted file mode 100644 index 5ce8dddfa..000000000 --- a/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.c +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <robclark@freedesktop.org> - */ - -/** - * This module converts provides a more convenient front-end to u_indices, - * etc, utils to convert primitive types supported not supported by the - * hardware. It handles binding new index buffer state, and restoring - * previous state after. To use, put something like this at the front of - * drivers pipe->draw_vbo(): - * - * // emulate unsupported primitives: - * if (info->mode needs emulating) { - * util_primconvert_save_rasterizer_state(ctx->primconvert, ctx->rasterizer); - * util_primconvert_draw_vbo(ctx->primconvert, info); - * return; - * } - * - */ - -#include "pipe/p_state.h" -#include "util/u_draw.h" -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_prim.h" -#include "util/u_prim_restart.h" -#include "util/u_upload_mgr.h" - -#include "indices/u_indices.h" -#include "indices/u_primconvert.h" - -struct primconvert_context -{ - struct pipe_context *pipe; - struct primconvert_config cfg; - unsigned api_pv; -}; - - -struct primconvert_context * -util_primconvert_create_config(struct pipe_context *pipe, - struct primconvert_config *cfg) -{ - struct primconvert_context *pc = CALLOC_STRUCT(primconvert_context); - if (!pc) - return NULL; - pc->pipe = pipe; - pc->cfg = *cfg; - return pc; -} - -struct primconvert_context * -util_primconvert_create(struct pipe_context *pipe, uint32_t primtypes_mask) -{ - struct primconvert_config cfg = { .primtypes_mask = primtypes_mask, .restart_primtypes_mask = primtypes_mask }; - return util_primconvert_create_config(pipe, &cfg); -} - -void -util_primconvert_destroy(struct primconvert_context *pc) -{ - FREE(pc); -} - -void -util_primconvert_save_rasterizer_state(struct primconvert_context *pc, - const struct pipe_rasterizer_state - *rast) -{ - util_primconvert_save_flatshade_first(pc, rast->flatshade_first); -} - -void -util_primconvert_save_flatshade_first(struct primconvert_context *pc, bool flatshade_first) -{ - /* if we actually translated the provoking vertex for the buffer, - * we would actually need to save/restore rasterizer state. As - * it is, we just need to make note of the pv. - */ - pc->api_pv = flatshade_first ? PV_FIRST : PV_LAST; -} - -void -util_primconvert_draw_vbo(struct primconvert_context *pc, - const struct pipe_draw_info *info, - unsigned drawid_offset, - const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias *draws, - unsigned num_draws) -{ - struct pipe_draw_info new_info; - struct pipe_draw_start_count_bias new_draw; - struct pipe_draw_start_count_bias *direct_draws = NULL; - unsigned num_direct_draws = 0; - struct pipe_transfer *src_transfer = NULL; - u_translate_func trans_func, direct_draw_func; - u_generate_func gen_func; - const void *src = NULL; - void *dst; - unsigned ib_offset; - unsigned total_index_count = draws->count; - void *rewrite_buffer = NULL; - - if (indirect && indirect->buffer) { - /* this is stupid, but we're already doing a readback, - * so this thing may as well get the rest of the job done - */ - unsigned draw_count = 0; - struct u_indirect_params *new_draws = util_draw_indirect_read(pc->pipe, info, indirect, &draw_count); - if (!new_draws) - return; - - for (unsigned i = 0; i < draw_count; i++) - util_primconvert_draw_vbo(pc, &new_draws[i].info, drawid_offset + i, NULL, &new_draws[i].draw, 1); - free(new_draws); - return; - } - - if (num_draws > 1) { - unsigned drawid = drawid_offset; - for (unsigned i = 0; i < num_draws; i++) { - if (draws[i].count && info->instance_count) - util_primconvert_draw_vbo(pc, info, drawid, NULL, &draws[i], 1); - if (info->increment_draw_id) - drawid++; - } - return; - } - - const struct pipe_draw_start_count_bias *draw = &draws[0]; - - /* Filter out degenerate primitives, u_upload_alloc() will assert - * on size==0 so just bail: - */ - if (!info->primitive_restart && - !u_trim_pipe_prim(info->mode, (unsigned*)&draw->count)) - return; - - util_draw_init_info(&new_info); - new_info.index_bounds_valid = info->index_bounds_valid; - new_info.min_index = info->min_index; - new_info.max_index = info->max_index; - new_info.start_instance = info->start_instance; - new_info.instance_count = info->instance_count; - new_info.primitive_restart = info->primitive_restart; - new_info.restart_index = info->restart_index; - if (info->index_size) { - enum pipe_prim_type mode = new_info.mode = u_index_prim_type_convert(pc->cfg.primtypes_mask, info->mode, true); - unsigned index_size = info->index_size; - new_info.index_size = u_index_size_convert(info->index_size); - - src = info->has_user_indices ? info->index.user : NULL; - if (!src) { - src = pipe_buffer_map(pc->pipe, info->index.resource, - PIPE_MAP_READ, &src_transfer); - } - src = (const uint8_t *)src; - - /* if the resulting primitive type is not supported by the driver for primitive restart, - * or if the original primitive type was not supported by the driver, - * the draw needs to be rewritten to not use primitive restart - */ - if (info->primitive_restart && - (!(pc->cfg.restart_primtypes_mask & BITFIELD_BIT(mode)) || - !(pc->cfg.primtypes_mask & BITFIELD_BIT(info->mode)))) { - /* step 1: rewrite draw to not use primitive primitive restart; - * this pre-filters degenerate primitives - */ - direct_draws = util_prim_restart_convert_to_direct(src, info, draw, &num_direct_draws, - &new_info.min_index, &new_info.max_index, &total_index_count); - new_info.primitive_restart = false; - /* step 2: get a translator function which does nothing but handle any index size conversions - * which may or may not occur (8bit -> 16bit) - */ - u_index_translator(0xffff, - info->mode, index_size, total_index_count, - pc->api_pv, pc->api_pv, - PR_DISABLE, - &mode, &index_size, &new_draw.count, - &direct_draw_func); - /* this should always be a direct translation */ - assert(new_draw.count == total_index_count); - /* step 3: allocate a temp buffer for an intermediate rewrite step - * if no indices were found, this was a single incomplete restart and can be discarded - */ - if (total_index_count) - rewrite_buffer = malloc(index_size * total_index_count); - if (!rewrite_buffer) { - if (src_transfer) - pipe_buffer_unmap(pc->pipe, src_transfer); - return; - } - } - /* (step 4: get the actual primitive conversion translator function) */ - u_index_translator(pc->cfg.primtypes_mask, - info->mode, index_size, total_index_count, - pc->api_pv, pc->api_pv, - new_info.primitive_restart ? PR_ENABLE : PR_DISABLE, - &mode, &index_size, &new_draw.count, - &trans_func); - assert(new_info.mode == mode); - assert(new_info.index_size == index_size); - } - else { - enum pipe_prim_type mode = 0; - unsigned index_size; - - u_index_generator(pc->cfg.primtypes_mask, - info->mode, draw->start, draw->count, - pc->api_pv, pc->api_pv, - &mode, &index_size, &new_draw.count, - &gen_func); - new_info.mode = mode; - new_info.index_size = index_size; - } - - /* (step 5: allocate gpu memory sized for the FINAL index count) */ - u_upload_alloc(pc->pipe->stream_uploader, 0, new_info.index_size * new_draw.count, 4, - &ib_offset, &new_info.index.resource, &dst); - new_draw.start = ib_offset / new_info.index_size; - new_draw.index_bias = info->index_size ? draw->index_bias : 0; - - if (info->index_size) { - if (num_direct_draws) { - uint8_t *ptr = rewrite_buffer; - uint8_t *dst_ptr = dst; - /* step 6: if rewriting a prim-restart draw to direct draws, - * loop over all the direct draws in order to rewrite them into a single index buffer - * and draw in order to match the original call - */ - for (unsigned i = 0; i < num_direct_draws; i++) { - /* step 6a: get the index count for this draw, once converted */ - unsigned tmp_count = u_index_count_converted_indices(pc->cfg.primtypes_mask, true, info->mode, direct_draws[i].count); - /* step 6b: handle index size conversion using the temp buffer; no change in index count - * TODO: this step can be optimized out if the index size is known to not change - */ - direct_draw_func(src, direct_draws[i].start, direct_draws[i].count, direct_draws[i].count, info->restart_index, ptr); - /* step 6c: handle the primitive type conversion rewriting to the converted index count */ - trans_func(ptr, 0, direct_draws[i].count, tmp_count, info->restart_index, dst_ptr); - /* step 6d: increment the temp buffer and mapped final index buffer pointers */ - ptr += new_info.index_size * direct_draws[i].count; - dst_ptr += new_info.index_size * tmp_count; - } - /* step 7: set the final index count, which is the converted total index count from the original draw rewrite */ - new_draw.count = u_index_count_converted_indices(pc->cfg.primtypes_mask, true, info->mode, total_index_count); - } else - trans_func(src, draw->start, draw->count, new_draw.count, info->restart_index, dst); - - if (pc->cfg.fixed_prim_restart && new_info.primitive_restart) { - new_info.restart_index = (1ull << (new_info.index_size * 8)) - 1; - if (info->restart_index != new_info.restart_index) - util_translate_prim_restart_data(new_info.index_size, dst, dst, - new_draw.count, - info->restart_index); - } - } - else { - gen_func(draw->start, new_draw.count, dst); - } - - if (src_transfer) - pipe_buffer_unmap(pc->pipe, src_transfer); - - u_upload_unmap(pc->pipe->stream_uploader); - - /* to the translated draw: */ - pc->pipe->draw_vbo(pc->pipe, &new_info, drawid_offset, NULL, &new_draw, 1); - free(direct_draws); - free(rewrite_buffer); - - pipe_resource_reference(&new_info.index.resource, NULL); -} diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.h b/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.h deleted file mode 100644 index 0dfd5f83c..000000000 --- a/lib/mesa/src/gallium/auxiliary/indices/u_primconvert.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark <robclark@freedesktop.org> - */ - -#ifndef U_PRIMCONVERT_H_ -#define U_PRIMCONVERT_H_ - -#include "pipe/p_state.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct primconvert_context; - -struct primconvert_config { - uint32_t primtypes_mask; - uint32_t restart_primtypes_mask; - bool fixed_prim_restart; -}; - -struct primconvert_context *util_primconvert_create(struct pipe_context *pipe, - uint32_t primtypes_mask); -struct primconvert_context *util_primconvert_create_config(struct pipe_context *pipe, - struct primconvert_config *cfg); - -void util_primconvert_destroy(struct primconvert_context *pc); -void util_primconvert_save_rasterizer_state(struct primconvert_context *pc, - const struct pipe_rasterizer_state - *rast); -void -util_primconvert_save_flatshade_first(struct primconvert_context *pc, bool flatshade_first); -void util_primconvert_draw_vbo(struct primconvert_context *pc, - const struct pipe_draw_info *info, - unsigned drawid_offset, - const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias *draws, - unsigned num_draws); - -#ifdef __cplusplus -} -#endif - -#endif /* U_PRIMCONVERT_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_unfilled_gen.py b/lib/mesa/src/gallium/auxiliary/indices/u_unfilled_gen.py deleted file mode 100644 index 0cb3f2121..000000000 --- a/lib/mesa/src/gallium/auxiliary/indices/u_unfilled_gen.py +++ /dev/null @@ -1,272 +0,0 @@ -copyright = ''' -/* - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -''' - -GENERATE, UBYTE, USHORT, UINT = 'generate', 'ubyte', 'ushort', 'uint' -FIRST, LAST = 'first', 'last' - -INTYPES = (GENERATE, UBYTE, USHORT, UINT) -OUTTYPES = (USHORT, UINT) -PRIMS=('tris', - 'trifan', - 'tristrip', - 'quads', - 'quadstrip', - 'polygon', - 'trisadj', - 'tristripadj') - -LONGPRIMS=('PIPE_PRIM_TRIANGLES', - 'PIPE_PRIM_TRIANGLE_FAN', - 'PIPE_PRIM_TRIANGLE_STRIP', - 'PIPE_PRIM_QUADS', - 'PIPE_PRIM_QUAD_STRIP', - 'PIPE_PRIM_POLYGON', - 'PIPE_PRIM_TRIANGLES_ADJACENCY', - 'PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY') - -longprim = dict(zip(PRIMS, LONGPRIMS)) -intype_idx = dict(ubyte='IN_UBYTE', ushort='IN_USHORT', uint='IN_UINT') -outtype_idx = dict(ushort='OUT_USHORT', uint='OUT_UINT') - - -def prolog(): - print('''/* File automatically generated by u_unfilled_gen.py */''') - print(copyright) - print(r''' - -/** - * @file - * Functions to translate and generate index lists - */ - -#include "indices/u_indices.h" -#include "indices/u_indices_priv.h" -#include "pipe/p_compiler.h" -#include "util/u_debug.h" -#include "pipe/p_defines.h" -#include "util/u_memory.h" - - -static unsigned out_size_idx( unsigned index_size ) -{ - switch (index_size) { - case 4: return OUT_UINT; - case 2: return OUT_USHORT; - default: assert(0); return OUT_USHORT; - } -} - -static unsigned in_size_idx( unsigned index_size ) -{ - switch (index_size) { - case 4: return IN_UINT; - case 2: return IN_USHORT; - case 1: return IN_UBYTE; - default: assert(0); return IN_UBYTE; - } -} - - -static u_generate_func generate_line[OUT_COUNT][PRIM_COUNT]; -static u_translate_func translate_line[IN_COUNT][OUT_COUNT][PRIM_COUNT]; - -''') - -def vert( intype, outtype, v0 ): - if intype == GENERATE: - return '(' + outtype + ')(' + v0 + ')' - else: - return '(' + outtype + ')in[' + v0 + ']' - -def line( intype, outtype, ptr, v0, v1 ): - print(' (' + ptr + ')[0] = ' + vert( intype, outtype, v0 ) + ';') - print(' (' + ptr + ')[1] = ' + vert( intype, outtype, v1 ) + ';') - -# XXX: have the opportunity here to avoid over-drawing shared lines in -# tristrips, fans, etc, by integrating this into the calling functions -# and only emitting each line at most once. -# -def do_tri( intype, outtype, ptr, v0, v1, v2 ): - line( intype, outtype, ptr, v0, v1 ) - line( intype, outtype, ptr + '+2', v1, v2 ) - line( intype, outtype, ptr + '+4', v2, v0 ) - -def do_quad( intype, outtype, ptr, v0, v1, v2, v3 ): - line( intype, outtype, ptr, v0, v1 ) - line( intype, outtype, ptr + '+2', v1, v2 ) - line( intype, outtype, ptr + '+4', v2, v3 ) - line( intype, outtype, ptr + '+6', v3, v0 ) - -def name(intype, outtype, prim): - if intype == GENERATE: - return 'generate_' + prim + '_' + outtype - else: - return 'translate_' + prim + '_' + intype + '2' + outtype - -def preamble(intype, outtype, prim): - print('static void ' + name( intype, outtype, prim ) + '(') - if intype != GENERATE: - print(' const void * _in,') - print(' unsigned start,') - if intype != GENERATE: - print(' unsigned in_nr,') - print(' unsigned out_nr,') - if intype != GENERATE: - print(' unsigned restart_index,') - print(' void *_out )') - print('{') - if intype != GENERATE: - print(' const ' + intype + '*in = (const ' + intype + '*)_in;') - print(' ' + outtype + ' *out = (' + outtype + '*)_out;') - print(' unsigned i, j;') - print(' (void)j;') - -def postamble(): - print('}') - - -def tris(intype, outtype): - preamble(intype, outtype, prim='tris') - print(' for (i = start, j = 0; j < out_nr; j+=6, i+=3) { ') - do_tri( intype, outtype, 'out+j', 'i', 'i+1', 'i+2' ); - print(' }') - postamble() - - -def tristrip(intype, outtype): - preamble(intype, outtype, prim='tristrip') - print(' for (i = start, j = 0; j < out_nr; j+=6, i++) { ') - do_tri( intype, outtype, 'out+j', 'i', 'i+1/*+(i&1)*/', 'i+2/*-(i&1)*/' ); - print(' }') - postamble() - - -def trifan(intype, outtype): - preamble(intype, outtype, prim='trifan') - print(' for (i = start, j = 0; j < out_nr; j+=6, i++) { ') - do_tri( intype, outtype, 'out+j', '0', 'i+1', 'i+2' ); - print(' }') - postamble() - - - -def polygon(intype, outtype): - preamble(intype, outtype, prim='polygon') - print(' for (i = start, j = 0; j < out_nr; j+=2, i++) { ') - line( intype, outtype, 'out+j', 'i', '(i+1)%(out_nr/2)' ) - print(' }') - postamble() - - -def quads(intype, outtype): - preamble(intype, outtype, prim='quads') - print(' for (i = start, j = 0; j < out_nr; j+=8, i+=4) { ') - do_quad( intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3' ); - print(' }') - postamble() - - -def quadstrip(intype, outtype): - preamble(intype, outtype, prim='quadstrip') - print(' for (i = start, j = 0; j < out_nr; j+=8, i+=2) { ') - do_quad( intype, outtype, 'out+j', 'i+2', 'i+0', 'i+1', 'i+3' ); - print(' }') - postamble() - - -def trisadj(intype, outtype): - preamble(intype, outtype, prim='trisadj') - print(' for (i = start, j = 0; j < out_nr; j+=6, i+=6) { ') - do_tri( intype, outtype, 'out+j', 'i', 'i+2', 'i+4' ); - print(' }') - postamble() - - -def tristripadj(intype, outtype): - preamble(intype, outtype, prim='tristripadj') - print(' for (i = start, j = 0; j < out_nr; j+=6, i+=2) { ') - do_tri( intype, outtype, 'out+j', 'i', 'i+2', 'i+4' ); - print(' }') - postamble() - - -def emit_funcs(): - for intype in INTYPES: - for outtype in OUTTYPES: - tris(intype, outtype) - tristrip(intype, outtype) - trifan(intype, outtype) - quads(intype, outtype) - quadstrip(intype, outtype) - polygon(intype, outtype) - trisadj(intype, outtype) - tristripadj(intype, outtype) - -def init(intype, outtype, prim): - if intype == GENERATE: - print(('generate_line[' + - outtype_idx[outtype] + - '][' + longprim[prim] + - '] = ' + name( intype, outtype, prim ) + ';')) - else: - print(('translate_line[' + - intype_idx[intype] + - '][' + outtype_idx[outtype] + - '][' + longprim[prim] + - '] = ' + name( intype, outtype, prim ) + ';')) - - -def emit_all_inits(): - for intype in INTYPES: - for outtype in OUTTYPES: - for prim in PRIMS: - init(intype, outtype, prim) - -def emit_init(): - print('void u_unfilled_init( void )') - print('{') - print(' static int firsttime = 1;') - print(' if (!firsttime) return;') - print(' firsttime = 0;') - emit_all_inits() - print('}') - - - - -def epilog(): - print('#include "indices/u_unfilled_indices.c"') - - -def main(): - prolog() - emit_funcs() - emit_init() - epilog() - - -if __name__ == '__main__': - main() diff --git a/lib/mesa/src/gallium/auxiliary/indices/u_unfilled_indices.c b/lib/mesa/src/gallium/auxiliary/indices/u_unfilled_indices.c deleted file mode 100644 index 121877a60..000000000 --- a/lib/mesa/src/gallium/auxiliary/indices/u_unfilled_indices.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "u_indices.h" -#include "u_indices_priv.h" - - -static void translate_ubyte_ushort( const void *in, - unsigned start, - unsigned in_nr, - unsigned out_nr, - unsigned restart_index, - void *out ) -{ - const ubyte *in_ub = (const ubyte *)in; - ushort *out_us = (ushort *)out; - unsigned i; - for (i = 0; i < out_nr; i++) - out_us[i] = (ushort) in_ub[i+start]; -} - -static void translate_memcpy_ushort( const void *in, - unsigned start, - unsigned in_nr, - unsigned out_nr, - unsigned restart_index, - void *out ) -{ - memcpy(out, &((short *)in)[start], out_nr*sizeof(short)); -} - -static void translate_memcpy_uint( const void *in, - unsigned start, - unsigned in_nr, - unsigned out_nr, - unsigned restart_index, - void *out ) -{ - memcpy(out, &((int *)in)[start], out_nr*sizeof(int)); -} - - -static void generate_linear_ushort( unsigned start, - unsigned nr, - void *out ) -{ - ushort *out_us = (ushort *)out; - unsigned i; - for (i = 0; i < nr; i++) - out_us[i] = (ushort)(i + start); -} - -static void generate_linear_uint( unsigned start, - unsigned nr, - void *out ) -{ - unsigned *out_ui = (unsigned *)out; - unsigned i; - for (i = 0; i < nr; i++) - out_ui[i] = i + start; -} - - -/** - * Given a primitive type and number of vertices, return the number of vertices - * needed to draw the primitive with fill mode = PIPE_POLYGON_MODE_LINE using - * separate lines (PIPE_PRIM_LINES). - */ -static unsigned nr_lines( unsigned prim, - unsigned nr ) -{ - switch (prim) { - case PIPE_PRIM_TRIANGLES: - return (nr / 3) * 6; - case PIPE_PRIM_TRIANGLE_STRIP: - return (nr - 2) * 6; - case PIPE_PRIM_TRIANGLE_FAN: - return (nr - 2) * 6; - case PIPE_PRIM_QUADS: - return (nr / 4) * 8; - case PIPE_PRIM_QUAD_STRIP: - return (nr - 2) / 2 * 8; - case PIPE_PRIM_POLYGON: - return 2 * nr; /* a line (two verts) for each polygon edge */ - default: - assert(0); - return 0; - } -} - - - -int u_unfilled_translator( unsigned prim, - unsigned in_index_size, - unsigned nr, - unsigned unfilled_mode, - unsigned *out_prim, - unsigned *out_index_size, - unsigned *out_nr, - u_translate_func *out_translate ) -{ - unsigned in_idx; - unsigned out_idx; - - u_unfilled_init(); - - in_idx = in_size_idx(in_index_size); - *out_index_size = (in_index_size == 4) ? 4 : 2; - out_idx = out_size_idx(*out_index_size); - - if (unfilled_mode == PIPE_POLYGON_MODE_POINT) - { - *out_prim = PIPE_PRIM_POINTS; - *out_nr = nr; - - switch (in_index_size) - { - case 1: - *out_translate = translate_ubyte_ushort; - return U_TRANSLATE_NORMAL; - case 2: - *out_translate = translate_memcpy_uint; - return U_TRANSLATE_MEMCPY; - case 4: - *out_translate = translate_memcpy_ushort; - return U_TRANSLATE_MEMCPY; - default: - *out_translate = translate_memcpy_uint; - *out_nr = 0; - assert(0); - return U_TRANSLATE_ERROR; - } - } - else { - assert(unfilled_mode == PIPE_POLYGON_MODE_LINE); - *out_prim = PIPE_PRIM_LINES; - *out_translate = translate_line[in_idx][out_idx][prim]; - *out_nr = nr_lines( prim, nr ); - return U_TRANSLATE_NORMAL; - } -} - - -/** - * Utility for converting unfilled polygons into points, lines, triangles. - * Few drivers have direct support for OpenGL's glPolygonMode. - * This function helps with converting triangles into points or lines - * when the front and back fill modes are the same. When there's - * different front/back fill modes, that can be handled with the - * 'draw' module. - */ -int u_unfilled_generator( unsigned prim, - unsigned start, - unsigned nr, - unsigned unfilled_mode, - unsigned *out_prim, - unsigned *out_index_size, - unsigned *out_nr, - u_generate_func *out_generate ) -{ - unsigned out_idx; - - u_unfilled_init(); - - *out_index_size = ((start + nr) > 0xfffe) ? 4 : 2; - out_idx = out_size_idx(*out_index_size); - - if (unfilled_mode == PIPE_POLYGON_MODE_POINT) { - - if (*out_index_size == 4) - *out_generate = generate_linear_uint; - else - *out_generate = generate_linear_ushort; - - *out_prim = PIPE_PRIM_POINTS; - *out_nr = nr; - return U_GENERATE_LINEAR; - } - else { - assert(unfilled_mode == PIPE_POLYGON_MODE_LINE); - *out_prim = PIPE_PRIM_LINES; - *out_generate = generate_line[out_idx][prim]; - *out_nr = nr_lines( prim, nr ); - - return U_GENERATE_REUSABLE; - } -} - diff --git a/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c b/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c index 58841029c..ad1839658 100644 --- a/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/lib/mesa/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -100,7 +100,7 @@ struct ttn_compile { #define ttn_channel(b, src, swiz) \ nir_channel(b, src, TGSI_SWIZZLE_##swiz) -gl_varying_slot +static gl_varying_slot tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index) { switch (semantic) { @@ -1280,7 +1280,7 @@ get_image_var(struct ttn_compile *c, int binding, if (!var) { const struct glsl_type *type = glsl_image_type(dim, is_array, base_type); - var = nir_variable_create(c->build.shader, nir_var_uniform, type, "image"); + var = nir_variable_create(c->build.shader, nir_var_image, type, "image"); var->data.binding = binding; var->data.explicit_binding = true; var->data.access = access; @@ -2273,9 +2273,9 @@ ttn_read_pipe_caps(struct ttn_compile *c, struct pipe_screen *screen) { c->cap_samplers_as_deref = screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF); - c->cap_face_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL); - c->cap_position_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL); - c->cap_point_is_sysval = screen->get_param(screen, PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL); + c->cap_face_is_sysval = screen->get_param(screen, PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL); + c->cap_position_is_sysval = screen->get_param(screen, PIPE_CAP_FS_POSITION_IS_SYSVAL); + c->cap_point_is_sysval = screen->get_param(screen, PIPE_CAP_FS_POINT_IS_SYSVAL); } /** @@ -2324,6 +2324,10 @@ ttn_compile_init(const void *tgsi_tokens, s->info.num_ubos = util_last_bit(scan.const_buffers_declared >> 1); s->info.num_images = util_last_bit(scan.images_declared); s->info.num_textures = util_last_bit(scan.samplers_declared); + s->info.internal = false; + + /* Default for TGSI is separate, this is assumed throughout the tree */ + s->info.separate_shader = true; for (unsigned i = 0; i < TGSI_PROPERTY_COUNT; i++) { unsigned value = scan.properties[i]; @@ -2563,7 +2567,6 @@ tgsi_to_nir(const void *tgsi_tokens, struct nir_shader *s = NULL; uint8_t key[CACHE_KEY_SIZE]; unsigned processor; - bool debug = env_var_as_boolean("TGSI_TO_NIR_DEBUG", false); if (allow_disk_cache) cache = screen->get_disk_shader_cache(screen); @@ -2581,7 +2584,11 @@ tgsi_to_nir(const void *tgsi_tokens, if (s) return s; - if (debug) { +#ifndef NDEBUG + nir_process_debug_variable(); +#endif + + if (NIR_DEBUG(TGSI)) { fprintf(stderr, "TGSI before translation to NIR:\n"); tgsi_dump(tgsi_tokens, 0); } @@ -2593,7 +2600,7 @@ tgsi_to_nir(const void *tgsi_tokens, ttn_finalize_nir(c, screen); ralloc_free(c); - if (debug) { + if (NIR_DEBUG(TGSI)) { mesa_logi("NIR after translation from TGSI:\n"); nir_log_shaderi(s); } diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader.h index 8c532e601..e2522ddf3 100644 --- a/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader.h +++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader.h @@ -144,6 +144,20 @@ pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, const struct drisw_loader_funcs *drisw_lf); /** + * Initialize vk dri device give the drisw_loader_funcs. + * + * This function is platform-specific. + * + * Function does not take ownership of the fd, but duplicates it locally. + * The local fd is closed during pipe_loader_release. + * + * \sa pipe_loader_probe + */ +bool +pipe_loader_vk_probe_dri(struct pipe_loader_device **devs, + const struct drisw_loader_funcs *drisw_lf); + +/** * Initialize a kms backed sw device given an fd. * * This function is platform-specific. diff --git a/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c index 85f8d94b8..c042b92cc 100644 --- a/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c +++ b/lib/mesa/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c @@ -43,7 +43,7 @@ #include "frontend/drisw_api.h" #include "frontend/sw_driver.h" #include "frontend/sw_winsys.h" - +#include "util/driconf.h" struct pipe_loader_sw_device { struct pipe_loader_device base; @@ -58,6 +58,9 @@ struct pipe_loader_sw_device { #define pipe_loader_sw_device(dev) ((struct pipe_loader_sw_device *)dev) static const struct pipe_loader_ops pipe_loader_sw_ops; +#if defined(HAVE_PIPE_LOADER_DRI) && defined(HAVE_ZINK) +static const struct pipe_loader_ops pipe_loader_vk_ops; +#endif #ifdef GALLIUM_STATIC_TARGETS static const struct sw_driver_descriptor driver_descriptors = { @@ -90,6 +93,35 @@ static const struct sw_driver_descriptor driver_descriptors = { }; #endif +#if defined(GALLIUM_STATIC_TARGETS) && defined(HAVE_ZINK) && defined(HAVE_PIPE_LOADER_DRI) +static const struct sw_driver_descriptor kopper_driver_descriptors = { + .create_screen = sw_screen_create_zink, + .winsys = { + { + .name = "dri", + .create_winsys = dri_create_sw_winsys, + }, +#ifdef HAVE_PIPE_LOADER_KMS + { + .name = "kms_dri", + .create_winsys = kms_dri_create_winsys, + }, +#endif +#ifndef __ANDROID__ + { + .name = "null", + .create_winsys = null_sw_create, + }, + { + .name = "wrapped", + .create_winsys = wrapper_sw_winsys_wrap_pipe_screen, + }, +#endif + { 0 }, + } +}; +#endif + static bool pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev) { @@ -124,6 +156,42 @@ pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev) return true; } +#if defined(HAVE_PIPE_LOADER_DRI) && defined(HAVE_ZINK) +static bool +pipe_loader_vk_probe_init_common(struct pipe_loader_sw_device *sdev) +{ + sdev->base.type = PIPE_LOADER_DEVICE_PLATFORM; + sdev->base.driver_name = "kopper"; + sdev->base.ops = &pipe_loader_vk_ops; + sdev->fd = -1; + +#ifdef GALLIUM_STATIC_TARGETS + sdev->dd = &kopper_driver_descriptors; + if (!sdev->dd) + return false; +#else + const char *search_dir = getenv("GALLIUM_PIPE_SEARCH_DIR"); + if (search_dir == NULL) + search_dir = PIPE_SEARCH_DIR; + + sdev->lib = pipe_loader_find_module("swrast", search_dir); + if (!sdev->lib) + return false; + + sdev->dd = (const struct sw_driver_descriptor *) + util_dl_get_proc_address(sdev->lib, "swrast_driver_descriptor"); + + if (!sdev->dd){ + util_dl_close(sdev->lib); + sdev->lib = NULL; + return false; + } +#endif + + return true; +} +#endif + static void pipe_loader_sw_probe_teardown_common(struct pipe_loader_sw_device *sdev) { @@ -163,6 +231,37 @@ fail: FREE(sdev); return false; } +#ifdef HAVE_ZINK +bool +pipe_loader_vk_probe_dri(struct pipe_loader_device **devs, const struct drisw_loader_funcs *drisw_lf) +{ + struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device); + int i; + + if (!sdev) + return false; + + if (!pipe_loader_vk_probe_init_common(sdev)) + goto fail; + + for (i = 0; sdev->dd->winsys[i].name; i++) { + if (strcmp(sdev->dd->winsys[i].name, "dri") == 0) { + sdev->ws = sdev->dd->winsys[i].create_winsys(drisw_lf); + break; + } + } + if (!sdev->ws) + goto fail; + + *devs = &sdev->base; + return true; + +fail: + pipe_loader_sw_probe_teardown_common(sdev); + FREE(sdev); + return false; +} +#endif #endif #ifdef HAVE_PIPE_LOADER_KMS @@ -303,6 +402,19 @@ pipe_loader_sw_get_driconf(struct pipe_loader_device *dev, unsigned *count) return NULL; } +#if defined(HAVE_PIPE_LOADER_DRI) && defined(HAVE_ZINK) +static const driOptionDescription zink_driconf[] = { + #include "zink/driinfo_zink.h" +}; + +static const struct driOptionDescription * +pipe_loader_vk_get_driconf(struct pipe_loader_device *dev, unsigned *count) +{ + *count = ARRAY_SIZE(zink_driconf); + return zink_driconf; +} +#endif + static struct pipe_screen * pipe_loader_sw_create_screen(struct pipe_loader_device *dev, const struct pipe_screen_config *config, bool sw_vk) @@ -310,7 +422,7 @@ pipe_loader_sw_create_screen(struct pipe_loader_device *dev, struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(dev); struct pipe_screen *screen; - screen = sdev->dd->create_screen(sdev->ws, sw_vk); + screen = sdev->dd->create_screen(sdev->ws, config, sw_vk); if (!screen) sdev->ws->destroy(sdev->ws); @@ -322,3 +434,11 @@ static const struct pipe_loader_ops pipe_loader_sw_ops = { .get_driconf = pipe_loader_sw_get_driconf, .release = pipe_loader_sw_release }; + +#if defined(HAVE_PIPE_LOADER_DRI) && defined(HAVE_ZINK) +static const struct pipe_loader_ops pipe_loader_vk_ops = { + .create_screen = pipe_loader_sw_create_screen, + .get_driconf = pipe_loader_vk_get_driconf, + .release = pipe_loader_sw_release +}; +#endif diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.c b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.c index 83c1a597f..cbbef09c3 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.c +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.c @@ -95,6 +95,17 @@ pb_slabs_reclaim_locked(struct pb_slabs *slabs) } } +static void +pb_slabs_reclaim_all_locked(struct pb_slabs *slabs) +{ + struct pb_slab_entry *entry, *next; + LIST_FOR_EACH_ENTRY_SAFE(entry, next, &slabs->reclaim, head) { + if (slabs->can_reclaim(slabs->priv, entry)) { + pb_slab_reclaim(slabs, entry); + } + } +} + /* Allocate a slab entry of the given size from the given heap. * * This will try to re-use entries that have previously been freed. However, @@ -105,7 +116,7 @@ pb_slabs_reclaim_locked(struct pb_slabs *slabs) * Note that slab_free can also be called by this function. */ struct pb_slab_entry * -pb_slab_alloc(struct pb_slabs *slabs, unsigned size, unsigned heap) +pb_slab_alloc_reclaimed(struct pb_slabs *slabs, unsigned size, unsigned heap, bool reclaim_all) { unsigned order = MAX2(slabs->min_order, util_logbase2_ceil(size)); unsigned group_index; @@ -136,8 +147,12 @@ pb_slab_alloc(struct pb_slabs *slabs, unsigned size, unsigned heap) * entries, try reclaiming entries. */ if (list_is_empty(&group->slabs) || - list_is_empty(&LIST_ENTRY(struct pb_slab, group->slabs.next, head)->free)) - pb_slabs_reclaim_locked(slabs); + list_is_empty(&LIST_ENTRY(struct pb_slab, group->slabs.next, head)->free)) { + if (reclaim_all) + pb_slabs_reclaim_all_locked(slabs); + else + pb_slabs_reclaim_locked(slabs); + } /* Remove slabs without free entries. */ while (!list_is_empty(&group->slabs)) { @@ -174,6 +189,12 @@ pb_slab_alloc(struct pb_slabs *slabs, unsigned size, unsigned heap) return entry; } +struct pb_slab_entry * +pb_slab_alloc(struct pb_slabs *slabs, unsigned size, unsigned heap) +{ + return pb_slab_alloc_reclaimed(slabs, size, heap, false); +} + /* Free the given slab entry. * * The entry may still be in use e.g. by in-flight command submissions. The diff --git a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.h b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.h index c6b115eca..e8e8f7687 100644 --- a/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.h +++ b/lib/mesa/src/gallium/auxiliary/pipebuffer/pb_slab.h @@ -135,6 +135,9 @@ struct pb_slabs }; struct pb_slab_entry * +pb_slab_alloc_reclaimed(struct pb_slabs *slabs, unsigned size, unsigned heap, bool reclaim_all); + +struct pb_slab_entry * pb_slab_alloc(struct pb_slabs *slabs, unsigned size, unsigned heap); void diff --git a/lib/mesa/src/gallium/auxiliary/renderonly/renderonly.h b/lib/mesa/src/gallium/auxiliary/renderonly/renderonly.h index 0d08a16e3..0eafbf548 100644 --- a/lib/mesa/src/gallium/auxiliary/renderonly/renderonly.h +++ b/lib/mesa/src/gallium/auxiliary/renderonly/renderonly.h @@ -55,6 +55,21 @@ struct renderonly { * resource into the scanout hw. * - Make it easier for a gallium driver to detect if anything special needs * to be done in flush_resource(..) like a resolve to linear. + * + * When the screen has renderonly enabled, drivers need to follow these + * rules: + * - Create the scanout resource in resource_create and + * resource_create_with_modifiers if PIPE_BIND_SCANOUT is set. Drivers + * can fail if the scanout resource couldn't be created. + * - Try to import the scanout resource in resource_from_handle with + * renderonly_create_gpu_import_for_resource. Drivers MUST NOT fail if + * the scanout resource couldn't be created. + * - In a resource_get_handle call for WINSYS_HANDLE_TYPE_KMS, use + * renderonly_get_handle with the scanout resource, even if the scanout + * resource is NULL. Drivers MUST NOT return their own resource here, + * because the GEM handle will not be valid for the caller's DRM FD. + * - Implement resource_get_params for at least PIPE_RESOURCE_PARAM_STRIDE, + * PIPE_RESOURCE_PARAM_OFFSET and PIPE_RESOURCE_PARAM_MODIFIER. */ struct renderonly_scanout *(*create_for_resource)(struct pipe_resource *rsc, struct renderonly *ro, diff --git a/lib/mesa/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/lib/mesa/src/gallium/auxiliary/rtasm/rtasm_execmem.c index ca84b594b..ab0938dfd 100644 --- a/lib/mesa/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/lib/mesa/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -86,7 +86,7 @@ init_heap(void) exec_heap = u_mmInit( 0, EXEC_HEAP_SIZE ); if (!exec_mem) - exec_mem = (unsigned char *) mmap(0, EXEC_HEAP_SIZE, + exec_mem = (unsigned char *) mmap(NULL, EXEC_HEAP_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); diff --git a/lib/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h b/lib/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h index 6167a94d8..601396a54 100644 --- a/lib/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h +++ b/lib/mesa/src/gallium/auxiliary/target-helpers/drm_helper.h @@ -274,21 +274,31 @@ DRM_DRIVER_DESCRIPTOR_STUB(msm) #endif DRM_DRIVER_DESCRIPTOR_ALIAS(msm, kgsl, NULL, 0) -#ifdef GALLIUM_VIRGL +#if defined(GALLIUM_VIRGL) || (defined(GALLIUM_FREEDRENO) && !defined(PIPE_LOADER_DYNAMIC)) #include "virgl/drm/virgl_drm_public.h" #include "virgl/virgl_public.h" static struct pipe_screen * pipe_virtio_gpu_create_screen(int fd, const struct pipe_screen_config *config) { - struct pipe_screen *screen; + struct pipe_screen *screen = NULL; - screen = virgl_drm_screen_create(fd, config); + /* Try native guest driver(s) first, and then fallback to virgl: */ +#ifdef GALLIUM_FREEDRENO + if (!screen) + screen = fd_drm_screen_create(fd, NULL, config); +#endif +#ifdef GALLIUM_VIRGL + if (!screen) + screen = virgl_drm_screen_create(fd, config); +#endif return screen ? debug_screen_wrap(screen) : NULL; } const driOptionDescription virgl_driconf[] = { +#ifdef GALLIUM_VIRGL #include "virgl/virgl_driinfo.h.in" +#endif }; DRM_DRIVER_DESCRIPTOR(virtio_gpu, virgl_driconf, ARRAY_SIZE(virgl_driconf)) diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_aa_point.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_aa_point.c index cdd4fef25..58f610fc4 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_aa_point.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_aa_point.c @@ -276,12 +276,6 @@ tgsi_add_aa_point(const struct tgsi_token *tokens_in, struct aa_transform_context transform; const uint num_new_tokens = 200; /* should be enough */ const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens; - struct tgsi_token *new_tokens; - - /* allocate new tokens buffer */ - new_tokens = tgsi_alloc_tokens(new_len); - if (!new_tokens) - return NULL; /* setup transformation context */ memset(&transform, 0, sizeof(transform)); @@ -302,8 +296,5 @@ tgsi_add_aa_point(const struct tgsi_token *tokens_in, transform.num_imm = 0; transform.num_input = 0; - /* transform the shader */ - tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base); - - return new_tokens; + return tgsi_transform_shader(tokens_in, new_len, &transform.base); } diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c index a8446ff27..ae1daa6dc 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -975,23 +975,6 @@ static const union tgsi_exec_channel M128Vec = { {-128.0f, -128.0f, -128.0f, -128.0f} }; - -/** - * Assert that none of the float values in 'chan' are infinite or NaN. - * NaN and Inf may occur normally during program execution and should - * not lead to crashes, etc. But when debugging, it's helpful to catch - * them. - */ -static inline void -check_inf_or_nan(const union tgsi_exec_channel *chan) -{ - assert(!util_is_inf_or_nan((chan)->f[0])); - assert(!util_is_inf_or_nan((chan)->f[1])); - assert(!util_is_inf_or_nan((chan)->f[2])); - assert(!util_is_inf_or_nan((chan)->f[3])); -} - - #ifdef DEBUG static void print_chan(const char *msg, const union tgsi_exec_channel *chan) @@ -1518,8 +1501,6 @@ get_index_registers(const struct tgsi_exec_machine *mach, union tgsi_exec_channel *index, union tgsi_exec_channel *index2D) { - uint swizzle; - /* We start with a direct index into a register file. * * file[1], @@ -1543,35 +1524,17 @@ get_index_registers(const struct tgsi_exec_machine *mach, * .x = Indirect.SwizzleX */ if (reg->Register.Indirect) { - union tgsi_exec_channel index2; - union tgsi_exec_channel indir_index; const uint execmask = mach->ExecMask; - uint i; - /* which address register (always zero now) */ - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->Indirect.Index; - /* get current value of address register[swizzle] */ - swizzle = reg->Indirect.Swizzle; - fetch_src_file_channel(mach, - reg->Indirect.File, - swizzle, - &index2, - &ZeroVec, - &indir_index); - - /* add value of address register to the offset */ - index->i[0] += indir_index.i[0]; - index->i[1] += indir_index.i[1]; - index->i[2] += indir_index.i[2]; - index->i[3] += indir_index.i[3]; + assert(reg->Indirect.File == TGSI_FILE_ADDRESS); + const union tgsi_exec_channel *addr = &mach->Addrs[reg->Indirect.Index].xyzw[reg->Indirect.Swizzle]; + for (int i = 0; i < TGSI_QUAD_SIZE; i++) + index->i[i] += addr->u[i]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. */ - for (i = 0; i < TGSI_QUAD_SIZE; i++) { + for (int i = 0; i < TGSI_QUAD_SIZE; i++) { if ((execmask & (1 << i)) == 0) index->i[i] = 0; } @@ -1603,33 +1566,17 @@ get_index_registers(const struct tgsi_exec_machine *mach, * .y = DimIndirect.SwizzleX */ if (reg->Dimension.Indirect) { - union tgsi_exec_channel index2; - union tgsi_exec_channel indir_index; const uint execmask = mach->ExecMask; - uint i; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->DimIndirect.Index; - - swizzle = reg->DimIndirect.Swizzle; - fetch_src_file_channel(mach, - reg->DimIndirect.File, - swizzle, - &index2, - &ZeroVec, - &indir_index); - index2D->i[0] += indir_index.i[0]; - index2D->i[1] += indir_index.i[1]; - index2D->i[2] += indir_index.i[2]; - index2D->i[3] += indir_index.i[3]; + assert(reg->DimIndirect.File == TGSI_FILE_ADDRESS); + const union tgsi_exec_channel *addr = &mach->Addrs[reg->DimIndirect.Index].xyzw[reg->DimIndirect.Swizzle]; + for (int i = 0; i < TGSI_QUAD_SIZE; i++) + index2D->i[i] += addr->u[i]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. */ - for (i = 0; i < TGSI_QUAD_SIZE; i++) { + for (int i = 0; i < TGSI_QUAD_SIZE; i++) { if ((execmask & (1 << i)) == 0) { index2D->i[i] = 0; } @@ -3899,16 +3846,31 @@ exec_store_img(struct tgsi_exec_machine *mach, rgba); } + static void -exec_store_buf(struct tgsi_exec_machine *mach, +exec_store_membuf(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { uint32_t unit = fetch_store_img_unit(mach, &inst->Dst[0]); uint32_t size; - char *ptr = mach->Buffer->lookup(mach->Buffer, unit, &size); int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; + const char *ptr; + switch (inst->Dst[0].Register.File) { + case TGSI_FILE_MEMORY: + ptr = mach->LocalMem; + size = mach->LocalMemSize; + break; + + case TGSI_FILE_BUFFER: + ptr = mach->Buffer->lookup(mach->Buffer, unit, &size); + break; + + default: + unreachable("unsupported TGSI_OPCODE_STORE file"); + } + union tgsi_exec_channel offset; IFETCH(&offset, 0, TGSI_CHAN_X); @@ -3933,46 +3895,13 @@ exec_store_buf(struct tgsi_exec_machine *mach, } static void -exec_store_mem(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_exec_channel r[3]; - union tgsi_exec_channel value[4]; - uint i, chan; - char *ptr = mach->LocalMem; - int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; - - IFETCH(&r[0], 0, TGSI_CHAN_X); - - for (i = 0; i < 4; i++) { - FETCH(&value[i], 1, TGSI_CHAN_X + i); - } - - if (r[0].u[0] >= mach->LocalMemSize) - return; - ptr += r[0].u[0]; - - for (i = 0; i < TGSI_QUAD_SIZE; i++) { - if (execmask & (1 << i)) { - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - memcpy(ptr + (chan * 4), &value[chan].u[0], 4); - } - } - } - } -} - -static void exec_store(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) exec_store_img(mach, inst); - else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) - exec_store_buf(mach, inst); - else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) - exec_store_mem(mach, inst); + else + exec_store_membuf(mach, inst); } static void diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c index db2302669..42f888016 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_lowering.c @@ -1573,12 +1573,10 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config, newlen += 2 * numtmp; newlen += 5; /* immediate */ - newtoks = tgsi_alloc_tokens(newlen); + newtoks = tgsi_transform_shader(tokens, newlen, &ctx.base); if (!newtoks) return NULL; - tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); - tgsi_scan_shader(newtoks, info); #if 0 /* debug */ diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index 1aff51381..684c06144 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -204,12 +204,17 @@ OP11(INTERP_CENTROID) OP12(INTERP_SAMPLE) OP12(INTERP_OFFSET) +OP11(FBFETCH) + +OP10(CLOCK) + #undef OP00 #undef OP01 #undef OP10 #undef OP11 #undef OP12 #undef OP13 +#undef OP14 #undef OP00_LBL #undef OP01_LBL diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c index 67b4b0ab4..432a137fc 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c @@ -568,13 +568,7 @@ tgsi_add_point_sprite(const struct tgsi_token *tokens_in, } - /* allocate new tokens buffer */ - new_tokens = tgsi_alloc_tokens(new_len); - if (!new_tokens) - return NULL; - - /* transform the shader */ - tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base); + new_tokens = tgsi_transform_shader(tokens_in, new_len, &transform.base); if (aa_point_coord_index) *aa_point_coord_index = transform.point_coord_aa; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c index ecb3706ed..5fab72eb6 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -663,6 +663,10 @@ scan_declaration(struct tgsi_shader_info *info, info->shader_buffers_declared |= 1u << reg; break; + case TGSI_FILE_HW_ATOMIC: + info->hw_atomic_declared |= 1u << reg; + break; + case TGSI_FILE_INPUT: info->input_semantic_name[reg] = (ubyte) semName; info->input_semantic_index[reg] = (ubyte) semIndex; diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h index ace5b0887..076c138e4 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -164,6 +164,7 @@ struct tgsi_shader_info bool uses_bindless_image_store; bool uses_bindless_image_atomic; + unsigned hw_atomic_declared; /**< bitmask of declared atomic_counter */ /** * Bitmask indicating which register files are accessed with * indirect addressing. The bits are (1 << TGSI_FILE_x), etc. diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h index 727edeb05..fc7a5877e 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -29,6 +29,7 @@ #define TGSI_TRANSFORM_H +#include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_build.h" @@ -64,10 +65,11 @@ struct tgsi_transform_context /** * Called at end of input program to allow caller to append extra - * instructions. Return number of tokens emitted. + * instructions. */ void (*epilog)(struct tgsi_transform_context *ctx); + enum pipe_shader_type processor; /*** PRIVATE ***/ @@ -88,6 +90,7 @@ struct tgsi_transform_context uint max_tokens_out; struct tgsi_token *tokens_out; uint ti; + bool fail; }; @@ -570,10 +573,9 @@ tgsi_transform_tex_inst(struct tgsi_transform_context *ctx, } -extern int +extern struct tgsi_token * tgsi_transform_shader(const struct tgsi_token *tokens_in, - struct tgsi_token *tokens_out, - uint max_tokens_out, + uint initial_tokens_len, struct tgsi_transform_context *ctx); diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c index eedba34cb..207fcc45b 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -39,6 +39,7 @@ #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_prim.h" #include "util/u_bitmask.h" #include "GL/gl.h" #include "compiler/shader_info.h" @@ -205,6 +206,8 @@ struct ureg_program struct ureg_tokens domain[2]; bool use_memory[TGSI_MEMORY_TYPE_COUNT]; + + bool precise; }; static union tgsi_any_token error_tokens[32]; @@ -1267,7 +1270,7 @@ ureg_emit_insn(struct ureg_program *ureg, out[0].insn = tgsi_default_instruction(); out[0].insn.Opcode = opcode; out[0].insn.Saturate = saturate; - out[0].insn.Precise = precise; + out[0].insn.Precise = precise || ureg->precise; out[0].insn.NumDstRegs = num_dst; out[0].insn.NumSrcRegs = num_src; @@ -2190,7 +2193,7 @@ const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg, if (nr_tokens) *nr_tokens = ureg->domain[DOMAIN_DECL].count; - ureg->domain[DOMAIN_DECL].tokens = 0; + ureg->domain[DOMAIN_DECL].tokens = NULL; ureg->domain[DOMAIN_DECL].size = 0; ureg->domain[DOMAIN_DECL].order = 0; ureg->domain[DOMAIN_DECL].count = 0; @@ -2295,11 +2298,7 @@ static void ureg_setup_tess_eval_shader(struct ureg_program *ureg, const struct shader_info *info) { - if (info->tess.primitive_mode == GL_ISOLINES) - ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES); - else - ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, - info->tess.primitive_mode); + ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, u_tess_prim_from_shader(info->tess._primitive_mode)); STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL); STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 == @@ -2437,3 +2436,8 @@ void ureg_destroy( struct ureg_program *ureg ) FREE(ureg); } + +void ureg_set_precise( struct ureg_program *ureg, bool precise ) +{ + ureg->precise = precise; +} diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 343708b6c..59041e94d 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -137,6 +137,7 @@ void ureg_free_tokens( const struct tgsi_token *tokens ); void ureg_destroy( struct ureg_program * ); +void ureg_set_precise( struct ureg_program *ureg, bool precise ); /*********************************************************************** * Convenience routine: diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c index d6ef81d28..a101cce24 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -108,16 +108,20 @@ tgsi_util_set_src_register_swizzle(struct tgsi_src_register *reg, * used by this instruction. */ unsigned -tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, - unsigned src_idx) +tgsi_util_get_src_usage_mask(enum tgsi_opcode opcode, + unsigned src_idx, + uint8_t write_mask, + uint8_t swizzle_x, + uint8_t swizzle_y, + uint8_t swizzle_z, + uint8_t swizzle_w, + enum tgsi_texture_type tex_target, + enum tgsi_texture_type mem_target) { - const struct tgsi_full_src_register *src = &inst->Src[src_idx]; - unsigned write_mask = inst->Dst[0].Register.WriteMask; unsigned read_mask; unsigned usage_mask; - unsigned chan; - switch (inst->Instruction.Opcode) { + switch (opcode) { case TGSI_OPCODE_IF: case TGSI_OPCODE_UIF: case TGSI_OPCODE_EMIT: @@ -241,20 +245,20 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_OPCODE_LODQ: case TGSI_OPCODE_TG4: { unsigned dim_layer = - tgsi_util_get_texture_coord_dim(inst->Texture.Texture); + tgsi_util_get_texture_coord_dim(tex_target); unsigned dim_layer_shadow, dim; /* Add shadow. */ - if (tgsi_is_shadow_target(inst->Texture.Texture)) { + if (tgsi_is_shadow_target(tex_target)) { dim_layer_shadow = dim_layer + 1; - if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D) + if (tex_target == TGSI_TEXTURE_SHADOW1D) dim_layer_shadow = 3; } else { dim_layer_shadow = dim_layer; } /* Remove layer. */ - if (tgsi_is_array_sampler(inst->Texture.Texture)) + if (tgsi_is_array_sampler(tex_target)) dim = dim_layer - 1; else dim = dim_layer; @@ -263,33 +267,33 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, switch (src_idx) { case 0: - if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) + if (opcode == TGSI_OPCODE_LODQ) read_mask = u_bit_consecutive(0, dim); else read_mask = u_bit_consecutive(0, dim_layer_shadow) & 0xf; - if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D) + if (tex_target == TGSI_TEXTURE_SHADOW1D) read_mask &= ~TGSI_WRITEMASK_Y; - if (inst->Instruction.Opcode == TGSI_OPCODE_TXF || - inst->Instruction.Opcode == TGSI_OPCODE_TXB || - inst->Instruction.Opcode == TGSI_OPCODE_TXL || - inst->Instruction.Opcode == TGSI_OPCODE_TXP) + if (opcode == TGSI_OPCODE_TXF || + opcode == TGSI_OPCODE_TXB || + opcode == TGSI_OPCODE_TXL || + opcode == TGSI_OPCODE_TXP) read_mask |= TGSI_WRITEMASK_W; break; case 1: - if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) + if (opcode == TGSI_OPCODE_TXD) read_mask = u_bit_consecutive(0, dim); - else if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || - inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || - inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || - inst->Instruction.Opcode == TGSI_OPCODE_TG4) + else if (opcode == TGSI_OPCODE_TEX2 || + opcode == TGSI_OPCODE_TXB2 || + opcode == TGSI_OPCODE_TXL2 || + opcode == TGSI_OPCODE_TG4) read_mask = TGSI_WRITEMASK_X; break; case 2: - if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) + if (opcode == TGSI_OPCODE_TXD) read_mask = u_bit_consecutive(0, dim); break; } @@ -300,14 +304,14 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, if (src_idx == 0) { read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */ } else { - unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture); + unsigned dim = tgsi_util_get_texture_coord_dim(mem_target); read_mask = u_bit_consecutive(0, dim); } break; case TGSI_OPCODE_STORE: if (src_idx == 0) { - unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture); + unsigned dim = tgsi_util_get_texture_coord_dim(mem_target); read_mask = u_bit_consecutive(0, dim); } else { read_mask = TGSI_WRITEMASK_XYZW; @@ -328,7 +332,7 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, if (src_idx == 0) { read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */ } else if (src_idx == 1) { - unsigned dim = tgsi_util_get_texture_coord_dim(inst->Memory.Texture); + unsigned dim = tgsi_util_get_texture_coord_dim(mem_target); read_mask = u_bit_consecutive(0, dim); } else { read_mask = TGSI_WRITEMASK_XYZW; @@ -340,14 +344,14 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_OPCODE_INTERP_OFFSET: if (src_idx == 0) read_mask = write_mask; - else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) + else if (opcode == TGSI_OPCODE_INTERP_OFFSET) read_mask = TGSI_WRITEMASK_XY; /* offset */ else read_mask = TGSI_WRITEMASK_X; /* sample */ break; default: - if (tgsi_get_opcode_info(inst->Instruction.Opcode)->output_mode == + if (tgsi_get_opcode_info(opcode)->output_mode == TGSI_OUTPUT_COMPONENTWISE) read_mask = write_mask; else @@ -356,15 +360,32 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, } usage_mask = 0; - for (chan = 0; chan < 4; ++chan) { - if (read_mask & (1 << chan)) { - usage_mask |= 1 << tgsi_util_get_full_src_register_swizzle(src, chan); - } - } + if (read_mask & TGSI_WRITEMASK_X) + usage_mask |= 1 << swizzle_x; + if (read_mask & TGSI_WRITEMASK_Y) + usage_mask |= 1 << swizzle_y; + if (read_mask & TGSI_WRITEMASK_Z) + usage_mask |= 1 << swizzle_z; + if (read_mask & TGSI_WRITEMASK_W) + usage_mask |= 1 << swizzle_w; return usage_mask; } +unsigned +tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, + unsigned src_idx) +{ + return tgsi_util_get_src_usage_mask(inst->Instruction.Opcode, src_idx, + inst->Dst[0].Register.WriteMask, + inst->Src[src_idx].Register.SwizzleX, + inst->Src[src_idx].Register.SwizzleY, + inst->Src[src_idx].Register.SwizzleZ, + inst->Src[src_idx].Register.SwizzleW, + inst->Texture.Texture, + inst->Memory.Texture); +} + /** * Convert a tgsi_ind_register into a tgsi_src_register */ diff --git a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h index e1f913d74..6dbb23929 100644 --- a/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h +++ b/lib/mesa/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -57,10 +57,25 @@ tgsi_util_set_src_register_swizzle(struct tgsi_src_register *reg, unsigned swizzle, unsigned component); +/* returns the channels of the src_idx src register used by the full instruction. */ unsigned tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, unsigned src_idx); +/* Returns the channels of the src_idx src register used by an instruction with + * these parameters. + */ +unsigned +tgsi_util_get_src_usage_mask(enum tgsi_opcode opcode, + unsigned src_idx, + uint8_t write_mask, + uint8_t swizzle_x, + uint8_t swizzle_y, + uint8_t swizzle_z, + uint8_t swizzle_w, + enum tgsi_texture_type tex_target, + enum tgsi_texture_type mem_target); + struct tgsi_src_register tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_blitter.c b/lib/mesa/src/gallium/auxiliary/util/u_blitter.c index 4623eb7b0..b45534fc3 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_blitter.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_blitter.c @@ -95,9 +95,9 @@ struct blitter_context_priv /* FS which outputs one sample from a multisample texture. */ void *fs_texfetch_col_msaa[5][PIPE_MAX_TEXTURE_TYPES]; - void *fs_texfetch_depth_msaa[PIPE_MAX_TEXTURE_TYPES]; - void *fs_texfetch_depthstencil_msaa[PIPE_MAX_TEXTURE_TYPES]; - void *fs_texfetch_stencil_msaa[PIPE_MAX_TEXTURE_TYPES]; + void *fs_texfetch_depth_msaa[PIPE_MAX_TEXTURE_TYPES][2]; + void *fs_texfetch_depthstencil_msaa[PIPE_MAX_TEXTURE_TYPES][2]; + void *fs_texfetch_stencil_msaa[PIPE_MAX_TEXTURE_TYPES][2]; /* FS which outputs an average of all samples. */ void *fs_resolve[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2]; @@ -149,7 +149,9 @@ struct blitter_context_priv bool has_texture_multisample; bool has_tex_lz; bool has_txf; + bool has_sample_shading; bool cube_as_2darray; + bool has_texrect; bool cached_all_shaders; /* The Draw module overrides these functions. @@ -214,8 +216,11 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) PIPE_CAP_TGSI_TEX_TXF_LZ); ctx->has_txf = pipe->screen->get_param(pipe->screen, PIPE_CAP_GLSL_FEATURE_LEVEL) > 130; + ctx->has_sample_shading = pipe->screen->get_param(pipe->screen, + PIPE_CAP_SAMPLE_SHADING); ctx->cube_as_2darray = pipe->screen->get_param(pipe->screen, PIPE_CAP_SAMPLER_VIEW_TARGET); + ctx->has_texrect = pipe->screen->get_param(pipe->screen, PIPE_CAP_TEXRECT); /* blend state objects */ memset(&blend, 0, sizeof(blend)); @@ -270,15 +275,19 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) sampler_state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; sampler_state.normalized_coords = 1; ctx->sampler_state = pipe->create_sampler_state(pipe, &sampler_state); - sampler_state.normalized_coords = 0; - ctx->sampler_state_rect = pipe->create_sampler_state(pipe, &sampler_state); + if (ctx->has_texrect) { + sampler_state.normalized_coords = 0; + ctx->sampler_state_rect = pipe->create_sampler_state(pipe, &sampler_state); + } sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR; sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR; sampler_state.normalized_coords = 1; ctx->sampler_state_linear = pipe->create_sampler_state(pipe, &sampler_state); - sampler_state.normalized_coords = 0; - ctx->sampler_state_rect_linear = pipe->create_sampler_state(pipe, &sampler_state); + if (ctx->has_texrect) { + sampler_state.normalized_coords = 0; + ctx->sampler_state_rect_linear = pipe->create_sampler_state(pipe, &sampler_state); + } /* rasterizer state */ memset(&rs_state, 0, sizeof(rs_state)); @@ -334,8 +343,8 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) } ctx->has_layered = - pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_INSTANCEID) && - pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT); + pipe->screen->get_param(pipe->screen, PIPE_CAP_VS_INSTANCEID) && + pipe->screen->get_param(pipe->screen, PIPE_CAP_VS_LAYER_VIEWPORT); /* set invariant vertex coordinates */ for (i = 0; i < 4; i++) { @@ -547,12 +556,14 @@ void util_blitter_destroy(struct blitter_context *blitter) ctx->delete_fs_state(pipe, ctx->fs_texfetch_stencil[i][inst]); } - if (ctx->fs_texfetch_depth_msaa[i]) - ctx->delete_fs_state(pipe, ctx->fs_texfetch_depth_msaa[i]); - if (ctx->fs_texfetch_depthstencil_msaa[i]) - ctx->delete_fs_state(pipe, ctx->fs_texfetch_depthstencil_msaa[i]); - if (ctx->fs_texfetch_stencil_msaa[i]) - ctx->delete_fs_state(pipe, ctx->fs_texfetch_stencil_msaa[i]); + for (unsigned ss = 0; ss < 2; ss++) { + if (ctx->fs_texfetch_depth_msaa[i][ss]) + ctx->delete_fs_state(pipe, ctx->fs_texfetch_depth_msaa[i][ss]); + if (ctx->fs_texfetch_depthstencil_msaa[i][ss]) + ctx->delete_fs_state(pipe, ctx->fs_texfetch_depthstencil_msaa[i][ss]); + if (ctx->fs_texfetch_stencil_msaa[i][ss]) + ctx->delete_fs_state(pipe, ctx->fs_texfetch_stencil_msaa[i][ss]); + } for (j = 0; j< ARRAY_SIZE(ctx->fs_resolve[i]); j++) for (f = 0; f < 2; f++) @@ -578,8 +589,10 @@ void util_blitter_destroy(struct blitter_context *blitter) if (ctx->fs_stencil_blit_fallback[i]) ctx->delete_fs_state(pipe, ctx->fs_stencil_blit_fallback[i]); - pipe->delete_sampler_state(pipe, ctx->sampler_state_rect_linear); - pipe->delete_sampler_state(pipe, ctx->sampler_state_rect); + if (ctx->sampler_state_rect_linear) + pipe->delete_sampler_state(pipe, ctx->sampler_state_rect_linear); + if (ctx->sampler_state_rect) + pipe->delete_sampler_state(pipe, ctx->sampler_state_rect); pipe->delete_sampler_state(pipe, ctx->sampler_state_linear); pipe->delete_sampler_state(pipe, ctx->sampler_state); FREE(ctx); @@ -711,6 +724,10 @@ void util_blitter_restore_fragment_states(struct blitter_context *blitter) ctx->base.is_sample_mask_saved = false; } + if (ctx->base.saved_min_samples != ~0 && pipe->set_min_samples) + pipe->set_min_samples(pipe, ctx->base.saved_min_samples); + ctx->base.saved_min_samples = ~0; + /* Miscellaneous states. */ /* XXX check whether these are saved and whether they need to be restored * (depending on the operation) */ @@ -769,23 +786,35 @@ static void blitter_check_saved_textures(ASSERTED struct blitter_context_priv *c assert(ctx->base.saved_num_sampler_views != ~0u); } -void util_blitter_restore_textures(struct blitter_context *blitter) +static void util_blitter_restore_textures_internal(struct blitter_context *blitter, unsigned count) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->base.pipe; unsigned i; /* Fragment sampler states. */ - pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, - ctx->base.saved_num_sampler_states, - ctx->base.saved_sampler_states); + void *states[2] = {NULL}; + assert(count <= ARRAY_SIZE(states)); + if (ctx->base.saved_num_sampler_states) + pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, + ctx->base.saved_num_sampler_states, + ctx->base.saved_sampler_states); + else if (count) + pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, + count, + states); ctx->base.saved_num_sampler_states = ~0; /* Fragment sampler views. */ - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, - ctx->base.saved_num_sampler_views, 0, true, - ctx->base.saved_sampler_views); + if (ctx->base.saved_num_sampler_views) + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, + ctx->base.saved_num_sampler_views, 0, true, + ctx->base.saved_sampler_views); + else if (count) + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, + 0, count, true, + NULL); /* Just clear them to NULL because set_sampler_views(take_ownership = true). */ for (i = 0; i < ctx->base.saved_num_sampler_views; i++) @@ -794,6 +823,11 @@ void util_blitter_restore_textures(struct blitter_context *blitter) ctx->base.saved_num_sampler_views = ~0; } +void util_blitter_restore_textures(struct blitter_context *blitter) +{ + util_blitter_restore_textures_internal(blitter, 0); +} + void util_blitter_restore_constant_buffer_state(struct blitter_context *blitter) { struct pipe_context *pipe = blitter->pipe; @@ -820,14 +854,17 @@ static void blitter_set_rectangle(struct blitter_context_priv *ctx, ctx->vertices[3][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v3.x*/ ctx->vertices[3][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v3.y*/ + for (unsigned i = 0; i < 4; ++i) + ctx->vertices[i][0][2] = depth; + /* viewport */ struct pipe_viewport_state viewport; viewport.scale[0] = 0.5f * ctx->dst_width; viewport.scale[1] = 0.5f * ctx->dst_height; - viewport.scale[2] = 0.0f; + viewport.scale[2] = 1.0f; viewport.translate[0] = 0.5f * ctx->dst_width; viewport.translate[1] = 0.5f * ctx->dst_height; - viewport.translate[2] = depth; + viewport.translate[2] = 0.0f; viewport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X; viewport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y; viewport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z; @@ -1016,7 +1053,8 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx, /* Create the fragment shader on-demand. */ if (!*shader) { assert(!ctx->cached_all_shaders); - *shader = util_make_fs_blit_msaa_color(pipe, tgsi_tex, stype, dtype); + *shader = util_make_fs_blit_msaa_color(pipe, tgsi_tex, stype, dtype, + ctx->has_sample_shading); } } @@ -1083,22 +1121,24 @@ void *blitter_get_fs_pack_color_zs(struct blitter_context_priv *ctx, static inline void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx, enum pipe_texture_target target, - unsigned nr_samples, + unsigned src_samples, unsigned dst_samples, bool use_txf) { struct pipe_context *pipe = ctx->base.pipe; assert(target < PIPE_MAX_TEXTURE_TYPES); - if (nr_samples > 1) { - void **shader = &ctx->fs_texfetch_depth_msaa[target]; + if (src_samples > 1) { + bool sample_shading = ctx->has_sample_shading && src_samples > 1 && + src_samples == dst_samples; + void **shader = &ctx->fs_texfetch_depth_msaa[target][sample_shading]; /* Create the fragment shader on-demand. */ if (!*shader) { enum tgsi_texture_type tgsi_tex; assert(!ctx->cached_all_shaders); - tgsi_tex = util_pipe_tex_to_tgsi_tex(target, nr_samples); - *shader = util_make_fs_blit_msaa_depth(pipe, tgsi_tex); + tgsi_tex = util_pipe_tex_to_tgsi_tex(target, src_samples); + *shader = util_make_fs_blit_msaa_depth(pipe, tgsi_tex, sample_shading); } return *shader; @@ -1126,22 +1166,25 @@ void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx, static inline void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx, enum pipe_texture_target target, - unsigned nr_samples, - bool use_txf) + unsigned src_samples, + unsigned dst_samples, bool use_txf) { struct pipe_context *pipe = ctx->base.pipe; assert(target < PIPE_MAX_TEXTURE_TYPES); - if (nr_samples > 1) { - void **shader = &ctx->fs_texfetch_depthstencil_msaa[target]; + if (src_samples > 1) { + bool sample_shading = ctx->has_sample_shading && src_samples > 1 && + src_samples == dst_samples; + void **shader = &ctx->fs_texfetch_depthstencil_msaa[target][sample_shading]; /* Create the fragment shader on-demand. */ if (!*shader) { enum tgsi_texture_type tgsi_tex; assert(!ctx->cached_all_shaders); - tgsi_tex = util_pipe_tex_to_tgsi_tex(target, nr_samples); - *shader = util_make_fs_blit_msaa_depthstencil(pipe, tgsi_tex); + tgsi_tex = util_pipe_tex_to_tgsi_tex(target, src_samples); + *shader = util_make_fs_blit_msaa_depthstencil(pipe, tgsi_tex, + sample_shading); } return *shader; @@ -1169,22 +1212,25 @@ void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx, static inline void *blitter_get_fs_texfetch_stencil(struct blitter_context_priv *ctx, enum pipe_texture_target target, - unsigned nr_samples, + unsigned src_samples, unsigned dst_samples, bool use_txf) { struct pipe_context *pipe = ctx->base.pipe; assert(target < PIPE_MAX_TEXTURE_TYPES); - if (nr_samples > 1) { - void **shader = &ctx->fs_texfetch_stencil_msaa[target]; + if (src_samples > 1) { + bool sample_shading = ctx->has_sample_shading && src_samples > 1 && + src_samples == dst_samples; + void **shader = &ctx->fs_texfetch_stencil_msaa[target][sample_shading]; /* Create the fragment shader on-demand. */ if (!*shader) { enum tgsi_texture_type tgsi_tex; assert(!ctx->cached_all_shaders); - tgsi_tex = util_pipe_tex_to_tgsi_tex(target, nr_samples); - *shader = util_make_fs_blit_msaa_stencil(pipe, tgsi_tex); + tgsi_tex = util_pipe_tex_to_tgsi_tex(target, src_samples); + *shader = util_make_fs_blit_msaa_stencil(pipe, tgsi_tex, + sample_shading); } return *shader; @@ -1242,6 +1288,9 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter) if (!has_cubearraytex && (target == PIPE_TEXTURE_CUBE_ARRAY)) continue; + if (!ctx->has_texrect && + (target == PIPE_TEXTURE_RECT)) + continue; if (samples > 1 && (target != PIPE_TEXTURE_2D && @@ -1269,10 +1318,18 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter) blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_SINT, PIPE_FORMAT_R32_UINT, target, samples, samples, 0, use_txf); - blitter_get_fs_texfetch_depth(ctx, target, samples, use_txf); + blitter_get_fs_texfetch_depth(ctx, target, samples, samples, use_txf); if (ctx->has_stencil_export) { - blitter_get_fs_texfetch_depthstencil(ctx, target, samples, use_txf); - blitter_get_fs_texfetch_stencil(ctx, target, samples, use_txf); + blitter_get_fs_texfetch_depthstencil(ctx, target, samples, samples, use_txf); + blitter_get_fs_texfetch_stencil(ctx, target, samples, samples, use_txf); + } + + if (samples == 2) { + blitter_get_fs_texfetch_depth(ctx, target, samples, 1, use_txf); + if (ctx->has_stencil_export) { + blitter_get_fs_texfetch_depthstencil(ctx, target, samples, 1, use_txf); + blitter_get_fs_texfetch_stencil(ctx, target, samples, 1, use_txf); + } } if (samples == 1) @@ -1398,8 +1455,16 @@ void util_blitter_draw_rectangle(struct blitter_context *blitter, ctx->vertices[i][1][2] = attrib->texcoord.z; ctx->vertices[i][1][3] = attrib->texcoord.w; } - FALLTHROUGH; + set_texcoords_in_vertices(attrib, &ctx->vertices[0][1][0], 8); + break; case UTIL_BLITTER_ATTRIB_TEXCOORD_XY: + /* We clean-up the ZW components, just in case we used before XYZW, + * to avoid feeding in the shader with wrong values (like on the lod) + */ + for (i = 0; i < 4; i++) { + ctx->vertices[i][1][2] = 0; + ctx->vertices[i][1][3] = 0; + } set_texcoords_in_vertices(attrib, &ctx->vertices[0][1][0], 8); break; @@ -1479,6 +1544,8 @@ void util_blitter_common_clear_setup(struct blitter_context *blitter, } pipe->set_sample_mask(pipe, ~0); + if (pipe->set_min_samples) + pipe->set_min_samples(pipe, 1); blitter_set_dst_dimensions(ctx, width, height); } @@ -1792,6 +1859,8 @@ static void do_blits(struct blitter_context_priv *ctx, struct pipe_context *pipe = ctx->base.pipe; unsigned src_samples = src->texture->nr_samples; unsigned dst_samples = dst->texture->nr_samples; + bool sample_shading = ctx->has_sample_shading && src_samples > 1 && + src_samples == dst_samples && !sample0_only; enum pipe_texture_target src_target = src->target; struct pipe_framebuffer_state fb_state = {0}; @@ -1805,7 +1874,7 @@ static void do_blits(struct blitter_context_priv *ctx, if ((src_target == PIPE_TEXTURE_1D || src_target == PIPE_TEXTURE_2D || src_target == PIPE_TEXTURE_RECT) && - src_samples <= 1) { + (src_samples <= 1 || sample_shading)) { /* Set framebuffer state. */ if (is_zsbuf) { fb_state.zsbuf = dst; @@ -1816,6 +1885,8 @@ static void do_blits(struct blitter_context_priv *ctx, /* Draw. */ pipe->set_sample_mask(pipe, ~0); + if (pipe->set_min_samples) + pipe->set_min_samples(pipe, sample_shading ? dst_samples : 1); blitter_draw_tex(ctx, dstbox->x, dstbox->y, dstbox->x + dstbox->width, dstbox->y + dstbox->height, @@ -1871,8 +1942,10 @@ static void do_blits(struct blitter_context_priv *ctx, /* MSAA copy. */ unsigned i, max_sample = sample0_only ? 0 : dst_samples - 1; - for (i = 0; i <= max_sample; i++) { - pipe->set_sample_mask(pipe, 1 << i); + if (sample_shading) { + pipe->set_sample_mask(pipe, ~0); + if (pipe->set_min_samples) + pipe->set_min_samples(pipe, max_sample); blitter_draw_tex(ctx, dstbox->x, dstbox->y, dstbox->x + dstbox->width, dstbox->y + dstbox->height, @@ -1880,12 +1953,30 @@ static void do_blits(struct blitter_context_priv *ctx, srcbox->x, srcbox->y, srcbox->x + srcbox->width, srcbox->y + srcbox->height, - srcbox->z + src_z, i, uses_txf, + srcbox->z + src_z, 0, uses_txf, UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW); + } else { + if (pipe->set_min_samples) + pipe->set_min_samples(pipe, 1); + + for (i = 0; i <= max_sample; i++) { + pipe->set_sample_mask(pipe, 1 << i); + blitter_draw_tex(ctx, dstbox->x, dstbox->y, + dstbox->x + dstbox->width, + dstbox->y + dstbox->height, + src, src_width0, src_height0, + srcbox->x, srcbox->y, + srcbox->x + srcbox->width, + srcbox->y + srcbox->height, + srcbox->z + src_z, i, uses_txf, + UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW); + } } } else { /* Normal copy, MSAA upsampling, or MSAA resolve. */ pipe->set_sample_mask(pipe, ~0); + if (pipe->set_min_samples) + pipe->set_min_samples(pipe, 1); blitter_draw_tex(ctx, dstbox->x, dstbox->y, dstbox->x + dstbox->width, dstbox->y + dstbox->height, @@ -2011,8 +2102,8 @@ void util_blitter_blit_generic(struct blitter_context *blitter, src_samples, dst->format, false)); } else { ctx->bind_fs_state(pipe, - blitter_get_fs_texfetch_depthstencil(ctx, src_target, - src_samples, use_txf)); + blitter_get_fs_texfetch_depthstencil(ctx, src_target, src_samples, + dst_samples, use_txf)); } } else if (dst_has_depth) { pipe->bind_blend_state(pipe, ctx->blend[0][0]); @@ -2027,8 +2118,8 @@ void util_blitter_blit_generic(struct blitter_context *blitter, src_samples, dst->format, false)); } else { ctx->bind_fs_state(pipe, - blitter_get_fs_texfetch_depth(ctx, src_target, - src_samples, use_txf)); + blitter_get_fs_texfetch_depth(ctx, src_target, src_samples, + dst_samples, use_txf)); } } else if (dst_has_stencil) { pipe->bind_blend_state(pipe, ctx->blend[0][0]); @@ -2037,8 +2128,8 @@ void util_blitter_blit_generic(struct blitter_context *blitter, assert(src_has_stencil); /* unpacking from color is unsupported */ ctx->bind_fs_state(pipe, - blitter_get_fs_texfetch_stencil(ctx, src_target, - src_samples, use_txf)); + blitter_get_fs_texfetch_stencil(ctx, src_target, src_samples, + dst_samples, use_txf)); } else { unsigned colormask = mask & PIPE_MASK_RGBA; @@ -2062,13 +2153,13 @@ void util_blitter_blit_generic(struct blitter_context *blitter, /* Set the linear filter only for scaled color non-MSAA blits. */ if (filter == PIPE_TEX_FILTER_LINEAR) { - if (src_target == PIPE_TEXTURE_RECT) { + if (src_target == PIPE_TEXTURE_RECT && ctx->has_texrect) { sampler_state = ctx->sampler_state_rect_linear; } else { sampler_state = ctx->sampler_state_linear; } } else { - if (src_target == PIPE_TEXTURE_RECT) { + if (src_target == PIPE_TEXTURE_RECT && ctx->has_texrect) { sampler_state = ctx->sampler_state_rect; } else { sampler_state = ctx->sampler_state; @@ -2076,6 +2167,7 @@ void util_blitter_blit_generic(struct blitter_context *blitter, } /* Set samplers. */ + unsigned count = 0; if (src_has_depth && src_has_stencil && (dst_has_color || (dst_has_depth && dst_has_stencil))) { /* Setup two samplers, one for depth and the other one for stencil. */ @@ -2090,6 +2182,7 @@ void util_blitter_blit_generic(struct blitter_context *blitter, views[0] = src; views[1] = pipe->create_sampler_view(pipe, src->texture, &templ); + count = 2; pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 2, 0, false, views); pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, 2, samplers); @@ -2105,12 +2198,14 @@ void util_blitter_blit_generic(struct blitter_context *blitter, view = pipe->create_sampler_view(pipe, src->texture, &templ); + count = 1; pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, false, &view); pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, 1, &sampler_state); pipe_sampler_view_reference(&view, NULL); } else { + count = 1; pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, false, &src); pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, 1, &sampler_state); @@ -2127,7 +2222,7 @@ void util_blitter_blit_generic(struct blitter_context *blitter, util_blitter_restore_vertex_states(blitter); util_blitter_restore_fragment_states(blitter); - util_blitter_restore_textures(blitter); + util_blitter_restore_textures_internal(blitter, count); util_blitter_restore_fb_state(blitter); if (scissor) { pipe->set_scissor_states(pipe, 0, 1, &ctx->base.saved_scissor); @@ -2210,7 +2305,7 @@ void util_blitter_generate_mipmap(struct blitter_context *blitter, pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_keep_stencil); ctx->bind_fs_state(pipe, - blitter_get_fs_texfetch_depth(ctx, target, 1, false)); + blitter_get_fs_texfetch_depth(ctx, target, 1, 1, false)); } else { pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA][0]); pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); @@ -2269,7 +2364,7 @@ void util_blitter_generate_mipmap(struct blitter_context *blitter, util_blitter_restore_vertex_states(blitter); util_blitter_restore_fragment_states(blitter); - util_blitter_restore_textures(blitter); + util_blitter_restore_textures_internal(blitter, 1); util_blitter_restore_fb_state(blitter); util_blitter_restore_render_cond(blitter); util_blitter_unset_running_flag(blitter); @@ -2309,9 +2404,11 @@ void util_blitter_clear_render_target(struct blitter_context *blitter, fb_state.height = dstsurf->height; fb_state.nr_cbufs = 1; fb_state.cbufs[0] = dstsurf; - fb_state.zsbuf = 0; + fb_state.zsbuf = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); + if (pipe->set_min_samples) + pipe->set_min_samples(pipe, 1); msaa = util_framebuffer_get_num_samples(&fb_state) > 1; blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); @@ -2391,10 +2488,12 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, fb_state.width = dstsurf->width; fb_state.height = dstsurf->height; fb_state.nr_cbufs = 0; - fb_state.cbufs[0] = 0; + fb_state.cbufs[0] = NULL; fb_state.zsbuf = dstsurf; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); + if (pipe->set_min_samples) + pipe->set_min_samples(pipe, 1); blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height); @@ -2464,6 +2563,8 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter, fb_state.zsbuf = zsurf; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, sample_mask); + if (pipe->set_min_samples) + pipe->set_min_samples(pipe, 1); blitter_set_common_draw_rect_state(ctx, false, util_framebuffer_get_num_samples(&fb_state) > 1); @@ -2642,6 +2743,8 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter, pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil); bind_fs_write_one_cbuf(ctx); pipe->set_sample_mask(pipe, sample_mask); + if (pipe->set_min_samples) + pipe->set_min_samples(pipe, 1); memset(&surf_tmpl, 0, sizeof(surf_tmpl)); surf_tmpl.format = format; @@ -2712,9 +2815,11 @@ void util_blitter_custom_color(struct blitter_context *blitter, fb_state.height = dstsurf->height; fb_state.nr_cbufs = 1; fb_state.cbufs[0] = dstsurf; - fb_state.zsbuf = 0; + fb_state.zsbuf = NULL; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); + if (pipe->set_min_samples) + pipe->set_min_samples(pipe, 1); blitter_set_common_draw_rect_state(ctx, false, util_framebuffer_get_num_samples(&fb_state) > 1); @@ -2776,6 +2881,8 @@ void util_blitter_custom_shader(struct blitter_context *blitter, fb_state.cbufs[0] = dstsurf; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); + if (pipe->set_min_samples) + pipe->set_min_samples(pipe, 1); blitter_set_common_draw_rect_state(ctx, false, util_framebuffer_get_num_samples(&fb_state) > 1); @@ -2865,11 +2972,13 @@ util_blitter_stencil_fallback(struct blitter_context *blitter, /* set a framebuffer state */ struct pipe_framebuffer_state fb_state = { 0 }; - fb_state.width = dstbox->width; - fb_state.height = dstbox->height; + fb_state.width = dstbox->x + dstbox->width; + fb_state.height = dstbox->y + dstbox->height; fb_state.zsbuf = dst_view; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); + if (pipe->set_min_samples) + pipe->set_min_samples(pipe, 1); blitter_set_common_draw_rect_state(ctx, scissor != NULL, util_framebuffer_get_num_samples(&fb_state) > 1); @@ -2934,7 +3043,7 @@ util_blitter_stencil_fallback(struct blitter_context *blitter, util_blitter_restore_vertex_states(blitter); util_blitter_restore_fragment_states(blitter); - util_blitter_restore_textures(blitter); + util_blitter_restore_textures_internal(blitter, 1); util_blitter_restore_fb_state(blitter); util_blitter_restore_render_cond(blitter); util_blitter_restore_constant_buffer_state(blitter); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_blitter.h b/lib/mesa/src/gallium/auxiliary/util/u_blitter.h index 48b934443..70157e5df 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_blitter.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_blitter.h @@ -118,6 +118,7 @@ struct blitter_context bool skip_viewport_restore; bool is_sample_mask_saved; unsigned saved_sample_mask; + unsigned saved_min_samples; unsigned saved_num_sampler_states; void *saved_sampler_states[PIPE_MAX_SAMPLERS]; @@ -565,10 +566,11 @@ util_blitter_save_so_targets(struct blitter_context *blitter, static inline void util_blitter_save_sample_mask(struct blitter_context *blitter, - unsigned sample_mask) + unsigned sample_mask, unsigned min_samples) { blitter->is_sample_mask_saved = true; blitter->saved_sample_mask = sample_mask; + blitter->saved_min_samples = min_samples; } static inline void diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c b/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c index 4f3c98aec..591698b50 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c @@ -51,7 +51,7 @@ #include <stdio.h> /* Future improvement: Use realloc instead? */ -#define DEBUG_FLUSH_MAP_DEPTH 32 +#define DEBUG_FLUSH_MAP_DEPTH 64 struct debug_map_item { struct debug_stack_frame *frame; diff --git a/lib/mesa/src/gallium/auxiliary/util/u_draw.c b/lib/mesa/src/gallium/auxiliary/util/u_draw.c index d65ac6a64..ed1e294a5 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_draw.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_draw.c @@ -150,20 +150,22 @@ util_draw_indirect_read(struct pipe_context *pipe, debug_printf("%s: failed to map indirect draw count buffer\n", __FUNCTION__); return NULL; } - if (dc_param[0] < draw_count) - draw_count = dc_param[0]; + draw_count = dc_param[0]; pipe_buffer_unmap(pipe, dc_transfer); } + if (!draw_count) { + *num_draws = draw_count; + return NULL; + } draws = malloc(sizeof(struct u_indirect_params) * draw_count); if (!draws) return NULL; - if (indirect->stride) - num_params = MIN2(indirect->stride / 4, num_params); + unsigned map_size = (draw_count - 1) * indirect->stride + (num_params * sizeof(uint32_t)); params = pipe_buffer_map_range(pipe, indirect->buffer, indirect->offset, - (num_params * indirect->draw_count) * sizeof(uint32_t), + map_size, PIPE_MAP_READ, &transfer); if (!transfer) { diff --git a/lib/mesa/src/gallium/auxiliary/util/u_draw_quad.c b/lib/mesa/src/gallium/auxiliary/util/u_draw_quad.c index cf431f2b0..48e85c4a6 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_draw_quad.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_draw_quad.c @@ -61,7 +61,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /* note: vertex elements already set by caller */ if (cso) { - cso_set_vertex_buffers(cso, vbuf_slot, 1, &vbuffer); + cso_set_vertex_buffers(cso, vbuf_slot, 1, 0, false, &vbuffer); cso_draw_arrays(cso, prim_type, 0, num_verts); } else { pipe->set_vertex_buffers(pipe, vbuf_slot, 1, 0, false, &vbuffer); @@ -88,6 +88,6 @@ util_draw_user_vertex_buffer(struct cso_context *cso, void *buffer, /* note: vertex elements already set by caller */ - cso_set_vertex_buffers(cso, 0, 1, &vbuffer); + cso_set_vertex_buffers(cso, 0, 1, 0, false, &vbuffer); cso_draw_arrays(cso, prim_type, 0, num_verts); } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_dump.h b/lib/mesa/src/gallium/auxiliary/util/u_dump.h index 0e7f4c271..dab2d19c1 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_dump.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_dump.h @@ -103,6 +103,9 @@ void util_dump_query_value_type(FILE *stream, unsigned value); void +util_dump_query_flags(FILE *stream, unsigned value); + +void util_dump_transfer_usage(FILE *stream, unsigned value); /* @@ -133,6 +136,10 @@ util_dump_clip_state(FILE *stream, const struct pipe_clip_state *state); void +util_dump_stream_output_info(FILE *stream, + const struct pipe_stream_output_info *state); + +void util_dump_shader_state(FILE *stream, const struct pipe_shader_state *state); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_dump_state.c b/lib/mesa/src/gallium/auxiliary/util/u_dump_state.c index f0bca1680..c950de7f3 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_dump_state.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_dump_state.c @@ -462,12 +462,37 @@ util_dump_clip_state(FILE *stream, const struct pipe_clip_state *state) util_dump_struct_end(stream); } +void +util_dump_stream_output_info(FILE *stream, + const struct pipe_stream_output_info *state) +{ + if (!state) { + util_dump_null(stream); + return; + } + + util_dump_struct_begin(stream, "pipe_stream_output_info"); + util_dump_member(stream, uint, state, num_outputs); + util_dump_array(stream, uint, state->stride, + ARRAY_SIZE(state->stride)); + util_dump_array_begin(stream); + for (unsigned i = 0; i < state->num_outputs; ++i) { + util_dump_elem_begin(stream); + util_dump_struct_begin(stream, ""); /* anonymous */ + util_dump_member(stream, uint, &state->output[i], register_index); + util_dump_member(stream, uint, &state->output[i], start_component); + util_dump_member(stream, uint, &state->output[i], num_components); + util_dump_member(stream, uint, &state->output[i], output_buffer); + util_dump_struct_end(stream); + util_dump_elem_end(stream); + } + util_dump_array_end(stream); + util_dump_struct_end(stream); +} void util_dump_shader_state(FILE *stream, const struct pipe_shader_state *state) { - unsigned i; - if (!state) { util_dump_null(stream); return; @@ -485,23 +510,7 @@ util_dump_shader_state(FILE *stream, const struct pipe_shader_state *state) if (state->stream_output.num_outputs) { util_dump_member_begin(stream, "stream_output"); - util_dump_struct_begin(stream, "pipe_stream_output_info"); - util_dump_member(stream, uint, &state->stream_output, num_outputs); - util_dump_array(stream, uint, state->stream_output.stride, - ARRAY_SIZE(state->stream_output.stride)); - util_dump_array_begin(stream); - for(i = 0; i < state->stream_output.num_outputs; ++i) { - util_dump_elem_begin(stream); - util_dump_struct_begin(stream, ""); /* anonymous */ - util_dump_member(stream, uint, &state->stream_output.output[i], register_index); - util_dump_member(stream, uint, &state->stream_output.output[i], start_component); - util_dump_member(stream, uint, &state->stream_output.output[i], num_components); - util_dump_member(stream, uint, &state->stream_output.output[i], output_buffer); - util_dump_struct_end(stream); - util_dump_elem_end(stream); - } - util_dump_array_end(stream); - util_dump_struct_end(stream); + util_dump_stream_output_info(stream, &state->stream_output); util_dump_member_end(stream); } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_helpers.c b/lib/mesa/src/gallium/auxiliary/util/u_helpers.c index dd415b990..e0e91aab4 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_helpers.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_helpers.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include "util/format/format_utils.h" #include "util/u_cpu_detect.h" #include "util/u_helpers.h" #include "util/u_inlines.h" @@ -518,3 +519,29 @@ util_init_pipe_vertex_state(struct pipe_screen *screen, state->input.elements[i] = elements[i]; state->input.full_velem_mask = full_velem_mask; } + +/** + * Clamp color value to format range. + */ +union pipe_color_union +util_clamp_color(enum pipe_format format, + const union pipe_color_union *color) +{ + union pipe_color_union clamp_color = *color; + int i; + + for (i = 0; i < util_format_get_nr_components(format); i++) { + uint8_t bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, i); + + if (util_format_is_unorm(format)) + clamp_color.ui[i] = _mesa_unorm_to_unorm(clamp_color.ui[i], bits, bits); + else if (util_format_is_snorm(format)) + clamp_color.i[i] = _mesa_snorm_to_snorm(clamp_color.i[i], bits, bits); + else if (util_format_is_pure_uint(format)) + clamp_color.ui[i] = _mesa_unsigned_to_unsigned(clamp_color.ui[i], bits); + else if (util_format_is_pure_sint(format)) + clamp_color.i[i] = _mesa_signed_to_signed(clamp_color.i[i], bits); + } + + return clamp_color; +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_helpers.h b/lib/mesa/src/gallium/auxiliary/util/u_helpers.h index 9246d306e..299c67980 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_helpers.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_helpers.h @@ -130,6 +130,9 @@ util_init_pipe_vertex_state(struct pipe_screen *screen, uint32_t full_velem_mask, struct pipe_vertex_state *state); +union pipe_color_union util_clamp_color(enum pipe_format format, + const union pipe_color_union *color); + #ifdef __cplusplus } #endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_pstipple.c b/lib/mesa/src/gallium/auxiliary/util/u_pstipple.c index 3ace5e703..0e1da9fb9 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_pstipple.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_pstipple.c @@ -381,11 +381,6 @@ util_pstipple_create_fragment_shader(const struct tgsi_token *tokens, const uint newLen = tgsi_num_tokens(tokens) + NUM_NEW_TOKENS; struct tgsi_token *new_tokens; - new_tokens = tgsi_alloc_tokens(newLen); - if (!new_tokens) { - return NULL; - } - /* Setup shader transformation info/context. */ memset(&transform, 0, sizeof(transform)); @@ -404,7 +399,9 @@ util_pstipple_create_fragment_shader(const struct tgsi_token *tokens, transform.coordOrigin = transform.info.properties[TGSI_PROPERTY_FS_COORD_ORIGIN]; - tgsi_transform_shader(tokens, new_tokens, newLen, &transform.base); + new_tokens = tgsi_transform_shader(tokens, newLen, &transform.base); + if (!new_tokens) + return NULL; #if 0 /* DEBUG */ tgsi_dump(fs->tokens, 0); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.c b/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.c index aaaa04348..07c667b21 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.c @@ -527,6 +527,7 @@ util_make_fragment_cloneinput_shader(struct pipe_context *pipe, int num_cbufs, static void * util_make_fs_blit_msaa_gen(struct pipe_context *pipe, enum tgsi_texture_type tgsi_tex, + bool sample_shading, const char *samp_type, const char *output_semantic, const char *output_mask, @@ -541,15 +542,17 @@ util_make_fs_blit_msaa_gen(struct pipe_context *pipe, "DCL OUT[0], %s\n" "DCL TEMP[0]\n" "%s" + "%s" "F2U TEMP[0], IN[0]\n" + "%s" "TXF TEMP[0], TEMP[0], SAMP[0], %s\n" "%s" "MOV OUT[0]%s, TEMP[0]\n" "END\n"; const char *type = tgsi_texture_names[tgsi_tex]; - char text[sizeof(shader_templ)+100]; + char text[sizeof(shader_templ)+400]; struct tgsi_token tokens[1000]; struct pipe_shader_state state = {0}; @@ -557,7 +560,9 @@ util_make_fs_blit_msaa_gen(struct pipe_context *pipe, tgsi_tex == TGSI_TEXTURE_2D_ARRAY_MSAA); snprintf(text, sizeof(text), shader_templ, type, samp_type, - output_semantic, conversion_decl, type, conversion, output_mask); + output_semantic, sample_shading ? "DCL SV[0], SAMPLEID\n" : "", + conversion_decl, sample_shading ? "MOV TEMP[0].w, SV[0].xxxx\n" : "", + type, conversion, output_mask); if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { puts(text); @@ -582,7 +587,8 @@ void * util_make_fs_blit_msaa_color(struct pipe_context *pipe, enum tgsi_texture_type tgsi_tex, enum tgsi_return_type stype, - enum tgsi_return_type dtype) + enum tgsi_return_type dtype, + bool sample_shading) { const char *samp_type; const char *conversion_decl = ""; @@ -607,7 +613,7 @@ util_make_fs_blit_msaa_color(struct pipe_context *pipe, samp_type = "FLOAT"; } - return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, samp_type, + return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, sample_shading, samp_type, "COLOR[0]", "", conversion_decl, conversion); } @@ -620,10 +626,12 @@ util_make_fs_blit_msaa_color(struct pipe_context *pipe, */ void * util_make_fs_blit_msaa_depth(struct pipe_context *pipe, - enum tgsi_texture_type tgsi_tex) + enum tgsi_texture_type tgsi_tex, + bool sample_shading) { - return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, "FLOAT", - "POSITION", ".z", "", ""); + return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, sample_shading, "FLOAT", + "POSITION", ".z", "", + "MOV TEMP[0].z, TEMP[0].xxxx\n"); } @@ -634,10 +642,12 @@ util_make_fs_blit_msaa_depth(struct pipe_context *pipe, */ void * util_make_fs_blit_msaa_stencil(struct pipe_context *pipe, - enum tgsi_texture_type tgsi_tex) + enum tgsi_texture_type tgsi_tex, + bool sample_shading) { - return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, "UINT", - "STENCIL", ".y", "", ""); + return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, sample_shading, "UINT", + "STENCIL", ".y", "", + "MOV TEMP[0].y, TEMP[0].xxxx\n"); } @@ -650,7 +660,8 @@ util_make_fs_blit_msaa_stencil(struct pipe_context *pipe, */ void * util_make_fs_blit_msaa_depthstencil(struct pipe_context *pipe, - enum tgsi_texture_type tgsi_tex) + enum tgsi_texture_type tgsi_tex, + bool sample_shading) { static const char shader_templ[] = "FRAG\n" @@ -661,21 +672,26 @@ util_make_fs_blit_msaa_depthstencil(struct pipe_context *pipe, "DCL OUT[0], POSITION\n" "DCL OUT[1], STENCIL\n" "DCL TEMP[0]\n" + "%s" "F2U TEMP[0], IN[0]\n" + "%s" "TXF OUT[0].z, TEMP[0], SAMP[0], %s\n" "TXF OUT[1].y, TEMP[0], SAMP[1], %s\n" "END\n"; const char *type = tgsi_texture_names[tgsi_tex]; - char text[sizeof(shader_templ)+100]; + char text[sizeof(shader_templ)+400]; struct tgsi_token tokens[1000]; struct pipe_shader_state state = {0}; assert(tgsi_tex == TGSI_TEXTURE_2D_MSAA || tgsi_tex == TGSI_TEXTURE_2D_ARRAY_MSAA); - sprintf(text, shader_templ, type, type, type, type); + sprintf(text, shader_templ, type, type, + sample_shading ? "DCL SV[0], SAMPLEID\n" : "", + sample_shading ? "MOV TEMP[0].w, SV[0].xxxx\n" : "", + type, type); if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { assert(0); @@ -1173,7 +1189,7 @@ util_make_fs_stencil_blit(struct pipe_context *pipe, bool msaa_src) "FRAG\n" "DCL IN[0], GENERIC[0], LINEAR\n" "DCL SAMP[0]\n" - "DCL SVIEW[0], 2D, UINT\n" + "DCL SVIEW[0], %s, UINT\n" "DCL CONST[0][0]\n" "DCL TEMP[0]\n" @@ -1192,7 +1208,7 @@ util_make_fs_stencil_blit(struct pipe_context *pipe, bool msaa_src) enum tgsi_texture_type tgsi_tex = msaa_src ? TGSI_TEXTURE_2D_MSAA : TGSI_TEXTURE_2D; - sprintf(text, shader_templ, tgsi_texture_names[tgsi_tex]); + sprintf(text, shader_templ, tgsi_texture_names[tgsi_tex], tgsi_texture_names[tgsi_tex]); if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { assert(0); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.h b/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.h index 6bc794018..dfd16b9d1 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.h @@ -117,22 +117,26 @@ extern void * util_make_fs_blit_msaa_color(struct pipe_context *pipe, enum tgsi_texture_type tgsi_tex, enum tgsi_return_type stype, - enum tgsi_return_type dtype); + enum tgsi_return_type dtype, + bool sample_shading); extern void * util_make_fs_blit_msaa_depth(struct pipe_context *pipe, - enum tgsi_texture_type tgsi_tex); + enum tgsi_texture_type tgsi_tex, + bool sample_shading); extern void * util_make_fs_blit_msaa_depthstencil(struct pipe_context *pipe, - enum tgsi_texture_type tgsi_tex); + enum tgsi_texture_type tgsi_tex, + bool sample_shading); void * util_make_fs_blit_msaa_stencil(struct pipe_context *pipe, - enum tgsi_texture_type tgsi_tex); + enum tgsi_texture_type tgsi_tex, + bool sample_shading); void * diff --git a/lib/mesa/src/gallium/auxiliary/util/u_surface.c b/lib/mesa/src/gallium/auxiliary/util/u_surface.c index 3906a84b6..af406e826 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_surface.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_surface.c @@ -765,9 +765,10 @@ get_sample_count(const struct pipe_resource *res) * the blit src/dst formats are identical, ignoring the resource formats. * Otherwise, check for format casting and compatibility. */ -boolean +bool util_can_blit_via_copy_region(const struct pipe_blit_info *blit, - boolean tight_format_check) + bool tight_format_check, + bool render_condition_bound) { const struct util_format_description *src_desc, *dst_desc; @@ -797,7 +798,7 @@ util_can_blit_via_copy_region(const struct pipe_blit_info *blit, blit->scissor_enable || blit->num_window_rectangles > 0 || blit->alpha_blend || - blit->render_condition_enable) { + (blit->render_condition_enable && render_condition_bound)) { return FALSE; } @@ -840,11 +841,12 @@ util_can_blit_via_copy_region(const struct pipe_blit_info *blit, * It returns FALSE otherwise and the caller must fall back to a more generic * codepath for the blit operation. (e.g. by using u_blitter) */ -boolean +bool util_try_blit_via_copy_region(struct pipe_context *ctx, - const struct pipe_blit_info *blit) + const struct pipe_blit_info *blit, + bool render_condition_bound) { - if (util_can_blit_via_copy_region(blit, FALSE)) { + if (util_can_blit_via_copy_region(blit, FALSE, render_condition_bound)) { ctx->resource_copy_region(ctx, blit->dst.resource, blit->dst.level, blit->dst.box.x, blit->dst.box.y, blit->dst.box.z, diff --git a/lib/mesa/src/gallium/auxiliary/util/u_surface.h b/lib/mesa/src/gallium/auxiliary/util/u_surface.h index 61a8d512f..1ca8ca314 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_surface.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_surface.h @@ -105,13 +105,15 @@ util_clear_depth_stencil(struct pipe_context *pipe, unsigned dstx, unsigned dsty, unsigned width, unsigned height); -boolean +bool util_can_blit_via_copy_region(const struct pipe_blit_info *blit, - boolean tight_format_check); + bool tight_format_check, + bool render_condition_bound); -extern boolean +extern bool util_try_blit_via_copy_region(struct pipe_context *ctx, - const struct pipe_blit_info *blit); + const struct pipe_blit_info *blit, + bool render_condition_bound); #ifdef __cplusplus diff --git a/lib/mesa/src/gallium/auxiliary/util/u_tests.c b/lib/mesa/src/gallium/auxiliary/util/u_tests.c index 71e954ff9..eb58c05cc 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_tests.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_tests.c @@ -322,7 +322,7 @@ tgsi_vs_window_space_position(struct pipe_context *ctx) static const float red[] = {1, 0, 0, 1}; if (!ctx->screen->get_param(ctx->screen, - PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION)) { + PIPE_CAP_VS_WINDOW_SPACE_POSITION)) { util_report_result(SKIP); return; } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c index 6cbe2567c..6265286bd 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c @@ -72,6 +72,12 @@ typedef uint16_t (*tc_execute)(struct pipe_context *pipe, void *call, uint64_t * static const tc_execute execute_func[TC_NUM_CALLS]; static void +tc_buffer_subdata(struct pipe_context *_pipe, + struct pipe_resource *resource, + unsigned usage, unsigned offset, + unsigned size, const void *data); + +static void tc_batch_check(UNUSED struct tc_batch *batch) { tc_assert(batch->sentinel == TC_SENTINEL); @@ -155,14 +161,6 @@ tc_drop_surface_reference(struct pipe_surface *dst) /* Unreference dst but don't touch the dst pointer. */ static inline void -tc_drop_sampler_view_reference(struct pipe_sampler_view *dst) -{ - if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */ - dst->context->sampler_view_destroy(dst->context, dst); -} - -/* Unreference dst but don't touch the dst pointer. */ -static inline void tc_drop_so_target_reference(struct pipe_stream_output_target *dst) { if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */ @@ -669,18 +667,33 @@ tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf, return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage); } +/** + * allow_cpu_storage should be false for user memory and imported buffers. + */ void -threaded_resource_init(struct pipe_resource *res) +threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage) { struct threaded_resource *tres = threaded_resource(res); tres->latest = &tres->b; + tres->cpu_storage = NULL; util_range_init(&tres->valid_buffer_range); tres->is_shared = false; tres->is_user_ptr = false; tres->buffer_id_unique = 0; tres->pending_staging_uploads = 0; util_range_init(&tres->pending_staging_uploads_range); + + if (allow_cpu_storage && + !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | + PIPE_RESOURCE_FLAG_SPARSE | + PIPE_RESOURCE_FLAG_ENCRYPTED)) && + /* We need buffer invalidation and buffer busyness tracking for the CPU + * storage, which aren't supported with pipe_vertex_state. */ + !(res->bind & PIPE_BIND_VERTEX_STATE)) + tres->allow_cpu_storage = true; + else + tres->allow_cpu_storage = false; } void @@ -692,6 +705,7 @@ threaded_resource_deinit(struct pipe_resource *res) pipe_resource_reference(&tres->latest, NULL); util_range_destroy(&tres->valid_buffer_range); util_range_destroy(&tres->pending_staging_uploads_range); + align_free(tres->cpu_storage); } struct pipe_context * @@ -881,7 +895,7 @@ tc_get_query_result(struct pipe_context *_pipe, struct tc_query_result_resource { struct tc_call_base base; - bool wait; + enum pipe_query_flags flags:8; enum pipe_query_value_type result_type:8; int8_t index; /* it can be -1 */ unsigned offset; @@ -894,7 +908,7 @@ tc_call_get_query_result_resource(struct pipe_context *pipe, void *call, uint64_ { struct tc_query_result_resource *p = to_call(call, tc_query_result_resource); - pipe->get_query_result_resource(pipe, p->query, p->wait, p->result_type, + pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type, p->index, p->resource, p->offset); tc_drop_resource_reference(p->resource); return call_size(tc_query_result_resource); @@ -902,17 +916,20 @@ tc_call_get_query_result_resource(struct pipe_context *pipe, void *call, uint64_ static void tc_get_query_result_resource(struct pipe_context *_pipe, - struct pipe_query *query, bool wait, + struct pipe_query *query, + enum pipe_query_flags flags, enum pipe_query_value_type result_type, int index, struct pipe_resource *resource, unsigned offset) { struct threaded_context *tc = threaded_context(_pipe); + + tc_buffer_disable_cpu_storage(resource); + struct tc_query_result_resource *p = tc_add_call(tc, TC_CALL_get_query_result_resource, tc_query_result_resource); - p->query = query; - p->wait = wait; + p->flags = flags; p->result_type = result_type; p->index = index; tc_set_resource_reference(&p->resource, resource); @@ -1499,6 +1516,7 @@ tc_set_shader_images(struct pipe_context *_pipe, if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) { struct threaded_resource *tres = threaded_resource(resource); + tc_buffer_disable_cpu_storage(resource); util_range_add(&tres->b, &tres->valid_buffer_range, images[i].u.buf.offset, images[i].u.buf.offset + images[i].u.buf.size); @@ -1591,6 +1609,7 @@ tc_set_shader_buffers(struct pipe_context *_pipe, tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b); if (writable_bitmask & BITFIELD_BIT(i)) { + tc_buffer_disable_cpu_storage(src->buffer); util_range_add(&tres->b, &tres->valid_buffer_range, src->buffer_offset, src->buffer_offset + src->buffer_size); @@ -1737,6 +1756,7 @@ tc_set_stream_output_targets(struct pipe_context *_pipe, p->targets[i] = NULL; pipe_so_target_reference(&p->targets[i], tgs[i]); if (tgs[i]) { + tc_buffer_disable_cpu_storage(tgs[i]->buffer); tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer); } else { tc_unbind_buffer(&tc->streamout_buffers[i]); @@ -1906,6 +1926,9 @@ tc_create_image_handle(struct pipe_context *_pipe, struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; + if (image->resource->target == PIPE_BUFFER) + tc_buffer_disable_cpu_storage(image->resource); + tc_sync(tc); return pipe->create_image_handle(pipe, image); } @@ -1991,7 +2014,7 @@ tc_invalidate_buffer(struct threaded_context *tc, /* Shared, pinned, and sparse buffers can't be reallocated. */ if (tbuf->is_shared || tbuf->is_user_ptr || - tbuf->b.flags & PIPE_RESOURCE_FLAG_SPARSE) + tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) return false; /* Allocate a new one. */ @@ -2063,7 +2086,7 @@ tc_improve_map_buffer_flags(struct threaded_context *tc, * (fully invalidated). That may just be a radeonsi limitation, but * the threaded context must obey it with radeonsi. */ - if (tres->b.flags & PIPE_RESOURCE_FLAG_SPARSE) { + if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) { /* We can use DISCARD_RANGE instead of full discard. This is the only * fast path for sparse buffers that doesn't need thread synchronization. */ @@ -2142,8 +2165,37 @@ tc_buffer_map(struct pipe_context *_pipe, struct threaded_resource *tres = threaded_resource(resource); struct pipe_context *pipe = tc->pipe; + /* PIPE_MAP_THREAD_SAFE is for glthread, which shouldn't use the CPU storage and + * this shouldn't normally be necessary because glthread only uses large buffers. + */ + if (usage & PIPE_MAP_THREAD_SAFE) + tc_buffer_disable_cpu_storage(resource); + usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width); + /* If the CPU storage is enabled, return it directly. */ + if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) { + /* We can't let resource_copy_region disable the CPU storage. */ + assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY)); + + if (!tres->cpu_storage) + tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment); + + if (tres->cpu_storage) { + struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers); + ttrans->b.resource = resource; + ttrans->b.usage = usage; + ttrans->b.box = *box; + ttrans->valid_buffer_range = &tres->valid_buffer_range; + ttrans->cpu_storage_mapped = true; + *transfer = &ttrans->b; + + return (uint8_t*)tres->cpu_storage + box->x; + } else { + tres->allow_cpu_storage = false; + } + } + /* Do a staging transfer within the threaded context. The driver should * only get resource_copy_region. */ @@ -2167,6 +2219,7 @@ tc_buffer_map(struct pipe_context *_pipe, ttrans->b.stride = 0; ttrans->b.layer_stride = 0; ttrans->valid_buffer_range = &tres->valid_buffer_range; + ttrans->cpu_storage_mapped = false; *transfer = &ttrans->b; p_atomic_inc(&tres->pending_staging_uploads); @@ -2201,6 +2254,7 @@ tc_buffer_map(struct pipe_context *_pipe, void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource, level, usage, box, transfer); threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range; + threaded_transfer(*transfer)->cpu_storage_mapped = false; if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) tc_clear_driver_thread(tc); @@ -2283,8 +2337,13 @@ tc_buffer_do_flush_region(struct threaded_context *tc, ttrans->staging, 0, &src_box); } - util_range_add(&tres->b, ttrans->valid_buffer_range, - box->x, box->x + box->width); + /* Don't update the valid range when we're uploading the CPU storage + * because it includes the uninitialized range too. + */ + if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) { + util_range_add(&tres->b, ttrans->valid_buffer_range, + box->x, box->x + box->width); + } } static void @@ -2306,8 +2365,14 @@ tc_transfer_flush_region(struct pipe_context *_pipe, tc_buffer_do_flush_region(tc, ttrans, &box); } - /* Staging transfers don't send the call to the driver. */ - if (ttrans->staging) + /* Staging transfers don't send the call to the driver. + * + * Transfers using the CPU storage shouldn't call transfer_flush_region + * in the driver because the buffer is not really mapped on the driver + * side and the CPU storage always re-uploads everything (flush_region + * makes no difference). + */ + if (ttrans->staging || ttrans->cpu_storage_mapped) return; } @@ -2371,12 +2436,44 @@ tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) return; } - bool was_staging_transfer = false; - if (transfer->usage & PIPE_MAP_WRITE && !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT)) tc_buffer_do_flush_region(tc, ttrans, &transfer->box); + if (ttrans->cpu_storage_mapped) { + /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't + * touch the mapped range. That's a problem because GPU stores free the CPU storage. + * If that happens, we just ignore the unmap call and don't upload anything to prevent + * a crash. + * + * Disallow the CPU storage in the driver to work around this. + */ + assert(tres->cpu_storage); + + if (tres->cpu_storage) { + tc_invalidate_buffer(tc, tres); + tc_buffer_subdata(&tc->base, &tres->b, + PIPE_MAP_UNSYNCHRONIZED | + TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE, + 0, tres->b.width0, tres->cpu_storage); + /* This shouldn't have been freed by buffer_subdata. */ + assert(tres->cpu_storage); + } else { + static bool warned_once = false; + if (!warned_once) { + fprintf(stderr, "This application is incompatible with cpu_storage.\n"); + fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n"); + warned_once = true; + } + } + + tc_drop_resource_reference(ttrans->staging); + slab_free(&tc->pool_transfers, ttrans); + return; + } + + bool was_staging_transfer = false; + if (ttrans->staging) { was_staging_transfer = true; @@ -2481,7 +2578,8 @@ tc_buffer_subdata(struct pipe_context *_pipe, */ if (usage & (PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_DISCARD_WHOLE_RESOURCE) || - size > TC_MAX_SUBDATA_BYTES) { + size > TC_MAX_SUBDATA_BYTES || + tres->cpu_storage) { struct pipe_transfer *transfer; struct pipe_box box; uint8_t *map = NULL; @@ -2677,7 +2775,7 @@ tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream, static void tc_set_debug_callback(struct pipe_context *_pipe, - const struct pipe_debug_callback *cb) + const struct util_debug_callback *cb) { struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; @@ -2743,27 +2841,14 @@ tc_fence_server_sync(struct pipe_context *_pipe, screen->fence_reference(screen, &call->fence, fence); } -static uint16_t -tc_call_fence_server_signal(struct pipe_context *pipe, void *call, uint64_t *last) -{ - struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence; - - pipe->fence_server_signal(pipe, fence); - pipe->screen->fence_reference(pipe->screen, &fence, NULL); - return call_size(tc_fence_call); -} - static void tc_fence_server_signal(struct pipe_context *_pipe, struct pipe_fence_handle *fence) { struct threaded_context *tc = threaded_context(_pipe); - struct pipe_screen *screen = tc->pipe->screen; - struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_signal, - tc_fence_call); - - call->fence = NULL; - screen->fence_reference(screen, &call->fence, fence); + struct pipe_context *pipe = tc->pipe; + tc_sync(tc); + pipe->fence_server_signal(pipe, fence); } static struct pipe_video_codec * @@ -3135,9 +3220,6 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, unsigned index_size = info->index_size; bool has_user_indices = info->has_user_indices; - if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) - tc_add_all_gfx_bindings_to_buffer_list(tc); - if (unlikely(indirect)) { assert(!has_user_indices); assert(num_draws == 1); @@ -3171,6 +3253,10 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, memcpy(&p->indirect, indirect, sizeof(*indirect)); p->draw.start = draws[0].start; + + /* This must be after tc_add_call, which can flush the batch. */ + if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) + tc_add_all_gfx_bindings_to_buffer_list(tc); return; } @@ -3225,6 +3311,10 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, p->info.max_index = draws[0].count; p->index_bias = draws[0].index_bias; } + + /* This must be after tc_add_call, which can flush the batch. */ + if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) + tc_add_all_gfx_bindings_to_buffer_list(tc); return; } @@ -3337,6 +3427,10 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, total_offset += dr; } } + + /* This must be after tc_add_*call, which can flush the batch. */ + if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) + tc_add_all_gfx_bindings_to_buffer_list(tc); } struct tc_draw_vstate_single { @@ -3435,9 +3529,6 @@ tc_draw_vertex_state(struct pipe_context *_pipe, { struct threaded_context *tc = threaded_context(_pipe); - if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) - tc_add_all_gfx_bindings_to_buffer_list(tc); - if (num_draws == 1) { /* Single draw. */ struct tc_draw_vstate_single *p = @@ -3456,6 +3547,11 @@ tc_draw_vertex_state(struct pipe_context *_pipe, tc_set_vertex_state_reference(&p->state, state); else p->state = state; + + + /* This must be after tc_add_*call, which can flush the batch. */ + if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) + tc_add_all_gfx_bindings_to_buffer_list(tc); return; } @@ -3497,6 +3593,11 @@ tc_draw_vertex_state(struct pipe_context *_pipe, total_offset += dr; } + + + /* This must be after tc_add_*call, which can flush the batch. */ + if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) + tc_add_all_gfx_bindings_to_buffer_list(tc); } struct tc_launch_grid_call { @@ -3523,14 +3624,15 @@ tc_launch_grid(struct pipe_context *_pipe, tc_launch_grid_call); assert(info->input == NULL); - if (unlikely(tc->add_all_compute_bindings_to_buffer_list)) - tc_add_all_compute_bindings_to_buffer_list(tc); - tc_set_resource_reference(&p->info.indirect, info->indirect); memcpy(&p->info, info, sizeof(*info)); if (info->indirect) tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect); + + /* This must be after tc_add_*call, which can flush the batch. */ + if (unlikely(tc->add_all_compute_bindings_to_buffer_list)) + tc_add_all_compute_bindings_to_buffer_list(tc); } static uint16_t @@ -3558,6 +3660,9 @@ tc_resource_copy_region(struct pipe_context *_pipe, tc_add_call(tc, TC_CALL_resource_copy_region, tc_resource_copy_region); + if (dst->target == PIPE_BUFFER) + tc_buffer_disable_cpu_storage(dst); + tc_set_resource_reference(&p->dst, dst); p->dst_level = dst_level; p->dstx = dstx; @@ -3872,6 +3977,8 @@ tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res, struct tc_clear_buffer *p = tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer); + tc_buffer_disable_cpu_storage(res); + tc_set_resource_reference(&p->res, res); tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res); p->offset = offset; diff --git a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h index a961a11db..67d2d69d2 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h @@ -104,6 +104,8 @@ * TC_TRANSFER_MAP_NO_INVALIDATE into transfer_map and buffer_subdata to * indicate this. Ignoring the flag will lead to failures. * The threaded context uses its own buffer invalidation mechanism. + * Do NOT use pipe_buffer_write, as this may trigger invalidation; + * use tc_buffer_write instead. * * 4) PIPE_MAP_ONCE can no longer be used to infer that a buffer will not be mapped * a second time before it is unmapped. @@ -197,17 +199,24 @@ #include "pipe/p_state.h" #include "util/bitset.h" #include "util/u_inlines.h" +#include "util/u_memory.h" #include "util/u_queue.h" #include "util/u_range.h" #include "util/u_thread.h" #include "util/slab.h" +#ifdef __cplusplus +extern "C" { +#endif + struct threaded_context; struct tc_unflushed_batch_token; /* 0 = disabled, 1 = assertions, 2 = printfs, 3 = logging */ #define TC_DEBUG 0 +/* This is an internal flag not sent to the driver. */ +#define TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE (1u << 28) /* These are map flags sent to drivers. */ /* Never infer whether it's safe to use unsychronized mappings: */ #define TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED (1u << 29) @@ -313,6 +322,13 @@ struct threaded_resource { */ struct pipe_resource *latest; + /* Optional CPU storage of the buffer. When we get partial glBufferSubData(implemented by + * copy_buffer) + glDrawElements, we don't want to drain the gfx pipeline before executing + * the copy. For ideal pipelining, we upload to this CPU storage and then reallocate + * the GPU storage completely and reupload everything without copy_buffer. + */ + void *cpu_storage; + /* The buffer range which is initialized (with a write transfer, streamout, * or writable shader resources). The remainder of the buffer is considered * invalid and can be mapped unsynchronized. @@ -328,8 +344,9 @@ struct threaded_resource { /* Drivers are required to update this for shared resources and user * pointers. */ - bool is_shared; + bool is_shared; bool is_user_ptr; + bool allow_cpu_storage; /* Unique buffer ID. Drivers must set it to non-zero for buffers and it must * be unique. Textures must set 0. Low bits are used as a hash of the ID. @@ -360,6 +377,8 @@ struct threaded_transfer { * the base instance. Initially it's set to &b.resource->valid_buffer_range. */ struct util_range *valid_buffer_range; + + bool cpu_storage_mapped; }; struct threaded_query { @@ -497,13 +516,13 @@ struct threaded_context { uint32_t shader_buffers_writeable_mask[PIPE_SHADER_TYPES]; uint32_t image_buffers_writeable_mask[PIPE_SHADER_TYPES]; /* Don't use PIPE_MAX_SHADER_SAMPLER_VIEWS because it's too large. */ - uint32_t sampler_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; + uint32_t sampler_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; struct tc_batch batch_slots[TC_MAX_BATCHES]; struct tc_buffer_list buffer_lists[TC_MAX_BUFFER_LISTS]; }; -void threaded_resource_init(struct pipe_resource *res); +void threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage); void threaded_resource_deinit(struct pipe_resource *res); struct pipe_context *threaded_context_unwrap_sync(struct pipe_context *pipe); void tc_driver_internal_flush_notify(struct threaded_context *tc); @@ -579,4 +598,36 @@ tc_assert_driver_thread(struct threaded_context *tc) #endif } +/** + * This is called before GPU stores to disable the CPU storage because + * the CPU storage doesn't mirror the GPU storage. + * + * Drivers should also call it before exporting a DMABUF of a buffer. + */ +static inline void +tc_buffer_disable_cpu_storage(struct pipe_resource *buf) +{ + struct threaded_resource *tres = threaded_resource(buf); + + if (tres->cpu_storage) { + align_free(tres->cpu_storage); + tres->cpu_storage = NULL; + } + tres->allow_cpu_storage = false; +} + +static inline void +tc_buffer_write(struct pipe_context *pipe, + struct pipe_resource *buf, + unsigned offset, + unsigned size, + const void *data) +{ + pipe->buffer_subdata(pipe, buf, PIPE_MAP_WRITE | TC_TRANSFER_MAP_NO_INVALIDATE, offset, size, data); +} + +#ifdef __cplusplus +} +#endif + #endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context_calls.h b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context_calls.h index ab78d3de3..2dbdd885a 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context_calls.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context_calls.h @@ -1,7 +1,6 @@ CALL(flush) CALL(callback) CALL(fence_server_sync) -CALL(fence_server_signal) CALL(destroy_query) CALL(begin_query) CALL(end_query) diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c index 57e2d98f0..5e4f4f4b9 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c @@ -112,6 +112,7 @@ struct u_vbuf_elements { * its vertex data must be translated to native_format[i]. */ enum pipe_format native_format[PIPE_MAX_ATTRIBS]; unsigned native_format_size[PIPE_MAX_ATTRIBS]; + unsigned component_size[PIPE_MAX_ATTRIBS]; /* Which buffers are used by the vertex element state. */ uint32_t used_vb_mask; @@ -127,6 +128,7 @@ struct u_vbuf_elements { /* Which buffer has at least one vertex element referencing it * compatible. */ uint32_t compatible_vb_mask_any; + uint32_t vb_align_mask[2]; //which buffers require 2/4 byte alignments /* Which buffer has all vertex elements referencing it compatible. */ uint32_t compatible_vb_mask_all; @@ -163,6 +165,8 @@ struct u_vbuf { struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; uint32_t enabled_vb_mask; + uint32_t unaligned_vb_mask[2]; //16/32bit + /* Vertex buffers for the driver. * There are usually no user buffers. */ struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS]; @@ -303,6 +307,11 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps, caps->velem_src_offset_unaligned = !screen->get_param(screen, PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY); + caps->attrib_component_unaligned = + !screen->get_param(screen, + PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY); + assert(caps->attrib_component_unaligned || + (caps->velem_src_offset_unaligned && caps->buffer_stride_unaligned && caps->buffer_offset_unaligned)); caps->user_vertex_buffers = screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS); caps->max_vertex_buffers = @@ -330,6 +339,7 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps, if (!caps->buffer_offset_unaligned || !caps->buffer_stride_unaligned || + !caps->attrib_component_unaligned || !caps->velem_src_offset_unaligned) caps->fallback_always = true; @@ -669,13 +679,14 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, const struct pipe_draw_info *info, const struct pipe_draw_start_count_bias *draw, int start_vertex, unsigned num_vertices, - int min_index, boolean unroll_indices) + int min_index, boolean unroll_indices, + uint32_t misaligned) { unsigned mask[VB_NUM] = {0}; struct translate_key key[VB_NUM]; unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */ unsigned i, type; - const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask & + const unsigned incompatible_vb_mask = (misaligned | mgr->incompatible_vb_mask) & mgr->ve->used_vb_mask; const int start[VB_NUM] = { @@ -727,6 +738,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, return FALSE; } + unsigned min_alignment[VB_NUM] = {0}; /* Initialize the translate keys. */ for (i = 0; i < mgr->ve->count; i++) { struct translate_key *k; @@ -765,15 +777,25 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, te->input_offset = mgr->ve->ve[i].src_offset; te->output_format = output_format; te->output_offset = k->output_stride; + unsigned adjustment = 0; + if (!mgr->caps.attrib_component_unaligned && + te->output_offset % mgr->ve->component_size[i] != 0) { + unsigned aligned = align(te->output_offset, mgr->ve->component_size[i]); + adjustment = aligned - te->output_offset; + te->output_offset = aligned; + } - k->output_stride += mgr->ve->native_format_size[i]; + k->output_stride += mgr->ve->native_format_size[i] + adjustment; k->nr_elements++; + min_alignment[type] = MAX2(min_alignment[type], mgr->ve->component_size[i]); } /* Translate buffers. */ for (type = 0; type < VB_NUM; type++) { if (key[type].nr_elements) { enum pipe_error err; + if (!mgr->caps.attrib_component_unaligned) + key[type].output_stride = align(key[type].output_stride, min_alignment[type]); err = u_vbuf_translate_buffers(mgr, &key[type], info, draw, mask[type], mgr->fallback_vbs[type], start[type], num[type], min_index, @@ -881,13 +903,27 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, ve->native_format_size[i] = util_format_get_blocksize(ve->native_format[i]); + const struct util_format_description *desc = util_format_description(format); + bool is_packed = false; + for (unsigned c = 0; c < desc->nr_channels; c++) + is_packed |= desc->channel[c].size != desc->channel[0].size || desc->channel[c].size % 8 != 0; + unsigned component_size = is_packed ? + ve->native_format_size[i] : (ve->native_format_size[i] / desc->nr_channels); + ve->component_size[i] = component_size; + if (ve->ve[i].src_format != format || (!mgr->caps.velem_src_offset_unaligned && - ve->ve[i].src_offset % 4 != 0)) { + ve->ve[i].src_offset % 4 != 0) || + (!mgr->caps.attrib_component_unaligned && + ve->ve[i].src_offset % component_size != 0)) { ve->incompatible_elem_mask |= 1 << i; ve->incompatible_vb_mask_any |= vb_index_bit; } else { ve->compatible_vb_mask_any |= vb_index_bit; + if (component_size == 2) + ve->vb_align_mask[0] |= vb_index_bit; + else if (component_size == 4) + ve->vb_align_mask[1] |= vb_index_bit; } } @@ -951,21 +987,25 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, uint32_t incompatible_vb_mask = 0; /* which buffers have a non-zero stride */ uint32_t nonzero_stride_vb_mask = 0; - const uint32_t mask = + /* which buffers are unaligned to 2/4 bytes */ + uint32_t unaligned_vb_mask[2] = {0}; + uint32_t mask = ~(((1ull << (count + unbind_num_trailing_slots)) - 1) << start_slot); - /* Zero out the bits we are going to rewrite completely. */ - mgr->user_vb_mask &= mask; - mgr->incompatible_vb_mask &= mask; - mgr->nonzero_stride_vb_mask &= mask; - mgr->enabled_vb_mask &= mask; - if (!bufs) { struct pipe_context *pipe = mgr->pipe; /* Unbind. */ unsigned total_count = count + unbind_num_trailing_slots; mgr->dirty_real_vb_mask &= mask; + /* Zero out the bits we are going to rewrite completely. */ + mgr->user_vb_mask &= mask; + mgr->incompatible_vb_mask &= mask; + mgr->nonzero_stride_vb_mask &= mask; + mgr->enabled_vb_mask &= mask; + mgr->unaligned_vb_mask[0] &= mask; + mgr->unaligned_vb_mask[1] &= mask; + for (i = 0; i < total_count; i++) { unsigned dst_index = start_slot + i; @@ -990,6 +1030,21 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, continue; } + bool not_user = !vb->is_user_buffer && vb->is_user_buffer == orig_vb->is_user_buffer; + /* struct isn't tightly packed: do not use memcmp */ + if (not_user && orig_vb->stride == vb->stride && + orig_vb->buffer_offset == vb->buffer_offset && orig_vb->buffer.resource == vb->buffer.resource) { + mask |= BITFIELD_BIT(dst_index); + if (take_ownership) { + pipe_vertex_buffer_unreference(orig_vb); + /* the pointer was unset in the line above, so copy it back */ + orig_vb->buffer.resource = vb->buffer.resource; + } + if (mask == UINT32_MAX) + return; + continue; + } + if (take_ownership) { pipe_vertex_buffer_unreference(orig_vb); memcpy(orig_vb, vb, sizeof(*vb)); @@ -1012,6 +1067,13 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, continue; } + if (!mgr->caps.attrib_component_unaligned) { + if (vb->buffer_offset % 2 != 0 || vb->stride % 2 != 0) + unaligned_vb_mask[0] |= BITFIELD_BIT(dst_index); + if (vb->buffer_offset % 4 != 0 || vb->stride % 4 != 0) + unaligned_vb_mask[1] |= BITFIELD_BIT(dst_index); + } + if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) { user_vb_mask |= 1 << dst_index; real_vb->buffer_offset = vb->buffer_offset; @@ -1031,10 +1093,21 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]); } + + /* Zero out the bits we are going to rewrite completely. */ + mgr->user_vb_mask &= mask; + mgr->incompatible_vb_mask &= mask; + mgr->nonzero_stride_vb_mask &= mask; + mgr->enabled_vb_mask &= mask; + mgr->unaligned_vb_mask[0] &= mask; + mgr->unaligned_vb_mask[1] &= mask; + mgr->user_vb_mask |= user_vb_mask; mgr->incompatible_vb_mask |= incompatible_vb_mask; mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask; mgr->enabled_vb_mask |= enabled_vb_mask; + mgr->unaligned_vb_mask[0] |= unaligned_vb_mask[0]; + mgr->unaligned_vb_mask[1] |= unaligned_vb_mask[1]; /* All changed buffers are marked as dirty, even the NULL ones, * which will cause the NULL buffers to be unbound in the driver later. */ @@ -1184,7 +1257,7 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr, return PIPE_OK; } -static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr) +static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr, uint32_t misaligned) { /* See if there are any per-vertex attribs which will be uploaded or * translated. Use bitmasks to get the info instead of looping over vertex @@ -1192,12 +1265,13 @@ static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr) return (mgr->ve->used_vb_mask & ((mgr->user_vb_mask | mgr->incompatible_vb_mask | + misaligned | mgr->ve->incompatible_vb_mask_any) & mgr->ve->noninstance_vb_mask_any & mgr->nonzero_stride_vb_mask)) != 0; } -static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr) +static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr, uint32_t misaligned) { /* Return true if there are hw buffers which don't need to be translated. * @@ -1206,6 +1280,7 @@ static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr) return (mgr->ve->used_vb_mask & (~mgr->user_vb_mask & ~mgr->incompatible_vb_mask & + ~misaligned & mgr->ve->compatible_vb_mask_all & mgr->ve->noninstance_vb_mask_any & mgr->nonzero_stride_vb_mask)) != 0; @@ -1391,12 +1466,19 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, boolean unroll_indices = FALSE; const uint32_t used_vb_mask = mgr->ve->used_vb_mask; uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask; - const uint32_t incompatible_vb_mask = - mgr->incompatible_vb_mask & used_vb_mask; struct pipe_draw_info new_info; struct pipe_draw_start_count_bias new_draw; unsigned fixed_restart_index = info->index_size ? util_prim_restart_index_from_size(info->index_size) : 0; + uint32_t misaligned = 0; + if (!mgr->caps.attrib_component_unaligned) { + for (unsigned i = 0; i < ARRAY_SIZE(mgr->unaligned_vb_mask); i++) { + misaligned |= mgr->ve->vb_align_mask[i] & mgr->unaligned_vb_mask[i]; + } + } + const uint32_t incompatible_vb_mask = + (mgr->incompatible_vb_mask | misaligned) & used_vb_mask; + /* Normal draw. No fallback and no user buffers. */ if (!incompatible_vb_mask && !mgr->ve->incompatible_elem_mask && @@ -1584,7 +1666,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, if (new_info.index_size) { /* See if anything needs to be done for per-vertex attribs. */ - if (u_vbuf_need_minmax_index(mgr)) { + if (u_vbuf_need_minmax_index(mgr, misaligned)) { unsigned max_index; if (new_info.index_bounds_valid) { @@ -1607,7 +1689,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, if (!indirect && !new_info.primitive_restart && util_is_vbo_upload_ratio_too_large(new_draw.count, num_vertices) && - !u_vbuf_mapping_vertex_buffer_blocks(mgr)) { + !u_vbuf_mapping_vertex_buffer_blocks(mgr, misaligned)) { unroll_indices = TRUE; user_vb_mask &= ~(mgr->nonzero_stride_vb_mask & mgr->ve->noninstance_vb_mask_any); @@ -1630,7 +1712,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, mgr->ve->incompatible_elem_mask) { if (!u_vbuf_translate_begin(mgr, &new_info, &new_draw, start_vertex, num_vertices, - min_index, unroll_indices)) { + min_index, unroll_indices, misaligned)) { debug_warn_once("u_vbuf_translate_begin() failed"); goto cleanup; } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h index 5cefac567..b0bd06bc9 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h @@ -50,6 +50,7 @@ struct u_vbuf_caps { unsigned buffer_offset_unaligned:1; unsigned buffer_stride_unaligned:1; unsigned velem_src_offset_unaligned:1; + unsigned attrib_component_unaligned:1; /* Whether the driver supports user vertex buffers. */ unsigned user_vertex_buffers:1; diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_rbsp.h b/lib/mesa/src/gallium/auxiliary/vl/vl_rbsp.h deleted file mode 100644 index 7867238c4..000000000 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_rbsp.h +++ /dev/null @@ -1,164 +0,0 @@ -/************************************************************************** - * - * Copyright 2013 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* - * Authors: - * Christian König <christian.koenig@amd.com> - * - */ - -/* - * Functions for reading the raw byte sequence payload of H.264 - */ - -#ifndef vl_rbsp_h -#define vl_rbsp_h - -#include "vl/vl_vlc.h" - -struct vl_rbsp { - struct vl_vlc nal; - unsigned escaped; -}; - -/** - * Initialize the RBSP object - */ -static inline void vl_rbsp_init(struct vl_rbsp *rbsp, struct vl_vlc *nal, unsigned num_bits) -{ - unsigned bits_left = vl_vlc_bits_left(nal); - - /* copy the position */ - rbsp->nal = *nal; - - rbsp->escaped = 0; - - /* search for the end of the NAL unit */ - while (vl_vlc_search_byte(nal, num_bits, 0x00)) { - if (vl_vlc_peekbits(nal, 24) == 0x000001 || - vl_vlc_peekbits(nal, 32) == 0x00000001) { - vl_vlc_limit(&rbsp->nal, bits_left - vl_vlc_bits_left(nal)); - return; - } - vl_vlc_eatbits(nal, 8); - } -} - -/** - * Make at least 16 more bits available - */ -static inline void vl_rbsp_fillbits(struct vl_rbsp *rbsp) -{ - unsigned valid = vl_vlc_valid_bits(&rbsp->nal); - unsigned i, bits; - - /* abort if we still have enough bits */ - if (valid >= 32) - return; - - vl_vlc_fillbits(&rbsp->nal); - - /* abort if we have less than 24 bits left in this nal */ - if (vl_vlc_bits_left(&rbsp->nal) < 24) - return; - - /* check that we have enough bits left from the last fillbits */ - assert(valid >= rbsp->escaped); - - /* handle the already escaped bits */ - valid -= rbsp->escaped; - - /* search for the emulation prevention three byte */ - rbsp->escaped = 16; - bits = vl_vlc_valid_bits(&rbsp->nal); - for (i = valid + 24; i <= bits; i += 8) { - if ((vl_vlc_peekbits(&rbsp->nal, i) & 0xffffff) == 0x3) { - vl_vlc_removebits(&rbsp->nal, i - 8, 8); - rbsp->escaped = bits - i; - bits -= 8; - i += 8; - } - } -} - -/** - * Return an unsigned integer from the first n bits - */ -static inline unsigned vl_rbsp_u(struct vl_rbsp *rbsp, unsigned n) -{ - if (n == 0) - return 0; - - vl_rbsp_fillbits(rbsp); - return vl_vlc_get_uimsbf(&rbsp->nal, n); -} - -/** - * Return an unsigned exponential Golomb encoded integer - */ -static inline unsigned vl_rbsp_ue(struct vl_rbsp *rbsp) -{ - unsigned bits = 0; - - vl_rbsp_fillbits(rbsp); - while (!vl_vlc_get_uimsbf(&rbsp->nal, 1)) - ++bits; - - return (1 << bits) - 1 + vl_rbsp_u(rbsp, bits); -} - -/** - * Return an signed exponential Golomb encoded integer - */ -static inline signed vl_rbsp_se(struct vl_rbsp *rbsp) -{ - signed codeNum = vl_rbsp_ue(rbsp); - if (codeNum & 1) - return (codeNum + 1) >> 1; - else - return -(codeNum >> 1); -} - -/** - * Are more data available in the RBSP ? - */ -static inline bool vl_rbsp_more_data(struct vl_rbsp *rbsp) -{ - unsigned bits, value; - - if (vl_vlc_bits_left(&rbsp->nal) > 8) - return TRUE; - - bits = vl_vlc_valid_bits(&rbsp->nal); - value = vl_vlc_peekbits(&rbsp->nal, bits); - if (value == 0 || value == (1 << (bits - 1))) - return FALSE; - - return TRUE; -} - -#endif /* vl_rbsp_h */ diff --git a/lib/mesa/src/gallium/auxiliary/vl/vl_vlc.h b/lib/mesa/src/gallium/auxiliary/vl/vl_vlc.h deleted file mode 100644 index 7821b8be0..000000000 --- a/lib/mesa/src/gallium/auxiliary/vl/vl_vlc.h +++ /dev/null @@ -1,382 +0,0 @@ -/************************************************************************** - * - * Copyright 2011 Christian König. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* - * Functions for fast bitwise access to multiple probably unaligned input buffers - */ - -#ifndef vl_vlc_h -#define vl_vlc_h - -#include "pipe/p_compiler.h" - -#include "util/u_math.h" -#include "util/u_pointer.h" -#include "util/u_debug.h" - -struct vl_vlc -{ - uint64_t buffer; - signed invalid_bits; - const uint8_t *data; - const uint8_t *end; - - const void *const *inputs; - const unsigned *sizes; - unsigned bytes_left; -}; - -struct vl_vlc_entry -{ - int8_t length; - int8_t value; -}; - -struct vl_vlc_compressed -{ - uint16_t bitcode; - struct vl_vlc_entry entry; -}; - -/** - * initalize and decompress a lookup table - */ -static inline void -vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_vlc_compressed *src, unsigned src_size) -{ - unsigned i, bits = util_logbase2(dst_size); - - assert(dst && dst_size); - assert(src && src_size); - - for (i=0;i<dst_size;++i) { - dst[i].length = 0; - dst[i].value = 0; - } - - for(; src_size > 0; --src_size, ++src) { - for(i=0; i<(1 << (bits - src->entry.length)); ++i) - dst[src->bitcode >> (16 - bits) | i] = src->entry; - } -} - -/** - * switch over to next input buffer - */ -static inline void -vl_vlc_next_input(struct vl_vlc *vlc) -{ - unsigned len = vlc->sizes[0]; - - assert(vlc); - assert(vlc->bytes_left); - - if (len < vlc->bytes_left) - vlc->bytes_left -= len; - else { - len = vlc->bytes_left; - vlc->bytes_left = 0; - } - - vlc->data = vlc->inputs[0]; - vlc->end = vlc->data + len; - - ++vlc->inputs; - ++vlc->sizes; -} - -/** - * align the data pointer to the next dword - */ -static inline void -vl_vlc_align_data_ptr(struct vl_vlc *vlc) -{ - /* align the data pointer */ - while (vlc->data != vlc->end && pointer_to_uintptr(vlc->data) & 3) { - vlc->buffer |= (uint64_t)*vlc->data << (24 + vlc->invalid_bits); - ++vlc->data; - vlc->invalid_bits -= 8; - } -} - -/** - * fill the bit buffer, so that at least 32 bits are valid - */ -static inline void -vl_vlc_fillbits(struct vl_vlc *vlc) -{ - assert(vlc); - - /* as long as the buffer needs to be filled */ - while (vlc->invalid_bits > 0) { - unsigned bytes_left = vlc->end - vlc->data; - - /* if this input is depleted */ - if (bytes_left == 0) { - - if (vlc->bytes_left) { - /* go on to next input */ - vl_vlc_next_input(vlc); - vl_vlc_align_data_ptr(vlc); - } else - /* or give up since we don't have anymore inputs */ - return; - - } else if (bytes_left >= 4) { - - /* enough bytes in buffer, read in a whole dword */ - uint64_t value = *(const uint32_t*)vlc->data; - -#ifndef PIPE_ARCH_BIG_ENDIAN - value = util_bswap32(value); -#endif - - vlc->buffer |= value << vlc->invalid_bits; - vlc->data += 4; - vlc->invalid_bits -= 32; - - /* buffer is now definitely filled up avoid the loop test */ - break; - - } else while (vlc->data < vlc->end) { - - /* not enough bytes left in buffer, read single bytes */ - vlc->buffer |= (uint64_t)*vlc->data << (24 + vlc->invalid_bits); - ++vlc->data; - vlc->invalid_bits -= 8; - } - } -} - -/** - * initialize vlc structure and start reading from first input buffer - */ -static inline void -vl_vlc_init(struct vl_vlc *vlc, unsigned num_inputs, - const void *const *inputs, const unsigned *sizes) -{ - unsigned i; - - assert(vlc); - assert(num_inputs); - - vlc->buffer = 0; - vlc->invalid_bits = 32; - vlc->inputs = inputs; - vlc->sizes = sizes; - vlc->bytes_left = 0; - - for (i = 0; i < num_inputs; ++i) - vlc->bytes_left += sizes[i]; - - if (vlc->bytes_left) { - vl_vlc_next_input(vlc); - vl_vlc_align_data_ptr(vlc); - vl_vlc_fillbits(vlc); - } -} - -/** - * number of bits still valid in bit buffer - */ -static inline unsigned -vl_vlc_valid_bits(struct vl_vlc *vlc) -{ - return 32 - vlc->invalid_bits; -} - -/** - * number of bits left over all inbut buffers - */ -static inline unsigned -vl_vlc_bits_left(struct vl_vlc *vlc) -{ - signed bytes_left = vlc->end - vlc->data; - bytes_left += vlc->bytes_left; - return bytes_left * 8 + vl_vlc_valid_bits(vlc); -} - -/** - * get num_bits from bit buffer without removing them - */ -static inline unsigned -vl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits) -{ - assert(vl_vlc_valid_bits(vlc) >= num_bits || vlc->data >= vlc->end); - return vlc->buffer >> (64 - num_bits); -} - -/** - * remove num_bits from bit buffer - */ -static inline void -vl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits) -{ - assert(vl_vlc_valid_bits(vlc) >= num_bits); - - vlc->buffer <<= num_bits; - vlc->invalid_bits += num_bits; -} - -/** - * get num_bits from bit buffer with removing them - */ -static inline unsigned -vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits) -{ - unsigned value; - - assert(vl_vlc_valid_bits(vlc) >= num_bits); - - value = vlc->buffer >> (64 - num_bits); - vl_vlc_eatbits(vlc, num_bits); - - return value; -} - -/** - * treat num_bits as signed value and remove them from bit buffer - */ -static inline signed -vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits) -{ - signed value; - - assert(vl_vlc_valid_bits(vlc) >= num_bits); - - value = ((int64_t)vlc->buffer) >> (64 - num_bits); - vl_vlc_eatbits(vlc, num_bits); - - return value; -} - -/** - * lookup a value and length in a decompressed table - */ -static inline int8_t -vl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned num_bits) -{ - tbl += vl_vlc_peekbits(vlc, num_bits); - vl_vlc_eatbits(vlc, tbl->length); - return tbl->value; -} - -/** - * fast forward search for a specific byte value - */ -static inline boolean -vl_vlc_search_byte(struct vl_vlc *vlc, unsigned num_bits, uint8_t value) -{ - /* make sure we are on a byte boundary */ - assert((vl_vlc_valid_bits(vlc) % 8) == 0); - assert(num_bits == ~0 || (num_bits % 8) == 0); - - /* deplete the bit buffer */ - while (vl_vlc_valid_bits(vlc) > 0) { - - if (vl_vlc_peekbits(vlc, 8) == value) { - vl_vlc_fillbits(vlc); - return TRUE; - } - - vl_vlc_eatbits(vlc, 8); - - if (num_bits != ~0) { - num_bits -= 8; - if (num_bits == 0) - return FALSE; - } - } - - /* deplete the byte buffers */ - while (1) { - - /* if this input is depleted */ - if (vlc->data == vlc->end) { - if (vlc->bytes_left) - /* go on to next input */ - vl_vlc_next_input(vlc); - else - /* or give up since we don't have anymore inputs */ - return FALSE; - } - - if (*vlc->data == value) { - vl_vlc_align_data_ptr(vlc); - vl_vlc_fillbits(vlc); - return TRUE; - } - - ++vlc->data; - if (num_bits != ~0) { - num_bits -= 8; - if (num_bits == 0) { - vl_vlc_align_data_ptr(vlc); - return FALSE; - } - } - } -} - -/** - * remove num_bits bits starting at pos from the bitbuffer - */ -static inline void -vl_vlc_removebits(struct vl_vlc *vlc, unsigned pos, unsigned num_bits) -{ - uint64_t lo = (vlc->buffer & (~0UL >> (pos + num_bits))) << num_bits; - uint64_t hi = (vlc->buffer & (~0UL << (64 - pos))); - vlc->buffer = lo | hi; - vlc->invalid_bits += num_bits; -} - -/** - * limit the number of bits left for fetching - */ -static inline void -vl_vlc_limit(struct vl_vlc *vlc, unsigned bits_left) -{ - assert(bits_left <= vl_vlc_bits_left(vlc)); - - vl_vlc_fillbits(vlc); - if (bits_left < vl_vlc_valid_bits(vlc)) { - vlc->invalid_bits = 32 - bits_left; - vlc->buffer &= ~0L << (vlc->invalid_bits + 32); - vlc->end = vlc->data; - vlc->bytes_left = 0; - } else { - assert((bits_left - vl_vlc_valid_bits(vlc)) % 8 == 0); - vlc->bytes_left = (bits_left - vl_vlc_valid_bits(vlc)) / 8; - if (vlc->bytes_left < (vlc->end - vlc->data)) { - vlc->end = vlc->data + vlc->bytes_left; - vlc->bytes_left = 0; - } else - vlc->bytes_left -= vlc->end - vlc->data; - } -} - -#endif /* vl_vlc_h */ |