diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-02-24 01:57:18 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2022-02-24 01:57:18 +0000 |
commit | b24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (patch) | |
tree | 658ca4e6b41655f49463c85edbaeda48979c394c /lib/mesa/src/gallium/auxiliary/util | |
parent | 57768bbb154c2879d34ec20e401b19472e77aaf7 (diff) |
Import Mesa 21.3.7
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary/util')
17 files changed, 963 insertions, 111 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_box.h b/lib/mesa/src/gallium/auxiliary/util/u_box.h index 764bf5037..c39e13964 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_box.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_box.h @@ -3,6 +3,7 @@ #include "pipe/p_state.h" #include "util/u_math.h" +#include "util/format/u_format.h" static inline void u_box_1d(unsigned x, unsigned w, struct pipe_box *box) @@ -239,4 +240,22 @@ u_box_minify_3d(struct pipe_box *dst, dst->depth = MAX2(src->depth >> l, 1); } +/* Converts a box specified in pixels to an equivalent box specified + * in blocks, where the boxes represent a region-of-interest of an image with + * the given format. This is trivial (a copy) for uncompressed formats. + */ +static inline void +u_box_pixels_to_blocks(struct pipe_box *blocks, + const struct pipe_box *pixels, enum pipe_format format) +{ + u_box_3d( + pixels->x / util_format_get_blockwidth(format), + pixels->y / util_format_get_blockheight(format), + pixels->z, + DIV_ROUND_UP(pixels->width, util_format_get_blockwidth(format)), + DIV_ROUND_UP(pixels->height, util_format_get_blockheight(format)), + pixels->depth, + blocks); +} + #endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_compute.c b/lib/mesa/src/gallium/auxiliary/util/u_compute.c index 79755abaf..8d4d871b2 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_compute.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_compute.c @@ -76,7 +76,7 @@ static void *blit_compute_shader(struct pipe_context *ctx) } void util_compute_blit(struct pipe_context *ctx, struct pipe_blit_info *blit_info, - void **compute_state) + void **compute_state, bool half_texel_offset) { if (blit_info->src.box.width == 0 || blit_info->src.box.height == 0 || blit_info->dst.box.width == 0 || blit_info->dst.box.height == 0) @@ -91,9 +91,10 @@ void util_compute_blit(struct pipe_context *ctx, struct pipe_blit_info *blit_inf float x_scale = blit_info->src.box.width / (float)blit_info->dst.box.width; float y_scale = blit_info->src.box.height / (float)blit_info->dst.box.height; float z_scale = blit_info->src.box.depth / (float)blit_info->dst.box.depth; + float offset = half_texel_offset ? 0.5 : 0.0; - unsigned data[] = {u_bitcast_f2u(blit_info->src.box.x / (float)src->width0), - u_bitcast_f2u(blit_info->src.box.y / (float)src->height0), + unsigned data[] = {u_bitcast_f2u((blit_info->src.box.x + offset) / (float)src->width0), + u_bitcast_f2u((blit_info->src.box.y + offset) / (float)src->height0), u_bitcast_f2u(blit_info->src.box.z), u_bitcast_f2u(0), u_bitcast_f2u(x_scale / src->width0), @@ -138,7 +139,7 @@ void util_compute_blit(struct pipe_context *ctx, struct pipe_blit_info *blit_inf u_sampler_view_default_template(&src_templ, src, src->format); src_templ.format = util_format_linear(blit_info->src.format); src_view = ctx->create_sampler_view(ctx, src, &src_templ); - ctx->set_sampler_views(ctx, PIPE_SHADER_COMPUTE, 0, 1, 0, &src_view); + ctx->set_sampler_views(ctx, PIPE_SHADER_COMPUTE, 0, 1, 0, false, &src_view); if (!*compute_state) *compute_state = blit_compute_shader(ctx); @@ -159,7 +160,7 @@ void util_compute_blit(struct pipe_context *ctx, struct pipe_blit_info *blit_inf ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL); ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, false, NULL); - ctx->set_sampler_views(ctx, PIPE_SHADER_COMPUTE, 0, 0, 1, NULL); + ctx->set_sampler_views(ctx, PIPE_SHADER_COMPUTE, 0, 0, 1, false, NULL); pipe_sampler_view_reference(&src_view, NULL); ctx->delete_sampler_state(ctx, sampler_state_p); ctx->bind_compute_state(ctx, NULL); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_compute.h b/lib/mesa/src/gallium/auxiliary/util/u_compute.h index 8c2866af8..4a6c66e0e 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_compute.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_compute.h @@ -36,7 +36,7 @@ extern "C" { #endif void util_compute_blit(struct pipe_context *ctx, struct pipe_blit_info *blit_info, - void **compute_state); + void **compute_state, bool half_texel_offset); #ifdef __cplusplus } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_image.c b/lib/mesa/src/gallium/auxiliary/util/u_debug_image.c index 91bfa10af..fd0513f65 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_image.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_debug_image.c @@ -113,10 +113,10 @@ debug_dump_surface(struct pipe_context *pipe, */ texture = surface->texture; - data = pipe_transfer_map(pipe, texture, surface->u.tex.level, - surface->u.tex.first_layer, - PIPE_MAP_READ, - 0, 0, surface->width, surface->height, &transfer); + data = pipe_texture_map(pipe, texture, surface->u.tex.level, + surface->u.tex.first_layer, + PIPE_MAP_READ, + 0, 0, surface->width, surface->height, &transfer); if (!data) return; @@ -128,7 +128,7 @@ debug_dump_surface(struct pipe_context *pipe, transfer->stride, data); - pipe->transfer_unmap(pipe, transfer); + pipe->texture_unmap(pipe, transfer); } @@ -192,13 +192,13 @@ debug_dump_surface_bmp(struct pipe_context *pipe, struct pipe_resource *texture = surface->texture; void *ptr; - ptr = pipe_transfer_map(pipe, texture, surface->u.tex.level, - surface->u.tex.first_layer, PIPE_MAP_READ, - 0, 0, surface->width, surface->height, &transfer); + ptr = pipe_texture_map(pipe, texture, surface->u.tex.level, + surface->u.tex.first_layer, PIPE_MAP_READ, + 0, 0, surface->width, surface->height, &transfer); debug_dump_transfer_bmp(pipe, filename, transfer, ptr); - pipe->transfer_unmap(pipe, transfer); + pipe->texture_unmap(pipe, transfer); } void diff --git a/lib/mesa/src/gallium/auxiliary/util/u_driconf.c b/lib/mesa/src/gallium/auxiliary/util/u_driconf.c new file mode 100644 index 000000000..8ace84747 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_driconf.c @@ -0,0 +1,69 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "u_driconf.h" + +void +u_driconf_fill_st_options(struct st_config_options *options, + const struct driOptionCache *optionCache) +{ +#define query_option_impl(option, type) \ + options->option = driQueryOption##type(optionCache, #option) +#define query_bool_option(option) query_option_impl(option, b) +#define query_int_option(option) query_option_impl(option, i) +#define query_string_option(option) \ + do { \ + char *option = driQueryOptionstr(optionCache, #option); \ + if (*option) \ + options->option = strdup(option); \ + } while (0) + + query_bool_option(disable_blend_func_extended); + query_bool_option(disable_arb_gpu_shader5); + query_bool_option(disable_glsl_line_continuations); + query_bool_option(force_glsl_extensions_warn); + query_int_option(force_glsl_version); + query_bool_option(allow_extra_pp_tokens); + query_bool_option(allow_glsl_extension_directive_midshader); + query_bool_option(allow_glsl_120_subset_in_110); + query_bool_option(allow_glsl_builtin_const_expression); + query_bool_option(allow_glsl_relaxed_es); + query_bool_option(allow_glsl_builtin_variable_redeclaration); + query_bool_option(allow_higher_compat_version); + query_bool_option(glsl_ignore_write_to_readonly_var); + query_bool_option(glsl_zero_init); + query_bool_option(force_integer_tex_nearest); + query_bool_option(vs_position_always_invariant); + query_bool_option(vs_position_always_precise); + query_bool_option(force_glsl_abs_sqrt); + query_bool_option(allow_glsl_cross_stage_interpolation_mismatch); + query_bool_option(allow_draw_out_of_order); + query_bool_option(ignore_map_unsynchronized); + query_bool_option(force_gl_names_reuse); + query_bool_option(transcode_etc); + query_bool_option(transcode_astc); + query_string_option(force_gl_vendor); + query_string_option(force_gl_renderer); + + driComputeOptionsSha1(optionCache, options->config_options_sha1); +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_driconf.h b/lib/mesa/src/gallium/auxiliary/util/u_driconf.h new file mode 100644 index 000000000..00eead301 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_driconf.h @@ -0,0 +1,42 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef U_DRICONF_H_ +#define U_DRICONF_H_ + +#include "util/xmlconfig.h" +#include "frontend/api.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void +u_driconf_fill_st_options(struct st_config_options *options, + const struct driOptionCache *optionCache); + +#ifdef __cplusplus +} +#endif + +#endif /* U_DRICONF_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_prim.c b/lib/mesa/src/gallium/auxiliary/util/u_prim.c index cbd48e26a..a84d0e71e 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_prim.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_prim.c @@ -21,12 +21,25 @@ */ #include "u_prim.h" +#include "pipe/p_state.h" /** Return string name of given primitive type */ const char * u_prim_name(enum pipe_prim_type prim) { +#if defined(__GNUC__) + /* Check that the enum is packed: */ + STATIC_ASSERT(sizeof(enum pipe_prim_type) == 1); +#endif + + /* Draw merging in u_threaded_context requires that sizeof(mode) == 1. */ + struct pipe_draw_info info; + STATIC_ASSERT(sizeof(info.mode) == 1); + + struct pipe_draw_vertex_state_info dvs_info; + STATIC_ASSERT(sizeof(dvs_info.mode) == 1); + static const struct debug_named_value names[] = { DEBUG_NAMED_VALUE(PIPE_PRIM_POINTS), DEBUG_NAMED_VALUE(PIPE_PRIM_LINES), diff --git a/lib/mesa/src/gallium/auxiliary/util/u_prim.h b/lib/mesa/src/gallium/auxiliary/util/u_prim.h index b9d4a9e80..1fbb2f5b5 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_prim.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_prim.h @@ -201,12 +201,16 @@ u_vertices_per_prim(enum pipe_prim_type primitive) case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: return 6; + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + /* these won't be seen from geometry shaders + but prim assembly might for prim id. */ + return 4; + /* following primitives should never be used * with geometry shaders abd their size is * undefined */ case PIPE_PRIM_POLYGON: - case PIPE_PRIM_QUADS: - case PIPE_PRIM_QUAD_STRIP: default: debug_printf("Unrecognized geometry shader primitive"); return 3; diff --git a/lib/mesa/src/gallium/auxiliary/util/u_screen.c b/lib/mesa/src/gallium/auxiliary/util/u_screen.c index 6e6aadbfd..eba554600 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_screen.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_screen.c @@ -77,6 +77,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: case PIPE_CAP_DEPTH_CLIP_DISABLE: case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: + case PIPE_CAP_DEPTH_CLAMP_ENABLE: case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: @@ -87,6 +88,10 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: return 0; + case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART: + case PIPE_CAP_SUPPORTED_PRIM_MODES: + return BITFIELD_MASK(PIPE_PRIM_MAX); + case PIPE_CAP_MIN_TEXEL_OFFSET: /* GL 3.x minimum value. */ return -8; @@ -269,7 +274,6 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: case PIPE_CAP_CULL_DISTANCE: - case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: case PIPE_CAP_TGSI_VOTE: case PIPE_CAP_MAX_WINDOW_RECTANGLES: /* Enables EXT_window_rectangles */ case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: @@ -287,6 +291,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, return 4; /* GLES 2.0 minimum value */ case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: + case PIPE_CAP_PREFER_BACK_BUFFER_REUSE: return 1; case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: @@ -460,11 +465,16 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, return 0; case PIPE_CAP_SAMPLER_REDUCTION_MINMAX: + case PIPE_CAP_SAMPLER_REDUCTION_MINMAX_ARB: return 0; case PIPE_CAP_ALLOW_DYNAMIC_VAO_FASTPATH: return 1; + case PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART: + case PIPE_CAP_DRAW_VERTEX_STATE: + return 0; + default: unreachable("bad PIPE_CAP_*"); } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_sse.h b/lib/mesa/src/gallium/auxiliary/util/u_sse.h index cae4138ba..e372d3b6b 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_sse.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_sse.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2008 VMware, Inc. + * Copyright 2008-2021 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -38,6 +38,8 @@ #define U_SSE_H_ #include "pipe/p_config.h" +#include "pipe/p_compiler.h" +#include "util/u_debug.h" #if defined(PIPE_ARCH_SSE) @@ -296,6 +298,408 @@ transpose2_64_2_32(const __m128i * restrict a01, #define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i)) +/* + * Implements (1-w)*a + w*b = a - wa + wb = w(b-a) + a + * ((b-a)*w >> 8) + a + * The math behind negative sub results (logic shift/mask) is tricky. + * + * w -- weight values + * a -- src0 values + * b -- src1 values + */ +static ALWAYS_INLINE __m128i +util_sse2_lerp_epi16(__m128i w, __m128i a, __m128i b) +{ + __m128i res; + + res = _mm_sub_epi16(b, a); + res = _mm_mullo_epi16(res, w); + res = _mm_srli_epi16(res, 8); + /* use add_epi8 instead of add_epi16 so no need to mask off upper bits */ + res = _mm_add_epi8(res, a); + + return res; +} + + +/* Apply premultiplied-alpha blending on two pixels simultaneously. + * All parameters are packed as 8.8 fixed point values in __m128i SSE + * registers, with the upper 8 bits all zero. + * + * a -- src alpha values + * d -- dst color values + * s -- src color values + */ +static inline __m128i +util_sse2_premul_blend_epi16( __m128i a, __m128i d, __m128i s) +{ + __m128i da, d_sub_da, tmp; + tmp = _mm_mullo_epi16(d, a); + da = _mm_srli_epi16(tmp, 8); + d_sub_da = _mm_sub_epi16(d, da); + + return _mm_add_epi16(s, d_sub_da); +} + + +/* Apply premultiplied-alpha blending on four pixels in packed BGRA + * format (one/inv_src_alpha blend mode). + * + * src -- four pixels (bgra8 format) + * dst -- four destination pixels (bgra8) + * return -- blended pixels (bgra8) + */ +static ALWAYS_INLINE __m128i +util_sse2_blend_premul_4(const __m128i src, + const __m128i dst) +{ + + __m128i al, ah, dl, dh, sl, sh, rl, rh; + __m128i zero = _mm_setzero_si128(); + + /* Blend first two pixels: + */ + sl = _mm_unpacklo_epi8(src, zero); + dl = _mm_unpacklo_epi8(dst, zero); + + al = _mm_shufflehi_epi16(sl, 0xff); + al = _mm_shufflelo_epi16(al, 0xff); + + rl = util_sse2_premul_blend_epi16(al, dl, sl); + + /* Blend second two pixels: + */ + sh = _mm_unpackhi_epi8(src, zero); + dh = _mm_unpackhi_epi8(dst, zero); + + ah = _mm_shufflehi_epi16(sh, 0xff); + ah = _mm_shufflelo_epi16(ah, 0xff); + + rh = util_sse2_premul_blend_epi16(ah, dh, sh); + + /* Pack the results down to four bgra8 pixels: + */ + return _mm_packus_epi16(rl, rh); +} + + +/* Apply src-alpha blending on four pixels in packed BGRA + * format (srcalpha/inv_src_alpha blend mode). + * + * src -- four pixels (bgra8 format) + * dst -- four destination pixels (bgra8) + * return -- blended pixels (bgra8) + */ +static ALWAYS_INLINE __m128i +util_sse2_blend_srcalpha_4(const __m128i src, + const __m128i dst) +{ + + __m128i al, ah, dl, dh, sl, sh, rl, rh; + __m128i zero = _mm_setzero_si128(); + + /* Blend first two pixels: + */ + sl = _mm_unpacklo_epi8(src, zero); + dl = _mm_unpacklo_epi8(dst, zero); + + al = _mm_shufflehi_epi16(sl, 0xff); + al = _mm_shufflelo_epi16(al, 0xff); + + rl = util_sse2_lerp_epi16(al, dl, sl); + + /* Blend second two pixels: + */ + sh = _mm_unpackhi_epi8(src, zero); + dh = _mm_unpackhi_epi8(dst, zero); + + ah = _mm_shufflehi_epi16(sh, 0xff); + ah = _mm_shufflelo_epi16(ah, 0xff); + + rh = util_sse2_lerp_epi16(ah, dh, sh); + + /* Pack the results down to four bgra8 pixels: + */ + return _mm_packus_epi16(rl, rh); +} + + +/** + * premultiplies src with constant alpha then + * does one/inv_src_alpha blend. + * + * src 16xi8 (normalized) + * dst 16xi8 (normalized) + * cst_alpha (constant alpha (u8 value)) + */ +static ALWAYS_INLINE __m128i +util_sse2_blend_premul_src_4(const __m128i src, + const __m128i dst, + const unsigned cst_alpha) +{ + + __m128i srca, d, s, rl, rh; + __m128i zero = _mm_setzero_si128(); + __m128i cst_alpha_vec = _mm_set1_epi16(cst_alpha); + + /* Blend first two pixels: + */ + s = _mm_unpacklo_epi8(src, zero); + s = _mm_mullo_epi16(s, cst_alpha_vec); + /* the shift will cause some precision loss */ + s = _mm_srli_epi16(s, 8); + + srca = _mm_shufflehi_epi16(s, 0xff); + srca = _mm_shufflelo_epi16(srca, 0xff); + + d = _mm_unpacklo_epi8(dst, zero); + rl = util_sse2_premul_blend_epi16(srca, d, s); + + /* Blend second two pixels: + */ + s = _mm_unpackhi_epi8(src, zero); + s = _mm_mullo_epi16(s, cst_alpha_vec); + /* the shift will cause some precision loss */ + s = _mm_srli_epi16(s, 8); + + srca = _mm_shufflehi_epi16(s, 0xff); + srca = _mm_shufflelo_epi16(srca, 0xff); + + d = _mm_unpackhi_epi8(dst, zero); + rh = util_sse2_premul_blend_epi16(srca, d, s); + + /* Pack the results down to four bgra8 pixels: + */ + return _mm_packus_epi16(rl, rh); +} + + +/** + * Linear interpolation with SSE2. + * + * dst, src0, src1 are 16 x i8 vectors, with [0..255] normalized values. + * + * weight_lo and weight_hi should be a 8 x i16 vectors, in 8.8 fixed point + * format, for the low and high components. + * We'd want to pass these as values but MSVC limitation forces us to pass these + * as pointers since it will complain if more than 3 __m128 are passed by value. + */ +static ALWAYS_INLINE __m128i +util_sse2_lerp_epi8_fixed88(__m128i src0, __m128i src1, + const __m128i * restrict weight_lo, + const __m128i * restrict weight_hi) +{ + const __m128i zero = _mm_setzero_si128(); + + __m128i src0_lo = _mm_unpacklo_epi8(src0, zero); + __m128i src0_hi = _mm_unpackhi_epi8(src0, zero); + + __m128i src1_lo = _mm_unpacklo_epi8(src1, zero); + __m128i src1_hi = _mm_unpackhi_epi8(src1, zero); + + __m128i dst_lo; + __m128i dst_hi; + + dst_lo = util_sse2_lerp_epi16(*weight_lo, src0_lo, src1_lo); + dst_hi = util_sse2_lerp_epi16(*weight_hi, src0_hi, src1_hi); + + return _mm_packus_epi16(dst_lo, dst_hi); +} + + +/** + * Linear interpolation with SSE2. + * + * dst, src0, src1 are 16 x i8 vectors, with [0..255] normalized values. + * + * weight should be a 16 x i8 vector, in 0.8 fixed point values. + */ +static ALWAYS_INLINE __m128i +util_sse2_lerp_epi8_fixed08(__m128i src0, __m128i src1, + __m128i weight) +{ + const __m128i zero = _mm_setzero_si128(); + __m128i weight_lo = _mm_unpacklo_epi8(weight, zero); + __m128i weight_hi = _mm_unpackhi_epi8(weight, zero); + + return util_sse2_lerp_epi8_fixed88(src0, src1, + &weight_lo, &weight_hi); +} + + +/** + * Linear interpolation with SSE2. + * + * dst, src0, src1, and weight are 16 x i8 vectors, with [0..255] normalized + * values. + */ +static ALWAYS_INLINE __m128i +util_sse2_lerp_unorm8(__m128i src0, __m128i src1, + __m128i weight) +{ + const __m128i zero = _mm_setzero_si128(); + __m128i weight_lo = _mm_unpacklo_epi8(weight, zero); + __m128i weight_hi = _mm_unpackhi_epi8(weight, zero); + +#if 0 + /* + * Rescale from [0..255] to [0..256]. + */ + weight_lo = _mm_add_epi16(weight_lo, _mm_srli_epi16(weight_lo, 7)); + weight_hi = _mm_add_epi16(weight_hi, _mm_srli_epi16(weight_hi, 7)); +#endif + + return util_sse2_lerp_epi8_fixed88(src0, src1, + &weight_lo, &weight_hi); +} + + +/** + * Linear interpolation with SSE2. + * + * dst, src0, src1, src2, src3 are 16 x i8 vectors, with [0..255] normalized + * values. + * + * ws_lo, ws_hi, wt_lo, wt_hi should be a 8 x i16 vectors, in 8.8 fixed point + * format, for the low and high components. + * We'd want to pass these as values but MSVC limitation forces us to pass these + * as pointers since it will complain if more than 3 __m128 are passed by value. + * + * This uses ws_lo, ws_hi to interpolate between src0 and src1, as well as to + * interpolate between src2 and src3, then uses wt_lo and wt_hi to interpolate + * between the resulting vectors. + */ +static ALWAYS_INLINE __m128i +util_sse2_lerp_2d_epi8_fixed88(__m128i src0, __m128i src1, + const __m128i * restrict src2, + const __m128i * restrict src3, + const __m128i * restrict ws_lo, + const __m128i * restrict ws_hi, + const __m128i * restrict wt_lo, + const __m128i * restrict wt_hi) +{ + const __m128i zero = _mm_setzero_si128(); + + __m128i src0_lo = _mm_unpacklo_epi8(src0, zero); + __m128i src0_hi = _mm_unpackhi_epi8(src0, zero); + + __m128i src1_lo = _mm_unpacklo_epi8(src1, zero); + __m128i src1_hi = _mm_unpackhi_epi8(src1, zero); + + __m128i src2_lo = _mm_unpacklo_epi8(*src2, zero); + __m128i src2_hi = _mm_unpackhi_epi8(*src2, zero); + + __m128i src3_lo = _mm_unpacklo_epi8(*src3, zero); + __m128i src3_hi = _mm_unpackhi_epi8(*src3, zero); + + __m128i dst_lo, dst01_lo, dst23_lo; + __m128i dst_hi, dst01_hi, dst23_hi; + + dst01_lo = util_sse2_lerp_epi16(*ws_lo, src0_lo, src1_lo); + dst01_hi = util_sse2_lerp_epi16(*ws_hi, src0_hi, src1_hi); + dst23_lo = util_sse2_lerp_epi16(*ws_lo, src2_lo, src3_lo); + dst23_hi = util_sse2_lerp_epi16(*ws_hi, src2_hi, src3_hi); + + dst_lo = util_sse2_lerp_epi16(*wt_lo, dst01_lo, dst23_lo); + dst_hi = util_sse2_lerp_epi16(*wt_hi, dst01_hi, dst23_hi); + + return _mm_packus_epi16(dst_lo, dst_hi); +} + +/** + * Stretch a row of pixels using linear filter. + * + * Uses Bresenham's line algorithm using 16.16 fixed point representation for + * the error term. + * + * @param dst_width destination width in pixels + * @param src_x start x0 in 16.16 fixed point format + * @param src_xstep step in 16.16. fixed point format + * + * @return final src_x value (i.e., src_x + dst_width*src_xstep) + */ +static ALWAYS_INLINE int32_t +util_sse2_stretch_row_8unorm(__m128i * restrict dst, + int32_t dst_width, + const uint32_t * restrict src, + int32_t src_x, + int32_t src_xstep) +{ + int16_t error0, error1, error2, error3; + __m128i error_lo, error_hi, error_step; + + assert(dst_width >= 0); + assert(dst_width % 4 == 0); + + error0 = src_x; + error1 = error0 + src_xstep; + error2 = error1 + src_xstep; + error3 = error2 + src_xstep; + + error_lo = _mm_setr_epi16(error0, error0, error0, error0, + error1, error1, error1, error1); + error_hi = _mm_setr_epi16(error2, error2, error2, error2, + error3, error3, error3, error3); + error_step = _mm_set1_epi16(src_xstep << 2); + + dst_width >>= 2; + while (dst_width) { + uint16_t src_x0; + uint16_t src_x1; + uint16_t src_x2; + uint16_t src_x3; + __m128i src0, src1; + __m128i weight_lo, weight_hi; + + /* + * It is faster to re-compute the coordinates in the scalar integer unit here, + * than to fetch the values from the SIMD integer unit. + */ + + src_x0 = src_x >> 16; + src_x += src_xstep; + src_x1 = src_x >> 16; + src_x += src_xstep; + src_x2 = src_x >> 16; + src_x += src_xstep; + src_x3 = src_x >> 16; + src_x += src_xstep; + + /* + * Fetch pairs of pixels 64bit at a time, and then swizzle them inplace. + */ + + { + __m128i src_00_10 = _mm_loadl_epi64((const __m128i *)&src[src_x0]); + __m128i src_01_11 = _mm_loadl_epi64((const __m128i *)&src[src_x1]); + __m128i src_02_12 = _mm_loadl_epi64((const __m128i *)&src[src_x2]); + __m128i src_03_13 = _mm_loadl_epi64((const __m128i *)&src[src_x3]); + + __m128i src_00_01_10_11 = _mm_unpacklo_epi32(src_00_10, src_01_11); + __m128i src_02_03_12_13 = _mm_unpacklo_epi32(src_02_12, src_03_13); + + src0 = _mm_unpacklo_epi64(src_00_01_10_11, src_02_03_12_13); + src1 = _mm_unpackhi_epi64(src_00_01_10_11, src_02_03_12_13); + } + + weight_lo = _mm_srli_epi16(error_lo, 8); + weight_hi = _mm_srli_epi16(error_hi, 8); + + *dst = util_sse2_lerp_epi8_fixed88(src0, src1, + &weight_lo, &weight_hi); + + error_lo = _mm_add_epi16(error_lo, error_step); + error_hi = _mm_add_epi16(error_hi, error_step); + + ++dst; + --dst_width; + } + + return src_x; +} + + + #endif /* PIPE_ARCH_SSE */ #endif /* U_SSE_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_trace_gallium.c b/lib/mesa/src/gallium/auxiliary/util/u_trace_gallium.c new file mode 100644 index 000000000..3e9a254a4 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_trace_gallium.c @@ -0,0 +1,96 @@ +/* + * Copyright © 2020 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "u_trace_gallium.h" +#include "u_inlines.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" + +#include "u_tracepoints.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static void * +u_trace_pipe_create_ts_buffer(struct u_trace_context *utctx, uint32_t size) +{ + struct pipe_context *ctx = utctx->pctx; + + struct pipe_resource tmpl = { + .target = PIPE_BUFFER, + .format = PIPE_FORMAT_R8_UNORM, + .bind = PIPE_BIND_QUERY_BUFFER | PIPE_BIND_LINEAR, + .width0 = size, + .height0 = 1, + .depth0 = 1, + .array_size = 1, + }; + + return ctx->screen->resource_create(ctx->screen, &tmpl); +} + +static void +u_trace_pipe_delete_ts_buffer(struct u_trace_context *utctx, void *timestamps) +{ + struct pipe_resource *buffer = timestamps; + pipe_resource_reference(&buffer, NULL); +} + +void +u_trace_pipe_context_init(struct u_trace_context *utctx, + struct pipe_context *pctx, + u_trace_record_ts record_timestamp, + u_trace_read_ts read_timestamp, + u_trace_delete_flush_data delete_flush_data) +{ + u_trace_context_init(utctx, pctx, + u_trace_pipe_create_ts_buffer, + u_trace_pipe_delete_ts_buffer, + record_timestamp, + read_timestamp, + delete_flush_data); +} + +inline void +trace_framebuffer_state(struct u_trace *ut, void *cs, const struct pipe_framebuffer_state *pfb) +{ + if (likely(!ut->enabled)) + return; + + trace_framebuffer(ut, cs, pfb); + + for (unsigned i = 0; i < pfb->nr_cbufs; i++) { + if (pfb->cbufs[i]) { + trace_surface(ut, cs, pfb->cbufs[i]); + } + } + if (pfb->zsbuf) { + trace_surface(ut, cs, pfb->zsbuf); + } +} + +#ifdef __cplusplus +} +#endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_trace_gallium.h b/lib/mesa/src/gallium/auxiliary/util/u_trace_gallium.h new file mode 100644 index 000000000..e37e3e663 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_trace_gallium.h @@ -0,0 +1,57 @@ +/* + * Copyright © 2020 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _U_TRACE_GALLIUM_H +#define _U_TRACE_GALLIUM_H + +#include "util/perf/u_trace.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Gallium specific u_trace helpers */ + +struct pipe_context; +struct pipe_framebuffer_state; + +void +u_trace_pipe_context_init(struct u_trace_context *utctx, + struct pipe_context *pctx, + u_trace_record_ts record_timestamp, + u_trace_read_ts read_timestamp, + u_trace_delete_flush_data delete_flush_data); + +/* + * In some cases it is useful to have composite tracepoints like this, + * to log more complex data structures. + */ + +void +trace_framebuffer_state(struct u_trace *ut, void *cs, const struct pipe_framebuffer_state *pfb); + +#ifdef __cplusplus +} +#endif + +#endif /* _U_TRACE_GALLIUM_H */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_tracepoints.py b/lib/mesa/src/gallium/auxiliary/util/u_tracepoints.py index f8a70d05c..30aaab9df 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_tracepoints.py +++ b/lib/mesa/src/gallium/auxiliary/util/u_tracepoints.py @@ -37,6 +37,8 @@ sys.path.insert(0, args.import_path) from u_trace import Header from u_trace import Tracepoint +from u_trace import TracepointArg as Arg +from u_trace import TracepointArgStruct as ArgStruct from u_trace import utrace_generate # @@ -47,11 +49,11 @@ Header('pipe/p_state.h') Header('util/format/u_format.h') Tracepoint('surface', - args=[['const struct pipe_surface *', 'psurf']], - tp_struct=[['uint16_t', 'width', 'psurf->width'], - ['uint16_t', 'height', 'psurf->height'], - ['uint8_t', 'nr_samples', 'psurf->nr_samples'], - ['const char *', 'format', 'util_format_short_name(psurf->format)']], + args=[ArgStruct(type='const struct pipe_surface *', var='psurf')], + tp_struct=[Arg(type='uint16_t', name='width', var='psurf->width', c_format='%u'), + Arg(type='uint16_t', name='height', var='psurf->height', c_format='%u'), + Arg(type='uint8_t', name='nr_samples', var='psurf->nr_samples', c_format='%u'), + Arg(type='const char *', name='format', var='util_format_short_name(psurf->format)', c_format='%s')], tp_print=['%ux%u@%u, fmt=%s', '__entry->width', '__entry->height', @@ -61,12 +63,12 @@ Tracepoint('surface', # Note: called internally from trace_framebuffer_state() Tracepoint('framebuffer', - args=[['const struct pipe_framebuffer_state *', 'pfb']], - tp_struct=[['uint16_t', 'width', 'pfb->width'], - ['uint16_t', 'height', 'pfb->height'], - ['uint8_t', 'layers', 'pfb->layers'], - ['uint8_t', 'samples', 'pfb->samples'], - ['uint8_t', 'nr_cbufs', 'pfb->nr_cbufs']], + args=[ArgStruct(type='const struct pipe_framebuffer_state *', var='pfb')], + tp_struct=[Arg(type='uint16_t', name='width', var='pfb->width', c_format='%u'), + Arg(type='uint16_t', name='height', var='pfb->height', c_format='%u'), + Arg(type='uint8_t', name='layers', var='pfb->layers', c_format='%u'), + Arg(type='uint8_t', name='samples', var='pfb->samples', c_format='%u'), + Arg(type='uint8_t', name='nr_cbufs', var='pfb->nr_cbufs', c_format='%u')], tp_print=['%ux%ux%u@%u, nr_cbufs: %u', '__entry->width', '__entry->height', @@ -76,17 +78,17 @@ Tracepoint('framebuffer', ) Tracepoint('grid_info', - args=[['const struct pipe_grid_info *', 'pgrid']], - tp_struct=[['uint8_t', 'work_dim', 'pgrid->work_dim'], - ['uint16_t', 'block_x', 'pgrid->block[0]'], - ['uint16_t', 'block_y', 'pgrid->block[1]'], - ['uint16_t', 'block_z', 'pgrid->block[2]'], - ['uint16_t', 'grid_x', 'pgrid->grid[0]'], - ['uint16_t', 'grid_y', 'pgrid->grid[1]'], - ['uint16_t', 'grid_z', 'pgrid->grid[2]']], + args=[ArgStruct(type='const struct pipe_grid_info *', var='pgrid')], + tp_struct=[Arg(type='uint8_t', name='work_dim', var='pgrid->work_dim', c_format='%u'), + Arg(type='uint16_t', name='block_x', var='pgrid->block[0]', c_format='%u'), + Arg(type='uint16_t', name='block_y', var='pgrid->block[1]', c_format='%u'), + Arg(type='uint16_t', name='block_z', var='pgrid->block[2]', c_format='%u'), + Arg(type='uint16_t', name='grid_x', var='pgrid->grid[0]', c_format='%u'), + Arg(type='uint16_t', name='grid_y', var='pgrid->grid[1]', c_format='%u'), + Arg(type='uint16_t', name='grid_z', var='pgrid->grid[2]', c_format='%u')], tp_print=['work_dim=%u, block=%ux%ux%u, grid=%ux%ux%u', '__entry->work_dim', '__entry->block_x', '__entry->block_y', '__entry->block_z', '__entry->grid_x', '__entry->grid_y', '__entry->grid_z'], ) -utrace_generate(cpath=args.src, hpath=args.hdr) +utrace_generate(cpath=args.src, hpath=args.hdr, ctx_param='struct pipe_context *pctx') diff --git a/lib/mesa/src/gallium/auxiliary/util/u_transfer.c b/lib/mesa/src/gallium/auxiliary/util/u_transfer.c index 84b80d400..80576ddf1 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_transfer.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_transfer.c @@ -31,12 +31,12 @@ void u_default_buffer_subdata(struct pipe_context *pipe, u_box_1d(offset, size, &box); - map = pipe->transfer_map(pipe, resource, 0, usage, &box, &transfer); + map = pipe->buffer_map(pipe, resource, 0, usage, &box, &transfer); if (!map) return; memcpy(map, data, size); - pipe_transfer_unmap(pipe, transfer); + pipe_buffer_unmap(pipe, transfer); } void u_default_texture_subdata(struct pipe_context *pipe, @@ -60,7 +60,7 @@ void u_default_texture_subdata(struct pipe_context *pipe, /* texture_subdata implicitly discards the rewritten buffer range */ usage |= PIPE_MAP_DISCARD_RANGE; - map = pipe->transfer_map(pipe, + map = pipe->texture_map(pipe, resource, level, usage, @@ -81,19 +81,9 @@ void u_default_texture_subdata(struct pipe_context *pipe, layer_stride, /* bytes */ 0, 0, 0); - pipe_transfer_unmap(pipe, transfer); + pipe_texture_unmap(pipe, transfer); } - -bool u_default_resource_get_handle(UNUSED struct pipe_screen *screen, - UNUSED struct pipe_resource *resource, - UNUSED struct winsys_handle *handle) -{ - return FALSE; -} - - - void u_default_transfer_flush_region(UNUSED struct pipe_context *pipe, UNUSED struct pipe_transfer *transfer, UNUSED const struct pipe_box *box) @@ -101,59 +91,3 @@ void u_default_transfer_flush_region(UNUSED struct pipe_context *pipe, /* This is a no-op implementation, nothing to do. */ } - -void u_default_transfer_unmap(UNUSED struct pipe_context *pipe, - UNUSED struct pipe_transfer *transfer) -{ -} - - -static inline struct u_resource * -u_resource( struct pipe_resource *res ) -{ - return (struct u_resource *)res; -} - -bool u_resource_get_handle_vtbl(struct pipe_screen *screen, - UNUSED struct pipe_context *ctx, - struct pipe_resource *resource, - struct winsys_handle *handle, - UNUSED unsigned usage) -{ - struct u_resource *ur = u_resource(resource); - return ur->vtbl->resource_get_handle(screen, resource, handle); -} - -void u_resource_destroy_vtbl(struct pipe_screen *screen, - struct pipe_resource *resource) -{ - struct u_resource *ur = u_resource(resource); - ur->vtbl->resource_destroy(screen, resource); -} - -void *u_transfer_map_vtbl(struct pipe_context *context, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - struct pipe_transfer **transfer) -{ - struct u_resource *ur = u_resource(resource); - return ur->vtbl->transfer_map(context, resource, level, usage, box, - transfer); -} - -void u_transfer_flush_region_vtbl( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) -{ - struct u_resource *ur = u_resource(transfer->resource); - ur->vtbl->transfer_flush_region(pipe, transfer, box); -} - -void u_transfer_unmap_vtbl( struct pipe_context *pipe, - struct pipe_transfer *transfer ) -{ - struct u_resource *ur = u_resource(transfer->resource); - ur->vtbl->transfer_unmap(pipe, transfer); -} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_transfer_helper.c b/lib/mesa/src/gallium/auxiliary/util/u_transfer_helper.c index 47898e0bd..d1e8d123a 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_transfer_helper.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_transfer_helper.c @@ -213,7 +213,7 @@ transfer_map_msaa(struct pipe_context *pctx, map_box.x = 0; map_box.y = 0; - void *ss_map = pctx->transfer_map(pctx, trans->ss, 0, usage, &map_box, + void *ss_map = pctx->texture_map(pctx, trans->ss, 0, usage, &map_box, &trans->trans); if (!ss_map) { free(trans); @@ -505,7 +505,7 @@ u_transfer_helper_transfer_unmap(struct pipe_context *pctx, * so don't call helper->vtbl->transfer_unmap() directly */ if (trans->ss) { - pctx->transfer_unmap(pctx, trans->trans); + pctx->texture_unmap(pctx, trans->trans); pipe_resource_reference(&trans->ss, NULL); } else { helper->vtbl->transfer_unmap(pctx, trans->trans); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vertex_state_cache.c b/lib/mesa/src/gallium/auxiliary/util/u_vertex_state_cache.c new file mode 100644 index 000000000..f98a1071a --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_vertex_state_cache.c @@ -0,0 +1,134 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "util/u_vertex_state_cache.h" +#include "util/u_inlines.h" +#include "util/hash_table.h" +#include "util/set.h" + +static uint32_t key_hash(const void *key) +{ + const struct pipe_vertex_state *state = key; + + return _mesa_hash_data(&state->input, sizeof(state->input)); +} + +static bool key_equals(const void *a, const void *b) +{ + const struct pipe_vertex_state *sa = a; + const struct pipe_vertex_state *sb = b; + + return !memcmp(&sa->input, &sb->input, sizeof(sa->input)); +} + +void +util_vertex_state_cache_init(struct util_vertex_state_cache *cache, + pipe_create_vertex_state_func create, + pipe_vertex_state_destroy_func destroy) +{ + simple_mtx_init(&cache->lock, mtx_plain); + cache->set = _mesa_set_create(NULL, key_hash, key_equals); + cache->create = create; + cache->destroy = destroy; +} + +void +util_vertex_state_cache_deinit(struct util_vertex_state_cache *cache) +{ + if (cache->set) { + set_foreach(cache->set, entry) { + fprintf(stderr, "mesa: vertex state cache should be empty\n"); + assert(!"vertex state cache should be empty"); + } + + _mesa_set_destroy(cache->set, NULL); + simple_mtx_destroy(&cache->lock); + } +} + +struct pipe_vertex_state * +util_vertex_state_cache_get(struct pipe_screen *screen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask, + struct util_vertex_state_cache *cache) +{ + struct pipe_vertex_state key; + + memset(&key, 0, sizeof(key)); + key.input.indexbuf = indexbuf; + key.input.vbuffer.stride = buffer->stride; + assert(!buffer->is_user_buffer); + key.input.vbuffer.buffer_offset = buffer->buffer_offset; + key.input.vbuffer.buffer = buffer->buffer; + key.input.num_elements = num_elements; + for (unsigned i = 0; i < num_elements; i++) + key.input.elements[i] = elements[i]; + key.input.full_velem_mask = full_velem_mask; + + uint32_t hash = key_hash(&key); + + /* Find the state in the live cache. */ + simple_mtx_lock(&cache->lock); + struct set_entry *entry = _mesa_set_search_pre_hashed(cache->set, hash, &key); + struct pipe_vertex_state *state = entry ? (void*)entry->key : NULL; + + /* Return if the state already exists. */ + if (state) { + /* Increase the refcount. */ + p_atomic_inc(&state->reference.count); + assert(state->reference.count >= 1); + simple_mtx_unlock(&cache->lock); + return state; + } + + state = cache->create(screen, buffer, elements, num_elements, indexbuf, + full_velem_mask); + if (state) { + assert(key_hash(state) == hash); + _mesa_set_add_pre_hashed(cache->set, hash, state); + } + + simple_mtx_unlock(&cache->lock); + return state; +} + +void +util_vertex_state_destroy(struct pipe_screen *screen, + struct util_vertex_state_cache *cache, + struct pipe_vertex_state *state) +{ + simple_mtx_lock(&cache->lock); + /* There could have been a thread race and the cache might have returned + * the vertex state being destroyed. Check the reference count and do + * nothing if it's positive. + */ + if (p_atomic_read(&state->reference.count) <= 0) { + _mesa_set_remove_key(cache->set, state); + cache->destroy(screen, state); + } + simple_mtx_unlock(&cache->lock); +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vertex_state_cache.h b/lib/mesa/src/gallium/auxiliary/util/u_vertex_state_cache.h new file mode 100644 index 000000000..902e91e43 --- /dev/null +++ b/lib/mesa/src/gallium/auxiliary/util/u_vertex_state_cache.h @@ -0,0 +1,67 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* This deduplicates pipe_vertex_state CSOs to enable draw merging in + * u_threaded_context because the draw merging is possible only if different + * display lists use the same pipe_vertex_state CSO. + */ + +#ifndef U_VERTEX_STATE_CACHE_H +#define U_VERTEX_STATE_CACHE_H + +#include "util/simple_mtx.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" + +struct util_vertex_state_cache { + simple_mtx_t lock; + struct set *set; + + pipe_create_vertex_state_func create; + pipe_vertex_state_destroy_func destroy; +}; + +void +util_vertex_state_cache_init(struct util_vertex_state_cache *cache, + pipe_create_vertex_state_func create, + pipe_vertex_state_destroy_func destroy); + +void +util_vertex_state_cache_deinit(struct util_vertex_state_cache *cache); + +struct pipe_vertex_state * +util_vertex_state_cache_get(struct pipe_screen *screen, + struct pipe_vertex_buffer *buffer, + const struct pipe_vertex_element *elements, + unsigned num_elements, + struct pipe_resource *indexbuf, + uint32_t full_velem_mask, + struct util_vertex_state_cache *cache); + +void +util_vertex_state_destroy(struct pipe_screen *screen, + struct util_vertex_state_cache *cache, + struct pipe_vertex_state *state); + +#endif |