diff options
Diffstat (limited to 'lib/mesa/src/gallium/auxiliary/util')
47 files changed, 2302 insertions, 3241 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/util/u_blit.c b/lib/mesa/src/gallium/auxiliary/util/u_blit.c deleted file mode 100644 index 1216766ee..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_blit.c +++ /dev/null @@ -1,646 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Copy/blit pixel rect between surfaces - * - * @author Brian Paul - */ - - -#include "pipe/p_context.h" -#include "util/u_debug.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_state.h" - -#include "util/u_blit.h" -#include "util/u_draw_quad.h" -#include "util/format/u_format.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "util/u_sampler.h" -#include "util/u_texture.h" -#include "util/u_simple_shaders.h" - -#include "cso_cache/cso_context.h" - - -struct blit_state -{ - struct pipe_context *pipe; - struct cso_context *cso; - - struct pipe_blend_state blend_write_color; - struct pipe_depth_stencil_alpha_state dsa_keep_depthstencil; - struct pipe_rasterizer_state rasterizer; - struct pipe_sampler_state sampler; - struct pipe_viewport_state viewport; - struct pipe_vertex_element velem[2]; - - void *vs; - void *fs[PIPE_MAX_TEXTURE_TYPES][4]; - - struct pipe_resource *vbuf; /**< quad vertices */ - unsigned vbuf_slot; - - float vertices[4][2][4]; /**< vertex/texcoords for quad */ -}; - - -/** - * Create state object for blit. - * Intended to be created once and re-used for many blit() calls. - */ -struct blit_state * -util_create_blit(struct pipe_context *pipe, struct cso_context *cso) -{ - struct blit_state *ctx; - uint i; - - ctx = CALLOC_STRUCT(blit_state); - if (!ctx) - return NULL; - - ctx->pipe = pipe; - ctx->cso = cso; - - /* disabled blending/masking */ - ctx->blend_write_color.rt[0].colormask = PIPE_MASK_RGBA; - - /* rasterizer */ - ctx->rasterizer.cull_face = PIPE_FACE_NONE; - ctx->rasterizer.half_pixel_center = 1; - ctx->rasterizer.bottom_edge_rule = 1; - ctx->rasterizer.depth_clip_near = 1; - ctx->rasterizer.depth_clip_far = 1; - - /* samplers */ - ctx->sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - ctx->sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - ctx->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - ctx->sampler.min_img_filter = 0; /* set later */ - ctx->sampler.mag_img_filter = 0; /* set later */ - - /* vertex elements state */ - for (i = 0; i < 2; i++) { - ctx->velem[i].src_offset = i * 4 * sizeof(float); - ctx->velem[i].instance_divisor = 0; - ctx->velem[i].vertex_buffer_index = 0; - ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - } - - ctx->vbuf = NULL; - - /* init vertex data that doesn't change */ - for (i = 0; i < 4; i++) { - ctx->vertices[i][0][3] = 1.0f; /* w */ - ctx->vertices[i][1][3] = 1.0f; /* q */ - } - - return ctx; -} - - -/** - * Destroy a blit context - */ -void -util_destroy_blit(struct blit_state *ctx) -{ - struct pipe_context *pipe = ctx->pipe; - unsigned i, j; - - if (ctx->vs) - pipe->delete_vs_state(pipe, ctx->vs); - - for (i = 0; i < ARRAY_SIZE(ctx->fs); i++) { - for (j = 0; j < ARRAY_SIZE(ctx->fs[i]); j++) { - if (ctx->fs[i][j]) - pipe->delete_fs_state(pipe, ctx->fs[i][j]); - } - } - - pipe_resource_reference(&ctx->vbuf, NULL); - - FREE(ctx); -} - - -/** - * Helper function to set the fragment shaders. - */ -static inline void -set_fragment_shader(struct blit_state *ctx, - enum pipe_format format, - boolean src_xrbias, - enum pipe_texture_target pipe_tex) -{ - enum tgsi_return_type stype; - unsigned idx; - - if (util_format_is_pure_uint(format)) { - stype = TGSI_RETURN_TYPE_UINT; - idx = 0; - } else if (util_format_is_pure_sint(format)) { - stype = TGSI_RETURN_TYPE_SINT; - idx = 1; - } else { - stype = TGSI_RETURN_TYPE_FLOAT; - idx = 2; - } - - if (src_xrbias) { - assert(stype == TGSI_RETURN_TYPE_FLOAT); - idx = 3; - if (!ctx->fs[pipe_tex][idx]) { - enum tgsi_texture_type tgsi_tex = - util_pipe_tex_to_tgsi_tex(pipe_tex, 0); - ctx->fs[pipe_tex][idx] = - util_make_fragment_tex_shader_xrbias(ctx->pipe, tgsi_tex); - } - } - else if (!ctx->fs[pipe_tex][idx]) { - enum tgsi_texture_type tgsi_tex = util_pipe_tex_to_tgsi_tex(pipe_tex, 0); - - /* OpenGL does not allow blits from signed to unsigned integer - * or vice versa. */ - ctx->fs[pipe_tex][idx] = - util_make_fragment_tex_shader_writemask(ctx->pipe, tgsi_tex, - TGSI_INTERPOLATE_LINEAR, - TGSI_WRITEMASK_XYZW, - stype, stype, false, false); - } - - cso_set_fragment_shader_handle(ctx->cso, ctx->fs[pipe_tex][idx]); -} - - -/** - * Helper function to set the vertex shader. - */ -static inline void -set_vertex_shader(struct blit_state *ctx) -{ - /* vertex shader - still required to provide the linkage between - * fragment shader input semantics and vertex_element/buffers. - */ - if (!ctx->vs) { - const enum tgsi_semantic semantic_names[] = { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC - }; - const uint semantic_indexes[] = { 0, 0 }; - ctx->vs = util_make_vertex_passthrough_shader(ctx->pipe, 2, - semantic_names, - semantic_indexes, FALSE); - } - - cso_set_vertex_shader_handle(ctx->cso, ctx->vs); -} - - -/** - * Get offset of next free slot in vertex buffer for quad vertices. - */ -static unsigned -get_next_slot(struct blit_state *ctx) -{ - const unsigned max_slots = 4096 / sizeof ctx->vertices; - - if (ctx->vbuf_slot >= max_slots) { - pipe_resource_reference(&ctx->vbuf, NULL); - ctx->vbuf_slot = 0; - } - - if (!ctx->vbuf) { - ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - PIPE_USAGE_STREAM, - max_slots * sizeof ctx->vertices); - } - - return ctx->vbuf_slot++ * sizeof ctx->vertices; -} - - - - -/** - * Setup vertex data for the textured quad we'll draw. - * Note: y=0=top - * - * FIXME: We should call util_map_texcoords2d_onto_cubemap - * for cubemaps. - */ -static unsigned -setup_vertex_data_tex(struct blit_state *ctx, - enum pipe_texture_target src_target, - unsigned src_face, - float x0, float y0, float x1, float y1, - float s0, float t0, float s1, float t1, - float z) -{ - unsigned offset; - - ctx->vertices[0][0][0] = x0; - ctx->vertices[0][0][1] = y0; - ctx->vertices[0][0][2] = z; - ctx->vertices[0][1][0] = s0; /*s*/ - ctx->vertices[0][1][1] = t0; /*t*/ - ctx->vertices[0][1][2] = 0; /*r*/ - - ctx->vertices[1][0][0] = x1; - ctx->vertices[1][0][1] = y0; - ctx->vertices[1][0][2] = z; - ctx->vertices[1][1][0] = s1; /*s*/ - ctx->vertices[1][1][1] = t0; /*t*/ - ctx->vertices[1][1][2] = 0; /*r*/ - - ctx->vertices[2][0][0] = x1; - ctx->vertices[2][0][1] = y1; - ctx->vertices[2][0][2] = z; - ctx->vertices[2][1][0] = s1; - ctx->vertices[2][1][1] = t1; - ctx->vertices[3][1][2] = 0; - - ctx->vertices[3][0][0] = x0; - ctx->vertices[3][0][1] = y1; - ctx->vertices[3][0][2] = z; - ctx->vertices[3][1][0] = s0; - ctx->vertices[3][1][1] = t1; - ctx->vertices[3][1][2] = 0; - - if (src_target == PIPE_TEXTURE_CUBE || - src_target == PIPE_TEXTURE_CUBE_ARRAY) { - /* Map cubemap texture coordinates inplace. */ - const unsigned stride = - sizeof ctx->vertices[0] / sizeof ctx->vertices[0][0][0]; - util_map_texcoords2d_onto_cubemap(src_face, - &ctx->vertices[0][1][0], stride, - &ctx->vertices[0][1][0], stride, - TRUE); - } - - offset = get_next_slot(ctx); - - if (ctx->vbuf) { - pipe_buffer_write_nooverlap(ctx->pipe, ctx->vbuf, - offset, sizeof(ctx->vertices), ctx->vertices); - } - - return offset; -} - - -/** - * \return TRUE if two regions overlap, FALSE otherwise - */ -static boolean -regions_overlap(int srcX0, int srcY0, - int srcX1, int srcY1, - int dstX0, int dstY0, - int dstX1, int dstY1) -{ - if (MAX2(srcX0, srcX1) <= MIN2(dstX0, dstX1)) - return FALSE; /* src completely left of dst */ - - if (MAX2(dstX0, dstX1) <= MIN2(srcX0, srcX1)) - return FALSE; /* dst completely left of src */ - - if (MAX2(srcY0, srcY1) <= MIN2(dstY0, dstY1)) - return FALSE; /* src completely above dst */ - - if (MAX2(dstY0, dstY1) <= MIN2(srcY0, srcY1)) - return FALSE; /* dst completely above src */ - - return TRUE; /* some overlap */ -} - - -/** - * Can we blit from src format to dest format with a simple copy? - */ -static boolean -formats_compatible(enum pipe_format src_format, - enum pipe_format dst_format) -{ - if (src_format == dst_format) { - return TRUE; - } - else { - const struct util_format_description *src_desc = - util_format_description(src_format); - const struct util_format_description *dst_desc = - util_format_description(dst_format); - return util_is_format_compatible(src_desc, dst_desc); - } -} - - -/** - * Copy pixel block from src surface to dst surface. - * Overlapping regions are acceptable. - * Flipping and stretching are supported. - * \param filter one of PIPE_TEX_FILTER_NEAREST/LINEAR - * \param writemask bitmask of PIPE_MASK_[RGBAZS]. Controls which channels - * in the dest surface are sourced from the src surface. - * Disabled color channels are sourced from (0,0,0,1). - */ -void -util_blit_pixels(struct blit_state *ctx, - struct pipe_resource *src_tex, - unsigned src_level, - int srcX0, int srcY0, - int srcX1, int srcY1, - int srcZ0, - struct pipe_surface *dst, - int dstX0, int dstY0, - int dstX1, int dstY1, - ASSERTED float z, - enum pipe_tex_filter filter, - uint writemask) -{ - struct pipe_context *pipe = ctx->pipe; - enum pipe_format src_format, dst_format; - const int srcW = abs(srcX1 - srcX0); - const int srcH = abs(srcY1 - srcY0); - boolean overlap; - boolean is_stencil, is_depth, blit_depth, blit_stencil; - const struct util_format_description *src_desc = - util_format_description(src_tex->format); - struct pipe_blit_info info; - - assert(filter == PIPE_TEX_FILTER_NEAREST || - filter == PIPE_TEX_FILTER_LINEAR); - - assert(src_level <= src_tex->last_level); - - /* do the regions overlap? */ - overlap = src_tex == dst->texture && - dst->u.tex.level == src_level && - dst->u.tex.first_layer == srcZ0 && - regions_overlap(srcX0, srcY0, srcX1, srcY1, - dstX0, dstY0, dstX1, dstY1); - - src_format = util_format_linear(src_tex->format); - dst_format = util_format_linear(dst->texture->format); - - /* See whether we will blit depth or stencil. */ - is_depth = util_format_has_depth(src_desc); - is_stencil = util_format_has_stencil(src_desc); - - blit_depth = is_depth && (writemask & PIPE_MASK_Z); - blit_stencil = is_stencil && (writemask & PIPE_MASK_S); - - if (is_depth || is_stencil) { - assert((writemask & PIPE_MASK_RGBA) == 0); - assert(blit_depth || blit_stencil); - } - else { - assert((writemask & PIPE_MASK_ZS) == 0); - assert(!blit_depth); - assert(!blit_stencil); - } - - /* - * XXX: z parameter is deprecated. dst->u.tex.first_layer - * specificies the destination layer. - */ - assert(z == 0.0f); - - /* - * Check for simple case: no format conversion, no flipping, no stretching, - * no overlapping, same number of samples. - * Filter mode should not matter since there's no stretching. - */ - if (formats_compatible(src_format, dst_format) && - src_tex->nr_samples == dst->texture->nr_samples && - is_stencil == blit_stencil && - is_depth == blit_depth && - srcX0 < srcX1 && - dstX0 < dstX1 && - srcY0 < srcY1 && - dstY0 < dstY1 && - (dstX1 - dstX0) == (srcX1 - srcX0) && - (dstY1 - dstY0) == (srcY1 - srcY0) && - !overlap) { - struct pipe_box src_box; - src_box.x = srcX0; - src_box.y = srcY0; - src_box.z = srcZ0; - src_box.width = srcW; - src_box.height = srcH; - src_box.depth = 1; - pipe->resource_copy_region(pipe, - dst->texture, dst->u.tex.level, - dstX0, dstY0, dst->u.tex.first_layer,/* dest */ - src_tex, src_level, - &src_box); - return; - } - - memset(&info, 0, sizeof info); - info.dst.resource = dst->texture; - info.dst.level = dst->u.tex.level; - info.dst.box.x = dstX0; - info.dst.box.y = dstY0; - info.dst.box.z = dst->u.tex.first_layer; - info.dst.box.width = dstX1 - dstX0; - info.dst.box.height = dstY1 - dstY0; - assert(info.dst.box.width >= 0); - assert(info.dst.box.height >= 0); - info.dst.box.depth = 1; - info.dst.format = dst_format; - info.src.resource = src_tex; - info.src.level = src_level; - info.src.box.x = srcX0; - info.src.box.y = srcY0; - info.src.box.z = srcZ0; - info.src.box.width = srcX1 - srcX0; - info.src.box.height = srcY1 - srcY0; - info.src.box.depth = 1; - info.src.format = src_format; - info.mask = writemask; - info.filter = filter; - info.scissor_enable = 0; - - pipe->blit(pipe, &info); -} - - -/** - * Copy pixel block from src sampler view to dst surface. - * - * The sampler view's first_level field indicates the source - * mipmap level to use. - * - * The sampler view's first_layer indicate the layer to use, but for - * cube maps it must point to the first face. Face is passed in src_face. - * - * The main advantage over util_blit_pixels is that it allows to specify - * swizzles in pipe_sampler_view::swizzle_?. - * - * But there is no control over blitting Z and/or stencil. - */ -void -util_blit_pixels_tex(struct blit_state *ctx, - struct pipe_sampler_view *src_sampler_view, - int srcX0, int srcY0, - int srcX1, int srcY1, - unsigned src_face, - struct pipe_surface *dst, - int dstX0, int dstY0, - int dstX1, int dstY1, - float z, enum pipe_tex_filter filter, - boolean src_xrbias) -{ - boolean normalized = src_sampler_view->texture->target != PIPE_TEXTURE_RECT; - struct pipe_framebuffer_state fb; - float s0, t0, s1, t1; - unsigned offset; - struct pipe_resource *tex = src_sampler_view->texture; - - assert(filter == PIPE_TEX_FILTER_NEAREST || - filter == PIPE_TEX_FILTER_LINEAR); - - assert(tex); - assert(tex->width0 != 0); - assert(tex->height0 != 0); - - s0 = (float) srcX0; - s1 = (float) srcX1; - t0 = (float) srcY0; - t1 = (float) srcY1; - - if (normalized) { - /* normalize according to the mipmap level's size */ - int level = src_sampler_view->u.tex.first_level; - float w = (float) u_minify(tex->width0, level); - float h = (float) u_minify(tex->height0, level); - s0 /= w; - s1 /= w; - t0 /= h; - t1 /= h; - } - - assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format, - PIPE_TEXTURE_2D, - dst->texture->nr_samples, - dst->texture->nr_storage_samples, - PIPE_BIND_RENDER_TARGET)); - - /* save state (restored below) */ - cso_save_state(ctx->cso, (CSO_BIT_BLEND | - CSO_BIT_DEPTH_STENCIL_ALPHA | - CSO_BIT_RASTERIZER | - CSO_BIT_SAMPLE_MASK | - CSO_BIT_MIN_SAMPLES | - CSO_BIT_FRAGMENT_SAMPLERS | - CSO_BIT_FRAGMENT_SAMPLER_VIEWS | - CSO_BIT_STREAM_OUTPUTS | - CSO_BIT_VIEWPORT | - CSO_BIT_FRAMEBUFFER | - CSO_BIT_PAUSE_QUERIES | - CSO_BIT_FRAGMENT_SHADER | - CSO_BIT_VERTEX_SHADER | - CSO_BIT_TESSCTRL_SHADER | - CSO_BIT_TESSEVAL_SHADER | - CSO_BIT_GEOMETRY_SHADER | - CSO_BIT_VERTEX_ELEMENTS | - CSO_BIT_AUX_VERTEX_BUFFER_SLOT)); - - /* set misc state we care about */ - cso_set_blend(ctx->cso, &ctx->blend_write_color); - cso_set_depth_stencil_alpha(ctx->cso, &ctx->dsa_keep_depthstencil); - cso_set_sample_mask(ctx->cso, ~0); - cso_set_min_samples(ctx->cso, 1); - cso_set_rasterizer(ctx->cso, &ctx->rasterizer); - cso_set_vertex_elements(ctx->cso, 2, ctx->velem); - cso_set_stream_outputs(ctx->cso, 0, NULL, NULL); - - /* sampler */ - ctx->sampler.normalized_coords = normalized; - ctx->sampler.min_img_filter = filter; - ctx->sampler.mag_img_filter = filter; - { - const struct pipe_sampler_state *samplers[] = {&ctx->sampler}; - cso_set_samplers(ctx->cso, PIPE_SHADER_FRAGMENT, 1, samplers); - } - - /* viewport */ - ctx->viewport.scale[0] = 0.5f * dst->width; - ctx->viewport.scale[1] = 0.5f * dst->height; - ctx->viewport.scale[2] = 0.5f; - ctx->viewport.translate[0] = 0.5f * dst->width; - ctx->viewport.translate[1] = 0.5f * dst->height; - ctx->viewport.translate[2] = 0.5f; - cso_set_viewport(ctx->cso, &ctx->viewport); - - /* texture */ - cso_set_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT, 1, &src_sampler_view); - - /* shaders */ - set_fragment_shader(ctx, src_sampler_view->format, - src_xrbias, - src_sampler_view->texture->target); - set_vertex_shader(ctx); - cso_set_tessctrl_shader_handle(ctx->cso, NULL); - cso_set_tesseval_shader_handle(ctx->cso, NULL); - cso_set_geometry_shader_handle(ctx->cso, NULL); - - /* drawing dest */ - memset(&fb, 0, sizeof(fb)); - fb.width = dst->width; - fb.height = dst->height; - fb.nr_cbufs = 1; - fb.cbufs[0] = dst; - cso_set_framebuffer(ctx->cso, &fb); - - /* draw quad */ - offset = setup_vertex_data_tex(ctx, - src_sampler_view->texture->target, - src_face, - (float) dstX0 / dst->width * 2.0f - 1.0f, - (float) dstY0 / dst->height * 2.0f - 1.0f, - (float) dstX1 / dst->width * 2.0f - 1.0f, - (float) dstY1 / dst->height * 2.0f - 1.0f, - s0, t0, s1, t1, - z); - - util_draw_vertex_buffer(ctx->pipe, ctx->cso, ctx->vbuf, 0, - offset, - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ - - /* restore state we changed */ - cso_restore_state(ctx->cso); -} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_blit.h b/lib/mesa/src/gallium/auxiliary/util/u_blit.h deleted file mode 100644 index b50edab78..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_blit.h +++ /dev/null @@ -1,81 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef U_BLIT_H -#define U_BLIT_H - - -#include "pipe/p_compiler.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -struct cso_context; -struct pipe_context; -struct pipe_resource; -struct pipe_sampler_view; -struct pipe_surface; - -extern struct blit_state * -util_create_blit(struct pipe_context *pipe, struct cso_context *cso); - -extern void -util_destroy_blit(struct blit_state *ctx); - -extern void -util_blit_pixels(struct blit_state *ctx, - struct pipe_resource *src_tex, - unsigned src_level, - int srcX0, int srcY0, - int srcX1, int srcY1, - int srcZ0, - struct pipe_surface *dst, - int dstX0, int dstY0, - int dstX1, int dstY1, - float z, uint filter, - uint writemask); - -extern void -util_blit_pixels_tex(struct blit_state *ctx, - struct pipe_sampler_view *src_sampler_view, - int srcX0, int srcY0, - int srcX1, int srcY1, - unsigned src_face, - struct pipe_surface *dst, - int dstX0, int dstY0, - int dstX1, int dstY1, - float z, uint filter); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_blitter.c b/lib/mesa/src/gallium/auxiliary/util/u_blitter.c index fd7813aae..42bbd47d9 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_blitter.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_blitter.c @@ -107,6 +107,9 @@ struct blitter_context_priv */ void *fs_pack_color_zs[TGSI_TEXTURE_COUNT][10]; + /* FS which is meant for replicating indevidual stencil-buffer bits */ + void *fs_stencil_blit_fallback[2]; + /* Blend state. */ void *blend[PIPE_MASK_RGBA+1][2]; /**< blend state with writemask */ void *blend_clear[GET_CLEAR_BLEND_STATE_IDX(PIPE_CLEAR_COLOR)+1]; @@ -116,6 +119,7 @@ struct blitter_context_priv void *dsa_write_depth_keep_stencil; void *dsa_keep_depth_stencil; void *dsa_keep_depth_write_stencil; + void *dsa_replicate_stencil_bit[8]; /* Vertex elements states. */ void *velem_state; @@ -238,9 +242,9 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->dsa_keep_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); - dsa.depth.enabled = 1; - dsa.depth.writemask = 1; - dsa.depth.func = PIPE_FUNC_ALWAYS; + dsa.depth_enabled = 1; + dsa.depth_writemask = 1; + dsa.depth_func = PIPE_FUNC_ALWAYS; ctx->dsa_write_depth_keep_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); @@ -254,8 +258,8 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->dsa_write_depth_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); - dsa.depth.enabled = 0; - dsa.depth.writemask = 0; + dsa.depth_enabled = 0; + dsa.depth_writemask = 0; ctx->dsa_keep_depth_write_stencil = pipe->create_depth_stencil_alpha_state(pipe, &dsa); @@ -494,6 +498,11 @@ void util_blitter_destroy(struct blitter_context *blitter) pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil); pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil); + for (i = 0; i < ARRAY_SIZE(ctx->dsa_replicate_stencil_bit); i++) { + if (ctx->dsa_replicate_stencil_bit[i]) + pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_replicate_stencil_bit[i]); + } + unsigned scissor, msaa; for (scissor = 0; scissor < 2; scissor++) { for (msaa = 0; msaa < 2; msaa++) { @@ -565,6 +574,10 @@ void util_blitter_destroy(struct blitter_context *blitter) if (ctx->fs_write_all_cbufs) ctx->delete_fs_state(pipe, ctx->fs_write_all_cbufs); + for (i = 0; i < ARRAY_SIZE(ctx->fs_stencil_blit_fallback); ++i) + if (ctx->fs_stencil_blit_fallback[i]) + ctx->delete_fs_state(pipe, ctx->fs_stencil_blit_fallback[i]); + pipe->delete_sampler_state(pipe, ctx->sampler_state_rect_linear); pipe->delete_sampler_state(pipe, ctx->sampler_state_rect); pipe->delete_sampler_state(pipe, ctx->sampler_state_linear); @@ -620,9 +633,9 @@ void util_blitter_restore_vertex_states(struct blitter_context *blitter) /* Vertex buffer. */ if (ctx->base.saved_vertex_buffer.buffer.resource) { - pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, + pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, 0, true, &ctx->base.saved_vertex_buffer); - pipe_vertex_buffer_unreference(&ctx->base.saved_vertex_buffer); + ctx->base.saved_vertex_buffer.buffer.resource = NULL; } /* Vertex elements. */ @@ -701,7 +714,7 @@ void util_blitter_restore_fragment_states(struct blitter_context *blitter) /* Miscellaneous states. */ /* XXX check whether these are saved and whether they need to be restored * (depending on the operation) */ - pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref); + pipe->set_stencil_ref(pipe, ctx->base.saved_stencil_ref); if (!blitter->skip_viewport_restore) pipe->set_viewport_states(pipe, 0, 1, &ctx->base.saved_viewport); @@ -771,7 +784,7 @@ void util_blitter_restore_textures(struct blitter_context *blitter) /* Fragment sampler views. */ pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, - ctx->base.saved_num_sampler_views, + ctx->base.saved_num_sampler_views, 0, ctx->base.saved_sampler_views); for (i = 0; i < ctx->base.saved_num_sampler_views; i++) @@ -785,8 +798,8 @@ void util_blitter_restore_constant_buffer_state(struct blitter_context *blitter) struct pipe_context *pipe = blitter->pipe; pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, blitter->cb_slot, - &blitter->saved_fs_constant_buffer); - pipe_resource_reference(&blitter->saved_fs_constant_buffer.buffer, NULL); + true, &blitter->saved_fs_constant_buffer); + blitter->saved_fs_constant_buffer.buffer = NULL; } static void blitter_set_rectangle(struct blitter_context_priv *ctx, @@ -814,6 +827,10 @@ static void blitter_set_rectangle(struct blitter_context_priv *ctx, viewport.translate[0] = 0.5f * ctx->dst_width; viewport.translate[1] = 0.5f * ctx->dst_height; viewport.translate[2] = depth; + viewport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X; + viewport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y; + viewport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z; + viewport.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W; ctx->base.pipe->set_viewport_states(ctx->base.pipe, 0, 1, &viewport); } @@ -1339,7 +1356,7 @@ static void blitter_draw(struct blitter_context_priv *ctx, return; u_upload_unmap(pipe->stream_uploader); - pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb); + pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, 0, false, &vb); pipe->bind_vertex_elements_state(pipe, vertex_elements_cso); pipe->bind_vs_state(pipe, get_vs(&ctx->base)); @@ -1380,7 +1397,7 @@ void util_blitter_draw_rectangle(struct blitter_context *blitter, ctx->vertices[i][1][2] = attrib->texcoord.z; ctx->vertices[i][1][3] = attrib->texcoord.w; } - /* fall through */ + FALLTHROUGH; case UTIL_BLITTER_ATTRIB_TEXCOORD_XY: set_texcoords_in_vertices(attrib, &ctx->vertices[0][1][0], 8); break; @@ -1419,6 +1436,7 @@ static void *get_clear_blend_state(struct blitter_context_priv *ctx, for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { if (clear_buffers & (PIPE_CLEAR_COLOR0 << i)) { blend.rt[i].colormask = PIPE_MASK_RGBA; + blend.max_rt = i; } } @@ -1482,7 +1500,7 @@ static void util_blitter_clear_custom(struct blitter_context *blitter, custom_blend, custom_dsa); sr.ref_value[0] = stencil & 0xff; - pipe->set_stencil_ref(pipe, &sr); + pipe->set_stencil_ref(pipe, sr); bind_fs_write_all_cbufs(ctx); @@ -1805,7 +1823,10 @@ static void do_blits(struct blitter_context_priv *ctx, int dst_z; for (dst_z = 0; dst_z < dstbox->depth; dst_z++) { struct pipe_surface *old; - float dst2src_scale = srcbox->depth / (float)dstbox->depth; + bool flipped = (srcbox->depth < 0); + float depth_center_offset = 0.0; + int src_depth = abs(srcbox->depth); + float src_z_step = src_depth / (float)dstbox->depth; /* Scale Z properly if the blit is scaled. * @@ -1821,12 +1842,17 @@ static void do_blits(struct blitter_context_priv *ctx, * src Z: 0 1 2 3 4 5 6 7 * dst Z: 0 1 2 3 * - * dst_offset defines the offset needed for centering the pixels and - * it works with any scaling (not just 2x). + * This calculation is taken from the radv driver. */ - float dst_offset = ((srcbox->depth - 1) - - (dstbox->depth - 1) * dst2src_scale) * 0.5; - float src_z = (dst_z + dst_offset) * dst2src_scale; + if (src_target == PIPE_TEXTURE_3D) + depth_center_offset = 0.5 / dstbox->depth * src_depth; + + if (flipped) { + src_z_step *= - 1; + depth_center_offset *= -1; + } + + float src_z = dst_z * src_z_step + depth_center_offset; /* Set framebuffer state. */ if (is_zsbuf) { @@ -2060,7 +2086,7 @@ void util_blitter_blit_generic(struct blitter_context *blitter, views[0] = src; views[1] = pipe->create_sampler_view(pipe, src->texture, &templ); - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 2, views); + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 2, 0, views); pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, 2, samplers); pipe_sampler_view_reference(&views[1], NULL); @@ -2075,13 +2101,13 @@ void util_blitter_blit_generic(struct blitter_context *blitter, view = pipe->create_sampler_view(pipe, src->texture, &templ); - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, &view); + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, &view); pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, 1, &sampler_state); pipe_sampler_view_reference(&view, NULL); } else { - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, &src); + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, &src); pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, 1, &sampler_state); } @@ -2228,7 +2254,7 @@ void util_blitter_generate_mipmap(struct blitter_context *blitter, src_templ.format = format; src_view = pipe->create_sampler_view(pipe, tex, &src_templ); - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, &src_view); + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, &src_view); do_blits(ctx, dst_view, &dstbox, src_view, tex->width0, tex->height0, &srcbox, is_depth, false); @@ -2341,7 +2367,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) { sr.ref_value[0] = stencil & 0xff; pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil); - pipe->set_stencil_ref(pipe, &sr); + pipe->set_stencil_ref(pipe, sr); } else if (clear_flags & PIPE_CLEAR_DEPTH) { pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_keep_stencil); @@ -2349,7 +2375,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter, else if (clear_flags & PIPE_CLEAR_STENCIL) { sr.ref_value[0] = stencil & 0xff; pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil); - pipe->set_stencil_ref(pipe, &sr); + pipe->set_stencil_ref(pipe, sr); } else /* hmm that should be illegal probably, or make it a no-op somewhere */ @@ -2495,7 +2521,7 @@ void util_blitter_copy_buffer(struct blitter_context *blitter, vb.buffer_offset = srcx; vb.stride = 4; - pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb); + pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, 0, false, &vb); pipe->bind_vertex_elements_state(pipe, ctx->velem_state_readbuf[0]); bind_vs_pos_only(ctx, 1); if (ctx->has_geometry_shader) @@ -2561,7 +2587,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter, blitter_check_saved_vertex_states(ctx); blitter_disable_render_cond(ctx); - pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb); + pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, 0, false, &vb); pipe->bind_vertex_elements_state(pipe, ctx->velem_state_readbuf[num_channels-1]); bind_vs_pos_only(ctx, num_channels); @@ -2720,7 +2746,7 @@ void util_blitter_custom_shader(struct blitter_context *blitter, { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->base.pipe; - struct pipe_framebuffer_state fb_state; + struct pipe_framebuffer_state fb_state = { 0 }; ctx->custom_vs = custom_vs; @@ -2746,7 +2772,6 @@ void util_blitter_custom_shader(struct blitter_context *blitter, fb_state.height = dstsurf->height; fb_state.nr_cbufs = 1; fb_state.cbufs[0] = dstsurf; - fb_state.zsbuf = 0; pipe->set_framebuffer_state(pipe, &fb_state); pipe->set_sample_mask(pipe, ~0); @@ -2763,3 +2788,156 @@ void util_blitter_custom_shader(struct blitter_context *blitter, util_blitter_restore_render_cond(blitter); util_blitter_unset_running_flag(blitter); } + +static void * +get_stencil_blit_fallback_fs(struct blitter_context_priv *ctx, bool msaa_src) +{ + if (!ctx->fs_stencil_blit_fallback[msaa_src]) { + ctx->fs_stencil_blit_fallback[msaa_src] = + util_make_fs_stencil_blit(ctx->base.pipe, msaa_src); + } + + return ctx->fs_stencil_blit_fallback[msaa_src]; +} + +static void * +get_stencil_blit_fallback_dsa(struct blitter_context_priv *ctx, unsigned i) +{ + assert(i < ARRAY_SIZE(ctx->dsa_replicate_stencil_bit)); + if (!ctx->dsa_replicate_stencil_bit[i]) { + struct pipe_depth_stencil_alpha_state dsa = { 0 }; + dsa.depth_func = PIPE_FUNC_ALWAYS; + dsa.stencil[0].enabled = 1; + dsa.stencil[0].func = PIPE_FUNC_ALWAYS; + dsa.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; + dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; + dsa.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; + dsa.stencil[0].valuemask = 0xff; + dsa.stencil[0].writemask = 1u << i; + + ctx->dsa_replicate_stencil_bit[i] = + ctx->base.pipe->create_depth_stencil_alpha_state(ctx->base.pipe, &dsa); + } + return ctx->dsa_replicate_stencil_bit[i]; +} + +/** + * Performs a series of draws to implement stencil blits texture without + * requiring stencil writes, updating a single bit per pixel at the time. + */ +void +util_blitter_stencil_fallback(struct blitter_context *blitter, + struct pipe_resource *dst, + unsigned dst_level, + const struct pipe_box *dstbox, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *srcbox, + const struct pipe_scissor_state *scissor) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv *)blitter; + struct pipe_context *pipe = ctx->base.pipe; + + /* check the saved state */ + util_blitter_set_running_flag(blitter); + blitter_check_saved_vertex_states(ctx); + blitter_check_saved_fragment_states(ctx); + blitter_check_saved_fb_state(ctx); + blitter_disable_render_cond(ctx); + + /* Initialize the surface. */ + struct pipe_surface *dst_view, dst_templ; + util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstbox->z); + dst_view = pipe->create_surface(pipe, dst, &dst_templ); + + /* Initialize the sampler view. */ + struct pipe_sampler_view src_templ, *src_view; + util_blitter_default_src_texture(blitter, &src_templ, src, src_level); + src_templ.format = util_format_stencil_only(src_templ.format); + src_view = pipe->create_sampler_view(pipe, src, &src_templ); + + /* bind states */ + pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA][0]); + pipe->bind_fs_state(pipe, + get_stencil_blit_fallback_fs(ctx, src->nr_samples > 1)); + + /* set a framebuffer state */ + struct pipe_framebuffer_state fb_state = { 0 }; + fb_state.width = dstbox->width; + fb_state.height = dstbox->height; + fb_state.zsbuf = dst_view; + pipe->set_framebuffer_state(pipe, &fb_state); + pipe->set_sample_mask(pipe, ~0); + + blitter_set_common_draw_rect_state(ctx, scissor != NULL, + util_framebuffer_get_num_samples(&fb_state) > 1); + blitter_set_dst_dimensions(ctx, dst_view->width, dst_view->height); + + if (scissor) { + pipe->clear_depth_stencil(pipe, dst_view, PIPE_CLEAR_STENCIL, 0.0, 0, + MAX2(dstbox->x, scissor->minx), + MAX2(dstbox->y, scissor->miny), + MIN2(dstbox->x + dstbox->width, scissor->maxx) - dstbox->x, + MIN2(dstbox->y + dstbox->height, scissor->maxy) - dstbox->y, + true); + pipe->set_scissor_states(pipe, 0, 1, scissor); + } else { + pipe->clear_depth_stencil(pipe, dst_view, PIPE_CLEAR_STENCIL, 0.0, 0, + dstbox->x, dstbox->y, + dstbox->width, dstbox->height, + true); + } + + pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, &src_view); + pipe->bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0, 1, &ctx->sampler_state); + + unsigned stencil_bits = + util_format_get_component_bits(dst->format, + UTIL_FORMAT_COLORSPACE_ZS, 1); + + struct pipe_stencil_ref sr = { { (1u << stencil_bits) - 1 } }; + pipe->set_stencil_ref(pipe, sr); + + union blitter_attrib coord; + get_texcoords(src_view, src->width0, src->height0, + srcbox->x, srcbox->y, + srcbox->x + srcbox->width, srcbox->y + srcbox->height, + srcbox->z, 0, true, + &coord); + + for (int i = 0; i < stencil_bits; ++i) { + uint32_t mask = 1 << i; + struct pipe_constant_buffer cb = { + .user_buffer = &mask, + .buffer_size = sizeof(mask), + }; + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, blitter->cb_slot, + false, &cb); + + pipe->bind_depth_stencil_alpha_state(pipe, + get_stencil_blit_fallback_dsa(ctx, i)); + + blitter->draw_rectangle(blitter, ctx->velem_state, + get_vs_passthrough_pos_generic, + dstbox->x, dstbox->y, + dstbox->x + dstbox->width, + dstbox->y + dstbox->height, + 0, 1, + UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW, + &coord); + } + + if (scissor) + pipe->set_scissor_states(pipe, 0, 1, &ctx->base.saved_scissor); + + util_blitter_restore_vertex_states(blitter); + util_blitter_restore_fragment_states(blitter); + util_blitter_restore_textures(blitter); + util_blitter_restore_fb_state(blitter); + util_blitter_restore_render_cond(blitter); + util_blitter_restore_constant_buffer_state(blitter); + util_blitter_unset_running_flag(blitter); + + pipe_surface_reference(&dst_view, NULL); + pipe_sampler_view_reference(&src_view, NULL); +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_blitter.h b/lib/mesa/src/gallium/auxiliary/util/u_blitter.h index 72130adb4..79605d3d1 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_blitter.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_blitter.h @@ -400,6 +400,16 @@ void util_blitter_custom_shader(struct blitter_context *blitter, struct pipe_surface *dstsurf, void *custom_vs, void *custom_fs); +/* Used by D3D12 for non-MSAA -> MSAA stencil blits */ +void util_blitter_stencil_fallback(struct blitter_context *blitter, + struct pipe_resource *dst, + unsigned dst_level, + const struct pipe_box *dstbox, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *srcbox, + const struct pipe_scissor_state *scissor); + /* The functions below should be used to save currently bound constant state * objects inside a driver. The objects are automatically restored at the end * of the util_blitter_{clear, copy_region, fill_region} functions and then diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c b/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c index 62cbd1f61..4f3c98aec 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_debug_flush.c @@ -82,7 +82,7 @@ struct debug_flush_ctx { /* Contexts are used by a single thread at a time */ unsigned bt_depth; boolean catch_map_of_referenced; - struct util_hash_table *ref_hash; + struct hash_table *ref_hash; struct list_head head; }; @@ -102,18 +102,6 @@ debug_flush_capture_frame(int start, int depth) return frames; } -static int -debug_flush_pointer_compare(void *key1, void *key2) -{ - return (key1 == key2) ? 0 : 1; -} - -static unsigned -debug_flush_pointer_hash(void *key) -{ - return (unsigned) (uintptr_t) key; -} - struct debug_flush_buf * debug_flush_buf_create(boolean supports_persistent, unsigned bt_depth) { @@ -171,8 +159,7 @@ debug_flush_ctx_create(UNUSED boolean catch_reference_of_mapped, if (!fctx) goto out_no_ctx; - fctx->ref_hash = util_hash_table_create(debug_flush_pointer_hash, - debug_flush_pointer_compare); + fctx->ref_hash = util_hash_table_create_ptr_keys(); if (!fctx->ref_hash) goto out_no_ref_hash; @@ -229,9 +216,9 @@ debug_flush_map(struct debug_flush_buf *fbuf, unsigned flags) return; mtx_lock(&fbuf->mutex); - map_sync = !(flags & PIPE_TRANSFER_UNSYNCHRONIZED); + map_sync = !(flags & PIPE_MAP_UNSYNCHRONIZED); persistent = !map_sync || fbuf->supports_persistent || - !!(flags & PIPE_TRANSFER_PERSISTENT); + !!(flags & PIPE_MAP_PERSISTENT); /* Recursive maps are allowed if previous maps are persistent, * or if the current map is unsync. In other cases we might flush @@ -336,10 +323,7 @@ debug_flush_cb_reference(struct debug_flush_ctx *fctx, debug_flush_buf_reference(&item->fbuf, fbuf); item->bt_depth = fctx->bt_depth; item->ref_frame = debug_flush_capture_frame(2, item->bt_depth); - if (util_hash_table_set(fctx->ref_hash, fbuf, item) != PIPE_OK) { - debug_flush_item_destroy(item); - goto out_no_item; - } + _mesa_hash_table_insert(fctx->ref_hash, fbuf, item); return; } goto out_no_item; @@ -422,7 +406,7 @@ debug_flush_flush(struct debug_flush_ctx *fctx) util_hash_table_foreach(fctx->ref_hash, debug_flush_flush_cb, NULL); - util_hash_table_clear(fctx->ref_hash); + _mesa_hash_table_clear(fctx->ref_hash, NULL); } void @@ -435,8 +419,8 @@ debug_flush_ctx_destroy(struct debug_flush_ctx *fctx) util_hash_table_foreach(fctx->ref_hash, debug_flush_flush_cb, NULL); - util_hash_table_clear(fctx->ref_hash); - util_hash_table_destroy(fctx->ref_hash); + _mesa_hash_table_clear(fctx->ref_hash, NULL); + _mesa_hash_table_destroy(fctx->ref_hash, NULL); FREE(fctx); } #endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_gallium.c b/lib/mesa/src/gallium/auxiliary/util/u_debug_gallium.c deleted file mode 100644 index 63a1e69c0..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_gallium.c +++ /dev/null @@ -1,107 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 VMware, Inc. - * Copyright (c) 2008 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "util/u_debug.h" -#include "u_debug_gallium.h" -#include "u_dump.h" -#include "util/format/u_format.h" - -#ifdef DEBUG - -void -debug_print_format(const char *msg, unsigned fmt) -{ - debug_printf("%s: %s\n", msg, util_format_name(fmt)); -} - - -/** - * Print PIPE_TRANSFER_x flags with a message. - */ -void -debug_print_transfer_flags(const char *msg, unsigned usage) -{ - debug_printf("%s: ", msg); - util_dump_transfer_usage(stdout, usage); - printf("\n"); -} - - -/** - * Print PIPE_BIND_x flags with a message. - */ -void -debug_print_bind_flags(const char *msg, unsigned usage) -{ - static const struct debug_named_value names[] = { - DEBUG_NAMED_VALUE(PIPE_BIND_DEPTH_STENCIL), - DEBUG_NAMED_VALUE(PIPE_BIND_RENDER_TARGET), - DEBUG_NAMED_VALUE(PIPE_BIND_BLENDABLE), - DEBUG_NAMED_VALUE(PIPE_BIND_SAMPLER_VIEW), - DEBUG_NAMED_VALUE(PIPE_BIND_VERTEX_BUFFER), - DEBUG_NAMED_VALUE(PIPE_BIND_INDEX_BUFFER), - DEBUG_NAMED_VALUE(PIPE_BIND_CONSTANT_BUFFER), - DEBUG_NAMED_VALUE(PIPE_BIND_DISPLAY_TARGET), - DEBUG_NAMED_VALUE(PIPE_BIND_STREAM_OUTPUT), - DEBUG_NAMED_VALUE(PIPE_BIND_CURSOR), - DEBUG_NAMED_VALUE(PIPE_BIND_CUSTOM), - DEBUG_NAMED_VALUE(PIPE_BIND_GLOBAL), - DEBUG_NAMED_VALUE(PIPE_BIND_SHADER_BUFFER), - DEBUG_NAMED_VALUE(PIPE_BIND_SHADER_IMAGE), - DEBUG_NAMED_VALUE(PIPE_BIND_COMPUTE_RESOURCE), - DEBUG_NAMED_VALUE(PIPE_BIND_COMMAND_ARGS_BUFFER), - DEBUG_NAMED_VALUE(PIPE_BIND_SCANOUT), - DEBUG_NAMED_VALUE(PIPE_BIND_SHARED), - DEBUG_NAMED_VALUE(PIPE_BIND_LINEAR), - DEBUG_NAMED_VALUE_END - }; - - debug_printf("%s: %s\n", msg, debug_dump_flags(names, usage)); -} - - -/** - * Print PIPE_USAGE_x enum values with a message. - */ -void -debug_print_usage_enum(const char *msg, enum pipe_resource_usage usage) -{ - static const struct debug_named_value names[] = { - DEBUG_NAMED_VALUE(PIPE_USAGE_DEFAULT), - DEBUG_NAMED_VALUE(PIPE_USAGE_IMMUTABLE), - DEBUG_NAMED_VALUE(PIPE_USAGE_DYNAMIC), - DEBUG_NAMED_VALUE(PIPE_USAGE_STREAM), - DEBUG_NAMED_VALUE(PIPE_USAGE_STAGING), - DEBUG_NAMED_VALUE_END - }; - - debug_printf("%s: %s\n", msg, debug_dump_enum(names, usage)); -} - -#endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_gallium.h b/lib/mesa/src/gallium/auxiliary/util/u_debug_gallium.h deleted file mode 100644 index 0710ce2b7..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_gallium.h +++ /dev/null @@ -1,67 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 VMware, Inc. - * Copyright (c) 2008 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef _U_DEBUG_GALLIUM_H_ -#define _U_DEBUG_GALLIUM_H_ - -#include "pipe/p_defines.h" - -#ifdef __cplusplus -extern "C" { -#endif - -unsigned long -debug_memory_begin(void); - -void -debug_memory_end(unsigned long beginning); - -#ifdef DEBUG -void debug_print_format(const char *msg, unsigned fmt); -#else -#define debug_print_format(_msg, _fmt) ((void)0) -#endif - -#ifdef DEBUG - -void -debug_print_transfer_flags(const char *msg, unsigned usage); - -void -debug_print_bind_flags(const char *msg, unsigned usage); - -void -debug_print_usage_enum(const char *msg, enum pipe_resource_usage usage); - -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_refcnt.c b/lib/mesa/src/gallium/auxiliary/util/u_debug_refcnt.c index ec95f8726..a25234b46 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_refcnt.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_debug_refcnt.c @@ -44,6 +44,7 @@ #include "util/u_string.h" #include "util/u_hash_table.h" #include "os/os_thread.h" +#include "pipe/p_config.h" int debug_refcnt_state; @@ -52,31 +53,16 @@ static FILE *stream; /* TODO: maybe move this serial machinery to a stand-alone module and * expose it? */ +#ifdef PIPE_OS_WINDOWS +static mtx_t serials_mutex; +#else static mtx_t serials_mutex = _MTX_INITIALIZER_NP; +#endif -static struct util_hash_table *serials_hash; +static struct hash_table *serials_hash; static unsigned serials_last; -static unsigned -hash_ptr(void *p) -{ - return (unsigned) (uintptr_t) p; -} - - -static int -compare_ptr(void *a, void *b) -{ - if (a == b) - return 0; - else if (a < b) - return -1; - else - return 1; -} - - /** * Return a small integer serial number for the given pointer. */ @@ -96,7 +82,7 @@ debug_serial(void *p, unsigned *pserial) mtx_lock(&serials_mutex); if (!serials_hash) - serials_hash = util_hash_table_create(hash_ptr, compare_ptr); + serials_hash = util_hash_table_create_ptr_keys(); serial = (unsigned) (uintptr_t) util_hash_table_get(serials_hash, p); if (!serial) { @@ -109,7 +95,7 @@ debug_serial(void *p, unsigned *pserial) os_abort(); } - util_hash_table_set(serials_hash, p, (void *) (uintptr_t) serial); + _mesa_hash_table_insert(serials_hash, p, (void *) (uintptr_t) serial); found = FALSE; } mtx_unlock(&serials_mutex); @@ -127,12 +113,16 @@ static void debug_serial_delete(void *p) { mtx_lock(&serials_mutex); - util_hash_table_remove(serials_hash, p); + _mesa_hash_table_remove_key(serials_hash, p); mtx_unlock(&serials_mutex); } +#if defined(PIPE_OS_WINDOWS) +#define STACK_LEN 60 +#else #define STACK_LEN 64 +#endif /** * Log a reference count change to the log file (if enabled). diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_stack.c b/lib/mesa/src/gallium/auxiliary/util/u_debug_stack.c deleted file mode 100644 index 86d951441..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_stack.c +++ /dev/null @@ -1,347 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Stack backtracing. - * - * @author Jose Fonseca <jfonseca@vmware.com> - */ - -#include "util/u_debug.h" -#include "u_debug_symbol.h" -#include "u_debug_stack.h" - -#if defined(HAVE_LIBUNWIND) - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif -#include <dlfcn.h> - -#include "os/os_thread.h" -#include "u_hash_table.h" - -struct util_hash_table* symbols_hash; -static mtx_t symbols_mutex = _MTX_INITIALIZER_NP; - -static unsigned hash_ptr(void* p) -{ - return (unsigned)(uintptr_t)p; -} - -static int compare_ptr(void* a, void* b) -{ - if(a == b) - return 0; - else if(a < b) - return -1; - else - return 1; -} - -/* TODO with some refactoring we might be able to re-use debug_symbol_name_cached() - * instead.. otoh if using libunwind I think u_debug_symbol could just be excluded - * from build? - */ -static const char * -symbol_name_cached(unw_cursor_t *cursor, unw_proc_info_t *pip) -{ - void *addr = (void *)(uintptr_t)pip->start_ip; - char *name; - - mtx_lock(&symbols_mutex); - if(!symbols_hash) - symbols_hash = util_hash_table_create(hash_ptr, compare_ptr); - name = util_hash_table_get(symbols_hash, addr); - if(!name) - { - char procname[256]; - unw_word_t off; - int ret; - - ret = unw_get_proc_name(cursor, procname, sizeof(procname), &off); - if (ret && ret != -UNW_ENOMEM) { - procname[0] = '?'; - procname[1] = 0; - } - - if (asprintf(&name, "%s%s", procname, ret == -UNW_ENOMEM ? "..." : "") == -1) - name = "??"; - util_hash_table_set(symbols_hash, addr, (void*)name); - } - mtx_unlock(&symbols_mutex); - - return name; -} - -void -debug_backtrace_capture(struct debug_stack_frame *backtrace, - unsigned start_frame, - unsigned nr_frames) -{ - unw_cursor_t cursor; - unw_context_t context; - unw_proc_info_t pip; - unsigned i = 0; - - pip.unwind_info = NULL; - - unw_getcontext(&context); - unw_init_local(&cursor, &context); - - while ((start_frame > 0) && (unw_step(&cursor) > 0)) - start_frame--; - - while ((i < nr_frames) && (unw_step(&cursor) > 0)) { - unw_word_t ip; - - unw_get_reg(&cursor, UNW_REG_IP, &ip); - unw_get_proc_info(&cursor, &pip); - - backtrace[i].start_ip = pip.start_ip; - backtrace[i].off = ip - pip.start_ip; - backtrace[i].procname = symbol_name_cached(&cursor, &pip); - - i++; - } - - while (i < nr_frames) { - backtrace[i].start_ip = 0; - i++; - } -} - -static const void * -frame_ip(const struct debug_stack_frame *frame) -{ - return (void *)(uintptr_t)(frame->start_ip + frame->off); -} - -static const char * -frame_info(const struct debug_stack_frame *frame, unsigned *offset) -{ - Dl_info dlinfo; - const void *addr = frame_ip(frame); - - - if (dladdr(addr, &dlinfo) && dlinfo.dli_fname && - *dlinfo.dli_fname) { - *offset = (unsigned)((uintptr_t)addr - (uintptr_t)dlinfo.dli_fbase); - return dlinfo.dli_fname; - } - - *offset = 0; - return "?"; -} - -void -debug_backtrace_dump(const struct debug_stack_frame *backtrace, - unsigned nr_frames) -{ - unsigned i, offset; - const char *filename; - - for (i = 0; i < nr_frames; ++i) { - if (!backtrace[i].start_ip) - break; - filename = frame_info(&backtrace[i], &offset); - debug_printf("\t%s(+0x%x) (%s+0x%x) [%p]\n", filename, offset, - backtrace[i].procname, backtrace[i].off, - frame_ip(&backtrace[i])); - } -} - -void -debug_backtrace_print(FILE *f, - const struct debug_stack_frame *backtrace, - unsigned nr_frames) -{ - unsigned i, offset; - const char *filename; - - for (i = 0; i < nr_frames; ++i) { - if (!backtrace[i].start_ip) - break; - filename = frame_info(&backtrace[i], &offset); - fprintf(f, "\t%s(+0x%x) (%s+0x%x) [%p]\n", filename, offset, - backtrace[i].procname, backtrace[i].off, - frame_ip(&backtrace[i])); - } -} -#elif defined(ANDROID) - /* Not implemented here; see u_debug_stack_android.cpp */ -#else /* ! HAVE_LIBUNWIND */ - -#if defined(PIPE_OS_WINDOWS) -#include <windows.h> -#endif - - -/** - * Capture stack backtrace. - * - * NOTE: The implementation of this function is quite big, but it is important - * not to break it down in smaller functions to avoid adding new frames to the - * calling stack. - */ -void -debug_backtrace_capture(struct debug_stack_frame *backtrace, - unsigned start_frame, - unsigned nr_frames) -{ - const void **frame_pointer = NULL; - unsigned i = 0; - - if (!nr_frames) { - return; - } - - /* - * On Windows try obtaining the stack backtrace via CaptureStackBackTrace. - * - * It works reliably both for x86 for x86_64. - */ -#if defined(PIPE_OS_WINDOWS) - { - typedef USHORT (WINAPI *PFNCAPTURESTACKBACKTRACE)(ULONG, ULONG, - PVOID *, PULONG); - static PFNCAPTURESTACKBACKTRACE pfnCaptureStackBackTrace = NULL; - - if (!pfnCaptureStackBackTrace) { - static HMODULE hModule = NULL; - if (!hModule) { - hModule = LoadLibraryA("kernel32"); - assert(hModule); - } - if (hModule) { - pfnCaptureStackBackTrace = - (PFNCAPTURESTACKBACKTRACE)GetProcAddress(hModule, - "RtlCaptureStackBackTrace"); - } - } - if (pfnCaptureStackBackTrace) { - /* - * Skip this (debug_backtrace_capture) function's frame. - */ - - start_frame += 1; - - assert(start_frame + nr_frames < 63); - i = pfnCaptureStackBackTrace(start_frame, nr_frames, - (PVOID *) &backtrace->function, NULL); - - /* Pad remaing requested frames with NULL */ - while (i < nr_frames) { - backtrace[i++].function = NULL; - } - - return; - } - } -#endif - -#ifdef PIPE_ARCH_X86 -#if defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION > 404) || defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wframe-address" - frame_pointer = ((const void **)__builtin_frame_address(1)); -#pragma GCC diagnostic pop -#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) - __asm { - mov frame_pointer, ebp - } - frame_pointer = (const void **)frame_pointer[0]; -#else - frame_pointer = NULL; -#endif - - while (nr_frames) { - const void **next_frame_pointer; - - if (!frame_pointer) - break; - - if (start_frame) - --start_frame; - else { - backtrace[i++].function = frame_pointer[1]; - --nr_frames; - } - - next_frame_pointer = (const void **)frame_pointer[0]; - - /* Limit the stack walk to avoid referencing undefined memory */ - if ((uintptr_t)next_frame_pointer <= (uintptr_t)frame_pointer || - (uintptr_t)next_frame_pointer > (uintptr_t)frame_pointer + 64*1024) - break; - - frame_pointer = next_frame_pointer; - } -#else - (void) frame_pointer; -#endif - - while (nr_frames) { - backtrace[i++].function = NULL; - --nr_frames; - } -} - - -void -debug_backtrace_dump(const struct debug_stack_frame *backtrace, - unsigned nr_frames) -{ - unsigned i; - - for (i = 0; i < nr_frames; ++i) { - if (!backtrace[i].function) - break; - debug_symbol_print(backtrace[i].function); - } -} - - -void -debug_backtrace_print(FILE *f, - const struct debug_stack_frame *backtrace, - unsigned nr_frames) -{ - unsigned i; - - for (i = 0; i < nr_frames; ++i) { - const char *symbol; - if (!backtrace[i].function) - break; - symbol = debug_symbol_name_cached(backtrace[i].function); - if (symbol) - fprintf(f, "%s\n", symbol); - } -} - -#endif /* HAVE_LIBUNWIND */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_stack.h b/lib/mesa/src/gallium/auxiliary/util/u_debug_stack.h deleted file mode 100644 index fff41a5a9..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_stack.h +++ /dev/null @@ -1,90 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef U_DEBUG_STACK_H_ -#define U_DEBUG_STACK_H_ - -#include <stdio.h> - -#ifdef HAVE_LIBUNWIND -#define UNW_LOCAL_ONLY -#include <libunwind.h> -#endif - -/** - * @file - * Stack backtracing. - * - * @author Jose Fonseca <jfonseca@vmware.com> - */ - - -#ifdef __cplusplus -extern "C" { -#endif - - -/** - * Represent a frame from a stack backtrace. - * -#if defined(PIPE_OS_WINDOWS) && !defined(HAVE_LIBUNWIND) - * XXX: Do not change this. (passed to Windows' CaptureStackBackTrace()) -#endif - * - * TODO: This should be refactored as a void * typedef. - */ -struct debug_stack_frame -{ -#ifdef HAVE_LIBUNWIND - unw_word_t start_ip; - unsigned int off; - const char *procname; -#else - const void *function; -#endif -}; - - -void -debug_backtrace_capture(struct debug_stack_frame *backtrace, - unsigned start_frame, - unsigned nr_frames); - -void -debug_backtrace_dump(const struct debug_stack_frame *backtrace, - unsigned nr_frames); - -void -debug_backtrace_print(FILE *f, - const struct debug_stack_frame *backtrace, - unsigned nr_frames); - -#ifdef __cplusplus -} -#endif - -#endif /* U_DEBUG_STACK_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_stack_android.cpp b/lib/mesa/src/gallium/auxiliary/util/u_debug_stack_android.cpp deleted file mode 100644 index 395a1fe91..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_stack_android.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2018 Stefan Schake <stschake@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include <backtrace/Backtrace.h> - -#include "u_debug.h" -#include "u_debug_stack.h" -#include "util/hash_table.h" -#include "os/os_thread.h" - -static hash_table *backtrace_table; -static mtx_t table_mutex = _MTX_INITIALIZER_NP; - -void -debug_backtrace_capture(debug_stack_frame *mesa_backtrace, - unsigned start_frame, - unsigned nr_frames) -{ - hash_entry *backtrace_entry; - Backtrace *backtrace; - pid_t tid = gettid(); - - if (!nr_frames) - return; - - /* We keep an Android Backtrace handler around for each thread */ - mtx_lock(&table_mutex); - if (!backtrace_table) - backtrace_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - backtrace_entry = _mesa_hash_table_search(backtrace_table, (void*) (uintptr_t)tid); - if (!backtrace_entry) { - backtrace = Backtrace::Create(getpid(), tid); - _mesa_hash_table_insert(backtrace_table, (void*) (uintptr_t)tid, backtrace); - } else { - backtrace = (Backtrace *) backtrace_entry->data; - } - mtx_unlock(&table_mutex); - - /* Add one to exclude this call. Unwind already ignores itself. */ - backtrace->Unwind(start_frame + 1); - - /* Store the Backtrace handler in the first mesa frame for reference. - * Unwind will generally return less frames than nr_frames specified - * but we have no good way of storing the real count otherwise. - * The Backtrace handler only stores the results until the next Unwind, - * but that is how u_debug_stack is used anyway. - */ - mesa_backtrace->function = backtrace; -} - -void -debug_backtrace_dump(const debug_stack_frame *mesa_backtrace, - unsigned nr_frames) -{ - Backtrace *backtrace = (Backtrace *) mesa_backtrace->function; - size_t i; - - if (!nr_frames) - return; - - if (nr_frames > backtrace->NumFrames()) - nr_frames = backtrace->NumFrames(); - for (i = 0; i < nr_frames; i++) { - /* There is no prescribed format and this isn't interpreted further, - * so we simply use the default Android format. - */ - const std::string& frame_line = backtrace->FormatFrameData(i); - debug_printf("%s\n", frame_line.c_str()); - } -} - -void -debug_backtrace_print(FILE *f, - const debug_stack_frame *mesa_backtrace, - unsigned nr_frames) -{ - Backtrace *backtrace = (Backtrace *) mesa_backtrace->function; - size_t i; - - if (!nr_frames) - return; - - if (nr_frames > backtrace->NumFrames()) - nr_frames = backtrace->NumFrames(); - for (i = 0; i < nr_frames; i++) { - const std::string& frame_line = backtrace->FormatFrameData(i); - fprintf(f, "%s\n", frame_line.c_str()); - } -} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_symbol.c b/lib/mesa/src/gallium/auxiliary/util/u_debug_symbol.c deleted file mode 100644 index 4ea6c8d07..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_symbol.c +++ /dev/null @@ -1,318 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Symbol lookup. - * - * @author Jose Fonseca <jfonseca@vmware.com> - */ - -#include "pipe/p_compiler.h" -#include "os/os_thread.h" -#include "util/u_string.h" - -#include "util/u_debug.h" -#include "u_debug_symbol.h" -#include "u_hash_table.h" - - -#if defined(PIPE_OS_WINDOWS) - -#include <windows.h> -#include <stddef.h> - -#include "dbghelp.h" - - -/** - * SymInitialize() must be called once for each process (in this case, the - * current process), before any of the other functions can be called. - */ -static BOOL g_bSymInitialized = FALSE; - - -/** - * Lookup the address of a DbgHelp function. - */ -static FARPROC WINAPI -getDbgHelpProcAddress(LPCSTR lpProcName) -{ - static HMODULE hModule = NULL; - - if (!hModule) { - static boolean bail = FALSE; - - if (bail) { - return NULL; - } - -#ifdef PIPE_CC_GCC - /* - * DbgHelp does not understand the debug information generated by MinGW toolchain. - * - * mgwhelp.dll is a dbghelp.dll look-alike replacement, which is able to - * understand MinGW symbols, including on 64-bit builds. - */ - if (!hModule) { - hModule = LoadLibraryA("mgwhelp.dll"); - if (!hModule) { - _debug_printf("warning: mgwhelp.dll not found: symbol names will not be resolved\n" - "warning: download it from https://github.com/jrfonseca/drmingw/#mgwhelp\n"); - } - } -#endif - - /* - * Fallback to the real DbgHelp. - */ - if (!hModule) { - hModule = LoadLibraryA("dbghelp.dll"); - } - - if (!hModule) { - bail = TRUE; - return NULL; - } - } - - return GetProcAddress(hModule, lpProcName); -} - - -/** - * Generic macro to dispatch a DbgHelp functions. - */ -#define DBGHELP_DISPATCH(_name, _ret_type, _ret_default, _arg_types, _arg_names) \ - static _ret_type WINAPI \ - j_##_name _arg_types \ - { \ - typedef BOOL (WINAPI *PFN) _arg_types; \ - static PFN pfn = NULL; \ - if (!pfn) { \ - pfn = (PFN) getDbgHelpProcAddress(#_name); \ - if (!pfn) { \ - return _ret_default; \ - } \ - } \ - return pfn _arg_names; \ - } - -DBGHELP_DISPATCH(SymInitialize, - BOOL, 0, - (HANDLE hProcess, PSTR UserSearchPath, BOOL fInvadeProcess), - (hProcess, UserSearchPath, fInvadeProcess)) - -DBGHELP_DISPATCH(SymSetOptions, - DWORD, FALSE, - (DWORD SymOptions), - (SymOptions)) - -DBGHELP_DISPATCH(SymFromAddr, - BOOL, FALSE, - (HANDLE hProcess, DWORD64 Address, PDWORD64 Displacement, PSYMBOL_INFO Symbol), - (hProcess, Address, Displacement, Symbol)) - -DBGHELP_DISPATCH(SymGetLineFromAddr64, - BOOL, FALSE, - (HANDLE hProcess, DWORD64 dwAddr, PDWORD pdwDisplacement, PIMAGEHLP_LINE64 Line), - (hProcess, dwAddr, pdwDisplacement, Line)) - - -#undef DBGHELP_DISPATCH - - -static inline boolean -debug_symbol_name_dbghelp(const void *addr, char* buf, unsigned size) -{ - DWORD64 dwAddr = (DWORD64)(uintptr_t)addr; - HANDLE hProcess = GetCurrentProcess(); - - /* General purpose buffer, to back pSymbol and other temporary stuff. - * Must not be too memory hungry here to avoid stack overflows. - */ - CHAR buffer[512]; - - PSYMBOL_INFO pSymbol = (PSYMBOL_INFO) buffer; - DWORD64 dwDisplacement = 0; /* Displacement of the input address, relative to the start of the symbol */ - DWORD dwLineDisplacement = 0; - IMAGEHLP_LINE64 Line; - - memset(pSymbol, 0, sizeof *pSymbol); - pSymbol->SizeOfStruct = sizeof buffer; - pSymbol->MaxNameLen = sizeof buffer - offsetof(SYMBOL_INFO, Name); - - if (!g_bSymInitialized) { - j_SymSetOptions(/* SYMOPT_UNDNAME | */ SYMOPT_LOAD_LINES); - if (j_SymInitialize(hProcess, NULL, TRUE)) { - g_bSymInitialized = TRUE; - } - } - - /* Lookup symbol name */ - if (!g_bSymInitialized || - !j_SymFromAddr(hProcess, dwAddr, &dwDisplacement, pSymbol)) { - /* - * We couldn't obtain symbol information. At least tell which module the address belongs. - */ - - HMODULE hModule = NULL; - - if (!GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, - (LPCTSTR)addr, - &hModule)) { - return FALSE; - } - - if (GetModuleFileNameA(hModule, buffer, sizeof buffer) == sizeof buffer) { - return FALSE; - } - snprintf(buf, size, "%p at %s+0x%lx", - addr, buffer, - (unsigned long)((uintptr_t)addr - (uintptr_t)hModule)); - - return TRUE; - } - - /* - * Try to get filename and line number. - */ - memset(&Line, 0, sizeof Line); - Line.SizeOfStruct = sizeof Line; - if (!j_SymGetLineFromAddr64(hProcess, dwAddr, &dwLineDisplacement, &Line)) { - Line.FileName = NULL; - } - - if (Line.FileName) { - snprintf(buf, size, "%s at %s:%lu", pSymbol->Name, Line.FileName, Line.LineNumber); - } else { - snprintf(buf, size, "%s", pSymbol->Name); - } - - return TRUE; -} - -#endif /* PIPE_OS_WINDOWS */ - - -#if defined(HAVE_EXECINFO_H) - -#include <execinfo.h> - -/* This can only provide dynamic symbols, or binary offsets into a file. - * - * To fix this, post-process the output with tools/addr2line.sh - */ -static inline boolean -debug_symbol_name_glibc(const void *addr, char* buf, unsigned size) -{ - char** syms = backtrace_symbols((void**)&addr, 1); - if (!syms) { - return FALSE; - } - strncpy(buf, syms[0], size); - buf[size - 1] = 0; - free(syms); - return TRUE; -} - -#endif /* defined(HAVE_EXECINFO_H) */ - - -void -debug_symbol_name(const void *addr, char* buf, unsigned size) -{ -#if defined(PIPE_OS_WINDOWS) - if (debug_symbol_name_dbghelp(addr, buf, size)) { - return; - } -#endif - -#if defined(HAVE_EXECINFO_H) - if (debug_symbol_name_glibc(addr, buf, size)) { - return; - } -#endif /* defined(HAVE_EXECINFO_H) */ - - snprintf(buf, size, "%p", addr); - buf[size - 1] = 0; -} - -void -debug_symbol_print(const void *addr) -{ - char buf[1024]; - debug_symbol_name(addr, buf, sizeof(buf)); - debug_printf("\t%s\n", buf); -} - -struct util_hash_table* symbols_hash; -static mtx_t symbols_mutex = _MTX_INITIALIZER_NP; - -static unsigned hash_ptr(void* p) -{ - return (unsigned)(uintptr_t)p; -} - -static int compare_ptr(void* a, void* b) -{ - if(a == b) - return 0; - else if(a < b) - return -1; - else - return 1; -} - -const char* -debug_symbol_name_cached(const void *addr) -{ - const char* name; -#ifdef PIPE_OS_WINDOWS - static boolean first = TRUE; - - if (first) { - (void) mtx_init(&symbols_mutex, mtx_plain); - first = FALSE; - } -#endif - - mtx_lock(&symbols_mutex); - if(!symbols_hash) - symbols_hash = util_hash_table_create(hash_ptr, compare_ptr); - name = util_hash_table_get(symbols_hash, (void*)addr); - if(!name) - { - char buf[1024]; - debug_symbol_name(addr, buf, sizeof(buf)); - name = strdup(buf); - - util_hash_table_set(symbols_hash, (void*)addr, (void*)name); - } - mtx_unlock(&symbols_mutex); - return name; -} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_debug_symbol.h b/lib/mesa/src/gallium/auxiliary/util/u_debug_symbol.h deleted file mode 100644 index b247706c2..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_debug_symbol.h +++ /dev/null @@ -1,58 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef U_DEBUG_SYMBOL_H_ -#define U_DEBUG_SYMBOL_H_ - - -/** - * @file - * Symbol lookup. - * - * @author Jose Fonseca <jfonseca@vmware.com> - */ - - -#ifdef __cplusplus -extern "C" { -#endif - - -void -debug_symbol_name(const void *addr, char* buf, unsigned size); - -const char* -debug_symbol_name_cached(const void *addr); - -void -debug_symbol_print(const void *addr); - -#ifdef __cplusplus -} -#endif - -#endif /* U_DEBUG_SYMBOL_H_ */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_draw.c b/lib/mesa/src/gallium/auxiliary/util/u_draw.c index a2ce0e394..6133896bf 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_draw.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_draw.c @@ -107,7 +107,7 @@ util_draw_max_index( max_index = MIN2(max_index, buffer_max_index); } else { - /* Per-instance data. Simply make sure the state tracker didn't + /* Per-instance data. Simply make sure gallium frontends didn't * request more instances than those that fit in the buffer */ if ((info->start_instance + info->instance_count)/element->instance_divisor > (buffer_max_index + 1)) { @@ -126,31 +126,32 @@ util_draw_max_index( } -/* This extracts the draw arguments from the info_in->indirect resource, +/* This extracts the draw arguments from the indirect resource, * puts them into a new instance of pipe_draw_info, and calls draw_vbo on it. */ void util_draw_indirect(struct pipe_context *pipe, - const struct pipe_draw_info *info_in) + const struct pipe_draw_info *info_in, + const struct pipe_draw_indirect_info *indirect) { struct pipe_draw_info info; struct pipe_transfer *transfer; uint32_t *params; - const unsigned num_params = info_in->index_size ? 5 : 4; + unsigned num_params = info_in->index_size ? 5 : 4; - assert(info_in->indirect); - assert(!info_in->count_from_stream_output); + assert(indirect); + assert(!indirect->count_from_stream_output); memcpy(&info, info_in, sizeof(info)); - uint32_t draw_count = info_in->indirect->draw_count; + uint32_t draw_count = indirect->draw_count; - if (info_in->indirect->indirect_draw_count) { + if (indirect->indirect_draw_count) { struct pipe_transfer *dc_transfer; uint32_t *dc_param = pipe_buffer_map_range(pipe, - info_in->indirect->indirect_draw_count, - info_in->indirect->indirect_draw_count_offset, - 4, PIPE_TRANSFER_READ, &dc_transfer); + indirect->indirect_draw_count, + indirect->indirect_draw_count_offset, + 4, PIPE_MAP_READ, &dc_transfer); if (!dc_transfer) { debug_printf("%s: failed to map indirect draw count buffer\n", __FUNCTION__); return; @@ -160,12 +161,14 @@ util_draw_indirect(struct pipe_context *pipe, pipe_buffer_unmap(pipe, dc_transfer); } + if (indirect->stride) + num_params = MIN2(indirect->stride / 4, num_params); params = (uint32_t *) pipe_buffer_map_range(pipe, - info_in->indirect->buffer, - info_in->indirect->offset, - (num_params * info_in->indirect->draw_count) * sizeof(uint32_t), - PIPE_TRANSFER_READ, + indirect->buffer, + indirect->offset, + (num_params * indirect->draw_count) * sizeof(uint32_t), + PIPE_MAP_READ, &transfer); if (!transfer) { debug_printf("%s: failed to map indirect buffer\n", __FUNCTION__); @@ -173,17 +176,39 @@ util_draw_indirect(struct pipe_context *pipe, } for (unsigned i = 0; i < draw_count; i++) { - info.count = params[0]; + struct pipe_draw_start_count draw; + + draw.count = params[0]; info.instance_count = params[1]; - info.start = params[2]; + draw.start = params[2]; info.index_bias = info_in->index_size ? params[3] : 0; info.start_instance = info_in->index_size ? params[4] : params[3]; info.drawid = i; - info.indirect = NULL; - pipe->draw_vbo(pipe, &info); + pipe->draw_vbo(pipe, &info, NULL, &draw, 1); - params += info_in->indirect->stride / 4; + params += indirect->stride / 4; } pipe_buffer_unmap(pipe, transfer); } + +void +util_draw_multi(struct pipe_context *pctx, const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count *draws, + unsigned num_draws) +{ + struct pipe_draw_info tmp_info = *info; + + /* If you call this with num_draws==1, that is probably going to be + * an infinite loop + */ + assert(num_draws > 1); + + for (unsigned i = 0; i < num_draws; i++) { + if (indirect || (draws[i].count && info->instance_count)) + pctx->draw_vbo(pctx, &tmp_info, indirect, &draws[i], 1); + if (tmp_info.increment_draw_id) + tmp_info.drawid++; + } +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_draw.h b/lib/mesa/src/gallium/auxiliary/util/u_draw.h index d0955fa3f..6f8601f90 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_draw.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_draw.h @@ -55,15 +55,17 @@ util_draw_arrays(struct pipe_context *pipe, uint count) { struct pipe_draw_info info; + struct pipe_draw_start_count draw; util_draw_init_info(&info); info.mode = mode; - info.start = start; - info.count = count; info.min_index = start; info.max_index = start + count - 1; - pipe->draw_vbo(pipe, &info); + draw.start = start; + draw.count = count; + + pipe->draw_vbo(pipe, &info, NULL, &draw, 1); } static inline void @@ -75,17 +77,19 @@ util_draw_elements(struct pipe_context *pipe, uint count) { struct pipe_draw_info info; + struct pipe_draw_start_count draw; util_draw_init_info(&info); info.index.user = indices; info.has_user_indices = true; info.index_size = index_size; info.mode = mode; - info.start = start; - info.count = count; info.index_bias = index_bias; - pipe->draw_vbo(pipe, &info); + draw.start = start; + draw.count = count; + + pipe->draw_vbo(pipe, &info, NULL, &draw, 1); } static inline void @@ -97,17 +101,20 @@ util_draw_arrays_instanced(struct pipe_context *pipe, uint instance_count) { struct pipe_draw_info info; + struct pipe_draw_start_count draw; util_draw_init_info(&info); info.mode = mode; - info.start = start; - info.count = count; info.start_instance = start_instance; info.instance_count = instance_count; + info.index_bounds_valid = true; info.min_index = start; info.max_index = start + count - 1; - pipe->draw_vbo(pipe, &info); + draw.start = start; + draw.count = count; + + pipe->draw_vbo(pipe, &info, NULL, &draw, 1); } static inline void @@ -122,19 +129,21 @@ util_draw_elements_instanced(struct pipe_context *pipe, uint instance_count) { struct pipe_draw_info info; + struct pipe_draw_start_count draw; util_draw_init_info(&info); info.index.user = indices; info.has_user_indices = true; info.index_size = index_size; info.mode = mode; - info.start = start; - info.count = count; info.index_bias = index_bias; info.start_instance = start_instance; info.instance_count = instance_count; - pipe->draw_vbo(pipe, &info); + draw.start = start; + draw.count = count; + + pipe->draw_vbo(pipe, &info, NULL, &draw, 1); } @@ -143,8 +152,17 @@ util_draw_elements_instanced(struct pipe_context *pipe, */ void util_draw_indirect(struct pipe_context *pipe, - const struct pipe_draw_info *info); + const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect); +/* Helper to handle multi-draw by splitting into individual draws. You + * don't want to call this if num_draws==1 + */ +void +util_draw_multi(struct pipe_context *pctx, const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count *draws, + unsigned num_draws); unsigned util_draw_max_index( diff --git a/lib/mesa/src/gallium/auxiliary/util/u_draw_quad.c b/lib/mesa/src/gallium/auxiliary/util/u_draw_quad.c index fe9558e6c..cf431f2b0 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_draw_quad.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_draw_quad.c @@ -64,7 +64,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, cso_set_vertex_buffers(cso, vbuf_slot, 1, &vbuffer); cso_draw_arrays(cso, prim_type, 0, num_verts); } else { - pipe->set_vertex_buffers(pipe, vbuf_slot, 1, &vbuffer); + pipe->set_vertex_buffers(pipe, vbuf_slot, 1, 0, false, &vbuffer); util_draw_arrays(pipe, prim_type, 0, num_verts); } } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_dump.h b/lib/mesa/src/gallium/auxiliary/util/u_dump.h index 8c1a8f3f4..bac1d1237 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_dump.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_dump.h @@ -201,6 +201,13 @@ void util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state); void +util_dump_draw_start_count(FILE *stream, const struct pipe_draw_start_count *state); + +void +util_dump_draw_indirect_info(FILE *stream, + const struct pipe_draw_indirect_info *indirect); + +void util_dump_grid_info(FILE *stream, const struct pipe_grid_info *state); void diff --git a/lib/mesa/src/gallium/auxiliary/util/u_dump_state.c b/lib/mesa/src/gallium/auxiliary/util/u_dump_state.c index 422dc8675..a72b3722f 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_dump_state.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_dump_state.c @@ -521,15 +521,11 @@ util_dump_depth_stencil_alpha_state(FILE *stream, const struct pipe_depth_stenci util_dump_struct_begin(stream, "pipe_depth_stencil_alpha_state"); - util_dump_member_begin(stream, "depth"); - util_dump_struct_begin(stream, "pipe_depth_state"); - util_dump_member(stream, bool, &state->depth, enabled); - if (state->depth.enabled) { - util_dump_member(stream, bool, &state->depth, writemask); - util_dump_member(stream, enum_func, &state->depth, func); + util_dump_member(stream, bool, state, depth_enabled); + if (state->depth_enabled) { + util_dump_member(stream, bool, state, depth_writemask); + util_dump_member(stream, enum_func, state, depth_func); } - util_dump_struct_end(stream); - util_dump_member_end(stream); util_dump_member_begin(stream, "stencil"); util_dump_array_begin(stream); @@ -554,15 +550,11 @@ util_dump_depth_stencil_alpha_state(FILE *stream, const struct pipe_depth_stenci util_dump_array_end(stream); util_dump_member_end(stream); - util_dump_member_begin(stream, "alpha"); - util_dump_struct_begin(stream, "pipe_alpha_state"); - util_dump_member(stream, bool, &state->alpha, enabled); - if (state->alpha.enabled) { - util_dump_member(stream, enum_func, &state->alpha, func); - util_dump_member(stream, float, &state->alpha, ref_value); + util_dump_member(stream, bool, state, alpha_enabled); + if (state->alpha_enabled) { + util_dump_member(stream, enum_func, state, alpha_func); + util_dump_member(stream, float, state, alpha_ref_value); } - util_dump_struct_end(stream); - util_dump_member_end(stream); util_dump_struct_end(stream); } @@ -603,6 +595,7 @@ util_dump_blend_state(FILE *stream, const struct pipe_blend_state *state) util_dump_member(stream, bool, state, dither); util_dump_member(stream, bool, state, alpha_to_coverage); util_dump_member(stream, bool, state, alpha_to_one); + util_dump_member(stream, uint, state, max_rt); util_dump_member(stream, bool, state, logicop_enable); if (state->logicop_enable) { @@ -613,7 +606,7 @@ util_dump_blend_state(FILE *stream, const struct pipe_blend_state *state) util_dump_member_begin(stream, "rt"); if (state->independent_blend_enable) - valid_entries = PIPE_MAX_COLOR_BUFS; + valid_entries = state->max_rt + 1; util_dump_struct_array(stream, rt_blend_state, state->rt, valid_entries); util_dump_member_end(stream); } @@ -916,8 +909,6 @@ util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state) util_dump_member(stream, uint, state, has_user_indices); util_dump_member(stream, enum_prim_mode, state, mode); - util_dump_member(stream, uint, state, start); - util_dump_member(stream, uint, state, count); util_dump_member(stream, uint, state, start_instance); util_dump_member(stream, uint, state, instance_count); @@ -940,19 +931,35 @@ util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state) else util_dump_member(stream, ptr, state, index.resource); } - util_dump_member(stream, ptr, state, count_from_stream_output); + util_dump_struct_end(stream); +} + +void +util_dump_draw_start_count(FILE *stream, const struct pipe_draw_start_count *state) +{ + util_dump_struct_begin(stream, "pipe_draw_start_count"); + util_dump_member(stream, uint, state, start); + util_dump_member(stream, uint, state, count); + util_dump_struct_end(stream); +} - if (!state->indirect) { - util_dump_member(stream, ptr, state, indirect); - } else { - util_dump_member(stream, uint, state, indirect->offset); - util_dump_member(stream, uint, state, indirect->stride); - util_dump_member(stream, uint, state, indirect->draw_count); - util_dump_member(stream, uint, state, indirect->indirect_draw_count_offset); - util_dump_member(stream, ptr, state, indirect->buffer); - util_dump_member(stream, ptr, state, indirect->indirect_draw_count); +void +util_dump_draw_indirect_info(FILE *stream, + const struct pipe_draw_indirect_info *state) +{ + if (!state) { + util_dump_null(stream); + return; } + util_dump_struct_begin(stream, "pipe_draw_indirect_info"); + util_dump_member(stream, uint, state, offset); + util_dump_member(stream, uint, state, stride); + util_dump_member(stream, uint, state, draw_count); + util_dump_member(stream, uint, state, indirect_draw_count_offset); + util_dump_member(stream, ptr, state, buffer); + util_dump_member(stream, ptr, state, indirect_draw_count); + util_dump_member(stream, ptr, state, count_from_stream_output); util_dump_struct_end(stream); } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_half.h b/lib/mesa/src/gallium/auxiliary/util/u_half.h deleted file mode 100644 index d28fae3c7..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_half.h +++ /dev/null @@ -1,131 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 Luca Barbieri - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef U_HALF_H -#define U_HALF_H - -#include "pipe/p_compiler.h" -#include "util/u_math.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * References for float <-> half conversions - * - * http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ - * https://gist.github.com/2156668 - * https://gist.github.com/2144712 - */ - -static inline uint16_t -util_float_to_half(float f) -{ - uint32_t sign_mask = 0x80000000; - uint32_t round_mask = ~0xfff; - uint32_t f32inf = 0xff << 23; - uint32_t f16inf = 0x1f << 23; - uint32_t sign; - union fi magic; - union fi f32; - uint16_t f16; - - magic.ui = 0xf << 23; - - f32.f = f; - - /* Sign */ - sign = f32.ui & sign_mask; - f32.ui ^= sign; - - if (f32.ui == f32inf) { - /* Inf */ - f16 = 0x7c00; - } else if (f32.ui > f32inf) { - /* NaN */ - f16 = 0x7e00; - } else { - /* Number */ - f32.ui &= round_mask; - f32.f *= magic.f; - f32.ui -= round_mask; - - /* - * Clamp to max finite value if overflowed. - * OpenGL has completely undefined rounding behavior for float to - * half-float conversions, and this matches what is mandated for float - * to fp11/fp10, which recommend round-to-nearest-finite too. - * (d3d10 is deeply unhappy about flushing such values to infinity, and - * while it also mandates round-to-zero it doesn't care nearly as much - * about that.) - */ - if (f32.ui > f16inf) - f32.ui = f16inf - 1; - - f16 = f32.ui >> 13; - } - - /* Sign */ - f16 |= sign >> 16; - - return f16; -} - -static inline float -util_half_to_float(uint16_t f16) -{ - union fi infnan; - union fi magic; - union fi f32; - - infnan.ui = 0x8f << 23; - infnan.f = 65536.0f; - magic.ui = 0xef << 23; - - /* Exponent / Mantissa */ - f32.ui = (f16 & 0x7fff) << 13; - - /* Adjust */ - f32.f *= magic.f; - - /* Inf / NaN */ - if (f32.f >= infnan.f) - f32.ui |= 0xff << 23; - - /* Sign */ - f32.ui |= (f16 & 0x8000) << 16; - - return f32.f; -} - -#ifdef __cplusplus -} -#endif - -#endif /* U_HALF_H */ - diff --git a/lib/mesa/src/gallium/auxiliary/util/u_helpers.c b/lib/mesa/src/gallium/auxiliary/util/u_helpers.c index 5e5eeedbf..791d65ac7 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_helpers.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_helpers.c @@ -45,7 +45,9 @@ void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst, uint32_t *enabled_buffers, const struct pipe_vertex_buffer *src, - unsigned start_slot, unsigned count) + unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership) { unsigned i; uint32_t bitmask = 0; @@ -61,7 +63,7 @@ void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst, pipe_vertex_buffer_unreference(&dst[i]); - if (!src[i].is_user_buffer) + if (!take_ownership && !src[i].is_user_buffer) pipe_resource_reference(&dst[i].buffer.resource, src[i].buffer.resource); } @@ -75,6 +77,9 @@ void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst, for (i = 0; i < count; i++) pipe_vertex_buffer_unreference(&dst[i]); } + + for (i = 0; i < unbind_num_trailing_slots; i++) + pipe_vertex_buffer_unreference(&dst[count + i]); } /** @@ -84,7 +89,9 @@ void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst, void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst, unsigned *dst_count, const struct pipe_vertex_buffer *src, - unsigned start_slot, unsigned count) + unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership) { unsigned i; uint32_t enabled_buffers = 0; @@ -95,7 +102,8 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst, } util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot, - count); + count, unbind_num_trailing_slots, + take_ownership); *dst_count = util_last_bit(enabled_buffers); } @@ -143,13 +151,14 @@ void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst, bool util_upload_index_buffer(struct pipe_context *pipe, const struct pipe_draw_info *info, + const struct pipe_draw_start_count *draw, struct pipe_resource **out_buffer, unsigned *out_offset, unsigned alignment) { - unsigned start_offset = info->start * info->index_size; + unsigned start_offset = draw->start * info->index_size; u_upload_data(pipe->stream_uploader, start_offset, - info->count * info->index_size, alignment, + draw->count * info->index_size, alignment, (char*)info->index.user + start_offset, out_offset, out_buffer); u_upload_unmap(pipe->stream_uploader); @@ -157,43 +166,6 @@ util_upload_index_buffer(struct pipe_context *pipe, return *out_buffer != NULL; } -/** - * Called by MakeCurrent. Used to notify the driver that the application - * thread may have been changed. - * - * The function pins the current thread and driver threads to a group of - * CPU cores that share the same L3 cache. This is needed for good multi- - * threading performance on AMD Zen CPUs. - * - * \param upper_thread thread in the state tracker that also needs to be - * pinned. - */ -void -util_pin_driver_threads_to_random_L3(struct pipe_context *ctx, - thrd_t *upper_thread) -{ - /* If pinning has no effect, don't do anything. */ - if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3) - return; - - unsigned num_L3_caches = util_cpu_caps.nr_cpus / - util_cpu_caps.cores_per_L3; - - /* Get a semi-random number. */ - int64_t t = os_time_get_nano(); - unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches; - - /* Tell the driver to pin its threads to the selected L3 cache. */ - if (ctx->set_context_param) { - ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE, - cache); - } - - /* Do the same for the upper level thread if there is any (e.g. glthread) */ - if (upper_thread) - util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3); -} - /* This is a helper for hardware bring-up. Don't remove. */ struct pipe_query * util_begin_pipestat_query(struct pipe_context *ctx) diff --git a/lib/mesa/src/gallium/auxiliary/util/u_helpers.h b/lib/mesa/src/gallium/auxiliary/util/u_helpers.h index 09a95a7ab..39ed92361 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_helpers.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_helpers.h @@ -30,6 +30,7 @@ #include "pipe/p_state.h" #include "c11/threads.h" +#include "compiler/shader_enums.h" #include <stdio.h> #ifdef __cplusplus @@ -39,12 +40,16 @@ extern "C" { void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst, uint32_t *enabled_buffers, const struct pipe_vertex_buffer *src, - unsigned start_slot, unsigned count); + unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership); void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst, unsigned *dst_count, const struct pipe_vertex_buffer *src, - unsigned start_slot, unsigned count); + unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership); void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst, uint32_t *enabled_buffers, @@ -53,12 +58,27 @@ void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst, bool util_upload_index_buffer(struct pipe_context *pipe, const struct pipe_draw_info *info, + const struct pipe_draw_start_count *draw, struct pipe_resource **out_buffer, unsigned *out_offset, unsigned alignment); -void -util_pin_driver_threads_to_random_L3(struct pipe_context *ctx, - thrd_t *upper_thread); +/* Helper function to determine if the varying should contain the point + * coordinates, given the sprite_coord_enable mask. Requires + * PIPE_CAP_TGSI_TEXCOORD to be enabled. + */ +static inline bool +util_varying_is_point_coord(gl_varying_slot slot, uint32_t sprite_coord_enable) +{ + if (slot == VARYING_SLOT_PNTC) + return true; + + if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7 && + (sprite_coord_enable & (1 << (slot - VARYING_SLOT_TEX0)))) { + return true; + } + + return false; +} struct pipe_query * util_begin_pipestat_query(struct pipe_context *ctx); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_idalloc.c b/lib/mesa/src/gallium/auxiliary/util/u_idalloc.c deleted file mode 100644 index 26104552e..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_idalloc.c +++ /dev/null @@ -1,96 +0,0 @@ -/************************************************************************** - * - * Copyright 2017 Valve Corporation - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * A simple allocator that allocates and release "numbers". - * - * @author Samuel Pitoiset <samuel.pitoiset@gmail.com> - */ - -#include "util/u_idalloc.h" -#include "util/u_math.h" -#include "util/u_memory.h" - -void -util_idalloc_init(struct util_idalloc *buf) -{ - memset(buf, 0, sizeof(*buf)); -} - -void -util_idalloc_fini(struct util_idalloc *buf) -{ - if (buf->data) - free(buf->data); -} - -void -util_idalloc_resize(struct util_idalloc *buf, unsigned new_num_elements) -{ - new_num_elements = align(new_num_elements, 32); - - if (new_num_elements > buf->num_elements) { - unsigned i; - - buf->data = realloc(buf->data, - (new_num_elements / 32) * sizeof(*buf->data)); - - for (i = buf->num_elements / 32; i < new_num_elements / 32; i++) - buf->data[i] = 0; - buf->num_elements = new_num_elements; - } -} - -unsigned -util_idalloc_alloc(struct util_idalloc *buf) -{ - unsigned num_elements = buf->num_elements; - - for (unsigned i = 0; i < num_elements / 32; i++) { - if (buf->data[i] == 0xffffffff) - continue; - - unsigned bit = ffs(~buf->data[i]) - 1; - buf->data[i] |= 1u << bit; - return i * 32 + bit; - } - - /* No slots available, resize and return the first free. */ - util_idalloc_resize(buf, num_elements * 2); - - buf->data[num_elements / 32] |= 1 << (num_elements % 32); - - return num_elements; -} - -void -util_idalloc_free(struct util_idalloc *buf, unsigned id) -{ - assert(id < buf->num_elements); - buf->data[id / 32] &= ~(1 << (id % 32)); -} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_idalloc.h b/lib/mesa/src/gallium/auxiliary/util/u_idalloc.h deleted file mode 100644 index 82469e94d..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_idalloc.h +++ /dev/null @@ -1,62 +0,0 @@ -/************************************************************************** - * - * Copyright 2017 Valve Corporation - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef U_IDALLOC_H -#define U_IDALLOC_H - -#include <inttypes.h> - -#ifdef __cplusplus -extern "C" { -#endif - -struct util_idalloc -{ - uint32_t *data; - unsigned num_elements; -}; - -void -util_idalloc_init(struct util_idalloc *buf); - -void -util_idalloc_fini(struct util_idalloc *buf); - -void -util_idalloc_resize(struct util_idalloc *buf, unsigned new_num_elements); - -unsigned -util_idalloc_alloc(struct util_idalloc *buf); - -void -util_idalloc_free(struct util_idalloc *buf, unsigned id); - -#ifdef __cplusplus -} -#endif - -#endif /* U_IDALLOC_H */ diff --git a/lib/mesa/src/gallium/auxiliary/util/u_index_modify.c b/lib/mesa/src/gallium/auxiliary/util/u_index_modify.c index 4e9349a7d..017d4c3ab 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_index_modify.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_index_modify.c @@ -43,7 +43,7 @@ void util_shorten_ubyte_elts_to_userptr(struct pipe_context *context, in_map = info->index.user; } else { in_map = pipe_buffer_map(context, info->index.resource, - PIPE_TRANSFER_READ | + PIPE_MAP_READ | add_transfer_flags, &src_transfer); } @@ -77,7 +77,7 @@ void util_rebuild_ushort_elts_to_userptr(struct pipe_context *context, in_map = info->index.user; } else { in_map = pipe_buffer_map(context, info->index.resource, - PIPE_TRANSFER_READ | + PIPE_MAP_READ | add_transfer_flags, &in_transfer); } @@ -111,7 +111,7 @@ void util_rebuild_uint_elts_to_userptr(struct pipe_context *context, in_map = info->index.user; } else { in_map = pipe_buffer_map(context, info->index.resource, - PIPE_TRANSFER_READ | + PIPE_MAP_READ | add_transfer_flags, &in_transfer); } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_index_modify.h b/lib/mesa/src/gallium/auxiliary/util/u_index_modify.h index ba96725be..7540dbaae 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_index_modify.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_index_modify.h @@ -23,6 +23,10 @@ #ifndef UTIL_INDEX_MODIFY_H #define UTIL_INDEX_MODIFY_H +#ifdef __cplusplus +extern "C" { +#endif + struct pipe_context; struct pipe_resource; @@ -48,4 +52,8 @@ void util_rebuild_uint_elts_to_userptr(struct pipe_context *context, unsigned start, unsigned count, void *out); +#ifdef __cplusplus +} +#endif + #endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_inlines.h b/lib/mesa/src/gallium/auxiliary/util/u_inlines.h index 8adf343cf..f3a74d7fb 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_inlines.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_inlines.h @@ -33,6 +33,7 @@ #include "pipe/p_shader_tokens.h" #include "pipe/p_state.h" #include "pipe/p_screen.h" +#include "util/compiler.h" #include "util/u_debug.h" #include "util/u_debug_describe.h" #include "util/u_debug_refcnt.h" @@ -222,6 +223,14 @@ static inline void pipe_vertex_buffer_reference(struct pipe_vertex_buffer *dst, const struct pipe_vertex_buffer *src) { + if (dst->buffer.resource == src->buffer.resource) { + /* Just copy the fields, don't touch reference counts. */ + dst->stride = src->stride; + dst->is_user_buffer = src->is_user_buffer; + dst->buffer_offset = src->buffer_offset; + return; + } + pipe_vertex_buffer_unreference(dst); if (!src->is_user_buffer) pipe_resource_reference(&dst->buffer.resource, src->buffer.resource); @@ -321,7 +330,7 @@ pipe_buffer_create_const0(struct pipe_screen *screen, * Map a range of a resource. * \param offset start of region, in bytes * \param length size of region, in bytes - * \param access bitmask of PIPE_TRANSFER_x flags + * \param access bitmask of PIPE_MAP_x flags * \param transfer returns a transfer object */ static inline void * @@ -352,7 +361,7 @@ pipe_buffer_map_range(struct pipe_context *pipe, /** * Map whole resource. - * \param access bitmask of PIPE_TRANSFER_x flags + * \param access bitmask of PIPE_MAP_x flags * \param transfer returns a transfer object */ static inline void * @@ -405,7 +414,7 @@ pipe_buffer_write(struct pipe_context *pipe, const void *data) { /* Don't set any other usage bits. Drivers should derive them. */ - pipe->buffer_subdata(pipe, buf, PIPE_TRANSFER_WRITE, offset, size, data); + pipe->buffer_subdata(pipe, buf, PIPE_MAP_WRITE, offset, size, data); } /** @@ -421,11 +430,28 @@ pipe_buffer_write_nooverlap(struct pipe_context *pipe, const void *data) { pipe->buffer_subdata(pipe, buf, - (PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_UNSYNCHRONIZED), + (PIPE_MAP_WRITE | + PIPE_MAP_UNSYNCHRONIZED), offset, size, data); } +/** + * Utility for simplifying pipe_context::resource_copy_region calls + */ +static inline void +pipe_buffer_copy(struct pipe_context *pipe, + struct pipe_resource *dst, + struct pipe_resource *src, + unsigned dst_offset, + unsigned src_offset, + unsigned size) +{ + struct pipe_box box; + /* only these fields are used */ + box.x = (int)src_offset; + box.width = (int)size; + pipe->resource_copy_region(pipe, dst, 0, dst_offset, 0, 0, src, 0, &box); +} /** * Create a new resource and immediately put data into it @@ -458,7 +484,7 @@ pipe_buffer_read(struct pipe_context *pipe, map = (ubyte *) pipe_buffer_map_range(pipe, buf, offset, size, - PIPE_TRANSFER_READ, + PIPE_MAP_READ, &src_transfer); if (!map) return; @@ -470,7 +496,7 @@ pipe_buffer_read(struct pipe_context *pipe, /** * Map a resource for reading/writing. - * \param access bitmask of PIPE_TRANSFER_x flags + * \param access bitmask of PIPE_MAP_x flags */ static inline void * pipe_transfer_map(struct pipe_context *context, @@ -493,7 +519,7 @@ pipe_transfer_map(struct pipe_context *context, /** * Map a 3D (texture) resource for reading/writing. - * \param access bitmask of PIPE_TRANSFER_x flags + * \param access bitmask of PIPE_MAP_x flags */ static inline void * pipe_transfer_map_3d(struct pipe_context *context, @@ -531,9 +557,9 @@ pipe_set_constant_buffer(struct pipe_context *pipe, cb.buffer_offset = 0; cb.buffer_size = buf->width0; cb.user_buffer = NULL; - pipe->set_constant_buffer(pipe, shader, index, &cb); + pipe->set_constant_buffer(pipe, shader, index, false, &cb); } else { - pipe->set_constant_buffer(pipe, shader, index, NULL); + pipe->set_constant_buffer(pipe, shader, index, false, NULL); } } @@ -649,10 +675,16 @@ util_pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target, static inline void util_copy_constant_buffer(struct pipe_constant_buffer *dst, - const struct pipe_constant_buffer *src) + const struct pipe_constant_buffer *src, + bool take_ownership) { if (src) { - pipe_resource_reference(&dst->buffer, src->buffer); + if (take_ownership) { + pipe_resource_reference(&dst->buffer, NULL); + dst->buffer = src->buffer; + } else { + pipe_resource_reference(&dst->buffer, src->buffer); + } dst->buffer_offset = src->buffer_offset; dst->buffer_size = src->buffer_size; dst->user_buffer = src->user_buffer; @@ -708,7 +740,7 @@ util_max_layer(const struct pipe_resource *r, unsigned level) return u_minify(r->depth0, level) - 1; case PIPE_TEXTURE_CUBE: assert(r->array_size == 6); - /* fall-through */ + FALLTHROUGH; case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_CUBE_ARRAY: @@ -736,6 +768,52 @@ util_texrange_covers_whole_level(const struct pipe_resource *tex, depth == util_num_layers(tex, level); } +static inline bool +util_logicop_reads_dest(enum pipe_logicop op) +{ + switch (op) { + case PIPE_LOGICOP_NOR: + case PIPE_LOGICOP_AND_INVERTED: + case PIPE_LOGICOP_AND_REVERSE: + case PIPE_LOGICOP_INVERT: + case PIPE_LOGICOP_XOR: + case PIPE_LOGICOP_NAND: + case PIPE_LOGICOP_AND: + case PIPE_LOGICOP_EQUIV: + case PIPE_LOGICOP_NOOP: + case PIPE_LOGICOP_OR_INVERTED: + case PIPE_LOGICOP_OR_REVERSE: + case PIPE_LOGICOP_OR: + return true; + case PIPE_LOGICOP_CLEAR: + case PIPE_LOGICOP_COPY_INVERTED: + case PIPE_LOGICOP_COPY: + case PIPE_LOGICOP_SET: + return false; + } + unreachable("bad logicop"); +} + +static inline bool +util_writes_stencil(const struct pipe_stencil_state *s) +{ + return s->enabled && s->writemask && + ((s->fail_op != PIPE_STENCIL_OP_KEEP) || + (s->zpass_op != PIPE_STENCIL_OP_KEEP) || + (s->zfail_op != PIPE_STENCIL_OP_KEEP)); +} + +static inline bool +util_writes_depth_stencil(const struct pipe_depth_stencil_alpha_state *zsa) +{ + if (zsa->depth_enabled && zsa->depth_writemask && + (zsa->depth_func != PIPE_FUNC_NEVER)) + return true; + + return util_writes_stencil(&zsa->stencil[0]) || + util_writes_stencil(&zsa->stencil[1]); +} + static inline struct pipe_context * pipe_create_multimedia_context(struct pipe_screen *screen) { @@ -747,6 +825,11 @@ pipe_create_multimedia_context(struct pipe_screen *screen) return screen->context_create(screen, NULL, flags); } +static inline unsigned util_res_sample_count(struct pipe_resource *res) +{ + return res->nr_samples > 0 ? res->nr_samples : 1; +} + #ifdef __cplusplus } #endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.c b/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.c index 10e39e240..071bfdc62 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.c @@ -29,6 +29,65 @@ #include "util/u_memory.h" #include "u_prim_restart.h" +typedef struct { + uint32_t count; + uint32_t primCount; + uint32_t firstIndex; + int32_t baseVertex; + uint32_t reservedMustBeZero; +} DrawElementsIndirectCommand; + +static DrawElementsIndirectCommand +read_indirect_elements(struct pipe_context *context, const struct pipe_draw_indirect_info *indirect) +{ + DrawElementsIndirectCommand ret; + struct pipe_transfer *transfer = NULL; + void *map = NULL; + /* we only need the first 3 members */ + unsigned read_size = 3 * sizeof(uint32_t); + assert(indirect->buffer->width0 > 3 * sizeof(uint32_t)); + map = pipe_buffer_map_range(context, indirect->buffer, + indirect->offset, + read_size, + PIPE_MAP_READ, + &transfer); + assert(map); + memcpy(&ret, map, read_size); + pipe_buffer_unmap(context, transfer); + return ret; +} + +void +util_translate_prim_restart_data(unsigned index_size, + void *src_map, void *dst_map, + unsigned count, unsigned restart_index) +{ + if (index_size == 1) { + uint8_t *src = (uint8_t *) src_map; + uint16_t *dst = (uint16_t *) dst_map; + unsigned i; + for (i = 0; i < count; i++) { + dst[i] = (src[i] == restart_index) ? 0xffff : src[i]; + } + } + else if (index_size == 2) { + uint16_t *src = (uint16_t *) src_map; + uint16_t *dst = (uint16_t *) dst_map; + unsigned i; + for (i = 0; i < count; i++) { + dst[i] = (src[i] == restart_index) ? 0xffff : src[i]; + } + } + else { + uint32_t *src = (uint32_t *) src_map; + uint32_t *dst = (uint32_t *) dst_map; + unsigned i; + assert(index_size == 4); + for (i = 0; i < count; i++) { + dst[i] = (src[i] == restart_index) ? 0xffffffff : src[i]; + } + } +} /** * Translate an index buffer for primitive restart. @@ -40,6 +99,8 @@ enum pipe_error util_translate_prim_restart_ib(struct pipe_context *context, const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect_info, + const struct pipe_draw_start_count *draw, struct pipe_resource **dst_buffer) { struct pipe_screen *screen = context->screen; @@ -47,64 +108,50 @@ util_translate_prim_restart_ib(struct pipe_context *context, void *src_map = NULL, *dst_map = NULL; const unsigned src_index_size = info->index_size; unsigned dst_index_size; + DrawElementsIndirectCommand indirect; + unsigned count = draw->count; + unsigned start = draw->start; /* 1-byte indexes are converted to 2-byte indexes, 4-byte stays 4-byte */ dst_index_size = MAX2(2, info->index_size); assert(dst_index_size == 2 || dst_index_size == 4); - /* no user buffers for now */ - assert(!info->has_user_indices); + if (indirect_info && indirect_info->buffer) { + indirect = read_indirect_elements(context, indirect_info); + count = indirect.count; + start = indirect.firstIndex; + } /* Create new index buffer */ *dst_buffer = pipe_buffer_create(screen, PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_STREAM, - info->count * dst_index_size); + count * dst_index_size); if (!*dst_buffer) goto error; /* Map new / dest index buffer */ dst_map = pipe_buffer_map(context, *dst_buffer, - PIPE_TRANSFER_WRITE, &dst_transfer); + PIPE_MAP_WRITE, &dst_transfer); if (!dst_map) goto error; - /* Map original / src index buffer */ - src_map = pipe_buffer_map_range(context, info->index.resource, - info->start * src_index_size, - info->count * src_index_size, - PIPE_TRANSFER_READ, - &src_transfer); + if (info->has_user_indices) + src_map = (unsigned char*)info->index.user + start * src_index_size; + else + /* Map original / src index buffer */ + src_map = pipe_buffer_map_range(context, info->index.resource, + start * src_index_size, + count * src_index_size, + PIPE_MAP_READ, + &src_transfer); if (!src_map) goto error; - if (src_index_size == 1 && dst_index_size == 2) { - uint8_t *src = (uint8_t *) src_map; - uint16_t *dst = (uint16_t *) dst_map; - unsigned i; - for (i = 0; i < info->count; i++) { - dst[i] = (src[i] == info->restart_index) ? 0xffff : src[i]; - } - } - else if (src_index_size == 2 && dst_index_size == 2) { - uint16_t *src = (uint16_t *) src_map; - uint16_t *dst = (uint16_t *) dst_map; - unsigned i; - for (i = 0; i < info->count; i++) { - dst[i] = (src[i] == info->restart_index) ? 0xffff : src[i]; - } - } - else { - uint32_t *src = (uint32_t *) src_map; - uint32_t *dst = (uint32_t *) dst_map; - unsigned i; - assert(src_index_size == 4); - assert(dst_index_size == 4); - for (i = 0; i < info->count; i++) { - dst[i] = (src[i] == info->restart_index) ? 0xffffffff : src[i]; - } - } + util_translate_prim_restart_data(src_index_size, src_map, dst_map, + count, info->restart_index); - pipe_buffer_unmap(context, src_transfer); + if (src_transfer) + pipe_buffer_unmap(context, src_transfer); pipe_buffer_unmap(context, dst_transfer); return PIPE_OK; @@ -175,24 +222,38 @@ add_range(struct range_info *info, unsigned start, unsigned count) */ enum pipe_error util_draw_vbo_without_prim_restart(struct pipe_context *context, - const struct pipe_draw_info *info) + const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect_info, + const struct pipe_draw_start_count *draw) { const void *src_map; struct range_info ranges = {0}; struct pipe_draw_info new_info; + struct pipe_draw_start_count new_draw; struct pipe_transfer *src_transfer = NULL; unsigned i, start, count; + DrawElementsIndirectCommand indirect; + unsigned info_start = draw->start; + unsigned info_count = draw->count; + unsigned info_instance_count = info->instance_count; assert(info->index_size); assert(info->primitive_restart); + if (indirect_info && indirect_info->buffer) { + indirect = read_indirect_elements(context, indirect_info); + info_count = indirect.count; + info_start = indirect.firstIndex; + info_instance_count = indirect.primCount; + } + /* Get pointer to the index data */ if (!info->has_user_indices) { /* map the index buffer (only the range we need to scan) */ src_map = pipe_buffer_map_range(context, info->index.resource, - info->start * info->index_size, - info->count * info->index_size, - PIPE_TRANSFER_READ, + info_start * info->index_size, + info_count * info->index_size, + PIPE_MAP_READ, &src_transfer); if (!src_map) { return PIPE_ERROR_OUT_OF_MEMORY; @@ -204,16 +265,16 @@ util_draw_vbo_without_prim_restart(struct pipe_context *context, return PIPE_ERROR_BAD_INPUT; } src_map = (const uint8_t *) info->index.user - + info->start * info->index_size; + + info_start * info->index_size; } #define SCAN_INDEXES(TYPE) \ - for (i = 0; i <= info->count; i++) { \ - if (i == info->count || \ + for (i = 0; i <= info_count; i++) { \ + if (i == info_count || \ ((const TYPE *) src_map)[i] == info->restart_index) { \ /* cut / restart */ \ if (count > 0) { \ - if (!add_range(&ranges, info->start + start, count)) { \ + if (!add_range(&ranges, info_start + start, count)) { \ if (src_transfer) \ pipe_buffer_unmap(context, src_transfer); \ return PIPE_ERROR_OUT_OF_MEMORY; \ @@ -250,11 +311,14 @@ util_draw_vbo_without_prim_restart(struct pipe_context *context, /* draw ranges between the restart indexes */ new_info = *info; + new_draw = *draw; + /* we've effectively remapped this to a direct draw */ + new_info.instance_count = info_instance_count; new_info.primitive_restart = FALSE; for (i = 0; i < ranges.count; i++) { - new_info.start = ranges.ranges[i].start; - new_info.count = ranges.ranges[i].count; - context->draw_vbo(context, &new_info); + new_draw.start = ranges.ranges[i].start; + new_draw.count = ranges.ranges[i].count; + context->draw_vbo(context, &new_info, NULL, &new_draw, 1); } FREE(ranges.ranges); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.h b/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.h index 0e17ce5eb..5ba89de28 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_prim_restart.h @@ -41,16 +41,36 @@ struct pipe_draw_info; union pipe_index_binding; struct pipe_resource; +void +util_translate_prim_restart_data(unsigned index_size, + void *src_map, void *dst_map, + unsigned count, unsigned restart_index); enum pipe_error util_translate_prim_restart_ib(struct pipe_context *context, const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count *draw, struct pipe_resource **dst_buffer); enum pipe_error util_draw_vbo_without_prim_restart(struct pipe_context *context, - const struct pipe_draw_info *info); + const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count *draw); +static inline unsigned +util_prim_restart_index_from_size(unsigned index_size) +{ + if (index_size == 1) + return 0xff; + if (index_size == 2) + return 0xffff; + if (index_size == 4) + return 0xffffffff; + unreachable("unknown index size passed"); + return 0; +} #ifdef __cplusplus } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_pstipple.c b/lib/mesa/src/gallium/auxiliary/util/u_pstipple.c index c3c2ca817..894ba2754 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_pstipple.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_pstipple.c @@ -70,7 +70,7 @@ util_pstipple_update_stipple_texture(struct pipe_context *pipe, /* map texture memory */ data = pipe_transfer_map(pipe, tex, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, 32, 32, &transfer); + PIPE_MAP_WRITE, 0, 0, 32, 32, &transfer); /* * Load alpha texture. diff --git a/lib/mesa/src/gallium/auxiliary/util/u_range.h b/lib/mesa/src/gallium/auxiliary/util/u_range.h index 9a158a4fc..90dc80bbc 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_range.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_range.h @@ -36,6 +36,8 @@ #include "os/os_thread.h" #include "pipe/p_state.h" +#include "pipe/p_screen.h" +#include "util/u_atomic.h" #include "util/u_math.h" #include "util/simple_mtx.h" @@ -61,7 +63,8 @@ util_range_add(struct pipe_resource *resource, struct util_range *range, unsigned start, unsigned end) { if (start < range->start || end > range->end) { - if (resource->flags & PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE) { + if (resource->flags & PIPE_RESOURCE_FLAG_SINGLE_THREAD_USE || + p_atomic_read(&resource->screen->num_contexts) == 1) { range->start = MIN2(start, range->start); range->end = MAX2(end, range->end); } else { diff --git a/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.c b/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.c index b0f1ed72d..a90515037 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.c @@ -417,18 +417,18 @@ util_make_fs_blit_zs(struct pipe_context *pipe, unsigned zs_mask, if (zs_mask & PIPE_MASK_S) { stencil_sampler = ureg_DECL_sampler(ureg, zs_mask & PIPE_MASK_Z ? 1 : 0); - ureg_DECL_sampler_view(ureg, 0, tex_target, + ureg_DECL_sampler_view(ureg, zs_mask & PIPE_MASK_Z ? 1 : 0, tex_target, TGSI_RETURN_TYPE_UINT, TGSI_RETURN_TYPE_UINT, TGSI_RETURN_TYPE_UINT, TGSI_RETURN_TYPE_UINT); - ureg_load_tex(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), coord, + ureg_load_tex(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), coord, stencil_sampler, tex_target, load_level_zero, use_txf); stencil = ureg_DECL_output(ureg, TGSI_SEMANTIC_STENCIL, 0); ureg_MOV(ureg, ureg_writemask(stencil, TGSI_WRITEMASK_Y), - ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); + ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); } ureg_END(ureg); @@ -530,6 +530,7 @@ util_make_fs_blit_msaa_gen(struct pipe_context *pipe, const char *samp_type, const char *output_semantic, const char *output_mask, + const char *swizzle, const char *conversion_decl, const char *conversion) { @@ -545,7 +546,7 @@ util_make_fs_blit_msaa_gen(struct pipe_context *pipe, "F2U TEMP[0], IN[0]\n" "TXF TEMP[0], TEMP[0], SAMP[0], %s\n" "%s" - "MOV OUT[0]%s, TEMP[0]\n" + "MOV OUT[0]%s, TEMP[0]%s\n" "END\n"; const char *type = tgsi_texture_names[tgsi_tex]; @@ -557,7 +558,7 @@ util_make_fs_blit_msaa_gen(struct pipe_context *pipe, tgsi_tex == TGSI_TEXTURE_2D_ARRAY_MSAA); snprintf(text, sizeof(text), shader_templ, type, samp_type, - output_semantic, conversion_decl, type, conversion, output_mask); + output_semantic, conversion_decl, type, conversion, output_mask, swizzle); if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { puts(text); @@ -608,7 +609,7 @@ util_make_fs_blit_msaa_color(struct pipe_context *pipe, } return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, samp_type, - "COLOR[0]", "", conversion_decl, + "COLOR[0]", "", "", conversion_decl, conversion); } @@ -623,7 +624,7 @@ util_make_fs_blit_msaa_depth(struct pipe_context *pipe, enum tgsi_texture_type tgsi_tex) { return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, "FLOAT", - "POSITION", ".z", "", ""); + "POSITION", ".z", ".xxxx", "", ""); } @@ -637,7 +638,7 @@ util_make_fs_blit_msaa_stencil(struct pipe_context *pipe, enum tgsi_texture_type tgsi_tex) { return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, "UINT", - "STENCIL", ".y", "", ""); + "STENCIL", ".y", "", "", ""); } @@ -656,13 +657,15 @@ util_make_fs_blit_msaa_depthstencil(struct pipe_context *pipe, "FRAG\n" "DCL IN[0], GENERIC[0], LINEAR\n" "DCL SAMP[0..1]\n" - "DCL SVIEW[0..1], %s, FLOAT\n" + "DCL SVIEW[0], %s, FLOAT\n" + "DCL SVIEW[1], %s, UINT\n" "DCL OUT[0], POSITION\n" "DCL OUT[1], STENCIL\n" - "DCL TEMP[0]\n" + "DCL TEMP[0..1]\n" "F2U TEMP[0], IN[0]\n" - "TXF OUT[0].z, TEMP[0], SAMP[0], %s\n" + "TXF TEMP[1], TEMP[0], SAMP[0], %s\n" + "MOV OUT[0].z, TEMP[1].xxxx\n" "TXF OUT[1].y, TEMP[0], SAMP[1], %s\n" "END\n"; @@ -674,7 +677,7 @@ util_make_fs_blit_msaa_depthstencil(struct pipe_context *pipe, assert(tgsi_tex == TGSI_TEXTURE_2D_MSAA || tgsi_tex == TGSI_TEXTURE_2D_ARRAY_MSAA); - sprintf(text, shader_templ, type, type, type); + sprintf(text, shader_templ, type, type, type, type); if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { assert(0); @@ -895,6 +898,7 @@ util_make_geometry_passthrough_shader(struct pipe_context *pipe, return ureg_create_shader_and_destroy(ureg, pipe); } + /** * Blit from color to ZS or from ZS to color in a manner that is equivalent * to memcpy. @@ -1056,3 +1060,147 @@ util_make_fs_pack_color_zs(struct pipe_context *pipe, return ureg_create_shader_and_destroy(ureg, pipe); } + + +/** + * Create passthrough tessellation control shader. + * Passthrough tessellation control shader has output of vertex shader + * as input and input of tessellation eval shader as output. + */ +void * +util_make_tess_ctrl_passthrough_shader(struct pipe_context *pipe, + uint num_vs_outputs, + uint num_tes_inputs, + const ubyte *vs_semantic_names, + const ubyte *vs_semantic_indexes, + const ubyte *tes_semantic_names, + const ubyte *tes_semantic_indexes, + const unsigned vertices_per_patch) +{ + unsigned i, j; + unsigned num_regs; + + struct ureg_program *ureg; + struct ureg_dst temp, addr; + struct ureg_src invocationID; + struct ureg_dst dst[PIPE_MAX_SHADER_OUTPUTS]; + struct ureg_src src[PIPE_MAX_SHADER_INPUTS]; + + ureg = ureg_create(PIPE_SHADER_TESS_CTRL); + + if (!ureg) + return NULL; + + ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT, vertices_per_patch); + + num_regs = 0; + + for (i = 0; i < num_tes_inputs; i++) { + switch (tes_semantic_names[i]) { + case TGSI_SEMANTIC_POSITION: + case TGSI_SEMANTIC_PSIZE: + case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_BCOLOR: + case TGSI_SEMANTIC_CLIPDIST: + case TGSI_SEMANTIC_CLIPVERTEX: + case TGSI_SEMANTIC_TEXCOORD: + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + for (j = 0; j < num_vs_outputs; j++) { + if (tes_semantic_names[i] == vs_semantic_names[j] && + tes_semantic_indexes[i] == vs_semantic_indexes[j]) { + + dst[num_regs] = ureg_DECL_output(ureg, + tes_semantic_names[i], + tes_semantic_indexes[i]); + src[num_regs] = ureg_DECL_input(ureg, vs_semantic_names[j], + vs_semantic_indexes[j], + 0, 1); + + if (tes_semantic_names[i] == TGSI_SEMANTIC_GENERIC || + tes_semantic_names[i] == TGSI_SEMANTIC_POSITION) { + src[num_regs] = ureg_src_dimension(src[num_regs], 0); + dst[num_regs] = ureg_dst_dimension(dst[num_regs], 0); + } + + num_regs++; + break; + } + } + break; + default: + break; + } + } + + dst[num_regs] = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, + num_regs); + src[num_regs] = ureg_DECL_constant(ureg, 0); + num_regs++; + dst[num_regs] = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, + num_regs); + src[num_regs] = ureg_DECL_constant(ureg, 1); + num_regs++; + + if (vertices_per_patch > 1) { + invocationID = ureg_DECL_system_value(ureg, + TGSI_SEMANTIC_INVOCATIONID, 0); + temp = ureg_DECL_local_temporary(ureg); + addr = ureg_DECL_address(ureg); + ureg_UARL(ureg, ureg_writemask(addr, TGSI_WRITEMASK_X), + ureg_scalar(invocationID, TGSI_SWIZZLE_X)); + } + + for (i = 0; i < num_regs; i++) { + if (dst[i].Dimension && vertices_per_patch > 1) { + struct ureg_src addr_x = ureg_scalar(ureg_src(addr), TGSI_SWIZZLE_X); + ureg_MOV(ureg, temp, ureg_src_dimension_indirect(src[i], + addr_x, 0)); + ureg_MOV(ureg, ureg_dst_dimension_indirect(dst[i], + addr_x, 0), ureg_src(temp)); + } + else + ureg_MOV(ureg, dst[i], src[i]); + } + + ureg_END(ureg); + + return ureg_create_shader_and_destroy(ureg, pipe); +} + +void * +util_make_fs_stencil_blit(struct pipe_context *pipe, bool msaa_src) +{ + static const char shader_templ[] = + "FRAG\n" + "DCL IN[0], GENERIC[0], LINEAR\n" + "DCL SAMP[0]\n" + "DCL CONST[0][0]\n" + "DCL TEMP[0]\n" + + "F2U TEMP[0], IN[0]\n" + "TXF_LZ TEMP[0].x, TEMP[0], SAMP[0], %s\n" + "AND TEMP[0].x, TEMP[0], CONST[0][0]\n" + "USNE TEMP[0].x, TEMP[0], CONST[0][0]\n" + "U2F TEMP[0].x, TEMP[0]\n" + "KILL_IF -TEMP[0].xxxx\n" + "END\n"; + + char text[sizeof(shader_templ)+100]; + struct tgsi_token tokens[1000]; + struct pipe_shader_state state = { 0 }; + + enum tgsi_texture_type tgsi_tex = msaa_src ? TGSI_TEXTURE_2D_MSAA : + TGSI_TEXTURE_2D; + + sprintf(text, shader_templ, tgsi_texture_names[tgsi_tex]); + + if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { + assert(0); + return NULL; + } + + pipe_shader_state_from_tgsi(&state, tokens); + + return pipe->create_fs_state(pipe, &state); +} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.h b/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.h index 501906d6f..6bc794018 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_simple_shaders.h @@ -159,6 +159,19 @@ util_make_fs_pack_color_zs(struct pipe_context *pipe, enum pipe_format zs_format, bool dst_is_color); +extern void * +util_make_tess_ctrl_passthrough_shader(struct pipe_context *pipe, + uint num_vs_outputs, + uint num_tes_inputs, + const ubyte *vs_semantic_names, + const ubyte *vs_semantic_indexes, + const ubyte *tes_semantic_names, + const ubyte *tes_semantic_indexes, + const unsigned vertices_per_patch); + +void * +util_make_fs_stencil_blit(struct pipe_context *pipe, bool msaa_src); + #ifdef __cplusplus } #endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_suballoc.c b/lib/mesa/src/gallium/auxiliary/util/u_suballoc.c index d54026edf..c5598ebc7 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_suballoc.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_suballoc.c @@ -36,21 +36,6 @@ #include "u_suballoc.h" - -struct u_suballocator { - struct pipe_context *pipe; - - unsigned size; /* Size of the whole buffer, in bytes. */ - unsigned bind; /* Bitmask of PIPE_BIND_* flags. */ - enum pipe_resource_usage usage; - unsigned flags; /* bitmask of PIPE_RESOURCE_FLAG_x */ - boolean zero_buffer_memory; /* If the buffer contents should be zeroed. */ - - struct pipe_resource *buffer; /* The buffer we suballocate from. */ - unsigned offset; /* Aligned offset pointing at the first unused byte. */ -}; - - /** * Create a suballocator. * @@ -59,14 +44,14 @@ struct u_suballocator { * cleared to 0 after the allocation. * */ -struct u_suballocator * -u_suballocator_create(struct pipe_context *pipe, unsigned size, unsigned bind, - enum pipe_resource_usage usage, unsigned flags, - boolean zero_buffer_memory) +void +u_suballocator_init(struct u_suballocator *allocator, + struct pipe_context *pipe, + unsigned size, unsigned bind, + enum pipe_resource_usage usage, unsigned flags, + boolean zero_buffer_memory) { - struct u_suballocator *allocator = CALLOC_STRUCT(u_suballocator); - if (!allocator) - return NULL; + memset(allocator, 0, sizeof(*allocator)); allocator->pipe = pipe; allocator->size = size; @@ -74,14 +59,12 @@ u_suballocator_create(struct pipe_context *pipe, unsigned size, unsigned bind, allocator->usage = usage; allocator->flags = flags; allocator->zero_buffer_memory = zero_buffer_memory; - return allocator; } void u_suballocator_destroy(struct u_suballocator *allocator) { pipe_resource_reference(&allocator->buffer, NULL); - FREE(allocator); } void @@ -131,7 +114,7 @@ u_suballocator_alloc(struct u_suballocator *allocator, unsigned size, } else { struct pipe_transfer *transfer = NULL; void *ptr = pipe_buffer_map(pipe, allocator->buffer, - PIPE_TRANSFER_WRITE, &transfer); + PIPE_MAP_WRITE, &transfer); memset(ptr, 0, allocator->size); pipe_buffer_unmap(pipe, transfer); } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_suballoc.h b/lib/mesa/src/gallium/auxiliary/util/u_suballoc.h index e35382f04..ed95e7c32 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_suballoc.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_suballoc.h @@ -31,12 +31,31 @@ #ifndef U_SUBALLOC #define U_SUBALLOC -struct u_suballocator; +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_context; + +struct u_suballocator { + struct pipe_context *pipe; + + unsigned size; /* Size of the whole buffer, in bytes. */ + unsigned bind; /* Bitmask of PIPE_BIND_* flags. */ + enum pipe_resource_usage usage; + unsigned flags; /* bitmask of PIPE_RESOURCE_FLAG_x */ + boolean zero_buffer_memory; /* If the buffer contents should be zeroed. */ -struct u_suballocator * -u_suballocator_create(struct pipe_context *pipe, unsigned size, unsigned bind, - enum pipe_resource_usage usage, unsigned flags, - boolean zero_buffer_memory); + struct pipe_resource *buffer; /* The buffer we suballocate from. */ + unsigned offset; /* Aligned offset pointing at the first unused byte. */ +}; + +void +u_suballocator_init(struct u_suballocator *allocator, + struct pipe_context *pipe, + unsigned size, unsigned bind, + enum pipe_resource_usage usage, unsigned flags, + boolean zero_buffer_memory); void u_suballocator_destroy(struct u_suballocator *allocator); @@ -46,4 +65,8 @@ u_suballocator_alloc(struct u_suballocator *allocator, unsigned size, unsigned alignment, unsigned *out_offset, struct pipe_resource **outbuf); +#ifdef __cplusplus +} +#endif + #endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_surface.c b/lib/mesa/src/gallium/auxiliary/util/u_surface.c index 718411d8d..1a8acb358 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_surface.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_surface.c @@ -41,7 +41,7 @@ #include "util/u_rect.h" #include "util/u_surface.h" #include "util/u_pack_color.h" - +#include "util/u_memset.h" /** * Initialize a pipe_surface object. 'view' is considered to have @@ -141,9 +141,13 @@ util_fill_rect(ubyte * dst, break; case 4: for (i = 0; i < height; i++) { - uint32_t *row = (uint32_t *)dst; - for (j = 0; j < width; j++) - *row++ = uc->ui[0]; + util_memset32(dst, uc->ui[0], width); + dst += dst_stride; + } + break; + case 8: + for (i = 0; i < height; i++) { + util_memset64(dst, ((uint64_t *)uc)[0], width); dst += dst_stride; } break; @@ -287,7 +291,7 @@ util_resource_copy_region(struct pipe_context *pipe, src_map = pipe->transfer_map(pipe, src, src_level, - PIPE_TRANSFER_READ, + PIPE_MAP_READ, &src_box, &src_trans); assert(src_map); if (!src_map) { @@ -297,8 +301,8 @@ util_resource_copy_region(struct pipe_context *pipe, dst_map = pipe->transfer_map(pipe, dst, dst_level, - PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_DISCARD_RANGE, &dst_box, + PIPE_MAP_WRITE | + PIPE_MAP_DISCARD_RANGE, &dst_box, &dst_trans); assert(dst_map); if (!dst_map) { @@ -338,21 +342,7 @@ util_clear_color_texture_helper(struct pipe_transfer *dst_trans, assert(dst_trans->stride > 0); - if (util_format_is_pure_integer(format)) { - /* - * We expect int/uint clear values here, though some APIs - * might disagree (but in any case util_pack_color() - * couldn't handle it)... - */ - if (util_format_is_pure_sint(format)) { - util_format_write_4i(format, color->i, 0, &uc, 0, 0, 0, 1, 1); - } else { - assert(util_format_is_pure_uint(format)); - util_format_write_4ui(format, color->ui, 0, &uc, 0, 0, 0, 1, 1); - } - } else { - util_pack_color(color->f, format, &uc); - } + util_pack_color_union(format, &uc, color); util_fill_box(dst_map, format, dst_trans->stride, dst_trans->layer_stride, @@ -374,7 +364,7 @@ util_clear_color_texture(struct pipe_context *pipe, dst_map = pipe_transfer_map_3d(pipe, texture, level, - PIPE_TRANSFER_WRITE, + PIPE_MAP_WRITE, dstx, dsty, dstz, width, height, depth, &dst_trans); @@ -426,7 +416,7 @@ util_clear_render_target(struct pipe_context *pipe, dst_map = pipe_transfer_map(pipe, dst->texture, 0, 0, - PIPE_TRANSFER_WRITE, + PIPE_MAP_WRITE, dx, 0, w, 1, &dst_trans); if (dst_map) { @@ -444,6 +434,117 @@ util_clear_render_target(struct pipe_context *pipe, } static void +util_fill_zs_rect(ubyte *dst_map, + enum pipe_format format, + bool need_rmw, + unsigned clear_flags, + unsigned dst_stride, + unsigned width, + unsigned height, + uint64_t zstencil) +{ + unsigned i, j; + switch (util_format_get_blocksize(format)) { + case 1: + assert(format == PIPE_FORMAT_S8_UINT); + if(dst_stride == width) + memset(dst_map, (uint8_t) zstencil, height * width); + else { + for (i = 0; i < height; i++) { + memset(dst_map, (uint8_t) zstencil, width); + dst_map += dst_stride; + } + } + break; + case 2: + assert(format == PIPE_FORMAT_Z16_UNORM); + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst_map; + for (j = 0; j < width; j++) + *row++ = (uint16_t) zstencil; + dst_map += dst_stride; + } + break; + case 4: + if (!need_rmw) { + for (i = 0; i < height; i++) { + util_memset32(dst_map, (uint32_t)zstencil, width); + dst_map += dst_stride; + } + } + else { + uint32_t dst_mask; + if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) + dst_mask = 0x00ffffff; + else { + assert(format == PIPE_FORMAT_S8_UINT_Z24_UNORM); + dst_mask = 0xffffff00; + } + if (clear_flags & PIPE_CLEAR_DEPTH) + dst_mask = ~dst_mask; + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst_map; + for (j = 0; j < width; j++) { + uint32_t tmp = *row & dst_mask; + *row++ = tmp | ((uint32_t) zstencil & ~dst_mask); + } + dst_map += dst_stride; + } + } + break; + case 8: + if (!need_rmw) { + for (i = 0; i < height; i++) { + util_memset64(dst_map, zstencil, width); + dst_map += dst_stride; + } + } + else { + uint64_t src_mask; + + if (clear_flags & PIPE_CLEAR_DEPTH) + src_mask = 0x00000000ffffffffull; + else + src_mask = 0x000000ff00000000ull; + + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst_map; + for (j = 0; j < width; j++) { + uint64_t tmp = *row & ~src_mask; + *row++ = tmp | (zstencil & src_mask); + } + dst_map += dst_stride; + } + } + break; + default: + assert(0); + break; + } +} + +void +util_fill_zs_box(ubyte *dst, + enum pipe_format format, + bool need_rmw, + unsigned clear_flags, + unsigned stride, + unsigned layer_stride, + unsigned width, + unsigned height, + unsigned depth, + uint64_t zstencil) +{ + unsigned layer; + + for (layer = 0; layer < depth; layer++) { + util_fill_zs_rect(dst, format, need_rmw, clear_flags, stride, + width, height, zstencil); + dst += layer_stride; + } +} + +static void util_clear_depth_stencil_texture(struct pipe_context *pipe, struct pipe_resource *texture, enum pipe_format format, @@ -455,9 +556,6 @@ util_clear_depth_stencil_texture(struct pipe_context *pipe, struct pipe_transfer *dst_trans; ubyte *dst_map; boolean need_rmw = FALSE; - unsigned dst_stride; - ubyte *dst_layer; - unsigned i, j, layer; if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) && ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && @@ -467,104 +565,20 @@ util_clear_depth_stencil_texture(struct pipe_context *pipe, dst_map = pipe_transfer_map_3d(pipe, texture, level, - (need_rmw ? PIPE_TRANSFER_READ_WRITE : - PIPE_TRANSFER_WRITE), + (need_rmw ? PIPE_MAP_READ_WRITE : + PIPE_MAP_WRITE), dstx, dsty, dstz, width, height, depth, &dst_trans); assert(dst_map); if (!dst_map) return; - dst_stride = dst_trans->stride; - dst_layer = dst_map; assert(dst_trans->stride > 0); - for (layer = 0; layer < depth; layer++) { - dst_map = dst_layer; - - switch (util_format_get_blocksize(format)) { - case 1: - assert(format == PIPE_FORMAT_S8_UINT); - if(dst_stride == width) - memset(dst_map, (uint8_t) zstencil, height * width); - else { - for (i = 0; i < height; i++) { - memset(dst_map, (uint8_t) zstencil, width); - dst_map += dst_stride; - } - } - break; - case 2: - assert(format == PIPE_FORMAT_Z16_UNORM); - for (i = 0; i < height; i++) { - uint16_t *row = (uint16_t *)dst_map; - for (j = 0; j < width; j++) - *row++ = (uint16_t) zstencil; - dst_map += dst_stride; - } - break; - case 4: - if (!need_rmw) { - for (i = 0; i < height; i++) { - uint32_t *row = (uint32_t *)dst_map; - for (j = 0; j < width; j++) - *row++ = (uint32_t) zstencil; - dst_map += dst_stride; - } - } - else { - uint32_t dst_mask; - if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) - dst_mask = 0x00ffffff; - else { - assert(format == PIPE_FORMAT_S8_UINT_Z24_UNORM); - dst_mask = 0xffffff00; - } - if (clear_flags & PIPE_CLEAR_DEPTH) - dst_mask = ~dst_mask; - for (i = 0; i < height; i++) { - uint32_t *row = (uint32_t *)dst_map; - for (j = 0; j < width; j++) { - uint32_t tmp = *row & dst_mask; - *row++ = tmp | ((uint32_t) zstencil & ~dst_mask); - } - dst_map += dst_stride; - } - } - break; - case 8: - if (!need_rmw) { - for (i = 0; i < height; i++) { - uint64_t *row = (uint64_t *)dst_map; - for (j = 0; j < width; j++) - *row++ = zstencil; - dst_map += dst_stride; - } - } - else { - uint64_t src_mask; - - if (clear_flags & PIPE_CLEAR_DEPTH) - src_mask = 0x00000000ffffffffull; - else - src_mask = 0x000000ff00000000ull; - - for (i = 0; i < height; i++) { - uint64_t *row = (uint64_t *)dst_map; - for (j = 0; j < width; j++) { - uint64_t tmp = *row & ~src_mask; - *row++ = tmp | (zstencil & src_mask); - } - dst_map += dst_stride; - } - } - break; - default: - assert(0); - break; - } - dst_layer += dst_trans->layer_stride; - } + util_fill_zs_box(dst_map, format, need_rmw, clear_flags, + dst_trans->stride, + dst_trans->layer_stride, width, height, + depth, zstencil); pipe->transfer_unmap(pipe, dst_trans); } @@ -591,12 +605,12 @@ util_clear_texture(struct pipe_context *pipe, if (util_format_has_depth(desc)) { clear |= PIPE_CLEAR_DEPTH; - desc->unpack_z_float(&depth, 0, data, 0, 1, 1); + util_format_unpack_z_float(tex->format, &depth, data, 1); } if (util_format_has_stencil(desc)) { clear |= PIPE_CLEAR_STENCIL; - desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1); + util_format_unpack_s_8uint(tex->format, &stencil, data, 1); } zstencil = util_pack64_z_stencil(tex->format, depth, stencil); @@ -606,12 +620,7 @@ util_clear_texture(struct pipe_context *pipe, box->width, box->height, box->depth); } else { union pipe_color_union color; - if (util_format_is_pure_uint(tex->format)) - desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1); - else if (util_format_is_pure_sint(tex->format)) - desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1); - else - desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1); + util_format_unpack_rgba(tex->format, color.ui, data, 1); util_clear_color_texture(pipe, tex, tex->format, &color, level, box->x, box->y, box->z, @@ -760,7 +769,8 @@ util_can_blit_via_copy_region(const struct pipe_blit_info *blit, blit->filter != PIPE_TEX_FILTER_NEAREST || blit->scissor_enable || blit->num_window_rectangles > 0 || - blit->alpha_blend) { + blit->alpha_blend || + blit->render_condition_enable) { return FALSE; } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_surface.h b/lib/mesa/src/gallium/auxiliary/util/u_surface.h index f6149563e..61a8d512f 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_surface.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_surface.h @@ -66,6 +66,12 @@ util_fill_box(ubyte * dst, enum pipe_format format, unsigned width, unsigned height, unsigned depth, union util_color *uc); +extern void +util_fill_zs_box(ubyte *dst, enum pipe_format format, + bool need_rmw, unsigned clear_flags, unsigned stride, + unsigned layer_stride, unsigned width, + unsigned height, unsigned depth, + uint64_t zstencil); extern void util_resource_copy_region(struct pipe_context *pipe, diff --git a/lib/mesa/src/gallium/auxiliary/util/u_surfaces.c b/lib/mesa/src/gallium/auxiliary/util/u_surfaces.c deleted file mode 100644 index 78b2506e4..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_surfaces.c +++ /dev/null @@ -1,124 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 Luca Barbieri - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "u_surfaces.h" -#include "util/u_hash_table.h" -#include "util/u_inlines.h" -#include "util/u_memory.h" - -boolean -util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, - struct pipe_context *ctx, struct pipe_resource *pt, - unsigned level, unsigned layer, - struct pipe_surface **res) -{ - struct pipe_surface *ps; - - if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - { /* or 2D array */ - if(!us->u.hash) - us->u.hash = cso_hash_create(); - - ps = cso_hash_iter_data(cso_hash_find(us->u.hash, (layer << 8) | level)); - } - else - { - if(!us->u.array) - us->u.array = CALLOC(pt->last_level + 1, sizeof(struct pipe_surface *)); - ps = us->u.array[level]; - } - - if(ps && ps->context == ctx) - { - p_atomic_inc(&ps->reference.count); - *res = ps; - return FALSE; - } - - ps = (struct pipe_surface *)CALLOC(1, surface_struct_size); - if (!ps) - { - *res = NULL; - return FALSE; - } - - pipe_surface_init(ctx, ps, pt, level, layer); - - if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - cso_hash_insert(us->u.hash, (layer << 8) | level, ps); - else - us->u.array[level] = ps; - - *res = ps; - return TRUE; -} - -void -util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps) -{ - struct pipe_resource *pt = ps->texture; - if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - { /* or 2D array */ - cso_hash_erase(us->u.hash, cso_hash_find(us->u.hash, (ps->u.tex.first_layer << 8) | ps->u.tex.level)); - } - else - us->u.array[ps->u.tex.level] = 0; -} - -void -util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void (*destroy_surface) (struct pipe_surface *)) -{ - if(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE) - { /* or 2D array */ - if(us->u.hash) - { - struct cso_hash_iter iter; - iter = cso_hash_first_node(us->u.hash); - while (!cso_hash_iter_is_null(iter)) { - destroy_surface(cso_hash_iter_data(iter)); - iter = cso_hash_iter_next(iter); - } - - cso_hash_delete(us->u.hash); - us->u.hash = NULL; - } - } - else - { - if(us->u.array) - { - unsigned i; - for(i = 0; i <= pt->last_level; ++i) - { - struct pipe_surface *ps = us->u.array[i]; - if (ps) - destroy_surface(ps); - } - FREE(us->u.array); - us->u.array = NULL; - } - } -} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_surfaces.h b/lib/mesa/src/gallium/auxiliary/util/u_surfaces.h deleted file mode 100644 index b84694c54..000000000 --- a/lib/mesa/src/gallium/auxiliary/util/u_surfaces.h +++ /dev/null @@ -1,101 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 Luca Barbieri - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef U_SURFACES_H_ -#define U_SURFACES_H_ - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" -#include "util/u_atomic.h" -#include "cso_cache/cso_hash.h" - -struct util_surfaces -{ - union - { - struct cso_hash *hash; - struct pipe_surface **array; - void* pv; - } u; -}; - -/* Return value indicates if the pipe surface result is new */ -boolean -util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size, - struct pipe_context *ctx, struct pipe_resource *pt, - unsigned level, unsigned layer, - struct pipe_surface **res); - -/* fast inline path for the very common case */ -static inline boolean -util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, - struct pipe_context *ctx, struct pipe_resource *pt, - unsigned level, unsigned layer, - struct pipe_surface **res) -{ - if(likely((pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT) && us->u.array)) - { - struct pipe_surface *ps = us->u.array[level]; - if(ps && ps->context == ctx) - { - p_atomic_inc(&ps->reference.count); - *res = ps; - return FALSE; - } - } - - return util_surfaces_do_get(us, surface_struct_size, ctx, pt, level, layer, res); -} - -static inline struct pipe_surface * -util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned level, unsigned layer) -{ - if(!us->u.pv) - return 0; - - if(unlikely(pt->target == PIPE_TEXTURE_3D || pt->target == PIPE_TEXTURE_CUBE)) - return cso_hash_iter_data(cso_hash_find(us->u.hash, (layer << 8) | level)); - else - return us->u.array[level]; -} - -void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps); - -static inline void -util_surfaces_detach(struct util_surfaces *us, struct pipe_surface *ps) -{ - if(likely(ps->texture->target == PIPE_TEXTURE_2D || ps->texture->target == PIPE_TEXTURE_RECT)) - { - us->u.array[ps->u.tex.level] = 0; - return; - } - - util_surfaces_do_detach(us, ps); -} - -void util_surfaces_destroy(struct util_surfaces *us, struct pipe_resource *pt, void (*destroy_surface) (struct pipe_surface *)); - -#endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_tests.c b/lib/mesa/src/gallium/auxiliary/util/u_tests.c index efd290f89..bce9f8522 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_tests.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_tests.c @@ -38,7 +38,7 @@ #include "tgsi/tgsi_strings.h" #include "tgsi/tgsi_text.h" #include "cso_cache/cso_context.h" -#include "state_tracker/winsys_handle.h" +#include "frontend/winsys_handle.h" #include <stdio.h> #define TOLERANCE 0.01 @@ -97,7 +97,7 @@ util_set_blend_normal(struct cso_context *cso) static void util_set_dsa_disable(struct cso_context *cso) { - struct pipe_depth_stencil_alpha_state dsa = {{0}}; + struct pipe_depth_stencil_alpha_state dsa = {{{0}}}; cso_set_depth_stencil_alpha(cso, &dsa); } @@ -126,6 +126,10 @@ util_set_max_viewport(struct cso_context *cso, struct pipe_resource *tex) viewport.translate[0] = 0.5f * tex->width0; viewport.translate[1] = 0.5f * tex->height0; viewport.translate[2] = 0.0f; + viewport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X; + viewport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y; + viewport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z; + viewport.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W; cso_set_viewport(cso, &viewport); } @@ -134,17 +138,17 @@ static void util_set_interleaved_vertex_elements(struct cso_context *cso, unsigned num_elements) { + struct cso_velems_state velem; unsigned i; - struct pipe_vertex_element *velem = - calloc(1, num_elements * sizeof(struct pipe_vertex_element)); + memset(&velem, 0, sizeof(velem)); + velem.count = num_elements; for (i = 0; i < num_elements; i++) { - velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - velem[i].src_offset = i * 16; + velem.velems[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + velem.velems[i].src_offset = i * 16; } - cso_set_vertex_elements(cso, num_elements, velem); - free(velem); + cso_set_vertex_elements(cso, &velem); } static void * @@ -177,7 +181,7 @@ util_set_common_states_and_clear(struct cso_context *cso, struct pipe_context *c util_set_rasterizer_normal(cso); util_set_max_viewport(cso, cb); - ctx->clear(ctx, PIPE_CLEAR_COLOR0, (void*)clear_color, 0, 0); + ctx->clear(ctx, PIPE_CLEAR_COLOR0, NULL, (void*)clear_color, 0, 0); } static void @@ -227,9 +231,9 @@ util_probe_rect_rgba_multi(struct pipe_context *ctx, struct pipe_resource *tex, unsigned x,y,e,c; bool pass = true; - map = pipe_transfer_map(ctx, tex, 0, 0, PIPE_TRANSFER_READ, + map = pipe_transfer_map(ctx, tex, 0, 0, PIPE_MAP_READ, offx, offy, w, h, &transfer); - pipe_get_tile_rgba(transfer, map, 0, 0, w, h, pixels); + pipe_get_tile_rgba(transfer, map, 0, 0, w, h, tex->format, pixels); pipe_transfer_unmap(ctx, transfer); for (e = 0; e < num_expected_colors; e++) { @@ -388,7 +392,7 @@ null_sampler_view(struct pipe_context *ctx, unsigned tgsi_tex_target) PIPE_FORMAT_R8G8B8A8_UNORM, 0); util_set_common_states_and_clear(cso, ctx, cb); - ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT, 0, 1, NULL); + ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT, 0, 0, 1, NULL); /* Fragment shader. */ fs = util_make_fragment_tex_shader(ctx, tgsi_tex_target, @@ -473,7 +477,7 @@ util_test_constant_buffer(struct pipe_context *ctx, } static void -null_fragment_shader(struct pipe_context *ctx) +disabled_fragment_shader(struct pipe_context *ctx) { struct cso_context *cso; struct pipe_resource *cb; @@ -493,6 +497,9 @@ null_fragment_shader(struct pipe_context *ctx) vs = util_set_passthrough_vertex_shader(cso, ctx, false); + void *fs = util_make_empty_fragment_shader(ctx); + cso_set_fragment_shader_handle(cso, fs); + query = ctx->create_query(ctx, PIPE_QUERY_PRIMITIVES_GENERATED, 0); ctx->begin_query(ctx, query); util_draw_fullscreen_quad(cso); @@ -502,6 +509,7 @@ null_fragment_shader(struct pipe_context *ctx) /* Cleanup. */ cso_destroy_context(cso); ctx->delete_vs_state(ctx, vs); + ctx->delete_fs_state(ctx, fs); ctx->destroy_query(ctx, query); pipe_resource_reference(&cb, NULL); @@ -698,7 +706,7 @@ test_texture_barrier(struct pipe_context *ctx, bool use_fbfetch, templ.swizzle_b = PIPE_SWIZZLE_Z; templ.swizzle_a = PIPE_SWIZZLE_W; view = ctx->create_sampler_view(ctx, cb, &templ); - ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT, 0, 1, &view); + ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT, 0, 1, 0, &view); /* Fragment shader. */ if (num_samples > 1) { @@ -834,7 +842,7 @@ test_compute_clear_image(struct pipe_context *ctx) image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE; image.format = cb->format; - ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &image); + ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, 0, &image); /* Dispatch compute. */ struct pipe_grid_info info = {0}; @@ -907,7 +915,7 @@ test_nv12(struct pipe_screen *screen) struct pipe_resource *res = i == 2 ? tex->next : tex; unsigned plane = i == 2 ? 0 : i; - if (!screen->resource_get_param(screen, NULL, res, plane, 0, + if (!screen->resource_get_param(screen, NULL, res, plane, 0, 0, PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS, 0, &handle[i].handle)) { printf("resource_get_param failed\n"); @@ -915,7 +923,7 @@ test_nv12(struct pipe_screen *screen) goto cleanup; } - if (!screen->resource_get_param(screen, NULL, res, plane, 0, + if (!screen->resource_get_param(screen, NULL, res, plane, 0, 0, PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD, 0, &handle[i].dmabuf)) { printf("resource_get_param failed\n"); @@ -923,7 +931,7 @@ test_nv12(struct pipe_screen *screen) goto cleanup; } - if (!screen->resource_get_param(screen, NULL, res, plane, 0, + if (!screen->resource_get_param(screen, NULL, res, plane, 0, 0, PIPE_RESOURCE_PARAM_OFFSET, 0, &handle[i].offset)) { printf("resource_get_param failed\n"); @@ -931,7 +939,7 @@ test_nv12(struct pipe_screen *screen) goto cleanup; } - if (!screen->resource_get_param(screen, NULL, res, plane, 0, + if (!screen->resource_get_param(screen, NULL, res, plane, 0, 0, PIPE_RESOURCE_PARAM_STRIDE, 0, &handle[i].stride)) { printf("resource_get_param failed\n"); @@ -939,7 +947,7 @@ test_nv12(struct pipe_screen *screen) goto cleanup; } - if (!screen->resource_get_param(screen, NULL, res, plane, 0, + if (!screen->resource_get_param(screen, NULL, res, plane, 0, 0, PIPE_RESOURCE_PARAM_NPLANES, 0, &handle[i].planes)) { printf("resource_get_param failed\n"); @@ -1024,7 +1032,7 @@ util_run_tests(struct pipe_screen *screen) { struct pipe_context *ctx = screen->context_create(screen, NULL, 0); - null_fragment_shader(ctx); + disabled_fragment_shader(ctx); tgsi_vs_window_space_position(ctx); null_sampler_view(ctx, TGSI_TEXTURE_2D); null_sampler_view(ctx, TGSI_TEXTURE_BUFFER); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c index daed6c695..965321651 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.c @@ -30,6 +30,8 @@ #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_upload_mgr.h" +#include "util/log.h" +#include "compiler/shader_info.h" /* 0 = disabled, 1 = assertions, 2 = printfs */ #define TC_DEBUG 0 @@ -41,7 +43,7 @@ #endif #if TC_DEBUG >= 2 -#define tc_printf printf +#define tc_printf mesa_logi #define tc_asprintf asprintf #define tc_strcmp strcmp #else @@ -59,6 +61,12 @@ enum tc_call_id { TC_NUM_CALLS, }; +/* This is actually variable-sized, because indirect isn't allocated if it's + * not needed. */ +struct tc_draw_single { + struct pipe_draw_info info; +}; + typedef void (*tc_execute)(struct pipe_context *pipe, union tc_payload *payload); static const tc_execute execute_func[TC_NUM_CALLS]; @@ -75,27 +83,152 @@ tc_debug_check(struct threaded_context *tc) { for (unsigned i = 0; i < TC_MAX_BATCHES; i++) { tc_batch_check(&tc->batch_slots[i]); - tc_assert(tc->batch_slots[i].pipe == tc->pipe); + tc_assert(tc->batch_slots[i].tc == tc); } } static void +tc_set_driver_thread(struct threaded_context *tc) +{ +#ifndef NDEBUG + tc->driver_thread = util_get_thread_id(); +#endif +} + +static void +tc_clear_driver_thread(struct threaded_context *tc) +{ +#ifndef NDEBUG + memset(&tc->driver_thread, 0, sizeof(tc->driver_thread)); +#endif +} + +/* We don't want to read or write min_index and max_index, because + * it shouldn't be needed by drivers at this point. + */ +#define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \ + offsetof(struct pipe_draw_info, min_index) + +static void +simplify_draw_info(struct pipe_draw_info *info) +{ + /* Clear these fields to facilitate draw merging. + * Drivers shouldn't use them. + */ + info->has_user_indices = false; + info->index_bounds_valid = false; + info->take_index_buffer_ownership = false; + info->_pad = 0; + + /* This shouldn't be set when merging single draws. */ + info->increment_draw_id = false; + + if (info->mode != PIPE_PRIM_PATCHES) + info->vertices_per_patch = 0; + + if (info->index_size) { + if (!info->primitive_restart) + info->restart_index = 0; + } else { + assert(!info->primitive_restart); + info->index_bias = 0; + info->primitive_restart = false; + info->restart_index = 0; + info->index.resource = NULL; + } +} + +static bool +is_next_call_a_mergeable_draw(struct tc_draw_single *first_info, + struct tc_call *next, + struct tc_draw_single **next_info) +{ + if (next->call_id != TC_CALL_draw_single) + return false; + + *next_info = (struct tc_draw_single*)&next->payload; + simplify_draw_info(&(*next_info)->info); + + STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) == + sizeof(struct pipe_draw_info) - 8); + STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) == + sizeof(struct pipe_draw_info) - 4); + + /* All fields must be the same except start and count. */ + /* u_threaded_context stores start/count in min/max_index for single draws. */ + return memcmp((uint32_t*)&first_info->info, + (uint32_t*)&(*next_info)->info, + DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0; +} + +static void tc_batch_execute(void *job, UNUSED int thread_index) { struct tc_batch *batch = job; - struct pipe_context *pipe = batch->pipe; + struct pipe_context *pipe = batch->tc->pipe; struct tc_call *last = &batch->call[batch->num_total_call_slots]; tc_batch_check(batch); + tc_set_driver_thread(batch->tc); assert(!batch->token); - for (struct tc_call *iter = batch->call; iter != last; - iter += iter->num_call_slots) { + for (struct tc_call *iter = batch->call; iter != last;) { tc_assert(iter->sentinel == TC_SENTINEL); + + /* Draw call merging. */ + if (iter->call_id == TC_CALL_draw_single) { + struct tc_call *first = iter; + struct tc_call *next = first + first->num_call_slots; + struct tc_draw_single *first_info = + (struct tc_draw_single*)&first->payload; + struct tc_draw_single *next_info; + + simplify_draw_info(&first_info->info); + + /* If at least 2 consecutive draw calls can be merged... */ + if (next != last && next->call_id == TC_CALL_draw_single && + first_info->info.drawid == 0 && + is_next_call_a_mergeable_draw(first_info, next, &next_info)) { + /* Merge up to 256 draw calls. */ + struct pipe_draw_start_count multi[256]; + unsigned num_draws = 2; + + /* u_threaded_context stores start/count in min/max_index for single draws. */ + multi[0].start = first_info->info.min_index; + multi[0].count = first_info->info.max_index; + multi[1].start = next_info->info.min_index; + multi[1].count = next_info->info.max_index; + + if (next_info->info.index_size) + pipe_resource_reference(&next_info->info.index.resource, NULL); + + /* Find how many other draws can be merged. */ + next = next + next->num_call_slots; + for (; next != last && num_draws < ARRAY_SIZE(multi) && + is_next_call_a_mergeable_draw(first_info, next, &next_info); + next += next->num_call_slots, num_draws++) { + /* u_threaded_context stores start/count in min/max_index for single draws. */ + multi[num_draws].start = next_info->info.min_index; + multi[num_draws].count = next_info->info.max_index; + + if (next_info->info.index_size) + pipe_resource_reference(&next_info->info.index.resource, NULL); + } + + pipe->draw_vbo(pipe, &first_info->info, NULL, multi, num_draws); + if (first_info->info.index_size) + pipe_resource_reference(&first_info->info.index.resource, NULL); + iter = next; + continue; + } + } + execute_func[iter->call_id](pipe, &iter->payload); + iter += iter->num_call_slots; } + tc_clear_driver_thread(batch->tc); tc_batch_check(batch); batch->num_total_call_slots = 0; } @@ -108,6 +241,7 @@ tc_batch_flush(struct threaded_context *tc) tc_assert(next->num_total_call_slots != 0); tc_batch_check(next); tc_debug_check(tc); + tc->bytes_mapped_estimate = 0; p_atomic_add(&tc->num_offloaded_slots, next->num_total_call_slots); if (next->token) { @@ -127,12 +261,10 @@ tc_batch_flush(struct threaded_context *tc) */ static union tc_payload * tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id, - unsigned payload_size) + unsigned num_call_slots) { struct tc_batch *next = &tc->batch_slots[tc->next]; - unsigned total_size = offsetof(struct tc_call, payload) + payload_size; - unsigned num_call_slots = DIV_ROUND_UP(total_size, sizeof(struct tc_call)); - + assert(num_call_slots <= TC_CALLS_PER_BATCH); tc_debug_check(tc); if (unlikely(next->num_total_call_slots + num_call_slots > TC_CALLS_PER_BATCH)) { @@ -154,19 +286,23 @@ tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id, return &call->payload; } +#define tc_payload_size_to_call_slots(size) \ + DIV_ROUND_UP(offsetof(struct tc_call, payload) + (size), sizeof(struct tc_call)) + #define tc_add_struct_typed_call(tc, execute, type) \ - ((struct type*)tc_add_sized_call(tc, execute, sizeof(struct type))) + ((struct type*)tc_add_sized_call(tc, execute, \ + tc_payload_size_to_call_slots(sizeof(struct type)))) #define tc_add_slot_based_call(tc, execute, type, num_slots) \ - ((struct type*)tc_add_sized_call(tc, execute, \ + ((struct type*)tc_add_sized_call(tc, execute, tc_payload_size_to_call_slots( \ sizeof(struct type) + \ sizeof(((struct type*)NULL)->slot[0]) * \ - (num_slots))) + (num_slots)))) static union tc_payload * tc_add_small_call(struct threaded_context *tc, enum tc_call_id id) { - return tc_add_sized_call(tc, id, 0); + return tc_add_sized_call(tc, id, tc_payload_size_to_call_slots(0)); } static bool @@ -204,6 +340,7 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char /* .. and execute unflushed calls directly. */ if (next->num_total_call_slots) { p_atomic_add(&tc->num_direct_slots, next->num_total_call_slots); + tc->bytes_mapped_estimate = 0; tc_batch_execute(next, 0); synced = true; } @@ -212,7 +349,7 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char p_atomic_inc(&tc->num_syncs); if (tc_strcmp(func, "tc_destroy") != 0) { - tc_printf("sync %s %s\n", func, info); + tc_printf("sync %s %s", func, info); } } @@ -236,7 +373,7 @@ threaded_context_flush(struct pipe_context *_pipe, { struct threaded_context *tc = threaded_context(_pipe); - /* This is called from the state-tracker / application thread. */ + /* This is called from the gallium frontend / application thread. */ if (token->tc && token->tc == tc) { struct tc_batch *last = &tc->batch_slots[tc->last]; @@ -267,6 +404,8 @@ threaded_resource_init(struct pipe_resource *res) tres->base_valid_buffer_range = &tres->valid_buffer_range; tres->is_shared = false; tres->is_user_ptr = false; + tres->pending_staging_uploads = 0; + util_range_init(&tres->pending_staging_uploads_range); } void @@ -277,6 +416,7 @@ threaded_resource_deinit(struct pipe_resource *res) if (tres->latest != &tres->b) pipe_resource_reference(&tres->latest, NULL); util_range_destroy(&tres->valid_buffer_range); + util_range_destroy(&tres->pending_staging_uploads_range); } struct pipe_context * @@ -305,14 +445,15 @@ threaded_context_unwrap_sync(struct pipe_context *pipe) tc_##func(struct pipe_context *_pipe, qualifier type deref param) \ { \ struct threaded_context *tc = threaded_context(_pipe); \ - type *p = (type*)tc_add_sized_call(tc, TC_CALL_##func, sizeof(type)); \ + type *p = (type*)tc_add_sized_call(tc, TC_CALL_##func, \ + tc_payload_size_to_call_slots(sizeof(type))); \ *p = deref(param); \ } TC_FUNC1(set_active_query_state, flags, , bool, , *) TC_FUNC1(set_blend_color, blend_color, const, struct pipe_blend_color, *, ) -TC_FUNC1(set_stencil_ref, stencil_ref, const, struct pipe_stencil_ref, *, ) +TC_FUNC1(set_stencil_ref, stencil_ref, const, struct pipe_stencil_ref, , *) TC_FUNC1(set_clip_state, clip_state, const, struct pipe_clip_state, *, ) TC_FUNC1(set_sample_mask, sample_mask, , unsigned, , *) TC_FUNC1(set_min_samples, min_samples, , unsigned, , *) @@ -351,7 +492,7 @@ tc_call_destroy_query(struct pipe_context *pipe, union tc_payload *payload) { struct threaded_query *tq = threaded_query(payload->query); - if (tq->head_unflushed.next) + if (list_is_linked(&tq->head_unflushed)) list_del(&tq->head_unflushed); pipe->destroy_query(pipe, payload->query); @@ -392,7 +533,7 @@ tc_call_end_query(struct pipe_context *pipe, union tc_payload *payload) struct tc_end_query_payload *p = (struct tc_end_query_payload *)payload; struct threaded_query *tq = threaded_query(p->query); - if (!tq->head_unflushed.next) + if (!list_is_linked(&tq->head_unflushed)) list_add(&tq->head_unflushed, &p->tc->unflushed_queries); pipe->end_query(pipe, p->query); @@ -422,15 +563,21 @@ tc_get_query_result(struct pipe_context *_pipe, struct threaded_context *tc = threaded_context(_pipe); struct threaded_query *tq = threaded_query(query); struct pipe_context *pipe = tc->pipe; + bool flushed = tq->flushed; - if (!tq->flushed) + if (!flushed) { tc_sync_msg(tc, wait ? "wait" : "nowait"); + tc_set_driver_thread(tc); + } bool success = pipe->get_query_result(pipe, query, wait, result); + if (!flushed) + tc_clear_driver_thread(tc); + if (success) { tq->flushed = true; - if (tq->head_unflushed.next) { + if (list_is_linked(&tq->head_unflushed)) { /* This is safe because it can only happen after we sync'd. */ list_del(&tq->head_unflushed); } @@ -637,14 +784,19 @@ tc_set_tess_state(struct pipe_context *_pipe, { struct threaded_context *tc = threaded_context(_pipe); float *p = (float*)tc_add_sized_call(tc, TC_CALL_set_tess_state, - sizeof(float) * 6); + tc_payload_size_to_call_slots(sizeof(float) * 6)); memcpy(p, default_outer_level, 4 * sizeof(float)); memcpy(p + 4, default_inner_level, 2 * sizeof(float)); } -struct tc_constant_buffer { +struct tc_constant_buffer_info { ubyte shader, index; + bool is_null; +}; + +struct tc_constant_buffer { + struct tc_constant_buffer_info info; struct pipe_constant_buffer cb; }; @@ -653,52 +805,117 @@ tc_call_set_constant_buffer(struct pipe_context *pipe, union tc_payload *payload { struct tc_constant_buffer *p = (struct tc_constant_buffer *)payload; - pipe->set_constant_buffer(pipe, - p->shader, - p->index, - &p->cb); - pipe_resource_reference(&p->cb.buffer, NULL); + if (unlikely(p->info.is_null)) { + pipe->set_constant_buffer(pipe, p->info.shader, p->info.index, false, NULL); + return; + } + + pipe->set_constant_buffer(pipe, p->info.shader, p->info.index, true, &p->cb); } static void tc_set_constant_buffer(struct pipe_context *_pipe, enum pipe_shader_type shader, uint index, + bool take_ownership, const struct pipe_constant_buffer *cb) { struct threaded_context *tc = threaded_context(_pipe); - struct pipe_resource *buffer = NULL; + + if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) { + struct tc_constant_buffer_info *p = + tc_add_struct_typed_call(tc, TC_CALL_set_constant_buffer, + tc_constant_buffer_info); + p->shader = shader; + p->index = index; + p->is_null = true; + return; + } + + struct pipe_resource *buffer; unsigned offset; - /* This must be done before adding set_constant_buffer, because it could - * generate e.g. transfer_unmap and flush partially-uninitialized - * set_constant_buffer to the driver if it was done afterwards. - */ - if (cb && cb->user_buffer) { - u_upload_data(tc->base.const_uploader, 0, cb->buffer_size, 64, - cb->user_buffer, &offset, &buffer); + if (cb->user_buffer) { + /* This must be done before adding set_constant_buffer, because it could + * generate e.g. transfer_unmap and flush partially-uninitialized + * set_constant_buffer to the driver if it was done afterwards. + */ + buffer = NULL; + u_upload_data(tc->base.const_uploader, 0, cb->buffer_size, + tc->ubo_alignment, cb->user_buffer, &offset, &buffer); u_upload_unmap(tc->base.const_uploader); + take_ownership = true; + } else { + buffer = cb->buffer; + offset = cb->buffer_offset; } struct tc_constant_buffer *p = tc_add_struct_typed_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer); + p->info.shader = shader; + p->info.index = index; + p->info.is_null = false; + p->cb.user_buffer = NULL; + p->cb.buffer_offset = offset; + p->cb.buffer_size = cb->buffer_size; + + if (take_ownership) + p->cb.buffer = buffer; + else + tc_set_resource_reference(&p->cb.buffer, buffer); +} + +struct tc_inlinable_constants { + ubyte shader; + ubyte num_values; + uint32_t values[MAX_INLINABLE_UNIFORMS]; +}; + +static void +tc_call_set_inlinable_constants(struct pipe_context *pipe, union tc_payload *payload) +{ + struct tc_inlinable_constants *p = (struct tc_inlinable_constants *)payload; + + pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values); +} + +static void +tc_set_inlinable_constants(struct pipe_context *_pipe, + enum pipe_shader_type shader, + uint num_values, uint32_t *values) +{ + struct threaded_context *tc = threaded_context(_pipe); + struct tc_inlinable_constants *p = + tc_add_struct_typed_call(tc, TC_CALL_set_inlinable_constants, + tc_inlinable_constants); p->shader = shader; - p->index = index; + p->num_values = num_values; + memcpy(p->values, values, num_values * 4); +} - if (cb) { - if (cb->user_buffer) { - p->cb.buffer_size = cb->buffer_size; - p->cb.user_buffer = NULL; - p->cb.buffer_offset = offset; - p->cb.buffer = buffer; - } else { - tc_set_resource_reference(&p->cb.buffer, - cb->buffer); - memcpy(&p->cb, cb, sizeof(*cb)); - } - } else { - memset(&p->cb, 0, sizeof(*cb)); - } +struct tc_sample_locations { + uint16_t size; + uint8_t locations[0]; +}; + + +static void +tc_call_set_sample_locations(struct pipe_context *pipe, union tc_payload *payload) +{ + struct tc_sample_locations *p = (struct tc_sample_locations *)payload; + pipe->set_sample_locations(pipe, p->size, &p->locations[0]); +} + +static void +tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations) +{ + struct threaded_context *tc = threaded_context(_pipe); + struct tc_sample_locations *p = (struct tc_sample_locations *)tc_add_sized_call(tc, + TC_CALL_set_sample_locations, + tc_payload_size_to_call_slots(sizeof(struct tc_sample_locations) + size)); + + p->size = size; + memcpy(&p->locations, locations, size); } struct tc_scissors { @@ -785,7 +1002,7 @@ tc_set_window_rectangles(struct pipe_context *_pipe, bool include, } struct tc_sampler_views { - ubyte shader, start, count; + ubyte shader, start, count, unbind_num_trailing_slots; struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */ }; @@ -795,7 +1012,8 @@ tc_call_set_sampler_views(struct pipe_context *pipe, union tc_payload *payload) struct tc_sampler_views *p = (struct tc_sampler_views *)payload; unsigned count = p->count; - pipe->set_sampler_views(pipe, p->shader, p->start, p->count, p->slot); + pipe->set_sampler_views(pipe, p->shader, p->start, p->count, + p->unbind_num_trailing_slots, p->slot); for (unsigned i = 0; i < count; i++) pipe_sampler_view_reference(&p->slot[i], NULL); } @@ -804,9 +1022,10 @@ static void tc_set_sampler_views(struct pipe_context *_pipe, enum pipe_shader_type shader, unsigned start, unsigned count, + unsigned unbind_num_trailing_slots, struct pipe_sampler_view **views) { - if (!count) + if (!count && !unbind_num_trailing_slots) return; struct threaded_context *tc = threaded_context(_pipe); @@ -816,6 +1035,7 @@ tc_set_sampler_views(struct pipe_context *_pipe, p->shader = shader; p->start = start; p->count = count; + p->unbind_num_trailing_slots = unbind_num_trailing_slots; if (views) { for (unsigned i = 0; i < count; i++) { @@ -829,7 +1049,7 @@ tc_set_sampler_views(struct pipe_context *_pipe, struct tc_shader_images { ubyte shader, start, count; - bool unbind; + ubyte unbind_num_trailing_slots; struct pipe_image_view slot[0]; /* more will be allocated if needed */ }; @@ -839,12 +1059,14 @@ tc_call_set_shader_images(struct pipe_context *pipe, union tc_payload *payload) struct tc_shader_images *p = (struct tc_shader_images *)payload; unsigned count = p->count; - if (p->unbind) { - pipe->set_shader_images(pipe, p->shader, p->start, p->count, NULL); + if (!p->count) { + pipe->set_shader_images(pipe, p->shader, p->start, 0, + p->unbind_num_trailing_slots, NULL); return; } - pipe->set_shader_images(pipe, p->shader, p->start, p->count, p->slot); + pipe->set_shader_images(pipe, p->shader, p->start, p->count, + p->unbind_num_trailing_slots, p->slot); for (unsigned i = 0; i < count; i++) pipe_resource_reference(&p->slot[i].resource, NULL); @@ -854,9 +1076,10 @@ static void tc_set_shader_images(struct pipe_context *_pipe, enum pipe_shader_type shader, unsigned start, unsigned count, + unsigned unbind_num_trailing_slots, const struct pipe_image_view *images) { - if (!count) + if (!count && !unbind_num_trailing_slots) return; struct threaded_context *tc = threaded_context(_pipe); @@ -866,10 +1089,11 @@ tc_set_shader_images(struct pipe_context *_pipe, p->shader = shader; p->start = start; - p->count = count; - p->unbind = images == NULL; if (images) { + p->count = count; + p->unbind_num_trailing_slots = unbind_num_trailing_slots; + for (unsigned i = 0; i < count; i++) { tc_set_resource_reference(&p->slot[i].resource, images[i].resource); @@ -885,6 +1109,9 @@ tc_set_shader_images(struct pipe_context *_pipe, } } memcpy(p->slot, images, count * sizeof(images[0])); + } else { + p->count = 0; + p->unbind_num_trailing_slots = count + unbind_num_trailing_slots; } } @@ -956,7 +1183,7 @@ tc_set_shader_buffers(struct pipe_context *_pipe, struct tc_vertex_buffers { ubyte start, count; - bool unbind; + ubyte unbind_num_trailing_slots; struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */ }; @@ -966,53 +1193,59 @@ tc_call_set_vertex_buffers(struct pipe_context *pipe, union tc_payload *payload) struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)payload; unsigned count = p->count; - if (p->unbind) { - pipe->set_vertex_buffers(pipe, p->start, count, NULL); + if (!count) { + pipe->set_vertex_buffers(pipe, p->start, 0, + p->unbind_num_trailing_slots, false, NULL); return; } for (unsigned i = 0; i < count; i++) tc_assert(!p->slot[i].is_user_buffer); - pipe->set_vertex_buffers(pipe, p->start, count, p->slot); - for (unsigned i = 0; i < count; i++) - pipe_resource_reference(&p->slot[i].buffer.resource, NULL); + pipe->set_vertex_buffers(pipe, p->start, count, + p->unbind_num_trailing_slots, true, p->slot); } static void tc_set_vertex_buffers(struct pipe_context *_pipe, unsigned start, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership, const struct pipe_vertex_buffer *buffers) { struct threaded_context *tc = threaded_context(_pipe); - if (!count) + if (!count && !unbind_num_trailing_slots) return; - if (buffers) { + if (count && buffers) { struct tc_vertex_buffers *p = tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count); p->start = start; p->count = count; - p->unbind = false; + p->unbind_num_trailing_slots = unbind_num_trailing_slots; - for (unsigned i = 0; i < count; i++) { - struct pipe_vertex_buffer *dst = &p->slot[i]; - const struct pipe_vertex_buffer *src = buffers + i; - - tc_assert(!src->is_user_buffer); - dst->stride = src->stride; - dst->is_user_buffer = false; - tc_set_resource_reference(&dst->buffer.resource, - src->buffer.resource); - dst->buffer_offset = src->buffer_offset; + if (take_ownership) { + memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer)); + } else { + for (unsigned i = 0; i < count; i++) { + struct pipe_vertex_buffer *dst = &p->slot[i]; + const struct pipe_vertex_buffer *src = buffers + i; + + tc_assert(!src->is_user_buffer); + dst->stride = src->stride; + dst->is_user_buffer = false; + tc_set_resource_reference(&dst->buffer.resource, + src->buffer.resource); + dst->buffer_offset = src->buffer_offset; + } } } else { struct tc_vertex_buffers *p = tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0); p->start = start; - p->count = count; - p->unbind = true; + p->count = 0; + p->unbind_num_trailing_slots = count + unbind_num_trailing_slots; } } @@ -1136,7 +1369,6 @@ tc_create_stream_output_target(struct pipe_context *_pipe, struct threaded_resource *tres = threaded_resource(res); struct pipe_stream_output_target *view; - tc_sync(threaded_context(_pipe)); util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset, buffer_offset + buffer_size); @@ -1357,17 +1589,18 @@ tc_improve_map_buffer_flags(struct threaded_context *tc, return usage; /* Use the staging upload if it's preferred. */ - if (usage & (PIPE_TRANSFER_DISCARD_RANGE | - PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) && - !(usage & PIPE_TRANSFER_PERSISTENT) && + if (usage & (PIPE_MAP_DISCARD_RANGE | + PIPE_MAP_DISCARD_WHOLE_RESOURCE) && + !(usage & PIPE_MAP_PERSISTENT) && /* Try not to decrement the counter if it's not positive. Still racy, * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */ tres->max_forced_staging_uploads > 0 && + tc->use_forced_staging_uploads && p_atomic_dec_return(&tres->max_forced_staging_uploads) >= 0) { - usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | - PIPE_TRANSFER_UNSYNCHRONIZED); + usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE | + PIPE_MAP_UNSYNCHRONIZED); - return usage | tc_flags | PIPE_TRANSFER_DISCARD_RANGE; + return usage | tc_flags | PIPE_MAP_DISCARD_RANGE; } /* Sparse buffers can't be mapped directly and can't be reallocated @@ -1378,8 +1611,8 @@ tc_improve_map_buffer_flags(struct threaded_context *tc, /* We can use DISCARD_RANGE instead of full discard. This is the only * fast path for sparse buffers that doesn't need thread synchronization. */ - if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) - usage |= PIPE_TRANSFER_DISCARD_RANGE; + if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) + usage |= PIPE_MAP_DISCARD_RANGE; /* Allow DISCARD_WHOLE_RESOURCE and infering UNSYNCHRONIZED in drivers. * The threaded context doesn't do unsychronized mappings and invalida- @@ -1392,47 +1625,50 @@ tc_improve_map_buffer_flags(struct threaded_context *tc, usage |= tc_flags; /* Handle CPU reads trivially. */ - if (usage & PIPE_TRANSFER_READ) { + if (usage & PIPE_MAP_READ) { + if (usage & PIPE_MAP_UNSYNCHRONIZED) + usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */ + /* Drivers aren't allowed to do buffer invalidations. */ - return usage & ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE; } /* See if the buffer range being mapped has never been initialized, * in which case it can be mapped unsynchronized. */ - if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && + if (!(usage & PIPE_MAP_UNSYNCHRONIZED) && !tres->is_shared && !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) - usage |= PIPE_TRANSFER_UNSYNCHRONIZED; + usage |= PIPE_MAP_UNSYNCHRONIZED; - if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) { /* If discarding the entire range, discard the whole resource instead. */ - if (usage & PIPE_TRANSFER_DISCARD_RANGE && + if (usage & PIPE_MAP_DISCARD_RANGE && offset == 0 && size == tres->b.width0) - usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE; /* Discard the whole resource if needed. */ - if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { + if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) { if (tc_invalidate_buffer(tc, tres)) - usage |= PIPE_TRANSFER_UNSYNCHRONIZED; + usage |= PIPE_MAP_UNSYNCHRONIZED; else - usage |= PIPE_TRANSFER_DISCARD_RANGE; /* fallback */ + usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */ } } /* We won't need this flag anymore. */ /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */ - usage &= ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE; /* GL_AMD_pinned_memory and persistent mappings can't use staging * buffers. */ - if (usage & (PIPE_TRANSFER_UNSYNCHRONIZED | - PIPE_TRANSFER_PERSISTENT) || + if (usage & (PIPE_MAP_UNSYNCHRONIZED | + PIPE_MAP_PERSISTENT) || tres->is_user_ptr) - usage &= ~PIPE_TRANSFER_DISCARD_RANGE; + usage &= ~PIPE_MAP_DISCARD_RANGE; /* Unsychronized buffer mappings don't have to synchronize the thread. */ - if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { - usage &= ~PIPE_TRANSFER_DISCARD_RANGE; + if (usage & PIPE_MAP_UNSYNCHRONIZED) { + usage &= ~PIPE_MAP_DISCARD_RANGE; usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */ } @@ -1455,7 +1691,7 @@ tc_transfer_map(struct pipe_context *_pipe, /* Do a staging transfer within the threaded context. The driver should * only get resource_copy_region. */ - if (usage & PIPE_TRANSFER_DISCARD_RANGE) { + if (usage & PIPE_MAP_DISCARD_RANGE) { struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers); uint8_t *map; @@ -1463,7 +1699,8 @@ tc_transfer_map(struct pipe_context *_pipe, u_upload_alloc(tc->base.stream_uploader, 0, box->width + (box->x % tc->map_buffer_alignment), - 64, &ttrans->offset, &ttrans->staging, (void**)&map); + tc->map_buffer_alignment, &ttrans->offset, + &ttrans->staging, (void**)&map); if (!map) { slab_free(&tc->pool_transfers, ttrans); return NULL; @@ -1476,18 +1713,45 @@ tc_transfer_map(struct pipe_context *_pipe, ttrans->b.stride = 0; ttrans->b.layer_stride = 0; *transfer = &ttrans->b; + + p_atomic_inc(&tres->pending_staging_uploads); + util_range_add(resource, &tres->pending_staging_uploads_range, + box->x, box->x + box->width); + return map + (box->x % tc->map_buffer_alignment); } + + if (usage & PIPE_MAP_UNSYNCHRONIZED && + p_atomic_read(&tres->pending_staging_uploads) && + util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) { + /* Write conflict detected between a staging transfer and the direct mapping we're + * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping + * will have to wait for the staging transfer completion. + * Note: The conflict detection is only based on the mapped range, not on the actual + * written range(s). + */ + usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC; + tc->use_forced_staging_uploads = false; + } } /* Unsychronized buffer mappings don't have to synchronize the thread. */ - if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) + if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) { tc_sync_msg(tc, resource->target != PIPE_BUFFER ? " texture" : - usage & PIPE_TRANSFER_DISCARD_RANGE ? " discard_range" : - usage & PIPE_TRANSFER_READ ? " read" : " ??"); + usage & PIPE_MAP_DISCARD_RANGE ? " discard_range" : + usage & PIPE_MAP_READ ? " read" : " staging conflict"); + tc_set_driver_thread(tc); + } + + tc->bytes_mapped_estimate += box->width; - return pipe->transfer_map(pipe, tres->latest ? tres->latest : resource, + void *ret = pipe->transfer_map(pipe, tres->latest ? tres->latest : resource, level, usage, box, transfer); + + if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) + tc_clear_driver_thread(tc); + + return ret; } struct tc_transfer_flush_region { @@ -1552,8 +1816,8 @@ tc_transfer_flush_region(struct pipe_context *_pipe, struct threaded_context *tc = threaded_context(_pipe); struct threaded_transfer *ttrans = threaded_transfer(transfer); struct threaded_resource *tres = threaded_resource(transfer->resource); - unsigned required_usage = PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_FLUSH_EXPLICIT; + unsigned required_usage = PIPE_MAP_WRITE | + PIPE_MAP_FLUSH_EXPLICIT; if (tres->b.target == PIPE_BUFFER) { if ((transfer->usage & required_usage) == required_usage) { @@ -1575,34 +1839,89 @@ tc_transfer_flush_region(struct pipe_context *_pipe, p->box = *rel_box; } +struct tc_transfer_unmap { + union { + struct pipe_transfer *transfer; + struct pipe_resource *resource; + }; + bool was_staging_transfer; +}; + static void tc_call_transfer_unmap(struct pipe_context *pipe, union tc_payload *payload) { - pipe->transfer_unmap(pipe, payload->transfer); + struct tc_transfer_unmap *p = (struct tc_transfer_unmap *) payload; + if (p->was_staging_transfer) { + struct threaded_resource *tres = threaded_resource(payload->resource); + /* Nothing to do except keeping track of staging uploads */ + assert(tres->pending_staging_uploads > 0); + p_atomic_dec(&tres->pending_staging_uploads); + pipe_resource_reference(&p->resource, NULL); + return; + } + pipe->transfer_unmap(pipe, p->transfer); } static void +tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, + unsigned flags); + +static void tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) { struct threaded_context *tc = threaded_context(_pipe); struct threaded_transfer *ttrans = threaded_transfer(transfer); struct threaded_resource *tres = threaded_resource(transfer->resource); + /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be + * called from any thread and bypasses all multithreaded queues. + */ + if (transfer->usage & PIPE_MAP_THREAD_SAFE) { + assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED); + assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT | + PIPE_MAP_DISCARD_RANGE))); + + struct pipe_context *pipe = tc->pipe; + util_range_add(&tres->b, tres->base_valid_buffer_range, + transfer->box.x, transfer->box.x + transfer->box.width); + pipe->transfer_unmap(pipe, transfer); + return; + } + + bool was_staging_transfer = false; + if (tres->b.target == PIPE_BUFFER) { - if (transfer->usage & PIPE_TRANSFER_WRITE && - !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) + if (transfer->usage & PIPE_MAP_WRITE && + !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT)) tc_buffer_do_flush_region(tc, ttrans, &transfer->box); - /* Staging transfers don't send the call to the driver. */ if (ttrans->staging) { + was_staging_transfer = true; + pipe_resource_reference(&ttrans->staging, NULL); pipe_resource_reference(&ttrans->b.resource, NULL); slab_free(&tc->pool_transfers, ttrans); - return; } } + struct tc_transfer_unmap *p = tc_add_struct_typed_call(tc, TC_CALL_transfer_unmap, + tc_transfer_unmap); + if (was_staging_transfer) { + tc_set_resource_reference(&p->resource, &tres->b); + p->was_staging_transfer = true; + } else { + p->transfer = transfer; + p->was_staging_transfer = false; + } - tc_add_small_call(tc, TC_CALL_transfer_unmap)->transfer = transfer; + /* tc_transfer_map directly maps the buffers, but tc_transfer_unmap + * defers the unmap operation to the batch execution. + * bytes_mapped_estimate is an estimation of the map/unmap bytes delta + * and if it goes over an optional limit the current batch is flushed, + * to reclaim some RAM. */ + if (!ttrans->staging && tc->bytes_mapped_limit && + tc->bytes_mapped_estimate > tc->bytes_mapped_limit) { + tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC); + } } struct tc_buffer_subdata { @@ -1633,19 +1952,19 @@ tc_buffer_subdata(struct pipe_context *_pipe, if (!size) return; - usage |= PIPE_TRANSFER_WRITE; + usage |= PIPE_MAP_WRITE; - /* PIPE_TRANSFER_MAP_DIRECTLY supresses implicit DISCARD_RANGE. */ - if (!(usage & PIPE_TRANSFER_MAP_DIRECTLY)) - usage |= PIPE_TRANSFER_DISCARD_RANGE; + /* PIPE_MAP_DIRECTLY supresses implicit DISCARD_RANGE. */ + if (!(usage & PIPE_MAP_DIRECTLY)) + usage |= PIPE_MAP_DISCARD_RANGE; usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size); /* Unsychronized and big transfers should use transfer_map. Also handle * full invalidations, because drivers aren't allowed to do them. */ - if (usage & (PIPE_TRANSFER_UNSYNCHRONIZED | - PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) || + if (usage & (PIPE_MAP_UNSYNCHRONIZED | + PIPE_MAP_DISCARD_WHOLE_RESOURCE) || size > TC_MAX_SUBDATA_BYTES) { struct pipe_transfer *transfer; struct pipe_box box; @@ -1727,8 +2046,10 @@ tc_texture_subdata(struct pipe_context *_pipe, struct pipe_context *pipe = tc->pipe; tc_sync(tc); + tc_set_driver_thread(tc); pipe->texture_subdata(pipe, resource, level, usage, box, data, stride, layer_stride); + tc_clear_driver_thread(tc); } } @@ -1933,6 +2254,21 @@ tc_set_context_param(struct pipe_context *_pipe, { struct threaded_context *tc = threaded_context(_pipe); + if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) { + /* Pin the gallium thread as requested. */ + util_set_thread_affinity(tc->queue.threads[0], + util_get_cpu_caps()->L3_affinity_mask[value], + NULL, util_get_cpu_caps()->num_cpu_mask_bits); + + /* Execute this immediately (without enqueuing). + * It's required to be thread-safe. + */ + struct pipe_context *pipe = tc->pipe; + if (pipe->set_context_param) + pipe->set_context_param(pipe, param, value); + return; + } + if (tc->pipe->set_context_param) { struct tc_context_param *payload = tc_add_struct_typed_call(tc, TC_CALL_set_context_param, @@ -1941,12 +2277,20 @@ tc_set_context_param(struct pipe_context *_pipe, payload->param = param; payload->value = value; } +} - if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) { - /* Pin the gallium thread as requested. */ - util_pin_thread_to_L3(tc->queue.threads[0], value, - util_cpu_caps.cores_per_L3); - } +static void +tc_call_set_frontend_noop(struct pipe_context *pipe, union tc_payload *payload) +{ + pipe->set_frontend_noop(pipe, payload->boolean); +} + +static void +tc_set_frontend_noop(struct pipe_context *_pipe, bool enable) +{ + struct threaded_context *tc = threaded_context(_pipe); + + tc_add_small_call(tc, TC_CALL_set_frontend_noop)->boolean = enable; } @@ -1995,19 +2339,7 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, struct threaded_context *tc = threaded_context(_pipe); struct pipe_context *pipe = tc->pipe; struct pipe_screen *screen = pipe->screen; - bool async = flags & PIPE_FLUSH_DEFERRED; - - if (flags & PIPE_FLUSH_ASYNC) { - struct tc_batch *last = &tc->batch_slots[tc->last]; - - /* Prefer to do the flush in the driver thread, but avoid the inter-thread - * communication overhead if the driver thread is currently idle and the - * caller is going to wait for the fence immediately anyway. - */ - if (!(util_queue_fence_is_signalled(&last->fence) && - (flags & PIPE_FLUSH_HINT_FINISH))) - async = true; - } + bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC); if (async && tc->create_fence) { if (fence) { @@ -2044,90 +2376,259 @@ out_of_memory: if (!(flags & PIPE_FLUSH_DEFERRED)) tc_flush_queries(tc); + tc_set_driver_thread(tc); pipe->flush(pipe, fence, flags); + tc_clear_driver_thread(tc); } -/* This is actually variable-sized, because indirect isn't allocated if it's - * not needed. */ -struct tc_full_draw_info { - struct pipe_draw_info draw; +static void +tc_call_draw_single(struct pipe_context *pipe, union tc_payload *payload) +{ + struct tc_draw_single *info = (struct tc_draw_single*)payload; + + /* u_threaded_context stores start/count in min/max_index for single draws. */ + /* Drivers using u_threaded_context shouldn't use min/max_index. */ + struct pipe_draw_start_count *draw = + (struct pipe_draw_start_count *)&info->info.min_index; + STATIC_ASSERT(offsetof(struct pipe_draw_start_count, start) == 0); + STATIC_ASSERT(offsetof(struct pipe_draw_start_count, count) == 4); + + info->info.index_bounds_valid = false; + info->info.has_user_indices = false; + info->info.take_index_buffer_ownership = false; + + pipe->draw_vbo(pipe, &info->info, NULL, draw, 1); + if (info->info.index_size) + pipe_resource_reference(&info->info.index.resource, NULL); +} + +struct tc_draw_indirect { + struct pipe_draw_info info; struct pipe_draw_indirect_info indirect; + struct pipe_draw_start_count draw; }; static void -tc_call_draw_vbo(struct pipe_context *pipe, union tc_payload *payload) +tc_call_draw_indirect(struct pipe_context *pipe, union tc_payload *payload) { - struct tc_full_draw_info *info = (struct tc_full_draw_info*)payload; + struct tc_draw_indirect *info = (struct tc_draw_indirect*)payload; - pipe->draw_vbo(pipe, &info->draw); - pipe_so_target_reference(&info->draw.count_from_stream_output, NULL); - if (info->draw.index_size) - pipe_resource_reference(&info->draw.index.resource, NULL); - if (info->draw.indirect) { - pipe_resource_reference(&info->indirect.buffer, NULL); - pipe_resource_reference(&info->indirect.indirect_draw_count, NULL); - } + info->info.index_bounds_valid = false; + info->info.take_index_buffer_ownership = false; + + pipe->draw_vbo(pipe, &info->info, &info->indirect, &info->draw, 1); + if (info->info.index_size) + pipe_resource_reference(&info->info.index.resource, NULL); + + pipe_resource_reference(&info->indirect.buffer, NULL); + pipe_resource_reference(&info->indirect.indirect_draw_count, NULL); + pipe_so_target_reference(&info->indirect.count_from_stream_output, NULL); } -static struct tc_full_draw_info * -tc_add_draw_vbo(struct pipe_context *_pipe, bool indirect) +struct tc_draw_multi { + struct pipe_draw_info info; + unsigned num_draws; + struct pipe_draw_start_count slot[]; /* variable-sized array */ +}; + +static void +tc_call_draw_multi(struct pipe_context *pipe, union tc_payload *payload) { - return (struct tc_full_draw_info*) - tc_add_sized_call(threaded_context(_pipe), TC_CALL_draw_vbo, - indirect ? sizeof(struct tc_full_draw_info) : - sizeof(struct pipe_draw_info)); + struct tc_draw_multi *info = (struct tc_draw_multi*)payload; + + info->info.has_user_indices = false; + info->info.index_bounds_valid = false; + info->info.take_index_buffer_ownership = false; + + pipe->draw_vbo(pipe, &info->info, NULL, info->slot, info->num_draws); + if (info->info.index_size) + pipe_resource_reference(&info->info.index.resource, NULL); } -static void -tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) +#define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \ + offsetof(struct pipe_draw_info, index) + +void +tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count *draws, + unsigned num_draws) { + STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX + + sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index)); + struct threaded_context *tc = threaded_context(_pipe); - struct pipe_draw_indirect_info *indirect = info->indirect; unsigned index_size = info->index_size; bool has_user_indices = info->has_user_indices; + if (unlikely(indirect)) { + assert(!has_user_indices); + assert(num_draws == 1); + + struct tc_draw_indirect *p = + tc_add_struct_typed_call(tc, TC_CALL_draw_indirect, tc_draw_indirect); + if (index_size && !info->take_index_buffer_ownership) { + tc_set_resource_reference(&p->info.index.resource, + info->index.resource); + } + memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX); + + tc_set_resource_reference(&p->indirect.buffer, indirect->buffer); + tc_set_resource_reference(&p->indirect.indirect_draw_count, + indirect->indirect_draw_count); + p->indirect.count_from_stream_output = NULL; + pipe_so_target_reference(&p->indirect.count_from_stream_output, + indirect->count_from_stream_output); + memcpy(&p->indirect, indirect, sizeof(*indirect)); + p->draw.start = draws[0].start; + return; + } + + if (num_draws == 1) { + /* Single draw. */ + if (index_size && has_user_indices) { + unsigned size = draws[0].count * index_size; + struct pipe_resource *buffer = NULL; + unsigned offset; + + if (!size) + return; + + /* This must be done before adding draw_vbo, because it could generate + * e.g. transfer_unmap and flush partially-uninitialized draw_vbo + * to the driver if it was done afterwards. + */ + u_upload_data(tc->base.stream_uploader, 0, size, 4, + (uint8_t*)info->index.user + draws[0].start * index_size, + &offset, &buffer); + if (unlikely(!buffer)) + return; + + struct tc_draw_single *p = + tc_add_struct_typed_call(tc, TC_CALL_draw_single, tc_draw_single); + memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX); + p->info.index.resource = buffer; + /* u_threaded_context stores start/count in min/max_index for single draws. */ + p->info.min_index = offset >> util_logbase2(index_size); + p->info.max_index = draws[0].count; + } else { + /* Non-indexed call or indexed with a real index buffer. */ + struct tc_draw_single *p = + tc_add_struct_typed_call(tc, TC_CALL_draw_single, tc_draw_single); + if (index_size && !info->take_index_buffer_ownership) { + tc_set_resource_reference(&p->info.index.resource, + info->index.resource); + } + memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX); + /* u_threaded_context stores start/count in min/max_index for single draws. */ + p->info.min_index = draws[0].start; + p->info.max_index = draws[0].count; + } + return; + } + + const int draw_overhead_bytes = offsetof(struct tc_call, payload) + sizeof(struct tc_draw_multi); + const int one_draw_payload_bytes = sizeof(((struct tc_draw_multi*)NULL)->slot[0]); + const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_payload_bytes, + sizeof(struct tc_call)); + /* Multi draw. */ if (index_size && has_user_indices) { - unsigned size = info->count * index_size; struct pipe_resource *buffer = NULL; - unsigned offset; + unsigned buffer_offset, total_count = 0; + unsigned index_size_shift = util_logbase2(index_size); + uint8_t *ptr = NULL; - tc_assert(!indirect); + /* Get the total count. */ + for (unsigned i = 0; i < num_draws; i++) + total_count += draws[i].count; + + if (!total_count) + return; - /* This must be done before adding draw_vbo, because it could generate + /* Allocate space for all index buffers. + * + * This must be done before adding draw_vbo, because it could generate * e.g. transfer_unmap and flush partially-uninitialized draw_vbo * to the driver if it was done afterwards. */ - u_upload_data(tc->base.stream_uploader, 0, size, 4, info->index.user, - &offset, &buffer); + u_upload_alloc(tc->base.stream_uploader, 0, + total_count << index_size_shift, 4, + &buffer_offset, &buffer, (void**)&ptr); if (unlikely(!buffer)) return; - struct tc_full_draw_info *p = tc_add_draw_vbo(_pipe, false); - p->draw.count_from_stream_output = NULL; - pipe_so_target_reference(&p->draw.count_from_stream_output, - info->count_from_stream_output); - memcpy(&p->draw, info, sizeof(*info)); - p->draw.has_user_indices = false; - p->draw.index.resource = buffer; - p->draw.start = offset / index_size; - } else { - /* Non-indexed call or indexed with a real index buffer. */ - struct tc_full_draw_info *p = tc_add_draw_vbo(_pipe, indirect != NULL); - p->draw.count_from_stream_output = NULL; - pipe_so_target_reference(&p->draw.count_from_stream_output, - info->count_from_stream_output); - if (index_size) { - tc_set_resource_reference(&p->draw.index.resource, - info->index.resource); + int total_offset = 0; + while (num_draws) { + struct tc_batch *next = &tc->batch_slots[tc->next]; + + int nb_slots_left = TC_CALLS_PER_BATCH - next->num_total_call_slots; + /* If there isn't enough place for one draw, try to fill the next one */ + if (nb_slots_left < slots_for_one_draw) + nb_slots_left = TC_CALLS_PER_BATCH; + const int size_left_bytes = nb_slots_left * sizeof(struct tc_call); + + /* How many draws can we fit in the current batch */ + const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_payload_bytes); + + struct tc_draw_multi *p = + tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi, + dr); + memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX); + p->info.index.resource = buffer; + p->num_draws = dr; + + /* Upload index buffers. */ + for (unsigned i = 0, offset = 0; i < dr; i++) { + unsigned count = draws[i + total_offset].count; + + if (!count) { + p->slot[i].start = 0; + p->slot[i].count = 0; + continue; + } + + unsigned size = count << index_size_shift; + memcpy(ptr + offset, + (uint8_t*)info->index.user + + (draws[i + total_offset].start << index_size_shift), size); + p->slot[i].start = (buffer_offset + offset) >> index_size_shift; + p->slot[i].count = count; + offset += size; + } + + total_offset += dr; + num_draws -= dr; } - memcpy(&p->draw, info, sizeof(*info)); - - if (indirect) { - tc_set_resource_reference(&p->draw.indirect->buffer, indirect->buffer); - tc_set_resource_reference(&p->indirect.indirect_draw_count, - indirect->indirect_draw_count); - memcpy(&p->indirect, indirect, sizeof(*indirect)); - p->draw.indirect = &p->indirect; + } else { + int total_offset = 0; + bool take_index_buffer_ownership = info->take_index_buffer_ownership; + while (num_draws) { + struct tc_batch *next = &tc->batch_slots[tc->next]; + + int nb_slots_left = TC_CALLS_PER_BATCH - next->num_total_call_slots; + /* If there isn't enough place for one draw, try to fill the next one */ + if (nb_slots_left < slots_for_one_draw) + nb_slots_left = TC_CALLS_PER_BATCH; + const int size_left_bytes = nb_slots_left * sizeof(struct tc_call); + + /* How many draws can we fit in the current batch */ + const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_payload_bytes); + + /* Non-indexed call or indexed with a real index buffer. */ + struct tc_draw_multi *p = + tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi, + dr); + if (index_size && !take_index_buffer_ownership) { + tc_set_resource_reference(&p->info.index.resource, + info->index.resource); + } + take_index_buffer_ownership = false; + memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX); + p->num_draws = dr; + memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr); + num_draws -= dr; + + total_offset += dr; } } } @@ -2313,20 +2814,22 @@ tc_invalidate_resource(struct pipe_context *_pipe, struct tc_clear { unsigned buffers; + struct pipe_scissor_state scissor_state; union pipe_color_union color; double depth; unsigned stencil; + bool scissor_state_set; }; static void tc_call_clear(struct pipe_context *pipe, union tc_payload *payload) { struct tc_clear *p = (struct tc_clear *)payload; - pipe->clear(pipe, p->buffers, &p->color, p->depth, p->stencil); + pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil); } static void -tc_clear(struct pipe_context *_pipe, unsigned buffers, +tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state, const union pipe_color_union *color, double depth, unsigned stencil) { @@ -2334,6 +2837,9 @@ tc_clear(struct pipe_context *_pipe, unsigned buffers, struct tc_clear *p = tc_add_struct_typed_call(tc, TC_CALL_clear, tc_clear); p->buffers = buffers; + if (scissor_state) + p->scissor_state = *scissor_state; + p->scissor_state_set = !!scissor_state; p->color = *color; p->depth = depth; p->stencil = stencil; @@ -2470,6 +2976,133 @@ tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res, return true; /* we don't care about the return value for this call */ } +static unsigned +tc_init_intel_perf_query_info(struct pipe_context *_pipe) +{ + struct threaded_context *tc = threaded_context(_pipe); + struct pipe_context *pipe = tc->pipe; + + return pipe->init_intel_perf_query_info(pipe); +} + +static void +tc_get_intel_perf_query_info(struct pipe_context *_pipe, + unsigned query_index, + const char **name, + uint32_t *data_size, + uint32_t *n_counters, + uint32_t *n_active) +{ + struct threaded_context *tc = threaded_context(_pipe); + struct pipe_context *pipe = tc->pipe; + + tc_sync(tc); /* n_active vs begin/end_intel_perf_query */ + pipe->get_intel_perf_query_info(pipe, query_index, name, data_size, + n_counters, n_active); +} + +static void +tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe, + unsigned query_index, + unsigned counter_index, + const char **name, + const char **desc, + uint32_t *offset, + uint32_t *data_size, + uint32_t *type_enum, + uint32_t *data_type_enum, + uint64_t *raw_max) +{ + struct threaded_context *tc = threaded_context(_pipe); + struct pipe_context *pipe = tc->pipe; + + pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index, + name, desc, offset, data_size, type_enum, data_type_enum, raw_max); +} + +static struct pipe_query * +tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index) +{ + struct threaded_context *tc = threaded_context(_pipe); + struct pipe_context *pipe = tc->pipe; + + return pipe->new_intel_perf_query_obj(pipe, query_index); +} + +static void +tc_call_begin_intel_perf_query(struct pipe_context *pipe, union tc_payload *payload) +{ + (void)pipe->begin_intel_perf_query(pipe, payload->query); +} + +static bool +tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q) +{ + struct threaded_context *tc = threaded_context(_pipe); + + tc_add_small_call(tc, TC_CALL_begin_intel_perf_query)->query = q; + + /* assume success, begin failure can be signaled from get_intel_perf_query_data */ + return true; +} + +static void +tc_call_end_intel_perf_query(struct pipe_context *pipe, union tc_payload *payload) +{ + pipe->end_intel_perf_query(pipe, payload->query); +} + +static void +tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q) +{ + struct threaded_context *tc = threaded_context(_pipe); + + tc_add_small_call(tc, TC_CALL_end_intel_perf_query)->query = q; +} + +static void +tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q) +{ + struct threaded_context *tc = threaded_context(_pipe); + struct pipe_context *pipe = tc->pipe; + + tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */ + pipe->delete_intel_perf_query(pipe, q); +} + +static void +tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q) +{ + struct threaded_context *tc = threaded_context(_pipe); + struct pipe_context *pipe = tc->pipe; + + tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */ + pipe->wait_intel_perf_query(pipe, q); +} + +static bool +tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q) +{ + struct threaded_context *tc = threaded_context(_pipe); + struct pipe_context *pipe = tc->pipe; + + tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */ + return pipe->is_intel_perf_query_ready(pipe, q); +} + +static bool +tc_get_intel_perf_query_data(struct pipe_context *_pipe, + struct pipe_query *q, + size_t data_size, + uint32_t *data, + uint32_t *bytes_written) +{ + struct threaded_context *tc = threaded_context(_pipe); + struct pipe_context *pipe = tc->pipe; + + tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */ + return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written); +} /******************************************************************** * callback @@ -2537,7 +3170,7 @@ tc_destroy(struct pipe_context *_pipe) slab_destroy_child(&tc->pool_transfers); assert(tc->batch_slots[tc->next].num_total_call_slots == 0); pipe->destroy(pipe); - os_free_aligned(tc); + FREE(tc); } static const tc_execute execute_func[TC_NUM_CALLS] = { @@ -2575,22 +3208,14 @@ threaded_context_create(struct pipe_context *pipe, util_cpu_detect(); - if (!debug_get_bool_option("GALLIUM_THREAD", util_cpu_caps.nr_cpus > 1)) + if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1)) return pipe; - tc = os_malloc_aligned(sizeof(struct threaded_context), 16); + tc = CALLOC_STRUCT(threaded_context); if (!tc) { pipe->destroy(pipe); return NULL; } - memset(tc, 0, sizeof(*tc)); - - assert((uintptr_t)tc % 16 == 0); - /* These should be static asserts, but they don't work with MSVC */ - assert(offsetof(struct threaded_context, batch_slots) % 16 == 0); - assert(offsetof(struct threaded_context, batch_slots[0].call) % 16 == 0); - assert(offsetof(struct threaded_context, batch_slots[0].call[1]) % 16 == 0); - assert(offsetof(struct threaded_context, batch_slots[1].call) % 16 == 0); /* The driver context isn't wrapped, so set its "priv" to NULL. */ pipe->priv = NULL; @@ -2600,6 +3225,8 @@ threaded_context_create(struct pipe_context *pipe, tc->create_fence = create_fence; tc->map_buffer_alignment = pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT); + tc->ubo_alignment = + MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64); tc->base.priv = pipe; /* priv points to the wrapped driver context */ tc->base.screen = pipe->screen; tc->base.destroy = tc_destroy; @@ -2614,6 +3241,8 @@ threaded_context_create(struct pipe_context *pipe, if (!tc->base.stream_uploader || !tc->base.const_uploader) goto fail; + tc->use_forced_staging_uploads = true; + /* The queue size is the number of batches "waiting". Batches are removed * from the queue before being executed, so keep one tc_batch slot for that * execution. Also, keep one unused slot for an unflushed batch. @@ -2623,7 +3252,7 @@ threaded_context_create(struct pipe_context *pipe, for (unsigned i = 0; i < TC_MAX_BATCHES; i++) { tc->batch_slots[i].sentinel = TC_SENTINEL; - tc->batch_slots[i].pipe = pipe; + tc->batch_slots[i].tc = tc; util_queue_fence_init(&tc->batch_slots[i].fence); } @@ -2696,8 +3325,10 @@ threaded_context_create(struct pipe_context *pipe, CTX_INIT(set_min_samples); CTX_INIT(set_clip_state); CTX_INIT(set_constant_buffer); + CTX_INIT(set_inlinable_constants); CTX_INIT(set_framebuffer_state); CTX_INIT(set_polygon_stipple); + CTX_INIT(set_sample_locations); CTX_INIT(set_scissor_states); CTX_INIT(set_viewport_states); CTX_INIT(set_window_rectangles); @@ -2743,6 +3374,17 @@ threaded_context_create(struct pipe_context *pipe, CTX_INIT(create_image_handle); CTX_INIT(delete_image_handle); CTX_INIT(make_image_handle_resident); + CTX_INIT(set_frontend_noop); + CTX_INIT(init_intel_perf_query_info); + CTX_INIT(get_intel_perf_query_info); + CTX_INIT(get_intel_perf_query_counter_info); + CTX_INIT(new_intel_perf_query_obj); + CTX_INIT(begin_intel_perf_query); + CTX_INIT(end_intel_perf_query); + CTX_INIT(delete_intel_perf_query); + CTX_INIT(wait_intel_perf_query); + CTX_INIT(is_intel_perf_query_ready); + CTX_INIT(get_intel_perf_query_data); #undef CTX_INIT if (out) diff --git a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h index a32f89359..8356b2401 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context.h @@ -77,17 +77,15 @@ * - stream_output_target_destroy * - transfer_map (only unsychronized buffer mappings) * - get_query_result (when threaded_query::flushed == true) - * - * Create calls causing a sync that can't be async due to driver limitations: * - create_stream_output_target * * * Transfer_map rules for buffer mappings * -------------------------------------- * - * 1) If transfer_map has PIPE_TRANSFER_UNSYNCHRONIZED, the call is made + * 1) If transfer_map has PIPE_MAP_UNSYNCHRONIZED, the call is made * in the non-driver thread without flushing the queue. The driver will - * receive TC_TRANSFER_MAP_THREADED_UNSYNC in addition to PIPE_TRANSFER_- + * receive TC_TRANSFER_MAP_THREADED_UNSYNC in addition to PIPE_MAP_- * UNSYNCHRONIZED to indicate this. * Note that transfer_unmap is always enqueued and called from the driver * thread. @@ -107,6 +105,9 @@ * indicate this. Ignoring the flag will lead to failures. * The threaded context uses its own buffer invalidation mechanism. * + * 4) PIPE_MAP_ONCE can no longer be used to infer that a buffer will not be mapped + * a second time before it is unmapped. + * * * Rules for fences * ---------------- @@ -181,17 +182,19 @@ #ifndef U_THREADED_CONTEXT_H #define U_THREADED_CONTEXT_H +#include "c11/threads.h" #include "pipe/p_context.h" #include "pipe/p_state.h" #include "util/u_inlines.h" #include "util/u_queue.h" #include "util/u_range.h" +#include "util/u_thread.h" #include "util/slab.h" struct threaded_context; struct tc_unflushed_batch_token; -/* These are transfer flags sent to drivers. */ +/* These are map flags sent to drivers. */ /* Never infer whether it's safe to use unsychronized mappings: */ #define TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED (1u << 29) /* Don't invalidate buffers: */ @@ -280,6 +283,14 @@ struct threaded_resource { * are too large for the visible VRAM window. */ int max_forced_staging_uploads; + + /* If positive, then a staging transfer is in progress. + */ + int pending_staging_uploads; + /* If staging uploads are pending, this will hold the union of the mapped + * ranges. + */ + struct util_range pending_staging_uploads_range; }; struct threaded_transfer { @@ -311,16 +322,10 @@ union tc_payload { struct pipe_transfer *transfer; struct pipe_fence_handle *fence; uint64_t handle; + bool boolean; }; -#ifdef _MSC_VER -#define ALIGN16 __declspec(align(16)) -#else -#define ALIGN16 __attribute__((aligned(16))) -#endif - -/* Each call slot should be aligned to its own size for optimal cache usage. */ -struct ALIGN16 tc_call { +struct tc_call { unsigned sentinel; ushort num_call_slots; ushort call_id; @@ -338,7 +343,7 @@ struct tc_unflushed_batch_token { }; struct tc_batch { - struct pipe_context *pipe; + struct threaded_context *tc; unsigned sentinel; unsigned num_total_call_slots; struct tc_unflushed_batch_token *token; @@ -353,6 +358,7 @@ struct threaded_context { tc_replace_buffer_storage_func replace_buffer_storage; tc_create_fence_func create_fence; unsigned map_buffer_alignment; + unsigned ubo_alignment; struct list_head unflushed_queries; @@ -361,9 +367,26 @@ struct threaded_context { unsigned num_direct_slots; unsigned num_syncs; + bool use_forced_staging_uploads; + + /* Estimation of how much vram/gtt bytes are mmap'd in + * the current tc_batch. + */ + uint64_t bytes_mapped_estimate; + uint64_t bytes_mapped_limit; + struct util_queue queue; struct util_queue_fence *fence; +#ifndef NDEBUG + /** + * The driver thread is normally the queue thread, but + * there are cases where the queue is flushed directly + * from the frontend thread + */ + thread_id driver_thread; +#endif + unsigned last, next; struct tc_batch batch_slots[TC_MAX_BATCHES]; }; @@ -384,6 +407,12 @@ threaded_context_flush(struct pipe_context *_pipe, struct tc_unflushed_batch_token *token, bool prefer_async); +void +tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count *draws, + unsigned num_draws); + static inline struct threaded_context * threaded_context(struct pipe_context *pipe) { @@ -417,4 +446,20 @@ tc_unflushed_batch_token_reference(struct tc_unflushed_batch_token **dst, *dst = src; } +/** + * Helper for !NDEBUG builds to assert that it is called from driver + * thread. This is to help drivers ensure that various code-paths + * are not hit indirectly from pipe entry points that are called from + * front-end/state-tracker thread. + */ +static inline void +tc_assert_driver_thread(struct threaded_context *tc) +{ + if (!tc) + return; +#ifndef NDEBUG + assert(util_thread_id_equal(tc->driver_thread, util_get_thread_id())); +#endif +} + #endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context_calls.h b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context_calls.h index e6ea1b574..f1607edb4 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_threaded_context_calls.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_threaded_context_calls.h @@ -11,6 +11,8 @@ CALL(bind_sampler_states) CALL(set_framebuffer_state) CALL(set_tess_state) CALL(set_constant_buffer) +CALL(set_inlinable_constants) +CALL(set_sample_locations) CALL(set_scissor_states) CALL(set_viewport_states) CALL(set_window_rectangles) @@ -25,7 +27,9 @@ CALL(transfer_unmap) CALL(buffer_subdata) CALL(texture_subdata) CALL(emit_string_marker) -CALL(draw_vbo) +CALL(draw_single) +CALL(draw_multi) +CALL(draw_indirect) CALL(launch_grid) CALL(resource_copy_region) CALL(blit) @@ -50,6 +54,7 @@ CALL(make_texture_handle_resident) CALL(delete_image_handle) CALL(make_image_handle_resident) CALL(set_context_param) +CALL(set_frontend_noop) CALL(bind_blend_state) CALL(bind_rasterizer_state) @@ -73,3 +78,6 @@ CALL(delete_tcs_state) CALL(delete_tes_state) CALL(delete_vertex_elements_state) CALL(delete_sampler_state) + +CALL(begin_intel_perf_query) +CALL(end_intel_perf_query) diff --git a/lib/mesa/src/gallium/auxiliary/util/u_upload_mgr.c b/lib/mesa/src/gallium/auxiliary/util/u_upload_mgr.c index 4ac468524..95f434b36 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_upload_mgr.c @@ -45,15 +45,16 @@ struct u_upload_mgr { unsigned bind; /* Bitmask of PIPE_BIND_* flags. */ enum pipe_resource_usage usage; unsigned flags; - unsigned map_flags; /* Bitmask of PIPE_TRANSFER_* flags. */ + unsigned map_flags; /* Bitmask of PIPE_MAP_* flags. */ boolean map_persistent; /* If persistent mappings are supported. */ struct pipe_resource *buffer; /* Upload buffer. */ struct pipe_transfer *transfer; /* Transfer object for the upload buffer. */ uint8_t *map; /* Pointer to the mapped upload buffer. */ + unsigned buffer_size; /* Same as buffer->width0. */ unsigned offset; /* Aligned offset to the upload buffer, pointing * at the first unused byte. */ - unsigned flushed_size; /* Size we have flushed by transfer_flush_region. */ + int buffer_private_refcount; }; @@ -76,15 +77,15 @@ u_upload_create(struct pipe_context *pipe, unsigned default_size, PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT); if (upload->map_persistent) { - upload->map_flags = PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_UNSYNCHRONIZED | - PIPE_TRANSFER_PERSISTENT | - PIPE_TRANSFER_COHERENT; + upload->map_flags = PIPE_MAP_WRITE | + PIPE_MAP_UNSYNCHRONIZED | + PIPE_MAP_PERSISTENT | + PIPE_MAP_COHERENT; } else { - upload->map_flags = PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_UNSYNCHRONIZED | - PIPE_TRANSFER_FLUSH_EXPLICIT; + upload->map_flags = PIPE_MAP_WRITE | + PIPE_MAP_UNSYNCHRONIZED | + PIPE_MAP_FLUSH_EXPLICIT; } return upload; @@ -108,53 +109,34 @@ u_upload_clone(struct pipe_context *pipe, struct u_upload_mgr *upload) upload->flags); if (!upload->map_persistent && result->map_persistent) u_upload_disable_persistent(result); - else if (upload->map_persistent && - upload->map_flags & PIPE_TRANSFER_FLUSH_EXPLICIT) - u_upload_enable_flush_explicit(result); return result; } void -u_upload_enable_flush_explicit(struct u_upload_mgr *upload) -{ - assert(upload->map_persistent); - upload->map_flags &= ~PIPE_TRANSFER_COHERENT; - upload->map_flags |= PIPE_TRANSFER_FLUSH_EXPLICIT; -} - -void u_upload_disable_persistent(struct u_upload_mgr *upload) { upload->map_persistent = FALSE; - upload->map_flags &= ~(PIPE_TRANSFER_COHERENT | PIPE_TRANSFER_PERSISTENT); - upload->map_flags |= PIPE_TRANSFER_FLUSH_EXPLICIT; + upload->map_flags &= ~(PIPE_MAP_COHERENT | PIPE_MAP_PERSISTENT); + upload->map_flags |= PIPE_MAP_FLUSH_EXPLICIT; } static void upload_unmap_internal(struct u_upload_mgr *upload, boolean destroying) { - if (!upload->transfer) + if ((!destroying && upload->map_persistent) || !upload->transfer) return; - if (upload->map_flags & PIPE_TRANSFER_FLUSH_EXPLICIT) { - struct pipe_box *box = &upload->transfer->box; - unsigned flush_offset = box->x + upload->flushed_size; + struct pipe_box *box = &upload->transfer->box; - if (upload->offset > flush_offset) { - pipe_buffer_flush_mapped_range(upload->pipe, upload->transfer, - flush_offset, - upload->offset - flush_offset); - upload->flushed_size = upload->offset; - } + if (!upload->map_persistent && (int) upload->offset > box->x) { + pipe_buffer_flush_mapped_range(upload->pipe, upload->transfer, + box->x, upload->offset - box->x); } - if (destroying || !upload->map_persistent) { - pipe_transfer_unmap(upload->pipe, upload->transfer); - upload->transfer = NULL; - upload->map = NULL; - upload->flushed_size = 0; - } + pipe_transfer_unmap(upload->pipe, upload->transfer); + upload->transfer = NULL; + upload->map = NULL; } @@ -170,7 +152,17 @@ u_upload_release_buffer(struct u_upload_mgr *upload) { /* Unmap and unreference the upload buffer. */ upload_unmap_internal(upload, TRUE); + if (upload->buffer_private_refcount) { + /* Subtract the remaining private references before unreferencing + * the buffer. The mega comment below explains it. + */ + assert(upload->buffer_private_refcount > 0); + p_atomic_add(&upload->buffer->reference.count, + -upload->buffer_private_refcount); + upload->buffer_private_refcount = 0; + } pipe_resource_reference(&upload->buffer, NULL); + upload->buffer_size = 0; } @@ -181,8 +173,8 @@ u_upload_destroy(struct u_upload_mgr *upload) FREE(upload); } - -static void +/* Return the allocated buffer size or 0 if it failed. */ +static unsigned u_upload_alloc_buffer(struct u_upload_mgr *upload, unsigned min_size) { struct pipe_screen *screen = upload->pipe->screen; @@ -215,19 +207,47 @@ u_upload_alloc_buffer(struct u_upload_mgr *upload, unsigned min_size) upload->buffer = screen->resource_create(screen, &buffer); if (upload->buffer == NULL) - return; + return 0; + + /* Since atomic operations are very very slow when 2 threads are not + * sharing the same L3 cache (which happens on AMD Zen), eliminate all + * atomics in u_upload_alloc as follows: + * + * u_upload_alloc has to return a buffer reference to the caller. + * Instead of atomic_inc for every call, it does all possible future + * increments in advance here. The maximum number of times u_upload_alloc + * can be called per upload buffer is "size", because the minimum + * allocation size is 1, thus u_upload_alloc can only return "size" number + * of suballocations at most, so we will never need more. This is + * the number that is added to reference.count here. + * + * buffer_private_refcount tracks how many buffer references we can return + * without using atomics. If the buffer is full and there are still + * references left, they are atomically subtracted from reference.count + * before the buffer is unreferenced. + * + * This technique can increase CPU performance by 10%. + * + * The caller of u_upload_alloc_buffer will consume min_size bytes, + * so init the buffer_private_refcount to 1 + size - min_size, instead + * of size to avoid overflowing reference.count when size is huge. + */ + upload->buffer_private_refcount = 1 + (size - min_size); + assert(upload->buffer_private_refcount < INT32_MAX / 2); + p_atomic_add(&upload->buffer->reference.count, upload->buffer_private_refcount); /* Map the new buffer. */ upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer, 0, size, upload->map_flags, &upload->transfer); if (upload->map == NULL) { - upload->transfer = NULL; - pipe_resource_reference(&upload->buffer, NULL); - return; + u_upload_release_buffer(upload); + return 0; } + upload->buffer_size = size; upload->offset = 0; + return size; } void @@ -239,29 +259,25 @@ u_upload_alloc(struct u_upload_mgr *upload, struct pipe_resource **outbuf, void **ptr) { - unsigned buffer_size = upload->buffer ? upload->buffer->width0 : 0; - unsigned offset; + unsigned buffer_size = upload->buffer_size; + unsigned offset = MAX2(min_out_offset, upload->offset); - min_out_offset = align(min_out_offset, alignment); - - offset = align(upload->offset, alignment); - offset = MAX2(offset, min_out_offset); + offset = align(offset, alignment); /* Make sure we have enough space in the upload buffer * for the sub-allocation. */ - if (unlikely(!upload->buffer || offset + size > buffer_size)) { - u_upload_alloc_buffer(upload, min_out_offset + size); + if (unlikely(offset + size > buffer_size)) { + /* Allocate a new buffer and set the offset to the smallest one. */ + offset = align(min_out_offset, alignment); + buffer_size = u_upload_alloc_buffer(upload, offset + size); - if (unlikely(!upload->buffer)) { + if (unlikely(!buffer_size)) { *out_offset = ~0; pipe_resource_reference(outbuf, NULL); *ptr = NULL; return; } - - offset = min_out_offset; - buffer_size = upload->buffer->width0; } if (unlikely(!upload->map)) { @@ -287,9 +303,15 @@ u_upload_alloc(struct u_upload_mgr *upload, /* Emit the return values: */ *ptr = upload->map + offset; - pipe_resource_reference(outbuf, upload->buffer); *out_offset = offset; + if (*outbuf != upload->buffer) { + pipe_resource_reference(outbuf, NULL); + *outbuf = upload->buffer; + assert (upload->buffer_private_refcount > 0); + upload->buffer_private_refcount--; + } + upload->offset = offset + size; } diff --git a/lib/mesa/src/gallium/auxiliary/util/u_upload_mgr.h b/lib/mesa/src/gallium/auxiliary/util/u_upload_mgr.h index 6a4a60963..edfa0ce5f 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_upload_mgr.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_upload_mgr.h @@ -69,10 +69,6 @@ u_upload_create_default(struct pipe_context *pipe); struct u_upload_mgr * u_upload_clone(struct pipe_context *pipe, struct u_upload_mgr *upload); -/** Whether to use FLUSH_EXPLICIT with persistent mappings. */ -void -u_upload_enable_flush_explicit(struct u_upload_mgr *upload); - /** Whether to avoid persistent mappings where available */ void u_upload_disable_persistent(struct u_upload_mgr *upload); diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c index 30c4d18f6..372d61ea0 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c +++ b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.c @@ -131,6 +131,9 @@ struct u_vbuf_elements { * non-instanced. */ uint32_t noninstance_vb_mask_any; + /* Which buffers are used by multiple vertex attribs. */ + uint32_t interleaved_vb_mask; + void *driver_cso; }; @@ -147,16 +150,13 @@ struct u_vbuf { struct pipe_context *pipe; struct translate_cache *translate_cache; - struct cso_cache *cso_cache; + struct cso_cache cso_cache; /* This is what was set in set_vertex_buffers. * May contain user buffers. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; uint32_t enabled_vb_mask; - /* Saved vertex buffer. */ - struct pipe_vertex_buffer vertex_buffer0_saved; - /* Vertex buffers for the driver. * There are usually no user buffers. */ struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS]; @@ -167,13 +167,14 @@ struct u_vbuf { struct u_vbuf_elements *ve, *ve_saved; /* Vertex elements used for the translate fallback. */ - struct pipe_vertex_element fallback_velems[PIPE_MAX_ATTRIBS]; + struct cso_velems_state fallback_velems; /* If non-NULL, this is a vertex element state used for the translate * fallback and therefore used for rendering too. */ boolean using_translate; /* The vertex buffer slot index where translated vertices have been * stored in. */ unsigned fallback_vbs[VB_NUM]; + unsigned fallback_vbs_mask; /* Which buffer is a user buffer. */ uint32_t user_vb_mask; /* each bit describes a corresp. buffer */ @@ -188,7 +189,8 @@ struct u_vbuf { static void * u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, const struct pipe_vertex_element *attribs); -static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso); +static void u_vbuf_delete_vertex_elements(void *ctx, void *state, + enum cso_cache_type type); static const struct { enum pipe_format from, to; @@ -255,7 +257,8 @@ static const struct { { PIPE_FORMAT_R8G8B8A8_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT }, }; -void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps) +void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps, + bool needs64b) { unsigned i; @@ -271,6 +274,10 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps) for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) { enum pipe_format format = vbuf_format_fallbacks[i].from; + unsigned comp_bits = util_format_get_component_bits(format, 0, 0); + + if ((comp_bits > 32) && !needs64b) + continue; if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0, 0, PIPE_BIND_VERTEX_BUFFER)) { @@ -313,7 +320,6 @@ u_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps) mgr->caps = *caps; mgr->pipe = pipe; - mgr->cso_cache = cso_cache_create(); mgr->translate_cache = translate_cache_create(); memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs)); mgr->allowed_vb_mask = u_bit_consecutive(0, mgr->caps.max_vertex_buffers); @@ -322,38 +328,38 @@ u_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps) pipe->screen->get_param(pipe->screen, PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET); + cso_cache_init(&mgr->cso_cache, pipe); + cso_cache_set_delete_cso_callback(&mgr->cso_cache, + u_vbuf_delete_vertex_elements, pipe); + return mgr; } /* u_vbuf uses its own caching for vertex elements, because it needs to keep * its own preprocessed state per vertex element CSO. */ static struct u_vbuf_elements * -u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count, - const struct pipe_vertex_element *states) +u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, + const struct cso_velems_state *velems) { struct pipe_context *pipe = mgr->pipe; unsigned key_size, hash_key; struct cso_hash_iter iter; struct u_vbuf_elements *ve; - struct cso_velems_state velems_state; /* need to include the count into the stored state data too. */ - key_size = sizeof(struct pipe_vertex_element) * count + sizeof(unsigned); - velems_state.count = count; - memcpy(velems_state.velems, states, - sizeof(struct pipe_vertex_element) * count); - hash_key = cso_construct_key((void*)&velems_state, key_size); - iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS, - (void*)&velems_state, key_size); + key_size = sizeof(struct pipe_vertex_element) * velems->count + + sizeof(unsigned); + hash_key = cso_construct_key((void*)velems, key_size); + iter = cso_find_state_template(&mgr->cso_cache, hash_key, CSO_VELEMENTS, + (void*)velems, key_size); if (cso_hash_iter_is_null(iter)) { struct cso_velements *cso = MALLOC_STRUCT(cso_velements); - memcpy(&cso->state, &velems_state, key_size); - cso->data = u_vbuf_create_vertex_elements(mgr, count, states); - cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements; - cso->context = (void*)mgr; + memcpy(&cso->state, velems, key_size); + cso->data = u_vbuf_create_vertex_elements(mgr, velems->count, + velems->velems); - iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso); + iter = cso_insert_state(&mgr->cso_cache, hash_key, CSO_VELEMENTS, cso); ve = cso->data; } else { ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data; @@ -367,10 +373,10 @@ u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr, unsigned count, return ve; } -void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count, - const struct pipe_vertex_element *states) +void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, + const struct cso_velems_state *velems) { - mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, count, states); + mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, velems); } void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr) @@ -385,23 +391,22 @@ void u_vbuf_destroy(struct u_vbuf *mgr) const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_MAX_INPUTS); - mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL); + mgr->pipe->set_vertex_buffers(mgr->pipe, 0, 0, num_vb, false, NULL); for (i = 0; i < PIPE_MAX_ATTRIBS; i++) pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]); for (i = 0; i < PIPE_MAX_ATTRIBS; i++) pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]); - pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved); - translate_cache_destroy(mgr->translate_cache); - cso_cache_delete(mgr->cso_cache); + cso_cache_delete(&mgr->cso_cache); FREE(mgr); } static enum pipe_error u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, const struct pipe_draw_info *info, + const struct pipe_draw_start_count *draw, unsigned vb_mask, unsigned out_vb, int start_vertex, unsigned num_vertices, int min_index, boolean unroll_indices) @@ -432,8 +437,23 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, unsigned size = vb->stride ? num_vertices * vb->stride : sizeof(double)*4; - if (!vb->buffer.resource) + if (!vb->buffer.resource) { + static uint64_t dummy_buf[4] = { 0 }; + tr->set_buffer(tr, i, dummy_buf, 0, 0); continue; + } + + if (vb->stride) { + /* the stride cannot be used to calculate the map size of the buffer, + * as it only determines the bytes between elements, not the size of elements + * themselves, meaning that if stride < element_size, the mapped size will + * be too small and conversion will overrun the map buffer + * + * instead, add the size of the largest possible attribute to ensure the map is large enough + */ + unsigned last_offset = offset + size - vb->stride; + size = MAX2(size, last_offset + sizeof(double)*4); + } if (offset + size > vb->buffer.resource->width0) { /* Don't try to map past end of buffer. This often happens when @@ -454,7 +474,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, } map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size, - PIPE_TRANSFER_READ, &vb_transfer[i]); + PIPE_MAP_READ, &vb_transfer[i]); } /* Subtract min_index so that indexing with the index buffer works. */ @@ -468,12 +488,12 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, /* Translate. */ if (unroll_indices) { struct pipe_transfer *transfer = NULL; - const unsigned offset = info->start * info->index_size; + const unsigned offset = draw->start * info->index_size; uint8_t *map; /* Create and map the output buffer. */ u_upload_alloc(mgr->pipe->stream_uploader, 0, - key->output_stride * info->count, 4, + key->output_stride * draw->count, 4, &out_offset, &out_buffer, (void**)&out_map); if (!out_buffer) @@ -483,19 +503,19 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, map = (uint8_t*)info->index.user + offset; } else { map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset, - info->count * info->index_size, - PIPE_TRANSFER_READ, &transfer); + draw->count * info->index_size, + PIPE_MAP_READ, &transfer); } switch (info->index_size) { case 4: - tr->run_elts(tr, (unsigned*)map, info->count, 0, 0, out_map); + tr->run_elts(tr, (unsigned*)map, draw->count, 0, 0, out_map); break; case 2: - tr->run_elts16(tr, (uint16_t*)map, info->count, 0, 0, out_map); + tr->run_elts16(tr, (uint16_t*)map, draw->count, 0, 0, out_map); break; case 1: - tr->run_elts8(tr, map, info->count, 0, 0, out_map); + tr->run_elts8(tr, map, draw->count, 0, 0, out_map); break; } @@ -558,6 +578,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, return FALSE; memset(fallback_vbs, ~0, sizeof(fallback_vbs)); + mgr->fallback_vbs_mask = 0; /* Find free slots for each type if needed. */ unused_vb_mask_orig = unused_vb_mask; @@ -572,6 +593,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, index = ffs(unused_vb_mask) - 1; fallback_vbs[type] = index; + mgr->fallback_vbs_mask |= 1 << index; unused_vb_mask &= ~(1 << index); /*printf("found slot=%i for type=%i\n", index, type);*/ } @@ -583,6 +605,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, uint32_t index = ffs(unused_vb_mask_orig) - 1; /* When sharing one vertex buffer use per-vertex frequency for everything. */ fallback_vbs[VB_VERTEX] = index; + mgr->fallback_vbs_mask = 1 << index; mask[VB_VERTEX] = mask[VB_VERTEX] | mask[VB_CONST] | mask[VB_INSTANCE]; mask[VB_CONST] = 0; mask[VB_INSTANCE] = 0; @@ -601,6 +624,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, static boolean u_vbuf_translate_begin(struct u_vbuf *mgr, const struct pipe_draw_info *info, + const struct pipe_draw_start_count *draw, int start_vertex, unsigned num_vertices, int min_index, boolean unroll_indices) { @@ -707,8 +731,8 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, for (type = 0; type < VB_NUM; type++) { if (key[type].nr_elements) { enum pipe_error err; - err = u_vbuf_translate_buffers(mgr, &key[type], info, mask[type], - mgr->fallback_vbs[type], + err = u_vbuf_translate_buffers(mgr, &key[type], info, draw, + mask[type], mgr->fallback_vbs[type], start[type], num[type], min_index, unroll_indices && type == VB_VERTEX); if (err != PIPE_OK) @@ -726,10 +750,10 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, for (type = 0; type < VB_NUM; type++) { if (elem_index[type][i] < key[type].nr_elements) { struct translate_element *te = &key[type].element[elem_index[type][i]]; - mgr->fallback_velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor; - mgr->fallback_velems[i].src_format = te->output_format; - mgr->fallback_velems[i].src_offset = te->output_offset; - mgr->fallback_velems[i].vertex_buffer_index = mgr->fallback_vbs[type]; + mgr->fallback_velems.velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor; + mgr->fallback_velems.velems[i].src_format = te->output_format; + mgr->fallback_velems.velems[i].src_offset = te->output_offset; + mgr->fallback_velems.velems[i].vertex_buffer_index = mgr->fallback_vbs[type]; /* elem_index[type][i] can only be set for one type. */ assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u); @@ -739,13 +763,14 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, } /* No translating, just copy the original vertex element over. */ if (type == VB_NUM) { - memcpy(&mgr->fallback_velems[i], &mgr->ve->ve[i], + memcpy(&mgr->fallback_velems.velems[i], &mgr->ve->ve[i], sizeof(struct pipe_vertex_element)); } } - u_vbuf_set_vertex_elements_internal(mgr, mgr->ve->count, - mgr->fallback_velems); + mgr->fallback_velems.count = mgr->ve->count; + + u_vbuf_set_vertex_elements_internal(mgr, &mgr->fallback_velems); mgr->using_translate = TRUE; return TRUE; } @@ -764,11 +789,11 @@ static void u_vbuf_translate_end(struct u_vbuf *mgr) if (vb != ~0u) { pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL); mgr->fallback_vbs[i] = ~0; - - /* This will cause the buffer to be unbound in the driver later. */ - mgr->dirty_real_vb_mask |= 1 << vb; } } + /* This will cause the buffer to be unbound in the driver later. */ + mgr->dirty_real_vb_mask |= mgr->fallback_vbs_mask; + mgr->fallback_vbs_mask = 0; } static void * @@ -790,13 +815,17 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, * supported. */ for (i = 0; i < count; i++) { enum pipe_format format = ve->ve[i].src_format; + unsigned vb_index_bit = 1 << ve->ve[i].vertex_buffer_index; ve->src_format_size[i] = util_format_get_blocksize(format); - used_buffers |= 1 << ve->ve[i].vertex_buffer_index; + if (used_buffers & vb_index_bit) + ve->interleaved_vb_mask |= vb_index_bit; + + used_buffers |= vb_index_bit; if (!ve->ve[i].instance_divisor) { - ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index; + ve->noninstance_vb_mask_any |= vb_index_bit; } format = mgr->caps.format_translation[format]; @@ -810,9 +839,9 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, (!mgr->caps.velem_src_offset_unaligned && ve->ve[i].src_offset % 4 != 0)) { ve->incompatible_elem_mask |= 1 << i; - ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index; + ve->incompatible_vb_mask_any |= vb_index_bit; } else { - ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index; + ve->compatible_vb_mask_any |= vb_index_bit; } } @@ -848,18 +877,23 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, return ve; } -static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso) +static void u_vbuf_delete_vertex_elements(void *ctx, void *state, + enum cso_cache_type type) { - struct pipe_context *pipe = mgr->pipe; - struct u_vbuf_elements *ve = cso; + struct pipe_context *pipe = (struct pipe_context*)ctx; + struct cso_velements *cso = (struct cso_velements*)state; + struct u_vbuf_elements *ve = (struct u_vbuf_elements*)cso->data; if (ve->driver_cso) pipe->delete_vertex_elements_state(pipe, ve->driver_cso); FREE(ve); + FREE(cso); } void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership, const struct pipe_vertex_buffer *bufs) { unsigned i; @@ -871,7 +905,8 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, uint32_t incompatible_vb_mask = 0; /* which buffers have a non-zero stride */ uint32_t nonzero_stride_vb_mask = 0; - const uint32_t mask = ~(((1ull << count) - 1) << start_slot); + const uint32_t mask = + ~(((1ull << (count + unbind_num_trailing_slots)) - 1) << start_slot); /* Zero out the bits we are going to rewrite completely. */ mgr->user_vb_mask &= mask; @@ -882,16 +917,18 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, if (!bufs) { struct pipe_context *pipe = mgr->pipe; /* Unbind. */ + unsigned total_count = count + unbind_num_trailing_slots; mgr->dirty_real_vb_mask &= mask; - for (i = 0; i < count; i++) { + for (i = 0; i < total_count; i++) { unsigned dst_index = start_slot + i; pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]); pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]); } - pipe->set_vertex_buffers(pipe, start_slot, count, NULL); + pipe->set_vertex_buffers(pipe, start_slot, count, + unbind_num_trailing_slots, false, NULL); return; } @@ -907,7 +944,12 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, continue; } - pipe_vertex_buffer_reference(orig_vb, vb); + if (take_ownership) { + pipe_vertex_buffer_unreference(orig_vb); + memcpy(orig_vb, vb, sizeof(*vb)); + } else { + pipe_vertex_buffer_reference(orig_vb, vb); + } if (vb->stride) { nonzero_stride_vb_mask |= 1 << dst_index; @@ -936,6 +978,13 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, pipe_vertex_buffer_reference(real_vb, vb); } + for (i = 0; i < unbind_num_trailing_slots; i++) { + unsigned dst_index = start_slot + count + i; + + pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]); + pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]); + } + mgr->user_vb_mask |= user_vb_mask; mgr->incompatible_vb_mask |= incompatible_vb_mask; mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask; @@ -946,62 +995,104 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, mgr->dirty_real_vb_mask |= ~mask; } +static ALWAYS_INLINE bool +get_upload_offset_size(struct u_vbuf *mgr, + const struct pipe_vertex_buffer *vb, + struct u_vbuf_elements *ve, + const struct pipe_vertex_element *velem, + unsigned vb_index, unsigned velem_index, + int start_vertex, unsigned num_vertices, + int start_instance, unsigned num_instances, + unsigned *offset, unsigned *size) +{ + /* Skip the buffers generated by translate. */ + if ((1 << vb_index) & mgr->fallback_vbs_mask || !vb->is_user_buffer) + return false; + + unsigned instance_div = velem->instance_divisor; + *offset = vb->buffer_offset + velem->src_offset; + + if (!vb->stride) { + /* Constant attrib. */ + *size = ve->src_format_size[velem_index]; + } else if (instance_div) { + /* Per-instance attrib. */ + + /* Figure out how many instances we'll render given instance_div. We + * can't use the typical div_round_up() pattern because the CTS uses + * instance_div = ~0 for a test, which overflows div_round_up()'s + * addition. + */ + unsigned count = num_instances / instance_div; + if (count * instance_div != num_instances) + count++; + + *offset += vb->stride * start_instance; + *size = vb->stride * (count - 1) + ve->src_format_size[velem_index]; + } else { + /* Per-vertex attrib. */ + *offset += vb->stride * start_vertex; + *size = vb->stride * (num_vertices - 1) + ve->src_format_size[velem_index]; + } + return true; +} + + static enum pipe_error u_vbuf_upload_buffers(struct u_vbuf *mgr, int start_vertex, unsigned num_vertices, int start_instance, unsigned num_instances) { unsigned i; - unsigned nr_velems = mgr->ve->count; + struct u_vbuf_elements *ve = mgr->ve; + unsigned nr_velems = ve->count; const struct pipe_vertex_element *velems = - mgr->using_translate ? mgr->fallback_velems : mgr->ve->ve; + mgr->using_translate ? mgr->fallback_velems.velems : ve->ve; + + /* Faster path when no vertex attribs are interleaved. */ + if ((ve->interleaved_vb_mask & mgr->user_vb_mask) == 0) { + for (i = 0; i < nr_velems; i++) { + const struct pipe_vertex_element *velem = &velems[i]; + unsigned index = velem->vertex_buffer_index; + struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index]; + unsigned offset, size; + + if (!get_upload_offset_size(mgr, vb, ve, velem, index, i, start_vertex, + num_vertices, start_instance, num_instances, + &offset, &size)) + continue; + + struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[index]; + const uint8_t *ptr = mgr->vertex_buffer[index].buffer.user; + + u_upload_data(mgr->pipe->stream_uploader, + mgr->has_signed_vb_offset ? 0 : offset, + size, 4, ptr + offset, &real_vb->buffer_offset, + &real_vb->buffer.resource); + if (!real_vb->buffer.resource) + return PIPE_ERROR_OUT_OF_MEMORY; + + real_vb->buffer_offset -= offset; + } + return PIPE_OK; + } + unsigned start_offset[PIPE_MAX_ATTRIBS]; unsigned end_offset[PIPE_MAX_ATTRIBS]; uint32_t buffer_mask = 0; + /* Slower path supporting interleaved vertex attribs using 2 loops. */ /* Determine how much data needs to be uploaded. */ for (i = 0; i < nr_velems; i++) { const struct pipe_vertex_element *velem = &velems[i]; unsigned index = velem->vertex_buffer_index; struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index]; - unsigned instance_div, first, size, index_bit; - - /* Skip the buffers generated by translate. */ - if (index == mgr->fallback_vbs[VB_VERTEX] || - index == mgr->fallback_vbs[VB_INSTANCE] || - index == mgr->fallback_vbs[VB_CONST]) { - continue; - } + unsigned first, size, index_bit; - if (!vb->is_user_buffer) { + if (!get_upload_offset_size(mgr, vb, ve, velem, index, i, start_vertex, + num_vertices, start_instance, num_instances, + &first, &size)) continue; - } - - instance_div = velem->instance_divisor; - first = vb->buffer_offset + velem->src_offset; - - if (!vb->stride) { - /* Constant attrib. */ - size = mgr->ve->src_format_size[i]; - } else if (instance_div) { - /* Per-instance attrib. */ - - /* Figure out how many instances we'll render given instance_div. We - * can't use the typical div_round_up() pattern because the CTS uses - * instance_div = ~0 for a test, which overflows div_round_up()'s - * addition. - */ - unsigned count = num_instances / instance_div; - if (count * instance_div != num_instances) - count++; - - first += vb->stride * start_instance; - size = vb->stride * (count - 1) + mgr->ve->src_format_size[i]; - } else { - /* Per-vertex attrib. */ - first += vb->stride * start_vertex; - size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i]; - } index_bit = 1 << index; @@ -1076,10 +1167,11 @@ static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr) static void u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info, + unsigned count, const void *indices, unsigned *out_min_index, unsigned *out_max_index) { - if (!info->count) { + if (!count) { *out_min_index = 0; *out_max_index = 0; return; @@ -1091,7 +1183,7 @@ u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info, unsigned max = 0; unsigned min = ~0u; if (info->primitive_restart) { - for (unsigned i = 0; i < info->count; i++) { + for (unsigned i = 0; i < count; i++) { if (ui_indices[i] != info->restart_index) { if (ui_indices[i] > max) max = ui_indices[i]; if (ui_indices[i] < min) min = ui_indices[i]; @@ -1099,7 +1191,7 @@ u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info, } } else { - for (unsigned i = 0; i < info->count; i++) { + for (unsigned i = 0; i < count; i++) { if (ui_indices[i] > max) max = ui_indices[i]; if (ui_indices[i] < min) min = ui_indices[i]; } @@ -1113,7 +1205,7 @@ u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info, unsigned short max = 0; unsigned short min = ~((unsigned short)0); if (info->primitive_restart) { - for (unsigned i = 0; i < info->count; i++) { + for (unsigned i = 0; i < count; i++) { if (us_indices[i] != info->restart_index) { if (us_indices[i] > max) max = us_indices[i]; if (us_indices[i] < min) min = us_indices[i]; @@ -1121,7 +1213,7 @@ u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info, } } else { - for (unsigned i = 0; i < info->count; i++) { + for (unsigned i = 0; i < count; i++) { if (us_indices[i] > max) max = us_indices[i]; if (us_indices[i] < min) min = us_indices[i]; } @@ -1135,7 +1227,7 @@ u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info, unsigned char max = 0; unsigned char min = ~((unsigned char)0); if (info->primitive_restart) { - for (unsigned i = 0; i < info->count; i++) { + for (unsigned i = 0; i < count; i++) { if (ub_indices[i] != info->restart_index) { if (ub_indices[i] > max) max = ub_indices[i]; if (ub_indices[i] < min) min = ub_indices[i]; @@ -1143,7 +1235,7 @@ u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info, } } else { - for (unsigned i = 0; i < info->count; i++) { + for (unsigned i = 0; i < count; i++) { if (ub_indices[i] > max) max = ub_indices[i]; if (ub_indices[i] < min) min = ub_indices[i]; } @@ -1153,12 +1245,13 @@ u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info, break; } default: - assert(0); + unreachable("bad index size"); } } void u_vbuf_get_minmax_index(struct pipe_context *pipe, const struct pipe_draw_info *info, + const struct pipe_draw_start_count *draw, unsigned *out_min_index, unsigned *out_max_index) { struct pipe_transfer *transfer = NULL; @@ -1166,15 +1259,16 @@ void u_vbuf_get_minmax_index(struct pipe_context *pipe, if (info->has_user_indices) { indices = (uint8_t*)info->index.user + - info->start * info->index_size; + draw->start * info->index_size; } else { indices = pipe_buffer_map_range(pipe, info->index.resource, - info->start * info->index_size, - info->count * info->index_size, - PIPE_TRANSFER_READ, &transfer); + draw->start * info->index_size, + draw->count * info->index_size, + PIPE_MAP_READ, &transfer); } - u_vbuf_get_minmax_index_mapped(info, indices, out_min_index, out_max_index); + u_vbuf_get_minmax_index_mapped(info, draw->count, indices, + out_min_index, out_max_index); if (transfer) { pipe_buffer_unmap(pipe, transfer); @@ -1189,7 +1283,7 @@ static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr) start_slot = ffs(mgr->dirty_real_vb_mask) - 1; count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot); - pipe->set_vertex_buffers(pipe, start_slot, count, + pipe->set_vertex_buffers(pipe, start_slot, count, 0, false, mgr->real_vertex_buffer + start_slot); mgr->dirty_real_vb_mask = 0; } @@ -1199,27 +1293,31 @@ u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info, unsigned *indirect_data, unsigned stride, unsigned draw_count) { + /* Increase refcount to be able to use take_index_buffer_ownership with + * all draws. + */ + if (draw_count > 1 && info->take_index_buffer_ownership) + p_atomic_add(&info->index.resource->reference.count, draw_count - 1); + assert(info->index_size); - info->indirect = NULL; for (unsigned i = 0; i < draw_count; i++) { + struct pipe_draw_start_count draw; unsigned offset = i * stride / 4; - info->count = indirect_data[offset + 0]; + draw.count = indirect_data[offset + 0]; info->instance_count = indirect_data[offset + 1]; - - if (!info->count || !info->instance_count) - continue; - - info->start = indirect_data[offset + 2]; + draw.start = indirect_data[offset + 2]; info->index_bias = indirect_data[offset + 3]; info->start_instance = indirect_data[offset + 4]; - u_vbuf_draw_vbo(mgr, info); + u_vbuf_draw_vbo(mgr, info, NULL, draw); } } -void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) +void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count draw) { struct pipe_context *pipe = mgr->pipe; int start_vertex; @@ -1231,6 +1329,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) const uint32_t incompatible_vb_mask = mgr->incompatible_vb_mask & used_vb_mask; struct pipe_draw_info new_info; + struct pipe_draw_start_count new_draw; /* Normal draw. No fallback and no user buffers. */ if (!incompatible_vb_mask && @@ -1242,15 +1341,15 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) u_vbuf_set_driver_vertex_buffers(mgr); } - pipe->draw_vbo(pipe, info); + pipe->draw_vbo(pipe, info, indirect, &draw, 1); return; } new_info = *info; + new_draw = draw; /* Handle indirect (multi)draws. */ - if (new_info.indirect) { - const struct pipe_draw_indirect_info *indirect = new_info.indirect; + if (indirect && indirect->buffer) { unsigned draw_count = 0; /* Get the number of draws. */ @@ -1263,13 +1362,13 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) } if (!draw_count) - return; + goto cleanup; unsigned data_size = (draw_count - 1) * indirect->stride + (new_info.index_size ? 20 : 16); unsigned *data = malloc(data_size); if (!data) - return; /* report an error? */ + goto cleanup; /* report an error? */ /* Read the used buffer range only once, because the read can be * uncached. @@ -1317,6 +1416,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) * These values determine the user buffer bounds to upload. */ new_info.index_bias = index_bias0; + new_info.index_bounds_valid = true; new_info.min_index = ~0u; new_info.max_index = 0; new_info.start_instance = ~0u; @@ -1329,7 +1429,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) indices = (uint8_t*)info->index.user; } else { indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource, - PIPE_TRANSFER_READ, &transfer); + PIPE_MAP_READ, &transfer); } for (unsigned i = 0; i < draw_count; i++) { @@ -1349,8 +1449,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) /* Update the index range. */ unsigned min, max; - new_info.count = count; /* only used by get_minmax_index */ - u_vbuf_get_minmax_index_mapped(&new_info, + u_vbuf_get_minmax_index_mapped(&new_info, count, indices + new_info.index_size * start, &min, &max); @@ -1367,7 +1466,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) new_info.instance_count = end_instance - new_info.start_instance; if (new_info.start_instance == ~0u || !new_info.instance_count) - return; + goto cleanup; } else { /* Non-indexed multidraw. * @@ -1378,7 +1477,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) * This efficiently processes the multidraw with the time complexity * equal to 1 draw call. */ - new_info.start = ~0u; + new_draw.start = ~0u; new_info.start_instance = ~0u; unsigned end_vertex = 0; unsigned end_instance = 0; @@ -1390,7 +1489,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) unsigned start_instance = data[offset + 3]; unsigned instance_count = data[offset + 1]; - new_info.start = MIN2(new_info.start, start); + new_draw.start = MIN2(new_draw.start, start); new_info.start_instance = MIN2(new_info.start_instance, start_instance); @@ -1400,12 +1499,15 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) free(data); /* Set the final counts. */ - new_info.count = end_vertex - new_info.start; + new_draw.count = end_vertex - new_draw.start; new_info.instance_count = end_instance - new_info.start_instance; - if (new_info.start == ~0u || !new_info.count || !new_info.instance_count) - return; + if (new_draw.start == ~0u || !new_draw.count || !new_info.instance_count) + goto cleanup; } + } else { + if ((!indirect && !new_draw.count) || !new_info.instance_count) + goto cleanup; } if (new_info.index_size) { @@ -1413,11 +1515,11 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) if (u_vbuf_need_minmax_index(mgr)) { unsigned max_index; - if (new_info.max_index != ~0u) { + if (new_info.index_bounds_valid) { min_index = new_info.min_index; max_index = new_info.max_index; } else { - u_vbuf_get_minmax_index(mgr->pipe, &new_info, + u_vbuf_get_minmax_index(mgr->pipe, &new_info, &new_draw, &min_index, &max_index); } @@ -1430,10 +1532,9 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) * We would have to break this drawing operation into several ones. */ /* Use some heuristic to see if unrolling indices improves * performance. */ - if (!info->indirect && + if (!indirect && !new_info.primitive_restart && - num_vertices > new_info.count*2 && - num_vertices - new_info.count > 32 && + util_is_vbo_upload_ratio_too_large(new_draw.count, num_vertices) && !u_vbuf_mapping_vertex_buffer_blocks(mgr)) { unroll_indices = TRUE; user_vb_mask &= ~(mgr->nonzero_stride_vb_mask & @@ -1446,8 +1547,8 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) min_index = 0; } } else { - start_vertex = new_info.start; - num_vertices = new_info.count; + start_vertex = new_draw.start; + num_vertices = new_draw.count; min_index = 0; } @@ -1455,18 +1556,20 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) if (unroll_indices || incompatible_vb_mask || mgr->ve->incompatible_elem_mask) { - if (!u_vbuf_translate_begin(mgr, &new_info, start_vertex, num_vertices, + if (!u_vbuf_translate_begin(mgr, &new_info, &new_draw, + start_vertex, num_vertices, min_index, unroll_indices)) { debug_warn_once("u_vbuf_translate_begin() failed"); - return; + goto cleanup; } if (unroll_indices) { new_info.index_size = 0; new_info.index_bias = 0; + new_info.index_bounds_valid = true; new_info.min_index = 0; - new_info.max_index = new_info.count - 1; - new_info.start = 0; + new_info.max_index = new_draw.count - 1; + new_draw.start = 0; } user_vb_mask &= ~(incompatible_vb_mask | @@ -1479,7 +1582,7 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) new_info.start_instance, new_info.instance_count) != PIPE_OK) { debug_warn_once("u_vbuf_upload_buffers() failed"); - return; + goto cleanup; } mgr->dirty_real_vb_mask |= user_vb_mask; @@ -1507,13 +1610,21 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) */ u_upload_unmap(pipe->stream_uploader); - u_vbuf_set_driver_vertex_buffers(mgr); + if (mgr->dirty_real_vb_mask) + u_vbuf_set_driver_vertex_buffers(mgr); - pipe->draw_vbo(pipe, &new_info); + pipe->draw_vbo(pipe, &new_info, indirect, &new_draw, 1); if (mgr->using_translate) { u_vbuf_translate_end(mgr); } + return; + +cleanup: + if (info->take_index_buffer_ownership) { + struct pipe_resource *indexbuf = info->index.resource; + pipe_resource_reference(&indexbuf, NULL); + } } void u_vbuf_save_vertex_elements(struct u_vbuf *mgr) @@ -1533,15 +1644,3 @@ void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr) } mgr->ve_saved = NULL; } - -void u_vbuf_save_vertex_buffer0(struct u_vbuf *mgr) -{ - pipe_vertex_buffer_reference(&mgr->vertex_buffer0_saved, - &mgr->vertex_buffer[0]); -} - -void u_vbuf_restore_vertex_buffer0(struct u_vbuf *mgr) -{ - u_vbuf_set_vertex_buffers(mgr, 0, 1, &mgr->vertex_buffer0_saved); - pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved); -} diff --git a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h index 8167d997a..fcf99fcd3 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_vbuf.h @@ -38,6 +38,7 @@ #include "pipe/p_format.h" struct cso_context; +struct cso_velems_state; struct u_vbuf; /* Hardware vertex fetcher limitations can be described by this structure. */ @@ -61,7 +62,8 @@ struct u_vbuf_caps { }; -void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps); +void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps, + bool needs64b); struct u_vbuf * u_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps); @@ -69,21 +71,24 @@ u_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps); void u_vbuf_destroy(struct u_vbuf *mgr); /* State and draw functions. */ -void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, unsigned count, - const struct pipe_vertex_element *states); +void u_vbuf_set_vertex_elements(struct u_vbuf *mgr, + const struct cso_velems_state *velems); void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr); void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership, const struct pipe_vertex_buffer *bufs); -void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info); +void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count draw); void u_vbuf_get_minmax_index(struct pipe_context *pipe, const struct pipe_draw_info *info, + const struct pipe_draw_start_count *draw, unsigned *out_min_index, unsigned *out_max_index); /* Save/restore functionality. */ void u_vbuf_save_vertex_elements(struct u_vbuf *mgr); void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr); -void u_vbuf_save_vertex_buffer0(struct u_vbuf *mgr); -void u_vbuf_restore_vertex_buffer0(struct u_vbuf *mgr); #endif diff --git a/lib/mesa/src/gallium/auxiliary/util/u_video.h b/lib/mesa/src/gallium/auxiliary/util/u_video.h index f6e93dd03..ae3f024db 100644 --- a/lib/mesa/src/gallium/auxiliary/util/u_video.h +++ b/lib/mesa/src/gallium/auxiliary/util/u_video.h @@ -83,6 +83,9 @@ u_reduce_video_profile(enum pipe_video_profile profile) case PIPE_VIDEO_PROFILE_VP9_PROFILE2: return PIPE_VIDEO_FORMAT_VP9; + case PIPE_VIDEO_PROFILE_AV1_MAIN: + return PIPE_VIDEO_FORMAT_AV1; + default: return PIPE_VIDEO_FORMAT_UNKNOWN; } |